我有以下变量:comorb_uveitis、comorb_ibd、comorb_psoriasis、asas_criteria_3_enthesitis、asas_criteria_2_arthritis、basdai_baseline
我希望基于以下条件创建一个名为“EAM_2”(=存在关节外表现 N=2)的新变量:
- 如果存在五个变量中的两个(葡萄膜炎、ibd、银屑病、附着点炎、关节炎)
- 并且如果 basdai_baseline 存在
- 则返回 1,否则返回 0
资料样本
# dput() output of the sample data: a data.frame with 20 rows and six columns
# (five 0/1/NA integer indicator columns plus the numeric basdai_baseline).
structure(list(comorb_uveitis = c(0L, 0L, 0L, 0L, 1L, NA, 0L,
0L, NA, 0L, 1L, 0L, 1L, NA, 0L, 0L, 0L, 0L, 0L, 0L), comorb_ibd = c(0L,
1L, 0L, 0L, 0L, NA, 0L, 0L, NA, 1L, 0L, 0L, 0L, NA, 0L, 0L, 0L,
0L, 0L, 0L), comorb_psoriasis = c(0L, 0L, 0L, 0L, 0L, NA, 0L,
0L, NA, 0L, 0L, 0L, 0L, NA, 1L, 0L, 0L, 0L, 0L, 0L), asas_criteria_3_enthesitis = c(1L,
0L, 1L, 0L, NA, NA, 0L, 1L, NA, 1L, NA, NA, 1L, 1L, 1L, 0L, NA,
NA, 1L, NA), asas_criteria_2_arthritis = c(1L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L),
basdai_baseline = c(70, NA, 46, 36, 29, 40, NA, NA, NA, 52,
30, NA, 47, 98, 56, 57, NA, NA, 72, NA)), row.names = c("76271",
"46010", "76621", "46310", "76761", "76831", "76841", "76871",
"76951", "76961", "77011", "77041", "77091", "77281", "77291",
"77561", "77691", "77711", "77791", "77871"), class = "data.frame")
我目前写到了这一步,但这段代码检查的是是否至少存在“一个”EAM:
# NOTE(review): this attempt never counts how many manifestations are present.
# - Each pair `x == 1 | x == 0` is TRUE for any non-missing 0/1 value, so the
#   chain of `|` only tests whether some indicator is recorded.
# - `&` binds tighter than `|`, so the basdai_baseline test is combined only
#   with the final `asas_criteria_2_arthritis == 0` comparison.
# - `basdai_baseline != is.na(basdai_baseline)` compares the score with a
#   logical value; it is not a missingness test (`!is.na(basdai_baseline)` is).
# The result is assigned to both `df` and `df2`.
df <- df2 <- mutate(df, EAM_2 = if_else (comorb_uveitis == 1 | comorb_uveitis == 0 | comorb_ibd == 1 | comorb_ibd == 0
| comorb_psoriasis == 1 | comorb_psoriasis == 0 | asas_criteria_3_enthesitis == 1 | asas_criteria_3_enthesitis == 0
| asas_criteria_2_arthritis == 1 | asas_criteria_2_arthritis == 0 & basdai_baseline != is.na(basdai_baseline), 1, 0))
编辑:尝试 @Iago 提出的代码后,得到如下结果:
EAM_2 comorb_uveitis comorb_ibd comorb_psoriasis asas_criteria_3_enthesitis asas_criteria_2_arthritis basdai_baseline
76271 1 0 0 0 1 1 70
46010 0 0 1 0 0 1 NA
76621 0 0 0 0 1 0 46
46310 0 0 0 0 0 1 36
76761 0 1 0 0 NA 0 29
76831 0 NA NA NA NA 1 40
76841 0 0 0 0 0 0 NA
76871 0 0 0 0 1 1 NA
76951 0 NA NA NA NA 0 NA
76961 1 0 1 0 1 0 52
77011 0 1 0 0 NA 0 30
77041 0 0 0 0 NA 1 NA
77091 1 1 0 0 1 1 47
77281 0 NA NA NA 1 1 98
81061 0 NA NA NA 1 1 84
最后两行存在错误。
uj5u.com热心网友回复:
library(dplyr)
# EAM_2 is 1 when at least two of the five indicator columns equal 1 and
# basdai_baseline is not missing; otherwise 0. Missing indicators are skipped
# when counting (na.rm = TRUE).
df %>%
  mutate(
    EAM_2 = case_when(
      !is.na(basdai_baseline) &
        rowSums(
          across(
            c(
              comorb_uveitis,
              comorb_ibd,
              comorb_psoriasis,
              asas_criteria_3_enthesitis,
              asas_criteria_2_arthritis
            ),
            ~ .x == 1L
          ),
          na.rm = TRUE
        ) >= 2 ~ 1,
      TRUE ~ 0
    )
  )
comorb_uveitis comorb_ibd comorb_psoriasis asas_criteria_3_enthesitis asas_criteria_2_arthritis basdai_baseline EAM_2
76271 0 0 0 1 1 70 1
46010 0 1 0 0 1 NA 0
76621 0 0 0 1 0 46 0
46310 0 0 0 0 1 36 0
76761 1 0 0 NA 0 29 0
76831 NA NA NA NA 1 40 0
76841 0 0 0 0 0 NA 0
76871 0 0 0 1 1 NA 0
76951 NA NA NA NA 0 NA 0
76961 0 1 0 1 0 52 1
77011 1 0 0 NA 0 30 0
77041 0 0 0 NA 1 NA 0
77091 1 0 0 1 1 47 1
77281 NA NA NA 1 1 98 1
77291 0 0 1 1 1 56 1
77561 0 0 0 0 1 57 0
77691 0 0 0 NA 0 NA 0
77711 0 0 0 NA 0 NA 0
77791 0 0 0 1 1 72 1
77871 0 0 0 NA 1 NA 0
当然,您也可以使用 `if_else` 或 `ifelse` 代替 `case_when`,并用匿名函数代替 `~`:
# Same rule written with if_else() and an ordinary anonymous function:
# count the indicator columns equal to 1 per row (ignoring NA) and require
# a non-missing basdai_baseline.
df %>%
  mutate(
    EAM_2 = if_else(
      !is.na(basdai_baseline) &
        rowSums(
          across(
            c(
              comorb_uveitis,
              comorb_ibd,
              comorb_psoriasis,
              asas_criteria_3_enthesitis,
              asas_criteria_2_arthritis
            ),
            function(indicator) indicator == 1L
          ),
          na.rm = TRUE
        ) >= 2,
      1,
      0
    )
  )
如果目标是检查非缺失变量(而不是等于 1 的变量),那么答案将是
# Variant that counts how many of the five indicator columns are non-missing
# (regardless of their value) and still requires basdai_baseline to be present.
df %>%
  mutate(
    EAM_2 = case_when(
      !is.na(basdai_baseline) &
        rowSums(
          across(
            c(
              comorb_uveitis,
              comorb_ibd,
              comorb_psoriasis,
              asas_criteria_3_enthesitis,
              asas_criteria_2_arthritis
            ),
            ~ !is.na(.x)
          )
        ) >= 2 ~ 1,
      TRUE ~ 0
    )
  )
uj5u.com热心网友回复:
简单的:
# Base R version, read as "at least 2" manifestations: sum the five 0/1
# indicator columns per row (NA skipped), require a non-missing
# basdai_baseline, and store the flag as numeric 0/1 rather than TRUE/FALSE.
df$EAM_2 <- as.numeric(
  rowSums(
    df[c(
      "comorb_uveitis", "comorb_ibd", "comorb_psoriasis",
      "asas_criteria_3_enthesitis", "asas_criteria_2_arthritis"
    )],
    na.rm = TRUE
  ) >= 2 &
    !is.na(df$basdai_baseline)
)
0 评论