当前位置: 首页 > 知识库问答 >
问题:

使用NA和case_when按条件重新编码多列中的数据

养枫涟
2023-03-14

数据文件:-已编辑-

我正在使用ANES 2016时间序列研究数据。使用以下代码和类别对缺失数据进行编码,以表明其缺失的原因:

-1.不适用
-2.在单独文件或编码版本中提供的文本响应将包含在未来版本中
-3.受限
-4.错误
-5.中断,足够的部分信息
-6.没有选举后采访
-7.没有选后数据,由于信息不完整而删除
-8.不知道
-9.拒绝

我想使用 Tidyverse 中的 case_when 和 across,把数据中出现的上述任何一个代码替换为 NA。下面是一个示例,其中我尝试将所选所有列中的值 1 替换为 NA。但这只返回原始数据帧。我想保留不匹配的数据。

所有数据都格式化为带标签的数字(labelled numeric),因此我将其强制转换为整数值。

#Returns original data frame
# Asker's first attempt (kept as posted; it does NOT recode anything).
# `as.integer` is passed in the `.fns` position of across(), so the
# `~case_when(...)` formula that follows lands in `...` and is only forwarded
# to as.integer() as an extra argument. The case_when() is therefore never
# applied to the columns, and no value is replaced with NA.
anes %>%
  mutate(
    across(V162078:V161522,
           as.integer,
          ~case_when(. == 1 ~ NA_real_,
                     . != 1 ~ .)))

class(anes$V162078)
[1] "labelled" "numeric" 
> dput(head(anes))
structure(list(V160101f = c(0.8877, 1.1605, 0.4161, 0.3852, 0.6931, 
0.7588), V161010e = c("LA", "AR", "MS", "TN", "OH", "NJ"), V162078 = c(15, 
50, 50, 15, 30, 0), V162079 = c(85, 60, 70, 60, 15, 65), V161002 = c(1, 
1, 1, 1, 2, 1), V161003 = c(3, 1, 4, 3, 3, 1), V161004 = c(1, 
1, 3, 2, 2, 1), V161005 = c(2, 1, 2, 1, 1, 1), V161006 = c(-1, 
1, -1, 2, 1, 2), V161008 = c(3, 7, 3, 7, 5, 7), V161011 = c(1, 
1, 3, 1, 1, 1), V161019 = c(2, 4, -1, -1, -1, 4), V161020 = c(-1, 
-1, 2, -1, -1, -1), V161021 = c(2, 2, 2, 2, 2, 2), V161021a = c(-1, 
-1, -1, -1, -1, -1), V161022 = c(2, 2, -1, 1, 2, 2), V161030 = c(1, 
1, -1, -1, 1, -8), V161080 = c(2, 2, 2, 1, 2, 2), V161081 = c(2, 
2, 2, 2, 2, 2), V161082 = c(2, 2, 2, 2, 2, 2), V161083 = c(2, 
-8, 2, 2, 2, 2), V161084 = c(2, -8, 2, 2, 2, 2), V161085 = c(2, 
2, 2, 2, 2, 2), V161110 = c(3, 3, 3, 1, 5, 3), V161126 = c(99, 
5, 99, 99, 4, 6), V161128 = c(1, 1, 3, 2, 3, 1), V161129 = c(6, 
6, 5, 4, -8, 5), V161146 = c(2, 1, 2, 2, 1, 2), V161149 = c(2, 
1, 2, 1, 1, 2), V161151x = c(4, 4, 6, 7, 5, 1), V161158x = c(7, 
6, 3, 5, 3, 5), V161204x = c(4, 6, 4, 6, 7, 7), V161215 = c(4, 
4, 4, 2, 5, 5), V161216 = c(1, 1, 1, 1, 2, 1), V161217 = c(1, 
1, 1, 1, 3, 1), V161223 = c(1, 2, 2, 3, 2, 1), V161227x = c(4, 
1, 5, 1, 5, 1), V161228x = c(1, 1, 1, 1, 4, -8), V161235x = c(5, 
3, 3, 3, 4, 2), V161241 = c(1, 2, 2, 1, 1, 1), V161265x = c(2, 
8, 8, 2, 7, 4), V161267 = c(29, 26, 23, 58, 38, 60), V161268 = c(1, 
6, 6, 1, 4, 2), V161270 = c(9, 13, 9, 9, 9, 14), V161310x = c(1, 
1, 1, 1, 1, 1), V161315 = c(1, 1, 1, 1, 1, 1), V161324 = c(1, 
0, 2, 0, 3, 1), V161326 = c(1, 1, 1, 1, 2, 1), V161361x = c(13, 
17, 6, 20, 3, 1), V161522 = c(2, 3, 1, 2, 2, 1)), row.names = c(NA, 
6L), class = "data.frame")

如果我在原始数据上运行它而不强制为数字类型,会发生以下情况:

# Asker's second attempt (kept as posted; it errors, see the trace below).
# Here the case_when() formula really is the function applied by across(),
# but its two right-hand sides differ in class: NA_real_ is plain numeric
# while `.` is still a labelled/numeric column, which case_when() rejects.
anes %>%
  mutate(
    across(V162078:V161522,
           ~case_when(. == 1 ~ NA_real_,
                      . != 1 ~ .)))

> rlang::last_error()
█
├─<error/dplyr:::mutate_error>
│ Problem with `mutate()` input `..1`.
│ x must have class `numeric`, not class `labelled/numeric`.
│ ℹ Input `..1` is `(function (.cols = everything(), .fns = NULL, ..., .names = NULL) ...`.
└─<error/rlang_error>
  must have class `numeric`, not class `labelled/numeric`.
Backtrace:
  1. `%>%`(...)
  8. dplyr::case_when(. == 1 ~ NA_real_, . != 1 ~ .)
  9. dplyr:::replace_with(...)
 10. dplyr:::check_class(val, x, name)
 11. dplyr:::glubort(header, "must have class `{exp_classes}`, not class `{out_classes}`.")
Run `rlang::last_trace()` to see the full context.

最终答案:

# Asker's final solution: for every column in V162078:V161522, map each
# missing-data code (-1 through -9, plus 99, 998 and 999) to NA and keep all
# other values. The fallback branch wraps the column in as.numeric() so every
# right-hand side of case_when() is plain numeric; a second mutate() then
# converts the recoded columns to integer.
anes %>%
  mutate(across(V162078:V161522,
    ~case_when(
    . == -1 ~ NA_real_,
    . == -2 ~ NA_real_,
    . == -3 ~ NA_real_,
    . == -4 ~ NA_real_,
    . == -5 ~ NA_real_,
    . == -6 ~ NA_real_,
    . == -7 ~ NA_real_,
    . == -8 ~ NA_real_,
    . == -9 ~ NA_real_,
    . == 99 ~ NA_real_,
    . == 998 ~ NA_real_,
    . == 999 ~ NA_real_,
    TRUE ~ as.numeric((.))))) %>% #This catches all values that are not declared in case_when
  mutate(across(V162078:V161522, as.integer))

共有1个答案

袁亦
2023-03-14
# Variant 1: convert the columns to integer first, then use if_else() to
# replace the value 1 with NA_integer_ and leave every other value unchanged.
# NA_integer_ (not NA_real_) is used so both branches have the same type.
anes %>%
  mutate(across(V162078:V161522, as.integer),
         across(V162078:V161522, 
                ~if_else(.x == 1, NA_integer_, .x)))

或者

# Variant 2: same as the if_else() version, written with case_when().
# The `TRUE ~ .x` branch is the catch-all that keeps non-matching values.
anes %>%
  mutate(across(V162078:V161522, as.integer),
         across(V162078:V161522, 
                ~case_when(.x == 1 ~ NA_integer_, 
                           TRUE    ~ .x)))

或者我们可以使用 dplyr::near 来测试近似相等性,这样我们就可以完全跳过类型转换:

# Variant 3: no as.integer() step. near() compares each value with 1 using a
# numeric tolerance instead of `==`, and matching values become NA_real_.
anes %>%
  mutate(across(V162078:V161522, 
                ~if_else(near(.x, 1), NA_real_, .x)))
 类似资料: