r - 将 dataframe 中的 value 插入另一个由 ID 和年份选择的 dataframe

我有一个 dataframe df1 如下 -

ID1 <- c(155,487,102,666,100)
year <- c(2002,2003,2002,2000,2004)
df1 <- data.frame(ID1, year)
df1
  ID1 year
1 155 2002
2 487 2003
3 102 2002
4 666 2000
5 100 2004

然后我有一个 dataframe df2 如下 -

library(dplyr)
ID2 <- c(487,334,555,100,250,366,155,788)
a <- c(0.3,0.33,1.34,0.96,1.9,2.23,0.43,3.73)
b <- c(0.55,0.63,0.46,.26,3.1,0.4,2.46,4.34)
c <- c(0.78,0.83,0.34,0.95,0.12,0.72,0.52,0.03)
d <- c(2.3,1.23,2.35,0.96,0.1,0.78,0.49,2.03)
e <- c(1.3,0.53,1.54,1.36,2.34,0.34,0.25,1.33)
df2 <- data.frame(ID2,a,b,c,d,e)
df2 <- rename(df2, "2000"=a, "2001"=b, "2002"=c, "2003"=d, "2004"=e)
df2
  ID2 2000 2001 2002 2003 2004
1 487 0.30 0.55 0.78 2.30 1.30
2 334 0.33 0.63 0.83 1.23 0.53
3 555 1.34 0.46 0.34 2.35 1.54
4 100 0.96 0.26 0.95 0.96 1.36
5 250 1.90 3.10 0.12 0.10 2.34
6 366 2.23 0.40 0.72 0.78 0.34
7 155 0.43 2.46 0.52 0.49 0.25
8 788 3.73 4.34 0.03 2.03 1.33

如果 df1 的 ID1 与 df2 的 ID2 匹配,我想从 df2 中选择 values 并将其插入到 df1 中。要从中选择 value 的 df2 列在 df1 的列 year 中命名。

结果应该是这样——

ID1 year value
1 155 2002  0.52
2 487 2003  2.30
3 102 2002    NA
4 666 2000    NA
5 100 2004  1.36

回答1

我们可以将第二个数据重塑为“长”格式,然后进行连接

library(dplyr)
library(tidyr)
df2 %>% 
  mutate(ID2 = as.character(ID2)) %>%
  pivot_longer(cols = -ID2, names_to = 'year') %>% 
  left_join(df1 %>% 
             mutate(across(everything(), as.character)), .,
     by = c("ID1" = "ID2", "year"))

-输出

ID1 year value
1 155 2002  0.52
2 487 2003  2.30
3 102 2002    NA
4 666 2000    NA
5 100 2004  1.36

或者使用 base Rrow/column 索引创建的 match

df1$value <- df2[-1][cbind(match(df1$ID1, df2$ID2), 
                      match(df1$year, names(df2)[-1]))]

数据

df1 <- structure(list(ID1 = c(155L, 487L, 102L, 666L, 100L), year = c(2002L, 
2003L, 2002L, 2000L, 2004L)), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5"))

df2 <- structure(list(ID2 = c(487L, 334L, 555L, 100L, 250L, 366L, 155L, 
788L), `2000` = c(0.3, 0.33, 1.34, 0.96, 1.9, 2.23, 0.43, 3.73
), `2001` = c(0.55, 0.63, 0.46, 0.26, 3.1, 0.4, 2.46, 4.34), 
    `2002` = c(0.78, 0.83, 0.34, 0.95, 0.12, 0.72, 0.52, 0.03
    ), `2003` = c(2.3, 1.23, 2.35, 0.96, 0.1, 0.78, 0.49, 2.03
    ), `2004` = c(1.3, 0.53, 1.54, 1.36, 2.34, 0.34, 0.25, 1.33
    )), class = "data.frame", row.names = c("1", "2", "3", "4", 
"5", "6", "7", "8"))

回答2

使用查找的另一个选项:

library(tidyverse)

df1 <- tribble(
  ~ID1, ~year,
  155, 2002,
  487, 2003,
  102, 2002,
  666, 2000,
  100, 2004
) |> 
  mutate(index = str_c(ID1, year))

df2 <- tribble(
  ~ID2, ~"2000", ~"2001", ~"2002", ~"2003", ~"2004",
  487, 0.30, 0.55, 0.78, 2.30, 1.30,
  334, 0.33, 0.63, 0.83, 1.23, 0.53,
  555, 1.34, 0.46, 0.34, 2.35, 1.54,
  100, 0.96, 0.26, 0.95, 0.96, 1.36,
  250, 1.90, 3.10, 0.12, 0.10, 2.34,
  366, 2.23, 0.40, 0.72, 0.78, 0.34,
  155, 0.43, 2.46, 0.52, 0.49, 0.25,
  788, 3.73, 4.34, 0.03, 2.03, 1.33
) |> 
  pivot_longer(-ID2, names_to = "year") |> 
  mutate(index = str_c(ID2, year))

df1 |> mutate(value = df2$value[match(index, df2$index)])
#> # A tibble: 5 × 4
#>     ID1  year index   value
#>   <dbl> <dbl> <chr>   <dbl>
#> 1   155  2002 1552002  0.52
#> 2   487  2003 4872003  2.3 
#> 3   102  2002 1022002 NA   
#> 4   666  2000 6662000 NA   
#> 5   100  2004 1002004  1.36

https://reprex.tidyverse.org (v2.0.1) 于 2022-05-18 创建

相似文章