我有一个 dataframe df1 如下 -
ID1 <- c(155,487,102,666,100)
year <- c(2002,2003,2002,2000,2004)
df1 <- data.frame(ID1, year)
df1
ID1 year
1 155 2002
2 487 2003
3 102 2002
4 666 2000
5 100 2004
然后我有一个 dataframe df2 如下 -
library(dplyr)
ID2 <- c(487,334,555,100,250,366,155,788)
a <- c(0.3,0.33,1.34,0.96,1.9,2.23,0.43,3.73)
b <- c(0.55,0.63,0.46,.26,3.1,0.4,2.46,4.34)
c <- c(0.78,0.83,0.34,0.95,0.12,0.72,0.52,0.03)
d <- c(2.3,1.23,2.35,0.96,0.1,0.78,0.49,2.03)
e <- c(1.3,0.53,1.54,1.36,2.34,0.34,0.25,1.33)
df2 <- data.frame(ID2,a,b,c,d,e)
df2 <- rename(df2, "2000"=a, "2001"=b, "2002"=c, "2003"=d, "2004"=e)
df2
ID2 2000 2001 2002 2003 2004
1 487 0.30 0.55 0.78 2.30 1.30
2 334 0.33 0.63 0.83 1.23 0.53
3 555 1.34 0.46 0.34 2.35 1.54
4 100 0.96 0.26 0.95 0.96 1.36
5 250 1.90 3.10 0.12 0.10 2.34
6 366 2.23 0.40 0.72 0.78 0.34
7 155 0.43 2.46 0.52 0.49 0.25
8 788 3.73 4.34 0.03 2.03 1.33
如果 df1 的 ID1 与 df2 的 ID2 匹配,我想从 df2 中选择 values 并将其插入到 df1 中。要从中选择 value 的 df2 列在 df1 的列 year 中命名。
结果应该是这样——
ID1 year value
1 155 2002 0.52
2 487 2003 2.30
3 102 2002 NA
4 666 2000 NA
5 100 2004 1.36
回答1
我们可以将第二个数据重塑为“长”格式,然后进行连接
library(dplyr)
library(tidyr)
df2 %>%
mutate(ID2 = as.character(ID2)) %>%
pivot_longer(cols = -ID2, names_to = 'year') %>%
left_join(df1 %>%
mutate(across(everything(), as.character)), .,
by = c("ID1" = "ID2", "year"))
-输出
ID1 year value
1 155 2002 0.52
2 487 2003 2.30
3 102 2002 NA
4 666 2000 NA
5 100 2004 1.36
或者使用 base R
和 row/column
索引创建的 match
df1$value <- df2[-1][cbind(match(df1$ID1, df2$ID2),
match(df1$year, names(df2)[-1]))]
数据
df1 <- structure(list(ID1 = c(155L, 487L, 102L, 666L, 100L), year = c(2002L,
2003L, 2002L, 2000L, 2004L)), class = "data.frame", row.names = c("1",
"2", "3", "4", "5"))
df2 <- structure(list(ID2 = c(487L, 334L, 555L, 100L, 250L, 366L, 155L,
788L), `2000` = c(0.3, 0.33, 1.34, 0.96, 1.9, 2.23, 0.43, 3.73
), `2001` = c(0.55, 0.63, 0.46, 0.26, 3.1, 0.4, 2.46, 4.34),
`2002` = c(0.78, 0.83, 0.34, 0.95, 0.12, 0.72, 0.52, 0.03
), `2003` = c(2.3, 1.23, 2.35, 0.96, 0.1, 0.78, 0.49, 2.03
), `2004` = c(1.3, 0.53, 1.54, 1.36, 2.34, 0.34, 0.25, 1.33
)), class = "data.frame", row.names = c("1", "2", "3", "4",
"5", "6", "7", "8"))
回答2
使用查找的另一个选项:
library(tidyverse)
df1 <- tribble(
~ID1, ~year,
155, 2002,
487, 2003,
102, 2002,
666, 2000,
100, 2004
) |>
mutate(index = str_c(ID1, year))
df2 <- tribble(
~ID2, ~"2000", ~"2001", ~"2002", ~"2003", ~"2004",
487, 0.30, 0.55, 0.78, 2.30, 1.30,
334, 0.33, 0.63, 0.83, 1.23, 0.53,
555, 1.34, 0.46, 0.34, 2.35, 1.54,
100, 0.96, 0.26, 0.95, 0.96, 1.36,
250, 1.90, 3.10, 0.12, 0.10, 2.34,
366, 2.23, 0.40, 0.72, 0.78, 0.34,
155, 0.43, 2.46, 0.52, 0.49, 0.25,
788, 3.73, 4.34, 0.03, 2.03, 1.33
) |>
pivot_longer(-ID2, names_to = "year") |>
mutate(index = str_c(ID2, year))
df1 |> mutate(value = df2$value[match(index, df2$index)])
#> # A tibble: 5 × 4
#> ID1 year index value
#> <dbl> <dbl> <chr> <dbl>
#> 1 155 2002 1552002 0.52
#> 2 487 2003 4872003 2.3
#> 3 102 2002 1022002 NA
#> 4 666 2000 6662000 NA
#> 5 100 2004 1002004 1.36
由 https://reprex.tidyverse.org (v2.0.1) 于 2022-05-18 创建