r - 如何从 data frame 创建包含两个元素(nodes 和 edges)的 list?

我有英国城市之间的交通数据:

  • Transit(中转):如果两个城市之间有中转则为 1,否则为 0
  • ave.pas(权重):平均乘客人数

这是我的数据中的示例:

# Sample transit data: one row per city pair.
#   from / to : endpoint city names
#   Transit   : 1 if there is a transit link between the two cities, else 0
#   ave.pas   : average number of passengers (used as the edge weight)
df2 <- data.frame(
  from = rep(
    c("London", "Liverpool", "Manchester", "Bristol"),
    times = c(4, 3, 2, 1)
  ),
  to = c(
    "Liverpool", "Manchester", "Bristol", "Derby",
    "Manchester", "Bristol", "Derby",
    "Bristol", "Derby",
    "Derby"
  ),
  Transit = c(1, 0, 1, 1, 1, 1, 1, 1, 0, 1),
  ave.pas = c(10, 0, 11, 24, 40, 45, 12, 34, 9, 29)
)

输出

         from         to Transit ave.pas
1      London  Liverpool       1      10
2      London Manchester       0       0
3      London    Bristol       1      11
4      London      Derby       1      24
5   Liverpool Manchester       1      40
6   Liverpool    Bristol       1      45
7   Liverpool      Derby       1      12
8  Manchester    Bristol       1      34
9  Manchester      Derby       0       9
10    Bristol      Derby       1      29

现在我想将其转换为一个包含两个元素(nodes 和 edges)的 list,得到类似下面这样的结构(下面是另一份数据,但我希望从我的 df 中得到相同结构的结果):

$nodes
# A tibble: 16 x 2
      id label         
   <int> <chr>         
 1     1 France        
 2     2 Belgium       
 3     3 Germany       
 4     4 Danemark      
 5     5 Croatia       
 6     6 Slovenia      
 7     7 Hungary       
 8     8 Spain         
 9     9 Italy         
10    10 Netherlands   
11    11 UK            
12    12 Austria       
13    13 Poland        
14    14 Switzerland   
15    15 Czech republic
16    16 Slovania      

$edges
# A tibble: 18 x 3
    from    to weight
   <int> <int>  <dbl>
 1     1     3    9  
 2     2     1    4  
 3     1     8    3  
 4     1     9    4  
 5     1    10    2  
 6     1    11    3  
 7     3    12    2  
 8     3    13    2  
 9     2     3    3  
10     3    14    2  
11     3    15    2  
12     3    10    2  
13     4     3    2  
14     5     3    2  
15     5    16    2  
16     5     7    2  
17     6     3    2  
18     7    16    2.5

回答1

在基础 R 中:

# Names of the df2 columns that hold the two endpoints of each edge.
f2 <- c("from", "to")

# Node table: one row per distinct city, numbered in order of first
# appearance across the `from` and `to` columns.
nodes <- data.frame(label = unique(unlist(df2[f2])))
nodes$id <- seq_len(nrow(nodes))

# Edge table: keep only the pairs that have a transit link.
# which() drops rows where Transit is NA; plain logical indexing would
# turn each of those into an all-NA row.
edges <- df2[which(df2$Transit == 1), c(f2, "ave.pas")]

# Replace each city name with its node id (position in nodes$label).
edges[f2] <- lapply(edges[f2], match, nodes$label)
nodes
#        label id
# 1     London  1
# 2  Liverpool  2
# 3 Manchester  3
# 4    Bristol  4
# 5      Derby  5

edges
#    from to ave.pas
# 1     1  2      10
# 3     1  4      11
# 4     1  5      24
# 5     2  3      40
# 6     2  4      45
# 7     2  5      12
# 8     3  4      34
# 10    4  5      29

回答2

先用去重后的城市名(作为因子水平)创建一个 data frame,并用 as.numeric 生成 id,然后用 match 把原数据中的城市名替换为对应的 id。

# Distinct city names from the first two columns of df2, in order of first
# appearance. Computed once and reused for both the ids and the factor levels.
city_labels <- unique(unlist(df2[c(1, 2)]))

# Lookup table: numeric id per city, with the label stored as a factor whose
# levels follow the order of first appearance.
df1 <- data.frame(
  id = as.numeric(seq_along(city_labels)),
  label = factor(city_labels, levels = city_labels)
)
#  id      label
#1  1     London
#2  2  Liverpool
#3  3 Manchester
#4  4    Bristol
#5  5      Derby

# Swap the city names in both endpoint columns for their ids from the
# df1 lookup table (match() gives the row of df1 for each name).
df2[c("from", "to")] <- lapply(
  df2[c("from", "to")],
  function(city) df1$id[match(city, df1$label)]
)
#   from to Transit ave.pas
#1     1  2       1      10
#2     1  3       0       0
#3     1  4       1      11
#4     1  5       1      24
#5     2  3       1      40
#6     2  4       1      45
#7     2  5       1      12
#8     3  4       1      34
#9     3  5       0       9
#10    4  5       1      29

编辑:您实际上不需要转换为因子(这与@sindri_baldur 的答案非常接近):

# Alternative without factors. Run this on the original df2, where columns
# 1-2 still hold city names, not after they have been replaced by ids.

# Distinct city names in order of first appearance.
un <- unique(unlist(df2[c(1, 2)]))

# Lookup table: id is simply the position of each name in `un`.
df1 <- data.frame(id = seq_along(un), label = un)

# lapply() always returns a list with one element per column, whereas the
# shape of sapply()'s result depends on the input.
df2[c(1, 2)] <- lapply(df2[c(1, 2)], match, df1$label)

相似文章

最新文章