# Fruit names that can occur inside the concatenated strings.
fruits <- c("apple", "orange", "pear")

# Example input: each value is two fruit names joined without a separator.
df <- data.frame(
  string = c("appleorange", "orangepear", "applepear")
)
期望的结果:
string | ||
---|---|---|
appleorange | apple | orange |
orangepear | orange | pear |
applepear | apple | pear |
回答1
这是使用正则表达式和 `sub` 的一种方法:
# Non-capturing alternation that matches any single fruit name.
regex <- paste0("(?:", paste(fruits, collapse = "|"), ")")

# Strip the fruit anchored at the end of the string; what remains is the
# leading fruit.
df[["col1"]] <- sub(
  pattern = paste0(regex, "$"),
  replacement = "",
  x = df[["string"]]
)

# Strip the fruit anchored at the start of the string; what remains is the
# trailing fruit.
df[["col2"]] <- sub(
  pattern = paste0("^", regex),
  replacement = "",
  x = df[["string"]]
)

df
string col1 col2
1 appleorange apple orange
2 orangepear orange pear
3 applepear apple pear
数据:
# Vocabulary of fruit names used to build the patterns.
fruits <- c("apple", "orange", "pear")

# Input data frame with a single `string` column of concatenated fruit pairs.
df <- data.frame(
  string = c(
    "appleorange",
    "orangepear",
    "applepear"
  )
)
回答2
这是使用 `stringr` 包的解决方案:
library(dplyr)
library(stringr)
# col1: the first fruit name matched in each string.
# col2: the string with that matched fruit removed, i.e. the other fruit.
df %>%
  mutate(
    col1 = str_extract(string, paste(fruits, collapse = "|")),
    col2 = str_replace(string, col1, "")
  )
string col1 col2
1 appleorange apple orange
2 orangepear orange pear
3 applepear apple pear
回答3
使用 tidyr 的 `separate`:
library(dplyr)
library(stringr)
library(tidyr)
# Split `string` at the zero-width position that follows a lowercase letter
# and precedes one of the fruit names; remove = FALSE keeps the input column.
df %>%
  separate(
    col = string,
    into = c("col1", "col2"),
    sep = glue::glue("(?<=[a-z])(?={str_c(fruits, collapse='|')})"),
    remove = FALSE
  )
string col1 col2
1 appleorange apple orange
2 orangepear orange pear
3 applepear apple pear