r - 有没有更好的方法?如何在 R 中展开深度嵌套的列表

首先说明,我的代码是可以运行的,但它效率低下,而且笨拙得令人尴尬。我希望社区中有人能告诉我一种更好的方法,来展开这个深度嵌套的列表。

背景是:NFT 的交易数据嵌套层级很深。我只想得到一个最终细化到每日级别的数据框。我已经设法让代码对 totalPriceUSD 字段起作用,但正如我所说,它很笨拙。

library(dplyr)
library(tidyr)
library(rlist)
library(jsonlite)

# Download the nested NFT index data as a list of lists.
mydata <- fromJSON("https://api2.cryptoslam.io/api/nft-indexes/NFTGlobal")

# Flatten every leaf of the nested list into one wide, single-row table.
# The intermediate is kept in `mydata1` so the raw list in `mydata` is not
# clobbered before the final result is ready.
mydata1 <- rlist::list.flatten(mydata) %>% dplyr::bind_rows()

# Keep only the columns that are both a totalPriceUSD leaf and a daily entry.
# Both filters are chained on the same table so neither one is discarded.
mydata1 <- mydata1 %>%
  select(contains("totalPriceUSD")) %>%
  select(contains("daily"))

# Reduce each flattened column name to characters 24-33.
# NOTE(review): this assumes names look like
# "<YYYY-MM>.dailySummaries.<YYYY-MM-DD>T00:00:00.totalPriceUSD", so that the
# slice is the "YYYY-MM-DD" date -- confirm against the live payload.
names(mydata1) <- substring(names(mydata1), 24, 33)

# Reshape from one column per day to one row per day. pivot_longer() replaces
# the superseded gather(); the result is assigned to `mydata`, which the
# following lines (and the reader) treat as the final table.
mydata <- mydata1 %>%
  pivot_longer(everything(), names_to = "date", values_to = "totalPriceUSD")

# Return a plain data frame with a real Date column.
mydata <- as.data.frame(mydata)
mydata$date <- as.Date(mydata$date, format = "%Y-%m-%d")

正如我所说,它有效,但它并不漂亮。有什么改进的建议吗?

非常感谢

回答1

library(dplyr)
# Fetch the nested list; each top-level element is indexed below by
# "monthlySummary" and "dailySummaries".
mydata <- jsonlite::fromJSON("https://api2.cryptoslam.io/api/nft-indexes/NFTGlobal")

# One row per top-level element: pull out its "monthlySummary" and stack them,
# recording the element name in `monthly_id`.
monthly <- mydata %>%
  lapply(function(month) month[["monthlySummary"]]) %>%
  bind_rows(.id = "monthly_id")

# One row per daily entry: stack each element's "dailySummaries" (entry name
# goes to `daily_id`), then stack those tables across top-level elements.
daily <- mydata %>%
  lapply(function(month) {
    bind_rows(month[["dailySummaries"]], .id = "daily_id")
  }) %>%
  bind_rows(.id = "monthly_id")

monthly
# # A tibble: 60 x 6
#    monthly_id totalTransactions uniqueBuyers uniqueSellers totalPriceUSD isRollingHoursData
#    <chr>                  <int>        <int>         <int>         <dbl> <lgl>             
#  1 2017-06                  193           33            32        11570. FALSE             
#  2 2017-07                  613           61            57        89111. FALSE             
#  3 2017-08                  113           36            31        15133. FALSE             
#  4 2017-09                   63           22            19         5154. FALSE             
#  5 2017-10                   52           17            11         3041. FALSE             
#  6 2017-11                 7259         1077           508        72760. FALSE             
#  7 2017-12               265412        53406         23137     18804813. FALSE             
#  8 2018-01                30693         7682          4582      1360558. FALSE             
#  9 2018-02                34177         4142          4364      2931369. FALSE             
# 10 2018-03                29051         3752          2784       987256. FALSE             
# # ... with 50 more rows

daily
# # A tibble: 1,750 x 7
#    monthly_id daily_id            totalTransactions uniqueBuyers uniqueSellers totalPriceUSD isRollingHoursData
#    <chr>      <chr>                           <int>        <int>         <int>         <dbl> <lgl>             
#  1 2017-06    2017-06-23T00:00:00                27            9             6         1456. FALSE             
#  2 2017-06    2017-06-24T00:00:00                15            7             8          846. FALSE             
#  3 2017-06    2017-06-25T00:00:00                15            7             5          594. FALSE             
#  4 2017-06    2017-06-26T00:00:00                23           10            12         1076. FALSE             
#  5 2017-06    2017-06-27T00:00:00                35            8            15         2091. FALSE             
#  6 2017-06    2017-06-28T00:00:00                15            6             5         1431. FALSE             
#  7 2017-06    2017-06-29T00:00:00                41           13            11         2302. FALSE             
#  8 2017-06    2017-06-30T00:00:00                22           11             7         1775. FALSE             
#  9 2017-07    2017-07-01T00:00:00                12            7            10         3727. FALSE             
# 10 2017-07    2017-07-02T00:00:00                34           13            12         3117. FALSE             
# # ... with 1,740 more rows

回答2

使用 rrapply() + unnest_wider(),作为 @r2evans 答案的替代方案。这种方法应该也能推广到任意层级的嵌套。

library(tidyr)
library(jsonlite)
library(rrapply)

# Fetch the nested list of NFT index data.
mydata <- fromJSON("https://api2.cryptoslam.io/api/nft-indexes/NFTGlobal")

# Monthly table: melt the list nodes whose own name is "monthlySummary",
# then spread each node's fields into columns.
monthly <- rrapply(
  mydata,
  classes = "list",
  condition = function(x, .xname) .xname == "monthlySummary",
  how = "melt"
) |>
  unnest_wider(value)

# Daily table: melt the list nodes that have "dailySummaries" somewhere in
# their parent path; head(.xparents, -1) drops the last path entry before the
# membership check. Each node's fields are then spread into columns.
daily <- rrapply(
  mydata,
  classes = "list",
  condition = function(x, .xparents) "dailySummaries" %in% head(.xparents, -1),
  how = "melt"
) |>
  unnest_wider(value)

monthly
#> # A tibble: 60 × 9
#>    L1      L2          totalTransactio… uniqueBuyers uniqueSellers totalPriceUSD
#>    <chr>   <chr>                  <int>        <int>         <int>         <dbl>
#>  1 2017-06 monthlySum…              193           33            32        11570.
#>  2 2017-07 monthlySum…              613           61            57        89111.
#>  3 2017-08 monthlySum…              113           36            31        15133.
#>  4 2017-09 monthlySum…               63           22            19         5154.
#>  5 2017-10 monthlySum…               52           17            11         3041.
#>  6 2017-11 monthlySum…             7259         1077           508        72760.
#>  7 2017-12 monthlySum…           265412        53406         23137     18804813.
#>  8 2018-01 monthlySum…            30693         7682          4582      1360558.
#>  9 2018-02 monthlySum…            34177         4142          4364      2931369.
#> 10 2018-03 monthlySum…            29051         3752          2784       987256.
#> # … with 50 more rows, and 3 more variables: isRollingHoursData <lgl>,
#> #   productNames <lgl>, productNamesWithoutAnySale <lgl>

daily
#> # A tibble: 1,750 × 10
#>    L1      L2    L3    totalTransactio… uniqueBuyers uniqueSellers totalPriceUSD
#>    <chr>   <chr> <chr>            <int>        <int>         <int>         <dbl>
#>  1 2017-06 dail… 2017…               27            9             6         1456.
#>  2 2017-06 dail… 2017…               15            7             8          846.
#>  3 2017-06 dail… 2017…               15            7             5          594.
#>  4 2017-06 dail… 2017…               23           10            12         1076.
#>  5 2017-06 dail… 2017…               35            8            15         2091.
#>  6 2017-06 dail… 2017…               15            6             5         1431.
#>  7 2017-06 dail… 2017…               41           13            11         2302.
#>  8 2017-06 dail… 2017…               22           11             7         1775.
#>  9 2017-07 dail… 2017…               12            7            10         3727.
#> 10 2017-07 dail… 2017…               34           13            12         3117.
#> # … with 1,740 more rows, and 3 more variables: isRollingHoursData <lgl>,
#> #   productNames <lgl>, productNamesWithoutAnySale <lgl>

相似文章

最新文章