李家翔 2019-04-28
library(tidyverse)
library(readxl)
df <- read_excel("json2df.xlsx")
df %>%
mutate(
json_col = map(audit_context_data, ~jsonlite::fromJSON(paste0("[",.,"]")))
) -> tmp
safely(unnest)(tmp)
## $result
## NULL
##
## $error
## <Rcpp::exception: Column `col_c` can't be converted from integer to character>
注意看报错,col_c
数据类型在多个 json 格式中中,不一致
tmp[1,2] %>% pull %>% str
## List of 1
## $ :'data.frame': 1 obs. of 3 variables:
## ..$ col_a: int 22
## ..$ col_b: num 0.123
## ..$ col_c: int -9999999
tmp[2,2] %>% pull %>% str
## List of 1
## $ :'data.frame': 1 obs. of 3 variables:
## ..$ col_a: int 22
## ..$ col_b: num 1.12
## ..$ col_c: chr "610"
你可以看到第二行的样本中,col_c: chr
,因此转换的时候限定下变量类型即可。
tmp %>%
mutate(json_col = map(json_col, ~mutate_all(., as.character))) %>%
unnest()
## # A tibble: 4 x 5
## audit_context_data col_a col_b col_c col_d
## <chr> <chr> <chr> <chr> <chr>
## 1 "{\"col_a\":22,\"col_b\":0.123456789,\"col_c~ 22 0.12345~ -9999~ <NA>
## 2 "{\"col_a\":22,\"col_b\":1.123456789,\"col_c~ 22 1.12345~ 610 <NA>
## 3 "{\"col_a\":22,\"col_b\":2.123456789,\"col_d~ 22 2.12345~ -9999~ 123
## 4 "{\"col_a\":22,\"col_b\":3.123456789}" 22 3.12345~ <NA> <NA>