loads json r import dataframe

loads - Obtener datos JSON importados en un marco de datos (data frame)



json object r (5)

Tengo un archivo que contiene más de 1500 objetos JSON con los que quiero trabajar en R. Pude importar los datos como una lista, pero tengo problemas para convertirlos en una estructura útil. Quiero crear un marco de datos (data frame) que contenga una fila por cada objeto JSON y una columna por cada par clave-valor.

He recreado mi situación con este pequeño conjunto de datos ficticio:

[{"name":"Doe, John","group":"Red","age (y)":24,"height (cm)":182,"wieght (kg)":74.8,"score":null}, {"name":"Doe, Jane","group":"Green","age (y)":30,"height (cm)":170,"wieght (kg)":70.1,"score":500}, {"name":"Smith, Joan","group":"Yellow","age (y)":41,"height (cm)":169,"wieght (kg)":60,"score":null}, {"name":"Brown, Sam","group":"Green","age (y)":22,"height (cm)":183,"wieght (kg)":75,"score":865}, {"name":"Jones, Larry","group":"Green","age (y)":31,"height (cm)":178,"wieght (kg)":83.9,"score":221}, {"name":"Murray, Seth","group":"Red","age (y)":35,"height (cm)":172,"wieght (kg)":76.2,"score":413}, {"name":"Doe, Jane","group":"Yellow","age (y)":22,"height (cm)":164,"wieght (kg)":68,"score":902}]

Algunas características de los datos:

  • Todos los objetos contienen el mismo número de pares clave-valor, aunque algunos de los valores son nulos
  • Hay dos columnas no numéricas por objeto (name y group)
  • name es el identificador único; hay 10 o más grupos
  • muchos de los nombres y grupos incluyen espacios, comas y otros signos de puntuación.

Basándome en esta pregunta: «R list(structure(list())) to data frame», intenté lo siguiente:

json_file <- "test.json"
json_data <- fromJSON(json_file)
asFrame <- do.call("rbind.fill", lapply(json_data, as.data.frame))

Tanto con mis datos reales como con estos datos ficticios, la última línea me da este error:

Error in data.frame(name = "Doe, John", group = "Red", `age (y)` = 24, : arguments imply differing number of rows: 1, 0


Esto es muy sencillo si usa library(jsonlite) y su función fromJSON(). También maneja los valores null y los convierte en NA.

json_file <- '[{"name":"Doe, John","group":"Red","age (y)":24,"height (cm)":182,"wieght (kg)":74.8,"score":null}, {"name":"Doe, Jane","group":"Green","age (y)":30,"height (cm)":170,"wieght (kg)":70.1,"score":500}, {"name":"Smith, Joan","group":"Yellow","age (y)":41,"height (cm)":169,"wieght (kg)":60,"score":null}, {"name":"Brown, Sam","group":"Green","age (y)":22,"height (cm)":183,"wieght (kg)":75,"score":865}, {"name":"Jones, Larry","group":"Green","age (y)":31,"height (cm)":178,"wieght (kg)":83.9,"score":221}, {"name":"Murray, Seth","group":"Red","age (y)":35,"height (cm)":172,"wieght (kg)":76.2,"score":413}, {"name":"Doe, Jane","group":"Yellow","age (y)":22,"height (cm)":164,"wieght (kg)":68,"score":902}]'

library(jsonlite)
fromJSON(json_file)
#           name  group age (y) height (cm) wieght (kg) score
# 1    Doe, John    Red      24         182        74.8    NA
# 2    Doe, Jane  Green      30         170        70.1   500
# 3  Smith, Joan Yellow      41         169        60.0    NA
# 4   Brown, Sam  Green      22         183        75.0   865
# 5 Jones, Larry  Green      31         178        83.9   221
# 6 Murray, Seth    Red      35         172        76.2   413
# 7    Doe, Jane Yellow      22         164        68.0   902

str(fromJSON(json_file))
# 'data.frame': 7 obs. of 6 variables:
#  $ name       : chr "Doe, John" "Doe, Jane" "Smith, Joan" "Brown, Sam" ...
#  $ group      : chr "Red" "Green" "Yellow" "Green" ...
#  $ age (y)    : int 24 30 41 22 31 35 22
#  $ height (cm): int 182 170 169 183 178 172 164
#  $ wieght (kg): num 74.8 70.1 60 75 83.9 76.2 68
#  $ score      : int NA 500 NA 865 221 413 902


Para reemplazar los valores nulos por NA, use el parámetro nullValue de fromJSON (paquete RJSONIO):

json_data <- fromJSON(json_file, nullValue = NA)
asFrame <- do.call("rbind.fill", lapply(json_data, as.data.frame))

De esta manera no habrá comillas innecesarias en la salida.


Solo necesitas reemplazar tus NULLs con NAs:

require(RJSONIO)

json_file <- '[{"name":"Doe, John","group":"Red","age (y)":24,"height (cm)":182,"wieght (kg)":74.8,"score":null}, {"name":"Doe, Jane","group":"Green","age (y)":30,"height (cm)":170,"wieght (kg)":70.1,"score":500}, {"name":"Smith, Joan","group":"Yellow","age (y)":41,"height (cm)":169,"wieght (kg)":60,"score":null}, {"name":"Brown, Sam","group":"Green","age (y)":22,"height (cm)":183,"wieght (kg)":75,"score":865}, {"name":"Jones, Larry","group":"Green","age (y)":31,"height (cm)":178,"wieght (kg)":83.9,"score":221}, {"name":"Murray, Seth","group":"Red","age (y)":35,"height (cm)":172,"wieght (kg)":76.2,"score":413}, {"name":"Doe, Jane","group":"Yellow","age (y)":22,"height (cm)":164,"wieght (kg)":68,"score":902}]'

json_file <- fromJSON(json_file)

json_file <- lapply(json_file, function(x) {
  x[sapply(x, is.null)] <- NA
  unlist(x)
})

Una vez que tiene un valor no nulo para cada elemento, puede llamar a rbind sin obtener un error:

do.call("rbind", json_file)
     name           group    age (y) height (cm) wieght (kg) score
[1,] "Doe, John"    "Red"    "24"    "182"       "74.8"      NA
[2,] "Doe, Jane"    "Green"  "30"    "170"       "70.1"      "500"
[3,] "Smith, Joan"  "Yellow" "41"    "169"       "60"        NA
[4,] "Brown, Sam"   "Green"  "22"    "183"       "75"        "865"
[5,] "Jones, Larry" "Green"  "31"    "178"       "83.9"      "221"
[6,] "Murray, Seth" "Red"    "35"    "172"       "76.2"      "413"
[7,] "Doe, Jane"    "Yellow" "22"    "164"       "68"        "902"


dplyr::bind_rows(fromJSON(file_name))


library(rjson)
Lines <- readLines("yelp_academic_dataset_business.json")
business <- as.data.frame(t(sapply(Lines, fromJSON)))

Puede intentar esto para cargar datos JSON en R