Reputation: 43
My JSON file looks like this
[
{
"Hugo_Symbol": "FLT3",
"HGVSp_Short": "p.T227M",
"Other": {
"Type": "Renal",
"Drug": "Sunitinib",
"Effects": "Response"
}
},
{
"Hugo_Symbol": "KRAS",
"HGVSp_Short": "p.G12D",
"Other": {
"Type": "Ovarian",
"Drug": "Cetuximab",
"Effects": "Sensitivity"
}
},
{
"Hugo_Symbol": "KRAS",
"HGVSp_Short": "p.G12D",
"Other": {
"Type": "Lung",
"Drug": "Regorafenib",
"Effects": "Sensitivity"
}
},
{
"Hugo_Symbol": "KRAS",
"HGVSp_Short": "p.G12D",
"Other": {
"Type": "Leukemia",
"Drug": "Akt",
"Effects": "Resistance"
}
},
{
"Hugo_Symbol": "KRAS",
"HGVSp_Short": "p.G12D",
"Other": {
"Type": "Lung",
"Drug": "gefitimab",
"Effects": "Sensitivity"
}
},
{
"Hugo_Symbol": "ALK",
"HGVSp_Short": "p.G845=",
"Other": {
"Type": "",
"Drug": "",
"Effects": ""
}
},
{
"Hugo_Symbol": "APC",
"HGVSp_Short": "p.Q1338*",
"Other": {
"Type": "",
"Drug": "",
"Effects": ""
}
},
{
"Hugo_Symbol": "MET",
"HGVSp_Short": "p. M1S",
"Other": {
"Type": "Eye",
"Drug": "",
"Effects": "Response"
}
}
]
I want to merge the duplicates in my JSON file
Expected output
[
{
"Hugo_Symbol": "FLT3",
"HGVSp_Short": "p.T227M",
"Other": [
{"Type": "Renal", "Drug": "Sunitinib","Effect": "Response" }
]
},
{
"Hugo_Symbol": "KRAS",
"HGVSp_Short": "p.G12D",
"Other": [
{"Type": "Ovarian","Drug": "Cetuximab","Effect": "Sensitivity"},
{"Type": "Lung","Drug": "Regorafenib","Effect": "Sensitivity"},
{"Type": "Leukemia","Drug": "Akt","Effect": "Resistance"},
{"Type": "Lung", "Drug": "gefitimab", "Effect": "Sensitivity"}
]
},
{
"Hugo_Symbol": "ALK",
"HGVSp_Short": "p.G845=",
"Other": [
{"Type": "", "Drug": "","Effect": "" }
]
},
{
"Hugo_Symbol": "APC",
"HGVSp_Short": "p.Q1338*",
"Other":[
{"Type": "","Drug": "","Effect": ""}
]
},
{
"Hugo_Symbol": "MET",
"HGVSp_Short": "p.M1S",
"OtherTumorInfo": [
{"Type": "Eye","Drug": "","Effect": "Response" }
]
}
]
My initial file is a CSV file I have converted my CSV file into JSON using R
func <- function(x) {
grps <- split(names(x), gsub("[.].*", "", names(x)))
for (nm in names(grps)) {
if (length(grps[[nm]]) > 1 || !nm %in% grps[[nm]]) {
x[[nm]] <- setNames(subset(x, select = grps[[nm]]),
gsub("^[^.]+[.]", "", grps[[nm]]))
x[,setdiff(grps[[nm]], nm)] <- NULL
}
}
for (nm in names(x)) {
if (is.data.frame(x[[nm]])) {
x[[nm]] <- func(x[[nm]])
}
}
if (any(grepl("[.]", names(x)))) func(x) else x
}
I used this function to convert my CSV file to a nested JSON file
newdf= func(x)
Converting my data frame into JSON
library(jsonlite)
json_f=toJSON(newdf,dataframe = 'rows',pretty = T)
My CSV file looks like this
| Hugo_Symbol | HGVSp_Short | Other.Type | Other.Drug | Other.Effects |
| ----------- | ----------- | ---------- | ---------- | ------------- |
|FLT3 | p.T227M | Renal | Sunitinib | Response |
|KRAS | p.G12D | Ovarian | Cetuximab | Sensitivity |
|KRAS | p.G12D | Lung | Regorafenib| Sensitivity |
|KRAS | p.G12D | Leukemia | Akt | Resistance |
|KRAS | p.G12D | Lung | gefitimab | Sensitivity |
|ALK | p.G845= | | | |
|APC | p.Q1338* | | | |
|MET | p. M1S | Eye | | Response |
Can anyone suggest me a simple way to merge my duplicates array in the JSON file in the R?
Upvotes: 1
Views: 59
Reputation: 10637
Its much easier to do the re-nesting on tables, so lets convert the json back to a tibble:
library(jsonlite)
library(tidyverse)
read_json("data.json") %>%
map(as.data.frame) %>%
bind_rows() %>%
as_tibble() %>%
nest(-Hugo_Symbol, -HGVSp_Short) %>%
mutate(data = data %>% map(~ {
colnames(.x) <- colnames(.x) %>% str_remove("^Other.")
.x
})) %>%
rename(Other = data) %>%
toJSON(pretty = T)
resulting in
[
{
"Hugo_Symbol": "FLT3",
"HGVSp_Short": "p.T227M",
"Other": [
{
"Type": "Renal",
"Drug": "Sunitinib",
"Effects": "Response"
}
]
},
{
"Hugo_Symbol": "KRAS",
"HGVSp_Short": "p.G12D",
"Other": [
{
"Type": "Ovarian",
"Drug": "Cetuximab",
"Effects": "Sensitivity"
},
{
"Type": "Lung",
"Drug": "Regorafenib",
"Effects": "Sensitivity"
},
{
"Type": "Leukemia",
"Drug": "Akt",
"Effects": "Resistance"
},
{
"Type": "Lung",
"Drug": "gefitimab",
"Effects": "Sensitivity"
}
]
},
{
"Hugo_Symbol": "ALK",
"HGVSp_Short": "p.G845="
...
Upvotes: 3