Mano
Mano

Reputation: 43

Merge duplicates from Json file

My JSON file looks like this

 [
  {
    "Hugo_Symbol": "FLT3",
    "HGVSp_Short": "p.T227M",
    "Other": {
      "Type": "Renal",
      "Drug": "Sunitinib",
      "Effects": "Response"
    }
  },
  {
    "Hugo_Symbol": "KRAS",
    "HGVSp_Short": "p.G12D",
    "Other": {
      "Type": "Ovarian",
      "Drug": "Cetuximab",
      "Effects": "Sensitivity"
    }
  },
  {
    "Hugo_Symbol": "KRAS",
    "HGVSp_Short": "p.G12D",
    "Other": {
      "Type": "Lung",
      "Drug": "Regorafenib",
      "Effects": "Sensitivity"
    }
  },
  {
    "Hugo_Symbol": "KRAS",
    "HGVSp_Short": "p.G12D",
    "Other": {
      "Type": "Leukemia",
      "Drug": "Akt",
      "Effects": "Resistance"
    }
  },
  {
    "Hugo_Symbol": "KRAS",
    "HGVSp_Short": "p.G12D",
    "Other": {
      "Type": "Lung",
      "Drug": "gefitimab",
      "Effects": "Sensitivity"
    }
  },
  {
    "Hugo_Symbol": "ALK",
    "HGVSp_Short": "p.G845=",
    "Other": {
      "Type": "",
      "Drug": "",
      "Effects": ""
    }
  },
  {
    "Hugo_Symbol": "APC",
    "HGVSp_Short": "p.Q1338*",
    "Other": {
      "Type": "",
      "Drug": "",
      "Effects": ""
    }
  },
  {
    "Hugo_Symbol": "MET",
    "HGVSp_Short": "p. M1S",
    "Other": {
      "Type": "Eye",
      "Drug": "",
      "Effects": "Response"
    }
  }
]

I want to merge the duplicates in my JSON file

Expected output

[
    {
      "Hugo_Symbol": "FLT3",
      "HGVSp_Short": "p.T227M",
      "Other": [ 
      {"Type": "Renal", "Drug": "Sunitinib","Effect": "Response" }
      ]
    },
    {
      "Hugo_Symbol": "KRAS",
      "HGVSp_Short": "p.G12D",
      "Other": [ 
      {"Type": "Ovarian","Drug": "Cetuximab","Effect": "Sensitivity"},
      {"Type": "Lung","Drug": "Regorafenib","Effect": "Sensitivity"},
      {"Type": "Leukemia","Drug": "Akt","Effect": "Resistance"},
      {"Type": "Lung", "Drug": "gefitimab", "Effect": "Sensitivity"}
      ]
    },
    {
      "Hugo_Symbol": "ALK",
      "HGVSp_Short": "p.G845=",
      "Other": [ 
      {"Type": "", "Drug": "","Effect": "" }
      ]
    },
    {
      "Hugo_Symbol": "APC",
      "HGVSp_Short": "p.Q1338*",
      "Other":[
       {"Type": "","Drug": "","Effect": ""}
       ]
     },
     {
      "Hugo_Symbol": "MET",
      "HGVSp_Short": "p.M1S",
      "OtherTumorInfo": [
      {"Type": "Eye","Drug": "","Effect": "Response" }
      ]
     }
  ]

My initial file is a CSV file I have converted my CSV file into JSON using R

func <- function(x) {
     grps <- split(names(x), gsub("[.].*", "", names(x)))
     for (nm in names(grps)) {
         if (length(grps[[nm]]) > 1 || !nm %in% grps[[nm]]) {
             x[[nm]] <- setNames(subset(x, select = grps[[nm]]),
                                 gsub("^[^.]+[.]", "", grps[[nm]]))
             x[,setdiff(grps[[nm]], nm)] <- NULL
         }
     }
     for (nm in names(x)) {
         if (is.data.frame(x[[nm]])) {
             x[[nm]] <- func(x[[nm]])
         }
     }
     if (any(grepl("[.]", names(x)))) func(x) else x
 }

I used this function to convert my CSV file to a nested JSON file

newdf= func(x)

Converting my data frame into JSON

library(jsonlite)
json_f=toJSON(newdf,dataframe = 'rows',pretty = T)

My CSV file looks like this

| Hugo_Symbol | HGVSp_Short | Other.Type | Other.Drug | Other.Effects |
| ----------- | ----------- | ---------- | ---------- | ------------- | 
|FLT3         | p.T227M     | Renal      | Sunitinib  | Response      |
|KRAS         | p.G12D      | Ovarian    | Cetuximab  | Sensitivity   |
|KRAS         | p.G12D      | Lung       | Regorafenib| Sensitivity   |
|KRAS         | p.G12D      | Leukemia   | Akt        | Resistance    |
|KRAS         | p.G12D      | Lung       | gefitimab  | Sensitivity   |
|ALK          | p.G845=     |            |            |               |
|APC          | p.Q1338*    |            |            |               |
|MET          | p. M1S      | Eye        |            | Response      |

Can anyone suggest me a simple way to merge my duplicates array in the JSON file in the R?

Upvotes: 1

Views: 59

Answers (1)

danlooo
danlooo

Reputation: 10637

Its much easier to do the re-nesting on tables, so lets convert the json back to a tibble:

library(jsonlite)
library(tidyverse)

read_json("data.json") %>%
  map(as.data.frame) %>%
  bind_rows() %>%
  as_tibble() %>%
  nest(-Hugo_Symbol, -HGVSp_Short) %>%
  mutate(data = data %>% map(~ {
    colnames(.x) <- colnames(.x) %>% str_remove("^Other.")
    .x
  })) %>%
  rename(Other = data) %>%
  toJSON(pretty = T)

resulting in

[
  {
    "Hugo_Symbol": "FLT3",
    "HGVSp_Short": "p.T227M",
    "Other": [
      {
        "Type": "Renal",
        "Drug": "Sunitinib",
        "Effects": "Response"
      }
    ]
  },
  {
    "Hugo_Symbol": "KRAS",
    "HGVSp_Short": "p.G12D",
    "Other": [
      {
        "Type": "Ovarian",
        "Drug": "Cetuximab",
        "Effects": "Sensitivity"
      },
      {
        "Type": "Lung",
        "Drug": "Regorafenib",
        "Effects": "Sensitivity"
      },
      {
        "Type": "Leukemia",
        "Drug": "Akt",
        "Effects": "Resistance"
      },
      {
        "Type": "Lung",
        "Drug": "gefitimab",
        "Effects": "Sensitivity"
      }
    ]
  },
  {
    "Hugo_Symbol": "ALK",
    "HGVSp_Short": "p.G845="
...

Upvotes: 3

Related Questions