prav
prav

Reputation: 33

Remove duplicates in an object in JSONiq

This is an example object that I got:

{ "query1" : [ { "name" : "John", "id" : 1234 }, { "name" : "Rose", "id" : 3214 }, { "name" : "John", "id" : 1234 } ] }

How can I remove the duplicates using group by and array navigation / unboxing?

I tried adding the group by clause after the where clause, but did not get the correct answer.

Upvotes: 1

Views: 131

Answers (3)

Ghislain Fourny
Ghislain Fourny

Reputation: 7279

In JSONiq, you can indeed remove duplicates with a group by and array unboxing, like so:

(: Sample input: the "query1" array contains the John/1234 object twice. :)
let $data := {
  "query1" : [
    { "name" : "John", "id" : 1234 },
    { "name" : "Rose", "id" : 3214 },
    { "name" : "John", "id" : 1234 }
  ]
}
(: Build a new object whose "query1" array holds one object per (name, id) pair. :)
return {
 "query1" : [
    (: [] unboxes the array so the loop iterates over its member objects. :)
    for $obj in $data.query1[]
    (: Objects with the same name and the same id land in the same group. :)
    group by $n := $obj.name, $i := $obj.id
    (: After grouping, $obj is bound to all objects of the group; keep only the first. :)
    return $obj[1]
  ]
}

There is also a generic approach that will work even with unknown fields and more nested values:

(: Sample input: the "query1" array contains the John/1234 object twice. :)
let $data := {
  "query1" : [
    { "name" : "John", "id" : 1234 },
    { "name" : "Rose", "id" : 3214 },
    { "name" : "John", "id" : 1234 }
  ]
}
(: Build a new object whose "query1" array keeps only the first occurrence of each object. :)
return {
  "query1" : [
    (: $i is the 1-based position of $obj in the unboxed array. :)
    for $obj at $i in $data.query1[]
    where
      (: Compare $obj against every object that appears before position $i ... :)
      every $other in $data.query1[][position() lt $i]
      (: ... and keep it only if none of them is deep-equal to it. :)
      satisfies not deep-equal($obj, $other)
    return $obj
  ]
}

Upvotes: 0

It's possible to do it this way; I found the answer here:

# Sample payload: the "query1" list contains the John/1234 record twice.
data = {
    "query1": [
        {"name": "John", "id": 1234},
        {"name": "Rose", "id": 3214},
        {"name": "John", "id": 1234},
    ]
}

# Default to an empty list so a missing "query1" key yields an empty result
# instead of a TypeError when iterating over None.
query1 = data.get('query1', [])

# Key each record by the frozenset of its (key, value) pairs so that two
# records with the same content but a different key order count as duplicates.
# A plain dict remembers insertion order, so the first occurrence of each
# record keeps its original position (a set would discard the input order).
# Note: record values must be hashable for this to work.
first_seen = {}
for record in query1:
    # setdefault only stores the first record seen for a given key; dict()
    # makes a copy so the result does not alias the input objects.
    first_seen.setdefault(frozenset(record.items()), dict(record))

unique_query1 = list(first_seen.values())

# Bare expression: displayed as the result when run in a REPL or notebook.
unique_query1

Upvotes: 0

NIKUNJ KOTHIYA
NIKUNJ KOTHIYA

Reputation: 2165

To remove duplicate items from a JSON object, you can use this code:

from collections import OrderedDict

# Sample payload: the "query1" list contains the John/1234 record twice.
data = {
    "query1": [
        {"name": "John", "id": 1234},
        {"name": "Rose", "id": 3214},
        {"name": "John", "id": 1234},
    ]
}

# For every top-level list, keep one record per (name, id) pair. The ordered
# mapping keeps each pair at the position where it first appeared, while a
# later record with the same pair replaces the stored object.
unique_data = {}
for field, records in data.items():
    by_identity = OrderedDict(
        ((rec["name"], rec["id"]), rec) for rec in records
    )
    unique_data[field] = [*by_identity.values()]

print(unique_data)

Result:

{'query1': [{'name': 'John', 'id': 1234}, {'name': 'Rose', 'id': 3214}]}

Upvotes: 1

Related Questions