Reputation: 291
I have got a json in my base:
{"a":1,"b":2,"c":[{"d":3,"e":"str_1"}, {"d":4,"e":"str_2"}]}
I need to get all unique values for every key, but I have some problems with extracting values for key 'd' and key 'e'.
Using:
SELECT
DISTINCT JSONExtractRaw(column, 'c')
FROM t1
I get:
[{"d":3,"e":"str_1"},
{"d":4,"e":"str_2"}]
But if I use JsonExtract variety again for key 'd' and key 'e' it returns nothing. How to solve this problem?
Upvotes: 6
Views: 25067
Reputation: 15226
If needed I would use 'safe' query like this that correctly processed unordered members and missed ones. This way is not pretty fast but reliable.
SELECT
json,
a_and_b,
d_uniq_values,
e_uniq_values
FROM (
SELECT
json,
JSONExtract(json, 'Tuple(a Nullable(Int32), b Nullable(Int32))') a_and_b,
JSONExtractRaw(json, 'c') c_json,
range(JSONLength(c_json)) AS array_indices,
arrayDistinct(arrayMap(i -> JSONExtractInt(c_json, i + 1, 'd'), array_indices)) AS d_uniq_values,
arrayDistinct(arrayMap(i -> JSONExtractString(c_json, i + 1, 'e'), array_indices)) AS e_uniq_values
FROM
(
/* test data */
SELECT arrayJoin([
'{}',
'{"a":1,"b":2}',
'{"b":1,"a":2}',
'{"b":1}',
'{"a":1,"b":2,"c":[]}',
'{"a":1,"b":2,"c":[{"d":3,"e":"str_1"}, {"d":4,"e":"str_2"}]}',
'{"b":1,"a":2,"c":[{"e":"3","d":1}, {"e":"4","d":2}]}',
'{"a":1,"b":2,"c":[{"d":3,"e":"str_1"}, {"d":4,"e":"str_2"}, {"d":3,"e":"str_1"}, {"d":4,"e":"str_1"}, {"d":7,"e":"str_9"}]}'
]) AS json
))
FORMAT Vertical;
/* Result:
Row 1:
──────
json: {}
a_and_b: (NULL,NULL)
d_uniq_values: []
e_uniq_values: []
Row 2:
──────
json: {"a":1,"b":2}
a_and_b: (1,2)
d_uniq_values: []
e_uniq_values: []
Row 3:
──────
json: {"b":1,"a":2}
a_and_b: (2,1)
d_uniq_values: []
e_uniq_values: []
Row 4:
──────
json: {"b":1}
a_and_b: (NULL,1)
d_uniq_values: []
e_uniq_values: []
Row 5:
──────
json: {"a":1,"b":2,"c":[]}
a_and_b: (1,2)
d_uniq_values: []
e_uniq_values: []
Row 6:
──────
json: {"a":1,"b":2,"c":[{"d":3,"e":"str_1"}, {"d":4,"e":"str_2"}]}
a_and_b: (1,2)
d_uniq_values: [3,4]
e_uniq_values: ['str_1','str_2']
Row 7:
──────
json: {"b":1,"a":2,"c":[{"e":"3","d":1}, {"e":"4","d":2}]}
a_and_b: (2,1)
d_uniq_values: [1,2]
e_uniq_values: ['3','4']
Row 8:
──────
json: {"a":1,"b":2,"c":[{"d":3,"e":"str_1"}, {"d":4,"e":"str_2"}, {"d":3,"e":"str_1"}, {"d":4,"e":"str_1"}, {"d":7,"e":"str_9"}]}
a_and_b: (1,2)
d_uniq_values: [3,4,7]
e_uniq_values: ['str_1','str_2','str_9']
*/
Upvotes: 7
Reputation: 13350
WITH
(
SELECT '{"a":1,"b":2,"c":[{"d":3,"e":"str_1"}, {"d":4,"e":"str_2"}]}'
) AS j
SELECT JSONExtract(j, 'c', 'Array(Tuple(Int64,String))')
┌─JSONExtract(j, 'c', 'Array(Tuple(Int64,String))')─┐
│ [(3,'str_1'),(4,'str_2')] │
└───────────────────────────────────────────────────┘
WITH
(
SELECT '{"a":1,"b":2,"c":[{"d":3,"e":"str_1"}, {"d":4,"e":"str_2"}, {"d":4,"e":"str_2"}]}'
) AS j
SELECT
JSONExtract(j, 'c', 'Array(Tuple(Int64,String))') AS t,
arrayReduce('groupUniqArray', arrayMap(x -> (x.1), t)) AS d,
arrayReduce('groupUniqArray', arrayMap(x -> (x.2), t)) AS e
┌─t─────────────────────────────────────┬─d─────┬─e─────────────────┐
│ [(3,'str_1'),(4,'str_2'),(4,'str_2')] │ [4,3] │ ['str_2','str_1'] │
└───────────────────────────────────────┴───────┴───────────────────┘
Upvotes: 4