Reputation: 2486
I have data with the following schema in ClickHouse:
CREATE TABLE table AS (
key String,
…
nested Nested (
key String,
value String
)
) …
Some example data:
key | … | nested |
----|---|-------------------------------|
k1 | | [{"key": "a", "value": "1"}] |
k1 | | [{"key": "a", "value": "2"}] |
k1 | | [{"key": "a", "value": "1"}, |
| | "key": "a", "value": "2"}] |
k1 | | [{"key": "b", "value": "3" |
I want to group by the key and collect all the distinct key-value pairs into two arrays:
key | nested.key | nested.value |
------|-----------------|------------------|
k1 | ["a", "a", "b"] | ["1", "2", "3"] |
What is the simplest and most efficient way to do this in ClickHouse?
Upvotes: 3
Views: 3341
Reputation: 15218
I would suggest this query:
SELECT DISTINCT
key,
arrayDistinct(groupArray((nested.key, nested.value))) AS distinctNested,
arrayMap(x -> (x.1), distinctNested) AS `nested.keys`,
arrayMap(x -> (x.2), distinctNested) AS `nested.values`
FROM test.table_002
ARRAY JOIN nested
GROUP BY key
/* Result
┌─key─┬─distinctNested──────────────────┬─nested.keys───┬─nested.values─┐
│ k1 │ [('a','1'),('a','2'),('b','3')] │ ['a','a','b'] │ ['1','2','3'] │
└─────┴─────────────────────────────────┴───────────────┴───────────────┘
*/
/* Test data preparing */
CREATE TABLE test.table_002 (
key String,
nested Nested (key String, value String)
) ENGINE = Memory;
INSERT INTO test.table_002
FORMAT JSONEachRow
{"key": "k1", "nested.key":["a"], "nested.value": ["1"]}
{"key": "k1", "nested.key":["a"], "nested.value": ["2"]}
{"key": "k1", "nested.key":["a", "a"], "nested.value": ["1", "2"]}
{"key": "k1", "nested.key":["b"], "nested.value": ["3"]}
Upvotes: 6