HackToHell
HackToHell

Reputation: 2393

How do I select unique key, value pairs from an array into a materialized view in Clickhouse?

The table structure is as follows,

CREATE TABLE metrics

(
    `created_date` Date,
    `created_at` DateTime,
    `host` LowCardinality(String),
    `metrics_name` LowCardinality(String),
    `tags` Nested(key LowCardinality(String), value LowCardinality(String))
)
ENGINE = MergeTree(created_date, (created_at), 8192)

How can I generate a materialized view that can have a structure similar to

Host Key Values
host Key: X Values : ["uniq1", "uniq2"]

Upvotes: 1

Views: 1564

Answers (2)

Denny Crane
Denny Crane

Reputation: 13310

MergeTree(created_date, (created_at), 8192)

This syntax is obsolete more than 3 years ago. Try to do not use it. It will be canceled eventually.

ENGINE = MergeTree partition by toYYYYMM(created_date) ORDER by (created_at)

CREATE TABLE metrics
(
    `created_date` Date,
    `created_at` DateTime,
    `host` LowCardinality(String),
    `metrics_name` LowCardinality(String),
    `tags` Nested(key LowCardinality(String), value LowCardinality(String))
)
ENGINE = MergeTree partition by toYYYYMM(created_date) ORDER by (created_at)


create materialized view metrics_mv (
     host LowCardinality(String),
     key LowCardinality(String),
     u_values SimpleAggregateFunction(groupUniqArrayArray, Array(String))
)     
Engine=AggregatingMergeTree order by (host, key) as
select host, 
       tags.key as key, 
       groupUniqArray(tags.value) as u_values
from metrics array join tags
group by host, key


insert into metrics values(today(), now(), 'h1', 'm1', ['k1','k2'], ['v1', 'v2']);
insert into metrics values(today(), now(), 'h1', 'm1', ['k1','k2'], ['v1', 'v2']);
insert into metrics values(today(), now(), 'h1', 'm1', ['k1','k3'], ['v11', 'v2']);
insert into metrics values(today(), now(), 'h2', 'm1', ['k1','k3'], ['v1', 'v22']);

optimize table  metrics_mv final;

select * from metrics_mv;

┌─host─┬─key─┬─u_values─────┐
│ h1   │ k1  │ ['v11','v1'] │
│ h1   │ k2  │ ['v2']       │
│ h1   │ k3  │ ['v2']       │
│ h2   │ k1  │ ['v1']       │
│ h2   │ k3  │ ['v22']      │
└──────┴─────┴──────────────┘

select host, key, groupUniqArrayArray(u_values) values
from metrics_mv
group by host, key

┌─host─┬─key─┬─values───────┐
│ h1   │ k2  │ ['v2']       │
│ h2   │ k3  │ ['v22']      │
│ h1   │ k3  │ ['v2']       │
│ h1   │ k1  │ ['v11','v1'] │
│ h2   │ k1  │ ['v1']       │
└──────┴─────┴──────────────┘

Upvotes: 2

vladimir
vladimir

Reputation: 15218

Consider using this MV:

CREATE MATERIALIZED VIEW metrics_mv
ENGINE = MergeTree()
ORDER BY host AS
SELECT
    host,
    tags.key key,
    groupArray(tags.value) values /* or 'groupUniqArray(tags.value) AS values' to get unique values */
FROM metrics
ARRAY JOIN tags
GROUP BY
    host,
    tags.key

Upvotes: 1

Related Questions