Reputation: 11829
The 'test_sessions' table
CREATE TABLE IF NOT EXISTS test_sessions (
id UInt64,
name String,
created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;
The 'test_sessions' table data
INSERT INTO test_sessions(id, name, created_at) VALUES
(1, 'start', now()),
(1, 'stop', now() + INTERVAL 1 day),
(2, 'start', now() + INTERVAL 1 HOUR );
+----+-------+---------------------+
| id | name | created_at |
+----+-------+---------------------+
| 1 | start | 2020-11-10 07:58:19 |
+----+-------+---------------------+
| 2 | start | 2020-11-10 08:58:19 |
+----+-------+---------------------+
| 1 | stop | 2020-11-11 07:58:19 |
+----+-------+---------------------+
The 'finished_sessions' materialized view
CREATE MATERIALIZED VIEW finished_sessions (
id UInt64,
start_at DateTime,
end_at DateTime
)
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(start_at)
ORDER BY (id)
POPULATE AS
SELECT
id,
minIf(created_at, name = 'start') AS start_at,
maxIf(created_at, name = 'stop') AS end_at
FROM test_sessions
GROUP BY id
HAVING end_at <> '1970-01-01 00:00:00';
The 'finished_sessions' materialized view data
SELECT * FROM finished_sessions;
+----+---------------------+---------------------+
| id | start_at | end_at |
+----+---------------------+---------------------+
| 1 | 2020-11-10 07:58:19 | 2020-11-11 07:58:19 |
+----+---------------------+---------------------+
Until this moment, everything works correctly: there is only 1 closed session
After the close of the second session
INSERT INTO test_sessions(id, name, created_at) VALUES
(2, 'stop', now())
Incorrect populating occurs
SELECT * from finished_sessions ORDER BY id;
+----+-------------------------------+---------------------+
| id | start_at | end_at |
+----+-------------------------------+---------------------+
| 1 | 2020-11-10 07:58:19 | 2020-11-11 07:58:19 |
+----+-------------------------------+---------------------+
| 2 | ---> 1970-01-01 00:00:00 <--- | 2020-11-10 08:06:24 |
+----+-------------------------------+---------------------+
How to fix it?
Upvotes: 0
Views: 525
Reputation: 13310
You you should use AggregateFunction or better SimpleAggregateFunction
It's impossible to partition table by AggregateFunction. Because AggregateFunction are computed during merges and merges are executed over partition.
MV is an insert trigger. https://youtu.be/ckChUkC3Pns?list=PLO3lfQbpDVI-hyw4MyqxEk3rDHw95SzxJ https://den-crane.github.io/Everything_you_should_know_about_materialized_views_commented.pdf
CREATE TABLE IF NOT EXISTS test_sessions ( id UInt64, name String, created_at DateTime ) ENGINE = MergeTree() PARTITION BY toYYYYMM(created_at) ORDER BY name; INSERT INTO test_sessions(id, name, created_at) VALUES (1, 'start', now()), (1, 'stop', now() + INTERVAL 1 day), (2, 'start', now() + INTERVAL 1 HOUR ); CREATE MATERIALIZED VIEW finished_sessions ENGINE = AggregatingMergeTree ORDER BY (id) POPULATE AS SELECT id, minStateIf(created_at, name = 'start') AS start_at, maxStateIf(created_at, name = 'stop') AS end_at FROM test_sessions GROUP BY id INSERT INTO test_sessions(id, name, created_at) VALUES (2, 'stop', now()); SELECT id, minMerge(start_at), maxMerge(end_at) FROM finished_sessions GROUP BY id Query id: d797eee4-6088-40b8-aa12-b10da62b60c5 ┌─id─┬──minMerge(start_at)─┬────maxMerge(end_at)─┐ │ 2 │ 2020-11-10 15:18:19 │ 2020-11-10 14:21:54 │ │ 1 │ 2020-11-10 14:18:19 │ 2020-11-11 14:18:19 │ └────┴─────────────────────┴─────────────────────┘
CREATE TABLE IF NOT EXISTS test_sessions ( id UInt64, name String, created_at DateTime ) ENGINE = MergeTree() PARTITION BY toYYYYMM(created_at) ORDER BY name; INSERT INTO test_sessions(id, name, created_at) VALUES (1, 'start', now()), (1, 'stop', now() + INTERVAL 1 day), (2, 'start', now() + INTERVAL 1 HOUR ); CREATE MATERIALIZED VIEW finished_sessions ( id UInt64, start_at SimpleAggregateFunction(min,DateTime), end_at SimpleAggregateFunction(max,DateTime) ) ENGINE = AggregatingMergeTree ORDER BY (id) POPULATE AS SELECT id, minIf(created_at, name = 'start') AS start_at, maxIf(created_at, name = 'stop') AS end_at FROM test_sessions GROUP BY id; INSERT INTO test_sessions(id, name, created_at) VALUES (2, 'stop', now()) optimize table finished_sessions final; SELECT id, min(start_at), max(end_at) FROM finished_sessions GROUP BY id ┌─id─┬───────min(start_at)─┬─────────max(end_at)─┐ │ 2 │ 1970-01-01 00:00:00 │ 2020-11-10 14:29:30 │ │ 1 │ 2020-11-10 14:29:15 │ 2020-11-11 14:29:15 │ └────┴─────────────────────┴─────────────────────┘
Upvotes: 1