cetver
cetver

Reputation: 11829

Incorrect populating of materialized view

The 'test_sessions' table

CREATE TABLE IF NOT EXISTS test_sessions (
    id UInt64,
    name String,
    created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;

The 'test_sessions' table data

INSERT INTO test_sessions(id, name, created_at) VALUES
(1, 'start', now()),
(1, 'stop',  now() + INTERVAL 1 day),
(2, 'start',  now() + INTERVAL 1 HOUR );

+----+-------+---------------------+
| id | name  | created_at          |
+----+-------+---------------------+
| 1  | start | 2020-11-10 07:58:19 |
+----+-------+---------------------+
| 2  | start | 2020-11-10 08:58:19 |
+----+-------+---------------------+
| 1  | stop  | 2020-11-11 07:58:19 |
+----+-------+---------------------+

The 'finished_sessions' materialized view

CREATE MATERIALIZED VIEW finished_sessions (
    id UInt64,
    start_at DateTime,
    end_at DateTime
)
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(start_at)
ORDER BY (id)
POPULATE AS
SELECT
    id,
    minIf(created_at, name = 'start') AS start_at,
    maxIf(created_at, name = 'stop')  AS end_at
FROM test_sessions
GROUP BY id
HAVING end_at <> '1970-01-01 00:00:00';

The 'finished_sessions' materialized view data

SELECT * FROM finished_sessions;

+----+---------------------+---------------------+
| id | start_at            | end_at              |
+----+---------------------+---------------------+
| 1  | 2020-11-10 07:58:19 | 2020-11-11 07:58:19 |
+----+---------------------+---------------------+

Until this moment, everything works correctly: there is only 1 closed session

After the close of the second session

INSERT INTO test_sessions(id, name, created_at) VALUES
(2, 'stop', now())

Incorrect populating occurs

SELECT * from finished_sessions ORDER BY id;

+----+-------------------------------+---------------------+
| id | start_at                      | end_at              |
+----+-------------------------------+---------------------+
| 1  | 2020-11-10 07:58:19           | 2020-11-11 07:58:19 |
+----+-------------------------------+---------------------+
| 2  | ---> 1970-01-01 00:00:00 <--- | 2020-11-10 08:06:24 |
+----+-------------------------------+---------------------+

How to fix it?

Upvotes: 0

Views: 525

Answers (1)

Denny Crane
Denny Crane

Reputation: 13310

  1. You you should use AggregateFunction or better SimpleAggregateFunction

  2. It's impossible to partition table by AggregateFunction. Because AggregateFunction are computed during merges and merges are executed over partition.

  3. MV is an insert trigger. https://youtu.be/ckChUkC3Pns?list=PLO3lfQbpDVI-hyw4MyqxEk3rDHw95SzxJ https://den-crane.github.io/Everything_you_should_know_about_materialized_views_commented.pdf

    CREATE TABLE IF NOT EXISTS test_sessions (
        id UInt64,
        name String,
        created_at DateTime
    )
    ENGINE = MergeTree()
    PARTITION BY toYYYYMM(created_at)
    ORDER BY name;
    
    INSERT INTO test_sessions(id, name, created_at) VALUES
    (1, 'start', now()),
    (1, 'stop',  now() + INTERVAL 1 day),
    (2, 'start',  now() + INTERVAL 1 HOUR );
    
    CREATE MATERIALIZED VIEW finished_sessions
    ENGINE = AggregatingMergeTree
    ORDER BY (id)
    POPULATE AS
    SELECT
        id, 
        minStateIf(created_at, name = 'start') AS start_at,
        maxStateIf(created_at, name = 'stop')  AS end_at
    FROM test_sessions
    GROUP BY id
    
    INSERT INTO test_sessions(id, name, created_at) VALUES
    (2, 'stop', now());
    
    SELECT
        id,
        minMerge(start_at),
        maxMerge(end_at)
    FROM finished_sessions
    GROUP BY id
    
    Query id: d797eee4-6088-40b8-aa12-b10da62b60c5
    
    ┌─id─┬──minMerge(start_at)─┬────maxMerge(end_at)─┐
    │  2 │ 2020-11-10 15:18:19 │ 2020-11-10 14:21:54 │
    │  1 │ 2020-11-10 14:18:19 │ 2020-11-11 14:18:19 │
    └────┴─────────────────────┴─────────────────────┘
CREATE TABLE IF NOT EXISTS test_sessions (
    id UInt64,
    name String,
    created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;

INSERT INTO test_sessions(id, name, created_at) VALUES
(1, 'start', now()),
(1, 'stop',  now() + INTERVAL 1 day),
(2, 'start',  now() + INTERVAL 1 HOUR );

CREATE MATERIALIZED VIEW finished_sessions
(
    id UInt64,
    start_at SimpleAggregateFunction(min,DateTime),
    end_at SimpleAggregateFunction(max,DateTime)
)
ENGINE = AggregatingMergeTree
ORDER BY (id)
POPULATE AS
SELECT
    id, 
    minIf(created_at, name = 'start') AS start_at,
    maxIf(created_at, name = 'stop')  AS end_at
FROM test_sessions
GROUP BY id;


INSERT INTO test_sessions(id, name, created_at) VALUES
(2, 'stop', now())

optimize table finished_sessions final;


SELECT
        id,
        min(start_at),
        max(end_at)
    FROM finished_sessions
    GROUP BY id

┌─id─┬───────min(start_at)─┬─────────max(end_at)─┐
│  2 │ 1970-01-01 00:00:00 │ 2020-11-10 14:29:30 │
│  1 │ 2020-11-10 14:29:15 │ 2020-11-11 14:29:15 │
└────┴─────────────────────┴─────────────────────┘    

Upvotes: 1

Related Questions