Reputation: 1330
I have a large table containing sensor readings, and a corresponding staging table with new readings.
I am trying to filter the records I get from the main table so as to get only the relevant information for calculating the deltas.
A self-contained, simplified example which:
DROP TABLE IF EXISTS tb_sensor_reading;
DROP TABLE IF EXISTS stg_sensor_reading;
CREATE TABLE tb_sensor_reading(
sensor_id INTEGER
, ts_reading TIMESTAMP
, reading_value NUMERIC(8, 3)
);
CREATE TABLE stg_sensor_reading AS TABLE tb_sensor_reading WITH NO DATA;
SELECT * FROM stg_sensor_reading;
INSERT INTO tb_sensor_reading(
sensor_id
, ts_reading
, reading_value
)
SELECT
generated_sensor_id
, generated_ts_reading
-- readings delta may vary between +25 and +49.9 between each reading
, EXTRACT(EPOCH FROM generated_ts_reading) / (3600 * 6 / 25)
- EXTRACT(EPOCH FROM NOW() - INTERVAL '7 days') / (3600 * 6 / 25)
+ RANDOM()*24.9
FROM
GENERATE_SERIES(1,6) AS generated_sensor_id
, GENERATE_SERIES(
NOW() - INTERVAL '7 days', NOW(), INTERVAL '6 hours'
) AS generated_ts_reading
;
-- generate dummy records in stg_reading
INSERT INTO stg_sensor_reading(
sensor_id
, ts_reading
, reading_value
)
SELECT
generated_sensor_id
, generated_ts_reading
, EXTRACT(EPOCH FROM generated_ts_reading) / (3600 * 6 / 25)
- EXTRACT(EPOCH FROM NOW() - INTERVAL '7 days') / (3600 * 6 / 25)
+ RANDOM()*24.9
FROM
GENERATE_SERIES(3, 5) AS generated_sensor_id
, GENERATE_SERIES(
NOW() - INTERVAL '3 days'
, NOW() - INTERVAL '2 days'
, INTERVAL '6 hours'
) AS generated_ts_reading
;
SELECT * FROM tb_sensor_reading LIMIT 5 ;
SELECT * FROM tb_sensor_reading LIMIT 5;
sensor_id | ts_reading | reading_value
-----------+----------------------------+---------------
1 | 2022-01-03 13:25:16.417613 | 7.154
1 | 2022-01-03 19:25:16.417613 | 46.736
1 | 2022-01-04 01:25:16.417613 | 73.835
1 | 2022-01-04 07:25:16.417613 | 89.147
1 | 2022-01-04 13:25:16.417613 | 105.444
(5 rows)
SELECT * FROM stg_sensor_reading ;
sensor_id | ts_reading | reading_value
-----------+----------------------------+---------------
3 | 2022-01-07 18:40:19.978908 | 406.839
3 | 2022-01-08 00:40:19.978908 | 435.935
3 | 2022-01-08 06:40:19.978908 | 454.385
3 | 2022-01-08 12:40:19.978908 | 495.853
3 | 2022-01-08 18:40:19.978908 | 516.214
4 | 2022-01-07 18:40:19.978908 | 410.096
4 | 2022-01-08 00:40:19.978908 | 442.941
4 | 2022-01-08 06:40:19.978908 | 462.816
4 | 2022-01-08 12:40:19.978908 | 496.893
4 | 2022-01-08 18:40:19.978908 | 517.247
5 | 2022-01-07 18:40:19.978908 | 419.876
5 | 2022-01-08 00:40:19.978908 | 425.990
5 | 2022-01-08 06:40:19.978908 | 464.086
5 | 2022-01-08 12:40:19.978908 | 486.731
5 | 2022-01-08 18:40:19.978908 | 515.454
(15 rows)
I am trying to select the records from tb_sensor_readings to return:
SELECT * FROM tb_sensor_reading
WHERE sensor_id IN (
SELECT DISTINCT sensor_id from stg_sensor_reading
)
ORDER BY sensor_id, ts_reading
LIMIT 5;
sensor_id | ts_reading | reading_value
-----------+----------------------------+---------------
3 | 2022-01-03 13:25:16.417613 | 24.669
3 | 2022-01-03 19:25:16.417613 | 48.384
3 | 2022-01-04 01:25:16.417613 | 56.889
3 | 2022-01-04 07:25:16.417613 | 95.980
3 | 2022-01-04 13:25:16.417613 | 120.536
MAX(tb_sensor_reading.ts_reading) < MIN(stg_sensor_reading.ts_reading)
OR tb_sensor_reading.ts_reading > MIN(stg_sensor_reading.ts_reading)
ON tb_sensor_reading.sensor_id = stg_sensor_reading. sensor_id
How can I :
Thank a lot!
Upvotes: 1
Views: 64
Reputation: 1330
I could select the desired rows from tb_reading by using a UNION operator.
The first query gets the last reading for each sensor_id from tb_reading, which occured prior to the first of the new readings.
The second subquery returns the readings from tb_sensor_reading, for each sensor_id, which happened during the same time period of the readings available in the staging table.
the WHERE tb.sensor_id = stg.sensor_id is not worth the cost if the role of the query is to limit the number of rows from which to calculate the deltas.
(SELECT -- most recent previous reading for each sensor_id
DISTINCT ON (sensor_id)
sensor_id
, ts_reading
, reading_value
FROM
tb_sensor_reading tb
WHERE sensor_id IN (SELECT DISTINCT sensor_id FROM stg_sensor_reading)
AND ts_reading < ALL(
SELECT MIN(ts_reading)
FROM stg_sensor_reading stg
WHERE tb.sensor_id = stg.sensor_id
) ORDER BY sensor_id, ts_reading DESC
)
UNION
SELECT -- all readings that happened in the same period than stg
sensor_id
, ts_reading
, reading_value
FROM tb_sensor_reading tb
WHERE
sensor_id IN (SELECT DISTINCT sensor_id FROM stg_sensor_reading)
AND ts_reading >= ALL(
SELECT MIN(ts_reading)
FROM stg_sensor_reading stg
WHERE tb.sensor_id = stg.sensor_id
)
AND ts_reading <= ALL(
SELECT MAX(ts_reading)
FROM stg_sensor_reading stg
WHERE tb.sensor_id = stg.sensor_id
)
ORDER BY sensor_id, ts_reading DESC ;
sensor_id | ts_reading | reading_value
-----------+----------------------------+---------------
3 | 2022-01-08 18:40:19.973512 | 514.387
3 | 2022-01-08 12:40:19.973512 | 490.200
3 | 2022-01-08 06:40:19.973512 | 468.323
3 | 2022-01-08 00:40:19.973512 | 438.597
3 | 2022-01-07 18:40:19.973512 | 420.665
4 | 2022-01-08 18:40:19.973512 | 524.803
4 | 2022-01-08 12:40:19.973512 | 497.348
4 | 2022-01-08 06:40:19.973512 | 468.300
4 | 2022-01-08 00:40:19.973512 | 430.468
4 | 2022-01-07 18:40:19.973512 | 411.204
5 | 2022-01-08 18:40:19.973512 | 504.969
5 | 2022-01-08 12:40:19.973512 | 482.629
5 | 2022-01-08 06:40:19.973512 | 473.075
5 | 2022-01-08 00:40:19.973512 | 432.396
5 | 2022-01-07 18:40:19.973512 | 405.156
(15 rows)
Upvotes: 1