Reputation: 2428
I'm working on calculating energy costs from time-series data using PostgreSQL 15 and TimescaleDB (TSL licence, so not all open source features available). The cost calculation is based on energy flow and prices over irregular intervals, and I'm not sure if I'm doing it right and how to improve the performance. I expect a lot of data coming in from streaming, but I don't need to calculate the costs continuosly (can be a nightly batch job).
The goal is to accurately calculate the costs using a formula like sum(prices.flow x prices.price_rp x delta_t)
Here is the schema setup:
CREATE TABLE nodes (
id integer PRIMARY KEY NOT NULL,
name character varying NOT NULL
);
CREATE INDEX ix_nodes_id ON nodes USING BTREE (id);
CREATE UNIQUE INDEX ix_nodes_name ON nodes USING BTREE (name);
CREATE TABLE prices (
node_id integer NOT NULL,
price_rp double precision NOT NULL,
in_flow double precision NOT NULL,
out_flow double precision NOT NULL,
timestamp timestamp with time zone NOT NULL,
inserted timestamp with time zone NOT NULL,
PRIMARY KEY (node_id, timestamp),
FOREIGN KEY (node_id) REFERENCES public.nodes (id)
);
CREATE INDEX prices_timestamp_idx ON prices USING BTREE (timestamp);
CREATE TABLE node_energy (
node_id integer NOT NULL,
status varchar NOT NULL,
value_wh integer NOT NULL,
timestamp timestamp with time zone NOT NULL,
inserted timestamp with time zone NOT NULL,
PRIMARY KEY (node_id, timestamp),
FOREIGN KEY (node_id) REFERENCES public.nodes (id)
);
CREATE INDEX node_energy_timestamp_idx ON node_energy USING BTREE (timestamp);
I enabled hypertables:
SELECT create_hypertable('node_energy', 'timestamp', if_not_exists => TRUE);
SELECT create_hypertable('prices', 'timestamp', if_not_exists => TRUE);
Here are sample data filled in:
-- populate nodes
INSERT INTO nodes (id, name) VALUES
(1, 'Node 1'),
(2, 'Node 2'),
(3, 'Node 3'),
(4, 'Node 4'),
(5, 'Node 5'),
(6, 'Node 6'),
(7, 'Node 7'),
(8, 'Node 8'),
(9, 'Node 9'),
(10, 'Node 10');
--- insert 10 x 1000 energy points
INSERT INTO node_energy (node_id, status, value_wh, timestamp, inserted)
SELECT
node_id,
CASE WHEN random() > 0.5 THEN 'SUPPLYING' ELSE 'DEMANDING' END AS status,
trunc(random() * 1000) AS value_wh,
timestamp '2024-01-01 00:00:00' + (floor((s - 1) / 30) * interval '1 day') + ((s - 1) % 30) * interval '10 minutes' AS timestamp,
now() AS inserted
FROM
generate_series(1, 10) AS node_id,
generate_series(1, 1000) AS s;
--- insert 10 x 1000 prices
INSERT INTO prices (node_id, price_rp, in_flow, out_flow, timestamp, inserted)
SELECT
node_id,
random() * 100 + 10 AS price_rp, -- Random price between 10 and 150
random() * 10 AS in_flow, -- Random in-flow rate
random() * 10 AS out_flow, -- Random out-flow rate
timestamp '2024-01-01 00:00:00' + (floor((s - 1) / 30) * interval '1 day') + ((s - 1) % 30) * interval '10 minutes' AS timestamp,
now() AS inserted
FROM
generate_series(1, 10) AS node_id,
generate_series(1, 1000) AS s;
And here is my drafted query:
WITH cost_data AS (
SELECT
ne.node_id,
ne.timestamp AS energy_time,
p.price_rp,
p.timestamp AS price_time,
(ne.value_wh / 1000.0) AS value_kwh, -- converting Wh to kWh
EXTRACT(EPOCH FROM (ne.timestamp - COALESCE(LAG(ne.timestamp) OVER (PARTITION BY ne.node_id ORDER BY ne.timestamp), ne.timestamp))) AS delta_t,
CASE WHEN ne.status = 'SUPPLYING' THEN p.out_flow
WHEN ne.status = 'DEMANDING' THEN p.in_flow
ELSE 0 END AS flow
FROM
public.node_energy ne
LEFT JOIN LATERAL
(SELECT price_rp, timestamp, in_flow, out_flow
FROM public.prices p
WHERE p.node_id = ne.node_id AND p.timestamp <= ne.timestamp
ORDER BY p.timestamp DESC
LIMIT 1) p ON true
ORDER BY
ne.timestamp
)
SELECT
node_id,
SUM(flow * price_rp * delta_t / 3600000) AS total_cost_rp -- converting seconds to hours and applying price rate
FROM
cost_data
GROUP BY
node_id
ORDER BY node_id;
The query so far does, I believe at least, what it should, however it does not scale well. How can I verify that the costs are calculated correctly, and how can I improve the performance? I would like to avoid introducing stream processing if possible, so that's why I'm looking at a SQL solution.
Upvotes: 0
Views: 68