Reputation: 21
I have 2 tables like below:
create table parent_child
(
parent_id int not null,
child_id int not null
);
INSERT INTO parent_child (parent_id, child_id) VALUES (117722, 273215);
INSERT INTO parent_child (parent_id, child_id) VALUES (117722, 117936);
INSERT INTO parent_child (parent_id, child_id) VALUES (117722, 117873);
INSERT INTO parent_child (parent_id, child_id) VALUES (117722, 123305);
INSERT INTO parent_child (parent_id, child_id) VALUES (104151, 240006);
INSERT INTO parent_child (parent_id, child_id) VALUES (104151, 240005);
INSERT INTO parent_child (parent_id, child_id) VALUES (104151, 239415);
INSERT INTO parent_child (parent_id, child_id) VALUES (104151, 239414);
INSERT INTO parent_child (parent_id, child_id) VALUES (5316, 118310);
INSERT INTO parent_child (parent_id, child_id) VALUES (5316, 130627);
INSERT INTO parent_child (parent_id, child_id) VALUES (5316, 298564);
INSERT INTO parent_child (parent_id, child_id) VALUES (5316, 118311);
INSERT INTO parent_child (parent_id, child_id) VALUES (5316, 118312);
create table child
(
child_id int not null,
tstamp datetime not null,
value float,
);
-- Parent 117722
INSERT INTO child (child_id, tstamp, value) VALUES (273215, '2021-07-14 00:00:00.000000', 29);
INSERT INTO child (child_id, tstamp, value) VALUES (117936, '2021-07-14 00:00:00.000000', 52);
INSERT INTO child (child_id, tstamp, value) VALUES (117873, '2021-07-14 00:00:00.000000', 51);
INSERT INTO child (child_id, tstamp, value) VALUES (123305, '2021-07-14 00:00:00.000000', 31);
-- Parent 104151
INSERT INTO child (child_id, tstamp, value) VALUES (240006, '2021-07-14 00:00:00.000000', 37);
INSERT INTO child (child_id, tstamp, value) VALUES (240005, '2021-07-14 00:00:00.000000', 88);
INSERT INTO child (child_id, tstamp, value) VALUES (239415, '2021-07-14 00:00:00.000000', 29);
INSERT INTO child (child_id, tstamp, value) VALUES (239414, '2021-07-14 00:00:00.000000', 19);
-- Parent 5316
INSERT INTO child (child_id, tstamp, value) VALUES (118310, '2021-07-14 00:00:00.000000', 42);
INSERT INTO child (child_id, tstamp, value) VALUES (130627, '2021-07-14 00:00:00.000000', 11);
INSERT INTO child (child_id, tstamp, value) VALUES (298564, '2021-07-14 00:00:00.000000', 36);
INSERT INTO child (child_id, tstamp, value) VALUES (118311, '2021-07-14 00:00:00.000000', 22);
INSERT INTO child (child_id, tstamp, value) VALUES (118312, '2021-07-14 00:00:00.000000', 9);
-- Parent 117722
INSERT INTO child (child_id, tstamp, value) VALUES (273215, '2021-07-14 00:05:00.000000', 72);
INSERT INTO child (child_id, tstamp, value) VALUES (117936, '2021-07-14 00:05:00.000000', 99);
INSERT INTO child (child_id, tstamp, value) VALUES (117873, '2021-07-14 00:05:00.000000', 13);
INSERT INTO child (child_id, tstamp, value) VALUES (123305, '2021-07-14 00:05:00.000000', 24);
-- Parent 104151
INSERT INTO child (child_id, tstamp, value) VALUES (240006, '2021-07-14 00:05:00.000000', 65);
INSERT INTO child (child_id, tstamp, value) VALUES (240005, '2021-07-14 00:05:00.000000', 63);
INSERT INTO child (child_id, tstamp, value) VALUES (239415, '2021-07-14 00:05:00.000000', 23);
INSERT INTO child (child_id, tstamp, value) VALUES (239414, '2021-07-14 00:05:00.000000', 15);
-- Parent 5316
INSERT INTO child (child_id, tstamp, value) VALUES (118310, '2021-07-14 00:05:00.000000', 19);
INSERT INTO child (child_id, tstamp, value) VALUES (130627, '2021-07-14 00:05:00.000000', 22);
INSERT INTO child (child_id, tstamp, value) VALUES (298564, '2021-07-14 00:05:00.000000', 47);
INSERT INTO child (child_id, tstamp, value) VALUES (118311, '2021-07-14 00:05:00.000000', 54);
INSERT INTO child (child_id, tstamp, value) VALUES (118312, '2021-07-14 00:05:00.000000', 12);
The data in child table repeats every 5 minutes for each child. That is, for each child of a parent, there will be 288 data points. And this will further repeat for each day with a different (or same) value at a data point.
(1) Find out parent_id
, date_when_count_of_value_above_30_more_than_12_times_in_a_day
, count_of_values_above_30_each_day
where value across all children of a parent exceeded 30 for more than 12 times a day and at least 3 days a week. The data points need not be consecutive. Put in other words, if the MAX(value)
across all children of a parent at a data point (e.g. 2021-07-14 00:00:00.000000) is above 30, then that is counted as one occurance for that parent on that date.
(2) Find out parent_id
, latest_datetime_in_the_week
, max_value_across_all_children_in_a_week
where the value
across all children of a parent was maximum and the datetime when it was maximum. If more than one datetime has the same max value, then pick the latest datetime.
If these two queries can be combined in a single query, that is what I want. Otherwise, these can be 2 differet queries. If single query, then the output of question (2) will repeat for each row from question (1) and that is acceptable.
A list of parent_id
s for example, WHERE parent_id IN (117722, 5316)
parent_id | date_when_count_of_value_above_30_more_than_12_times_in_a_day | count_of_values_above_30_each_day | max_value_across_all_children_in_a_week | latest_datetime_in_the_week
117722 | 2021-07-14 | 13 | 99 | 2021-07-09 16:15:00.000000
117722 | 2021-07-11 | 28 | 99 | 2021-07-09 16:15:00.000000
104151 | 2021-07-14 | 19 | 65 | 2021-07-11 18:30:00.000000
104151 | 2021-07-13 | 27 | 65 | 2021-07-11 18:30:00.000000
104151 | 2021-07-11 | 36 | 65 | 2021-07-11 18:30:00.000000
The above is just a sample output. Of course, a parent_id will not appear on the output if the number of occurances in a week is not more than 3.
Upvotes: 1
Views: 329
Reputation: 6749
Three selects, all depending on the parent id. And using Vertica's analytic limit clause: LIMIT 1 OVER()
WITH
w_p AS (
SELECT
parent_id
, child.*
FROM parent_child
JOIN child USING(child_id)
)
,
o_30_count AS (
SELECT
parent_id
, tstamp::DATE
, SUM(CASE WHEN val > 30 THEN 1 END) AS over_30_count
FROM w_p
GROUP BY 1,2
HAVING over_30_count > 12
)
,
max_per_week AS (
SELECT
parent_id
, WEEK(tstamp)
, MAX(val) AS max_per_week
FROM w_p
GROUP BY 1,2
LIMIT 1 OVER(PARTITION BY parent_id ORDER BY max_per_week DESC)
)
,
last_max_per_week AS (
SELECT
parent_id
, tstamp AS last_week_ts
FROM w_p
LIMIT 1 OVER(PARTITION BY parent_id,WEEK(tstamp) ORDER BY val DESC)
)
SELECT
o_30_count.parent_id
, over_30_count
, max_per_week
, last_week_ts
FROM o_30_count
JOIN max_per_week USING(parent_id)
JOIN last_max_per_week USING(parent_id);
Upvotes: 1
Reputation: 990
I tested this in Sybase. You can leverage the logic and convert Vertica specific. Also, For week I am taking year of week from timestamp. You can modify that as per your requirement. Like today -7 days or window based.
(1)
Select * from
(Select
p.parent_id as 'parent_id',
convert(varchar, c.tstamp, 101) as 'date_when_count_of_value_above_30_more_than_12_times_in_a_day',
count(*) as 'count_of_values_above_30_each_day '
from #parent_child p LEFT OUTER JOIN #child c
on p.child_id = c.child_id
where
c.value > 30 and
p.parent_id in (5316, 117722, 104151)
group by
convert(varchar, c.tstamp, 101), p.parent_id
having count(*) > 12) tb1
group by datepart(cwk, tb1.date_when_count_of_value_above_30_more_than_12_times_in_a_day), parent_id
having count(*) >=3
(2)
Select
parent_id as 'parent_id',
MAX(tstamp) as 'latest_datetime_in_the_week',
value as 'max_value_across_all_children_in_a_week'
from
(Select
p.parent_id,c.tstamp,c.value from #parent_child p LEFT OUTER JOIN #child c
on p.child_id = c.child_id
JOIN
(Select
MAX(c.value) as 'max_value'
from #parent_child p LEFT OUTER JOIN #child c
on p.child_id = c.child_id
where
datepart(cwk, tstamp) = 28
group by
datepart(cwk, tstamp), p.parent_id) tbl1
ON c.value = tbl1.max_value) tbl2
group by parent_id, value
Upvotes: 1
Reputation: 849
For (1):
Note: I moved parentId into the child table for readability
WITH BYDAY AS
(
SELECT COUNT(*) as CountHiValsOnDay, parent_id, CAST(tstamp as date) as tsDate, DATEPART(week, CAST(tstamp as date)) as TheWeek, YEAR(CAST(tstamp as date)) as TheYear
FROM child
WHERE value > 30
GROUP BY parent_id, CAST(tstamp as date)
HAVING COUNT(*) > 12
)
SELECT
bd1.parent_id,
bd1.tsDate,
bd1.CountHiValsOnDay,
bd2.CountThisWeek
FROM BYDAY bd1
JOIN (
SELECT COUNT(*) as CountThisWeek, jj.parent_id, jj.TheWeek, jj.TheYear
FROM BYDAY jj
GROUP BY jj.parent_id, jj.TheWeek, jj.TheYear
) bd2
ON bd2.parent_id = bd1.parent_id AND
bd2.TheWeek = bd1.TheWeek AND
bd2.TheYear = bd1.TheYear
WHERE bd2.CountThisWeek >= 3
Upvotes: 0