Reputation: 49
I am trying to figure out how to group a query result where the final Grouping should happen where the time difference is less than let's say one minute.
I have watermeter that logs my water usage and I am trying to group the results so that the graphs will make more sense. My sql queries for grouping the water usage per Year, Month, Day and hour are perfect, but then I would like to drill down to where the final result shows me a grouping where as an example I water the grass.
My Table Structure looks like:
id liter total_liters date time dater
9 3 184 2020/12/06 16:14:58 2020/12/06 16:14
10 1 185 2020/12/06 16:15:04 2020/12/06 16:15
11 3 188 2020/12/06 16:26:49 2020/12/06 16:26
12 2 190 2020/12/06 16:26:55 2020/12/06 16:26
13 2 192 2020/12/06 16:27:01 2020/12/06 16:27
14 1 193 2020/12/06 17:32:16 2020/12/06 17:32
15 1 194 2020/12/06 17:32:22 2020/12/06 17:32
16 1 195 2020/12/06 17:32:28 2020/12/06 17:32
17 1 196 2020/12/06 17:32:35 2020/12/06 17:32
18 1 197 2020/12/06 17:32:41 2020/12/06 17:32
19 1 198 2020/12/06 17:32:47 2020/12/06 17:32
20 1 199 2020/12/06 17:32:53 2020/12/06 17:32
21 1 200 2020/12/06 17:32:59 2020/12/06 17:32
22 1 201 2020/12/06 17:35:05 2020/12/06 17:35
23 1 202 2020/12/06 17:35:17 2020/12/06 17:35
24 1 203 2020/12/06 17:35:23 2020/12/06 17:35
25 1 204 2020/12/06 17:35:29 2020/12/06 17:35
26 1 205 2020/12/06 17:35:41 2020/12/06 17:35
27 1 206 2020/12/06 17:43:05 2020/12/06 17:43
28 3 209 2020/12/06 17:43:11 2020/12/06 17:43
29 2 211 2020/12/06 17:43:17 2020/12/06 17:43
30 2 213 2020/12/06 17:43:23 2020/12/06 17:43
31 2 215 2020/12/06 17:43:29 2020/12/06 17:43
32 3 218 2020/12/06 17:43:36 2020/12/06 17:43
33 2 220 2020/12/06 17:43:42 2020/12/06 17:43
And my current query looks like:
SELECT DATE_FORMAT(dater,'%H:%i') AS dater,
YEAR(dater),
MONTHNAME(dater),
DAY(dater),
HOUR(dater),
MINUTE(dater),
SUM(liter) as liter
FROM watermeter
WHERE date LIKE '2020-12-08'
GROUP BY YEAR(date), MONTHNAME(date), DAY(dater), HOUR(dater), MINUTE(dater)
ORDER BY id ASC`
The result should be to sum the Liters together by grouping them by Year then Month then Day then Hour and then it should group the results where the time difference is less than 60 seconds.
I might end up by grouping them Year, Month, Day and then by time difference is less than 60 seconds .
Like
2020-12-06 17:35:05 5 Liters
2020-12-06 17:43:05 13 Liters
Here is a phpmyaddmin sql dump if it helps
-- phpMyAdmin SQL Dump
-- version 4.6.6deb5
-- https://www.phpmyadmin.net/
--
-- Host: localhost:3306
-- Generation Time: Dec 10, 2020 at 07:27 AM
-- Server version: 10.3.17-MariaDB-0+deb10u1
-- PHP Version: 7.3.11-1~deb10u1
SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
SET time_zone = "+00:00";
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8mb4 */;
--
-- Database: `mysensors`
--
-- --------------------------------------------------------
--
-- Table structure for table `watermeter`
--
CREATE TABLE `watermeter` (
`id` int(10) NOT NULL,
`liter` int(11) NOT NULL,
`total_liters` int(11) NOT NULL,
`date` date NOT NULL,
`time` time NOT NULL,
`dater` datetime NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
--
-- Dumping data for table `watermeter`
--
INSERT INTO `watermeter` (`id`, `liter`, `total_liters`, `date`, `time`, `dater`) VALUES
(9, 3, 184, '2020-12-06', '16:14:58', '2020-12-06 16:14:58'),
(10, 1, 185, '2020-12-06', '16:15:04', '2020-12-06 16:15:04'),
(11, 3, 188, '2020-12-06', '16:26:49', '2020-12-06 16:26:49'),
(12, 2, 190, '2020-12-06', '16:26:55', '2020-12-06 16:26:55'),
(13, 2, 192, '2020-12-06', '16:27:01', '2020-12-06 16:27:01'),
(14, 1, 193, '2020-12-06', '17:32:16', '2020-12-06 17:32:16'),
(15, 1, 194, '2020-12-06', '17:32:22', '2020-12-06 17:32:22'),
(16, 1, 195, '2020-12-06', '17:32:28', '2020-12-06 17:32:28'),
(17, 1, 196, '2020-12-06', '17:32:35', '2020-12-06 17:32:35'),
(18, 1, 197, '2020-12-06', '17:32:41', '2020-12-06 17:32:41'),
(19, 1, 198, '2020-12-06', '17:32:47', '2020-12-06 17:32:47'),
(20, 1, 199, '2020-12-06', '17:32:53', '2020-12-06 17:32:53'),
(21, 1, 200, '2020-12-06', '17:32:59', '2020-12-06 17:32:59'),
(22, 1, 201, '2020-12-06', '17:35:05', '2020-12-06 17:35:05'),
(23, 1, 202, '2020-12-06', '17:35:17', '2020-12-06 17:35:17'),
(24, 1, 203, '2020-12-06', '17:35:23', '2020-12-06 17:35:23'),
(25, 1, 204, '2020-12-06', '17:35:29', '2020-12-06 17:35:29'),
(26, 1, 205, '2020-12-06', '17:35:41', '2020-12-06 17:35:41'),
(27, 1, 206, '2020-12-06', '17:43:05', '2020-12-06 17:43:05'),
(28, 3, 209, '2020-12-06', '17:43:11', '2020-12-06 17:43:11'),
(29, 2, 211, '2020-12-06', '17:43:17', '2020-12-06 17:43:17'),
(30, 2, 213, '2020-12-06', '17:43:23', '2020-12-06 17:43:23'),
(31, 2, 215, '2020-12-06', '17:43:29', '2020-12-06 17:43:29'),
(32, 3, 218, '2020-12-06', '17:43:36', '2020-12-06 17:43:36'),
(33, 2, 220, '2020-12-06', '17:43:42', '2020-12-06 17:43:42');
--
-- Indexes for dumped tables
--
--
-- Indexes for table `watermeter`
--
ALTER TABLE `watermeter`
ADD PRIMARY KEY (`id`),
ADD KEY `dater` (`dater`);
--
-- AUTO_INCREMENT for dumped tables
--
--
-- AUTO_INCREMENT for table `watermeter`
--
ALTER TABLE `watermeter`
MODIFY `id` int(10) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=1061;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
UPDATE 1.
Making little progress I think - Totals are not right yet.
SELECT '(a.dater, b.dater)', DATE_FORMAT(a.dater,'%H:%i') AS dater,
YEAR(a.dater),
MONTHNAME(a.dater),
DAY(a.dater),
HOUR(a.dater),
MINUTE(a.dater),
a.time,
SUM(a.liter) as liter
FROM watermeter a
INNER JOIN watermeter b
ON b.dater >= a.dater
WHERE b.dater <= DATE_ADD(a.dater, INTERVAL 60 SECOND)
AND a.date LIKE '2020-12-08' GROUP BY YEAR(a.date), MONTHNAME(a.date), DAY(a.dater), HOUR(a.dater), MINUTE(a.dater)
ORDER BY a.id ASC
Update2
So Update one does not give me the correct result. Tried now the following that I got from : MySQL GROUP BY DateTime +/- 3 seconds but also no joy yet.
SELECT COUNT(liter),DAY(dater),HOUR(dater),MINUTE(dater)
FROM watermeter
JOIN (SELECT watermeter.id, MAX(S.dater) AS ChainStartTime
FROM watermeter
JOIN (SELECT DISTINCT a.dater
FROM watermeter a
LEFT JOIN watermeter b
ON (b.dater >= a.dater - INTERVAL 60 SECOND
AND b.dater < a.dater)
WHERE b.dater IS NULL
AND a.date LIKE '2020-12-06') S
ON watermeter.dater >= S.dater
GROUP BY watermeter.id) GroupingQuery
ON watermeter.id = GroupingQuery.id
GROUP BY GroupingQuery.ChainStartTime
Upvotes: 1
Views: 996
Reputation: 49
With the help of Luuk's code and learning a lot about "case" and "lag" and lead" and using nested selects etc. I was able to get a working query for what I wanted.
SET @wgroup := 0;
with cte as (
select
id,
dater,
liter,
total_liters,
d1,
abs(time_to_sec(d1)) as g1,
case when abs(time_to_sec(d1))>60 then @wgroup := @wgroup+1 else @wgroup end as wgroup
from (
select
id,
liter,
total_liters,
dater,
(case
WHEN lag(dater) over (order by dater) IS NULL
THEN timediff(dater, lead(dater) over (order by dater))
ELSE timediff(dater, lag(dater) over (order by dater))
END) AS d1
from watermeter where date like '2020-12-06'
order by dater
) tmp1
)
(select dater,
wgroup,
SUM(liter)
from cte
GROUP BY wgroup)
;
With this I was able to sum all values where the time difference is less than 60 second from without loosing a single line.
+---------------------+--------+------------+
| dater | wgroup | SUM(liter) |
+---------------------+--------+------------+
| 2020-12-06 16:14:58 | 0 | 4 |
| 2020-12-06 16:26:49 | 1 | 7 |
| 2020-12-06 17:32:16 | 2 | 8 |
| 2020-12-06 17:35:05 | 3 | 5 |
| 2020-12-06 17:43:05 | 4 | 308 |
| 2020-12-06 19:19:03 | 5 | 120 |
| 2020-12-06 19:31:29 | 6 | 4 |
| 2020-12-06 19:34:48 | 7 | 1 |
| 2020-12-06 20:30:08 | 8 | 1 |
| 2020-12-06 21:27:06 | 9 | 23 |
+---------------------+--------+------------+
Upvotes: 0
Reputation: 14948
First find the difference in time from the previous row:
select
id,
liter,
total_liters,
dater,
lead(dater) over (order by dater) as "lead",
timediff(dater, lead(dater) over (order by dater)) as d1
from watermeter
order by dater;
output:
+----+-------+--------------+---------------------+---------------------+-----------+
| id | liter | total_liters | dater | lead | d1 |
+----+-------+--------------+---------------------+---------------------+-----------+
| 9 | 3 | 184 | 2020-12-06 16:14:58 | 2020-12-06 16:15:04 | -00:00:06 |
| 10 | 1 | 185 | 2020-12-06 16:15:04 | 2020-12-06 16:26:49 | -00:11:45 |
| 11 | 3 | 188 | 2020-12-06 16:26:49 | 2020-12-06 16:26:55 | -00:00:06 |
| 12 | 2 | 190 | 2020-12-06 16:26:55 | 2020-12-06 16:27:01 | -00:00:06 |
| 13 | 2 | 192 | 2020-12-06 16:27:01 | 2020-12-06 17:32:16 | -01:05:15 |
| 14 | 1 | 193 | 2020-12-06 17:32:16 | 2020-12-06 17:32:22 | -00:00:06 |
| 15 | 1 | 194 | 2020-12-06 17:32:22 | 2020-12-06 17:32:28 | -00:00:06 |
| 16 | 1 | 195 | 2020-12-06 17:32:28 | 2020-12-06 17:32:35 | -00:00:07 |
| 17 | 1 | 196 | 2020-12-06 17:32:35 | 2020-12-06 17:32:41 | -00:00:06 |
etc...
Then determine which times you would like to see, because they have a difference to their previous row which is larger than 60 seconds.
(The id
of the column is show in x
)
with cte as (
select id,
dater,
liter,
total_liters,
d1,
abs(time_to_sec(d1)) as g1,
case when abs(time_to_sec(d1))>60 then id else 0 end as x
from (
select
id,
liter,
total_liters,
dater,
lead(dater) over (order by dater) as "lead",
timediff(dater, lead(dater) over (order by dater)) as d1
from watermeter
order by dater
) tmp1
)
select * from cte;
output:
+----+---------------------+-------+--------------+-----------+------+----+
| id | dater | liter | total_liters | d1 | g1 | x |
+----+---------------------+-------+--------------+-----------+------+----+
| 9 | 2020-12-06 16:14:58 | 3 | 184 | -00:00:06 | 6 | 0 |
| 10 | 2020-12-06 16:15:04 | 1 | 185 | -00:11:45 | 705 | 10 |
| 11 | 2020-12-06 16:26:49 | 3 | 188 | -00:00:06 | 6 | 0 |
| 12 | 2020-12-06 16:26:55 | 2 | 190 | -00:00:06 | 6 | 0 |
| 13 | 2020-12-06 16:27:01 | 2 | 192 | -01:05:15 | 3915 | 13 |
| 14 | 2020-12-06 17:32:16 | 1 | 193 | -00:00:06 | 6 | 0 |
| 15 | 2020-12-06 17:32:22 | 1 | 194 | -00:00:06 | 6 | 0 |
| 16 | 2020-12-06 17:32:28 | 1 | 195 | -00:00:07 | 7 | 0 |
etc...
Next step is to determine the max(id) which 'belongs' to the x
:
with cte as (
select id,
dater,
liter,
total_liters,
d1,
abs(time_to_sec(d1)) as g1,
case when abs(time_to_sec(d1))>60 then id else 0 end as x
from (
select
id,
liter,
total_liters,
dater,
lead(dater) over (order by dater) as "lead",
timediff(dater, lead(dater) over (order by dater)) as d1
from watermeter
order by dater
) tmp1
)
select
id,
dater,
liter,
total_liters
,d1,
g1,
x,
(select min(x)-1 from cte c2 where c2.id>c1.x and c2.x>0) as y
from cte c1
where c1.x<>0
;
output:
+----+---------------------+-------+--------------+-----------+------+----+------+
| id | dater | liter | total_liters | d1 | g1 | x | y |
+----+---------------------+-------+--------------+-----------+------+----+------+
| 10 | 2020-12-06 16:15:04 | 1 | 185 | -00:11:45 | 705 | 10 | 12 |
| 13 | 2020-12-06 16:27:01 | 2 | 192 | -01:05:15 | 3915 | 13 | 20 |
| 21 | 2020-12-06 17:32:59 | 1 | 200 | -00:02:06 | 126 | 21 | 25 |
| 26 | 2020-12-06 17:35:41 | 1 | 205 | -00:07:24 | 444 | 26 | NULL |
+----+---------------------+-------+--------------+-----------+------+----+------+
Note that x
and y
are the minimum and maximum id
for your group.
Finally (this messy stuff):
with cte as (
select id,
dater,
liter,
total_liters,
d1,
abs(time_to_sec(d1)) as g1,
case when abs(time_to_sec(d1))>60 then id else 0 end as x
from (
select
id,
liter,
total_liters,
dater,
lead(dater) over (order by dater) as "lead",
timediff(dater, lead(dater) over (order by dater)) as d1
from watermeter
order by dater
) tmp1
)
select
id,
dater,
(select sum(liter) from watermeter where id between x and y) as rain
from (
select
id,
dater,
liter,
total_liters
,d1,
g1,
x,
(select min(x)-1 from cte c2 where c2.id>c1.x and c2.x>0) as y
from cte c1
where c1.x<>0
) tmp2
;
gives output:
+------+---------------------+------+
| id | dater | rain |
+------+---------------------+------+
| 10 | 2020-12-06 16:15:04 | 6 |
| 13 | 2020-12-06 16:27:01 | 9 |
| 21 | 2020-12-06 17:32:59 | 5 |
| 26 | 2020-12-06 17:35:41 | NULL |
+------+---------------------+------+
I do hope this is close to the expected output...
Upvotes: 1