Reputation: 2342
the data is in 15 minute interval:
Time Value 2010-01-01 00:15 3 2010-01-01 00:30 2 2010-01-01 00:45 4 2010-01-01 01:00 5 2010-01-01 01:15 1 2010-01-01 01:30 3 2010-01-01 01:45 4 2010-01-01 02:00 12 2010-01-01 02:15 13 2010-01-01 02:30 12 2010-01-01 02:45 14 2010-01-01 03:00 15 2010-01-01 03:15 3 2010-01-01 03:30 2 2010-01-01 03:45 3 2010-01-01 04:00 5 .......... .......... .......... 2010-01-02 00:00
Typically there will be 96 points.
According to the values, we may notice that the values from 00:15 to 01:45 are close to each other, and from 02:00 to 03:00 they are close to each other, and from 03:15 to 04:00 they are close to each other.
Based on the "close to each other" rule, I want the data to be "grouped" into 3 parts:
Please consider that the data could be random, and could be grouped into more than 3 parts according to the rule defined above, but maximum should not be more than 10 parts. And the grouping must honor the time sequence, for example, you cannot just put 00:15/02:30/04:45 into 1 group because these 3 points are NOT consecutive.
Please give some thoughts how to implement it in t-sql.
updated: The value could be:
Time Value 2010-01-01 00:15 3 2010-01-01 00:30 2 2010-01-01 00:45 4 2010-01-01 01:00 5 2010-01-01 01:15 1 2010-01-01 01:30 3 2010-01-01 01:45 4 2010-01-01 02:00 12 2010-01-01 02:15 13 2010-01-01 02:30 4 --suddenly decreased 2010-01-01 02:45 14 2010-01-01 03:00 15 2010-01-01 03:15 3 2010-01-01 03:30 2 2010-01-01 03:45 3 2010-01-01 04:00 5 .......... .......... .......... 2010-01-02 00:00
for these kinds of situation, we should not group 02:30 separately, because we want the group size has to be at least 3 points, and we will put that point (02:30) to the previous group (from 02:00 to 03:00).
Upvotes: 0
Views: 569
Reputation: 44346
Since your question changed so much, here is a new answer to the new question, I only included the code part.
declare @t table(time datetime, value int)
declare @variation float
set @variation = 2
set nocount on
insert @t values('2010-01-01 00:15',3)
insert @t values('2010-01-01 00:30',2)
insert @t values('2010-01-01 00:45',4)
insert @t values('2010-01-01 01:00',5)
insert @t values('2010-01-01 01:15',1)
insert @t values('2010-01-01 01:30',3)
insert @t values('2010-01-01 01:45',4)
insert @t values('2010-01-01 02:00',52)
insert @t values('2010-01-01 02:15',5)
insert @t values('2010-01-01 02:30',52)
insert @t values('2010-01-01 02:45',54)
insert @t values('2010-01-01 03:00',55)
insert @t values('2010-01-01 03:15',3)
insert @t values('2010-01-01 03:30',2)
insert @t values('2010-01-01 03:45',3)
insert @t values('2010-01-01 04:00',5)
declare @result table(mintime datetime, maxtime datetime)
a:
delete @result
;with t as
(
select *, rn = row_number() over(order by time), log(value) lv from @t where datediff(day, time, '2010-01-01') = 0
), a as
(
select time, lv, rn, 0 grp from t where rn = 1
union all
select t1.time, a.lv, t1.rn,
case when exists (select 1 from t t2 where t1.rn between rn + 1 and rn + 3 and
lv between t1.lv - @variation and t1.lv +@variation) then grp else grp + 1 end
from t t1 join a on
t1.rn = a.rn +1
)
insert @result
select min(time), max(time) from a group by grp
if @@rowcount > 10
begin
set @variation=@variation + .5
goto a
end
select * from @result
Result:
mintime maxtime
2010-01-01 00:15:00.000 2010-01-01 01:45:00.000
2010-01-01 02:00:00.000 2010-01-01 03:00:00.000
2010-01-01 03:15:00.000 2010-01-01 04:00:00.000
Upvotes: 0
Reputation: 44346
Declare and populate testdata:
set nocount on
declare @result table(mintime datetime, maxtime datetime)
declare @t table(time datetime, value int)
-- variation is how much difference will be allowed from one row to the next
declare @variation int
set @variation = 5
insert @t values('2010-01-01 00:15',3)
insert @t values('2010-01-01 00:30',2)
insert @t values('2010-01-01 00:45',4)
insert @t values('2010-01-01 01:00',5)
insert @t values('2010-01-01 01:15',1)
insert @t values('2010-01-01 01:30',3)
insert @t values('2010-01-01 01:45',4)
insert @t values('2010-01-01 02:00',12)
insert @t values('2010-01-01 02:15',13)
insert @t values('2010-01-01 02:30',12)
insert @t values('2010-01-01 02:45',14)
insert @t values('2010-01-01 03:00',15)
insert @t values('2010-01-01 03:15',3)
insert @t values('2010-01-01 03:30',2)
insert @t values('2010-01-01 03:45',3)
insert @t values('2010-01-01 04:00',5)
Code:
a:
;with t as
( -- add a rownumber
select *, rn = row_number() over(order by time) from @t
), a as
(-- increase group if current row's value varies more than @variation from last row's value
select time, value, rn, 0 grp from t where rn = 1
union all
select t.time, t.value, t.rn, case when t.value between
a.value - @variation and a.value +@variation
then grp else grp+1 end
from t join a on
t.rn = a.rn +1
)
insert @result
select min(time), max(time) from a group by grp
if @@rowcount > 10
begin
-- this will activate if more than 10 groups of numbers are found
-- start over with higher tolerance for variation
set @variation=@variation + 1
delete @result
goto a
end
select convert(char(5), mintime,114) + ' to ' + convert(char(5), maxtime,114)
from @result
Result here: https://data.stackexchange.com/stackoverflow/q/110891/declare-and-populate-testdata
Upvotes: 7