Reputation: 1
I have a large horse race database that I want to interrogate.
What I would like to do is create a new column (Average Turf Speed) to find a horse’s average speed (over the last 1 year only) prior to that day’s race and for a specific race type (Turf).
I used to do this in excel using the averageifs formular but with this much data the program would freeze and hang all the time so im planning to make the move to sql.
I’ve taken a very small sample and simplified it for demonstration purposes, I know it will contain lots of nulls but hopefully your understand my issue.
So for example if we look at Aazif in the last race on 21-05-2015 his average speed prior to that days race we would ignore the 1st race because its out of the date range and the wrong race type but we would take the 70 and 40 from the 2nd and 3rd race to give us an average of 55
Date and time Horse Race Type Speed Rating Average Turf Speed
10/05/2013 14:00 Aazif AW 60
10/05/2013 14:00 Bix AW 50
10/05/2013 14:00 Camelot AW 40
15/08/2014 15.00 Aazif Turf 70
15/08/2014 15.00 Bix Turf 60
15/08/2014 15.00 Camelot Turf 50
17/05/2015 13.00 Aazif Turf 40
17/05/2015 13.00 Bix Turf 30
17/05/2015 13.00 Camelot Turf 20
21/05/2015 14.00 Aazif Turf 50
21/05/2015 14.00 Bix Turf 40
21/05/2015 14.00 Camelot Turf 30
Upvotes: 0
Views: 305
Reputation: 5782
use this
--create temp table for data sample
DECLARE @yourdata AS TABLE
(
[Date and time] DATETIME ,
Horse NVARCHAR(30) ,
[Race Type] NVARCHAR(30) ,
[Speed Rating] FLOAT
)
INSERT INTO @yourdata
( [Date and time], Horse, [Race Type], [Speed Rating] )
VALUES ( '2013-05-10 14:00:00', 'Aazif', 'AW', 60 ),
( '2013-05-10 14:00:00', 'Bix', 'AW', 50 ),
( '2013-05-10 14:00:00', 'Camelot', 'AW', 40 ),
( '2014-05-15 15:00:00', 'Aazif', 'Turf', 70 ),
( '2014-05-15 15:00:00', 'Bix', 'Turf', 60 ),
( '2014-05-15 15:00:00', 'Camelot', 'Turf', 50 ),
( '2015-05-17 13:00:00', 'Aazif', 'Turf', 40 ),
( '2015-05-17 13:00:00', 'Bix', 'Turf', 30 ),
( '2015-05-17 13:00:00', 'Camelot', 'Turf', 20 ),
( '2015-05-21 14:00:00', 'Aazif', 'Turf', 50 ),
( '2015-05-21 14:00:00', 'Bix', 'Turf', 40 ),
( '2015-05-21 14:00:00', 'Camelot', 'Turf', 30 )
--Final query
--1. Ignore the last race
--2. Ignore the race with wrong race type
SELECT T.Horse ,
AVG(T.[Speed Rating]) AS [Average Turf Speed]
FROM ( SELECT * ,
ROW_NUMBER() OVER ( PARTITION BY Y.Horse, Y.[Race Type] ORDER BY Y.[Date and time] DESC ) AS RN
FROM @yourdata AS Y
) AS T
WHERE T.[Race Type] = 'Turf'
AND T.RN <> 1
GROUP BY T.Horse
output
if required to get average in one year range from the last not ignored race, use this:
--Final query
--1. Ignore the last race
--2. Ignore the race with wrong race type
--3. get average in one year range from the last not ignored race
SELECT Y.Horse ,
AVG(Y.[Speed Rating]) AS [Average Turf Speed]
FROM ( SELECT * ,
ROW_NUMBER() OVER ( PARTITION BY Y.Horse, Y.[Race Type] ORDER BY Y.[Date and time] DESC ) AS RN ,
MAX(Y.[Date and time]) OVER ( PARTITION BY Y.Horse,
Y.[Race Type] ) AS newlast
FROM @yourdata AS Y
) Y
WHERE CONVERT(DATE, Y.[Date and time]) BETWEEN CONVERT(DATE, DATEADD(YEAR,
-1, Y.newlast))
AND CONVERT(DATE, Y.newlast)
AND Y.RN <> 1
AND Y.[Race Type] = 'Turf'
GROUP BY Y.Horse
output
Upvotes: 0
Reputation: 5733
DECLARE @Table table
(
[DateTime] datetime,
Horse varchar(20),
RaceType varchar(40),
Speed int
)
INSERT @Table VALUES
(CONVERT(datetime, '10/05/2013 14:00',103),'Aazif','AW','60'),
(CONVERT(datetime, '10/05/2013 14:01',103),'Bix','AW','50'),
(CONVERT(datetime, '10/05/2013 14:02',103),'Camelot','AW','40'),
(CONVERT(datetime, '15/08/2014 15:00',103),'Aazif','Turf','70'),
(CONVERT(datetime, '15/08/2014 15:00',103),'Bix','Turf','60'),
(CONVERT(datetime, '15/08/2014 15:00',103),'Camelot','Turf','50'),
(CONVERT(datetime, '17/05/2015 13:00',103),'Aazif','Turf','40'),
(CONVERT(datetime, '17/05/2015 13:00',103),'Bix','Turf','30'),
(CONVERT(datetime, '17/05/2015 13:00',103),'Camelot','Turf','20'),
(CONVERT(datetime, '21/05/2015 14:00',103),'Aazif','Turf','50'),
(CONVERT(datetime, '21/05/2015 14:00',103),'Bix','Turf','40'),
(CONVERT(datetime, '21/05/2015 14:00',103),'Camelot','Turf','30')
SELECT
*
FROM
@Table data
OUTER APPLY
(
SELECT AVG(Speed) AS AverageSpeedOfPreviousYear
FROM @Table
WHERE
[DateTime] < data.[DateTime]
AND DATEDIFF(YEAR, [DateTime], data.DateTime) <= 1
AND Horse = data.Horse
AND RaceType = data.RaceType
) avg
Result
DateTime Horse RaceType Speed AverageSpeedOfPreviousYear
----------------------- -------------------- -------- ----------- --------------------------
2013-05-10 14:00:00.000 Aazif AW 60 NULL
2013-05-10 14:01:00.000 Bix AW 50 NULL
2013-05-10 14:02:00.000 Camelot AW 40 NULL
2014-08-15 15:00:00.000 Aazif Turf 70 NULL
2014-08-15 15:00:00.000 Bix Turf 60 NULL
2014-08-15 15:00:00.000 Camelot Turf 50 NULL
2015-05-17 13:00:00.000 Aazif Turf 40 70
2015-05-17 13:00:00.000 Bix Turf 30 60
2015-05-17 13:00:00.000 Camelot Turf 20 50
2015-05-21 14:00:00.000 Aazif Turf 50 55
2015-05-21 14:00:00.000 Bix Turf 40 45
2015-05-21 14:00:00.000 Camelot Turf 30 35
Upvotes: 0
Reputation: 10013
I'm not sure your definition of "large database" is large for the database engine. Usually I would do this with a more efficient left join, but you give a difficult criteria of "prior to that day’s race" that makes it difficult to write it that way. So I have one below that gives you what you want, but might have performance problems because the subquery that makes up the last column ([Prev Speed Rating]) will fire for each row. If it is slow, first try putting on a compound index made out of the columns that are used in the where clause.
create view vHorseResults
as
select a.Horse,
a.[Race Type],
a.[Speed Rating],
a.[date and time],
(select avg(b.[Speed Rating])
from Races b
where a.Horse = b.Horse
and a.[Race Type] = b.[Race Type]
and a.[date and time] > b.[date and time]
and b.[date and time] >= dateadd(year, -1, getdate())
as [Prev Speed Rating]
from Races a
-- show 1 years worth of data.
select *
from vHorseResults
where getdate() >= datediff(year,-1,[date and time])
Upvotes: 1
Reputation: 45096
select Horse, [Race Type], avg([turf speed])
from table
where [date and time] >= DateAdd(yy, -1, GetDate())
group by Horse, [Race Type]
Upvotes: 0
Reputation: 5458
select avg([turf speed])
where
[date and time] between [date and time]
and datediff(d,-365,[date and time])
and type='turf'
Upvotes: 0