Michael Felchlin
Michael Felchlin

Reputation: 91

Get an average for one column in sql query

I am trying to get an avereage of time elapsed for the case when cd.result = 'Pass'. I cannot put it in the where condition, because I need the other columns to appear for all conditions, not just pass situations. I tried to do this in a subselect, but it seems to take forever to execute (35 min and counting).

select ctir.item_dbkey, item_id
    ,ctir.portion_id
    ,Round(avg(CAST(time_elapsed as float)/1000), 3) as Avg_Time_Sec
    ,SUM(CASE when cd.result = 'Pass' Then 1 Else 0 END) as N_PASS
    ,SUM(CASE when cd.result = 'Fail' Then 1 Else 0 END) as N_FAIL
    ,count(ctir.testassignment_ID)as N_Total
from candidate_testItem_response ctir
join [dbo].[Candidate_TestAssignment_Portion] ctil on ctir.TestAssignment_ID = ctil.TestAssignment_ID and ctir.portion_id = ctil.portion_ID
join item_bank ib on ctir.item_dbkey = ib.item_dbkey
join candidate_data cd on ctir.testassignment_id = cd.TestAssignment_ID and ctir.Portion_ID = cd.Portion_ID
where ctir.portion_id = 15780
   and ctil.start_time >= '2017-01-01 00:00:00.000'    
group by ctir.item_dbkey, ctir.Portion_ID, item_id

my attempt with subselect that is far too slow, and may not end up working:

select ctir.item_dbkey, item_id
    ,ctir.portion_id
    ,(select Round(avg(CAST(time_elapsed as float)/1000), 3) as Avg_Time_Sec 
        from candidate_testItem_response ctir 
        join candidate_data cd on ctir.testassignment_id = cd.TestAssignment_ID and ctir.Portion_ID = cd.Portion_ID 
       where cd.result = 'pass')
    ,SUM(CASE when cd.result = 'Pass' Then 1 Else 0 END) as N_PASS
    ,SUM(CASE when cd.result = 'Fail' Then 1 Else 0 END) as N_FAIL
    ,count(ctir.testassignment_ID) as N_Total

from candidate_testItem_response ctir
join [dbo].[Candidate_TestAssignment_Portion] ctil on ctir.TestAssignment_ID = ctil.TestAssignment_ID and ctir.portion_id = ctil.portion_ID
join item_bank ib on ctir.item_dbkey = ib.item_dbkey
join candidate_data cd on ctir.testassignment_id = cd.TestAssignment_ID and ctir.Portion_ID = cd.Portion_ID
where ctir.portion_id = 15780
and ctil.start_time >= '2017-01-01 00:00:00.000'

group by ctir.item_dbkey, ctir.Portion_ID, item_id

so my goal is to add a column that averages the avg_time_sec for those with the cd.result = 'pass'. I have tried a couple of versions, but I'm stumped.

Upvotes: 1

Views: 73

Answers (4)

Paurian
Paurian

Reputation: 1402

In this situation, you can veer away from CASE statements and use UNION then aggregate the result - it should be much faster.

SELECT
  item_dbkey,
  item_id,
  portion_id,
  MAX(Avg_Time_Sec) AS Avg_Time_Sec,
  SUM(n_pass) AS N_PASS,
  SUM(n_fail) AS N_FAIL,
  SUM(n_pass) + SUM(n_fail) AS N_Total
FROM
  (
    SELECT
      ctir.item_dbkey,
      item_id,
      ctir.Portion_ID,
      ROUND(AVG(CAST(time_elapsed AS FLOAT) / 1000), 3) as Avg_Time_Sec,
      COUNT( 1 ) AS N_PASS,
      0 AS N_FAIL
    FROM
      candidate_testItem_response AS ctir
      join [dbo].[Candidate_TestAssignment_Portion] AS ctil
        on ctir.TestAssignment_ID = ctil.TestAssignment_ID and ctir.portion_id = ctil.portion_ID
      join item_bank AS ib
        on ctir.item_dbkey = ib.item_dbkey
      join candidate_data AS cd
        on ctir.testassignment_id = cd.TestAssignment_ID and ctir.Portion_ID = cd.Portion_ID
    WHERE
      ctir.portion_id = 15780 AND
      cd.result = 'Pass' AND
      ctil.start_time >= '2017-01-01 00:00:00.000'    
    GROUP BY
      ctir.item_dbkey,
      item_id,
      ctir.Portion_ID
    UNION
    SELECT
      ctir.item_dbkey,
      item_id,
      ctir.Portion_ID,
      0 AS Avg_Time_Sec,
      0 AS N_PASS,
      COUNT( 1 ) AS N_FAIL
    FROM
      candidate_testItem_response AS ctir
      join [dbo].[Candidate_TestAssignment_Portion] AS ctil
        on ctir.TestAssignment_ID = ctil.TestAssignment_ID and ctir.portion_id = ctil.portion_ID
      join item_bank AS ib
        on ctir.item_dbkey = ib.item_dbkey
      join candidate_data AS cd
        on ctir.testassignment_id = cd.TestAssignment_ID and ctir.Portion_ID = cd.Portion_ID
    WHERE
      ctir.portion_id = 15780 AND
      cd.result = 'Fail' AND
      ctil.start_time >= '2017-01-01 00:00:00.000'    
    GROUP BY
      ctir.item_dbkey,
      item_id,
      ctir.Portion_ID
  ) AS inner_query
GROUP BY
  item_dbkey,
  item_id,
  portion_id

Upvotes: 1

Serg
Serg

Reputation: 22811

Compute conditional Avg as conditional sum/conditional count

select ctir.item_dbkey, item_id
    ,ctir.portion_id
    ,Round(SUM(CASE when cd.result = 'Pass' THEN CAST(time_elapsed as float)/1000 END)/COUNT(CASE when cd.result = 'Pass' THEN 1 END), 3) as Avg_Time_Sec
    ,SUM(CASE when cd.result = 'Pass' Then 1 Else 0 END) as N_PASS
    ,SUM(CASE when cd.result = 'Fail' Then 1 Else 0 END) as N_FAIL
    ,count(ctir.testassignment_ID)as N_Total
from candidate_testItem_response ctir
join [dbo].[Candidate_TestAssignment_Portion] ctil on ctir.TestAssignment_ID = ctil.TestAssignment_ID and ctir.portion_id = ctil.portion_ID
join item_bank ib on ctir.item_dbkey = ib.item_dbkey
join candidate_data cd on ctir.testassignment_id = cd.TestAssignment_ID and ctir.Portion_ID = cd.Portion_ID
where ctir.portion_id = 15780
   and ctil.start_time >= '2017-01-01 00:00:00.000'    

group by ctir.item_dbkey, ctir.Portion_ID, item_id

Upvotes: 1

justiceorjustus
justiceorjustus

Reputation: 1965

Does a CTE help?

WITH TEMP
AS (
    SELECT ctir.item_dbkey
        ,item_id
        ,ctir.portion_id
        ,ctir.testassignment_id
        ,SUM(CASE 
                WHEN cd.result = 'Pass'
                    THEN 1
                ELSE 0
                END) AS N_PASS
        ,SUM(CASE 
                WHEN cd.result = 'Fail'
                    THEN 1
                ELSE 0
                END) AS N_FAIL
        ,count(ctir.testassignment_ID) AS N_Total
    FROM candidate_testItem_response ctir
    JOIN [dbo].[Candidate_TestAssignment_Portion] ctil ON ctir.TestAssignment_ID = ctil.TestAssignment_ID
        AND ctir.portion_id = ctil.portion_ID
    JOIN item_bank ib ON ctir.item_dbkey = ib.item_dbkey
    JOIN candidate_data cd ON ctir.testassignment_id = cd.TestAssignment_ID
        AND ctir.Portion_ID = cd.Portion_ID
    WHERE ctir.portion_id = 15780
        AND ctil.start_time >= '2017-01-01 00:00:00.000'
    GROUP BY ctir.item_dbkey
        ,ctir.Portion_ID
        ,item_id
    )
SELECT t.item_dbkey
    ,t.item_id
    ,t.portion_id
    ,t.N_PASS
    ,t.N_FAIL
    ,t.N_TOTAL
    ,Round(avg(CAST(ctir.time_elapsed AS FLOAT) / 1000), 3) AS Avg_Time_Sec
FROM TEMP t
LEFT JOIN candidate_testItem_response ctir
JOIN candidate_data cd ON ctir.testassignment_id = t.TestAssignment_ID
    AND ctir.Portion_ID = t.Portion_ID
WHERE cd.result = 'pass'
GROUP BY t.item_dbkey
,t.item_id
,t.portion_id
,t.N_PASS
,t.N_FAIL
,t.N_TOTAL

Edit: This way you will running on only one non-indexed table (your TEMP) once instead of lots of times on your subquery.

Upvotes: 1

Ricardo Paixao
Ricardo Paixao

Reputation: 324

have yoy tried something like:

select ctir.item_dbkey, item_id, ctir.portion_id
    ,Round(avg(
        CASE when cd.result = 'Pass' 
        THEN CAST(time_elapsed as float)/1000 
        ELSE null END), 3) as Avg_Time_Sec
    ,SUM(CASE when cd.result = 'Pass' Then 1 Else 0 END) as N_PASS
    ,SUM(CASE when cd.result = 'Fail' Then 1 Else 0 END) as N_FAIL
    ,count(ctir.testassignment_ID)as N_Total
from candidate_testItem_response ctir
    join [dbo].[Candidate_TestAssignment_Portion] ctil on ctir.TestAssignment_ID = ctil.TestAssignment_ID and ctir.portion_id = ctil.portion_ID
    join item_bank ib on ctir.item_dbkey = ib.item_dbkey
    join candidate_data cd on ctir.testassignment_id = cd.TestAssignment_ID and ctir.Portion_ID = cd.Portion_ID
where ctir.portion_id = 15780
    and ctil.start_time >= '2017-01-01 00:00:00.000'
group by ctir.item_dbkey, ctir.Portion_ID, item_id

Upvotes: 1

Related Questions