Keith Mifsud
Keith Mifsud

Reputation: 725

Split data totals by months

I'm working on a report and started off by creating this summary SQL statement:

SELECT  o.description, 
        sum(t.total_minutes) total_minutes, 
        sum(n.total_new_minutes) total_new_minutes
FROM    job i
            INNER JOIN
        operation o ON i.operation_id = o.operation_id
            CROSS APPLY
        (
            SELECT  SUM(minutes) total_minutes 
            FROM    job_time t 
            WHERE   i.job_id = t.job_id 
            AND     record_date between '1 JAN 2016' and '29 FEB 2016'
        ) t
            CROSS APPLY
        (
            SELECT  SUM(minutes) total_new_minutes 
            FROM    job_time t 
            WHERE   i.job_id = t.job_id 
            AND     record_date between '1 JAN 2016' and '29 FEB 2016'
            AND     NOT EXISTS (SELECT 1 FROM job_time v WHERE v.record_date < DATEADD(month, DATEDIFF(month, 0, t.record_date), 0) AND v.job_id = t.job_id)
        ) n
GROUP BY o.description

Table structure:

Table: job
job_id       | operation_id
-------------|-------------
1            | 1
2            | 1
3            | 2
4            | 2

Table: operation
operation_id | description
-------------|-------------
1            | 'OP1'
2            | 'OP2'

Table: job_time
job_id       | record_date  | minutes
-------------|--------------|---------
1            | '1 JAN 2016' | 20
1            | '2 JAN 2016' | 20
2            | '1 JAN 2016' | 20
2            | '1 FEB 2016' | 20
3            | '1 FEB 2016' | 20
3            | '2 FEB 2016' | 20
4            | '2 JAN 2016' | 20
4            | '2 FEB 2016' | 20

Result of my summary query:

description  | total_minutes | total_new_minutes
-------------|---------------|-----------------
'OP1'        | 80            | 60
'OP2'        | 80            | 60

SQL fiddle: http://sqlfiddle.com/#!6/f28f7e/1/0

The idea is to have the total hours spent on jobs in a given data range, while also getting the total hours spent on new jobs every month.

Example job 2: 20 minutes in January are considered to be on a new job, however in February, those 20 minutes spent are on an old job.

My issue is that I want to split this result per month on the given range as shown below:

description  | month     |total_minutes | total_new_minutes
-------------|-----------|--------------|-----------------
'OP1'        | '01/2016' | 60           | 60
'OP1'        | '02/2016' | 20           | 0
'OP2'        | '01/2016' | 20           | 20
'OP2'        | '02/2016' | 60           | 40

The query change to reflect this result:

SELECT  o.description, 
        sum(t.total_minutes) total_minutes, 
        sum(n.total_new_minutes) total_new_minutes,
        t.record_month      
FROM    job i
            INNER JOIN
        operation o ON i.operation_id = o.operation_id
            CROSS APPLY
        (
            SELECT  SUM(minutes) total_minutes, right(convert(varchar(10),t.record_date,103),7) record_month
            FROM    job_time t 
            WHERE   i.job_id = t.job_id 
            AND     record_date between '1 JAN 2016' and '29 FEB 2016'
            GROUP BY right(convert(varchar(10),t.record_date,103),7)
        ) t
            CROSS APPLY
        (
            SELECT  SUM(minutes) total_new_minutes, right(convert(varchar(10),t.record_date,103),7) record_month
            FROM    job_time t 
            WHERE   i.job_id = t.job_id 
            AND     record_date between '1 JAN 2016' and '29 FEB 2016'
            AND     NOT EXISTS (SELECT 1 FROM job_time v WHERE v.record_date < DATEADD(month, DATEDIFF(month, 0, t.record_date), 0) AND v.job_id = t.job_id)
           GROUP BY right(convert(varchar(10),t.record_date,103),7)
        ) n     
GROUP BY o.description, t.record_month

SQL Fiddle: http://sqlfiddle.com/#!6/f28f7e/3/0

The problem is that I'm not understanding how the join is being made between t and n, and in my dev db with actual data, one operation is reporting more minutes in total_new_minutes than total_minutes which is can never happen, doesn't matter how bad the data is.

Any idea what I'm doing wrong here or if I should change the query completely?

Upvotes: 3

Views: 92

Answers (1)

DVT
DVT

Reputation: 3127

I don't know why you want to use CROSS APPLY here. My strategy would be to calculate the total_minutes and total_new_minutes in separate subqueries, then join them together. The final query is this:

WITH new_job AS (
    SELECT
        jt1.JOB_ID
        , EOMONTH(jt1.RECORD_DATE) AS mon
        , SUM(jt1."MINUTES") AS "MINUTES"
    FROM
        #JOB_TIME jt1
    WHERE
        NOT EXISTS (
            SELECT 1
            FROM
                #JOB_TIME jt2
            WHERE
                jt2.JOB_ID = jt1.JOB_ID
                AND EOMONTH(jt1.RECORD_DATE, -1) = EOMONTH(jt2.RECORD_DATE)
        )
    GROUP BY
        jt1.JOB_ID
        , EOMONTH(jt1.RECORD_DATE)
), new_total AS (
    SELECT
        j.OPERATION_ID
        , nj.mon
        , SUM(nj."MINUTES") AS total_new_minutes
    FROM
        new_job nj
        JOIN #JOB j ON j.JOB_ID = nj.JOB_ID
    GROUP BY
        j.OPERATION_ID
        , nj.mon
), total AS (
    SELECT
        j.OPERATION_ID
        , EOMONTH(jt.RECORD_DATE) AS mon
        , SUM(jt."MINUTES") AS total_minutes
    FROM
        #JOB_TIME jt
        JOIN #JOB j ON jt.JOB_ID = j.JOB_ID
    GROUP BY
        j.OPERATION_ID
        , EOMONTH(jt.RECORD_DATE)
)
SELECT
    o."DESCRIPTION"
    , COALESCE(t.mon, nt.mon)
    , COALESCE(t.total_minutes,0) AS total_minutes
    , COALESCE(nt.total_new_minutes,0) AS total_new_minutes
FROM
    #OPERATION o
    LEFT JOIN total t ON o.OPERATION_ID = t.OPERATION_ID
    LEFT JOIN new_total nt ON o.OPERATION_ID = nt.OPERATION_ID AND nt.mon=t.mon;

All the query needed to test it would be this:

CREATE TABLE #JOB (
    JOB_ID INT
    , OPERATION_ID INT
);

CREATE TABLE #OPERATION (
    OPERATION_ID INT
    , "DESCRIPTION" NCHAR(5)
);

CREATE TABLE #JOB_TIME (
    JOB_ID INT
    , RECORD_DATE DATE
    , "MINUTES" INT
)

INSERT INTO #JOB (JOB_ID, OPERATION_ID)
VALUES
(1,1)
,(2,1)
,(3,2)
,(4,2);

INSERT INTO #OPERATION (OPERATION_ID, "DESCRIPTION")
VALUES
(1, 'OP1')
, (2, 'OP2');

INSERT INTO #JOB_TIME (JOB_ID, RECORD_DATE, "MINUTES")
VALUES
(1, '20160101', 20)
, (1, '20160102', 20)
, (2, '20160101', 20)
, (2, '20160201', 20)
, (3, '20160201', 20)
, (3, '20160202', 20)
, (4, '20160102', 20)
, (4, '20160202', 20);

WITH new_job AS (
    SELECT
        jt1.JOB_ID
        , EOMONTH(jt1.RECORD_DATE) AS mon
        , SUM(jt1."MINUTES") AS "MINUTES"
    FROM
        #JOB_TIME jt1
    WHERE
        NOT EXISTS (
            SELECT 1
            FROM
                #JOB_TIME jt2
            WHERE
                jt2.JOB_ID = jt1.JOB_ID
                AND EOMONTH(jt1.RECORD_DATE, -1) = EOMONTH(jt2.RECORD_DATE)
        )
    GROUP BY
        jt1.JOB_ID
        , EOMONTH(jt1.RECORD_DATE)
), new_total AS (
    SELECT
        j.OPERATION_ID
        , nj.mon
        , SUM(nj."MINUTES") AS total_new_minutes
    FROM
        new_job nj
        JOIN #JOB j ON j.JOB_ID = nj.JOB_ID
    GROUP BY
        j.OPERATION_ID
        , nj.mon
), total AS (
    SELECT
        j.OPERATION_ID
        , EOMONTH(jt.RECORD_DATE) AS mon
        , SUM(jt."MINUTES") AS total_minutes
    FROM
        #JOB_TIME jt
        JOIN #JOB j ON jt.JOB_ID = j.JOB_ID
    GROUP BY
        j.OPERATION_ID
        , EOMONTH(jt.RECORD_DATE)
)
SELECT
    o."DESCRIPTION"
    , COALESCE(t.mon, nt.mon)
    , COALESCE(t.total_minutes,0) AS total_minutes
    , COALESCE(nt.total_new_minutes,0) AS total_new_minutes
FROM
    #OPERATION o
    LEFT JOIN total t ON o.OPERATION_ID = t.OPERATION_ID
    LEFT JOIN new_total nt ON o.OPERATION_ID = nt.OPERATION_ID AND nt.mon=t.mon;


DROP TABLE #JOB;
DROP TABLE #JOB_TIME;
DROP TABLE #OPERATION;

Upvotes: 1

Related Questions