CodeNinja
CodeNinja

Reputation: 3278

Combine multiple datasets into a single query

I have multiple queries :

1)    select Year , Month, Sum(Stores) from ABC ;

2)     select Year, Month , Sum(SalesStores) from DEF ; 

3)     slect Year, Month, Sum(Products) from FGH;

I want a result like :

Year, Month , Sum(Stores), Sum(SalesStores), Sum(Products)

I tried Full Outer Join with a query that looks something like this :

SELECT ISNULL(x.[Year], y.[Year]) AS [Year],
ISNULL(x.[Month], y.[Month]) AS [Month],
x.Sum_Stores,
y.Sum_SalesStores,
z.Sum_products
FROM (select Year , Month, Sum(Stores) AS Sum_Stores from ABC ... GROUP BY [Month]) AS x
FULL OUTER JOIN (select Year, Month , Sum(SalesStores) AS Sum_SalesStores from DEF ... GROUP BY [Month]) AS y
    ON x.[Year] = y.[Year] AND x.[Month] = y.[Month]
FULL OUTER JOIN (select Year, Month , Sum(products) AS Sum_products from FGH ... GROUP BY [Month])  AS z
ON y.[Year] = z.[Year] AND y.[Month] = z.[Month]

The problem is that , ABC has data only for certain months, DEF has data for different months and FGH has data again for different months.

When I run my above query , I get a lot of nulls and 0's.

Can anyone correct me as to whats wrong with the query or tell me a solution that works for my case.

Upvotes: 0

Views: 3143

Answers (6)

Andriy M
Andriy M

Reputation: 77657

You could union the multiple sets like this:

SELECT Year, Month, 'Stores' AS Src, Stores AS Value FROM ABC
UNION ALL
SELECT Year, Month, 'SalesStores',   SalesStores     FROM DEF
UNION ALL
SELECT Year, Month, 'Products',      Products        FROM FGH

then PIVOT them with aggregation:

SELECT
  Year,
  Month,
  Stores,
  SalesStores,
  Products
FROM (
  SELECT Year, Month, 'Stores',      Stores      FROM ABC
  UNION ALL
  SELECT Year, Month, 'SalesStores', SalesStores FROM DEF
  UNION ALL
  SELECT Year, Month, 'Products',    Products    FROM FGH
) AS s (Year, Month, Src, Value)
PIVOT (
  SUM(Value) FOR Src IN (Stores, SalesStores, Products)
) AS p
;

Or it might be more efficient to first aggregate the results, then union the aggregated sets, then PIVOT:

SELECT
  Year,
  Month,
  Stores,
  SalesStores,
  Products
FROM (
  SELECT Year, Month, 'Stores',      SUM(Stores)      FROM ABC
  UNION ALL
  SELECT Year, Month, 'SalesStores', SUM(SalesStores) FROM DEF
  UNION ALL
  SELECT Year, Month, 'Products',    SUM(Products)    FROM FGH
) AS s (Year, Month, Src, Value)
PIVOT (
  MAX(Value) FOR Src IN (Stores, SalesStores, Products)
) AS p
;

As the PIVOT syntax requires the use of an aggregated function, we can use one that simply preserves an already aggregated value (in this case it was MAX).

Upvotes: 0

liebs19
liebs19

Reputation: 549

Here is an answer using full joins:

select
     coalesce(ABC.Year,DEF.Year,FGH.Year) as Year
    ,coalesce(ABC.Month,DEF.Month,FGH.Month) as Month
    ,ABC.Stores
    ,DEF.SalesStores
    ,FGH.Products
from (
    select Year , Month, Sum(Stores) as Stores from ABC group by Year , Month
) ABC
full join (
    select Year, Month , Sum(SalesStores) as SalesStores from DEF group by Year , Month
) DEF
    on DEF.year = ABC.Year
    and DEF.Month = ABC.Month
full join (
    select Year, Month, Sum(Products) as Products from FGH group by Year , Month
) FGH
    on (FGH.year = ABC.Year
    and FGH.Month = ABC.Month)
    or (FGH.Month = DEF.Year
    and FGH.Month = DEF.Month)

SQL Fiddle Demo

Upvotes: 0

laylarenee
laylarenee

Reputation: 3284

Here is another way to get this information. This example has a short setup to demonstrate this solution in action.

CREATE TABLE #ABC([Year] INT, [Month] INT, Stores INT);
CREATE TABLE #DEF([Year] INT, [Month] INT, SalesStores INT);
CREATE TABLE #GHI([Year] INT, [Month] INT, Products INT);

INSERT #ABC VALUES (2013,1,1);
INSERT #ABC VALUES (2013,1,2);
INSERT #ABC VALUES (2013,2,3);

INSERT #DEF VALUES (2013,1,4);
INSERT #DEF VALUES (2013,1,5);
INSERT #DEF VALUES (2013,2,6);

INSERT #GHI VALUES (2013,1,7);
INSERT #GHI VALUES (2013,1,8);
INSERT #GHI VALUES (2013,2,9);
INSERT #GHI VALUES (2013,3,10);


SELECT
     T.[Year]
    ,T.[Month]
    -- select the sum for each year/month combination using a correlated subquery (each result from the main query causes another data retrieval operation to be run)
    ,(SELECT SUM(Stores) FROM #ABC WHERE [Year]=T.[Year] AND [Month]=T.[Month]) AS [Sum_Stores]
    ,(SELECT SUM(SalesStores) FROM #DEF WHERE [Year]=T.[Year] AND [Month]=T.[Month]) AS [Sum_SalesStores]
    ,(SELECT SUM(Products) FROM #GHI WHERE [Year]=T.[Year] AND [Month]=T.[Month]) AS [Sum_Products]
FROM (
    -- this selects a list of all possible dates.
    SELECT [Year],[Month] FROM #ABC
    UNION
    SELECT [Year],[Month] FROM #DEF
    UNION
    SELECT [Year],[Month] FROM #GHI
) AS T;

EDIT: This can be wrapped up in parametrized stored procedure as requested:

-- Call proc like this

--EXEC [getMyReport]; -- params are not required, these will default to NULL if not specified.
--EXEC [getMyReport] @Year=2013; -- all 2013 data, all months
--EXEC [getMyReport] @Month=1; -- all January data, from all years
--EXEC [getMyReport] @Year=2013, @Month=2; -- Feb 2013 data only.

CREATE PROCEDURE [dbo].[getMyReport]
     @Year INT = NULL -- default to NULL, this makes the param optional when you exec the procedure.
    ,@Month INT = NULL
AS
BEGIN
    -- SET NOCOUNT ON added to prevent extra result sets from
    -- interfering with SELECT statements.
    SET NOCOUNT ON;

    SELECT
         T.[Year]
        ,T.[Month]
        -- select the sum for each year/month combination using a correlated subquery (each result from the main query causes another data retrieval operation to be run)
        ,(SELECT SUM(Stores) FROM #ABC WHERE [Year]=T.[Year] AND [Month]=T.[Month]) AS [Sum_Stores]
        ,(SELECT SUM(SalesStores) FROM #DEF WHERE [Year]=T.[Year] AND [Month]=T.[Month]) AS [Sum_SalesStores]
        ,(SELECT SUM(Products) FROM #GHI WHERE [Year]=T.[Year] AND [Month]=T.[Month]) AS [Sum_Products]
    FROM (
        -- this selects a list of all possible dates.
        SELECT [Year],[Month] FROM #ABC
        UNION
        SELECT [Year],[Month] FROM #DEF
        UNION
        SELECT [Year],[Month] FROM #GHI
    ) AS T
    WHERE
        -- if the param IS NULL, then it will not apply filtering
        -- if the param is specified, then it will filter by year or month
        (@Year IS NULL OR T.[Year]=@Year)
        AND
        (@Month IS NULL OR T.[Month]=@Month)
    ;
END
GO

Upvotes: 1

db9dreamer
db9dreamer

Reputation: 1715

It may be clearer to use an approach like:-

select 
    Year, 
    Month, 
    sum(Stores) as Stores, 
    sum(SalesStores) as SalesStores, 
    sum(products) as products
from (
    select Year, Month, Stores, 0.00 as SalesStores, 0.00 as products
    from ABC ...
    union all
    select Year, Month, 0 as Stores, SalesStores, 0 as products
    from DEF ...
    union all
    select Year, Month, 0 as Stores, 0 as SalesStores, products
    from FGH ...
) source
group by Year, Month

The first select statement in the derived table defines its structure - and you may need to cast the placeholder columns in that statement to types of the correct precision and scale to allow the data from subsequent unions to not lose any precision.

Upvotes: 0

John Gibb
John Gibb

Reputation: 10773

Use union alls and a group by to do it instead:

    select year, month, sum(stores) as sumStores, sum(salesStores) as sumSalesStores, sum(products) as sumProducts
    from (
        select year, month, stores, salesStores = null, products = null
        from ABC

        union all

        select year, month, stores = null, salesStores, products = null
        from DEF

        union all

        select year, month, stores = null, salesStores = null, products
        from FGH
    ) x
    group by year, month;

Upvotes: 0

jean
jean

Reputation: 4350

   select x,y,z from tableA
   UNION
   select x,y,z from tableB
   UNION
   select x,y,z from tableC

Upvotes: 0

Related Questions