ypacuba
ypacuba

Reputation: 39

Left join suggestion

I need to join multiple tables in SQL Server with a common column dates but I want to avoid repeating the values from the different tables when merge.

drop table if exists #d, #t1, #t2

create table #d  (DataDate date)
create table #t1 (DataDate date, Value1 float, Value2 float)
create table #t2 (DataDate date, Value3 float, Value4 float)

insert into #d values ('20181201'),('20181202'),('20181203')
insert into #t1 values 
    ('20181201', 3.14, 1.18),
    ('20181201', 3.135, 1.185),
    ('20181202', 3.15, 1.19),
    ('20181203', 3.16, 1.195)

insert into #t2 values 
    ('20181201', 4.14, 2.18),
    ('20181203', 4.15, 2.19),
    ('20181203', 4.1, 2.195)

select #d.DataDate,#t1.Value1,#t1.Value2,#t2.Value3,#t2.Value4
from #d
    left join #t1 on #d.DataDate = #t1.DataDate
    left join #t2 on #d.DataDate = #t2.DataDate

Actual Results

DataDate   Value1  Value2  Value3  Value4
12/1/2018  3.14    1.18    4.14    2.18
12/1/2018  3.135   1.185   4.14    2.18
12/2/2018  3.15    1.19    NULL    NULL
12/3/2018  3.16    1.195   4.15    2.19
12/3/2018  3.16    1.195   4.1     2.195

Desired Results

DataDate    Value1  Value2  Value3  Value4
12/1/2018   3.14    1.18    4.14    2.18
12/1/2018   3.135   1.185   NULL    NULL
12/2/2018   3.15    1.19    NULL    NULL
12/3/2018   3.16    1.195   4.15    2.19
12/3/2018   NULL    NULL    4.1     2.195

Upvotes: 3

Views: 156

Answers (2)

Milan
Milan

Reputation: 340

Here is a proposed solution.
I have added a third table, just to demonstrate that this could be solved for N tables with a common column.

Prepare demo data:

/* Prepare demo objects */
DROP TABLE IF EXISTS #d, #t1, #t2
CREATE TABLE #d  (DataDate date)
CREATE TABLE #t1 (DataDate date, Value1 float, Value2 float)
CREATE TABLE #t2 (DataDate date, Value3 float, Value4 float)
CREATE TABLE #t3 (DataDate date, Value5 float, Value6 float)

/* Insert demo data */
INSERT INTO #d VALUES ('20181201'),('20181202'),('20181203')
INSERT INTO #t1 VALUES 
    ('20181201', 3.14, 1.18),
    ('20181201', 3.135, 1.185),
    ('20181202', 3.15, 1.19),
    ('20181203', 3.16, 1.195)
INSERT INTO #t2 VALUES 
    ('20181201', 4.14, 2.18),
    ('20181203', 4.15, 2.19),
    ('20181203', 4.1, 2.195)
INSERT INTO #t3 VALUES 
    ('20181201', 3.14, 1.18),
    ('20181201', 3.135, 1.185),
    ('20181202', 3.16, 1.195)

Proposed QUERY Solution:

SELECT
    COALESCE(d.DataDate, t1.datadate, t2.datadate, t3.datadate) AS DataDate
    , t1.Value1
    , t1.Value2
    , t2.Value3
    , t2.Value4
    , t3.Value5
    , t3.Value6
FROM
    (SELECT
        *
        , ROW_NUMBER() OVER (PARTITION BY DataDate ORDER BY (SELECT NULL)) AS rn
    FROM #d) AS d
FULL JOIN
    (SELECT
        *
        , ROW_NUMBER() OVER (PARTITION BY DataDate ORDER BY (SELECT NULL)) AS rn
    FROM #t1) AS t1
    ON (t1.DataDate = d.DataDate AND t1.rn = d.rn)
FULL JOIN
    (SELECT
        *
        , ROW_NUMBER() OVER (PARTITION BY datadate ORDER BY (SELECT NULL)) AS rn
    FROM #t2) AS t2
    ON (t2.DataDate = d.DataDate AND t2.rn = d.rn)
    OR (t2.DataDate = t1.DataDate AND t2.rn = t1.rn)
FULL JOIN
    (SELECT
        *
        , ROW_NUMBER() OVER (PARTITION BY datadate ORDER BY (SELECT NULL)) AS rn
    FROM #t3) AS t3
    ON (t3.DataDate = d.DataDate AND t3.rn = d.rn)
    OR (t3.DataDate = t1.DataDate AND t3.rn = t1.rn)
    OR (t3.DataDate = t2.DataDate AND t3.rn = t2.rn)
ORDER BY DataDate;  

Demo fiddle is posted on db<>fiddle here

Results:

DataDate            | Value1 | Value2 | Value3 | Value4 | Value5 | Value6
:------------------ | -----: | -----: | -----: | -----: | -----: | -----:
01/12/2018 00:00:00 |   3.14 |   1.18 |   4.14 |   2.18 |   3.14 |   1.18
01/12/2018 00:00:00 |  3.135 |  1.185 |   null |   null |  3.135 |  1.185
02/12/2018 00:00:00 |   3.15 |   1.19 |   null |   null |   3.16 |  1.195
03/12/2018 00:00:00 |   3.16 |  1.195 |   4.15 |   2.19 |   null |   null
03/12/2018 00:00:00 |   null |   null |    4.1 |  2.195 |   null |   null

Note (optional):

You can greately improve performance by introducing indexes.
As a demo, I have added CLUSTERED INDEXES on DateData column and the preformance increase is significant.

/* Add to improve performance */
CREATE CLUSTERED INDEX CI_DataDate ON #d (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t1 (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t2 (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t3 (DataDate);

Upvotes: 1

Maria Nazari
Maria Nazari

Reputation: 690

Use min if you want the min value (or max depending on what you're looking for) associated by date and t1.value1. Your example, the values are not duplicates so distinct will not work

select #d.DataDate,#t1.Value1,min(#t1.Value2),max(#t2.Value3),min(#t2.Value4)
from #d
    left join #t1 on #d.DataDate = #t1.DataDate
    left join #t2 on #d.DataDate = #t2.DataDate
group by 1,2

if there are exact duplicates that you want to remove then use the following

select distinct #d.DataDate,#t1.Value1,#t1.Value2,#t2.Value3,#t2.Value4
    from #d
        left join #t1 on #d.DataDate = #t1.DataDate
        left join #t2 on #d.DataDate = #t2.DataDate

Upvotes: 0

Related Questions