Reputation: 39
I need to join multiple tables in SQL Server with a common column dates but I want to avoid repeating the values from the different tables when merge.
drop table if exists #d, #t1, #t2
create table #d (DataDate date)
create table #t1 (DataDate date, Value1 float, Value2 float)
create table #t2 (DataDate date, Value3 float, Value4 float)
insert into #d values ('20181201'),('20181202'),('20181203')
insert into #t1 values
('20181201', 3.14, 1.18),
('20181201', 3.135, 1.185),
('20181202', 3.15, 1.19),
('20181203', 3.16, 1.195)
insert into #t2 values
('20181201', 4.14, 2.18),
('20181203', 4.15, 2.19),
('20181203', 4.1, 2.195)
select #d.DataDate,#t1.Value1,#t1.Value2,#t2.Value3,#t2.Value4
from #d
left join #t1 on #d.DataDate = #t1.DataDate
left join #t2 on #d.DataDate = #t2.DataDate
Actual Results
DataDate Value1 Value2 Value3 Value4
12/1/2018 3.14 1.18 4.14 2.18
12/1/2018 3.135 1.185 4.14 2.18
12/2/2018 3.15 1.19 NULL NULL
12/3/2018 3.16 1.195 4.15 2.19
12/3/2018 3.16 1.195 4.1 2.195
Desired Results
DataDate Value1 Value2 Value3 Value4
12/1/2018 3.14 1.18 4.14 2.18
12/1/2018 3.135 1.185 NULL NULL
12/2/2018 3.15 1.19 NULL NULL
12/3/2018 3.16 1.195 4.15 2.19
12/3/2018 NULL NULL 4.1 2.195
Upvotes: 3
Views: 156
Reputation: 340
Here is a proposed solution.
I have added a third table, just to demonstrate that this could be solved for N tables with a common column.
Prepare demo data:
/* Prepare demo objects */
DROP TABLE IF EXISTS #d, #t1, #t2
CREATE TABLE #d (DataDate date)
CREATE TABLE #t1 (DataDate date, Value1 float, Value2 float)
CREATE TABLE #t2 (DataDate date, Value3 float, Value4 float)
CREATE TABLE #t3 (DataDate date, Value5 float, Value6 float)
/* Insert demo data */
INSERT INTO #d VALUES ('20181201'),('20181202'),('20181203')
INSERT INTO #t1 VALUES
('20181201', 3.14, 1.18),
('20181201', 3.135, 1.185),
('20181202', 3.15, 1.19),
('20181203', 3.16, 1.195)
INSERT INTO #t2 VALUES
('20181201', 4.14, 2.18),
('20181203', 4.15, 2.19),
('20181203', 4.1, 2.195)
INSERT INTO #t3 VALUES
('20181201', 3.14, 1.18),
('20181201', 3.135, 1.185),
('20181202', 3.16, 1.195)
Proposed QUERY Solution:
SELECT
COALESCE(d.DataDate, t1.datadate, t2.datadate, t3.datadate) AS DataDate
, t1.Value1
, t1.Value2
, t2.Value3
, t2.Value4
, t3.Value5
, t3.Value6
FROM
(SELECT
*
, ROW_NUMBER() OVER (PARTITION BY DataDate ORDER BY (SELECT NULL)) AS rn
FROM #d) AS d
FULL JOIN
(SELECT
*
, ROW_NUMBER() OVER (PARTITION BY DataDate ORDER BY (SELECT NULL)) AS rn
FROM #t1) AS t1
ON (t1.DataDate = d.DataDate AND t1.rn = d.rn)
FULL JOIN
(SELECT
*
, ROW_NUMBER() OVER (PARTITION BY datadate ORDER BY (SELECT NULL)) AS rn
FROM #t2) AS t2
ON (t2.DataDate = d.DataDate AND t2.rn = d.rn)
OR (t2.DataDate = t1.DataDate AND t2.rn = t1.rn)
FULL JOIN
(SELECT
*
, ROW_NUMBER() OVER (PARTITION BY datadate ORDER BY (SELECT NULL)) AS rn
FROM #t3) AS t3
ON (t3.DataDate = d.DataDate AND t3.rn = d.rn)
OR (t3.DataDate = t1.DataDate AND t3.rn = t1.rn)
OR (t3.DataDate = t2.DataDate AND t3.rn = t2.rn)
ORDER BY DataDate;
Demo fiddle is posted on db<>fiddle here
Results:
DataDate | Value1 | Value2 | Value3 | Value4 | Value5 | Value6 :------------------ | -----: | -----: | -----: | -----: | -----: | -----: 01/12/2018 00:00:00 | 3.14 | 1.18 | 4.14 | 2.18 | 3.14 | 1.18 01/12/2018 00:00:00 | 3.135 | 1.185 | null | null | 3.135 | 1.185 02/12/2018 00:00:00 | 3.15 | 1.19 | null | null | 3.16 | 1.195 03/12/2018 00:00:00 | 3.16 | 1.195 | 4.15 | 2.19 | null | null 03/12/2018 00:00:00 | null | null | 4.1 | 2.195 | null | null
Note (optional):
You can greately improve performance by introducing indexes.
As a demo, I have added CLUSTERED INDEXES on DateData column and the preformance increase is significant.
/* Add to improve performance */
CREATE CLUSTERED INDEX CI_DataDate ON #d (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t1 (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t2 (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t3 (DataDate);
Upvotes: 1
Reputation: 690
Use min if you want the min value (or max depending on what you're looking for) associated by date and t1.value1. Your example, the values are not duplicates so distinct will not work
select #d.DataDate,#t1.Value1,min(#t1.Value2),max(#t2.Value3),min(#t2.Value4)
from #d
left join #t1 on #d.DataDate = #t1.DataDate
left join #t2 on #d.DataDate = #t2.DataDate
group by 1,2
if there are exact duplicates that you want to remove then use the following
select distinct #d.DataDate,#t1.Value1,#t1.Value2,#t2.Value3,#t2.Value4
from #d
left join #t1 on #d.DataDate = #t1.DataDate
left join #t2 on #d.DataDate = #t2.DataDate
Upvotes: 0