Reputation: 39
I have a source as an Excel. How to load data from excel to SQL DB using Azure Data Factory?
Upvotes: 0
Views: 2003
Reputation: 1145
There are many ways to do this, but I will show you what I believe to be the simplest method.
Please refer to the screenshots and comments below for a walkthrough (some things like getting the sheetname aren't currently possible in ADF natively without using an Azure Function or something, but I covered as much as I could. If the excel is named the same as the sheet that could be used instead, that is an option and what I did in this example):
Overall Pipeline View
First Step Truncating the Stage Table to ensure it's empty prior to loading
2nd Step Load the Stage table with file input
Last Step is to run the stored procedure
Here is the code for the stored procedure I made below, plus a screenshot of the ADF stored Procedure activity:
create procedure stag.ttt_test (@ExcelFileName varchar(100))
as
DECLARE @ClassStartRow int
select @ClassStartRow = max(RowId)
from (
select
case when Details = '2.Class' then ROW_NUMBER() over (order by %%physloc%%) else 0 end as RowId
from Stag.ttt_test_stage
) as sub
;
INSERT INTO Stag.ttt_test_final
select
Case
when January is NOT NULL then DATEADD(day, -1, cast('2/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when February is NOT NULL then DATEADD(day, -1, cast('3/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when March is NOT NULL then DATEADD(day, -1, cast('4/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when April is NOT NULL then DATEADD(day, -1, cast('5/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when May is NOT NULL then DATEADD(day, -1, cast('6/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when June is NOT NULL then DATEADD(day, -1, cast('7/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when July is NOT NULL then DATEADD(day, -1, cast('8/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when August is NOT NULL then DATEADD(day, -1, cast('9/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when September is NOT NULL then DATEADD(day, -1, cast('10/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when October is NOT NULL then DATEADD(day, -1, cast('11/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when November is NOT NULL then DATEADD(day, -1, cast('12/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when December is NOT NULL then DATEADD(day, -1, cast('1/1/' + cast(YEAR(getdate()) + 1 as varchar(4)) as date))
end as [Date],
'name' as Category,
Details as [Type],
Coalesce(January, February, March, April, May, June, July, August, September, October, November, December) as [value],
@ExcelFileName as SheetName
from (
select
ROW_NUMBER() over (order by %%physloc%%) as RowId,
st.*
from Stag.ttt_test_stage as st
) as sub
where RowId > 1
and RowId < @ClassStartRow
;
INSERT INTO stag.ttt_test_final
select
Case
when January is NOT NULL then DATEADD(day, -1, cast('2/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when February is NOT NULL then DATEADD(day, -1, cast('3/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when March is NOT NULL then DATEADD(day, -1, cast('4/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when April is NOT NULL then DATEADD(day, -1, cast('5/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when May is NOT NULL then DATEADD(day, -1, cast('6/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when June is NOT NULL then DATEADD(day, -1, cast('7/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when July is NOT NULL then DATEADD(day, -1, cast('8/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when August is NOT NULL then DATEADD(day, -1, cast('9/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when September is NOT NULL then DATEADD(day, -1, cast('10/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when October is NOT NULL then DATEADD(day, -1, cast('11/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when November is NOT NULL then DATEADD(day, -1, cast('12/1/' + cast(YEAR(getdate()) as varchar(4)) as date))
when December is NOT NULL then DATEADD(day, -1, cast('1/1/' + cast(YEAR(getdate()) + 1 as varchar(4)) as date))
end as [Date],
'class' as Category,
Details as [Type],
Coalesce(January, February, March, April, May, June, July, August, September, October, November, December) as [value],
@ExcelFileName as SheetName
from (
select
ROW_NUMBER() over (order by %%physloc%%) as RowId,
st.*
from Stag.ttt_test_stage as st
) as sub
where RowId > @ClassStartRow
Please accept as answer if this works for you and let me know if you have any other comments or issues. Thanks
Upvotes: 1