Reputation: 12355
I'm developing an ETL with Azure Data Factory v1 (unfortunately I can't use Azure Data Factory v2).
I want to read all .csv files from a given blob storage container, then write the content of each file to a table in SQL Azure.
The destination table contains all the columns from the .csv file, plus an additional column that must hold the name of the file the data came from.
This is where I'm stuck: I can't find a way to pass the file name from the source dataset (.csv file in Blob Storage) to the destination dataset (SQL Azure sink).
I already implemented a pipeline that reads a file from blob storage and saves it to a table in SQL Azure.
Here is an excerpt from the JSON that copies a single file to SQL Azure:
{
    "name": "pipelineFileImport",
    "properties": {
        "activities": [
            {
                "type": "Copy",
                "typeProperties": {
                    "source": {
                        "type": "BlobSource",
                        "recursive": false
                    },
                    "sink": {
                        "type": "SqlSink",
                        "writeBatchSize": 0,
                        "writeBatchTimeout": "00:00:00"
                    },
                    "translator": {
                        "type": "TabularTranslator",
                        "columnMappings": "TypeOfRecord:TypeOfRecord,TPMType:TPMType,..."
                    }
                },
                "inputs": [
                    {
                        "name": "InputDataset-cn0"
                    }
                ],
                "outputs": [
                    {
                        "name": "OutputDataset-cn0"
                    }
                ],
                "policy": {
                    "timeout": "1.00:00:00",
                    "concurrency": 1,
                    "executionPriorityOrder": "NewestFirst",
                    "style": "StartOfInterval",
                    "retry": 3,
                    "longRetry": 0,
                    "longRetryInterval": "00:00:00"
                },
                "scheduler": {
                    "frequency": "Day",
                    "interval": 1
                },
                "name": "Activity-0-pipelineFileImport_csv->[staging]_[Files]"
            }
        ],
        "start": "2018-07-20T09:50:55.486Z",
        "end": "2018-07-20T09:50:55.486Z",
        "isPaused": false,
        "hubName": "test_hub",
        "pipelineMode": "OneTime",
        "expirationTime": "3.00:00:00",
        "datasets": [
            {
                "name": "InputDataset-cn0",
                "properties": {
                    "structure": [
                        {
                            "name": "TypeOfRecord",
                            "type": "String"
                        },
                        {
                            "name": "TPMType",
                            "type": "String"
                        },
                        ...
                    ],
                    "published": false,
                    "type": "AzureBlob",
                    "linkedServiceName": "Source-TestBlobStorage",
                    "typeProperties": {
                        "fileName": "testFile001.csv",
                        "folderPath": "fileinput",
                        "format": {
                            "type": "TextFormat",
                            "columnDelimiter": ";",
                            "firstRowAsHeader": true
                        }
                    },
                    "availability": {
                        "frequency": "Day",
                        "interval": 1
                    },
                    "external": true,
                    "policy": {}
                }
            },
            {
                "name": "OutputDataset-cn0",
                "properties": {
                    "structure": [
                        {
                            "name": "TypeOfRecord",
                            "type": "String"
                        },
                        {
                            "name": "TPMType",
                            "type": "String"
                        },
                        ...
                    ],
                    "published": false,
                    "type": "AzureSqlTable",
                    "linkedServiceName": "Destination-SQLAzure-cn0",
                    "typeProperties": {
                        "tableName": "[staging].[Files]"
                    },
                    "availability": {
                        "frequency": "Day",
                        "interval": 1
                    },
                    "external": false,
                    "policy": {}
                }
            }
        ]
    }
}
I need a way to pass the name of the source file to the destination dataset so that it can be written to the SQL Azure database.
Upvotes: 0
Views: 1522
Reputation: 2363
There is no native way to handle this in Data Factory v1, but I think you could achieve it with a stored procedure on the SQL sink.
Please refer to the stored procedure properties of the copy activity documented here: https://learn.microsoft.com/en-us/azure/data-factory/v1/data-factory-azure-sql-connector#copy-activity-properties
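To illustrate the idea, here is a rough sketch. The SqlSink properties sqlWriterStoredProcedureName, sqlWriterTableType and storedProcedureParameters come from the documentation linked above; the names spImportFile, FilesType and FileName are placeholders I made up, so adapt them to your schema. Since each of your input datasets points at a single file (fileName: testFile001.csv), the simplest option is to pass that same file name as a parameter value, one copy activity per file. The sink of your copy activity could then look roughly like this:

"sink": {
    "type": "SqlSink",
    "sqlWriterStoredProcedureName": "spImportFile",
    "sqlWriterTableType": "FilesType",
    "storedProcedureParameters": {
        "FileName": {
            "value": "testFile001.csv"
        }
    }
}

On the SQL Azure side you would define a table type matching the CSV columns and a stored procedure that inserts the rows into [staging].[Files] together with the file name. As far as I understand the v1 documentation, the name of the table-valued parameter has to match the tableName of the output dataset, so you may also need to change the dataset's tableName to a plain name such as Files. Again, only a sketch:

CREATE TYPE [dbo].[FilesType] AS TABLE (
    [TypeOfRecord] nvarchar(255),
    [TPMType] nvarchar(255)
    -- ... the remaining CSV columns
)
GO

CREATE PROCEDURE [dbo].[spImportFile]
    @Files [dbo].[FilesType] READONLY,  -- rows delivered by the copy activity
    @FileName nvarchar(255)             -- value set via storedProcedureParameters
AS
BEGIN
    INSERT INTO [staging].[Files] (TypeOfRecord, TPMType, /* ... */ FileName)
    SELECT TypeOfRecord, TPMType, /* ... */ @FileName
    FROM @Files
END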
Upvotes: 1