Reputation: 71
At the moment, we have a pipeline that brings data from an on-premises SQL Server into a data lake and then into an Azure Dedicated SQL Pool. There are about 100 tables, and it's very tedious (even using a ForEach activity) to manage/edit the pipeline in the Synapse workspace.
Is there a way to do this process using pure SQL? I did see some syntax for COPY INTO and CTAS.
Upvotes: 0
Views: 699
Reputation: 5297
AFAIK, the Copy data activity is an activity in Azure Data Factory or Azure Synapse that copies data from a source to a destination; it cannot be created with a SQL script. If you want to copy data from on-premises to a SQL dedicated pool using SQL only, first export the data as flat files using BCP or SSIS and upload them to Azure Data Lake (or copy them to the data lake with an ADF pipeline). Then, in the Synapse dedicated pool, create a database scoped credential for data lake access using the code below:
CREATE DATABASE SCOPED CREDENTIAL creds
WITH IDENTITY = 'Access-key',
     SECRET = '<storage-account-access-key>';
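Note that a database scoped credential requires a database master key. If the dedicated pool does not have one yet, you can create it first; a minimal sketch, where the password is only a placeholder:
-- Assumed prerequisite: a master key must exist before the scoped credential is created.
CREATE MASTER KEY ENCRYPTION BY PASSWORD = '<StrongPassword123!>';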
Create an external data source using the code below:
CREATE EXTERNAL DATA SOURCE [ExternalDataSource1] WITH
(
    TYPE = HADOOP,
    LOCATION = 'abfss://<container>@<storageaccount>.dfs.core.windows.net',
    CREDENTIAL = creds
);
Create an external file format using the code below:
CREATE EXTERNAL FILE FORMAT [ExternalFileFormat] WITH
(
    FORMAT_TYPE = DELIMITEDTEXT
);
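If your flat files are comma-separated with a header row rather than the default pipe-delimited layout, the file format can carry those options. A sketch assuming CSV files (the option values and the format name are assumptions about your exports):
-- Assumes comma-separated files with a header row; adjust to match your files.
CREATE EXTERNAL FILE FORMAT [ExternalFileFormatCsv] WITH
(
    FORMAT_TYPE = DELIMITEDTEXT,
    FORMAT_OPTIONS
    (
        FIELD_TERMINATOR = ',',
        STRING_DELIMITER = '"',
        FIRST_ROW = 2,
        USE_TYPE_DEFAULT = FALSE
    )
);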
Create an external table using the external data source and file format:
CREATE EXTERNAL TABLE [schema].[externalTablename]
(
    <required columns>
)
WITH
(
    LOCATION = '<fileName>',
    DATA_SOURCE = [ExternalDataSource1],
    FILE_FORMAT = [ExternalFileFormat]
);
Create a new table and copy the external table data into it using CTAS:
CREATE TABLE [schema].[tableName]
WITH
(
    DISTRIBUTION = REPLICATE,
    CLUSTERED COLUMNSTORE INDEX
)
AS
SELECT * FROM [schema].[externalTablename]
OPTION (LABEL = 'CTAS : Load [schema].[externalTablename]');
It will copy the data successfully. If you want more information regarding CTAS, you can refer to the CTAS documentation.
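Since you also mention COPY INTO: the COPY statement can load the staged files directly into an existing table in the dedicated SQL pool without creating external objects. A minimal sketch, assuming CSV files with a header row and a storage account key; all names, paths, and the key are placeholders:
-- Loads staged CSV files straight into an existing table; adjust names and options.
COPY INTO [schema].[tableName]
FROM 'https://<storageaccount>.dfs.core.windows.net/<container>/<folder>/*.csv'
WITH
(
    FILE_TYPE = 'CSV',
    CREDENTIAL = (IDENTITY = 'Storage Account Key', SECRET = '<storage-account-access-key>'),
    FIELDTERMINATOR = ',',
    FIRSTROW = 2
);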
This approach is fine for a small number of tables. For something like 100 tables you would have to wrap the steps above in a stored procedure and execute it per table from Azure Data Factory or Logic Apps, which quickly becomes complex; a rough per-table sketch follows this paragraph. The easier option is to create a pipeline in Azure Data Factory and use the bulk insert option of the Copy data activity to copy the full set of tables from on-premises to the SQL dedicated pool; that pipeline is shown after the sketch.
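A minimal sketch of such a per-table procedure, assuming the matching external tables already exist under an ext schema; the procedure name and schema are placeholders you would adapt:
-- Hypothetical helper: runs the CTAS pattern above for one table via dynamic SQL.
-- The target table must not already exist, and ext.<table> is the matching external table.
CREATE PROCEDURE dbo.usp_LoadFromExternal
    @SchemaName SYSNAME,
    @TableName  SYSNAME
AS
BEGIN
    DECLARE @sql NVARCHAR(MAX) =
        N'CREATE TABLE ' + QUOTENAME(@SchemaName) + N'.' + QUOTENAME(@TableName) +
        N' WITH (DISTRIBUTION = REPLICATE, CLUSTERED COLUMNSTORE INDEX)' +
        N' AS SELECT * FROM ext.' + QUOTENAME(@TableName) +
        N' OPTION (LABEL = ''CTAS : Load ' + @TableName + N''');';
    EXEC (@sql);
END;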
Use a self-hosted integration runtime to connect to the on-premises SQL Server. The Azure Synapse sink dataset is parameterized with DWSchema and DWTableName so that each source table is written to the matching target table. Here is the pipeline JSON:
{
"name": "pipeline1",
"properties": {
"activities": [
{
"name": "Lookup1",
"type": "Lookup",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "SqlServerSource",
"sqlReaderQuery": "SELECT TABLE_SCHEMA, TABLE_NAME from INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE' AND TABLE_SCHEMA= 'dbo'",
"queryTimeout": "02:00:00",
"partitionOption": "None"
},
"dataset": {
"referenceName": "SqlServerTable1",
"type": "DatasetReference"
},
"firstRowOnly": false
}
},
{
"name": "ForEach1",
"type": "ForEach",
"dependsOn": [
{
"activity": "Lookup1",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"items": {
"value": "@activity('Lookup1').output.value",
"type": "Expression"
},
"isSequential": true,
"activities": [
{
"name": "Copy data1",
"type": "Copy",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "SqlServerSource",
"sqlReaderQuery": {
"value": "SELECT * FROM [@{item().TABLE_SCHEMA}].[@{item().TABLE_NAME}]",
"type": "Expression"
},
"queryTimeout": "02:00:00",
"partitionOption": "None"
},
"sink": {
"type": "SqlDWSink",
"preCopyScript": "SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.TABLES WHERE TABLE_TYPE = 'BASE TABLE' and TABLE_SCHEMA = 'dbo' ",
"writeBehavior": "Insert",
"sqlWriterUseTableLock": false,
"tableOption": "autoCreate",
"disableMetricsCollection": false
},
"enableStaging": true,
"stagingSettings": {
"linkedServiceName": {
"referenceName": "AzureDataLakeStorage1",
"type": "LinkedServiceReference"
},
"path": "synfiles"
},
"translator": {
"type": "TabularTranslator",
"typeConversion": true,
"typeConversionSettings": {
"allowDataTruncation": true,
"treatBooleanAsNumber": false
}
}
},
"inputs": [
{
"referenceName": "SqlServerTable1",
"type": "DatasetReference"
}
],
"outputs": [
{
"referenceName": "AzureSynapseAnalyticsTable1",
"type": "DatasetReference",
"parameters": {
"DWSchema": {
"value": "@item().TABLE_NAME",
"type": "Expression"
},
"DWTableName": {
"value": "@item().TABLE_SCHEMA",
"type": "Expression"
}
}
}
]
}
]
}
}
],
"annotations": []
}
}
Upvotes: 0