Dipanshu
Dipanshu

Reputation: 59

Error while ingesting data from an Azure append blob into a Kusto database using Azure Data Factory

I have an Azure append blob (sharing.json) with content type application/json. I am trying to ingest it into a Kusto database using Azure Data Factory (ADF), but the ingestion always fails. I get the following error in the ADF output:

"errors": [
        {
            "Code": 23302,
            "Message": "ErrorCode=KustoWriteFailed,'Type=Microsoft.DataTransfer.Common.Shared.HybridDeliveryException,Message=Write to Kusto failed with following error: 'An error occurred for source: 'DataReader'. Error: '''.,Source=Microsoft.DataTransfer.Runtime.KustoConnector,''Type=Kusto.Ingest.Exceptions.IngestClientException,Message=An error occurred for source: 'DataReader'. Error: '',Source=Kusto.Ingest,'",
            "EventType": 0,
            "Category": 5,
            "Data": {},
            "MsgId": null,
            "ExceptionType": null,
            "Source": null,
            "StackTrace": null,
            "InnerEventInfos": []
        }
    ]

I tried getting help from ChatGPT and other online resources, but have had no luck so far.

This is my ADF activity config:

{
    "name": "CopyPipeline_k0h",
    "properties": {
        "activities": [
            {
                "name": "Copy_k0h",
                "type": "Copy",
                "dependsOn": [],
                "policy": {
                    "timeout": "0.12:00:00",
                    "retry": 3,
                    "retryIntervalInSeconds": 30,
                    "secureOutput": false,
                    "secureInput": false
                },
                "userProperties": [
                    {
                        "name": "Source",
                        "value": "sil-xms-load-max-data//sharing.json"
                    },
                    {
                        "name": "Destination",
                        "value": "AggregatedSharingTest_v1"
                    }
                ],
                "typeProperties": {
                    "source": {
                        "type": "JsonSource",
                        "storeSettings": {
                            "type": "AzureBlobStorageReadSettings",
                            "recursive": true,
                            "enablePartitionDiscovery": false
                        },
                        "formatSettings": {
                            "type": "JsonReadSettings"
                        }
                    },
                    "sink": {
                        "type": "AzureDataExplorerSink",
                        "ingestionMappingName": "",
                        "additionalProperties": {
                            "tags": "drop-by:loadtest",
                            "format": "multijson"
                        }
                    },
                    "enableStaging": false,
                    "validateDataConsistency": false,
                    "logSettings": {
                        "enableCopyActivityLog": true,
                        "copyActivityLogSettings": {
                            "logLevel": "Info",
                            "enableReliableLogging": true
                        },
                        "logLocationSettings": {
                            "linkedServiceName": {
                                "referenceName": "LoadTestBlob",
                                "type": "LinkedServiceReference"
                            },
                            "path": "debug-logs"
                        }
                    },
                    "translator": {
                        "type": "TabularTranslator",
                        "mappings": [
                            {
                                "source": {
                                    "path": "$['deviceId']"
                                },
                                "sink": {
                                    "name": "deviceId",
                                    "type": "String"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['tenant']"
                                },
                                "sink": {
                                    "name": "tenant",
                                    "type": "String"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['tagsSerialNo']"
                                },
                                "sink": {
                                    "name": "tagsSerialNo",
                                    "type": "String"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['metricSum']"
                                },
                                "sink": {
                                    "name": "metricSum",
                                    "type": "Int64"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['metricCount']"
                                },
                                "sink": {
                                    "name": "metricCount",
                                    "type": "Int64"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['notMetricCount']"
                                },
                                "sink": {
                                    "name": "notMetricCount",
                                    "type": "Int64"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['timestamp']"
                                },
                                "sink": {
                                    "name": "timestamp",
                                    "type": "DateTime"
                                }
                            }
                        ],
                        "collectionReference": ""
                    }
                },
                "inputs": [
                    {
                        "referenceName": "SourceDataset_k0h",
                        "type": "DatasetReference"
                    }
                ],
                "outputs": [
                    {
                        "referenceName": "DestinationDataset_k0h",
                        "type": "DatasetReference"
                    }
                ]
            }
        ],
        "annotations": [],
        "lastPublishTime": "2023-04-18T11:30:35Z"
    },
    "type": "Microsoft.DataFactory/factories/pipelines"
}

This is the destination dataset config on ADF:

{
    "name": "DestinationDataset_k0h",
    "properties": {
        "linkedServiceName": {
            "referenceName": "LoadTestDump",
            "type": "LinkedServiceReference"
        },
        "annotations": [],
        "type": "AzureDataExplorerTable",
        "schema": [
            {
                "name": "deviceId",
                "type": "string"
            },
            {
                "name": "tenant",
                "type": "string"
            },
            {
                "name": "tagsSerialNo",
                "type": "string"
            },
            {
                "name": "metricSum",
                "type": "long"
            },
            {
                "name": "metricCount",
                "type": "long"
            },
            {
                "name": "notMetricCount",
                "type": "long"
            },
            {
                "name": "timestamp",
                "type": "datetime"
            }
        ],
        "typeProperties": {
            "table": "AggregatedSharingTest_v1"
        }
    },
    "type": "Microsoft.DataFactory/factories/datasets"
}

This is the source dataset (Azure Blob Storage) config on ADF:

{
    "name": "SourceDataset_k0h",
    "properties": {
        "linkedServiceName": {
            "referenceName": "LoadTestBlob",
            "type": "LinkedServiceReference"
        },
        "annotations": [],
        "type": "Json",
        "typeProperties": {
            "location": {
                "type": "AzureBlobStorageLocation",
                "fileName": "sharing.json",
                "container": "sil-xms-load-max-data"
            }
        },
        "schema": {
            "type": "object",
            "properties": {
                "deviceId": {
                    "type": "string"
                },
                "tenant": {
                    "type": "string"
                },
                "tagsSerialNo": {
                    "type": "string"
                },
                "metricSum": {
                    "type": "integer"
                },
                "metricCount": {
                    "type": "integer"
                },
                "notMetricCount": {
                    "type": "integer"
                },
                "timestamp": {
                    "type": "string"
                }
            }
        }
    },
    "type": "Microsoft.DataFactory/factories/datasets"
}

I have tested both the source and destination connections in the Azure portal, and they look good. I am not sure what exactly is going wrong: the pipeline runs, and the run details show data read and data written, but the data never becomes available in the Kusto table for querying, and the run eventually fails with the above error.

Upvotes: 1

Views: 427

Answers (1)

Rakesh Govindula
Rakesh Govindula

Reputation: 11514

I tried with your input JSON from a storage account and your pipeline JSON, and ended up with the same error.

enter image description here

In your case, the reason for this error is additionalProperties in the copy activity sink.

When I removed the additionalProperties, I was able to copy the data successfully.

enter image description here

I have 4 rows of data in the Kusto table, and you can see two rows inserted from the source by the copy activity after removing the additionalProperties.

enter image description here

Data in target table:

enter image description here

This is my Pipeline JSON for your reference:

{
    "name": "pipeline2",
    "properties": {
        "activities": [
            {
                "name": "Copy data1",
                "type": "Copy",
                "dependsOn": [],
                "policy": {
                    "timeout": "0.12:00:00",
                    "retry": 0,
                    "retryIntervalInSeconds": 30,
                    "secureOutput": false,
                    "secureInput": false
                },
                "userProperties": [
                    {
                        "name": "Source",
                        "value": "data//myjson.json"
                    },
                    {
                        "name": "Destination",
                        "value": "table1"
                    }
                ],
                "typeProperties": {
                    "source": {
                        "type": "JsonSource",
                        "storeSettings": {
                            "type": "AzureBlobFSReadSettings",
                            "recursive": true,
                            "enablePartitionDiscovery": false
                        },
                        "formatSettings": {
                            "type": "JsonReadSettings"
                        }
                    },
                    "sink": {
                        "type": "AzureDataExplorerSink",
                        "ingestionMappingName": ""
                    },
                    "enableStaging": false,
                    "logSettings": {
                        "enableCopyActivityLog": true,
                        "copyActivityLogSettings": {
                            "logLevel": "Info",
                            "enableReliableLogging": true
                        },
                        "logLocationSettings": {
                            "linkedServiceName": {
                                "referenceName": "AzureDataLakeStorage2",
                                "type": "LinkedServiceReference"
                            },
                            "path": "data/debug-logs"
                        }
                    },
                    "translator": {
                        "type": "TabularTranslator",
                        "mappings": [
                            {
                                "source": {
                                    "path": "$['deviceId']"
                                },
                                "sink": {
                                    "name": "deviceId",
                                    "type": "String"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['tenant']"
                                },
                                "sink": {
                                    "name": "tenant",
                                    "type": "Guid"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['tagsSerialNo']"
                                },
                                "sink": {
                                    "name": "tagsSerialNo",
                                    "type": "String"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['metricSum']"
                                },
                                "sink": {
                                    "name": "metricSum",
                                    "type": "Int64"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['metricCount']"
                                },
                                "sink": {
                                    "name": "metricCount",
                                    "type": "Int64"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['notMetricCount']"
                                },
                                "sink": {
                                    "name": "notMetricCount",
                                    "type": "Int64"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['timestamp']"
                                },
                                "sink": {
                                    "name": "timestamp",
                                    "type": "DateTime"
                                }
                            }
                        ],
                        "collectionReference": ""
                    }
                },
                "inputs": [
                    {
                        "referenceName": "Json1",
                        "type": "DatasetReference"
                    }
                ],
                "outputs": [
                    {
                        "referenceName": "AzureDataExplorerTable1",
                        "type": "DatasetReference"
                    }
                ]
            }
        ],
        "annotations": []
    }
}

Upvotes: 1

Related Questions