I am writing an Azure AI Search skillset and I want to use Skills.Vision.OcrSkill, Skills.Vision.ImageAnalysisSkill, Skills.Text.SplitSkill, and Skills.Text.AzureOpenAIEmbeddingSkill, but I am not sure about the correct order in which to chain them. In my skillset I first run Skills.Vision.OcrSkill and Skills.Vision.ImageAnalysisSkill, then use Skills.Text.MergeSkill twice to merge their outputs with the document content. After that I chunk the result of the merges with the SplitSkill, and finally I run Skills.Text.AzureOpenAIEmbeddingSkill on the chunks. Is this logic correct?
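To check the ordering myself, I sketched the chain as plain data in Python, with no Azure SDK involved: each tuple lists what a skill reads and writes, using the paths from the skillset below. The assumption that an output attaches underneath the skill's context in the enrichment tree is mine, so please treat this as a sanity check of my logic rather than anything authoritative:

# Sanity check: every skill's input path should be produced by an earlier
# skill or be a built-in field. Paths are copied from the skillset below;
# how outputs attach to the enrichment tree is my assumption.
BUILT_IN = {"/document/file_data"}

SKILLS = [  # (name, inputs read, outputs written), in skillset order
    ("DocumentExtractionSkill",
     ["/document/file_data"],
     ["/document/extracted_text", "/document/extracted_normalized_images"]),
    ("OcrSkill",
     ["/document/extracted_normalized_images/*"],
     ["/document/extracted_normalized_images/*/text"]),
    ("MergeSkill #1",
     ["/document/extracted_text",
      "/document/extracted_normalized_images/*/text"],
     ["/document/merged_text", "/document/first_mergedOffsets"]),
    ("ImageAnalysisSkill",
     ["/document/extracted_normalized_images/*"],
     ["/document/extracted_normalized_images/*/description"]),
    ("MergeSkill #2",
     ["/document/merged_text",
      "/document/extracted_normalized_images/*/description/captions/*/text",
      "/document/first_mergedOffsets"],
     ["/document/final_merged_text"]),
    ("SplitSkill",
     ["/document/final_merged_text"],
     ["/document/final_merged_text/pages/*"]),
    ("AzureOpenAIEmbeddingSkill",
     ["/document/final_merged_text/pages/*"],
     ["/document/final_merged_text/pages/*/embeddingVec"]),
]

available = set(BUILT_IN)
for name, inputs, outputs in SKILLS:
    for path in inputs:
        # Treat "X/*/y" as satisfied by whichever skill produced "X".
        ok = any(path == p or path.startswith(p + "/") for p in available)
        print(f"{'ok ' if ok else 'MISSING'} {name}: {path}")
    available.update(outputs)

Here is the full skillset definition: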
{
"@odata.etag": "\"mioetag\"",
"name": "mioskillset",
"skills": [
{
"@odata.type": "#Microsoft.Skills.Util.DocumentExtractionSkill",
"name": "#1",
"description": "It extracts text and metadata from documents and applies OCR to images",
"context": "/document",
"parsingMode": "default",
"dataToExtract": "contentAndMetadata",
"inputs": [
{
"name": "file_data",
"source": "/document/file_data",
"inputs": []
}
],
"outputs": [
{
"name": "content",
"targetName": "extracted_text"
},
{
"name": "normalized_images",
"targetName": "extracted_normalized_images"
}
],
"configuration": {
"imageAction": "generateNormalizedImages",
"[email protected]": "#Int64",
"normalizedImageMaxWidth": 2000,
"[email protected]": "#Int64",
"normalizedImageMaxHeight": 2000
}
},
{
"description": "Extract text (plain and structured) from image.",
"@odata.type": "#Microsoft.Skills.Vision.OcrSkill",
"context": "/document/extracted_normalized_images/*",
"defaultLanguageCode": "en",
"detectOrientation": true,
"inputs": [
{
"name": "image",
"source": "/document/extracted_normalized_images/*"
}
],
"outputs": [
{
"name": "text"
}
]
},
{
"@odata.type": "#Microsoft.Skills.Text.MergeSkill",
"description": "Create merged_text, which includes all the textual representation of each image inserted at the right location in the content field.",
"context": "/document",
"insertPreTag": " ",
"insertPostTag": " ",
"inputs": [
{
"name": "text",
"source": "/document/content"
},
{
"name": "itemsToInsert",
"source": "/document/extracted_normalized_images/*/text"
},
{
"name": "offsets",
"source": "/document/extracted_normalized_images/*/contentOffset"
}
],
"outputs": [
{
"name": "mergedText",
"targetName": "merged_text"
},
{
"name": "mergedOffsets",
"targetName": "first_mergedOffsets"
}
]
},
{
"@odata.type": "#Microsoft.Skills.Vision.ImageAnalysisSkill",
"context": "/document/extracted_normalized_images/*",
"visualFeatures": [
"tags",
"description"
],
"inputs": [
{
"name": "image",
"source": "/document/extracted_normalized_images/*"
}
],
"outputs": [
{
"name": "adult"
},
{
"name": "brands"
},
{
"name": "categories"
},
{
"name": "description"
},
{
"name": "faces"
},
{
"name": "objects"
},
{
"name": "tags"
}
]
},
{
"@odata.type": "#Microsoft.Skills.Text.MergeSkill",
"description": "Create merged_text, which includes all the textual representation of each image inserted at the right location in the content field.",
"context": "/document",
"insertPreTag": " ",
"insertPostTag": " ",
"inputs": [
{
"name": "text",
"source": "/document/merged_test"
},
{
"name": "itemsToInsert",
"source": "/document/extracted_normalized_images/*/description"
},
{
"name": "offsets",
"source": "/document/first_mergedOffsets"
}
],
"outputs": [
{
"name": "mergedText",
"targetName": "final_merged_text"
}
]
},
{
"@odata.type": "#Microsoft.Skills.Text.SplitSkill",
"name": "#2",
"description": "It splits the text into overlapping segments for vectorization, with maximumPageLength set for Ada and textSplitMode configured to avoid breaking pages",
"context": "/document/merged_text",
"defaultLanguageCode": "en",
"textSplitMode": "pages",
"maximumPageLength": 3000,
"pageOverlapLength": 100,
"maximumPagesToTake": 0,
"unit": "characters",
"inputs": [
{
"name": "text",
"source": "/document/final_merged_text",
"inputs": []
}
],
"outputs": [
{
"name": "textItems",
"targetName": "pages"
}
]
},
{
"@odata.type": "#Microsoft.Skills.Text.AzureOpenAIEmbeddingSkill",
"name": "#3",
"description": "It vectorizes the text for semantic search",
"context": "/document/merged_text/pages/*",
"resourceUri": "https://mioservizio.openai.azure.com",
"apiKey": "<redacted>",
"deploymentId": "text-embedding-ada-002",
"dimensions": 1536,
"modelName": "text-embedding-ada-002",
"inputs": [
{
"name": "text",
"source": "/document/merged_text/pages/*",
"inputs": []
}
],
"outputs": [
{
"name": "embedding",
"targetName": "embeddingVec"
}
]
}
],
"indexProjections": {
"selectors": [
{
"targetIndexName": "mioindice",
"parentKeyFieldName": "parent_id",
"sourceContext": "/document/merged_text/pages/*",
"mappings": [
{
"name": "content",
"source": "/document/merged_text/pages/*",
"inputs": []
},
{
"name": "contentVector",
"source": "/document/merged_text/pages/*/embeddingVec",
"inputs": []
},
{
"name": "title",
"source": "/document/metadata_storage_name",
"inputs": []
},
{
"name": "url",
"source": "/document/metadata_storage_path",
"inputs": []
},
{
"name": "filepath",
"source": "/document/metadata_storage_path",
"inputs": []
},
{
"name": "timestamp",
"source": "/document/metadata_storage_last_modified",
"inputs": []
},
{
"name": "chat_id",
"source": "/document/chat_id",
"inputs": []
},
{
"name": "sas_token",
"source": "/document/sas_token",
"inputs": [ ]
}
]
}
],
"parameters": {
"projectionMode": "skipIndexingParentDocuments"
}
}
}
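In case it matters, this is how I push the skillset to the service, as a minimal Python sketch using the plain REST API (the service name, admin key, and skillset.json file are placeholders for my setup; I'm assuming the 2024-07-01 GA api-version):

# Upload the skillset definition above (saved to skillset.json) via REST.
import json
import requests

service = "<my-search-service>"   # placeholder
api_key = "<admin-api-key>"       # placeholder

with open("skillset.json") as f:
    skillset = json.load(f)

resp = requests.put(
    f"https://{service}.search.windows.net/skillsets/mioskillset",
    params={"api-version": "2024-07-01"},
    headers={"api-key": api_key, "Content-Type": "application/json"},
    json=skillset,
)
print(resp.status_code, resp.text[:200])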
I appreciate any suggestions.