Reputation: 33
I’m encountering an issue where a file I deleted from my Azure Storage account (using the Containers Explorer) is still appearing in my Azure Cognitive Search results. I’m using the native soft delete method as described in the Azure documentation.
Steps Taken:
Deleted File: I deleted the file from Azure Blob Storage using the Containers Explorer.
Indexer and Index Configuration: Followed the instructions to set up the native soft delete method. Recreated both the index and indexer as per the guidelines provided. Verified that the deletion detection strategy was applied from the initial indexer run.
Despite these steps, the deleted file still appears in search results. According to the documentation, the deletion detection policy should be effective from the start of the indexer run. However, the file remains indexed and searchable.
Question:
Could the retention period for soft delete be affecting the immediate removal of the file? How does this impact the deletion process and indexing?
Are there additional configurations or steps I might be missing to ensure that files are removed from the index immediately upon deletion?
//Function App
// Service endpoints derived from the configured account / search service names.
const storageAccountUrl = `https://${accountName}.blob.core.windows.net`;
const searchServiceUrl = `https://${searchServiceName}.search.windows.net/`;

// Blob Storage client authenticated with the storage account's shared key.
const storageCredential = new StorageSharedKeyCredential(accountName, accountKey);
const blobServiceClient = new BlobServiceClient(storageAccountUrl, storageCredential);

// Client bound to a single index (`indexName`) on the search service.
const searchClient = new SearchClient(searchServiceUrl, indexName, new AzureKeyCredential(apiKey));

// Client for indexer operations; triggerIndexer() uses it to start indexer runs.
const indexerClient = new SearchIndexerClient(searchServiceUrl, new AzureKeyCredential(apiKey));
/**
 * Event Grid trigger: re-runs the search indexer when a blob is written
 * (`PutBlob`) or removed (`DeleteBlob`).
 *
 * In the Azure Functions v4 Node.js model the handler is called as
 * `(event, context)`: the Event Grid event comes first and the invocation
 * context second. The blob details (`url`, `api`) are therefore read from
 * `event.data`, and logging goes through the invocation context.
 *
 * Failures are logged and swallowed, so the invocation itself never rejects.
 */
app.eventGrid('process-event-grid-us', {
  handler: async (event, context) => {
    try {
      context.log(`Event received: ${JSON.stringify(event)}`);

      const { url: blobUrl, api: blobApi } = event.data ?? {};
      if (!blobUrl) {
        context.error("Event data does not contain 'url':", event);
        return;
      }

      // NOTE(review): only the PutBlob and DeleteBlob APIs are handled here;
      // confirm whether other write paths (e.g. block-list uploads) should
      // also trigger an indexer run.
      if (blobApi === 'DeleteBlob') {
        // Last URL path segment, used for logging only.
        const blobName = blobUrl.substring(blobUrl.lastIndexOf('/') + 1);
        context.log(`to delete blobName : ${blobName}`);
      } else if (blobApi !== 'PutBlob') {
        context.log(`Unhandled blobapi type: ${blobApi}`);
        return;
      }

      // Uploads and deletions both just ask the indexer to run again.
      await triggerIndexer();
    } catch (error) {
      context.error(`Error processing event: ${error}`, event);
    }
  },
});
/**
 * Requests an on-demand run of the indexer named by `indexerName`.
 *
 * Best-effort: every failure is logged and none is rethrown, so the returned
 * promise always resolves.
 *
 * @returns {Promise<void>}
 */
async function triggerIndexer() {
  try {
    console.log(`Triggering indexer "${indexerName}"`);
    await indexerClient.runIndexer(indexerName);
    console.log(`Indexer "${indexerName}" triggered successfully`);
  } catch (error) {
    // NOTE(review): the search service is expected to reject a run request
    // with HTTP 409 while another run of the same indexer is in progress;
    // confirm against the Run Indexer API docs. Reported separately so it is
    // not mistaken for a configuration or credential failure.
    if (error?.statusCode === 409) {
      console.warn(
        `Indexer "${indexerName}" run request was rejected with 409 (a run is probably already in progress)`
      );
      return;
    }
    console.error(`Error triggering indexer:`, error);
  }
}
// Export the `app` object on which the Event Grid handler was registered via app.eventGrid().
module.exports = app;
Index json:
{
"@odata.context": "https://../$metadata#indexes/$entity",
"@odata.etag": "\"0x8DCA64CE4DAF9B5\"",
"name": "aisearchindex",
"defaultScoringProfile": "",
"fields": [
{
"name": "id",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"stored": true,
"sortable": true,
"facetable": true,
"key": true,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "content",
"type": "Edm.String",
"searchable": true,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": "standard.lucene",
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_storage_content_type",
"type": "Edm.String",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_storage_size",
"type": "Edm.Int64",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_storage_last_modified",
"type": "Edm.DateTimeOffset",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_storage_content_md5",
"type": "Edm.String",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_storage_name",
"type": "Edm.String",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_storage_path",
"type": "Edm.String",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_storage_file_extension",
"type": "Edm.String",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_content_type",
"type": "Edm.String",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_language",
"type": "Edm.String",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_author",
"type": "Edm.String",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_creation_date",
"type": "Edm.DateTimeOffset",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "keyphrases",
"type": "Collection(Edm.String)",
"searchable": true,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": "standard.lucene",
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "ocr_text",
"type": "Edm.String",
"searchable": true,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "image_description",
"type": "Edm.String",
"searchable": true,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "image_tags",
"type": "Collection(Edm.String)",
"searchable": true,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "merged_text",
"type": "Edm.String",
"searchable": true,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": "standard.lucene",
"searchAnalyzer": "standard.lucene",
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
}
],
"scoringProfiles": [],
"corsOptions": null,
"suggesters": [],
"analyzers": [],
"normalizers": [],
"tokenizers": [],
"tokenFilters": [],
"charFilters": [],
"encryptionKey": null,
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
"k1": null,
"b": null
},
"semantic": null,
"vectorSearch": null
}
Indexer:
{
"@odata.context": "https://....windows.net/$metadata#indexers/$entity",
"@odata.etag": "\"0x8DCA622FEEDA31E\"",
"name": "indexer1721195448068",
"description": null,
"dataSourceName": "azureaidatasource1",
"skillsetName": "skillset1719988983064",
"targetIndexName": "aisearchindex",
"disabled": null,
"schedule": null,
"parameters": {
"batchSize": null,
"maxFailedItems": null,
"maxFailedItemsPerBatch": null,
"base64EncodeKeys": null,
"configuration": {
"allowSkillsetToReadFileData": true,
"dataToExtract": "contentAndMetadata",
"imageAction": "generateNormalizedImages"
}
},
"fieldMappings": [
{
"sourceFieldName": "/document/normalized_images/*/ocr_text",
"targetFieldName": "ocr_text",
"mappingFunction": null
},
{
"sourceFieldName": "/document/normalized_images/*/image_description",
"targetFieldName": "image_description",
"mappingFunction": null
},
{
"sourceFieldName": "/document/normalized_images/*/image_tags",
"targetFieldName": "image_tags",
"mappingFunction": null
},
{
"sourceFieldName": "/document/content",
"targetFieldName": "merged_text",
"mappingFunction": null
}
],
"outputFieldMappings": [
{
"sourceFieldName": "/document/normalized_images/*/ocr_text",
"targetFieldName": "ocr_text"
},
{
"sourceFieldName": "/document/normalized_images/*/image_description",
"targetFieldName": "image_description"
},
{
"sourceFieldName": "/document/normalized_images/*/image_tags",
"targetFieldName": "image_tags"
},
{
"sourceFieldName": "/document/content",
"targetFieldName": "merged_text"
}
],
"cache": null,
"encryptionKey": null
}
Upvotes: 1
Views: 111
Reputation: 8140
According to this documentation, the document key must be mapped to either a blob property or blob metadata, such as metadata_storage_path:
- Document keys for the documents in your index must be mapped to either be a blob property or blob metadata, such as "metadata_storage_path".
So the solution to your problem is either to create the index with a key that uses blob metadata (it could be metadata_storage_path),
or to map your index key field to blob metadata, as shown below.
"fieldMappings": [
{
"sourceFieldName": "metadata_storage_path",
"targetFieldName": "id",
"mappingFunction": {
"name": "base64Encode",
"parameters": null
}
}
],
In my case the document key field is id, same as yours.
Full indexer definition:
{
"@odata.context": "https://jgsaiservice.search.windows.net/$metadata#indexers/$entity",
"@odata.etag": "\"0x8DCB123BA06A6D6\"",
"name": "indexer1722403974229",
"description": null,
"dataSourceName": "jsons",
"skillsetName": null,
"targetIndexName": "tempidx",
"disabled": null,
"schedule": null,
"parameters": {
"batchSize": null,
"maxFailedItems": null,
"maxFailedItemsPerBatch": null,
"base64EncodeKeys": true,
"configuration": {
"parsingMode": "json"
}
},
"fieldMappings": [
{
"sourceFieldName": "metadata_storage_path",
"targetFieldName": "id",
"mappingFunction": {
"name": "base64Encode",
"parameters": null
}
}
],
"outputFieldMappings": [],
"cache": null,
"encryptionKey": null
}
After updating the indexer, reset it and run it.
Later, you can trigger it from the function when files are deleted.
Upvotes: 0