Su Myat
Su Myat

Reputation: 33

Azure Cognitive Search Still Returns Deleted Files After Using Native Soft Delete

I’m encountering an issue where a file I deleted from my Azure Storage account (using the Containers Explorer) is still appearing in my Azure Cognitive Search results. I’m using the native soft delete method as described in the Azure documentation.

Steps Taken:

Despite these steps, the deleted file still appears in search results. According to the documentation, the deletion detection policy should be effective from the start of the indexer run. However, the file remains indexed and searchable.

Question:

//Function App 
// Blob Storage client, authenticated with the storage account's shared key.
const storageEndpoint = `https://${accountName}.blob.core.windows.net`;
const storageCredential = new StorageSharedKeyCredential(accountName, accountKey);
const blobServiceClient = new BlobServiceClient(storageEndpoint, storageCredential);

// Both search clients below target the same search service endpoint.
const searchEndpoint = `https://${searchServiceName}.search.windows.net/`;

// Client bound to a single index (`indexName`) on the search service.
const searchCredential = new AzureKeyCredential(apiKey);
const searchClient = new SearchClient(searchEndpoint, indexName, searchCredential);

// Client for indexer operations; triggerIndexer() calls runIndexer() on it.
const indexerCredential = new AzureKeyCredential(apiKey);
const indexerClient = new SearchIndexerClient(searchEndpoint, indexerCredential);

/**
 * Event Grid trigger that re-runs the search indexer whenever a blob is
 * created or deleted.
 *
 * The handler arguments follow the `@azure/functions` v4 order: the Event Grid
 * event first, the invocation context second. The payload is read from the
 * event itself, with a fallback to `context.triggerMetadata`, which is where
 * the previous implementation read it from.
 *
 * An event is acted on when either its `eventType` is a blob created/deleted
 * event or its `data.api` is `PutBlob` / `DeleteBlob`. Checking `eventType`
 * as well means creations reported with a different `api` value (for example
 * `PutBlockList`) are no longer ignored.
 *
 * Errors are logged and not rethrown, so a failure here does not fail the
 * invocation.
 */
app.eventGrid('process-event-grid-us', {
  handler: async (event, context) => {
    try {
      console.log(`Event received: ${JSON.stringify(event)}`);

      // Prefer the event payload; fall back to the trigger metadata for
      // backward compatibility with the earlier implementation.
      const data = event?.data ?? context?.triggerMetadata?.data;
      const eventType = event?.eventType ?? context?.triggerMetadata?.eventType;

      const blobUrl = data?.url;
      if (!blobUrl) {
        console.error("Event data does not contain 'url':", event);
        return;
      }

      const blobApi = data?.api;
      const isCreate =
        eventType === 'Microsoft.Storage.BlobCreated' || blobApi === 'PutBlob';
      const isDelete =
        eventType === 'Microsoft.Storage.BlobDeleted' || blobApi === 'DeleteBlob';

      if (!isCreate && !isDelete) {
        console.log(`Unhandled blobapi type: ${blobApi}`);
        return;
      }

      if (isDelete) {
        // The name is only needed for this log line: last path segment of the URL.
        const blobName = blobUrl.substring(blobUrl.lastIndexOf('/') + 1);
        console.log(`to delete blobName : ${blobName}`);
      }

      // Creation and deletion are both picked up by the same indexer run.
      await triggerIndexer();
    } catch (error) {
      console.error(`Error processing event: ${error}`, event);
    }
  }
});

/**
 * Starts a run of the indexer named by `indexerName` through `indexerClient`.
 *
 * Progress is written to the console. Any failure (including a rejected
 * `runIndexer` call) is logged with `console.error` and not rethrown, so the
 * returned promise always resolves.
 *
 * @returns {Promise<void>}
 */
async function triggerIndexer() {
  try {
    const targetIndexer = indexerName;
    console.log(`Indexer "${targetIndexer}" `);
    console.log(`indexerClient "${indexerClient}" `);

    await indexerClient.runIndexer(targetIndexer);

    console.log(`Indexer "${targetIndexer}" triggered successfully`);
  } catch (failure) {
    console.error(`Error triggering indexer:`, failure);
  }
}

// Export the `app` object on which the Event Grid handler above is registered.
module.exports = app;

Index json:

{
  "@odata.context": "https://../$metadata#indexes/$entity",
  "@odata.etag": "\"0x8DCA64CE4DAF9B5\"",
  "name": "aisearchindex",
  "defaultScoringProfile": "",
  "fields": [
    {
      "name": "id",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "stored": true,
      "sortable": true,
      "facetable": true,
      "key": true,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "content",
      "type": "Edm.String",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": "standard.lucene",
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "metadata_storage_content_type",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "metadata_storage_size",
      "type": "Edm.Int64",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "metadata_storage_last_modified",
      "type": "Edm.DateTimeOffset",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "metadata_storage_content_md5",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "metadata_storage_name",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "metadata_storage_path",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "metadata_storage_file_extension",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "metadata_content_type",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "metadata_language",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "metadata_author",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "metadata_creation_date",
      "type": "Edm.DateTimeOffset",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "keyphrases",
      "type": "Collection(Edm.String)",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": "standard.lucene",
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "ocr_text",
      "type": "Edm.String",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "image_description",
      "type": "Edm.String",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "image_tags",
      "type": "Collection(Edm.String)",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "merged_text",
      "type": "Edm.String",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": "standard.lucene",
      "searchAnalyzer": "standard.lucene",
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    }
  ],
  "scoringProfiles": [],
  "corsOptions": null,
  "suggesters": [],
  "analyzers": [],
  "normalizers": [],
  "tokenizers": [],
  "tokenFilters": [],
  "charFilters": [],
  "encryptionKey": null,
  "similarity": {
    "@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
    "k1": null,
    "b": null
  },
  "semantic": null,
  "vectorSearch": null
}

Indexer:

{
  "@odata.context": "https://....windows.net/$metadata#indexers/$entity",
  "@odata.etag": "\"0x8DCA622FEEDA31E\"",
  "name": "indexer1721195448068",
  "description": null,
  "dataSourceName": "azureaidatasource1",
  "skillsetName": "skillset1719988983064",
  "targetIndexName": "aisearchindex",
  "disabled": null,
  "schedule": null,
  "parameters": {
    "batchSize": null,
    "maxFailedItems": null,
    "maxFailedItemsPerBatch": null,
    "base64EncodeKeys": null,
    "configuration": {
      "allowSkillsetToReadFileData": true,
      "dataToExtract": "contentAndMetadata",
      "imageAction": "generateNormalizedImages"
    }
  },
  "fieldMappings": [
    {
      "sourceFieldName": "/document/normalized_images/*/ocr_text",
      "targetFieldName": "ocr_text",
      "mappingFunction": null
    },
    {
      "sourceFieldName": "/document/normalized_images/*/image_description",
      "targetFieldName": "image_description",
      "mappingFunction": null
    },
    {
      "sourceFieldName": "/document/normalized_images/*/image_tags",
      "targetFieldName": "image_tags",
      "mappingFunction": null
    },
    {
      "sourceFieldName": "/document/content",
      "targetFieldName": "merged_text",
      "mappingFunction": null
    }
  ],
  "outputFieldMappings": [
    {
      "sourceFieldName": "/document/normalized_images/*/ocr_text",
      "targetFieldName": "ocr_text"
    },
    {
      "sourceFieldName": "/document/normalized_images/*/image_description",
      "targetFieldName": "image_description"
    },
    {
      "sourceFieldName": "/document/normalized_images/*/image_tags",
      "targetFieldName": "image_tags"
    },
    {
      "sourceFieldName": "/document/content",
      "targetFieldName": "merged_text"
    }
  ],
  "cache": null,
  "encryptionKey": null
}

Upvotes: 1

Views: 111

Answers (1)

JayashankarGS
JayashankarGS

Reputation: 8140

According to this documentation, the document key must be mapped to either a blob property or blob metadata, such as metadata_storage_path:

  • Document keys for the documents in your index must be mapped to either be a blob property or blob metadata, such as "metadata_storage_path".

So, the solution to your problem is either to create the index with a key that uses blob metadata (for example, metadata_storage_path),

Or do the mapping to your index key field with blob metadata like below.

"fieldMappings": [
    {
      "sourceFieldName": "metadata_storage_path",
      "targetFieldName": "id",
      "mappingFunction": {
        "name": "base64Encode",
        "parameters": null
      }
    }
  ],

In my case the document key field is id, same as yours.

Full indexer definition.

{
  "@odata.context": "https://jgsaiservice.search.windows.net/$metadata#indexers/$entity",
  "@odata.etag": "\"0x8DCB123BA06A6D6\"",
  "name": "indexer1722403974229",
  "description": null,
  "dataSourceName": "jsons",
  "skillsetName": null,
  "targetIndexName": "tempidx",
  "disabled": null,
  "schedule": null,
  "parameters": {
    "batchSize": null,
    "maxFailedItems": null,
    "maxFailedItemsPerBatch": null,
    "base64EncodeKeys": true,
    "configuration": {
      "parsingMode": "json"
    }
  },
  "fieldMappings": [
    {
      "sourceFieldName": "metadata_storage_path",
      "targetFieldName": "id",
      "mappingFunction": {
        "name": "base64Encode",
        "parameters": null
      }
    }
  ],
  "outputFieldMappings": [],
  "cache": null,
  "encryptionKey": null
}

After updating the indexer, reset it and run it.

Later, you can trigger the indexer from the function when files are deleted.

Upvotes: 0

Related Questions