Reputation: 1
I have a deployment script that runs in Azure as part of a Bicep file. The deployment script sends an API request to create the AI Search service index, and the request succeeds. The problem is that none of the resources defined in the request body's vectorSearch or semantic elements are created when I look at the AI Search service in the Azure portal. The script also fails if the index has a field referring to a vectorSearch profile (since the profile does not exist).
I am using the 2024-05-01-preview REST API version because I am trying to use an AI Studio embedding model deployment. My goal is to replicate the portal's index & vectorize button using the integrated vectorization feature of the AI Search service index.
Request body defined in PS deployment script:
'name' = $indexBaseName;
'fields' = @(
@{ 'name' = 'chunk_id'; 'type' = 'Edm.String'; 'analyzer' = 'keyword'; 'retrievable' = $true; 'searchable' = $true; 'filterable' = $true; 'sortable' = $true; 'facetable' = $true; 'key' = $true };
@{ 'name' = 'parent_id'; 'type' = 'Edm.String'; 'retrievable' = $true; 'searchable' = $true; 'filterable' = $true; 'sortable' = $true; 'facetable' = $true; 'key' = $false };
@{ 'name' = 'chunk'; 'type' = 'Edm.String'; 'retrievable' = $true; 'searchable' = $true; 'filterable' = $true; 'sortable' = $true; 'facetable' = $true; 'key' = $false; };
@{ 'name' = 'title'; 'type' = 'Edm.String'; 'retrievable' = $true; 'searchable' = $true; 'filterable' = $true; 'sortable' = $true; 'facetable' = $true; 'key' = $false; };
@{ 'name' = 'metadata_storage_path'; 'type' = 'Edm.String'; 'retrievable' = $true; 'searchable' = $true; 'filterable' = $true; 'sortable' = $true; 'facetable' = $true; 'key' = $false; };
);
'similarity' = @{
'@odata.type' = '#Microsoft.Azure.Search.BM25Similarity';
'k1' = $null;
'b' = $null;
};
'semantic' = @{
'configurations' = @(
@{
'name' = $indexBaseName + '-semantic-configuration';
'prioritizedFields' = @{
'titleField' = @{ 'fieldName' = 'title';};
'prioritizedContentFields' = @( @{ 'fieldName' = 'content';}; );
};
};
);
};
'vectorSearch' = @{
'profiles' = @(
@{
'name' = $indexBaseName+'profile';
'algorithm' = $indexBaseName+'-algorithm';
};
);
'algorithms' = @(
@{
'name' = $indexBaseName+'-algorithm';
'kind' = 'hnsw';
'hnswParameters' = @{
'metric' = 'cosine';
'm' = 4;
'efConstruction' = 400;
'efSearch' = 500;
};
};
);
'compressions' = @(
@{
'name' = $indexBaseName+'-compression';
'kind' = 'scalarQuantization';
'rerankWithOriginalVectors' = $true;
'defaultOversampling' = 10.0;
'scalarQuantizationParameters' = @{
'quantizedDataType' = 'int8';
};
};
);
'vectorizers' = @(
@{
'name' = $indexBaseName+'-vectorizer';
'kind' = 'aml';
'amlParameters' = @{
'key' = 'XXXXX';
'modelName' = 'text-embedding-ada-002';
'uri' = $aiServiceRessourceId;
};
};
);
};
};
Deployment script output:
Setup Azure Search Service rag-rfp-searchservice starting...
Using Ai Service resource id https://canadaeast.api.cognitive.microsoft.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2023-05-15
Shoul match format subscriptions/{guid}/resourceGroups/{resource-group-name}/Microsoft.MachineLearningServices/workspaces/{workspace-name}/onlineendpoints/{endpoint_name}
[33;1mWARNING: Upcoming breaking changes in the cmdlet 'Get-AzAccessToken' :
The Token property of the output type will be changed from String to SecureString. Add the [-AsSecureString] switch to avoid the impact of this upcoming breaking change.
- The change is expected to take effect in Az version : '13.0.0'
- The change is expected to take effect in Az.Accounts version : '4.0.0'
Note : Go to https://aka.ms/azps-changewarnings for steps to suppress this breaking change warning, and other information on breaking changes in Azure PowerShell.[0m
Data source type: azureblob
Creating Index azureblob-vectorized-index
Creating Index System.Collections.Hashtable
[33;1mWARNING: Resulting JSON is truncated as serialization has exceeded the set depth of 2.[0m
[32;1mStatusCode : [0m201
[32;1mStatusDescription : [0mCreated
[32;1mContent : [0m{"@odata.context":"https://rag-rfp-searchservice.search.win
[32;1m[0mdows.net/$metadata#indexes/$entity","@odata.etag":"\"0x8DCC
[32;1m[0m3917D225648\"","name":"azureblob-vectorized-index","default
[32;1m[0mScoringProfile":null,"f…
[32;1mRawContent : [0mHTTP/1.1 201 Created
[32;1m [0mTransfer-Encoding: chunked
[32;1m [0mETag: "0x8DCC3917D225648"
[32;1m [0mLocation: https://rag-rfp-searchservice.search.windows.net/
[32;1m[0mindexes('azureblob-vectorized-index')?api-version=2024-05-0
[32;1m[0m1-previe…
[32;1mHeaders : [0m{[Transfer-Encoding, System.String[]], [ETag,
[32;1m[0mSystem.String[]], [Location, System.String[]], [Server,
[32;1m[0mSystem.String[]]…}
[32;1mImages : [0m{}
[32;1mInputFields : [0m{}
[32;1mLinks : [0m{}
[32;1mRawContentLength : [0m2188
[32;1mRelationLink : [0m{}
JSON of the search index when viewed in the portal:
"@odata.context": "https://rag-rfp-searchservice.search.windows.net/$metadata#indexes/$entity",
"@odata.etag": "\"0x8DCC3917D225648\"",
"name": "azureblob-vectorized-index",
"defaultScoringProfile": null,
"fields": [
{
"name": "chunk_id",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"stored": true,
"sortable": true,
"facetable": true,
"key": true,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": "keyword",
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "parent_id",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"stored": true,
"sortable": true,
"facetable": true,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "chunk",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"stored": true,
"sortable": true,
"facetable": true,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "title",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"stored": true,
"sortable": true,
"facetable": true,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_storage_path",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"stored": true,
"sortable": true,
"facetable": true,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
}
],
"scoringProfiles": [],
"corsOptions": null,
"suggesters": [],
"analyzers": [],
"normalizers": [],
"tokenizers": [],
"tokenFilters": [],
"charFilters": [],
"encryptionKey": null,
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
"k1": null,
"b": null
},
"semantic": {
"defaultConfiguration": null,
"configurations": []
},
"vectorSearch": {
"algorithms": [],
"profiles": [],
"vectorizers": [],
"compressions": []
}
}
Here is the result when using an index with an index field using a vectorSearchProfile.
Request body defined in PS deployment script:
'name' = $indexBaseName2;
'fields' = @(
@{ 'name' = 'chunk_id'; 'type' = 'Edm.String'; 'analyzer' = 'keyword'; 'retrievable' = $true; 'searchable' = $true; 'filterable' = $true; 'sortable' = $true; 'facetable' = $true; 'key' = $true };
@{ 'name' = 'parent_id'; 'type' = 'Edm.String'; 'retrievable' = $true; 'searchable' = $true; 'filterable' = $true; 'sortable' = $true; 'facetable' = $true; 'key' = $false };
@{ 'name' = 'chunk'; 'type' = 'Edm.String'; 'retrievable' = $true; 'searchable' = $true; 'filterable' = $true; 'sortable' = $true; 'facetable' = $true; 'key' = $false };
@{ 'name' = 'title'; 'type' = 'Edm.String'; 'retrievable' = $true; 'searchable' = $true; 'filterable' = $true; 'sortable' = $true; 'facetable' = $true; 'key' = $false };
@{ 'name' = 'metadata_storage_path'; 'type' = 'Edm.String'; 'retrievable' = $true; 'searchable' = $true; 'filterable' = $true; 'sortable' = $true; 'facetable' = $true; 'key' = $false };
@{ 'name' = 'text_vector'; 'type' = 'Collection(Edm.Single)'; 'dimensions'= 1536; 'vectorSearchProfile' = $indexBaseName2+'profile'; 'retrievable' = $true; 'searchable' = $true };
);
'similarity' = @{
'@odata.type' = '#Microsoft.Azure.Search.BM25Similarity';
'k1' = $null;
'b' = $null;
};
'semantic' = @{
'configurations' = @(
@{
'name' = $indexBaseName2 + '-semantic-configuration';
'prioritizedFields' = @{
'titleField' = @{ 'fieldName' = 'title'};
'contentFields' = @( @{ 'fieldName' = 'chunk'} );
};
};
);
'defaultConfiguration' = $indexBaseName2 + '-semantic-configuration';
};
'vectorSearch' = @{
'algorithms' = @(
@{
'name' = $indexBaseName2+'-algorithm';
'kind' = 'hnsw';
'hnswParameters' = @{
'metric' = 'cosine';
'm' = 4;
'efConstruction' = 400;
'efSearch' = 500;
};
};
);
'vectorizers' = @(
@{
'name' = $indexBaseName2+'-vectorizer';
'kind' = 'aml';
'amlParameters' = @{
'key' = 'XXXXX';
'modelName' = 'text-embedding-ada-002';
'uri' = $aiServiceRessourceId;
};
};
);
'profiles' = @(
@{
'name' = $indexBaseName2+'profile';
'algorithm' = $indexBaseName2+'-algorithm';
'vectorizer' = $indexBaseName2+'-vectorizer';
};
);
};
};
Deployment script output:
Setup Azure Search Service rag-rfp-searchservice starting...
Using Ai Service resource id https://canadaeast.api.cognitive.microsoft.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2023-05-15
Shoul match format subscriptions/{guid}/resourceGroups/{resource-group-name}/Microsoft.MachineLearningServices/workspaces/{workspace-name}/onlineendpoints/{endpoint_name}
[33;1mWARNING: Upcoming breaking changes in the cmdlet 'Get-AzAccessToken' :
The Token property of the output type will be changed from String to SecureString. Add the [-AsSecureString] switch to avoid the impact of this upcoming breaking change.
- The change is expected to take effect in Az version : '13.0.0'
- The change is expected to take effect in Az.Accounts version : '4.0.0'
Note : Go to https://aka.ms/azps-changewarnings for steps to suppress this breaking change warning, and other information on breaking changes in Azure PowerShell.[0m
Data source type: azureblob
Creating Index azureblob-vectorized-index-2
Creating Index System.Collections.Hashtable
[33;1mWARNING: Resulting JSON is truncated as serialization has exceeded the set depth of 2.[0m
[33;1mWARNING: Resulting JSON is truncated as serialization has exceeded the set depth of 2.[0m
Upvotes: 0
Views: 142
Reputation: 3639
According to MSDOC, the index describes and contains all data that can be searched. Every data field you want to search gets its own field in the search index. Each field also has a data type and different properties, such as whether the field is sortable or facetable. The index reflects the data you want to search, without the fields you do not need, and is customized for your search case.
Below is an example request structure for a search index with vector search and semantic resources:
{
  "name": "my-index",
  "fields": [
    // define your fields here; a vector field opts in with
    // "vectorSearchProfile": "my-vector-profile"
  ],
  "vectorSearch": {
    "algorithms": [
      {
        "name": "my-hnsw-algorithm",
        "kind": "hnsw"
        // additional parameters
      }
    ],
    "profiles": [
      {
        "name": "my-vector-profile",
        "algorithm": "my-hnsw-algorithm"
      }
    ]
  },
  "semantic": {
    "configurations": [
      {
        "name": "default",
        "prioritizedFields": {
          "titleField": {"fieldName": "title"},
          "prioritizedContentFields": [{"fieldName": "content"}],
          "prioritizedKeywordsFields": [{"fieldName": "keywords"}]
        }
      }
    ]
  }
}
Refer to the MSDOC for Create Or Update, semantic capabilities, and configuring a vectorizer using the REST API.
The problem of elements not being created (e.g. vectorSearch profiles/vectorizers/algorithms/compressions) can be resolved by adding vectorSearchProfile to the fields, like this:
# Vector field definition: a 1536-dimension Collection(Edm.Single) field bound
# to the vector profile named "my-vector-profile".
@{
    name                = "chunk"
    type                = "Collection(Edm.Single)"
    dimensions          = 1536
    vectorSearchProfile = "my-vector-profile"
    searchable          = $true
    retrievable         = $true
}
and configuring the vectorSearch with algorithms and profiles.
A semantic configuration must have the following fields: title field, content fields, and keyword fields. Refer to this MSDOC for more details about configuring semantic ranking in Azure AI Search.
In the example below, the title field is HotelName, the content field is Description, and the keyword field is Tags.
I referred to this MSDOC on vector search in Azure AI Search. Refer to this MSDOC on integrated vectorization for Azure AI Search and the data plane.
The PowerShell script below creates an index for Azure Cognitive Search. The sample index is designed for vector search capabilities and semantic configurations.
# Request body for creating a search index that combines plain fields, two
# vector fields bound to a vector profile, and one semantic configuration.
# Reads $indexName, which must be set before this statement runs.
# The result stored in $body is a JSON string, not a hashtable.
$body = @{
    name = $indexName
    fields = @(
        @{
            name = "HotelId"
            type = "Edm.String"
            searchable = $false
            filterable = $true
            retrievable = $true
            sortable = $false
            facetable = $false
            key = $true
        },
        @{
            name = "HotelName"
            type = "Edm.String"
            searchable = $true
            filterable = $false
            retrievable = $true
            sortable = $true
            facetable = $false
        },
        @{
            # Vector field: references the profile declared under vectorSearch.profiles.
            name = "HotelNameVector"
            type = "Collection(Edm.Single)"
            searchable = $true
            retrievable = $true
            dimensions = 1536
            vectorSearchProfile = "my-vector-profile"
        },
        @{
            name = "Description"
            type = "Edm.String"
            searchable = $true
            filterable = $false
            retrievable = $true
            sortable = $false
            facetable = $false
        },
        @{
            # Vector field: references the profile declared under vectorSearch.profiles.
            name = "DescriptionVector"
            type = "Collection(Edm.Single)"
            searchable = $true
            retrievable = $true
            dimensions = 1536
            vectorSearchProfile = "my-vector-profile"
        },
        @{
            name = "Category"
            type = "Edm.String"
            searchable = $true
            filterable = $true
            retrievable = $true
            sortable = $true
            facetable = $true
        },
        @{
            name = "Tags"
            type = "Collection(Edm.String)"
            searchable = $true
            filterable = $true
            retrievable = $true
            sortable = $false
            facetable = $true
        },
        @{
            # Complex field with its own nested field list.
            name = "Address"
            type = "Edm.ComplexType"
            fields = @(
                @{
                    name = "City"
                    type = "Edm.String"
                    searchable = $true
                    filterable = $true
                    retrievable = $true
                    sortable = $true
                    facetable = $true
                },
                @{
                    name = "StateProvince"
                    type = "Edm.String"
                    searchable = $true
                    filterable = $true
                    retrievable = $true
                    sortable = $true
                    facetable = $true
                }
            )
        },
        @{
            name = "Location"
            type = "Edm.GeographyPoint"
            searchable = $false
            filterable = $true
            retrievable = $true
            sortable = $true
            facetable = $false
        }
    )
    vectorSearch = @{
        algorithms = @(
            @{
                name = "my-hnsw-vector-config-1"
                kind = "hnsw"
                hnswParameters = @{
                    m = 4
                    efConstruction = 400
                    efSearch = 500
                    metric = "cosine"
                }
            },
            @{
                name = "my-hnsw-vector-config-2"
                kind = "hnsw"
                hnswParameters = @{
                    m = 4
                    metric = "euclidean"
                }
            },
            @{
                name = "my-eknn-vector-config"
                kind = "exhaustiveKnn"
                exhaustiveKnnParameters = @{
                    metric = "cosine"
                }
            }
        )
        profiles = @(
            @{
                # The profile name is what the vector fields above reference.
                name = "my-vector-profile"
                algorithm = "my-hnsw-vector-config-1"
            }
        )
    }
    # Add your configuration options on how to vectorize text vector queries.
    semantic = @{
        configurations = @(
            @{
                name = "my-semantic-config"
                prioritizedFields = @{
                    titleField = @{
                        fieldName = "HotelName"
                    }
                    prioritizedContentFields = @(
                        @{
                            fieldName = "Description"
                        }
                    )
                    prioritizedKeywordsFields = @(
                        @{
                            fieldName = "Tags"
                        }
                    )
                }
            }
        )
    }
    # -Depth is required here: ConvertTo-Json serializes only 2 levels by default,
    # and this body nests deeper than that (for example
    # vectorSearch.algorithms[].hnswParameters), so the nested objects would not
    # reach the service as real JSON objects.
} | ConvertTo-Json -Depth 10
# Send the create-index request and echo the service response.
# NOTE(review): the original call stopped after -Uri with no value, which does
# not parse. $searchServiceName and $apiKey are assumed to be defined earlier in
# the script, and the api-version is assumed to be the preview version under
# discussion; confirm all three, or replace the api-key header with a bearer
# token if the script authenticates that way.
$uri = "https://$searchServiceName.search.windows.net/indexes?api-version=2024-05-01-preview"
$response = Invoke-RestMethod -Method Post -Uri $uri -Headers @{ 'api-key' = $apiKey } -ContentType 'application/json' -Body $body
$response
Upvotes: 0