Reputation: 625
I am trying to update a document using the upsert_item function of the Cosmos DB Python SDK.
Here is a script:
from dotenv import load_dotenv
from azure.cosmos import CosmosClient
import os
import uuid
def update():
    """Create a sample document, change it with ``upsert_item``, and re-query it.

    Connection settings are read from the environment after loading a
    ``.env`` file: ``COSMOSDB_ENDPOINT``, ``COSMOSDB_KEY``, ``COSMOSDB_DB``
    and ``COSMOSDB_CONTAINER``. The number of documents matching the new
    id is printed before and after the upsert.
    """
    load_dotenv()

    # Initialize the Cosmos DB client
    endpoint = os.getenv("COSMOSDB_ENDPOINT")
    key = os.getenv("COSMOSDB_KEY")
    client = CosmosClient(endpoint, key)

    # Specify your database and container (collection) names
    database_name = os.getenv("COSMOSDB_DB")
    container_name = os.getenv("COSMOSDB_CONTAINER")
    container = client.get_database_client(database_name).get_container_client(container_name)

    # Named doc_id rather than id so the builtin id() is not shadowed.
    doc_id = str(uuid.uuid4())
    url = "https://google.com/"
    container.create_item(
        {
            "id": doc_id,
            "status": "pending",  # => started, analyzing, finished
            "url": url,
            "categories": ["7149b375-8cb2-4180-ae03-27fd0da409d0"],
            "doctype": "url",
        }
    )

    # Retrieve the item by its ID. The value is passed as a query parameter
    # instead of being interpolated into the SQL text.
    query_text = "SELECT * FROM c WHERE c.id = @id"
    query_parameters = [{"name": "@id", "value": doc_id}]
    query_items_response = container.query_items(
        query=query_text,
        parameters=query_parameters,
        enable_cross_partition_query=True,
    )
    data = list(query_items_response)
    print(len(data))  # returns 1

    for item in data:
        print(f"Updating {item['url']}")
        item["categories"] = []
        item["doctype"] = "pdfDocument"
        container.upsert_item(item)

    # Run the same query again to check the document after the upsert.
    updated_query_items_response = container.query_items(
        query=query_text,
        parameters=query_parameters,
        enable_cross_partition_query=True,
    )
    updated_data = list(updated_query_items_response)
    print(updated_data)
    print(len(updated_data))  # returns 0
    # Confirm created data using CosmosDB data explorer.
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    update()
After creating a document, I update it using the upsert_item method. Sometimes, when I query the data by its ID right afterwards, I can still see the document.
However, the document has actually been removed: even after waiting for some time, I am unable to retrieve it, either through the code or in the Cosmos DB Data Explorer.
I also found something strange.
If I change item["doctype"] = "pdfDocument" to item["doctype"] = "site" (or any other value), it works as expected.
It is quite strange.
Upvotes: 0
Views: 112
Reputation: 1795
Python CosmosDB upsert_item deletes document
Try the script below, which uses upsert_item to update a document in Azure Cosmos DB. It queries the data both before and after the update and retrieves the updated document, as shown in the output. Note that Cosmos DB does not always reflect writes immediately: under weaker consistency levels (such as eventual consistency), it can take some time for an update to become visible to queries.
def update():
    """Create a sample document, upsert a change to it, and print both states.

    Settings are loaded from ``./sss.env`` and then read from the
    environment: ``COSMOSDB_ENDPOINT``, ``COSMOSDB_KEY``, ``COSMOSDB_DB``
    and ``COSMOSDB_CONTAINER``. The document is queried by id before and
    after the upsert, and the counts and contents are printed.
    """
    load_dotenv(dotenv_path="./sss.env")

    endpoint = os.getenv("COSMOSDB_ENDPOINT")
    key = os.getenv("COSMOSDB_KEY")
    client = CosmosClient(endpoint, key)

    database_name = os.getenv("COSMOSDB_DB")
    container_name = os.getenv("COSMOSDB_CONTAINER")
    container = client.get_database_client(database_name).get_container_client(container_name)

    # Named doc_id rather than id so the builtin id() is not shadowed.
    doc_id = str(uuid.uuid4())
    url = "https://google.com/"
    container.create_item(
        {
            "id": doc_id,
            "status": "pending",
            "url": url,
            "categories": ["7149b375-8cb2-4180-ae03-27fd0da409d0"],
            "doctype": "url",
        }
    )

    # The id is passed as a query parameter instead of being interpolated
    # into the SQL text.
    query_text = "SELECT * FROM c WHERE c.id = @id"
    query_parameters = [{"name": "@id", "value": doc_id}]
    query_items_response = container.query_items(
        query=query_text,
        parameters=query_parameters,
        enable_cross_partition_query=True,
    )
    data = list(query_items_response)
    print(f"Data count before update: {len(data)}")

    for item in data:
        print(f"Updating item with URL: {item['url']} and doctype: {item['doctype']}")
        item["categories"] = []
        item["doctype"] = "pdfDocument"
        container.upsert_item(item)
        print(f"Updated item: {item}")

    # Run the same query again to see what is stored after the upsert.
    updated_query_items_response = container.query_items(
        query=query_text,
        parameters=query_parameters,
        enable_cross_partition_query=True,
    )
    updated_data = list(updated_query_items_response)
    print(f"Updated data count: {len(updated_data)}")
    print(f"Updated data: {updated_data}")
Output:
Data count before update: 1
Updating item with URL: https://google.com/ and doctype: url
Updated item:
{
'id': '51c8415b-7d68-4f11-a1d6-e133d2d27f17',
'status': 'pending',
'url': 'https://google.com/',
'categories': [],
'doctype': 'pdfDocument'
}
Updated data count: 1
Updated data:
[
{
'id': '51c8415b-7d68-4f11-a1d6-e133d2d27f17',
'status': 'pending',
'url': 'https://google.com/',
'categories': [],
'doctype': 'pdfDocument',
}
]
Upvotes: 0