I’m working on a project where I need to validate a list of SKUs (stored in a CSV file) against the SKUs present in my Shopify store. My goal is to determine which SKUs from the CSV file are present in the store and which ones are missing. Here's what I’ve done so far, the issue I’m running into, and the full script I’m using.
I have a CSV file (skus.csv) containing a list of SKUs to validate. The script runs a bulk operation to fetch every SKU in the store and saves them to a CSV file (store_skus.csv), then compares the SKUs in store_skus.csv with those in my skus.csv. The results are written to a new CSV file (sku_comparison_results.csv) showing which SKUs are present or missing.
Despite the bulk operation completing successfully, all SKUs from my input CSV file are marked as "Missing," even though I manually verified that some of them exist in the store.
I suspect something in the script below is causing this, but I haven't been able to pin it down. Here is the full script I'm using:
import json
import time

import requests
import pandas as pd
SHOPIFY_STORE_DOMAIN = "your-store.myshopify.com"
ACCESS_TOKEN = "your-access-token"
API_URL = f"https://{SHOPIFY_STORE_DOMAIN}/admin/api/2023-01/graphql.json"
def start_bulk_operation():
"""Start a bulk operation to fetch all SKUs."""
headers = {
"Content-Type": "application/json",
"X-Shopify-Access-Token": ACCESS_TOKEN,
}
query = {
"query": """
mutation {
bulkOperationRunQuery(
query: "{ products(first: 100000) { edges { node { variants(first: 100) { edges { node { sku } } } } } } }"
) {
bulkOperation {
id
status
}
userErrors {
field
message
}
}
}
"""
}
response = requests.post(API_URL, json=query, headers=headers)
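    # Note: the caller below ignores this return value, so any userErrors
    # reported by bulkOperationRunQuery are never surfaced.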
return response.json()
def get_bulk_operation_status():
"""Poll for the bulk operation status."""
headers = {
"Content-Type": "application/json",
"X-Shopify-Access-Token": ACCESS_TOKEN,
}
query = {
"query": """
{
currentBulkOperation {
id
status
url
}
}
"""
}
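    # Poll every 10 seconds until the operation reports COMPLETED or FAILED.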
while True:
response = requests.post(API_URL, json=query, headers=headers)
data = response.json()
status = data["data"]["currentBulkOperation"]["status"]
print(f"Bulk operation status: {status}")
if status == "COMPLETED":
return data["data"]["currentBulkOperation"]["url"]
elif status == "FAILED":
raise Exception("Bulk operation failed.")
time.sleep(10)
def download_skus(url):
"""Download SKUs from the store."""
response = requests.get(url)
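    # The downloaded result is JSONL: one JSON object per line.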
skus = []
for line in response.text.splitlines():
        product = json.loads(line)  # Parse one JSONL record into a dict
for variant in product.get("variants", {}).get("edges", []):
sku = variant["node"]["sku"]
if sku: # Skip empty SKUs
skus.append(sku.strip())
return skus
def fetch_store_skus():
print("Starting bulk operation to fetch all SKUs...")
start_bulk_operation()
print("Polling for bulk operation status...")
results_url = get_bulk_operation_status()
print("Downloading SKUs...")
skus = download_skus(results_url)
# Save SKUs to a CSV file
output_file = "store_skus.csv"
pd.DataFrame({"SKU": skus}).to_csv(output_file, index=False)
print(f"All store SKUs saved to {output_file}")
def compare_skus():
# Load SKUs from both files
store_skus_file = "store_skus.csv"
input_skus_file = "skus.csv"
output_file = "sku_comparison_results.csv"
store_skus = pd.read_csv(store_skus_file)["SKU"].tolist()
input_skus = pd.read_csv(input_skus_file)["SKU"].tolist()
# Compare SKUs
results = []
for sku in input_skus:
status = "Present" if sku in store_skus else "Missing"
results.append({"SKU": sku, "Status": status})
# Save results to a new CSV
pd.DataFrame(results).to_csv(output_file, index=False)
print(f"Comparison results saved to {output_file}")
If you’ve worked with the Bulk Operations API or have experience validating large lists of SKUs, I’d greatly appreciate your insights. How can I improve my process or troubleshoot the issue more effectively?
Thanks in advance for your help!