Reputation: 383
I'm creating a search engine to search for recipes. I have a JSON file that has been loaded into the dictionary recipes. I am trying to count for how many times a specific work in a token has appeared and if so, add one to the counter value. In this case title_c etc adds one when the string is encountered in the dictionary value that corresponds to the 'title' key.
import json
import numpy as np
import string
file = open('recipes.json')
recipes = json.load(file)
def tokenisation(input_string):
#functions to remove digits and punctuation and replace it with whitespace
d_translate = str.maketrans(string.digits, ' '*len(string.digits))
p_translate = str.maketrans(string.punctuation, ' '*len(string.punctuation))
#clean the string
new_string = input_string.translate(d_translate)
new_string = new_string.translate(p_translate)
new_string = new_string.lower()
#split the string
splitted_string = new_string.split(" ")
#make a list to store tokens in
tokens = []
#checking length of token
for token in splitted_string:
if len(token) > 3:
tokens.append(token)
return tokens
def search(query, ordering = 'normal', count = 10):
token_list = tokenisation(query)
for recipe in recipes:
title_c = 0
cat_c = 0
ing_c = 0
dire_c = 0
for token in token_list:
for key, value in recipe.items():
if (token in recipe.values()) and (key == 'title'):
title_c += 1
elif (token in recipe.values()) and (key == 'categories'):
cat_c += 1
elif (token in recipe.values()) and (key == 'ingredients'):
ing_c += 1
elif (token in recipe.values()) and (key == 'directions'):
dire_c += 1
search('cheese!cheddar', 'normal', 10)
At the end the values in the for loop near the end should be non zero but when printing they are. I have made sure that the search query has the tokens that appear in the first "recipe" below:
{
"title": "\"Adult\" Pimiento Cheese ",
"categories": [
"Cheese",
"Vegetable",
"No-Cook",
"Vegetarian",
"Quick & Easy",
"Cheddar",
"Hot Pepper",
"Winter",
"Gourmet",
"Alabama"
],
"ingredients": [
"2 or 3 large garlic cloves",
"a 2-ounce jar diced pimientos",
"3 cups coarsely grated sharp Cheddar (preferably English, Canadian, or Vermont; about 12 ounces)",
"1/3 to 1/2 cup mayonnaise",
"crackers",
"toasted baguette slices",
"crudit\u00e9s"
],
"directions": [
"Force garlic through a garlic press into a large bowl and stir in pimientos with liquid in jar. Add Cheddar and toss mixture to combine well. Stir in mayonnaise to taste and season with freshly ground black pepper. Cheese spread may be made 1 day ahead and chilled, covered. Bring spread to room temperature before serving.",
"Serve spread with accompaniments."
],
"rating": 3.125
}
Upvotes: 0
Views: 63
Reputation: 1077
The first item in the JSON is "title" which is a string. When you call recipe.items()
it would break since strings don't have a items()
to call. Adding a string type check before it allows the code to run successfully.
import json
import numpy as np
import string
file = open('recipes.json')
recipes = json.load(file)
def tokenisation(input_string):
#functions to remove digits and punctuation and replace it with whitespace
d_translate = str.maketrans(string.digits, ' '*len(string.digits))
p_translate = str.maketrans(string.punctuation, ' '*len(string.punctuation))
#clean the string
new_string = input_string.translate(d_translate)
new_string = new_string.translate(p_translate)
new_string = new_string.lower()
#split the string
splitted_string = new_string.split(" ")
#make a list to store tokens in
tokens = []
#checking length of token
for token in splitted_string:
if len(token) > 3:
tokens.append(token)
return tokens
def search(query, ordering = 'normal', count = 10):
token_list = tokenisation(query)
for recipe in recipes:
print(recipe)
title_c = 0
cat_c = 0
ing_c = 0
dire_c = 0
for token in token_list:
if not isinstance(recipe, str): # Make sure we aren't checking the string for items()
for key, value in recipe.items():
if (token in recipe.values()) and (key == 'title'):
title_c += 1
elif (token in recipe.values()) and (key == 'categories'):
cat_c += 1
elif (token in recipe.values()) and (key == 'ingredients'):
ing_c += 1
elif (token in recipe.values()) and (key == 'directions'):
dire_c += 1
search('cheese!cheddar', 'normal', 10)
Upvotes: 1