Nhyi
Nhyi

Reputation: 383

Not successfully reading a string in a dictionary value?

I'm creating a search engine to search for recipes. I have a JSON file that has been loaded into the dictionary recipes. I am trying to count for how many times a specific work in a token has appeared and if so, add one to the counter value. In this case title_c etc adds one when the string is encountered in the dictionary value that corresponds to the 'title' key.

import json
import numpy as np
import string

file = open('recipes.json') 
recipes = json.load(file)

def tokenisation(input_string):
    
    #functions to remove digits and punctuation and replace it with whitespace
    d_translate = str.maketrans(string.digits, ' '*len(string.digits))
    p_translate = str.maketrans(string.punctuation, ' '*len(string.punctuation))
    
    #clean the string
    new_string = input_string.translate(d_translate)
    new_string = new_string.translate(p_translate)
    new_string = new_string.lower()
    
    #split the string
    splitted_string = new_string.split(" ")
    
    #make a list to store tokens in
    tokens = []
    
    #checking length of token
    for token in splitted_string:
        if len(token) > 3:
            tokens.append(token)
    
    return tokens

def search(query, ordering = 'normal', count = 10):
    
    token_list = tokenisation(query)
    
    for recipe in recipes:
        
        title_c = 0
        cat_c = 0
        ing_c = 0
        dire_c = 0         
                    
        
        for token in token_list:
            for key, value in recipe.items():

                if (token in recipe.values()) and (key == 'title'):
                    title_c += 1
                elif (token in recipe.values()) and (key == 'categories'):
                    cat_c += 1
                elif (token in recipe.values()) and (key == 'ingredients'):
                    ing_c += 1
                elif (token in recipe.values()) and (key == 'directions'):
                    dire_c += 1      

search('cheese!cheddar', 'normal', 10)

At the end the values in the for loop near the end should be non zero but when printing they are. I have made sure that the search query has the tokens that appear in the first "recipe" below:

{
  "title": "\"Adult\" Pimiento Cheese ",
  "categories": [
   "Cheese",
   "Vegetable",
   "No-Cook",
   "Vegetarian",
   "Quick & Easy",
   "Cheddar",
   "Hot Pepper",
   "Winter",
   "Gourmet",
   "Alabama"
  ],
  "ingredients": [
   "2 or 3 large garlic cloves",
   "a 2-ounce jar diced pimientos",
   "3 cups coarsely grated sharp Cheddar (preferably English, Canadian, or Vermont; about 12 ounces)",
   "1/3 to 1/2 cup mayonnaise",
   "crackers",
   "toasted baguette slices",
   "crudit\u00e9s"
  ],
  "directions": [
   "Force garlic through a garlic press into a large bowl and stir in pimientos with liquid in jar. Add Cheddar and toss mixture to combine well. Stir in mayonnaise to taste and season with freshly ground black pepper. Cheese spread may be made 1 day ahead and chilled, covered. Bring spread to room temperature before serving.",
   "Serve spread with accompaniments."
  ],
  "rating": 3.125
}

Upvotes: 0

Views: 63

Answers (1)

Halmon
Halmon

Reputation: 1077

The first item in the JSON is "title" which is a string. When you call recipe.items() it would break since strings don't have a items() to call. Adding a string type check before it allows the code to run successfully.

import json
import numpy as np
import string

file = open('recipes.json') 
recipes = json.load(file)

def tokenisation(input_string):
    
    #functions to remove digits and punctuation and replace it with whitespace
    d_translate = str.maketrans(string.digits, ' '*len(string.digits))
    p_translate = str.maketrans(string.punctuation, ' '*len(string.punctuation))
    
    #clean the string
    new_string = input_string.translate(d_translate)
    new_string = new_string.translate(p_translate)
    new_string = new_string.lower()
    
    #split the string
    splitted_string = new_string.split(" ")
    
    #make a list to store tokens in
    tokens = []
    
    #checking length of token
    for token in splitted_string:
        if len(token) > 3:
            tokens.append(token)
    
    return tokens

def search(query, ordering = 'normal', count = 10):
    
    token_list = tokenisation(query)
    
    for recipe in recipes:
        print(recipe)
        
        title_c = 0
        cat_c = 0
        ing_c = 0
        dire_c = 0         
                    
        
        for token in token_list:
            if not isinstance(recipe, str): # Make sure we aren't checking the string for items()
              for key, value in recipe.items():

                  if (token in recipe.values()) and (key == 'title'):
                      title_c += 1
                  elif (token in recipe.values()) and (key == 'categories'):
                      cat_c += 1
                  elif (token in recipe.values()) and (key == 'ingredients'):
                      ing_c += 1
                  elif (token in recipe.values()) and (key == 'directions'):
                      dire_c += 1      
search('cheese!cheddar', 'normal', 10)

Upvotes: 1

Related Questions