saretta2
saretta2

Reputation: 37

Optimizing DBpedia Queries for a Knowledge-Based Movie Recommender System

I am working on creating a Knowledge-Based Recommender System for movies using the MovieLens Small dataset and retrieving additional information from DBpedia. I have implemented a solution that extracts the directors for all the movies in the dataset present in DBpedia, but it is quite slow when loading. Could you suggest a way to speed up the search?

def fetch_director_from_dbpedia(title):
    """Look up the director of a movie on the public DBpedia SPARQL endpoint.

    The title is first passed through ``normalize_title`` (defined elsewhere
    in this file) and then matched exactly against the English ``foaf:name``
    of resources typed ``dbo:Film``.

    Args:
        title: Movie title as it appears in the dataset.

    Returns:
        The ``foaf:name`` of the director taken from the last binding of the
        result set, or ``None`` when the query returns no bindings.

    Note:
        The query declares no ``PREFIX`` lines, so it relies on the endpoint
        resolving ``rdf:``, ``dbo:`` and ``foaf:`` by itself. Errors raised by
        the HTTP request or by the result conversion are not caught here.
    """
    movie_title_clean = normalize_title(title)

    print(f"Searching for director of: {movie_title_clean}")  # Debug: show the normalized title
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setReturnFormat(JSON)

    # Escape the characters that would otherwise terminate or corrupt the
    # double-quoted SPARQL string literal (titles containing a quote or a
    # backslash would produce a malformed, or unintentionally altered, query).
    title_literal = (
        str(movie_title_clean)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    # SPARQL query that looks for the director of the film with the exact title
    query = f"""
    SELECT  ?director
    WHERE {{
        ?film rdf:type dbo:Film .
        ?film foaf:name "{title_literal}"@en .
        ?film dbo:director ?directorResource .
        ?directorResource foaf:name ?director .
    }}
    """

    sparql.setQuery(query)
    results = sparql.query().convert()

    print(f"Query result: {results}")  # Debug: show the raw query result

    director = None
    bindings = results['results']['bindings']
    if bindings:
        # Every binding is reported; the value of the last one is returned.
        for result in bindings:
            director = result['director']['value']
            print(f"Director found: {director}")  # Debug: show the director found
    else:
        print(f"No director found for: {movie_title_clean}")  # Debug: no director was found

    return director

In `main`:

 # Step 2: Favorite director
st.markdown('<div class="subheader">Do you have favorite director?</div>', unsafe_allow_html=True)

# Extract the movie titles from the dataset
movie_titles = movies['title'].tolist()

print(f"Extracted {len(movie_titles)} movie titles.")  # Debug: how many titles were extracted

# Streamlit re-executes the script on every widget interaction. Keeping the
# lookup results in the per-session state means each title is sent to DBpedia
# only once per session (misses are remembered as None as well), instead of
# re-running one network query per movie on every rerun.
if "director_lookup_cache" not in st.session_state:
    st.session_state["director_lookup_cache"] = {}
director_lookup_cache = st.session_state["director_lookup_cache"]

# Fetch the directors from DBpedia for the titles not looked up yet
for title in movie_titles:
    if title in director_lookup_cache:
        continue
    director = fetch_director_from_dbpedia(title)
    director_lookup_cache[title] = director
    if not director:
        print(f"Skipping {title} as no director was found.")  # Title without a director is left out

# Dictionary mapping each movie title to its director (titles without a
# director are omitted), in dataset order
directors = {
    title: director_lookup_cache[title]
    for title in movie_titles
    if director_lookup_cache[title]
}

print(f"Directors dictionary: {directors}")  # Debug: show the directors dictionary

# Collect the unique directors used to populate the selectbox
unique_directors = sorted(set(directors.values()))

print(f"Unique directors: {unique_directors}")  # Debug: show the unique directors found
print(f"Total unique directors: {len(unique_directors)}")  # Count the unique directors

Thank you

I expect to get a list of all the directors of the movies in the dataset, retrieved from DBpedia.

Upvotes: 0

Views: 95

Answers (0)

Related Questions