Reputation: 37
I am working on creating a Knowledge-Based Recommender System for movies using the MovieLens Small dataset and retrieving additional information from DBpedia. I have implemented a solution that extracts the directors for all the movies in the dataset present in DBpedia, but it is quite slow when loading. Could you suggest a way to speed up the search?
def fetch_director_from_dbpedia(title):
    """Look up the director of a movie on the public DBpedia SPARQL endpoint.

    The title is first passed through ``normalize_title`` (defined elsewhere;
    presumably it strips the release year -- confirm against that helper).
    The query then looks for a ``dbo:Film`` whose English ``foaf:name`` is
    exactly that normalized title.

    Args:
        title: Movie title as it appears in the dataset.

    Returns:
        The ``foaf:name`` of the director taken from the last result row,
        or ``None`` when the query returns no rows.
    """
    movie_title_clean = normalize_title(title)
    print(f"Searching for director of: {movie_title_clean}")  # Debug: show the normalized title

    # Escape the characters that would otherwise terminate or corrupt the
    # SPARQL string literal. Backslash must be handled first so the escapes
    # added afterwards are not doubled.
    safe_title = (
        movie_title_clean
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setReturnFormat(JSON)

    # SPARQL query: find the director of the film with the exact title.
    query = f"""
    SELECT ?director
    WHERE {{
    ?film rdf:type dbo:Film .
    ?film foaf:name "{safe_title}"@en .
    ?film dbo:director ?directorResource .
    ?directorResource foaf:name ?director .
    }}
    """
    sparql.setQuery(query)
    results = sparql.query().convert()
    print(f"Query result: {results}")  # Debug: show the raw query result

    director = None
    bindings = results['results']['bindings']
    if bindings:
        # Every row is reported; the value from the last row is the one returned.
        for result in bindings:
            director = result['director']['value']
            print(f"Director found: {director}")  # Debug: show the director found
    else:
        print(f"No director found for: {movie_title_clean}")  # Debug: no director matched
    return director
In main:
# Step 2: Favorite director
st.markdown('<div class="subheader">Do you have favorite director?</div>', unsafe_allow_html=True)

# Extract the movie titles from the dataset.
movie_titles = movies['title'].tolist()
print(f"Extracted {len(movie_titles)} movie titles.")  # Debug: how many titles were extracted

# Each lookup is an independent, blocking HTTP request, so issuing them one
# after another makes the total time the sum of all round trips. A small
# thread pool lets those waits overlap.
from concurrent.futures import ThreadPoolExecutor

# Drop repeated titles (first-seen order is kept) so no title is queried twice.
titles_to_query = list(dict.fromkeys(movie_titles))

# NOTE(review): the worker count is kept deliberately small to stay polite to
# the public DBpedia endpoint -- tune it against the endpoint's usage limits.
with ThreadPoolExecutor(max_workers=8) as executor:
    # executor.map yields results in the same order as titles_to_query.
    fetched_directors = list(executor.map(fetch_director_from_dbpedia, titles_to_query))

# Dictionary mapping each movie title to its director.
directors = {}
for title, director in zip(titles_to_query, fetched_directors):
    if director:
        directors[title] = director
    else:
        # Movies without a director on DBpedia are left out of the mapping.
        print(f"Skipping {title} as no director was found.")
print(f"Directors dictionary: {directors}")  # Debug: show the directors dictionary

# Unique, alphabetically sorted directors used to populate the selectbox.
unique_directors = sorted(set(directors.values()))
print(f"Unique directors: {unique_directors}")  # Debug: show the unique directors found
print(f"Total unique directors: {len(unique_directors)}")  # Count of unique directors
Thank you
I expect a list of all directors for the movies present in the dataset, retrieved from DBpedia.
Upvotes: 0
Views: 95