Reputation: 2055
I am running the AQL query below, which searches for relevant nodes. With a traversal depth of 1 it takes about 200-300 ms, but with a depth of 2 it takes almost 2 minutes :)
As you can see from the query, I am trying to fetch all the job_id vertices that are connected to a set of start vertices, and I rank them using a relevance score.
Is there anything I can change to optimise this query, or is there a better way to do it?
I have about 100,000 nodes and 12,000,000 edges.
// Relevance search: find the "job_id" vertices reachable within 1..2 hops of a
// set of start vertices in graph "KG", score every matching path, sum the
// scores per job, and return the 10 best jobs (without their `embedding`).

// Vertices the traversal starts from.
LET start_vertices = [
  "skill/ae1258a4-0f85-3485-a6f9-09627c75098b",
  "skill/9f79967e-fee8-3d70-9016-101de169736f"
]

// Extra weight per vertex _id, looked up for the first vertex after the start.
// NOTE(review): the same key is listed twice below, so a lookup can only ever
// yield one of the two values; the second entry was presumably meant to be a
// different skill id. Confirm and correct the key.
LET additional_weight = {
  "skill/b8f90723-e0c1-3826-a58b-80dd88be98b": 0.6,
  "skill/b8f90723-e0c1-3826-a58b-80dd88be98b": 0.8
}

// Boost per vertex type, looked up for the first vertex after the start.
LET vertices_boost = {
  "title": 5,
  "skill": 4,
  "location": 10,
  "team": 2
}

// Only vertices of these types are returned as results.
LET vertices_type = ["job_id"]

// Only paths made up exclusively of these edge types are considered.
LET edges = [
  "has_job_title",
  "requires_skill",
  "team_has_open_job",
  "location_has_open_job",
  "is_related_to_skill"
]

// Weights of the three relevance-score components.
LET w1 = 5
LET w2 = 2
LET w3 = 1

// One { job_id, relevance_score } row per matching path.
// The full vertex document is deliberately NOT carried along here: it would be
// copied once per path (including its `embedding`), which is wasted work for
// every job that does not end up in the final top 10.
LET job_scores = (
  FOR id IN start_vertices
    FOR vertex, edge, path IN 1..2
      ANY id
      GRAPH "KG"
      OPTIONS { uniqueEdges: "path" }
      FILTER (path.edges[*].type ALL IN edges)
      FILTER vertex.type IN vertices_type
      // path.vertices[0] is the start vertex, so index 1 is the first hop
      // (the job itself for depth-1 paths, the intermediate vertex otherwise).
      LET first_hop = path.vertices[1]
      LET relevance_score =
        w1 * (additional_weight[first_hop._id] || 0) +
        w2 * LOG(MAX(path.edges[*].properties.edgeCount)) +
        // Uses `type`, the attribute the filters above read; the previous
        // `_type` did not match it, so the boost lookup fell back to 0.
        w3 * (vertices_boost[first_hop.type] || 0)
      RETURN { job_id: vertex._id, relevance_score }
)

// Sum the per-path scores for each job. AGGREGATE computes the sum while
// grouping instead of first materialising an array of rows per group.
LET aggregated_scores = (
  FOR job IN job_scores
    COLLECT job_id = job.job_id
    AGGREGATE total_score = SUM(job.relevance_score)
    RETURN {
      job_id: job_id,
      relevance_score: total_score
    }
)

// Rank the jobs and load the vertex document only for the 10 winners.
FOR job IN aggregated_scores
  SORT job.relevance_score DESC
  LIMIT 0, 10
  RETURN {
    job_id: job.job_id,
    relevance_score: job.relevance_score,
    vertex: UNSET(DOCUMENT(job.job_id), 'embedding')
  }
Upvotes: 0
Views: 16