Bikas Katwal
Bikas Katwal

Reputation: 2055

Slow arango DB Graph traversal - group and ranking results based on multiple scoring attribute

I am running below AQL query, that does relevant nodes search. If I run with depth one it takes about 200-300ms and when I try depth 2, its almost 2 mins :)

From the query you would see, I am trying to fetch all the job_id vertices that are connected to a set of start vertices and using a sort of relevance score to rank them.

is there anything I can change to optimise this query? or is there a better way to do it?

I have about 100,000 nodes and 12,000,000 edges.

LET start_vertices = [
    "skill/ae1258a4-0f85-3485-a6f9-09627c75098b",
    "skill/9f79967e-fee8-3d70-9016-101de169736f"
]

LET additional_weight = {
    "skill/b8f90723-e0c1-3826-a58b-80dd88be98b": 0.6,
    "skill/b8f90723-e0c1-3826-a58b-80dd88be98b": 0.8
}

LET vertices_boost = {
    "title": 5,
    "skill": 4,
    "location": 10,
    "team": 2
}

LET vertices_type = ["job_id"]
LET edges = [
    "has_job_title",
    "requires_skill",
    "team_has_open_job",
    "location_has_open_job",
    "is_related_to_skill"
]

LET w1 = 5
LET w2 = 2
LET w3 = 1

LET job_scores = (
    FOR id IN start_vertices
    FOR vertex, edge, path IN 1..2
        ANY id
        GRAPH "KG"
        OPTIONS { uniqueEdges: "path" }
        FILTER (path.edges[*].type ALL IN edges) 
        FILTER vertex.type IN vertices_type
        LET relevance_score = 
            w1 * (additional_weight[path.vertices[1]._id] || 0) +
            w2 * log(MAX(path.edges[*].properties.edgeCount)) +
            w3 * (vertices_boost[path.vertices[1]._type] || 0)
        RETURN { job_id: vertex._id, relevance_score, vertex }
)

LET aggregated_scores = (
    FOR job IN job_scores
    COLLECT job_id = job.job_id INTO group = job
    LET total_score = SUM(group[*].relevance_score)
    RETURN {
        job_id: job_id,
        relevance_score: total_score,
        vertex: UNSET(group[0].vertex, 'embedding')
    }
)

FOR job IN aggregated_scores
    SORT job.relevance_score DESC
    limit 0, 10
    RETURN job

Upvotes: 0

Views: 16

Answers (0)

Related Questions