Robin White
Robin White

Reputation: 169

How to search a specific path using node labels and return node ids in Networkx?

I have a network graph using Networkx where each node has an id: 1 to N. Each node also has a label which is a string (for simplicity, letters). I want to be able to search a specific list of these labels and find the node id corresponding to the path. For example, from the network below I want to find the path ['l', 'a', 'o'] and return the node id, correct answer being [1, 4, 5]:

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt


def set_node_labels(G, arr):
    values = arr.flatten()
    labels = {}
    for node in G.nodes():
        labels[node] = values[node]
    nx.set_node_attributes(G, labels, "label")
    return labels

test_array = np.array([['f', 'l', 'p'],
                       ['r', 'a', 'o'],
                       ['b', 't', 'd']])


W,H = test_array.shape

index_array = np.arange(0, len(test_array.ravel()), 1, dtype=np.uint8).reshape((W,H))
print(index_array)


node_connections = []
for j in range(H-1):
    for i in range(W-1):
        n1 = (index_array[j, i], index_array[j, i+1])
        n2 = (index_array[j, i], index_array[(j+1), i])
        n3 = (index_array[j, i], index_array[(j+1), i+1])

        node_connections.extend([n1, n2, n3])
    n4 = (index_array[j, W-1], index_array[(j + 1), W-1])
    node_connections.extend([n4])

print(node_connections)

# create graph
G = nx.MultiGraph()
G.add_edges_from(node_connections)

labels = set_node_labels(G, test_array)

This has two troubles: 1) How to search a specific path, not just source and destination; how to search using labels and return id? Any help is greatly appreciated.

Upvotes: 0

Views: 332

Answers (1)

Robin White
Robin White

Reputation: 169

I managed to figure a result. It may not be the most efficient but if anyone else thinks of a better way, please post. Adding this in case this is potentially helpful to anyone.

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt


def set_node_labels(G, arr):
    values = arr.flatten()
    labels = {}
    for node in G.nodes():
        labels[node] = values[node]
    nx.set_node_attributes(G, labels, "label")
    return labels


def get_possible_seq(seq, idx_array, letter_array, G):
    found_seqs = []
    found_sqs_loc = []
    first_letter = seq[0]
    last_letters = seq[1:]
    first_letter_idx = idx_array[np.where(letter_array == first_letter)]
    last_letters_idx = []
    for letters in last_letters:
        last_letters_idx.extend(idx_array[np.where(letter_array == [letter for letter in letters])])

    for s in first_letter_idx:

        # correction for double counting on start letter position
        last_letters_idx = np.array(last_letters_idx)[last_letters_idx != s]

        for path in nx.all_simple_paths(G, s, last_letters_idx, cutoff=len(seq) - 1):
            string = []
            for node in path:
                string.extend(G.nodes[node]['label'])
            found_seqs.append(string)
            found_sqs_loc.append(path)
    return found_seqs, found_sqs_loc


test_array = np.array([['f', 'l', 'p'],
                       ['r', 'a', 'o'],
                       ['b', 't', 'd']])

test_seqs = ['lao']

W,H = test_array.shape

index_array = np.arange(0, len(test_array.ravel()), 1, dtype=np.uint8).reshape((W,H))

node_connections = []
for j in range(H-1):
    for i in range(W-1):
        n1 = (index_array[j, i], index_array[j, i+1])
        n2 = (index_array[j, i], index_array[(j+1), i])
        n3 = (index_array[j, i], index_array[(j+1), i+1])

        node_connections.extend([n1, n2, n3])
    n4 = (index_array[j, W-1], index_array[(j + 1), W-1])
    node_connections.extend([n4])

# create graph
G = nx.MultiGraph()
G.add_edges_from(node_connections)

labels = set_node_labels(G, test_array)

found_seqs, found_seqs_loc = get_possible_seq(test_seqs[0], index_array, test_array, G)
print(found_seqs)
print(found_seqs_loc)
print(list(test_seqs[0]))
target_seq_idx = found_seqs.index(list(test_seqs[0]))
print(target_seq_idx)
print(found_seqs_loc[target_seq_idx])

This gives correct answer of [1, 4, 5]. However this will only work iff there is only one instance of the sequence in question. It will just give the first instance if not.

Upvotes: 1

Related Questions