jmora
jmora

Reputation: 491

Tarjan's strongly connected components algorithm in python not working

I implemented the Tarjan's strongly connected components algorithm, according to wikipedia, in Python, but it isn't working. The algorithm is quite short and I cannot find any difference, so I cannot tell why it isn't working. I tried to check the original paper, but could not find it.

Here is the code.

def strongConnect(v):
  global E, idx, CCs, c, S
  idx[v] = (c, c) #idx[v][0] for v.index # idx[v][1] for v.lowlink
  c += 1
  S.append(v)  
  for w in [u for (v2, u) in E if v == v2]:
    if idx[w][0] < 0:
      strongConnect(w)
      # idx[w] = (idx[w][0], min(idx[v][1], idx[w][1])) #fixed, thx
      idx[v] = (idx[v][0], min(idx[v][1], idx[w][1]))
    elif w in S:
      idx[v] = (idx[v][0], min(idx[v][1], idx[w][0]))
  if (idx[v][0] == idx[v][1]):
    i = S.index(v)
    CCs.append(S[i:])
    S = S[:i]

E = [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'), ('E', 'A'), ('A', 'E'), ('C', 'A'), ('C', 'E'), ('D', 'F'), ('F', 'B'), ('E', 'F')]
idx = {}
CCs = []
c = 0
S = []
for (u, v) in E:
  idx[u] = (-1, -1)
  idx[v] = (-1, -1)
for v in idx.keys():
  if idx[v][0] < 0:
    strongConnect(v)

print(CCs)

You can check the graph visually if you prefer. As you can see this is a quite forward translation of the pseudocode in wikipedia. However, this is the output:

[['D', 'E', 'F'], ['B', 'C'], ['A']]

There should be only one strongly connected component, not three. I hope the question is right in all its aspects, if not I'm sorry. In any case, thank you very much.

Upvotes: 15

Views: 9669

Answers (2)

mic
mic

Reputation: 1266

I modified senderle's answer for Python 3.6+ and added type hints and comments so that it made more sense to me.

from itertools import chain
from collections import defaultdict
from typing import Iterable, DefaultDict, List, Dict, Generic, TypeVar, Tuple, Set

T = TypeVar('T')  # label for a vertex

class Graph(Generic[T]):
    def __init__(self, edges: Iterable[Tuple[T, T]], vertices: Iterable[T] = ()):
        edges = [list(x) for x in edges]
        self.edges = edges
        self.vertices: Set[T] = set(chain(*edges)).union(vertices)
        self.adj_list: DefaultDict[T, List[T]] = defaultdict(list)  # i.e., neighbors of a given node
        for head, tail in self.edges:
            self.adj_list[head].append(tail)

    @classmethod
    def from_dict(cls, edge_dict: Dict[T, Iterable[T]]):
        return cls((k, v) for k, vs in edge_dict.items() for v in vs)

def strongly_connected_components(graph: Graph[T]) -> List[List[T]]:
    idx = 0  # index to be assigned to the next node
    node_idxs: Dict[T, int] = {}  # index of a visited node
    lowlink: Dict[T, int] = {}  # low-link number is the lowest node number (index) reachable by the node that is in the same connected component – its own number, or the low-link number of a previous unvisited neighbor, or the node number of a visited neighbor in the stack
    stack: List[T] = []
    connected_components: List[List[T]] = []

    def visit(head: T) -> None:
        nonlocal idx
        lowlink[head] = node_idxs[head] = idx
        idx += 1
        stack.append(head)

        for neighbor in graph.adj_list[head]:
            if neighbor not in node_idxs:  # i.e., not visited
                visit(neighbor)
                lowlink[head] = min(lowlink[head], lowlink[neighbor])
            elif node_idxs[neighbor] < node_idxs[head]:
                if neighbor in stack:
                    lowlink[head] = min(lowlink[head], node_idxs[neighbor])

        if lowlink[head] == node_idxs[head]:
            component: List[T] = []
            while stack and node_idxs[stack[-1]] >= node_idxs[head]:
                component.append(stack.pop())
            connected_components.append(component)

    for v in graph.vertices:
        if v not in node_idxs:
            visit(v)
    return connected_components

if __name__ == '__main__':
    edges = [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'),
                ('E', 'A'), ('A', 'E'), ('C', 'A'), ('C', 'E'),
                ('D', 'F'), ('F', 'B'), ('E', 'F')]
    print(strongly_connected_components(Graph(edges)))  # [['F', 'D', 'C', 'B', 'A', 'E']]
    edge_dict = {'a':['b', 'c', 'd'],
                    'b':['c', 'a'],
                    'c':['d', 'e'],
                    'd':['e'],
                    'e':['c']}
    print(strongly_connected_components(Graph.from_dict(edge_dict)))  # [['e', 'd', 'c'], ['b', 'a']]

Upvotes: 0

senderle
senderle

Reputation: 151007

Ok, I had some more time to think about this. I'm no longer certain that filtering the edges was the problem, as I previously stated. In fact, I think there's an ambiguity in the pseudocode; does for each (v, w) in E mean for each edge (as the literal meaning of for each suggests), or only each edge beginning with v, (as you reasonably assumed)? Then, after the for loop, is the v in question the final v from the for loop, as it would be in Python? Or does that go back to being the original v? Pseudocode doesn't have clearly defined scoping behavior in this case! (It would be really weird if the v at the end were to be the last, arbitrary, value of v from the loop. That suggests that filtering is correct, because in that case, v means the same thing all the way through.)

However, under any circumstances, the clear error in your code is here:

  idx[w] = (idx[w][0], min(idx[v][1], idx[w][1]))

According to the pseudocode, that should definitely be

  idx[v] = (idx[v][0], min(idx[v][1], idx[w][1]))

Once you make that change, you get the expected result. Frankly it doesn't surprise me that you made that mistake, because you're using a really weird and counterintuitive data structure. Here's what I think is an improvement -- it adds only a few more lines, and I find it to be much more readable.

import itertools

def strong_connect(vertex):
    global edges, indices, lowlinks, connected_components, index, stack
    indices[vertex] = index
    lowlinks[vertex] = index
    index += 1
    stack.append(vertex)

    for v, w in (e for e in edges if e[0] == vertex):
        if indices[w] < 0:
            strong_connect(w)
            lowlinks[v] = min(lowlinks[v], lowlinks[w])
        elif w in stack:
            lowlinks[v] = min(lowlinks[v], indices[w])

    if indices[vertex] == lowlinks[vertex]:
        connected_components.append([])
        while stack[-1] != vertex:
            connected_components[-1].append(stack.pop())
        connected_components[-1].append(stack.pop())

edges = [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'), 
         ('E', 'A'), ('A', 'E'), ('C', 'A'), ('C', 'E'), 
         ('D', 'F'), ('F', 'B'), ('E', 'F')]
vertices = set(v for v in itertools.chain(*edges))
indices = dict((v, -1) for v in vertices)
lowlinks = indices.copy()
connected_components = []

index = 0
stack = []
for v in vertices:
    if indices[v] < 0:
        strong_connect(v)

print(connected_components)

However, I find the use of global variables here distasteful. You could hide this away in its own module, but I prefer the idea of creating a callable class. After looking more closely at Tarjan's original pseudocode, (which confirms that the "filtered" version is correct, by the way), I wrote this. It includes a simple Graph class and does couple of basic tests:

from itertools import chain
from collections import defaultdict

class Graph(object):
    def __init__(self, edges, vertices=()):
        edges = list(list(x) for x in edges)
        self.edges = edges
        self.vertices = set(chain(*edges)).union(vertices)
        self.tails = defaultdict(list)
        for head, tail in self.edges:
            self.tails[head].append(tail)

    @classmethod
    def from_dict(cls, edge_dict):
        return cls((k, v) for k, vs in edge_dict.iteritems() for v in vs)

class _StrongCC(object):
    def strong_connect(self, head):
        lowlink, count, stack = self.lowlink, self.count, self.stack
        lowlink[head] = count[head] = self.counter = self.counter + 1
        stack.append(head)

        for tail in self.graph.tails[head]:
            if tail not in count:
                self.strong_connect(tail)
                lowlink[head] = min(lowlink[head], lowlink[tail])
            elif count[tail] < count[head]:
                if tail in self.stack:
                    lowlink[head] = min(lowlink[head], count[tail])

        if lowlink[head] == count[head]:
            component = []
            while stack and count[stack[-1]] >= count[head]:
                component.append(stack.pop())
            self.connected_components.append(component)

    def __call__(self, graph):
        self.graph = graph
        self.counter = 0
        self.count = dict()
        self.lowlink = dict()
        self.stack = []
        self.connected_components = []

        for v in self.graph.vertices:
            if v not in self.count:
                self.strong_connect(v)

        return self.connected_components

strongly_connected_components = _StrongCC()

if __name__ == '__main__':
    edges = [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'),
             ('E', 'A'), ('A', 'E'), ('C', 'A'), ('C', 'E'),
             ('D', 'F'), ('F', 'B'), ('E', 'F')]
    print strongly_connected_components(Graph(edges))
    edge_dict = {'a':['b', 'c', 'd'],
                 'b':['c', 'a'],
                 'c':['d', 'e'],
                 'd':['e'],
                 'e':['c']}
    print strongly_connected_components(Graph.from_dict(edge_dict))

Upvotes: 16

Related Questions