Reputation: 1233
I need an algorithm to partition the vertices of an undirected graph into one or more subgraphs, such that each subgraph is a complete graph (every vertex adjacent to every other vertex). Each vertex needs to be in exactly one of the subgraphs.
Here's an example:
input = [
(A, B),
(B, C),
(A, C),
(B, D),
(D, E),
]
output = myalgo(input) # [(A, B, C), (D, E)]
Here's an image that better describes the problem:
The input list is sorted in decreasing order by distance, that's why I connect A-B-C instead of B-D.
I thought this might be called "strongly connected components", and have already tried the following solutions:
Finding a Strongly Connected Components in unDirected Graphs: it's looking for something different
Finding all cycles in undirected graphs: it gives me many cycles and not the best, it doesn't care about the input order.
An algorithm to create clusters from data pairs in python: it connects all the components, just because there is a path between them (A-B-C-D-E).
Kosaraju's algorithm: it works only with a directed graph.
Upvotes: 3
Views: 2379
Reputation: 2720
Here's a class that implements the segmentation into complete subgraphs. It's by no means optimized and can likely be improved significantly, but it's a starting point
class SCCManager:
def __init__(self, edges):
self.clusters = []
self.edges = edges
def clusters_in(self, conn):
first, second = conn
f_clust = None
s_clust = None
for i, clust in enumerate(self.clusters):
if first in clust:
f_clust = i
if second in clust:
s_clust = i
# break early if both already found
if f_clust and s_clust:
break
return (f_clust, s_clust)
def all_connected(self, cluster, vertex):
for v in cluster:
connected = (v, vertex) in self.edges or (vertex, v) in self.edges
# break early if any element is not connected to the candidate
if not connected:
return False
return True
def get_scc(self):
for edge in self.edges:
c_first, c_second = self.clusters_in(edge)
# case 1: none of the vertices are in an existing cluster
# -> create new cluster containing the vertices
if c_first == c_second == None:
self.clusters.append([edge[0], edge[1]])
continue
# case 2: first is in a cluster, second isn't
# -> add to cluster if eligible
if c_first != None and c_second == None:
# check if the second is connected to all cluster components
okay = self.all_connected(self.clusters[c_first], edge[1])
# add to cluster if eligible
if okay:
self.clusters[c_first].append(edge[1])
continue
# case 3: other way round
if c_first == None and c_second != None:
okay = self.all_connected(self.clusters[c_second], edge[0])
if okay:
self.clusters[c_second].append(edge[0])
continue
# case 4: both are in different clusters
# -> merge clusters if allowed
if c_first != c_second:
# check if clusters can be merged
for v in self.clusters[c_first]:
merge = self.all_connected(self.clusters[c_second], v)
# break if any elements are not connected
if not merge:
break
# merge if allowed
if merge:
self.clusters[c_first].extend(self.clusters[c_second])
self.clusters.remove(self.clusters[c_second])
# case 5: both are in the same cluster
# won't happen if input is sane, but doesn't require an action either way
return self.clusters
... and here's a working example:
inp = [
('A', 'B'),
('B', 'C'),
('A', 'C'),
('B', 'D'),
('D', 'E'),
('C', 'E')
]
test = SCCManager(inp)
print(test.get_scc())
[['A', 'B', 'C'], ['D', 'E']]
Upvotes: 1
Reputation: 71451
You can find all paths, and then group by connectivity:
from itertools import groupby as gb
d = [('A', 'B'), ('B', 'C'), ('A', 'C'), ('B', 'D'), ('D', 'E')]
def connect_num(node):
return [sum(a == node for a, _ in d), sum(b == node for _, b in d)]
def group_paths(data):
new_d = sorted([[i, connect_num(i)] for i in data], key=lambda x:max(x[1]))
return [[k for k, _ in b] for _, b in gb(new_d, key=lambda x:max(x[1]))]
def get_paths(start, c = [], seen = []):
new_vals = [a for a, _ in d if a not in seen+c]
if (vals:=[b for a, b in d if a == start]):
for i in vals:
yield from get_paths(i, c=c+vals, seen=seen)
else:
yield c
for i in new_vals:
yield from get_paths(i, c = [i], seen=c+seen)
result = sorted(map(set, get_paths(d[0][0])), key=len, reverse=True)
new_result = [a for i, a in enumerate(result) if not any(all(k in c for k in a) for c in result[:i])]
final_result = [group_paths(i) for i in new_result]
Output:
#final_result[0]
[['E', 'D'], ['A', 'C', 'B']]
Upvotes: 0
Reputation: 195428
Another attempt:
lst = [
('A', 'B'),
('B', 'C'),
('A', 'C'),
('B', 'D'),
('D', 'E'),
]
d = {}
for i, j in lst:
d.setdefault(i, []).append(j)
d.setdefault(j, []).append(i)
from itertools import combinations
rv, seen_segments, seen_vertices = [], set(), set()
for k, v in d.items():
if len(v) == 1:
segment = set((k, v[0])).difference(seen_vertices)
seen_vertices.update(segment)
rv.append([tuple(segment), ])
else:
graph = []
for i, j in combinations([k] + v, 2):
if not j in d[i]:
break
else:
graph.append(tuple(sorted((i, j))))
else:
if graph:
graph = [segment for segment in graph if segment not in seen_segments]
seen_segments.update(graph)
if graph:
rv.append(graph)
from pprint import pprint
pprint(rv)
Prints:
[[('A', 'B'), ('A', 'C'), ('B', 'C')], [('D', 'E')]]
For input
lst = [
('A', 'B'),
('B', 'C'),
]
Prints:
[[('A', 'B')], [('C',)]]
For input:
lst = [
('A', 'B'),
('B', 'C'),
('C', 'D'),
]
Prints:
[[('B', 'A')], [('D', 'C')]]
Upvotes: 1
Reputation: 6324
from collections import defaultdict
def create_adjacency_matrix(connections):
matrix = defaultdict(dict)
for a, b in connections:
matrix[a][b] = 1
matrix[b][a] = 1
return matrix
def is_connected_to_all(vertex, group, matrix):
for v in group:
if vertex != v and vertex not in matrix[v]:
return False
return True
def group_vertexes(input):
matrix = create_adjacency_matrix(input)
groups = []
current_group = set()
for vertex in matrix.keys():
if is_connected_to_all(vertex, current_group, matrix):
current_group.add(vertex)
else:
groups.append(current_group)
current_group = {vertex}
groups.append(current_group)
return groups
input = [("A", "B"), ("B", "C"), ("A", "C"), ("B", "E"), ("E", "D")]
print(group_vertexes(input))
# [{'C', 'B', 'A'}, {'D', 'E'}]
WARNING: This relies on the fact that dict keeps insertion order in python 3.7+. In older versions you have to use matrix = DefaultOrderedDict(dict)
https://stackoverflow.com/a/35968897/9392216:
Upvotes: 1