Reputation: 49
I wrote a simple algorithm to generate 1000 random graphs given the total number of nodes and the degree of each node. The graph is undirected, allows multiple edges, and has no self-loops. My current algorithm is very slow:
Graph properties: no self-loops, undirected, multiple edges (i.e. many edges between the same pair of vertices)
from collections import defaultdict, Counter
import pickle

import numpy as np

# Build 1000 random undirected multigraphs (no self-loops).  Each of the
# 842814 edges is assigned to two distinct random nodes whose degree
# capacity (grid_ids_withlinks) is not yet exhausted.  When fewer than two
# nodes still have spare capacity, existing edges are rewired onto the
# single leftover node until its degree is satisfied.
count = 0          # completed-iteration counter (was previously undefined)
all_itr_list = []  # graphs built so far, used for duplicate detection (was previously undefined)

for iteration in range(1000):  # 1000 random graphs
    all_stop = False    # set once the current graph was finalized via rewiring
    all_stop_2 = False  # propagates all_stop out of the per-edge loop
    count = count + 1
    success_dict = defaultdict(list)   # edge id -> [(start node id, end node id)]
    popped_dict = defaultdict(dict)    # bookkeeping for nodes whose degree filled up
    grid_dens_dict = defaultdict(set)  # node id -> set of edge ids (final graph being built)
    # Placeholder edge list: 842814 edges, each to be connected to two nodes
    # in grid_ids_withlinks; summing all node degrees gives 842814 * 2.
    pfxlinks = list(range(842814))
    # node_id -> node degree (93 nodes total); entries are removed as capacities fill.
    grid_ids_withlinks = {1107415: 751065,1125583: 15256,1144686: 108969,1115625: 17038,1081048: 6749,1103814: 6476,1108340: 107431,1111992: 45946,1117451: 3594,1093803: 10860,1117452: 2126,1089226: 52518,1082859: 21211,1105613: 94587,1092862: 43891,1083786: 17073,1092899: 999,1141954: 4347,1106506: 2072,1094690: 119736,1116547: 3284,1104705: 2404,1135637: 3815,1121070:16598,1087417: 4514,1103777: 310,1114682: 4265,1091948: 5468,1093788: 2176, 1098316: 2067,1105597: 19090,1141055: 8454,1097427: 3041,1092875: 4159,1086500: 2204,1095619: 9732,1087430: 2041,1112884: 2167,1097413: 17056,1107414: 34769,1111088: 2025,1083768: 2176,1130180: 1886, 1144699: 988,1146499: 6818,1111081: 12509,1104687: 6186,1092866: 4272,1091037: 3,1121044: 39,1098333: 294,1118359: 27,1151091: 21,1107441: 10766,1141094: 3523,1102898: 53,1115634: 2199,1100140: 4347,1086515: 3029,1116505: 238,1082883: 4070,1118366:2065,1102866: 1590,1115631: 4345,1091990: 2131,1144703: 4053,1075589: 19,1081062: 2124,1097425: 11,1133804: 8,1112864: 158,1088307: 112,1138312: 112,1127446: 6245,1108356: 155,1082874: 6315,1115640: 3978,1107432: 2234,1131077: 2032,1115590: 2672,1094696: 13,1136502: 52,1094683: 20,1110183: 2,1113821: 56,1106515: 6,1120183: 11,1083765: 23,1101079: 6,1091944: 12,1085599: 10,1083783: 25,1148339: 6}
    for pfxpair in pfxlinks:
        start_put = False
        end_put = False
        if all_stop_2:
            break
        while True:
            if all_stop:
                all_stop_2 = True
                break
            try:
                # Two distinct random node ids; np.random.choice raises
                # ValueError when fewer than two candidates remain.
                grid_start_id, grid_end_id = np.random.choice(
                    list(grid_ids_withlinks.keys()), size=2, replace=False)
                grid_start_id = int(grid_start_id)
                grid_end_id = int(grid_end_id)
                if not start_put:
                    start_value = grid_dens_dict.get(grid_start_id)
                    # Accept the node if unseen or its degree is below capacity.
                    if (not start_value) or (len(grid_dens_dict[grid_start_id]) < grid_ids_withlinks[grid_start_id]):
                        grid_dens_dict[grid_start_id].add(pfxpair)
                        start_put = True
                        if len(grid_dens_dict[grid_start_id]) == grid_ids_withlinks[grid_start_id]:
                            # Node degree full: remove node from candidate pool.
                            try:
                                popped_dict[grid_start_id] = {'orig_capacity': grid_ids_withlinks[grid_start_id],
                                                              'size': len(grid_dens_dict[grid_start_id])}
                                grid_ids_withlinks.pop(grid_start_id)
                            except KeyError:  # narrowed from bare except
                                print('already popped')
                    else:
                        print('check')
                if not end_put:
                    end_value = grid_dens_dict.get(grid_end_id)
                    if (not end_value) or (len(grid_dens_dict[grid_end_id]) < grid_ids_withlinks[grid_end_id]):
                        grid_dens_dict[grid_end_id].add(pfxpair)
                        end_put = True
                        if len(grid_dens_dict[grid_end_id]) == grid_ids_withlinks[grid_end_id]:
                            try:
                                popped_dict[grid_end_id] = {'orig_capacity': grid_ids_withlinks[grid_end_id],
                                                            'size': len(grid_dens_dict[grid_end_id])}
                                grid_ids_withlinks.pop(grid_end_id)
                            except KeyError:  # narrowed from bare except
                                print('already popped')
                    else:
                        print('check')
                # Commit the edge only when BOTH endpoints accepted it;
                # otherwise roll back the half-assigned endpoint and retry.
                if not start_put and end_put:
                    grid_dens_dict[grid_end_id].discard(pfxpair)
                    end_put = False
                if start_put and not end_put:
                    grid_dens_dict[grid_start_id].discard(pfxpair)
                    start_put = False
                if start_put and end_put:
                    success_dict[pfxpair].append((grid_start_id, grid_end_id))
                    break
            except (ValueError, KeyError):  # narrowed from bare except
                # Fewer than two nodes left with spare capacity: start
                # breaking existing edges and rewiring them onto the single
                # remaining node.  NOTE(review): assumes exactly one node is
                # left in grid_ids_withlinks here — confirm.
                grid2grid = defaultdict(list)
                for k, v in success_dict.items():
                    grid2grid[v[0][0]].append(v[0][1])
                    grid2grid[v[0][1]].append(v[0][0])
                while True:
                    # Hoisted: the leftover node was recomputed via
                    # list(grid_ids_withlinks.keys())[0] six times per pass.
                    leftover = next(iter(grid_ids_withlinks))
                    # Pick 2 random node ids for rewiring.
                    pop_id1, pop_id2 = np.random.choice(
                        list(grid_dens_dict.keys()), size=2, replace=False)
                    pop_id1 = int(pop_id1)
                    pop_id2 = int(pop_id2)
                    # Both picked nodes must differ from the leftover node and
                    # must already share an assigned link.
                    if (pop_id1 != leftover and pop_id2 != leftover
                            and pop_id1 in grid2grid[pop_id2]
                            and pop_id2 in grid2grid[pop_id1]):
                        # Break the pop_id1 - pop_id2 edge and attach both
                        # freed endpoints to the leftover node instead.
                        grid2grid[pop_id1].remove(pop_id2)
                        grid2grid[pop_id2].remove(pop_id1)
                        grid2grid[leftover].append(pop_id1)
                        grid2grid[leftover].append(pop_id2)
                        grid2grid[pop_id1].append(leftover)
                        grid2grid[pop_id2].append(leftover)
                        if len(grid2grid[leftover]) == grid_ids_withlinks[leftover]:
                            # Leftover node saturated: finalize this graph.
                            for k, v in grid2grid.items():
                                grid2grid[k] = Counter(v)
                            if all_itr_list:
                                # Check whether the current graph equals any
                                # previously generated one.
                                for graph in all_itr_list:
                                    shared_items = {k: graph[k] for k in graph
                                                    if k in grid2grid and graph[k] == grid2grid[k]}
                                    # BUG FIX: the original compared an int to the
                                    # dict itself (len(...) == grid_ids_withlinks),
                                    # which is always False.  Compare with the
                                    # node count of the current graph.
                                    if len(shared_items) == len(grid2grid):
                                        # NOTE(review): this only stops the scan;
                                        # the duplicate is still stored below.
                                        break
                            all_itr_list.append(grid2grid)
                            filename = 'path'  # same file is overwritten each iteration
                            with open(filename, 'wb') as handle:
                                pickle.dump(grid2grid, handle)
                            all_stop = True
                            break
    print('iteration no:', count)
    if not all_stop:
        # Normal completion (no rewiring needed): build the adjacency
        # multiset representation and persist it.
        grid2grid = defaultdict(list)
        for k, v in success_dict.items():
            grid2grid[v[0][0]].append(v[0][1])
            grid2grid[v[0][1]].append(v[0][0])
        for k, v in grid2grid.items():
            grid2grid[k] = Counter(v)
        all_itr_list.append(grid2grid)
        filename = 'path'
        with open(filename, 'wb') as handle:
            pickle.dump(grid2grid, handle)
> ##from answer
import pickle
from collections import defaultdict
import numpy as np


def generate_random_multigraph(capacities):
    """Build one random undirected multigraph with no self-loops.

    capacities: dict mapping node id -> required degree.
    Returns a defaultdict(list) adjacency list where graph[u] lists each
    neighbor of u once per parallel edge.  If the degree sequence does not
    work out exactly, the last remaining node may end up with fewer edges
    than requested.
    """
    graph = defaultdict(list)
    nodes = list(capacities.keys())
    degrees = list(capacities.values())
    while nodes:
        # BUG FIX: np.random.randint's upper bound is EXCLUSIVE, so the
        # original randint(0, len(nodes)-1) could never pick the last index
        # and raised ValueError (low >= high) once a single node remained.
        node_idx = np.random.randint(0, len(nodes))
        node = nodes[node_idx]
        degree = degrees[node_idx]
        # Swap the chosen node/degree with the last entry and pop: O(1)
        # removal.  Taking the node out of the pool before wiring its edges
        # also prevents self-loops.
        nodes[node_idx], nodes[-1] = nodes[-1], nodes[node_idx]
        nodes.pop()
        degrees[node_idx], degrees[-1] = degrees[-1], degrees[node_idx]
        degrees.pop()
        for _ in range(degree):  # the number of edges this node still needs
            if not nodes:
                # Can happen when the degree sequence doesn't fit exactly.
                break
            neighbor_idx = np.random.randint(0, len(nodes))
            graph[node].append(nodes[neighbor_idx])
            graph[nodes[neighbor_idx]].append(node)
            degrees[neighbor_idx] -= 1
            # BUG FIX: use <= 0 (was == 0) so a node whose input degree was
            # already 0 is still removed instead of lingering forever.
            if degrees[neighbor_idx] <= 0:
                nodes[neighbor_idx], nodes[-1] = nodes[-1], nodes[neighbor_idx]
                nodes.pop()
                degrees[neighbor_idx], degrees[-1] = degrees[-1], degrees[neighbor_idx]
                degrees.pop()
    return graph


if __name__ == '__main__':
    # Guarded so importing this module does not trigger the file I/O below.
    for iteration in range(1000):
        filename2 = r'path'
        filename3 = r'path'
        with open(filename2, 'rb') as handle:
            pfxlinks = pickle.load(handle, encoding='latin-1')
        with open(filename3, 'rb') as handle:
            grid_ids_withlinks = pickle.load(handle, encoding='latin-1')
        graph = generate_random_multigraph(grid_ids_withlinks)
        # NOTE(review): graph is not persisted anywhere — presumably it should
        # be pickled like in the original script; confirm.
    print('done')
Upvotes: 1
Views: 535
Reputation: 6808
Since you have also posted parts of the code that are unrelated to the core algorithm, it makes going through the code and finding bottlenecks relatively difficult.
Here's an algorithm that is faster from what I've seen in your code. It runs in O(n * m) for creating each graph, where n is the number of nodes and m is the maximum degree that any of the nodes can have. In other words it's O(V + E), where V is the number of vertices and E is the number of edges.
Create a list of nodes, like [1, 2, ..., n], and a list degrees, where degrees[i] is the degree of nodes[i]. Use an adjacency list so that edge insertion has O(1) complexity; let's call this graph. A defaultdict(list) from collections in Python would make a good adjacency list, and for this algorithm I assume graph is a defaultdict(list). Then loop while len(nodes) > 0: and do as follows:
and do as follows:# Get a random index from current nodes
node_idx = random.randint(0, len(nodes)-1)
# Store the node and its corresponding degree
node = nodes[node_idx]
degree = degrees[node_idx]
# Swap that node and its degree with the last node/degree and pop
# This helps us to remove them in O(1) time
# We don't need them anymore since we are going to exhaust the required edges
# for this particular node.
# This also prevents self-edges.
nodes[node_idx], nodes[-1] = nodes[-1], nodes[node_idx]
nodes.pop()
degrees[node_idx], degrees[-1] = degrees[-1], degrees[node_idx]
degrees.pop()
for _ in degree: # this is the amount of edges this node needs
# To make sure we don't get out of bounds.
# This could potentially happen unless
# there is a guarantee that the degrees and number of nodes
# are made such that they fit exactly
if len(nodes) == 0:
break
neighbor_idx = random.randint(0, len(nodes)-1)
graph[node].append(nodes[neighbor_idx])
graph[nodes[neighbor_idx]].append(node)
degrees[neighbor_idx] -= 1
if degrees[neighbor_idx] == 0:
# we need to remove the neighbor node if it has its maximum edges already
nodes[neighbor_idx], nodes[-1] = nodes[-1], nodes[neighbor_idx]
nodes.pop()
degrees[neighbor_idx], degrees[-1] = degrees[-1], degrees[neighbor_idx]
degrees.pop()
This algorithm potentially leaves one node at the end that does not have all its required edges, but this isn't a shortcoming of the algorithm — it can happen if the edge counts for the nodes don't work out. I'm not sure how to express it mathematically though.
Also note that this algorithm could produce multiple edges between two nodes. It isn't clear to me whether this is allowed for the particular graph you are looking for. If not, the code can be amended so that it avoids such edges without sacrificing the time complexity, but it then has the potential to leave multiple nodes with fewer edges than required. That wouldn't be a shortcoming of the algorithm but a result of how the degrees for particular nodes are defined.
Upvotes: 2