Reputation: 49
I wrote an algorithm that generates random graphs given number of nodes and associated node degree. The algorithm is slow for large graphs. It works like this:
Sometimes algorithm reaches an exception as I randomly pick nodes to connect, and in the end I am left with a node with a large unassigned degree. And since I cannot have self loops, I start breaking random edges from intermediate graph and connecting them to this node. I think this is naive and inefficient. Any guidance on how to go about this? Graph is undirected with no self loops but multiple edges between 2 nodes. Tried using python libraries that do this like configuration model but they allow self loops
for iteration in range(1000): # 1000 random graphs
all_stop = False
all_stop_2 = False
count = count +1
success_dict = defaultdict(list)
popped_dict = defaultdict(dict)
grid_dens_dict = defaultdict(set)
with open(filename2,'rb') as handle:
pfxlinks = pickle.load(handle,encoding ='latin-1')
with open(filename3,'rb') as handle:
grid_ids_withlinks = pickle.load(handle,encoding ='latin-1')
for pfxpair in pfxlinks:
start_put = False
end_put = False
if all_stop_2 == True:
break
while True:
if all_stop == True:
all_stop_2 = True
break
try:
grid_start_id, grid_end_id = (np.random.choice(list(grid_ids_withlinks.keys()),size = 2, replace = False)) # get 2 random node ids
grid_start_id = int(grid_start_id)
grid_end_id = int(grid_end_id)
if start_put == False:
start_value = grid_dens_dict.get(grid_start_id) # if node id exists in my dict and node degree hasnt reached capacity
start_process_condition = (not start_value) or ( (start_value) and (len(grid_dens_dict[grid_start_id]) < grid_ids_withlinks[grid_start_id]) )
if start_process_condition:
grid_dens_dict[grid_start_id].add(pfxpair)
start_put = True
if len(grid_dens_dict[grid_start_id]) == grid_ids_withlinks[grid_start_id]: # node degree for node full, remove from dict
try:
#print('deleted key: ',grid_start_id, 'with size:',grid_dens_dict[grid_start_id],'Capacity:',grid_ids_withlinks[grid_start_id])
popped_dict[grid_start_id] = {'orig_capacity': grid_ids_withlinks[grid_start_id],'size':len(grid_dens_dict[grid_start_id]) }
grid_ids_withlinks.pop(grid_start_id)
except:
print('already popped')
else:
print('check')
if end_put == False:
end_value = grid_dens_dict.get(grid_end_id)
if (not end_value) or (end_value and (len(grid_dens_dict[grid_end_id]) < grid_ids_withlinks[grid_end_id])):
grid_dens_dict[grid_end_id].add(pfxpair)
end_put = True
if len(grid_dens_dict[grid_end_id]) == grid_ids_withlinks[grid_end_id]:
try:
#print('deleted key: ',grid_end_id, 'with size:',grid_dens_dict[grid_end_id],'Capacity:',grid_ids_withlinks[grid_end_id])
popped_dict[grid_end_id] = {'orig_capacity': grid_ids_withlinks[grid_end_id],'size':len(grid_dens_dict[grid_end_id]) }
grid_ids_withlinks.pop(grid_end_id)
except:
print('already popped')
else:
print('check')
if (start_put == False and end_put == True): # only end while when both nodes have been assigned a link
grid_dens_dict[grid_end_id].discard(pfxpair)
end_put = False
if (start_put == True and end_put == False):
grid_dens_dict[grid_start_id].discard(pfxpair)
start_put = False
if start_put == True and end_put == True:
success_dict[pfxpair].append((grid_start_id,grid_end_id))
break
Upvotes: 0
Views: 226
Reputation: 11602
You could solve for the graph with maximum edge weight subject to degree constraints using linear programming. In general, choosing different random weights will lead to different graphs. Here is an example using PuLP:
# !pip install PuLP
import pulp
import numpy as np
from itertools import combinations
# set up fake data
np.random.seed(0)
num_nodes = 10
node_list = list(range(num_nodes))
degrees = np.random.randint(1, 5, size=num_nodes)
# random weights, num_nodes by num_nodes
rw = np.random.rand(num_nodes, num_nodes)
# define optimization problem
lp_problem = pulp.LpProblem(name="RandomGraph")
# define optimization variables
edges = pulp.LpVariable.dicts("Edges", (node_list, node_list), cat="Binary")
# objective is to find the matching with maximum random edge weights
lp_problem += sum(edges[i][j] * rw[i,j]
for i in node_list for j in node_list)
# graph is undirected:
for i, j in combinations(node_list, 2):
lp_problem += edges[i][j] == edges[j][i]
# no self-loops:
for i in node_list:
lp_problem += edges[i][i] == 0
# degree constraints are respected:
for i in node_list:
lp_problem += sum(edges[i][j] for j in node_list) == degrees[i]
# solve the problem
lp_problem.solve()
# make sure that we got an optimal solution
assert lp_problem.status == 1
# recover edges
edges_solution = [(i, j) for i in node_list for j in node_list
if edges[i][j].varValue == 1]
# visualize the graph
import networkx as nx
# construct networkx graph
g = nx.Graph()
g.add_edges_from(edges_solution)
for i, d in enumerate(degrees):
g.nodes[i]["intended_degree"] = d
# draw, labeling the intended degrees
labels = nx.get_node_attributes(g, 'intended_degree')
nx.draw_circular(g, labels=labels)
The following picture shows that node degrees are correct:
Upvotes: 1