skynaive
skynaive

Reputation: 49

Fatest algorithm to generate many distinct random undirected graphs with multiple edges given number of nodes and node degree

I wrote an algorithm that generates random graphs given number of nodes and associated node degree. The algorithm is slow for large graphs. It works like this:

  1. Pick 2 random nodes from node list and connect them while checking node degree of these nodes hasn't reached capacity. Also every random graph must be different from previous ones given this is possible (1000 graphs)

Sometimes algorithm reaches an exception as I randomly pick nodes to connect, and in the end I am left with a node with a large unassigned degree. And since I cannot have self loops, I start breaking random edges from intermediate graph and connecting them to this node. I think this is naive and inefficient. Any guidance on how to go about this? Graph is undirected with no self loops but multiple edges between 2 nodes. Tried using python libraries that do this like configuration model but they allow self loops

for iteration in range(1000):  # 1000 random graphs
    all_stop = False
    all_stop_2 = False
    count = count +1
    
    success_dict = defaultdict(list)  
    popped_dict = defaultdict(dict)
    grid_dens_dict = defaultdict(set)
        
    with open(filename2,'rb') as handle:
        pfxlinks = pickle.load(handle,encoding ='latin-1')  


    with open(filename3,'rb') as handle:
        grid_ids_withlinks = pickle.load(handle,encoding ='latin-1')
    
    for pfxpair in pfxlinks:
        start_put = False    
        end_put = False 
        
        if all_stop_2 == True:
            break
        
        while True:
            
            if all_stop == True:
                all_stop_2 = True
                break
           
            try:
                grid_start_id, grid_end_id = (np.random.choice(list(grid_ids_withlinks.keys()),size = 2, replace = False)) # get 2 random node ids
                grid_start_id = int(grid_start_id)
                grid_end_id = int(grid_end_id)
                
                if start_put == False:
                    
                    start_value = grid_dens_dict.get(grid_start_id)    # if node id exists in my dict and node degree hasnt reached capacity
                    
                    start_process_condition = (not start_value) or ( (start_value) and (len(grid_dens_dict[grid_start_id]) < grid_ids_withlinks[grid_start_id]) )
                    if start_process_condition:
                        grid_dens_dict[grid_start_id].add(pfxpair)
                        start_put = True
                    
                        if len(grid_dens_dict[grid_start_id]) == grid_ids_withlinks[grid_start_id]: # node degree for node full, remove from dict
                            
                            try:
                                #print('deleted key: ',grid_start_id, 'with size:',grid_dens_dict[grid_start_id],'Capacity:',grid_ids_withlinks[grid_start_id])
                                popped_dict[grid_start_id] = {'orig_capacity': grid_ids_withlinks[grid_start_id],'size':len(grid_dens_dict[grid_start_id]) }
        
                                grid_ids_withlinks.pop(grid_start_id)
                            
                            except:
                                print('already popped')
                else:
                    print('check')
                              
                if end_put == False: 
                    
                                        
                    end_value = grid_dens_dict.get(grid_end_id)
                    if (not end_value) or (end_value and (len(grid_dens_dict[grid_end_id]) < grid_ids_withlinks[grid_end_id])):
                        grid_dens_dict[grid_end_id].add(pfxpair)
                        end_put = True
                        
                        if len(grid_dens_dict[grid_end_id]) == grid_ids_withlinks[grid_end_id]:
                            try:
                                #print('deleted key: ',grid_end_id, 'with size:',grid_dens_dict[grid_end_id],'Capacity:',grid_ids_withlinks[grid_end_id])
                                popped_dict[grid_end_id] = {'orig_capacity': grid_ids_withlinks[grid_end_id],'size':len(grid_dens_dict[grid_end_id]) }
        
                                grid_ids_withlinks.pop(grid_end_id)
        
                            except:
                                print('already popped')
                else:
                    print('check')
                
                if (start_put == False and end_put == True):      # only end while when both nodes have been assigned a link
                    grid_dens_dict[grid_end_id].discard(pfxpair)
                    end_put = False
                if (start_put == True and end_put == False):
                    grid_dens_dict[grid_start_id].discard(pfxpair)
                    start_put = False
        
                        
                if start_put == True and end_put == True:
                    success_dict[pfxpair].append((grid_start_id,grid_end_id))
        
                    break

Upvotes: 0

Views: 226

Answers (1)

hilberts_drinking_problem
hilberts_drinking_problem

Reputation: 11602

You could solve for the graph with maximum edge weight subject to degree constraints using linear programming. In general, choosing different random weights will lead to different graphs. Here is an example using PuLP:

# !pip install PuLP
import pulp
import numpy as np
from itertools import combinations

# set up fake data
np.random.seed(0)
num_nodes = 10
node_list = list(range(num_nodes))
degrees = np.random.randint(1, 5, size=num_nodes)

# random weights, num_nodes by num_nodes
rw = np.random.rand(num_nodes, num_nodes)

# define optimization problem
lp_problem = pulp.LpProblem(name="RandomGraph")

# define optimization variables
edges = pulp.LpVariable.dicts("Edges", (node_list, node_list), cat="Binary")

# objective is to find the matching with maximum random edge weights
lp_problem += sum(edges[i][j] * rw[i,j]
            for i in node_list for j in node_list)

# graph is undirected:
for i, j in combinations(node_list, 2):
  lp_problem += edges[i][j] == edges[j][i]

# no self-loops:
for i in node_list:
  lp_problem += edges[i][i] == 0

# degree constraints are respected:
for i in node_list:
  lp_problem += sum(edges[i][j] for j in node_list) == degrees[i]

# solve the problem
lp_problem.solve()
# make sure that we got an optimal solution
assert lp_problem.status == 1
# recover edges
edges_solution = [(i, j) for i in node_list for j in node_list
                   if edges[i][j].varValue == 1]

# visualize the graph
import networkx as nx

# construct networkx graph
g = nx.Graph()
g.add_edges_from(edges_solution)
for i, d in enumerate(degrees):
  g.nodes[i]["intended_degree"] = d

# draw, labeling the intended degrees
labels = nx.get_node_attributes(g, 'intended_degree')
nx.draw_circular(g, labels=labels) 

The following picture shows that node degrees are correct:

enter image description here

Upvotes: 1

Related Questions