skynaive
skynaive

Reputation: 49

Efficient algorithm to generate random multigraph (undirected) given nodes and degree

I wrote a simple algorithm to generate 1000 random graphs given total nodes and associated node degree. Graph is undirected with multiple edges and no self-loops. My current algorithm is very slow:

Graph properties: no self-loops, undirected, multiple edges allowed (i.e. several edges may connect the same pair of vertices)

from collections import defaultdict

# Build 1000 random undirected multigraphs (no self-loops, parallel edges
# allowed): each placeholder edge id in `pfxlinks` is attached to two random
# nodes whose degree quota in `grid_ids_withlinks` is not yet full.
# NOTE(review): this chunk references names that must be defined earlier in
# the file: `np` (numpy), `Counter`, `pickle`, `all_itr_list`, `count` —
# confirm those imports/initialisations exist above this point.
for iteration in range(1000):  # 1000 random graphs
    all_stop = False    # set once the current graph is completed via the rewiring path
    all_stop_2 = False  # mirrors all_stop so the outer edge loop stops as well
    count = count + 1   # running graph counter; assumes `count` initialised earlier — TODO confirm

    success_dict = defaultdict(list)   # edge id -> [(start_node, end_node)]
    popped_dict = defaultdict(dict)    # audit trail of nodes retired once their degree quota filled
    grid_dens_dict = defaultdict(set)  # node id -> set of edge ids attached so far (final graph being built)

    # Placeholder edge ids: 842814 edges in total, i.e. the node degrees in
    # grid_ids_withlinks sum to 842814 * 2.
    pfxlinks = [i for i in range(842814)]

    # node_id -> required node degree (total nodes: 93)
    grid_ids_withlinks = {1107415: 751065,1125583: 15256,1144686: 108969,1115625: 17038,1081048: 6749,1103814: 6476,1108340: 107431,1111992: 45946,1117451: 3594,1093803: 10860,1117452: 2126,1089226: 52518,1082859: 21211,1105613: 94587,1092862: 43891,1083786: 17073,1092899: 999,1141954: 4347,1106506: 2072,1094690: 119736,1116547: 3284,1104705: 2404,1135637: 3815,1121070:16598,1087417: 4514,1103777: 310,1114682: 4265,1091948: 5468,1093788: 2176,              1098316: 2067,1105597: 19090,1141055: 8454,1097427: 3041,1092875: 4159,1086500: 2204,1095619: 9732,1087430: 2041,1112884: 2167,1097413: 17056,1107414: 34769,1111088: 2025,1083768: 2176,1130180: 1886,              1144699: 988,1146499: 6818,1111081: 12509,1104687: 6186,1092866: 4272,1091037: 3,1121044: 39,1098333: 294,1118359: 27,1151091: 21,1107441: 10766,1141094: 3523,1102898: 53,1115634: 2199,1100140: 4347,1086515: 3029,1116505: 238,1082883: 4070,1118366:2065,1102866: 1590,1115631: 4345,1091990: 2131,1144703: 4053,1075589: 19,1081062: 2124,1097425: 11,1133804: 8,1112864: 158,1088307: 112,1138312: 112,1127446: 6245,1108356: 155,1082874: 6315,1115640: 3978,1107432: 2234,1131077: 2032,1115590: 2672,1094696: 13,1136502: 52,1094683: 20,1110183: 2,1113821: 56,1106515: 6,1120183: 11,1083765: 23,1101079: 6,1091944: 12,1085599: 10,1083783: 25,1148339: 6}

    for pfxpair in pfxlinks:
        start_put = False  # True once pfxpair is attached to a start node
        end_put = False    # True once pfxpair is attached to an end node

        if all_stop_2 == True:
            break

        while True:

            if all_stop == True:
                all_stop_2 = True
                break

            try:
                # Draw 2 distinct random node ids. Raises ValueError once
                # fewer than 2 unsaturated nodes remain, which is (ab)used as
                # control flow to enter the rewiring handler below.
                grid_start_id, grid_end_id = (np.random.choice(list(grid_ids_withlinks.keys()),size = 2, replace = False))
                grid_start_id = int(grid_start_id)
                grid_end_id = int(grid_end_id)

                if start_put == False:

                    # Accept the node if it is unseen, or seen but its degree
                    # quota is not yet full.
                    start_value = grid_dens_dict.get(grid_start_id)

                    start_process_condition = (not start_value) or ( (start_value) and (len(grid_dens_dict[grid_start_id]) < grid_ids_withlinks[grid_start_id]) )
                    if start_process_condition:
                        grid_dens_dict[grid_start_id].add(pfxpair)
                        start_put = True

                        if len(grid_dens_dict[grid_start_id]) == grid_ids_withlinks[grid_start_id]: # node degree full: retire node
                            try:
                                popped_dict[grid_start_id] = {'orig_capacity': grid_ids_withlinks[grid_start_id],'size':len(grid_dens_dict[grid_start_id]) }
                                grid_ids_withlinks.pop(grid_start_id)
                            except KeyError:  # narrowed from a bare except: only a missing key can raise here
                                print('already popped')
                else:
                    print('check')

                if end_put == False:

                    end_value = grid_dens_dict.get(grid_end_id)
                    if (not end_value) or (end_value and (len(grid_dens_dict[grid_end_id]) < grid_ids_withlinks[grid_end_id])):
                        grid_dens_dict[grid_end_id].add(pfxpair)
                        end_put = True

                        if len(grid_dens_dict[grid_end_id]) == grid_ids_withlinks[grid_end_id]:
                            try:
                                popped_dict[grid_end_id] = {'orig_capacity': grid_ids_withlinks[grid_end_id],'size':len(grid_dens_dict[grid_end_id]) }
                                grid_ids_withlinks.pop(grid_end_id)
                            except KeyError:  # narrowed from a bare except
                                print('already popped')
                else:
                    print('check')

                # Both endpoints must accept the edge; otherwise roll back the
                # half-assigned side and retry with a fresh random pair.
                if (start_put == False and end_put == True):
                    grid_dens_dict[grid_end_id].discard(pfxpair)
                    end_put = False
                if (start_put == True and end_put == False):
                    grid_dens_dict[grid_start_id].discard(pfxpair)
                    start_put = False

                if start_put == True and end_put == True:
                    success_dict[pfxpair].append((grid_start_id,grid_end_id))
                    break

            except ValueError:
                # Narrowed from a bare `except:`. We land here when
                # np.random.choice cannot draw 2 distinct nodes, i.e. only one
                # unsaturated node is left. Finish the graph by rewiring
                # existing edges onto that last node.
                grid2grid = defaultdict(list)  # adjacency list built from the edges assigned so far
                for k,v in success_dict.items():
                    grid2grid[v[0][0]].append(v[0][1])
                    grid2grid[v[0][1]].append(v[0][0])

                # Hoisted out of the loop: the last unsaturated node id was
                # recomputed via list(grid_ids_withlinks.keys())[0] on every
                # comparison below.
                last_node = list(grid_ids_withlinks.keys())[0]

                # Pick 2 random node ids that share an edge and reroute that
                # edge through last_node until its degree quota is met.
                while True:
                    pop_id1, pop_id2 = (np.random.choice(list(grid_dens_dict.keys()),size = 2, replace = False))
                    pop_id1 = int(pop_id1)
                    pop_id2 = int(pop_id2)
                    if (pop_id1 != last_node and pop_id2 != last_node and  (pop_id1 in grid2grid[pop_id2] and pop_id2 in grid2grid[pop_id1])):  ##have an assigned link
                        grid2grid[pop_id1].remove(pop_id2)
                        grid2grid[pop_id2].remove(pop_id1)
                        grid2grid[last_node].append(pop_id1)
                        grid2grid[last_node].append(pop_id2)
                        grid2grid[pop_id1].append(last_node)
                        grid2grid[pop_id2].append(last_node)

                    if len(grid2grid[last_node]) == grid_ids_withlinks[last_node]:
                        # Collapse neighbour lists into multiplicity counters.
                        for k,v in grid2grid.items():
                            grid2grid[k] = Counter(v)

                        if len(all_itr_list) != 0:
                            # check if current graph is identical to any previous one
                            for graph in all_itr_list:
                                shared_items = {k: graph[k] for k in graph if k in grid2grid and graph[k] == grid2grid[k]}

                                # BUG FIX: the original compared len(shared_items)
                                # to the dict grid_ids_withlinks itself (always
                                # False); compare against the node count instead.
                                # NOTE(review): a detected duplicate only breaks
                                # the scan — the duplicate graph is still stored.
                                if len(shared_items) == len(grid2grid):  # total no of grids
                                    break

                        all_itr_list.append(grid2grid)

                        filename = 'path'
                        with open(filename,'wb') as handle:
                            pickle.dump(grid2grid, handle)
                        all_stop = True

                        break

    # BUG FIX: moved out of the per-edge loop. The original printed and
    # pickled after every single edge (842814 times per graph); once per
    # graph iteration is what the surrounding logic expects.
    print('iteration no:',count)
    if all_stop == False:
        grid2grid = defaultdict(list)
        for k,v in success_dict.items():
            grid2grid[v[0][0]].append(v[0][1])
            grid2grid[v[0][1]].append(v[0][0])

        for k,v in grid2grid.items():
            grid2grid[k] = Counter(v)
        all_itr_list.append(grid2grid)

        filename = 'path'
        with open(filename,'wb') as handle:
            pickle.dump(grid2grid, handle)

> ##from answer

    import pickle
    from collections import defaultdict
    import numpy as np


    # Generate 1000 random undirected multigraphs (no self-loops) matching a
    # prescribed degree sequence, in O(V + E) per graph.

    # Load the inputs ONCE: they do not change between iterations, so
    # re-reading the pickles 1000 times inside the loop was pure overhead.
    filename2 = r'path'

    filename3 = r'path'

    with open(filename2,'rb') as handle:
        # NOTE(review): pfxlinks is loaded but never used below — confirm it
        # is still needed.
        pfxlinks = pickle.load(handle,encoding ='latin-1')

    with open(filename3,'rb') as handle:
        grid_ids_withlinks = pickle.load(handle,encoding ='latin-1')  # node_id -> degree

    for iteration in range(1000):
        graph = defaultdict(list)  # adjacency list of the multigraph being built

        nodes = list(grid_ids_withlinks.keys())
        degrees = list(grid_ids_withlinks.values())  # degrees[i] is the remaining degree of nodes[i]

        while len(nodes) > 0:
            # Get a random index from current nodes.
            # BUG FIX: np.random.randint's upper bound is EXCLUSIVE (unlike
            # stdlib random.randint), so the original
            # np.random.randint(0, len(nodes)-1) could never pick the last
            # node and raised ValueError once a single node remained.
            node_idx = np.random.randint(len(nodes))
            # Store the node and its corresponding degree
            node = nodes[node_idx]
            degree = degrees[node_idx]

            # Swap that node and its degree with the last node/degree and pop.
            # This removes them in O(1) time; we don't need them anymore since
            # we are about to exhaust the required edges for this node.
            # Removing the node before drawing partners also prevents
            # self-edges.
            nodes[node_idx], nodes[-1] = nodes[-1], nodes[node_idx]
            nodes.pop()
            degrees[node_idx], degrees[-1] = degrees[-1], degrees[node_idx]
            degrees.pop()

            for _ in range(degree): # the number of edges this node still needs
              # Guard against running out of partner nodes; this can happen
              # when the degree sequence cannot be realised exactly.
              if len(nodes) == 0:
                break

              neighbor_idx = np.random.randint(len(nodes))  # exclusive bound covers every index
              graph[node].append(nodes[neighbor_idx])
              graph[nodes[neighbor_idx]].append(node)
              degrees[neighbor_idx] -= 1
              if degrees[neighbor_idx] == 0:
                # Neighbor reached its full degree: remove it in O(1) as well.
                nodes[neighbor_idx], nodes[-1] = nodes[-1], nodes[neighbor_idx]
                nodes.pop()
                degrees[neighbor_idx], degrees[-1] = degrees[-1], degrees[neighbor_idx]
                degrees.pop()
    print('done')
    
            
            
        
    
    
    
                    
                    
                

Upvotes: 1

Views: 535

Answers (1)

user1984
user1984

Reputation: 6808

Since you have also posted parts of the code that are unrelated to the core algorithm, it makes going through the code and finding bottlenecks relatively difficult.

Here's an algorithm that is faster than what I've seen in your code. It runs in O(n * m) for creating each graph, where n is the number of nodes, and m is the max degree that any of the nodes can have. In other words it's O(V + E) where V is the number of vertices and E the number of edges.

  1. Create a list for the nodes, called nodes, like [1, 2, ..., n].
  2. Create a corresponding list for degrees, called degrees, where degrees[i] is the degree of nodes[i].
  3. Create a store for your graph however you like it. Adjacency list, matrix. Just make sure that adding edges to the graph is of O(1) complexity. Let's call this graph. A defaultdict(list) from collections in python would make a good adjacency list. For this algorithm I assume graph is a defaultdict(list).
  4. Run a while loop on nodes. while len(nodes) > 0: and do as follows:
# Body of the `while len(nodes) > 0:` loop from step 4: pick one node, remove
# it, then draw all of its remaining edges to random partner nodes.

# Get a random index from current nodes
node_idx = random.randint(0, len(nodes)-1)
# Store the node and its corresponding degree
node = nodes[node_idx]
degree = degrees[node_idx]

# Swap that node and its degree with the last node/degree and pop
# This helps us to remove them in O(1) time
# We don't need them anymore since we are going to exhaust the required edges
# for this particular node.
# This also prevents self-edges.
nodes[node_idx], nodes[-1] = nodes[-1], nodes[node_idx]
nodes.pop()
degrees[node_idx], degrees[-1] = degrees[-1], degrees[node_idx]
degrees.pop()

# BUG FIX: the original `for _ in degree:` iterated over an int, which raises
# TypeError; the loop must run `degree` times via range().
for _ in range(degree): # this is the amount of edges this node needs
    # To make sure we don't get out of bounds.
    # This could potentially happen unless there is a guarantee that the
    # degrees and number of nodes are made such that they fit exactly.
    if len(nodes) == 0:
        break

    neighbor_idx = random.randint(0, len(nodes)-1)
    graph[node].append(nodes[neighbor_idx])
    graph[nodes[neighbor_idx]].append(node)
    degrees[neighbor_idx] -= 1
    if degrees[neighbor_idx] == 0:
        # we need to remove the neighbor node if it has its maximum edges already
        nodes[neighbor_idx], nodes[-1] = nodes[-1], nodes[neighbor_idx]
        nodes.pop()
        degrees[neighbor_idx], degrees[-1] = degrees[-1], degrees[neighbor_idx]
        degrees.pop()

This algorithm potentially leaves one node at the end that doesn't have all its required edges, but this isn't a shortcoming of the algorithm: it can happen whenever the degree sequence itself doesn't work out — for instance, by the handshaking lemma, the degrees must sum to an even number for any multigraph realization to exist.

Also note that this algorithm could produce multiple edges between two nodes. It isn't clear to me if this is allowed or not for the particular graph you are looking for. If it is not allowed, the code can be amended so that it avoids such edges without sacrificing the time complexity. But it then has the potential to leave multiple nodes with fewer edges than required. This wouldn't be a shortcoming of the algorithm but a result of how the degrees for particular nodes are defined.

Upvotes: 2

Related Questions