Louvain Algorithm Implementation In Python

Question

I am attempting to implement the Louvain algorithm in python. On testing it on the Karate Club dataset, although there is a correct answer, it is not exactly the same as my slower implementation of the same, which instead of calculating the modularity change, calculates the modularity itself. The slower version works fine. However, this version does not work fine on my dataset and gets stuck in a loop of reducing and increasing the modularity.

I have fixed some bugs I the implementation but am unable to see what else I am doing wrong. The code is given below.



from graph_tool.all import *

class MyList:

    def __init__(self, arr):

        self.arr = arr

    # https://stackoverflow.com/questions/1047021/overriding-in-python-iadd-method

    # https://stackoverflow.com/questions/5082190/typeerror-after-overriding-the-add-method

    def __add__(self,other_arr):

        if type(other_arr) == list:

            self.arr.extend(other_arr)

        if type(other_arr) == MyList:

            self.arr.extend(other_arr.arr)

        return self

    def __radd__(self,other_arr):

        if type(other_arr) == list:

            self.arr.extend(other_arr)

        if type(other_arr) == MyList:

            self.arr.extend(other_arr.arr)

        return self

        import time
from collections import Counter
from tabulate import tabulate

def getModularity(G, parts_idx, edge_weights):
    return modularity(G, parts_idx, weight=edge_weights)

def moveNodes(G, parts_idx, edge_weights, vertex_degrees, total_edges):
    # https://arxiv.org/pdf/1810.08473.pdf

    count = 0

    # Note that when we called this function, the vertex degrees passed here,
    # are actually the sum totals of each node in a community giving us
    # sum_tot as required for finding dQ. (See report)

    # So we make a copy of the community degrees for us to modify, so as to
    # speed up dQ calculations. Every time we move a vertex to a different
    # community, we subtract and add its degree from the community it moved
    # from and the community it moved to respectively.
    community_tots = {}
    for comm_idx,tot in zip(parts_idx.a, vertex_degrees.a):
        community_tots[comm_idx] = tot

    # Louvain's Part 1 Main Loop
    while True:
        # Get the modularity before we try to move each vertex.
        mod_old = getModularity(G, parts_idx, edge_weights)

        # Now go over each vertex to find it's best community
        for v in G.iter_vertices():
            # if (count == 2):
            #     print("mod:", getModularity(G, parts_idx, edge_weights))
            # Keep track of communities we have to visit along with the number
            # of edges between v and each community.
            neighbour_communities = {parts_idx[v] : 0}

            # Now go through all edges of v to find neighbouring communities.
            for s, t, weight in G.iter_all_edges(v, eprops=[edge_weights]):
                # We do not need to explicitly check for self loops as that is
                # handled by the s == v check.
                # Check if s is v
                if s == v:
                    # https://stackoverflow.com/questions/42315072/python-update-a-key-in-dict-if-it-doesnt-exist
                    # Add community if not seen before
                    if parts_idx[t] not in neighbour_communities:
                        neighbour_communities[parts_idx[t]] = weight
                    # Else add the weight
                    else:
                        neighbour_communities[parts_idx[t]] += weight
                # Do the same for t
                if t == v:
                    # https://stackoverflow.com/questions/42315072/python-update-a-key-in-dict-if-it-doesnt-exist
                    # Add community if not seen before
                    if parts_idx[s] not in neighbour_communities:
                        neighbour_communities[parts_idx[s]] = weight
                    # Else add the weight
                    else:
                        neighbour_communities[parts_idx[s]] += weight
            # The dQ if we do not change anything is 0.
            curr_max_dQ = 0

            # The current best swap is no swap as dQ would remain 0.
            curr_best_swap = parts_idx[v]

            # Precompute k_v/m
            # precomp_coeff = vertex_degrees[v]*precomp_denom2

            # Prefetch k_v_own
            k_v_own = neighbour_communities[parts_idx[v]]

            # Prefetch own community tot
            own_community_tot = community_tots[parts_idx[v]]

            # Now go through the neighbouring communities to maximize dQ.
            # https://stackoverflow.com/questions/3294889/iterating-over-dictionaries-using-for-loops
            for community, k_v_comm in neighbour_communities.items():
                # Skip if it is our own community
                if community == parts_idx[v]:
                    continue

                # Calculate dQ
                dQ = k_v_comm - vertex_degrees[v]*community_tots[community]/(2*total_edges) + vertex_degrees[v]*own_community_tot/(2*total_edges) - k_v_own
                # print(community, dQ)

                # Update our curr vals if dQ bigger than our curr max dQ
                if dQ >= curr_max_dQ:
                    curr_max_dQ = dQ
                    curr_best_swap = community

            # Now update v's community if we found a better one
            if curr_max_dQ > 0:
                # First update community_tots
                community_tots[curr_best_swap] += vertex_degrees[v]
                community_tots[parts_idx[v]] -= vertex_degrees[v]

                # Now change v's community
                parts_idx[v] = curr_best_swap
            # print(parts_idx.a)

        count += 1
        mod_new = getModularity(G, parts_idx, edge_weights)
        print(f"After moveNodes iteration {count}:", mod_new)
        if mod_new == mod_old:
            break
    return len(set(list(parts_idx)))

def Louvain(G):
    # https://arxiv.org/pdf/1810.08473.pdf
    # Copy to prevent unintended changes
    G_copy = G.copy()

    # Create a list of MyLists where each MyList contains the list of vertices
    # a community contains.
    master_partition = G_copy.new_vp("object")
    for idx,v in enumerate(G_copy.iter_vertices()):
        master_partition[v] = MyList([G_copy.vertex_index[idx]])

    # To keep track of the number communities
    num_partitions = G_copy.num_vertices()

    # To calculate dQ
    total_edges = G_copy.num_edges()

    # To keep track of community index for easy swaps in moveNodes()
    partition_idxs = G_copy.new_vp("int")
    for i in range(G_copy.num_vertices()):
        partition_idxs[i] = i

    curr_iteration = 0

    # To keep track of number of intra- and inter-community edges
    edge_weights = G_copy.new_ep("int")
    edge_weights.set_value(1)


    # Louvain's Phase-2 Loop
    while True:
        # To keep track of vertex degrees
        vertex_degrees = G_copy.degree_property_map("total", weight=edge_weights)

        # Get the current modularity
        old_mod = getModularity(G_copy, partition_idxs, edge_weights)

        # Move nodes between communitites to improve modularity
        num_partitions = moveNodes(G_copy, partition_idxs, edge_weights, vertex_degrees, total_edges)

        curr_iteration += 1

        # Get the new modularity
        new_mod = getModularity(G_copy, partition_idxs, edge_weights)

        print(f"After iteration {curr_iteration}", new_mod)

        # If the change in modularity was less than 1e-7, then stop
        if abs(old_mod - new_mod) < 0.0000001:
            break

        # Else condense the communities into singular nodes
        G_copy, partition_idxs, vcount, ecount, avp, aep = condensation_graph(G_copy, partition_idxs, avprops=[master_partition], aeprops=[edge_weights], self_loops=True)

        # Get the new weights, partition sets and degrees
        edge_weights = aep[0]
        master_partition = avp[0]

    final_part_idxs = G.new_vp("int")
    for part_idx,part in enumerate(master_partition):
        for node in part.arr:
            final_part_idxs[node] = part_idx
    return final_part_idxs

# g.set_directed(False)
# print(g.is_directed())
# part_idxs = Louvain(g)
# with open("louvaincomms.txt", "w") as f:
#     print(part_idxs, file=f)
# Use the below instead of above to test Louvain on the famous Zachary's Karate Club Dataset
# Results are validated by https://scholarworks.sjsu.edu/cgi/viewcontent.cgi?article=1528&context=etd_projects
g = collection.data["karate"]
part_idxs = Louvain(g)

# https://stackoverflow.com/questions/12282232/how-do-i-count-occurrence-of-unique-values-inside-a-list
parts = Counter(part_idxs).keys() # equals to list(set(part_idxs))-
freq = Counter(part_idxs).values() # counts the elements' frequency

# https://stackoverflow.com/questions/41140647/python-printing-lists-with-tabulate
table = zip(parts, freq)
print(tabulate(table, headers=["Community Index", "Number of Nodes"]))

The output is

Community Index    Number of Nodes
-----------------  -----------------
                0                11
                1                 2
                2                 3
                3                10
                4                 4
                5                 4

Louvain Algorithm Implementation In Python

Answers (0)

Related Questions