vgeta
vgeta

Reputation: 507

Number of paths between two nodes in a Directed Cyclic Graph


I want to find the number of paths between vertex 1 and vertex n in a directed graph. The graph contains cycles. If any path between vertex 1 and vertex n has a cycle, then the result should be "INFINITE PATHS" else the number of paths. This is what I tried.


1) I modified the DFS algorithm, it starts with node 1, All nodes are white initially.
2) If a gray node is visited, it means that there is a cycle.
3) A path array which records the vertices visited is used to backtrack the nodes in cycle.
4) For each node in cycle unmodified DFS is used to search the nth vertex from that node.
5) If DFS succeeds from any one of the node, then a cycle exists in path between vertex 1 and vertex n so it returns else the algo continues calculating number of paths.

c++ implementation


#include <stdio.h>
#include <malloc.h>
#include <vector>
#include <algorithm>

#define WHITE 0
#define GRAY 1
#define BLACK 2

using namespace std;
typedef struct vertexstruct{
   int color;
   vector<int> edgelist;
}vertex;

int ordinarydfs(int v,vertex *vertices,int numvertices){
    if(v == numvertices){
        return 1;       
    }

    if(vertices[v].color == WHITE){
         vertices[v].color = GRAY;
         for(int i=0;i<vertices[v].edgelist.size();i++){
             int x = ordinarydfs(vertices[v].edgelist[i],vertices,numvertices);
             if(x ==1) return 1;
         }
         vertices[v].color = BLACK; 
     }
     return 0;
}

int checkcycle(int v,vector<int>path,vertex *vertices,int numvertices){
    vector<int>::iterator it;
    vector<int>::iterator x;
    it = find (path.begin(), path.end(), v);
    for(x =it;x<path.end();x++)
        vertices[*x].color = WHITE;
    for(x =it;x<path.end();x++){
        if(ordinarydfs(*x,vertices,numvertices))
            return 1;   
    }
    for(x =it;x<path.end();x++)
        vertices[*x].color = GRAY;

    return 0;

}

long dfs(int v,vertex *vertices,int numvertices,vector<int> path){
    long paths = 0;
    if(v == numvertices){
            return 1;       
    }
    if(vertices[v].color == GRAY) {
         if(checkcycle(v,path,vertices,numvertices)) return -1;
    }   
    if(vertices[v].color == WHITE){
        vertices[v].color = GRAY;
        path.push_back(v);
        for(int i=0;i<vertices[v].edgelist.size();i++){     
            long x = dfs(vertices[v].edgelist[i],vertices,numvertices,path);
            vertices[vertices[v].edgelist[i]].color = WHITE;
            if(x == -1) return -1;
            paths += x;
        }
        vertices[v].color = BLACK;  
    }
    return paths % 1000000000;
}

void numpaths(int numvertices,int numedges,vertex *vertices){
    vector<int> path;
    long numpaths = 0;
    numpaths = dfs(1,vertices,numvertices,path);
    if(numpaths == -1)
        printf("INFINITE PATHS\n");
    else
        printf("%ld\n",numpaths);
}



int main(){
    int edgecount=0;
    int  numverts,numedges;
    fscanf(stdin,"%d %d",&numverts,&numedges);
   vertex verts[numverts+1];
   for(int i =1;i<=numverts;i++)
       verts[i].color = WHITE;
   edgecount = 0; 
   int a,b;
   while(edgecount < numedges){
       scanf("%d %d",&a,&b);
       verts[a].edgelist.push_back(b);
       edgecount++;
       }
   numpaths(numverts,numedges,verts);
}

The algorithm is too slow for large graphs. Is there a correct approach for this problem? Share your thoughts. Thank you.

Upvotes: 5

Views: 4988

Answers (4)

tom
tom

Reputation: 22989

You are not using any caching, so dfs() and ordinarydfs() will be called many many times. The path vector used to check for cycles is redundant since you can tell if a vertex is in the current path by its colour. There is also no need for the special check to see if the cycle connects to the last vertex since you are already calculating how many paths there are to the last vertex.

I put the entire BFS in a single method and rewrote most of the code:

#include <cstdio>
#include <vector>
#include <algorithm>

using namespace std;

class Vertex;

const int WHITE = 0, GRAY = 1, BLACK = 2;
// WHITE: unseen, GRAY: in the current path, BLACK: finished (num paths known)
const int CYCLE = -1;

class Vertex
{
public:
    long long paths;
    int color;
    bool hasLoop;
    vector<Vertex *> nbrs;

    Vertex();
    long long countPaths();
};

int main()
{
    int numverts, numedges;
    Vertex** verts; // dynamically-allocated array of Vertex*
    scanf("%d %d", &numverts, &numedges);

    verts = new Vertex*[numverts+1];
    for (int i = 0; i < numverts + 1; i++) verts[i] = new Vertex();

    for (int i = 0; i < numedges; i++)
    {
        int a, b;
        scanf("%d %d", &a, &b);
        verts[a]->nbrs.push_back(verts[b]);
    }

    verts[numverts]->paths = 1; // the base case

    long long numPaths = verts[1]->countPaths();

    // free dynamic memory, set pointers to NULL to prevent accidents
    for (int i = 0; i < numverts; i++) { delete verts[i]; verts[i] = NULL; }
    delete[] verts; verts = NULL;

    if (numPaths == CYCLE) printf("INFINITE PATHS\n");
    else printf("%lld\n", numPaths);

    return 0;
}


Vertex::Vertex()
{
    paths = 0;
    color = WHITE;
    hasLoop = false;
}

long long Vertex::countPaths()
{
    // sets this->paths to the number of paths, or CYCLE (-1) for cycles
    // returns this->paths

    if (color == BLACK)
    {
        // paths already calculated, no need to do anything
    }
    else if (color == GRAY)
    {
        // detected a loop
        hasLoop = true;
    }
    else if (color == WHITE)
    {
        // recursively find all the paths from the neighbours
        color = GRAY;
        for (unsigned int i = 0; i < nbrs.size(); i++)
        {
            nbrs[i]->countPaths();
            if (nbrs[i]->paths == CYCLE)
            {
                // found cycle, no point continuing
                paths = CYCLE;
                break;
            }
            paths += nbrs[i]->paths;
        }
        color = BLACK;

        // check if some other node found a cycle to 'this'
        if (hasLoop && paths > 0) paths = CYCLE;
    }

    return paths;
}

Upvotes: 2

grdvnl
grdvnl

Reputation: 636

We could solve this problem with the following steps:

  1. Calculate the strongly connected components(SCC) of the graph.
  2. Collapse all the nodes within the SCC into one node and mark them. The end result of this is that we have a DAG, G', of SCC's. Some of the SCC's could contain just one node.
  3. Do topological sort on G'.
  4. Count the paths from target node to source node ( using the approach suggested here. While using the approach treat the nodes that represent the SCC's as special case and mark them as having infinite paths.(MAX_LONG)

( I will try to post an implementation to this later, if it is still required)

Upvotes: 1

vgeta
vgeta

Reputation: 507

I tried this, its fast but I am not sure whether it is correct. When you reach a black node it means that you have already calculated path from that node so I stored the number of paths in the structure of node and used it.


#include <stdio.h>
#include <malloc.h>
#include <vector>
#include <algorithm>

#define WHITE 0
#define GRAY  1
#define BLACK 2

using namespace std; 
typedef struct vertexstruct{
    int color;
    vector<int> edgelist;
    long paths;
}vertex;

int cyclenode;
int throughend;
long dfs(int v,vertex *vertices,int numvertices){
    long paths = 0;
if(vertices[v].color == BLACK) return vertices[v].paths;
if(vertices[v].color == GRAY) {
    if(cyclenode == 0)cyclenode = v;
}   
if(vertices[v].color == WHITE){
    vertices[v].color = GRAY;
    for(int i=0;i<vertices[v].edgelist.size();i++){     
        long x = dfs(vertices[v].edgelist[i],vertices,numvertices);
        if(cyclenode) vertices[v].cycle=1;
        if(x == -1) return -1;
        paths += x;
    }
    if(cyclenode && paths >0) return -1;
    if(v == numvertices){
                if(cyclenode) {
                        return -1;
                }
        throughend = 0;
        vertices[v].paths = 1;      
                return 1;
        }
    if(cyclenode ==v && throughend == 0) cyclenode = 0;
    vertices[v].color = BLACK;
    vertices[v].paths = paths % 1000000000; 
}

return paths % 1000000000;
}

void numpaths(int numvertices,int numedges,vertex *vertices){
        long numpaths = 0;
        cyclenode = 0;
                throughend =0;
        numpaths = dfs(1,vertices,numvertices);
        if(numpaths == -1)
            printf("INFINITE PATHS\n");
        else
            printf("%ld\n",numpaths);
}



int main(){
        int edgecount=0;
        int  numverts,numedges;
        fscanf(stdin,"%d %d",&numverts,&numedges);
    vertex verts[numverts+1];
    for(int i =1;i<=numverts;i++){
        verts[i].color = WHITE;
        verts[i].paths = 0;
    }
    edgecount = 0; 
        int a,b;
    while(edgecount < numedges){
                scanf("%d %d",&a,&b);
        verts[a].edgelist.push_back(b);
                edgecount++;
        }
    numpaths(numverts,numedges,verts);
}

Upvotes: 0

Guy Adini
Guy Adini

Reputation: 5504

A completely different approach would be to represent your graph as an adjacency matrix A[i][j]=1 if (i,j) is an edge, 0 otherwise. Then A^i[s][t] counts the number of paths from s to t, which are of length i (this can be proven by induction. Think of what A^2 represents). Sum A[s][t] for the powers 1..|V|, and you have all of the possible paths of length up to |V|. To check if a cycle exists, see (by multiplying the matrix again) if a path of length |V|+1,...,2|V| exists.

Does this help?

Upvotes: 3

Related Questions