Tristan
Tristan

Reputation: 55

Constructing a Graph of Strings (Levenshtein Distance)

Currently, in my computer-science course, we are discussing graphs and how to find shortest distance using graphs. I received an assignment about a week ago where the teacher gave us the code for a graph using integers, and we have to adapt it to be able to calculate Levenshtein Distance using a list of words. The problem I'm having though is that I don't understand really how graphs work enough to manipulate one. I've tried googling graphs in c++ but none of the things I found resemble the type of program I was given.

We just finished a unit on linked lists, and I think graphs operate similarly? I understand that each node will point to many other nodes, but in a case where I have 2000 words all pointing to each other, how do I keep track of 2000 pointers per node without declaring that many nodes in my struct? I believe (not 100%) that in the program I was given my teacher used a vector of integer vectors to keep track but I don't know how to implement that.

I'm not asking anyone to fully comment each line as that is an enormous amount of work, but if someone could roughly explain how I would accomplish what I asked above and perhaps read the code and give me a rough understanding of what some sections mean (I'll put comments on some sections I'm specifically having trouble understanding) I would be extremely grateful.

Here is the code we were given:

#include <iostream>
#include <vector>
#include <algorithm> //for max<>
#include <limits>

using namespace std;

typedef vector <int> ivec;
typedef vector <ivec> imatrix; //A vector of vectors, not how this works or how to implement
typedef vector <bool> bvec;

struct graph
{
    imatrix edges; //list of attached vertices for each node
    int numVertices;
};

//I understand the ostream overloading
ostream & operator << (ostream & stream, ivec &vec)
{
    for (int i = 0; i < vec.size(); i++)
    {
        stream << vec[i] << " ";
    }
    return stream;
}

ostream & operator << (ostream & stream, graph &g)
{
    stream << endl << "numVert = " << g.numVertices << endl;
    for (int i = 0; i < g.numVertices; i++)
    {
        stream << "vertex = " << i+1 << " | edges = " << g.edges[i] << endl;
    }
    return stream;
}

const int sentinel = -1;

bvec inTree;
ivec distanceNodes;
ivec parents;

void initGraph(graph * g);
void insertEdge(graph * g, int nodeNum, int edgeNum);
void initSearch(graph * g);
void shortestPath(graph * g, int start, int end);

int main()
{
    //I understand the main, the two numbers in insertEdge are being hooked together and the two numbers in shortestPath are what we are looking to connect in the shortest way possible
    graph g;
    initGraph(&g);
    insertEdge(&g, 1, 2);
    insertEdge(&g, 1, 3);
    insertEdge(&g, 2, 1);
    insertEdge(&g, 2, 3);
    insertEdge(&g, 2, 4);
    insertEdge(&g, 3, 1);
    insertEdge(&g, 3, 2);
    insertEdge(&g, 3, 4);
    insertEdge(&g, 4, 2);
    insertEdge(&g, 4, 3);
    insertEdge(&g, 4, 5);
    insertEdge(&g, 5, 4);
    insertEdge(&g, 6, 7);
    insertEdge(&g, 7, 6);
    cout << "The graph is " << g << endl;
    shortestPath(&g, 1, 5);
    shortestPath(&g, 2, 4);
    shortestPath(&g, 5, 2);
    shortestPath(&g, 1, 7);
    return 0;
}

void initGraph(graph * g)
{
    g -> numVertices = 0; //Why set the number of vertices to 0?
}

void insertEdge(graph * g, int nodeNum, int edgeNum)
{
    int numVertices = max(nodeNum, edgeNum); //Max finds the larger of two numbers I believe? How can this be used with strings, one is not bigger than the other
    numVertices = max(1, numVertices);
    if (numVertices > g->numVertices)
    {
        for (int i = g->numVertices; i <= numVertices; i++)
        {
            ivec nodes;
            if (g->edges.size() < i)
            {
                g -> edges.push_back(nodes);
            }
        }
        g->numVertices = numVertices;
    }
    g->edges[nodeNum - 1].push_back(edgeNum);
}

void initSearch(graph * g) //I believe this function simply resets the values from a previous search
{
    if (g == NULL)
    {
        return;
    }
    inTree.clear();
    distanceNodes.clear();
    parents.clear();
    for (int i = 0; i <= g->numVertices; i++)
    {
        inTree.push_back(false);
        distanceNodes.push_back(numeric_limits <int> :: max());
        parents.push_back(sentinel);
    }
}

void shortestPath(graph * g, int start, int end)
{
    //Very confused about how this function works
    initSearch(g);
    int edge;
    int curr; //current node
    int dist;
    distanceNodes[start] = 0; 
    curr = start;
    while (! inTree[curr])
    {
        inTree[curr] = true;
        ivec edges = g->edges[curr - 1];
        for (int i = 0; i < edges.size(); i++)
        {
            edge = edges[i];

            if (distanceNodes[edge] > distanceNodes[curr] + 1)
            {
                distanceNodes[edge] = distanceNodes[curr] + 1;
                parents[edge] = curr;
            }
        }
        curr = 1;
        dist = numeric_limits <int> :: max();
        for (int i = 1; i <= g->numVertices; i++)
        {
            if ((!inTree[i]) && (dist > distanceNodes[i]))
            {
                dist = distanceNodes[i];
                curr = i;
            }
        }
    }
    ivec path;
    if (distanceNodes[end] == numeric_limits <int> :: max()) //is there a numeric_limits <string> :: max?
    {
        cout << "No way from " << start << " to " << end << endl;
    }
    else
    {
        int temp = end;
        while (temp != start)
        {
            path.push_back(temp);
            temp = parents[temp];
        }
        path.push_back(start);
        reverse(path.begin(), path.end());
        cout << "From " << start << " to " << end << " is " << path << endl;
    }
}

If you can help, that would be most welcome as I most likely will have more projects with graphs and I'm struggling due to not understanding them.

Thank you, Tristan

Upvotes: 0

Views: 1762

Answers (2)

Rakib
Rakib

Reputation: 7625

typedef vector <ivec> imatrix; //A vector of vectors, not how this works or how to implement

Here the graph is represented as Adjacency Matrix. You can also represent a graph using Adjacency List, where each Node would hold a array/linked list of neighboring nodes.

 g -> numVertices = 0; //Why set the number of vertices to 0?

It initializes the graph, at startup number of vertices/nodes is zero. When edges and nodes will be inserted using insertEdge method then this number will be updated.

int numVertices = max(nodeNum, edgeNum); //Max finds the larger of two numbers I believe? How can this be used with strings, one is not bigger than the other

though you have not posted full code, I think that the maximum value is used to add required number of vertices before inserting an edge.

ivec nodes;
if (g->edges.size() < i)
{
  g -> edges.push_back(nodes);
}

above code inserts new vertices. You will probably do integer comparison as here for your version, not string, string is the data of node, not number of node. Still if you need string comparison, C++ already has overloaded operators for this.

About initSearch and shortestPath methods, here the latter finds shortest path between nodes using an algorithm( I don't know which, you can search), and before searching for a shortest path, the former method initializes the values that will be used to search. For example it could set the distances between each pair of node to infinity initially, when a path is found between them, it will be updated.

Upvotes: 1

Jerry Jeremiah
Jerry Jeremiah

Reputation: 9618

Some answers:

Q. You asked why numVertices is set to 0 in the following:

void initGraph(graph * g)
{
    g -> numVertices = 0; //Why set the number of vertices to 0?
}

A. Look at the declaration of g - it is default initialized:

int main()
{
    graph g;
    ....
}

Now look at the definition of graph - it has no constructor:

struct graph
{
    imatrix edges; //list of attached vertices for each node
    int numVertices;
};

So edges gets initialized properly by default because vectors have a constructor. But numVertices is a primitive type so it will contain whatever random value happens to be in that memory location - so that means it needs to be manually initialized. Thats why initGraph doesn't need to initialize edges but it does need to initalize numVertices.

Q. You asked how you can find the larger of two std::strings knowing that max() returns the larger of two integers:

int numVertices = max(nodeNum, edgeNum); //Max finds the larger of two numbers I believe? How can this be used with strings, one is not bigger than the other

A. According to http://www.cplusplus.com/reference/algorithm/max/ max uses "The function uses operator< (or comp, if provided) to compare the values." but std::strings can be compared using the < operator so there really is no problem.

Q. You asked about a vector of vectors:

typedef vector <int> ivec;
typedef vector <ivec> imatrix; //A vector of vectors, not how this works or how to implement

A. You can access a vector with [] so if you had a variable called x of imatrix type you could say x[0] which would return an ivec (because that is the type of object stored in an imatrix vector. So if you said x[0][0] that would return the first integer stored in the ivec that is returned by x[0]. To change it to use a string just say:

typedef vector <std::string> ivec;
typedef vector <ivec> imatrix;

You could also rename the variables if you wanted.

You would also need to #include <string>

Upvotes: 0

Related Questions