katetsu
katetsu

Reputation: 109

How to efficiently use boost graph

I have the following piece of code:

struct Vertex {
std::string name;
};

struct Edge {
std::string name;
};

class MyGraph {
public:
  MyGraph() = default;

  using Graph = adjacency_list<vecS, vecS, bidirectionalS,
                               std::shared_ptr<Vertex>, std::shared_ptr<Edge>>;
  using VertexDesc = graph_traits<Graph>::vertex_descriptor;
  using EdgeDesc = graph_traits<Graph>::edge_descriptor;

  void addNode(std::shared_ptr<Vertex> node){
    const auto name = node->name;

    if (vertexMap_.find(name) == vertexMap_.end()) {
      const auto vertex = add_vertex(node, graph_);
      vertexLabelArray_.emplace_back(name);
      vertexMap_[name] = vertex;
    }
  }
  void addEdge(std::shared_ptr<Vertex> src, std::shared_ptr<Vertex> dst, std::shared_ptr<Edge> weight = nullptr) {

  const auto srcName = src->name;
  const auto dstName = dst->name;

  const auto vertexPair = std::make_pair(srcName, dstName);

    if (edgeSet_.find(vertexPair) == edgeSet_.end()) {
        addNode(src);
        addNode(dst);
        const auto edge = add_edge(vertexMap_[srcName], vertexMap_[dstName], weight, graph_).first;
        edgeSet_.insert(vertexPair);
        edgeLabelMap_[edge] = weight ? weight->name : "";

    }
  }
  void print(std::ostream& out)
  {
    write_graphviz(out, graph_, 
        make_label_writer(vertexLabelArray_.data()), 
        make_label_writer(make_assoc_property_map(edgeLabelMap_)));
  }


private:
  std::vector<std::string> vertexLabelArray_;
  std::map<EdgeDesc, std::string> edgeLabelMap_;
  std::map<std::string, VertexDesc> vertexMap_;
  std::set<std::pair<std::string, std::string>> edgeSet_;
  Graph graph_;
};


struct Node : Vertex {};
struct Arc : Edge {};

int main() {
    MyGraph g;

    const auto n1 = std::make_shared<Node>(Node{{"n1"}});
    const auto n2 = std::make_shared<Node>(Node{{"n2"}});
    const auto e1 = std::make_shared<Arc>(Arc{{"e1"}});

    g.addEdge(n1, n2, e1);

    g.print(std::cout);

}

I would like to reduce some of the potential redundancies:

1 - how can i use setS instead of vecS to avoid checking is a vertex/edge exists. when i do so, the write_graphiz function complains that it fails with lots of errors beginning with

2 - i am using a shared_ptr for allowing for custom object to be attached to vertices and edges. is there an easy way for looking up a vertex index based on its attached object?

3 - Is it possible to remove most of the external data structures and use boost property instead somehow?

the whole thing is uploaded here: https://godbolt.org/z/qbEnEoYj3

any help is appreciated.

Upvotes: 2

Views: 1296

Answers (1)

sehe
sehe

Reputation: 393009

1 - how can i use setS instead of vecS to avoid checking is a Q.: vertex/edge exists. when i do so, the write_graphiz function complains that it fails with lots of errors beginning with

Sure. In fact the edge selector is no problem to begin with: https://godbolt.org/z/b1xbYMM4s. Making the vertex containter setS won't do what you mean, because every added vertex is unique by definition.

But in the interest of showing the technical issue at hand: there is no implicit vertex index (ordinal numbering [0..numvertices()) for that storage strategy. Some algorithms require one. Also, you assume one with VertexLabelArray_ which no longer makes sense, so let's change into VertexLabelMap_ just like for edge labels:

Live On Compiler Explorer

#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/graphviz.hpp>

struct Vertex {
    std::string name;
};

struct Edge {
    std::string name;
};

class MyGraph {
public:
    MyGraph() = default;

    using Graph      = boost::adjacency_list< //
        boost::setS, boost::listS, boost::bidirectionalS,
        std::shared_ptr<Vertex>, std::shared_ptr<Edge>>;
    using VertexDesc = boost::graph_traits<Graph>::vertex_descriptor;
    using EdgeDesc   = boost::graph_traits<Graph>::edge_descriptor;

    void addNode(std::shared_ptr<Vertex> node)
    {
        const auto name = node->name;

        if (vertexMap_.find(name) == vertexMap_.end()) {
            const auto vertex = add_vertex(node, graph_);
            vertexLabelMap_.emplace(vertex, name);
            vertexMap_[name] = vertex;
        }
    }

    void addEdge(std::shared_ptr<Vertex> src, std::shared_ptr<Vertex> dst,
                std::shared_ptr<Edge> weight = nullptr)
    {
        const auto srcName = src->name;
        const auto dstName = dst->name;

        const auto vertexPair = std::make_pair(srcName, dstName);

        if (edgeSet_.find(vertexPair) == edgeSet_.end()) {
            addNode(src);
            addNode(dst);
            const auto edge = add_edge(vertexMap_[srcName], vertexMap_[dstName],
                                    weight, graph_)
                                .first;
            edgeSet_.insert(vertexPair);
            edgeLabelMap_[edge] = weight ? weight->name : "";
        }
    }

    void print(std::ostream& out)
    {
        std::map<VertexDesc, int> idmap;

        for (auto v : boost::make_iterator_range(vertices(graph_))) {
            idmap.emplace(v, idmap.size());
        }
        write_graphviz(out, graph_,
                    boost::make_label_writer(
                        boost::make_assoc_property_map(vertexLabelMap_)),
                    boost::make_label_writer(
                        boost::make_assoc_property_map(edgeLabelMap_)),
                    boost::default_writer{},
                    boost::make_assoc_property_map(idmap));
    }

private:
    std::map<VertexDesc, std::string>             vertexLabelMap_;
    std::map<EdgeDesc, std::string>               edgeLabelMap_;
    std::map<std::string, VertexDesc>             vertexMap_;
    std::set<std::pair<std::string, std::string>> edgeSet_;
    Graph                                         graph_;
};

struct Node : Vertex {};
struct Arc : Edge {};

#include <iostream>
int main() {
    MyGraph g;

    const auto n1 = std::make_shared<Node>(Node{{"n1"}});
    const auto n2 = std::make_shared<Node>(Node{{"n2"}});
    const auto e1 = std::make_shared<Arc>(Arc{{"e1"}});

    g.addEdge(n1, n2, e1);

    g.print(std::cout);

}

Alternatively, if such an index can be made part of Vertex:

Live On Compiler Explorer

#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/graphviz.hpp>
#include <boost/property_map/transform_value_property_map.hpp>

struct Vertex {
    int id;
    std::string name;
};

struct Edge {
    std::string name;
};

class MyGraph {
public:
    MyGraph() = default;

    using Graph      = boost::adjacency_list< //
        boost::setS, boost::listS, boost::bidirectionalS,
        std::shared_ptr<Vertex>, std::shared_ptr<Edge>>;
    using VertexDesc = boost::graph_traits<Graph>::vertex_descriptor;
    using EdgeDesc   = boost::graph_traits<Graph>::edge_descriptor;

    void addNode(std::shared_ptr<Vertex> const& node)
    {
        const auto name = node->name;

        if (vertexMap_.find(name) == vertexMap_.end()) {
            const auto vertex = add_vertex(node, graph_);
            vertexLabelArray_.push_back(name);
            vertexMap_[name] = vertex;
        }
    }

    void addEdge(std::shared_ptr<Vertex> const& src,
                std::shared_ptr<Vertex> const& dst,
                std::shared_ptr<Edge> const&   weight = nullptr)
    {
        const auto srcName = src->name;
        const auto dstName = dst->name;

        const auto vertexPair = std::make_pair(srcName, dstName);

        if (edgeSet_.find(vertexPair) == edgeSet_.end()) {
            addNode(src);
            addNode(dst);
            const auto edge = add_edge(vertexMap_[srcName], vertexMap_[dstName],
                                    weight, graph_)
                                .first;
            edgeSet_.insert(vertexPair);
            edgeLabelMap_[edge] = weight ? weight->name : "";
        }
    }

    void print(std::ostream& out)
    {
        auto idmap = boost::make_transform_value_property_map(
                [](auto const& sp) { return sp->id; },
                get(boost::vertex_bundle, graph_));

        write_graphviz(
            out, graph_,
            boost::make_label_writer(boost::make_iterator_property_map(
                vertexLabelArray_.begin(), idmap)),
            boost::make_label_writer(
                boost::make_assoc_property_map(edgeLabelMap_)),
            boost::default_writer{}, idmap);
    }

private:
    std::vector<std::string>                      vertexLabelArray_;
    std::map<EdgeDesc, std::string>               edgeLabelMap_;
    std::map<std::string, VertexDesc>             vertexMap_;
    std::set<std::pair<std::string, std::string>> edgeSet_;
    Graph                                         graph_;
};

struct Node : Vertex {};
struct Arc : Edge {};

#include <iostream>
int main() {
    MyGraph g;

    const auto n1 = std::make_shared<Node>(Node{{0, "n1"}});
    const auto n2 = std::make_shared<Node>(Node{{1, "n2"}});
    const auto e1 = std::make_shared<Arc>(Arc{{"e1"}});

    g.addEdge(n1, n2, e1);

    g.print(std::cout);

}

Q.: 2 - i am using a shared_ptr for allowing for custom object to be attached to vertices and edges.

I noticed this. I think it's a little bit questionable. If you're looking for efficiency, I wouldn't be using shared_ptr all over the place.

Q.: is there an easy way for looking up a vertex index based on its attached object?

Not built in. There is the (undocumented?) labeled_graph adaptor that has... some convenience. YMMV. Also, you can use a bimap or similar.

Q.: 3 - Is it possible to remove most of the external data structures and use boost property instead somehow?

I would strongly consider this. Of course you can. Basically, you already do this.

Review

Your code has many opportunities for improvement (including effective optimizations).

  1. pass the shared pointers by const&:

  2. do not pass shared pointers in the first place, since you're not intending to share ownership/observe refcounts/lifetimes. I'll show this when I show a version that embodies the node/edge data in the graph model directly.

  3. void addNode could return the vertex descriptor, avoiding a lot of redundant lookups. This also makes it more explicit about an item already existing not being an error. (Right now it simply ignores the addNode/addEdge?)

    void ensureEdge(std::shared_ptr<Vertex> const& src,
                    std::shared_ptr<Vertex> const& dst,
                    std::shared_ptr<Edge> const&   edge = {})
    {
        if (edgeSet_.emplace(src->name, dst->name).second) {
            auto [descriptor, isnew] = add_edge( //
                ensureNode(src), ensureNode(dst), edge, graph_);
    
            edgeLabelMap_.emplace(descriptor, edge ? edge->name : "");
        }
    }
    

    Doing less work means being more efficient.

  4. Like you almost mentioned you can already test whether an edge exists using the result of add_edge, obviating the extra edgeSet_:

    EdgeDesc ensureEdge(std::shared_ptr<Vertex> const& src,
                        std::shared_ptr<Vertex> const& dst,
                        std::shared_ptr<Edge> const&   edge = {})
    {
        auto [descriptor, isnew] =
            add_edge(ensureNode(src), ensureNode(dst), edge, graph_);
    
        if (isnew)
            edgeLabelMap_.emplace(descriptor, edge ? edge->name : "");
    
        return descriptor;
    }
    

    Note that now, because the behaviour is more consistent, we can also return the edge descriptor. This will remove the need to query for the edge we just added.

Current version Live On Compiler Explorer:

#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/graphviz.hpp>
#include <boost/property_map/transform_value_property_map.hpp>

struct Vertex { std::string name; };
struct Edge   { std::string name; };
class MyGraph {
public:
    MyGraph() = default;

    using Graph      = boost::adjacency_list< //
        boost::setS, boost::vecS, boost::bidirectionalS,
        std::shared_ptr<Vertex>, std::shared_ptr<Edge>>;
    using VertexDesc = Graph::vertex_descriptor;
    using EdgeDesc   = Graph::edge_descriptor;

    VertexDesc ensureNode(std::shared_ptr<Vertex> const& node)
    {
        auto const& name = node->name;

        auto it = vertexMap_.find(name);
        if (it == vertexMap_.end()) {
            auto descriptor = add_vertex(node, graph_);
            vertexLabelArray_.push_back(name);
            vertexMap_[name] = descriptor;
            it = vertexMap_.emplace(name, descriptor).first;
        }

        return it->second;
    }

    EdgeDesc ensureEdge(std::shared_ptr<Vertex> const& src,
                        std::shared_ptr<Vertex> const& dst,
                        std::shared_ptr<Edge> const&   edge = {})
    {
        auto [descriptor, isnew] =
            add_edge(ensureNode(src), ensureNode(dst), edge, graph_);

        if (isnew)
            edgeLabelMap_.emplace(descriptor, edge ? edge->name : "");

        return descriptor;
    }

    void print(std::ostream& out) const
    {
        write_graphviz(
            out, graph_, //
            boost::make_label_writer(vertexLabelArray_.data()),
            boost::make_label_writer(make_assoc_property_map(edgeLabelMap_)));
    }

private:
    std::vector<std::string>          vertexLabelArray_;
    std::map<EdgeDesc, std::string>   edgeLabelMap_;
    std::map<std::string, VertexDesc> vertexMap_;
    Graph                             graph_;
};

struct Node : Vertex {};
struct Arc : Edge {};

#include <iostream>
int main() {
    MyGraph g;

    const auto n1 = std::make_shared<Node>(Node{{"n1"}});
    const auto n2 = std::make_shared<Node>(Node{{"n2"}});
    const auto e1 = std::make_shared<Arc>(Arc{{"e1"}});

    g.ensureEdge(n1, n2, e1);
    g.print(std::cout);
}

Still prints

digraph G {
0[label=n2];
1[label=n1];
1->0 [label=e1];
}

Other Steps

  1. You specified BidirectionalGraph support, but don't currently use the in-edge interface. Consider dropping that so you don't incur the overhead for maintaining the redundant edge information

    using Graph      = boost::adjacency_list< //
        boost::setS, boost::vecS, boost::directedS,
        std::shared_ptr<Vertex>, std::shared_ptr<Edge>>;
    
  2. Consider using value semantics for the property bundles. This will reduce allocations, increase locality of reference and may enable a host of storage optimization opportunities.

    Consider this version, which no longer uses vertexLabelArray_ nor edgeLabelMap_ (both properties simply reside inside the graph model after all):

    void print(std::ostream& out) const {
        write_graphviz(out, graph_,
                       make_label_writer(get(&Vertex::name, graph_)),
                       make_label_writer(get(&Edge::name, graph_)));
    }
    

Live On Compiler Explorer

#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/graphviz.hpp>

struct Vertex { std::string name; };
struct Edge   { std::string name; };

class MyGraph {
public:
    using Graph      = boost::adjacency_list< //
        boost::setS, boost::vecS, boost::directedS, Vertex, Edge>;
    using VertexDesc = Graph::vertex_descriptor;
    using EdgeDesc   = Graph::edge_descriptor;

    VertexDesc ensureNode(Vertex const& node) {
        if (auto it = vertexMap_.find(node.name); it != vertexMap_.end())
            return it->second;

        auto descriptor       = add_vertex(node, graph_);
        vertexMap_[node.name] = descriptor;
        vertexMap_.emplace(node.name, descriptor);
        return descriptor;
    }

    EdgeDesc ensureEdge(Vertex const& src, Vertex const& dst, Edge edge) {
        auto [descriptor, isnew] =
            add_edge(ensureNode(src), ensureNode(dst), std::move(edge), graph_);
        return descriptor; // TODO maybe raise an error if !isnew?
    }

    void print(std::ostream& out) const {
        write_graphviz(out, graph_,
                    make_label_writer(get(&Vertex::name, graph_)),
                    make_label_writer(get(&Edge::name, graph_)));
    }
private:
    std::map<std::string, VertexDesc> vertexMap_;
    Graph graph_;
};

struct Node : Vertex { };
struct Arc : Edge { };

#include <iostream>
int main() {
    MyGraph g;

    Node n1{{"n1"}}, n2{{"n2"}};
    Arc  e1{{"e1"}};

    g.ensureEdge(n1, n2, e1);
    g.ensureEdge({"n3"}, {"n4"}, {"e2"});
    g.print(std::cout);
}

Prints

digraph G {
0[label=n2];
1[label=n1];
2[label=n4];
3[label=n3];
1->0 [label=e1];
3->2 [label=e2];
}

Much More Efficient Vertex Lookup?

If you really care about storage and lookup efficiency, make vertexMap_ key a string_view. This requires careful thought about the lifetime of the referred-to strings.

// use stored property, not parameter as vertexMap_ key!
vertexMap_.emplace(graph_[vd].name, vd);

For starters, you NEED a container selector with reference stabiility for the vertex container (e.g. listS).

Also, consider making the container a flat_map so storage and lookup will be much optimized. They will effectively become vector<tuple<char const*, size_t, size_t> >:

boost::container::flat_map<std::string_view, VertexDesc> vertexMap_;

See It Live On Compiler Explorer

#include <boost/container/flat_map.hpp>
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/graphviz.hpp>

struct Vertex { std::string name; };
struct Edge   { std::string name; };

class MyGraph {
public:
    using Graph      = boost::adjacency_list< //
        boost::setS, boost::listS, boost::directedS, Vertex, Edge>;
    using VertexDesc = Graph::vertex_descriptor;
    using EdgeDesc   = Graph::edge_descriptor;

    VertexDesc ensureNode(Vertex const& node) {
        if (auto it = vertexMap_.find(node.name); it != vertexMap_.end())
            return it->second;

        auto vd = add_vertex(node, graph_);
        // use stored property, not parameter as vertexMap_ key!
        vertexMap_.emplace(graph_[vd].name, vd);
        return vd;
    }

    EdgeDesc ensureEdge(Vertex const& src, Vertex const& dst, Edge edge) {
        auto [ed, isnew] =
            add_edge(ensureNode(src), ensureNode(dst), std::move(edge), graph_);
        return ed; // TODO maybe raise an error if !isnew?
    }

    void print(std::ostream& out) const {
        write_graphviz(out, graph_,
                    make_label_writer(get(&Vertex::name, graph_)),
                    make_label_writer(get(&Edge::name, graph_)),
                    boost::default_writer{},
                    get(&Vertex::name, graph_));
    }

private:
    boost::container::flat_map<std::string_view, VertexDesc> vertexMap_;
    Graph graph_;
};

struct Node : Vertex { };
struct Arc : Edge { };

#include <iostream>
int main() {
    MyGraph g;

    Node n1{{"n1"}}, n2{{"n2"}};
    Arc  e1{{"e1"}};

    g.ensureEdge(n1, n2, e1);
    g.ensureEdge({"n3"}, {"n4"}, {"e2"});
    g.print(std::cout);
}

Note this cheated a little by using the Vertex name as the graphviz node ID. That makes a lot of sense anyways:

digraph G {
n2[label=n2];
n1[label=n1];
n4[label=n4];
n3[label=n3];
n1->n2 [label=e1];
n3->n4 [label=e2];
}

If you insist on integral IDs - you can hack them back using the fact that vertexMap_ is now contiguous:

void print(std::ostream& out) const {
    auto node_id = boost::make_function_property_map<VertexDesc>(
        [this](VertexDesc vd) {
            return vertexMap_.find(graph_[vd].name) - vertexMap_.begin();
        });
    write_graphviz(out, graph_,
                   make_label_writer(get(&Vertex::name, graph_)),
                   make_label_writer(get(&Edge::name, graph_)),
                   boost::default_writer{}, node_id);
}

This prints (Live Link):

digraph G {
1[label=n2];
0[label=n1];
3[label=n4];
2[label=n3];
0->1 [label=e1];
2->3 [label=e2];
}

Note that doing the listS vertex container selection makes a BIG difference in resource trade-offs in the graph implementation itself, so if the vertex lookup via vertexMap_ is not your bottleneck, don't do this optimization!

More BGL facilities

As I mentioned there's some supported for labeled graphs in BGL. I'm not recommending that as I've run into my share of implementation bugs/quirks. But I would be remiss not to mention it here.

Also see How to create a named_graph with Boost Graph Library? which is a very good answer

Upvotes: 2

Related Questions