Reputation: 7225
We are working on a high-performance computing project that uses MPI as its parallel computing framework. Only a few algorithms are already implemented on the legacy platform. Our job is to rewrite the original serial algorithms as parallel versions based on MPI.
I have run into this performance problem: when running the MPI-based parallel algorithm, there is a lot of communication overhead between the processes. The inter-process communication consists of three steps:
We found that these communication steps, especially the serialize/deserialize steps, cost a huge amount of time. How can we handle this performance issue?
By the way, our C++ code uses a lot of STL, which is more complex than C-like structs.
P.S. I currently do this (serialization) by writing code that traverses all fields of the objects and copies them sequentially into a byte array.
To demonstrate what I am doing, here is a code snippet. Note that this is just the construction process for a single feature:
// Converts the geometry of one feature (index iFeature of outLayer) into an
// OGRGeometry by exporting it to a temporary WKB byte buffer and importing
// that buffer on the OGR side, then attaches the result to a new OGRFeature.
sic::GeometryFeature *ptFeature =
(GeometryFeature *) outLayer->getFeature(iFeature);
sic::Geometry* geom = ptFeature->getGeometry();
std::string geomClassName = geom->getClassName();
// ptGeom starts as the feature's own geometry and is replaced below when the
// geometry has to be wrapped in a collection type.
sic::Geometry* ptGeom = geom;
unsigned char *wkbBuffer = NULL;
OGRGeometry * gtGeom = NULL;
// The geometry kind is selected by comparing the class name string.
if (geomClassName == "Point") {
// A lone Point is wrapped in a freshly allocated MultiPoint before export.
// NOTE(review): nothing in this snippet deletes that MultiPoint — confirm it
// is released elsewhere.
ptGeom = new sic::MultiPoint();
((sic::MultiPoint *) ptGeom)->insert(geom);
gtGeom = new OGRMultiPoint();
// Size the temporary buffer from the geometry's reported WKB size.
int wkbSize = ((sic::MultiPoint *) ptGeom)->WkbSize();
// NOTE(review): the malloc result is not checked before it is written to.
wkbBuffer = (unsigned char *) malloc(wkbSize);
((sic::GeometryCollection *) ptGeom)->exportToWkb(sic::wkbNDR,
wkbBuffer, wkbMultiPoint);
}
// NOTE(review): the closing brace above together with the one on the next
// line leaves the braces unbalanced as posted; the remaining branches were
// elided by the author.
} else if (...) {
......
}
// Rebuild the OGR geometry from the exported bytes, then drop the buffer.
gtGeom->importFromWkb(wkbBuffer);
free(wkbBuffer);
// NOTE(review): this assert runs after gtGeom has already been dereferenced
// two lines up, so it cannot catch a null gtGeom in time.
assert(gtGeom);
OGRFeature * poFeature = OGRFeature::CreateFeature(
poLayer->GetLayerDefn());
poFeature->SetGeometry(gtGeom);
And here is more about how I am serializing objects:
// Packs a layer into one malloc'ed byte buffer and returns it.
// Layout written below: [geometry type][feature count][WKB of each feature
// that has a geometry, back to back].
// NOTE(review): `size` and `layer` come from code not shown in this snippet;
// the buffer is assumed to be large enough for everything written — confirm.
unsigned char *bytes = (unsigned char *) malloc(size);
// Running write position inside `bytes`.
size_t offset = 0;
// Header field 1: the layer's geometry type, copied as raw bytes.
size_t type_size = sizeof(OGRwkbGeometryType);
OGRwkbGeometryType type = layer->GetGeomType();
memcpy(bytes + offset, &type, type_size);
offset += type_size;
// Header field 2: the feature count, stored immediately after the type
// (i.e. at byte offset type_size).
size_t count_size = sizeof(int);
int count = layer->GetFeatureCount();
memcpy(bytes + offset, &count, count_size);
offset += count_size;
// Walk every feature of the layer from the beginning.
layer->ResetReading();
for (OGRFeature *feature = layer->GetNextFeature(); feature != NULL;
feature = layer->GetNextFeature()) {
OGRGeometry *geometry = feature->GetGeometryRef();
if (geometry) {
// Append this geometry's WKB and advance by its reported size.
geometry->exportToWkb(wkbNDR, bytes + offset);
offset += geometry->WkbSize();
} else {
// No geometry to write: decrement the count already stored in the header
// (at offset type_size) so it matches the number of geometries emitted.
(*(int *) (bytes + type_size))--;
}
// Each feature fetched in the loop is destroyed once it has been processed.
OGRFeature::DestroyFeature(feature);
}
return bytes;
Any Comment will be appreciated. Thanks!
Upvotes: 0
Views: 2429
Reputation: 106068
(Brian's answer's offering to help you use a library... he's a very experienced programmer - sounds like it could be worth a go.)
Separately, I looked at your code - there's lots of temporary buffers, new/malloc allocation, use of sizeof
etc.. so I thought I'd illustrate a "quick, simple but nice" approach to cleaning that up - enough to hopefully get you started...
First create a binary stream type that factors and hides a lot of the low-level work:
#include <arpa/inet.h> // for htonl/s, ntoh/s
#include <endian.h> // for htonbe64, if you have it...
#include <iostream>
#include <string>
#include <map>
// Support routines: an overload set so that generic code can write hton(x) /
// ntoh(x) and let the compiler pick the 16- or 32-bit conversion from the
// argument type.
//
// 64-bit variants are left disabled because <endian.h> is not available on
// every platform; where it is, the functions are spelled htobe64/be64toh:
// uint64_t hton(uint64_t n) { return htobe64(n); }
uint32_t hton(uint32_t n) { return htonl(n); }
uint16_t hton(uint16_t n) { return htons(n); }

// htonl/htons only take unsigned values, so the signed overloads convert
// explicitly and hand back the unsigned network-order representation.
uint32_t hton(int32_t n) { return htonl(static_cast<uint32_t>(n)); }
uint16_t hton(int16_t n) { return htons(static_cast<uint16_t>(n)); }

// uint64_t ntoh(uint64_t n) { return be64toh(n); }
uint32_t ntoh(uint32_t n) { return ntohl(n); }
// The 16-bit overload must use ntohs: ntohl would swap the value as a 32-bit
// quantity, and truncating that back to uint16_t yields 0 on little-endian
// hosts.
uint16_t ntoh(uint16_t n) { return ntohs(n); }
// Binary_OStream<OStream> derives from a std::ostream-style class (for
// example std::ostringstream) and adds operator<< overloads that emit a
// binary encoding instead of formatted text:
//   - bool           -> one byte, 'T' or 'F'
//   - int8/uint8     -> one raw byte
//   - 16/32-bit ints -> converted with the global ::hton overloads
//   - double         -> raw in-memory bytes (host byte order, not converted)
//   - std::string    -> 32-bit length prefix, then the characters
//   - std::map       -> 32-bit element count, then key/value pairs in order
template <typename OStream>
class Binary_OStream : public OStream
{
public:
    typedef Binary_OStream This;

    // Same as OStream::write, but returns the derived type so that calls can
    // keep chaining through the binary operator<< overloads.
    This& write(const char* s, std::streamsize n)
    {
        OStream::write(s, n);
        return *this;
    }

    // Writes the object representation of t, preceded by the text
    // "[<sizeof t>]".
    // NOTE(review): the bracketed size marker looks like a debugging aid that
    // makes field boundaries visible when the stream is printed; a real wire
    // format would presumably drop it - confirm before relying on the layout.
    template <typename T>
    This& rawwrite(const T& t)
    {
        static_cast<OStream&>(*this) << '[' << sizeof t << ']';
        return write(reinterpret_cast<const char*>(&t), sizeof t);
    }

    // Converts h with the matching global ::hton overload and writes the
    // converted value with rawwrite.
    template <typename T>
    This& hton(T h)
    {
        T n = ::hton(h);
        return rawwrite(n);
    }

    // conversions for inbuilt & Standard-library types...

    // The byte is written directly: streaming a char through `bs << ...`
    // has no char overload in this class to land on.
    friend This& operator<<(This& bs, bool x)
    {
        const char flag = x ? 'T' : 'F';
        return bs.write(&flag, 1);
    }

    // Single bytes have no byte order; write them as-is. (Forwarding to
    // `bs << x` here would select this very overload again and recurse
    // forever.)
    friend This& operator<<(This& bs, int8_t x)
    {
        return bs.write(reinterpret_cast<const char*>(&x), 1);
    }
    friend This& operator<<(This& bs, uint8_t x)
    {
        return bs.write(reinterpret_cast<const char*>(&x), 1);
    }

    friend This& operator<<(This& bs, int16_t x) { return bs.hton(x); }
    friend This& operator<<(This& bs, uint16_t x) { return bs.hton(x); }
    friend This& operator<<(This& bs, int32_t x) { return bs.hton(x); }
    friend This& operator<<(This& bs, uint32_t x) { return bs.hton(x); }
    friend This& operator<<(This& bs, double d) { return bs.rawwrite(d); }

    friend This& operator<<(This& bs, const std::string& x)
    {
        bs.write_length(x.size());
        return bs.write(x.data(), static_cast<std::streamsize>(x.size()));
    }

    template <typename K, typename V, typename A>
    friend This& operator<<(This& bs, const std::map<K, V, A>& m)
    {
        typedef typename std::map<K, V, A>::const_iterator It;
        bs.write_length(m.size());
        for (It it = m.begin(); it != m.end(); ++it)
            bs << it->first << it->second;
        return bs;
    }

    // add any others you want...

private:
    // Writes a string length / element count as a fixed 32-bit value via
    // hton. Passing the size_t straight to operator<< is ambiguous wherever
    // size_t is not the same type as uint32_t, and would make the prefix
    // width depend on the platform. A count that does not fit in 32 bits
    // sets failbit on the stream instead of being silently truncated.
    This& write_length(std::string::size_type n)
    {
        const uint32_t n32 = static_cast<uint32_t>(n);
        if (n32 != n)
            OStream::setstate(std::ios_base::failbit);
        return hton(n32);
    }
};
Creating a user-defined binary-serialisable type...
// for your own objects...
// Example of a user-defined type that can be streamed into a Binary_OStream.
struct Object
{
    std::string s_;
    double x_;

    Object(const std::string& s, double x) : s_(s), x_(x) { }

    // Chooses which fields are serialised and in what order: the string
    // first, then the double.
    template <typename T>
    friend Binary_OStream<T>& operator<<(Binary_OStream<T>& os, const Object& o)
    {
        os << o.s_;
        os << o.x_;
        return os;
    }
};
Example usage:
#include <cctype>
#include <iomanip>
#include <sstream>
// support routines just to help you observe/debug the serialisation...
// Returns c unchanged when it is a printable character; otherwise returns a
// four-character escape of the form \xNN (two lowercase hex digits).
std::string printable(char c)
{
    // The <cctype> functions require a value representable as unsigned char
    // (or EOF); passing a negative plain char is undefined behaviour.
    const unsigned char byte = static_cast<unsigned char>(c);
    if (std::isprint(byte))
        return std::string(1, c);

    std::ostringstream oss;
    oss << "\\x" << std::hex << std::setw(2) << std::setfill('0')
        << static_cast<int>(byte);
    return oss.str();
}

// Applies printable(char) to every character of s and concatenates the
// results, so arbitrary binary data can be shown on a terminal.
std::string printable(const std::string& s)
{
    std::string result;
    result.reserve(s.size());  // lower bound: escapes only make it longer
    for (std::string::const_iterator i = s.begin(); i != s.end(); ++i)
        result += printable(*i);
    return result;
}
int main()
{
{
Binary_OStream<std::ostringstream> bs;
Object o("pi", 3.14);
bs << o;
std::cout << "serialised to '" << printable(bs.str()) << "'\n";
}
{
Binary_OStream<std::ostringstream> bs;
std::map<int, std::string> m;
m[0] = "zero";
m[1] = "one";
m[2] = "two";
bs << m;
std::cout << "serialised to '" << printable(bs.str()) << "'\n";
}
}
The next step is to create a Binary_IStream
- it's very, very similar to the above. (boost
reduces the work a little by using the '%' operator instead of the traditional <<
and >>
, such that the same function can specify fields for serialisation and deserialisation.)
Implementation notes/thoughts:
std::ostream&
into a private
member variable, then send all streaming operations to that data member.
Binary_Stream
to any existing stream at any time (great if someone's passing you a pre-existing stream).ostream
member functions that you want to be accessible to Binary_Stream
users (more control but tedious), or provide a (less convenient/elegant?) std::ostream& stream() { return s_; }
-style accessor.
Upvotes: 1