Reputation: 655
I have a Java library that is writing an Arrow Table to a VectorSchemaRoot
object in memory. Those serialized bytes are available to me in a std::string
object in C++. How do I de-serialize and read the data?
Java:
// Write the current contents of vectorSchemaRoot through an ArrowStreamWriter
// into outputStream. try-with-resources closes the writer when the block exits.
try (final ArrowStreamWriter arrowStreamWriter
= new ArrowStreamWriter(vectorSchemaRoot, provider, outputStream)) {
// start / writeBatch / end must run in this order: open the stream, emit one
// batch, then terminate the stream.
arrowStreamWriter.start();
arrowStreamWriter.writeBatch();
arrowStreamWriter.end();
// NOTE(review): presumably `buffer` is the storage behind `outputStream`;
// its declaration is not part of this excerpt -- confirm.
return buffer.byteArray();
}
C++
std::string bytes;
???
Upvotes: 0
Views: 727
Reputation: 13902
Assuming you've written a RecordBatch, I think you can read it back this way:
#include <arrow/api.h>
#include <arrow/io/memory.h>
#include <arrow/ipc/reader.h>  // RecordBatchStreamReader is declared here, not in writer.h

#include <iostream>
// ...
// `bytes` is the std::string holding the stream produced by the Java ArrowStreamWriter.
// BufferReader only views that memory (it does not take a copy), so `bytes` has to
// stay alive for as long as the reader and the batches read from it are in use.
auto bufferReader = std::make_shared<arrow::io::BufferReader>(bytes);
// Open() is given a raw pointer, so `bufferReader` must also outlive `reader`.
auto reader = arrow::ipc::RecordBatchStreamReader::Open(bufferReader.get()).ValueOrDie();
// Next() hands back a null batch once the stream has no more data, so check
// before dereferencing.
std::shared_ptr<arrow::RecordBatch> recordBatchBack = reader->Next().ValueOrDie();
if (recordBatchBack != nullptr) {
  std::cout << recordBatchBack->num_rows() << '\n';
}
Here's an end to end test in c++:
#include <arrow/api.h>
#include <arrow/ipc/writer.h>
#include <arrow/ipc/reader.h>
#include <arrow/io/memory.h>
// Round-trip check: write a three-row record batch with the Arrow IPC stream
// writer, copy the resulting bytes into a std::string, read them back with
// RecordBatchStreamReader and verify the row count.
BOOST_AUTO_TEST_CASE(RecordBatchStreamReaderTest) {
  // Build a single int32 column holding 1, 2, 3. Append() returns an
  // arrow::Status; abort the test immediately if any call reports an error
  // instead of silently dropping it.
  arrow::Int32Builder builder;
  BOOST_REQUIRE(builder.Append(1).ok());
  BOOST_REQUIRE(builder.Append(2).ok());
  BOOST_REQUIRE(builder.Append(3).ok());

  auto schema = arrow::schema({arrow::field("hello", arrow::int32())});
  auto structArray = arrow::StructArray::Make({builder.Finish().ValueOrDie()}, {"hello"}).ValueOrDie();
  auto recordBatch = arrow::RecordBatch::FromStructArray(structArray).ValueOrDie();

  // Serialise the batch into an in-memory output stream.
  auto outputStream = arrow::io::BufferOutputStream::Create().ValueOrDie();
  auto writer = arrow::ipc::MakeStreamWriter(outputStream.get(), schema).ValueOrDie();
  BOOST_REQUIRE(writer->WriteRecordBatch(*recordBatch).ok());
  BOOST_REQUIRE(writer->Close().ok());
  auto buffer = outputStream->Finish().ValueOrDie();
  std::string bytes = buffer->ToString();

  // Deserialise from the std::string. `bytes` and `bufferReader` stay in scope
  // for the rest of the test, which keeps the memory the reader views alive.
  auto bufferReader = std::make_shared<arrow::io::BufferReader>(bytes);
  auto reader = arrow::ipc::RecordBatchStreamReader::Open(bufferReader.get()).ValueOrDie();
  std::shared_ptr<arrow::RecordBatch> recordBatchBack = reader->Next().ValueOrDie();

  // Next() yields a null batch at end of stream; require a real batch before
  // dereferencing it, then assert on the content rather than just printing it.
  BOOST_REQUIRE(recordBatchBack != nullptr);
  BOOST_CHECK_EQUAL(recordBatchBack->num_rows(), 3);
}
Upvotes: 2