Write binary file to disk super fast in MEX

Question

I need to write a large array of data to disk as fast as possible. From MATLAB I can do that with fwrite:

function writeBinaryFileMatlab(data)
% WRITEBINARYFILEMATLAB Write DATA to 'file_matlab.bin' as raw binary.
%   The elements are written with fwrite using class(data) as the
%   precision. An error is raised if the file cannot be opened or if
%   fewer elements than numel(data) were written.
    [fid, msg] = fopen('file_matlab.bin', 'w');
    if fid == -1
        error('writeBinaryFileMatlab:openFailed', ...
              'Cannot open file_matlab.bin for writing: %s', msg);
    end
    % Close the file even when fwrite raises an error.
    closer = onCleanup(@() fclose(fid));
    count = fwrite(fid, data, class(data));
    if count ~= numel(data)
        error('writeBinaryFileMatlab:shortWrite', ...
              'Wrote %d of %d elements to file_matlab.bin.', count, numel(data));
    end
end

Now I have to do the same, but from a MEX file called by MATLAB. So I set up a MEX function that can write to a file using either fstream or fopen (inspired by the results of this SO post). However, this is much slower than calling fwrite from MATLAB, as you can see below. Why is this the case, and what can I do to increase the write speed from the MEX function?

#include "mex.h"
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <stdexcept>

using namespace std;

/**
 * Write an int16 buffer to "file_fopen.bin" using C stdio.
 *
 * @param data  Pointer to the first element to write.
 * @param size  Number of int16_t elements (not bytes) to write.
 * @throws std::runtime_error if the file cannot be opened, if fewer than
 *         `size` elements were written, or if closing the file fails.
 */
void writeBinFile(int16_t *data, size_t size)
{
    FILE *fID = fopen("file_fopen.bin", "wb");
    if (fID == nullptr)
        throw std::runtime_error("writeBinFile: cannot open file_fopen.bin for writing");

    // Nothing to hand to fwrite for an empty buffer; the file is still created.
    const size_t written = (size > 0) ? fwrite(data, sizeof(int16_t), size, fID) : 0;

    // fclose flushes buffered data, so its result is part of the write outcome.
    const bool closed = (fclose(fID) == 0);
    if (written != size || !closed)
        throw std::runtime_error("writeBinFile: failed to write all data to file_fopen.bin");
}

/**
 * Write an int16 buffer to "file_ostream.bin" using std::ofstream.
 *
 * @param data  Pointer to the first element to write.
 * @param size  Number of int16_t elements (not bytes) to write.
 * @throws std::runtime_error if the file cannot be opened or if the stream
 *         reports a failure after writing and closing.
 */
void writeBinFileFast(int16_t *data, size_t size)
{
    std::ofstream file("file_ostream.bin", std::ios::out | std::ios::binary);
    if (!file)
        throw std::runtime_error("writeBinFileFast: cannot open file_ostream.bin for writing");

    file.write(reinterpret_cast<const char *>(data),
               static_cast<std::streamsize>(size * sizeof(int16_t)));

    // close() flushes; a failed flush sets failbit, so check the state afterwards.
    file.close();
    if (!file)
        throw std::runtime_error("writeBinFileFast: failed to write all data to file_ostream.bin");
}

void mexFunction(int nlhs, mxArray *plhs[],
                 int nrhs, const mxArray *prhs[])
{
    const mxArray *mxPtr = prhs[0];
    size_t nelems = mxGetNumberOfElements(mxPtr);
    int16_t *ptr = (int16_t *)mxGetData(mxPtr);
#ifdef USE_OFSTREAM
    writeBinFileFast(ptr, nelems);
#else
    writeBinFile(ptr, nelems);
#endif
}

Then I check the performance using the following script:

% Build main.cpp with USE_OFSTREAM defined (selects the ofstream writer) into writefast_ofstream.
mex -R2018a -Iinclude CXXFLAGS="$CXXFLAGS -O3" -DUSE_OFSTREAM main.cpp -output writefast_ofstream
% Build main.cpp without USE_OFSTREAM (selects the fopen/fwrite writer) into writefast_fwrite.
mex -R2018a -Iinclude CXXFLAGS="$CXXFLAGS -O3" main.cpp -output writefast_fwrite

% Time the three writers for payloads of 2, 4, ..., 1024 MB.
numSizes = 10;
% Preallocate so results from an earlier run cannot linger in the vectors.
sizeMB     = zeros(1, numSizes);
t_matlab   = zeros(1, numSizes);
t_ofstream = zeros(1, numSizes);
t_fwrite   = zeros(1, numSizes);

for k = 1:numSizes
    sizeBytes = 2^k * 1024 * 1024;
    fprintf('Generating data of size %i MB\n', sizeBytes / 2^20)
    M = sizeBytes / 2; % 2 bytes for an int16
    sizeMB(k) = sizeBytes / 2^20;
    data = int16(rand(M, 1) * 100);

    fprintf('TESTING: write matlab\n')
    t_matlab(k) = timeit(@() writeBinaryFileMatlab(data));

    % The MEX functions return nothing, so timeit is told to expect 0 outputs.
    fprintf('TESTING: write ofstream\n')
    t_ofstream(k) = timeit(@() writefast_ofstream(data), 0);

    fprintf('TESTING: write fwrite\n')
    t_fwrite(k) = timeit(@() writefast_fwrite(data), 0);
end

% Plot the measured write time of each method against the payload size.
figure(14);
clf;
timings = {t_matlab, t_ofstream, t_fwrite};
plot(sizeMB, timings{1});
hold('on');
for idx = 2:numel(timings)
    plot(sizeMB, timings{idx});
end
legend('Matlab', 'ofstream', 'fwrite');
xticks(sizeMB);

Which gives me the plot below. Why is calling fwrite from MATLAB so much faster than doing it from MEX? How can I reach the same speed in my MEX function?

I am using Windows 10. Laptop with Core i7, SSD.

UPDATE

I have tried various suggestions in the comments, but still do not reach MATLAB's fwrite performance. See the repo with the source code here: https://github.com/rick3rt/saveBinaryDataMex

This is the result with MSVC 2017, incorporating the suggestion of rahnema1:

UPDATE 2

Wow, I finally got something that's faster than MATLAB! Rahnema1's answer did the trick :) Here are the figures with all suggested methods combined (the complete source can be found on GitHub).

rahnema1 · Accepted Answer

As indicated in some posts, very large buffers tend to decrease performance, so the buffer is written to the file part by part. For me, 8 MiB gives the best performance.

void writeBinFilePartByPart(int16_t *int_data, size_t size)
{        
  size_t part = 8 * 1024 * 1024;

  size = size * sizeof(int16_t);
  
  char *data = reinterpret_cast (int_data);

  HANDLE file = CreateFileA (
    "windows_test.bin", 
    GENERIC_WRITE, 
    0, 
    NULL,
    CREATE_ALWAYS, 
    FILE_FLAG_SEQUENTIAL_SCAN, 
    NULL);
  
  // Expand file size
  SetFilePointer (file, size, NULL, FILE_BEGIN);
  SetEndOfFile (file);
  SetFilePointer (file, 0, NULL, FILE_BEGIN);

  DWORD written;
  if (size < part)
    {
      WriteFile (file, data, size, &written, NULL);  
      CloseHandle (file);
      return;
    }

  size_t rem = size % part;
  for (size_t i = 0; i < size-rem; i += part)
    {
      WriteFile (file, data+i, part, &written, NULL);
    }

  if (rem)
    WriteFile (file, data+size-rem, rem, &written, NULL);
  
  CloseHandle (file);
}

The output is compared to the C++ standard library method mentioned by @Cris Luengo:

Write binary file to disk super fast in MEX

Answers (2)

Related Questions