user11381501
user11381501

Reputation:

How to stream files to and from the computer in browser JavaScript?

I am working on a web app (pure HTML/Javascript, no libraries) that does byte-level processing of a file (Huffman Encoding demo). It works beautifully (you do NOT want to know how long it took to get there), but my sense of completion is bothering me just a bit because I have to load the files to and from an ArrayBuffer instead of streaming from the HDD. There's also a filesize limitation, although it would admittedly take quite a long time to compress a 4GB file (the maximum that my data structures support).

Still, in the interest of making this app work on low-resource devices, how might I stream a file from a file input box (I need multiple passes for the frequency counting, filesize detection, and actual write) and to a browser download of some sort (that's in one pass at least, thankfully)?

Here are the relevant functions that handle it right now (I apologize for the globals :P):

//Load the file
  function startProcessingFile(){
    //Reads the first file chosen in the "file" input into memory, then starts the main process.
    //With no file chosen it only reports "No file selected".
    var fileInput=document.getElementById("file");
    if (fileInput.files.length === 0) {
      displayError("No file selected");
      return;
    }
    displayStatus("Loading File...");
    var reader = new FileReader();
    reader.onerror=function () {
      //Hand the reader's error object to the error display.
      displayError(reader.error);
    };
    reader.onload=function () {
      //Expose the loaded bytes through the global DataView, then kick off processing.
      inp = new DataView(reader.result);
      boot();
    };
    reader.readAsArrayBuffer(fileInput.files[0]);
  }

//A bit later on -- one of the functions that reads the data from the input file
function countTypes(c){
  //Tallies byte frequencies of the input in slices. c is the number of bytes already processed.
  //Each call handles at most ceil(inputSize/100) bytes and then reschedules itself with setTimeout
  //(presumably so the page can repaint between slices). A pending "die" request is cleared and stops the chain.
  if (die){
    die=false;
    return;
  }
  for (var budget=Math.ceil(inputSize/100.0); c<inputSize && budget>0; budget--, c++){
    var byteValue=inp.getUint8(c);
    frequencies[byteValue]+=1;
  }
  updateProgress(100.0*c/inputSize);
  if (!(c<inputSize)){
    //Whole input counted: report completion and advance to the next task.
    updateProgress(100);
    system_state++;
    taskHandle();
    return;
  }
  setTimeout(function () {countTypes(c);}, 0);
}

//Here's where the file is read the last time and also where the bits come from that I want to save. If I could stream the data directly I could probably even get rid of the dry-run stage I currently need to count how many bytes to allocate for the output ArrayBuffer. I mean, Google Drive can download files without telling the browser the size, just whether it's done yet or not, so I'd assume that's a feature I could access here too. I'm just not sure how you actually gain access to a download from JS in the first place.
function encode(c,d){ //performs the Huffman encoding.
//c is the number of input BYTES processed so far (it indexes inp). If d is true, does not actually write
//(d is passed straight through to writeBit).
//Each call handles at most ceil(inputSize/250) bytes and then reschedules itself with setTimeout.
//Throws an Error if an input byte has no entry in the code table.
  if (die){
    die=false;
    return;
  }
  //Build the byte -> code lookup once per slice instead of rescanning the whole table for every byte.
  //Entries are inserted in table order, so a later duplicate value overrides an earlier one,
  //exactly as the previous full scan (which had no break) did.
  var codes=new Map();
  for (var t=0; t<table.length; t++){
    codes.set(table[t].value, table[t].code);
  }
  var i=Math.ceil(inputSize/250.0);
  while (c<inputSize && i>0){
    var b=inp.getUint8(c);
    var seq=codes.get(b);
    if (seq===undefined){
      //Previously this case silently reused the code of the preceding byte in the same slice.
      throw new Error("No Huffman code for byte value "+b);
    }
    for (var j=0; j<seq.length; j++){
      writeBit(seq[j],d);
    }
    i--;
    c++;
  }
  var perc=100.0*c/inputSize;
  updateProgress(perc);
  if (c<inputSize){
    setTimeout(function () {encode(c,d);}, 0);
  }
  else{
    updateProgress(100);
    system_state++;
    taskHandle();
  }
}

//Finally, bit-level access for unaligned read/write so I can actually take advantage of the variable word size of the Huffman encoding (the read is used for decoding).
function readBit(){
  //Returns the bit at position bit_index (0 = least significant) of the input byte at byte_index,
  //then advances the global cursor by one bit. The offset of 4 skips the filesize int.
  var bit=(inp.getUint8(byte_index+4)>>>bit_index)&1;
  var nextBit=bit_index+1;
  if (nextBit>7){
    //Past the last bit of this byte: move on to the next byte.
    bit_index=0;
    byte_index++;
  }
  else{
    bit_index=nextBit;
  }
  return bit;
}

function writeBit(b,d){
  //Stores one bit (b) at position bit_index of the output byte at byte_index and advances the global cursor.
  //The write only happens when d is exactly false; any other d is a dry run that just moves the cursor.
  //The offset of 4 in the output leaves room for the filesize int.
  if (d===false){
    //Clear the target bit in the working byte, drop the new bit in, and flush the working byte.
    var cleared=current_byte&(0xff^(1<<bit_index));
    current_byte=cleared|(b<<bit_index);
    output.setUint8(byte_index+4, current_byte);
  }
  var nextBit=bit_index+1;
  if (nextBit>7){
    bit_index=0;
    byte_index++;
  }
  else{
    bit_index=nextBit;
  }
}

function readByte(){
  //Assembles a byte from eight consecutive readBit() calls, least significant bit first. Unaligned.
  var value=0;
  var shift=0;
  while (shift<8){
    value|=readBit()<<shift;
    shift++;
  }
  return value;
}

function writeByte(b,d){
  //Emits the eight bits of b through writeBit, least significant bit first. Unaligned.
  //d is forwarded unchanged to writeBit.
  var shift=0;
  while (shift<8){
    writeBit((b>>>shift)&1,d);
    shift++;
  }
}

//And finally the download mechanism I'm using.
function downloadResult(){//download processed file with specified extension
  //Wraps the output buffer in a Blob and triggers a download through a temporary <a download> element.
  //The download name is the base name of source_name: fext is appended when encoding,
  //and a trailing fext is stripped when decoding.
  var blobObject = new Blob([output], {type: 'application/octet-stream'});
  var n=source_name.split('\\').pop().split('/').pop();
  if (doEncode){
    n=n+fext;
  }else if (fext.length>0 && n.endsWith(fext)){
    //Strip the extension only at the end of the name; replace() removed the first occurrence anywhere in it.
    n=n.slice(0, n.length-fext.length);
  }
  var url=URL.createObjectURL(blobObject);
  var a = document.createElement("a");
  a.setAttribute("href", url);
  a.setAttribute("download", n);
  a.click();
  //Release the object URL so the Blob can be garbage-collected. The release is delayed
  //so the browser has time to start the download first.
  //(The old "delete a" did nothing for a var and is a SyntaxError in strict mode.)
  setTimeout(function () {URL.revokeObjectURL(url);}, 40000);
  running=false;
  var b=document.getElementById("ac");
  if (b.classList.contains("activeNav")){
    clearRes();
  }
}

I basically want to rip most of that out and replace it with something that can read bytes or medium-ish chunks of data out of the file that the user selects, and then when it gets to the actual output stage, trickle that data byte-by-byte through a more-or-less vanilla download to their download folder.

I do know that multiple files can be selected in a file input box, so perhaps if it's possible to download to a subfolder I could work out how to make an in-browser file archiver for the heck of it. Wouldn't that be fun! ...Mind, I'm fairly sure it's not possible (I don't see why you shouldn't be able to create a subdirectory in the browser downloads folder from the webpage, but there's probably a security reason).

Let me know if you need to see more code, but as this is a class project I don't want to get accused of plagiarizing my own app...

Upvotes: 5

Views: 9748

Answers (2)

Kaiido
Kaiido

Reputation: 137006

To read from the disk as a stream

you can use the Blob.stream() method which returns a ReadableStream from that Blob (or File).

// When a file is picked, pull it through a ReadableStream reader chunk by chunk
// and hand every chunk to handleChunk().
inp.onchange = async (evt) => {
  const reader = inp.files[ 0 ].stream().getReader();
  // Keep reading until the reader reports that the stream is exhausted.
  for( let result = await reader.read(); !result.done; result = await reader.read() ) {
    handleChunk( result.value );
  }
  console.log( "all done" );
};

// Demo consumer: logs the byte length of each chunk it is given.
function handleChunk( buf ) {
  const { byteLength } = buf;
  console.log( "received a new buffer", byteLength );
}
<input type="file" id="inp">

For older browsers that don't support this method, you can still read the File in chunks using only its .slice() method:

// When a file is picked, read it in fixed-size chunks using Blob.slice()
// and hand every chunk (an ArrayBuffer) to handleChunk().
inp.onchange = async (evt) => {
  const file = inp.files[ 0 ];
  const chunksize = 64 * 1024;
  for( let offset = 0; offset < file.size; offset += chunksize ) {
    // slice() is synchronous and returns a Blob, so there is nothing to await here;
    // the final chunk may be shorter than chunksize.
    const chunkfile = file.slice( offset, offset + chunksize );
    // Blob.arrayBuffer() can be polyfilled with a FileReader
    const chunk = await chunkfile.arrayBuffer();
    handleChunk( chunk );
  }
  console.log( "all done" );
};

// Demo consumer: reports how many bytes the received chunk holds.
function handleChunk( buf ) {
  const size = buf.byteLength;
  console.log( "received a new buffer", size );
}
<input type="file" id="inp">


Writing to disk as stream however is a bit harder.

There is a great hack by Jimmy Wärting called StreamSaver.js which uses Service Workers. I'm not sure how far its browser support goes, though, and while awesome, it's still a "hack" and requires a Service Worker to run.

An easier way to do so is to use the File System API, which is still being defined and is currently only available in Chrome. You can see this Q/A for a code example.

Upvotes: 9

Transformer
Transformer

Reputation: 7439

There is now a Streams API in JavaScript that is already supported by modern browsers.

Mozilla Streams MDN with samples

// Set up your stream with the options; it will help handle the size limitations etc.
// queuingStrategy is optional (the reference notation "[, queuingStrategy]" is not valid JavaScript).
var readableStream = new ReadableStream(underlyingSource, queuingStrategy);

// Fetches a page and re-exposes its body through a new ReadableStream wrapped in a Response.
fetch("https://www.example.org/").then((response) => {
  const reader = response.body.getReader();
  const stream = new ReadableStream({
    start(controller) {
      // The following function handles each data chunk
      function push() {
        // "done" is a Boolean and value a "Uint8Array"
        reader.read().then(({ done, value }) => {
          // Is there no more data to read?
          if (done) {
            // Tell the browser that we have finished sending data
            controller.close();
            return;
          }

          // Get the data and send it to the browser via the controller
          controller.enqueue(value);
          push();
        }).catch((err) => {
          // Forward read failures to the consumer; otherwise the new stream
          // would never close or error and its readers would wait forever.
          controller.error(err);
        });
      }

      push();
    },
    cancel(reason) {
      // If the consumer cancels, stop reading from the source as well.
      return reader.cancel(reason);
    }
  });

  return new Response(stream, { headers: { "Content-Type": "text/html" } });
});

Upvotes: 1

Related Questions