Gundam Meister
Gundam Meister

Reputation: 1495

Calculate MD5 hash of a large file using javascript

How do you upload a 500 MB file and get an MD5 hash with CryptoJS?

Here is my code:

// Hash the selected file in a single pass: the entire file is read into one
// binary string and handed to CryptoJS.MD5 at once; the hex digest is stored
// on window.md5.
$('#upload-file').change(function () {
    const input = this;
    const reader = new FileReader();

    reader.addEventListener('load', () => {
        const words = CryptoJS.enc.Latin1.parse(reader.result);
        const digest = CryptoJS.MD5(words);
        window.md5 = digest.toString(CryptoJS.enc.Hex);
    });

    const [selectedFile] = [input.files[0]];
    reader.readAsBinaryString(selectedFile);
});

If the file is under 200 MB, it works. For anything bigger, this.result is an empty string ("").

I've tried:

filereader api on big files

javascript FileReader - parsing long file in chunks

and I almost got this one to work, but the console complains about .join(""):

http://dojo4.com/blog/processing-huge-files-with-an-html5-file-input

Upvotes: 2

Views: 10782

Answers (3)

Pankaj
Pankaj

Reputation: 71

I modified @Tamas's answer to compute a file hash, adding async/await and some checks. Cheers!

/**
 * Hash a File/Blob incrementally with crypto-js, reading it chunk by chunk so
 * the whole file never has to be held in memory as one string.
 *
 * @param {Blob} file - File or Blob to hash; only `size` and `slice()` are used.
 * @param {object} [options]
 * @param {object} [options.CryptoJS] - The crypto-js module (required).
 * @param {?function(number): void} [options.cbProgress] - Called after every
 *   chunk with the fraction hashed so far (0..1); an empty file reports 1.
 * @param {string} [options.algo="SHA256"] - Key of `CryptoJS.algo` to use.
 * @param {string} [options.encoding="Base64url"] - "Base64", "Base64url" or "Hex".
 * @param {number} [options.chunkSize=1048576] - Bytes per read; must be >= 1.
 * @returns {Promise<string>} The digest encoded with the requested encoder.
 *   Rejects when an option is invalid, when the read fails (with the reader's
 *   error), or when hashing or `cbProgress` throws.
 */
export default async function getFileHash(file,
  {
    CryptoJS = null,
    cbProgress = null,
    algo = "SHA256",
    encoding = "Base64url",
    chunkSize = 1 * 1024 * 1024
  } = {}) {

  // These three checks still reject with plain strings, exactly as before, so
  // callers that inspect the rejection value keep working.
  if (!CryptoJS) {
    return Promise.reject("crypto-js module not provided");
  }
  if (!Object.prototype.hasOwnProperty.call(CryptoJS.algo, algo)) {
    return Promise.reject("Given hash algo not supported");
  }
  if (!["Base64", "Base64url", "Hex"].includes(encoding)) {
    return Promise.reject(
      `Given hash encoding not supported. Supported encodings are "Base64", "Base64url", "Hex"`
    );
  }
  // toString(undefined) would silently produce a Hex digest, so fail loudly
  // when the supplied crypto-js build lacks the requested encoder.
  if (!(CryptoJS.enc && CryptoJS.enc[encoding])) {
    return Promise.reject(
      new Error(`The provided crypto-js module has no "${encoding}" encoder`)
    );
  }
  // A chunk size below one byte never advances the offset and would loop forever.
  if (!(chunkSize >= 1)) {
    return Promise.reject(new RangeError("chunkSize must be at least 1 byte"));
  }

  return new Promise((resolve, reject) => {
    const hasher = CryptoJS.algo[algo].create();
    const fileSize = file.size;
    const reader = new FileReader();
    let offset = 0;

    function readNext() {
      reader.readAsBinaryString(file.slice(offset, offset + chunkSize));
    }

    reader.onload = function () {
      // Anything thrown here happens inside an event handler, outside the
      // promise executor, so it must be routed to reject() explicitly or the
      // promise would stay pending forever.
      try {
        const chunk = reader.result;
        // A binary string carries one byte per character, which is what Latin1 parsing expects.
        hasher.update(CryptoJS.enc.Latin1.parse(chunk));
        offset += chunk.length;

        if (cbProgress) {
          // Avoid reporting 0 / 0 (NaN) for an empty file.
          cbProgress(fileSize > 0 ? offset / fileSize : 1);
        }

        if (offset >= fileSize) {
          resolve(hasher.finalize().toString(CryptoJS.enc[encoding]));
          return;
        }
        readNext();
      } catch (err) {
        reject(err);
      }
    };

    reader.onerror = function (event) {
      // The handler argument is an event object; the actual failure is on reader.error.
      reject(reader.error || event);
    };

    readNext();
  });
}

Upvotes: 0

Tamas Hegedus
Tamas Hegedus

Reputation: 29936

CryptoJS has a progressive API for hash digests. The rest is taken from alediaferia's answer with slight modifications.

/**
 * Button handler: hashes the file currently selected in #my-file-input with
 * getMD5, logging progress fractions and then either the digest or the error.
 */
function process() {
  const input = document.getElementById("my-file-input");
  const selectedFile = input.files[0];

  const reportProgress = (fraction) => {
    console.log("Progress: " + fraction);
  };
  const printDigest = (digest) => {
    console.log(digest);
  };
  const printFailure = (failure) => {
    console.error(failure);
  };

  getMD5(selectedFile, reportProgress).then(printDigest, printFailure);
}

/**
 * Read a File/Blob sequentially in fixed-size chunks.
 *
 * @param {Blob} file - File or Blob to read; only `size` and `slice()` are used.
 * @param {function(string, number, number): void} chunkCallback - Called for
 *   every chunk as (binaryString, bytesReadSoFar, totalBytes). If it throws,
 *   reading stops and the thrown value is passed to endCallback.
 * @param {function(*): void} endCallback - Called exactly once: with `null`
 *   after the last chunk, or with the error that stopped the read.
 * @param {number} [chunkSize=4194304] - Bytes per read (default 4 MB).
 * @throws {RangeError} If chunkSize is not at least 1 (the read could never advance).
 */
function readChunked(file, chunkCallback, endCallback, chunkSize = 4 * 1024 * 1024) {
  if (!(chunkSize >= 1)) {
    throw new RangeError("chunkSize must be at least 1 byte");
  }

  const fileSize = file.size;
  const reader = new FileReader();
  let offset = 0;

  function readNext() {
    reader.readAsBinaryString(file.slice(offset, offset + chunkSize));
  }

  reader.onload = function () {
    const chunk = reader.result;
    // A binary string has one character per byte, so its length is the byte count.
    offset += chunk.length;

    try {
      chunkCallback(chunk, offset, fileSize);
    } catch (err) {
      // Without this the exception would escape the event handler and
      // endCallback would never run, leaving the caller waiting forever.
      endCallback(err || new Error("chunkCallback threw a falsy value"));
      return;
    }

    if (offset >= fileSize) {
      endCallback(null);
      return;
    }
    readNext();
  };

  reader.onerror = function (event) {
    // The handler argument is an event object; the actual failure is on reader.error.
    endCallback(reader.error || event || {});
  };

  readNext();
}

/**
 * Compute the MD5 digest of a File/Blob incrementally, feeding each chunk
 * delivered by readChunked into a progressive CryptoJS hasher.
 *
 * @param {Blob} blob - File or Blob to hash.
 * @param {?function(number): void} [cbProgress] - Called after every chunk
 *   with the fraction hashed so far (0..1); an empty file reports 1.
 * @returns {Promise<string>} Hex-encoded MD5 digest. Rejects when reading
 *   fails or when hashing, finalizing, or `cbProgress` throws.
 */
function getMD5(blob, cbProgress) {
  return new Promise((resolve, reject) => {
    const md5 = CryptoJS.algo.MD5.create();

    readChunked(blob, (chunk, offs, total) => {
      try {
        md5.update(CryptoJS.enc.Latin1.parse(chunk));
        if (cbProgress) {
          // Avoid reporting 0 / 0 (NaN) for an empty file.
          cbProgress(total > 0 ? offs / total : 1);
        }
      } catch (err) {
        // This runs inside a reader event handler, so the promise must be
        // rejected explicitly. Rethrowing stops readChunked from reading on.
        reject(err);
        throw err;
      }
    }, (err) => {
      if (err) {
        reject(err);
        return;
      }
      try {
        resolve(md5.finalize().toString(CryptoJS.enc.Hex));
      } catch (finalizeErr) {
        reject(finalizeErr);
      }
    });
  });
}
<!-- crypto-js 3.1.2 components from cdnjs: core.js and md5.js. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/components/core.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/components/md5.js"></script>
<!-- File picker whose id is looked up by process(); the button calls process(). -->
<input id="my-file-input" type="file">
<button onclick="process()">Process</button>

Upvotes: 15

jcaron
jcaron

Reputation: 17720

You don't need to read the whole file at once and feed it all in one go to CryptoJS routines.

You can create the hasher object, and feed chunks as you read them, and then get the final result.

Sample taken from the CryptoJS documentation

// Create a hasher object that accepts its input incrementally.
var sha256 = CryptoJS.algo.SHA256.create();
// Feed the message one part at a time.
sha256.update("Message Part 1");
sha256.update("Message Part 2");
sha256.update("Message Part 3");
// Finish hashing and obtain the final result.
var hash = sha256.finalize();

Upvotes: 2

Related Questions