David J.
David J.

Reputation: 1913

JSZip Changes Contents of Files (pdf) While Decompressing

I'm using the FileReader api to load the contents of a file input into the browser's memory. This jsfiddle includes a working example: https://jsfiddle.net/qyh1s60d/

I turn the result into a Uint8Array and display it in a div:

<label for="file-pdf">select a pdf:</label>
<input type="file" id="file-pdf" name="files[]" />
<div id="result-pdf">
</div>

JS

// Run handlePdfSelect each time the selection in the pdf file input changes.
document.getElementById('file-pdf').addEventListener('change', handlePdfSelect, false);
/**
 * Change handler for the pdf file input: reads the first selected file into
 * memory and writes its first five bytes into the resultPdf element.
 *
 * @param {Event} evt - change event whose target is the file input.
 */
function handlePdfSelect(evt) {
    // Invoked by the FileReader once the whole file is available as an ArrayBuffer.
    const showLeadingBytes = (loadEvent) => {
        const bytes = new Uint8Array(loadEvent.target.result);
        resultPdf.innerHTML = bytes.slice(0, 5);
    };

    const selection = evt.target.files; // FileList object
    const fileReader = new FileReader();
    fileReader.onload = showLeadingBytes;
    fileReader.readAsArrayBuffer(selection[0]);
}

So far so good. The div displays an array of numbers as expected.

Next I want to use JSZip to get the same pdf located inside of a zip file. Here's the html:

<label for="file-zip">select a zip:</label>
<input type="file" id="file-zip" name="files[]" />
<div id="result-zip">
</div>

And JS

// Run handleZipSelect each time the selection in the zip file input changes.
document.getElementById('file-zip').addEventListener('change', handleZipSelect, false); 

/**
 * Change handler for the zip file input: reads the selected archive, opens it
 * with JSZip, and writes five bytes taken from the first ".pdf" entry into
 * the resultZip element.
 *
 * @param {Event} evt - change event whose target is the file input.
 */
function handleZipSelect(evt) {
    const isPdfName = (entryName) => /\.pdf$/.test(entryName);

    // Receives the object resolved by JSZip.loadAsync.
    const showEntryBytes = (zip) => {
        const pdfEntries = Object.keys(zip.files)
            .filter(isPdfName)
            .map((entryName) => zip.files[entryName]);
        // NOTE(review): _data.compressedContent looks like a JSZip-internal buffer
        // holding the entry as stored in the archive (the name suggests it is
        // still compressed) - confirm against the JSZip sources.
        const leadingBytes = pdfEntries[0]._data.compressedContent.slice(0, 5);
        resultZip.innerHTML = leadingBytes;
    };

    const selection = evt.target.files; // FileList object
    const fileReader = new FileReader();
    fileReader.onload = (loadEvent) => {
        const zipBytes = new Uint8Array(loadEvent.target.result);
        JSZip.loadAsync(zipBytes).then(showEntryBytes);
    };
    fileReader.readAsArrayBuffer(selection[0]);
}

Even though I use the same pdf in these two file uploads, I get different Uint8Arrays displayed in the browser. Can anyone tell me why these are different, and how I can get the pdf data taken from the zip file to look the same as when it is directly uploaded as pdf?

Upvotes: 0

Views: 549

Answers (1)

Mohamed Sa'ed
Mohamed Sa'ed

Reputation: 801

Yes, the two results are different and give different Uint8Arrays. That's because the first one is the original pdf file, while the second one is the same pdf file in its compressed state, as it is stored inside the zip.

So if you need the original content of the pdf file, you need one extra step to decompress the entry, using its async() method:

async(type[, onUpdate])

Return a Promise of the content in the asked type.

Possible values for type :

  • base64 : the result will be a string, the binary in a base64 form.
  • text (or string): the result will be an unicode string.
  • binarystring: the result will be a string in “binary” form, using 1 byte per char (2 bytes).
  • array: the result will be an Array of bytes (numbers between 0 and 255).
  • uint8array : the result will be a Uint8Array. This requires a compatible browser.
  • arraybuffer : the result will be a ArrayBuffer. This requires a compatible browser.
  • blob : the result will be a Blob. This requires a compatible browser.
  • nodebuffer : the result will be a nodejs Buffer. This requires nodejs.

Here the documentation on GitHub

And here How to read a file using jszip

The working example : https://jsfiddle.net/e2Lxyrpn/

// Output elements: each handler below writes the first bytes it read into one of these.
var resultPdf = document.getElementById('result-pdf');
var resultZip = document.getElementById('result-zip');
// Run handlePdfSelect each time the selection in the pdf file input changes.
document.getElementById('file-pdf').addEventListener('change', handlePdfSelect, false);
/**
 * Change handler for the pdf file input: reads the first selected file into
 * memory and shows its first five bytes, comma separated, in resultPdf.
 *
 * Returns without doing anything when the selection is empty (for example
 * after a previous choice was cleared); passing undefined to
 * FileReader.readAsArrayBuffer would throw a TypeError.
 *
 * @param {Event} evt - change event whose target is the file input.
 */
function handlePdfSelect(evt) {
    const file = evt.target.files[0]; // first entry of the FileList, if any
    if (!file) {
        return;
    }

    const reader = new FileReader();
    reader.onload = (e) => {
        const res = new Uint8Array(e.target.result);
        // The bytes are plain text, never markup, so write them as text.
        resultPdf.textContent = res.slice(0, 5).join(',');
    };

    reader.readAsArrayBuffer(file);
}
  
// Run handleZipSelect each time the selection in the zip file input changes.
document.getElementById('file-zip').addEventListener('change', handleZipSelect, false); 

/**
 * Change handler for the zip file input: reads the selected archive, opens it
 * with JSZip, decompresses the first entry whose name ends in ".pdf", and
 * shows that entry's first five bytes, comma separated, in resultZip.
 *
 * Returns without doing anything when the selection is empty. Any failure
 * (unreadable file, invalid archive, no pdf entry) is reported in resultZip
 * instead of being lost as an unhandled promise rejection.
 *
 * @param {Event} evt - change event whose target is the file input.
 */
function handleZipSelect(evt) {
    const file = evt.target.files[0]; // first entry of the FileList, if any
    if (!file) {
        return;
    }

    const showError = (err) => {
        resultZip.textContent = `Could not read a pdf from the zip: ${err.message}`;
    };

    const reader = new FileReader();
    reader.onerror = () => showError(reader.error);
    reader.onload = (e) => {
        const res = new Uint8Array(e.target.result);
        JSZip.loadAsync(res)
            .then((zip) => {
                // zip.files maps every entry name to its ZipObject.
                const pdfName = Object.keys(zip.files).find((name) => /\.pdf$/.test(name));
                if (pdfName === undefined) {
                    throw new Error('no .pdf entry found');
                }
                //>>>>>>>>------read the file Content --------
                // async() decompresses the entry; returning its promise keeps
                // failures on this chain so the catch below sees them.
                return zip.files[pdfName].async('uint8array');
            })
            .then((fileData) => {
                resultZip.textContent = fileData.slice(0, 5).join(',');
            })
            .catch(showError);
    };

    reader.readAsArrayBuffer(file);
}
  
  // Debug output: prints the JSZip global to the console when the script runs.
  console.log(JSZip)
  
<!-- Loads JSZip 3.2.2 from cdnjs; the example script refers to it as the global JSZip. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.2.2/jszip.min.js"></script>
<!-- Direct pdf upload: the script writes the first bytes of the file into #result-pdf. -->
<label for="file-pdf">select a pdf:</label>
<input type="file" id="file-pdf" name="files[]" />
<div id="result-pdf">
</div>
<br>
<br>
<br>
<br>
<!-- Zip upload: the script writes the first bytes of the pdf inside the zip into #result-zip. -->
<label for="file-zip">select a zip containing the same pdf:</label>
<input type="file" id="file-zip" name="files[]" />
<div id="result-zip">
</div>

Upvotes: 1

Related Questions