Reputation: 570
I am building a page that uses PDF.js to load and render a PDF, as in the following code.
// Load a PDF with PDF.js and render its pages one after another.
var url = '/path-to-pdf.js';
PDFJS.workerSrc = "./js/external/pdf.worker.js";
PDFJS.getDocument(url).then(function getPdfHelloWorld(pdf) {
  // Number of the page most recently handed to renderPage.
  var pageNumber = 1;
  // renderPage is defined outside this snippet; it is called here as
  // (container, pdf, pageNumber, callback) and presumably invokes the
  // callback once that page has finished rendering.
  renderPage($(".center-info")[0], pdf, pageNumber, function pageRenderingComplete() {
    // pageNumber is the page that was just rendered, so stop once it
    // reaches the last page; comparing with `>` would request the
    // non-existent page numPages + 1.
    if (pageNumber >= pdf.numPages) {
      return; // All pages rendered
    }
    // Continue rendering of the next page
    // NOTE(review): "display-div" has no "." or "#" prefix, so jQuery treats
    // it as a tag name - confirm that this is the intended container.
    renderPage($("display-div")[0], pdf, ++pageNumber, pageRenderingComplete);
  });
});
I would like to offer a client-side download, which means I need to access the raw PDF data directly. Is it possible to do that here?
Upvotes: 2
Views: 4793
Reputation: 3184
/**
 * Collect the text items of every page of a PDF document.
 *
 * @param {*} input - Passed unchanged to `pdfJS.getDocument()`.
 * @returns {Promise<Array>} All `items` from each page's text content,
 *   in page order.
 */
async function extract(input) {
  // getDocument() may return either a thenable or a loading task whose
  // document is exposed via `.promise`; awaiting the task itself in the
  // latter case would yield the task, not the document, and the loop
  // below would silently run zero times.
  const loadingTask = pdfJS.getDocument(input);
  const pdf = await (loadingTask.promise !== undefined
    ? loadingTask.promise
    : loadingTask);

  const elements = [];
  // Page numbers are 1-based and inclusive of numPages.
  for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
    const page = await pdf.getPage(pageNumber);
    const textContent = await page.getTextContent({
      normalizeWhitespace: true,
      disableCombineTextItems: false,
    });
    for (const item of textContent.items) {
      elements.push(item);
    }
  }
  return elements;
}
Upvotes: -1
Reputation: 570
I just found the answer: the raw data can be accessed with the getData() method.
// Load the document, then read back its raw bytes via getData().
PDFJS.getDocument(url).then(function getPdfHelloWorld(pdf) {
  // Despite the parameter name, getData() is documented to resolve with a
  // Uint8Array (see the PDF.js API reference), which is what subarray() needs.
  pdf.getData().then(function (arrayBuffer) {
    // Convert in fixed-size slices: handing every byte of a PDF to a single
    // apply() call can exceed the engine's argument limit and throw a
    // RangeError.
    var CHUNK_SIZE = 0x8000;
    var pieces = [];
    for (var offset = 0; offset < arrayBuffer.length; offset += CHUNK_SIZE) {
      pieces.push(
        String.fromCharCode.apply(null, arrayBuffer.subarray(offset, offset + CHUNK_SIZE))
      );
    }
    // Binary string with one character per byte of the PDF.
    var pdfraw = pieces.join("");
    // Operate on the raw PDF here...
  });
});
Cheers
Upvotes: 6