Abhishek Solanki
Abhishek Solanki

Reputation: 43

Show Searched text of a pdf along with page number

I am looking for a solution where I have a PDF file and want to search for a particular text in it; the results of that search should be shown in a list along with their page numbers. I looked for a solution online but was unable to find a complete and proper one.

A similar feature is available in Adobe Reader, called "comments", where the user can view all the searched items in a list along with their page numbers.

Your answer would be really helpful to me, and if possible, please provide an example too.

Thank you in advance.

Upvotes: 2

Views: 11753

Answers (1)

async5
async5

Reputation: 2691

Here is an example that might help you display the found text grouped by page using PDF.js.

// Default text to look for. Kept as a top-level `var` so it stays a
// reassignable global for any other script on the page.
var searchText = "JavaScript";

/**
 * Escapes every character that has a special meaning in a regular
 * expression so the text can be embedded in a pattern and matched literally.
 *
 * @param {string} text - Raw text supplied by the user.
 * @returns {string} The text with regex metacharacters backslash-escaped.
 */
function escapeRegExp(text) {
  return String(text).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Searches the text content of one page for a literal, case-insensitive
 * occurrence of the query and returns each hit with up to 20 characters of
 * context on either side.
 *
 * @param {Object} doc - Object whose `getPage(pageNumber)` returns a promise
 *   for a page exposing `getTextContent()`.
 * @param {number} pageNumber - Page number handed to `doc.getPage`.
 * @param {string} [query=searchText] - Text to search for; defaults to the
 *   global `searchText`.
 * @returns {Promise<{page: number, items: string[]}>} The page number and one
 *   snippet per match. `items` is empty when nothing matches or the query is
 *   empty.
 */
function searchPage(doc, pageNumber, query = searchText) {
  return doc.getPage(pageNumber).then(function (page) {
    return page.getTextContent();
  }).then(function (content) {
    const lines = [];
    const needle = String(query);
    // An empty query would yield zero-length matches that never advance
    // `lastIndex`, so the loop below would never terminate.
    if (needle === "") {
      return {page: pageNumber, items: lines};
    }
    // Search the combined text content of the page.
    const text = content.items.map(function (item) { return item.str; }).join('');
    // The query is escaped so characters such as "+", "." or "(" are matched
    // literally instead of being interpreted as regex syntax.
    const re = new RegExp("(.{0,20})" + escapeRegExp(needle) + "(.{0,20})", "gi");
    let m;
    while ((m = re.exec(text)) !== null) {
      // m[0] already contains the surrounding context; an ellipsis is added on
      // each side where context was captured.
      lines.push((m[1] ? "..." : "") + m[0] + (m[2] ? "..." : ""));
    }
    return {page: pageNumber, items: lines};
  });
}

// Start loading the sample PDF; the first handler below receives the document.
var loading = PDFJS.getDocument("//cdn.mozilla.net/pdfjs/tracemonkey.pdf");
loading.promise
  .then(function queueSearches(pdf) {
    // Queue one search per page (pages 1..numPages) and wait for all of them.
    var pending = [];
    var pageNo = 1;
    while (pageNo <= pdf.numPages) {
      pending.push(searchPage(pdf, pageNo));
      pageNo += 1;
    }
    return Promise.all(pending);
  })
  .then(function renderResults(pages) {
    // Creates a <div> with the given class and attaches it to `parent`.
    function addDiv(parent, cssClass) {
      var el = document.createElement('div');
      el.className = cssClass;
      parent.appendChild(el);
      return el;
    }

    // One heading div per page, with one nested div per found snippet.
    for (var p = 0; p < pages.length; p++) {
      var entry = pages[p];
      var heading = addDiv(document.body, "pr");
      heading.textContent = 'Page ' + entry.page + ':';
      for (var k = 0; k < entry.items.length; k++) {
        addDiv(heading, "prl").textContent = entry.items[k];
      }
    }
  })
  .catch(console.error);
/* Per-page heading div (class "pr") created by the script for each page. */
.pr { font-family: sans-serif; font-weight: bold; }
/* Match snippet div (class "prl") nested inside a page heading; overrides the inherited bold weight. */
.prl { font-style: italic; font-weight: normal; }
<script src="//npmcdn.com/pdfjs-dist/build/pdf.js"></script>

Upvotes: 6

Related Questions