Mehdi Abbas
Mehdi Abbas

Reputation: 105

Google App Script find text from Google Document next to key text

I have a PDF file saved in Google Drive. I want to find a piece of text in that file (e.g. USD), pick the value next to the found text (e.g. 167.1764), and insert that value into my Google spreadsheet.

Below is the preview of my PDF File. Link to my PDF File. enter image description here

Here is the code I tried; it fails to find the text and to reach the value that is next to it.

below is my code.

/**
 * Converts a PDF stored in Google Drive into a Google Doc using OCR, then
 * logs (and returns) the number that follows the "USD" label in the
 * recognised text.
 *
 * The Advanced Drive API Service must be enabled for `Drive.Files.insert`.
 *
 * @returns {?string} The value found right after "USD" (e.g. "167.1764"),
 *     or null when the text contains no such value.
 * @throws {Error} If the PDF is not present in the folder.
 */
function extractTextFromPDF() {
  const folder = DriveApp.getFolderById('folderid');
  const candidates = folder.getFilesByName('08-Sep-2021.pdf');

  // Fail with a clear message instead of dereferencing an undefined blob.
  if (!candidates.hasNext()) {
    throw new Error('File "08-Sep-2021.pdf" was not found in the folder.');
  }

  const blob = candidates.next().getBlob();
  const resource = {
    title: blob.getName(),
    mimeType: blob.getContentType()
  };

  // Enable the Advanced Drive API Service
  const file = Drive.Files.insert(resource, blob, {ocr: true, ocrLanguage: "en"});

  // Extract Text from PDF file
  const doc = DocumentApp.openById(file.id);
  const text = doc.getBody().getText();
  Logger.log(text);
  // NOTE: the converted Doc is intentionally left in Drive, as before. Trash it
  // with DriveApp.getFileById(file.id).setTrashed(true) once it is not needed.

  // Capture the number that follows the "USD" label. Assumes the value is
  // separated from the label only by whitespace (same line or next line) --
  // TODO confirm against the actual OCR output.
  const match = /\bUSD\b\s*([0-9][0-9.,]*)/.exec(text);
  if (match === null) {
    Logger.log('No value found after "USD".');
    return null;
  }

  const usdValue = match[1];
  Logger.log(usdValue);
  return usdValue;
}

Upvotes: 0

Views: 1258

Answers (1)

RemcoE33
RemcoE33

Reputation: 1610

You can extract this with the help of a regular expression. I'm not the best with those patterns, so maybe somebody else can optimize it so that the split is not necessary (here is a link).

The code:

/**
 * Converts a PDF stored in Google Drive into a temporary Google Doc, reads
 * the two values listed on the lines after the "USD" label, logs them, and
 * trashes the temporary Doc again.
 *
 * The Advanced Drive API Service must be enabled for `Drive.Files.insert`.
 *
 * @returns {{buying: ?string, selling: ?string}} The trimmed text of the first
 *     and second line after "USD"; a field is null when its pattern does not
 *     match the converted text.
 * @throws {Error} If the PDF is not present in the folder.
 */
function extractTextFromPDF() {
  const folder = DriveApp.getFolderById('1QVo_pxxx387WPH9Yx');
  const candidates = folder.getFilesByName('08-Sep-2021.pdf');

  // Fail with a clear message instead of dereferencing an undefined blob.
  if (!candidates.hasNext()) {
    throw new Error('File "08-Sep-2021.pdf" was not found in the folder.');
  }

  const blob = candidates.next().getBlob();
  const resource = {
    title: blob.getName(),
    mimeType: blob.getContentType()
  };

  // Enable the Advanced Drive API Service
  const file = Drive.Files.insert(resource, blob, {convert: true});

  try {
    // Extract Text from PDF file
    const text = DocumentApp.openById(file.id).getBody().getText();
    Logger.log(text);

    // First line after the "USD" label. Each literal is used once, so the
    // stateful `g` flag is not needed; `m` makes `$` match at line ends.
    const buyingMatch = /USD\n(.*?)$/m.exec(text);
    // Second line after the "USD" label (skips one whitespace-free token).
    const sellingMatch = /USD\n\s*\S*\n(.*?)$/m.exec(text);

    // A missing label yields null rather than a TypeError on `[1]`.
    const buying = buyingMatch === null ? null : buyingMatch[1].trim();
    const selling = sellingMatch === null ? null : sellingMatch[1].trim();

    console.log(buying);
    console.log(selling);

    return { buying, selling };
  } finally {
    // Remove the converted file, even when reading or parsing it failed.
    DriveApp.getFileById(file.id).setTrashed(true);
  }
}

Upvotes: 3

Related Questions