Gavis
Gavis

Reputation: 233

Extract data from gmail into a spreadsheet ignoring the first same field

With this script I am trying to parse the content from my Gmail to Google Spreadsheet and it works well with Google Apps Script.

The problem is that in my mail there are two identical fields called "Indirizzo mail:" that I need to be parsed, but I would to parse only the second.

How could I ignore the first one into the body of the mail?

--- Mail Body Example:

Indirizzo mail: [email protected]
Phone: 00000000
Website: www.test.it


Lorem Ipsum

Cognome: XXX
Nome: XXX
Codice fiscale: XXX
Indirizzo mail: [email protected]


function parseEmailMessages(start) {

start = start || 0;

var label = GmailApp.getUserLabelByName("testparser");
var threads = label.getThreads();
var sheet = SpreadsheetApp.getActiveSheet();

for (var i = 0; i < threads.length; i++) {

var tmp,
  message = threads[i].getMessages()[0],
  content = message.getPlainBody();

if (content) {

  tmp = content.match(/Cognome:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-]+)(\r?\n)/);
  var Cognome = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

  tmp = content.match(/Nome:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-]+)(\r?\n)/);
  var Nome = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

  tmp = content.match(/Codice fiscale:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-]+)(\r?\n)/);
  var CF = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

  tmp = content.match(/Indirizzo mail:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-]+)(\r?\n)/);
  var Mail = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

  sheet.appendRow([Cognome,Nome,CF,Mail]);

} // End if

  } // End for loop
}

UPDATE:
I have solved to parse all the text except this one:

tmp = content.match(/City (Lake's Camil):\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-]+)(\r?\n)/);
  var ID = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

I suppose not parse and so return null value for round brackets near the word Lake's Camil. I have tried to insert "\" before "(" but without results. It is very strange, cause in regex101.com it works.

Upvotes: 0

Views: 205

Answers (2)

Daniel
Daniel

Reputation: 3725

I'm not that great at regex but this should work based on your current code:

function parseEmailMessages(start) {

start = start || 0;

var label = GmailApp.getUserLabelByName("testparser");
var threads = label.getThreads();
var sheet = SpreadsheetApp.getActiveSheet();

for (var i = 0; i < threads.length; i++) {

  var lastposition = threads[0].getId()
  var helper = sheet.getRange("H1") //set the cell where the ID will be stored
   
  if (helper.getValue()==threads[i].getId()){
    helper.setValue(lastposition)  
    break;
  }
  
  if (helper.getValue()==""){// runs only if the helper cell is empty
    helper.setValue(lastposition)
  }

var tmp,
  message = threads[i].getMessages()[0],
  content = message.getPlainBody();

if (content) {

  tmp = content.match(/Cognome:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-]+)(\r?\n)/);
  var Cognome = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

  tmp = content.match(/Nome:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-]+)(\r?\n)/);
  var Nome = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

  tmp = content.match(/Codice fiscale:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-]+)(\r?\n)/);
  var CF = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

  tmp = [...content.matchAll(/(?<=Indirizzo mail: ).*?(?= |\\|\r|\n)/g)];
  var Mail = (tmp && tmp[1][0]) ? tmp[1][0].trim() : 'Null';

  sheet.appendRow([Cognome,Nome,CF,Mail]);

} // End if

  } // End for loop
}

Explanation: I modified your regex to match all instances of the email and used matchAll() instead of match() to get multiple results. This results in a multidimensional array so tmp[0][1] holds the "bad" email and tmp[1][1] holds the "good" one.

Upvotes: 1

Cooper
Cooper

Reputation: 64062

I'd try something like this:

function parseEmailMessages(start = 0) {
  const ss = SpreadsheetApp.getActive();
  const sh = ss.getActiveSheet();
  const label = GmailApp.getUserLabelByName("testparser");
  const threads = label.getThreads();
  for (let i = 0; i < threads.length; i++) {
    let message = threads[i].getMessages()[0];
    let content = message.getPlainBody();
    if (content) {
      let obj = { pA: [] };
      content.split('\n').filter(l => ~l.indexOf(':')).forEach(l => {
        let lt = l.trim();
        let t = lt.split(':');
        if (t.length == 2) {
          let a = t[0].trim();
          let b = t[1].trim();
          if (!obj.hasOwnProperty(a)) {
            obj[a] = [b];
            obj.pA.push(a);
          } else {
            obj[a].push(b);
          }
        }
      });
      const Cognome = obj['Cognome'];
      const Nome = obj['Nome'];
      const CF = obj['Codice Fiscale']
      const Mail = obj['Indirizzo mail'][1]
      if (Cogname && Nome && CF && Mail) {
        sh.appendRow([Cognome, Nome, CF, Mail]);
      }
    }
  }
}

Upvotes: 1

Related Questions