Reputation: 2188
I'm new to Google Apps Script and I'm trying to make a script for a spreadsheet where I'll store all the email addresses found by .getFrom()
method in the sheet and ignore the same email addresses so that I get only one email address instead of multiple times. So far storing is working successfully but ignoring same emails is not working. I get same emails multiple times in my sheet's column.
Here's my code:
var n=threads.length;
var messages=thread.getMessages();
var getfrom = 0;
var allMails = [];
for (var i=0; i<n; i++)
{
for (var j=0; j<messages.length; j++)
{
var message=messages[j];
getfrom = message.getFrom();
var first_name = getfrom.substring(0, getfrom.indexOf(" "));
var last_name = getfrom.substring(getfrom.indexOf(" ")+1, getfrom.indexOf(" <"));
var email_address = 0;
if (first_name == '' && last_name == '')
{
email_address = getfrom;
} else {
email_address = getfrom.substring(getfrom.indexOf("<")+1, getfrom.indexOf(">"));
}
// This is how I check if I already have the email address or not
if (email_address == my_email || email_address[j] == email_address[j-1])
{
continue;
}
}
allMails.push([email_address]);
}
Logger.log(allMails);
sheet1.getRange(2, 3, n, 1).setValues(allMails);
Browser.msgBox("Operation complete");
How can I ignore duplicate values and get one email address instead of multiple times?
Upvotes: 0
Views: 721
Reputation: 11278
Also check the Gmail Extractor - it saves the email addresses from Gmail in a Google spreadsheet.
Upvotes: 0
Reputation: 45750
You can either ensure uniqueness before adding emails to the list, or build the full list first and remove duplicates later.
This example builds a one-dimensional array of addresses; because it's a simple array we can use the JavaScript built-in .indexOf() method to check for uniqueness. After all threads have been examined, the simple array is converted to a two-dimensional array for storage in the spreadsheet, using another Array built-in, map()
. Before that though, the array gets sorted - just because we can. You might want to do other filtering, such as removing "no-reply" addresses.
function getUniqueFromAddresses1() {
var my_email = Session.getActiveUser().getEmail();
var threads = GmailApp.getInboxThreads();
var n=threads.length;
var allMails = [];
for (var i=0; i<n; i++)
{
var thread = threads[i];
var messages=thread.getMessages();
for (var j=0; j<messages.length; j++)
{
var message=messages[j];
var getfrom = message.getFrom();
// Use RegEx to extract just email address
var email_address = getfrom.match(/[^<> ]*\@[^> ]*/)[0];
// Skip messages I sent or addresses already collected
var index = allMails.indexOf(email_address);
if (email_address !== my_email && allMails.indexOf(email_address) == -1) {
allMails.push(email_address);
}
}
}
// Could do further filtering & sorting of allEmails here
allMails = allMails.sort()
Logger.log(JSON.stringify(allMails));
// convert allMails array to two-dimensional array
allMails = allMails.map( function(item){
return [item];
});
Logger.log(JSON.stringify(allMails));
// Store in spreadsheet; use dimensions of array to avoid mismatching range size
sheet1.getRange(2, 3, allMails.length, allMails[0].length).setValues(allMails);
debugger; // Pause in debugger
Browser.msgBox("Operation complete");
}
Here's the alternate approach, removing duplicates after the array is built. The JavaScript magic here was lifted from this answer. We still use a one-dimensional array to collect and filter addresses. There's also an extra step required to remove our own address from the list.
Performance: This should be faster than approach 1, as there will be fewer comparisons required. HOWEVER, the bulk of the time used in the whole operation is tied up in accessing messages, so time savings in native JavaScript are negligible.
function getUniqueFromAddresses2() {
var my_email = Session.getActiveUser().getEmail();
var threads = GmailApp.getInboxThreads();
var n=threads.length;
var allMails = [];
for (var i=0; i<n; i++)
{
var thread = threads[i];
var messages=thread.getMessages();
for (var j=0; j<messages.length; j++)
{
var message=messages[j];
var getfrom = message.getFrom();
// Use RegEx to extract just email address
var email_address = getfrom.match(/[^<> ]*\@[^> ]*/)[0];
// Save the address
allMails.push(email_address);
// Skip messages I sent or addresses already collected
var index = allMails.indexOf(email_address);
if (email_address !== my_email && allMails.indexOf(email_address) == -1) {
allMails.push(email_address);
}
}
}
// Remove duplicates - https://stackoverflow.com/a/32533637/1677912
allMails = allMails.sort().reduce(function(a, b){ if (b != a[0]) a.unshift(b); return a }, []);
// Remove my address
if ((mine=allMails.indexOf(my_email)) > -1) allMails.splice(mine,1);
// Could do further filtering & sorting of allEmails here
allMails = allMails.sort()
Logger.log(JSON.stringify(allMails));
// convert allMails array to two-dimensional array
allMails = allMails.map( function(item){ return [item]; });
Logger.log(JSON.stringify(allMails));
sheet1.getRange(2, 3, n, 1).setValues(allMails);
debugger; // Pause in debugger
Browser.msgBox("Operation complete");
}
The original function took several steps to identify an email address in the string returned by message.getFrom()
. It's tricky, because that string can contain just an email address, or a name and an address. The operation can be simplified by using a regular expression to match just the email address, and ignore whatever other text is in the string.
// Use RegEx to extract just email address
var email_address = getfrom.match(/[^<> ]*\@[^> ]*/)[0];
The expression looks for @
and the text immediately before and after it, bordered by a space or angle braces. You can try this out in an online demo.
/[^<> ]*\@[^> ]*/
[^<> ]* match a single character not present in the list below
Quantifier: * Between zero and unlimited times, as many times as possible, giving back as needed [greedy]
<> a single character in the list "<> " literally (case sensitive)
\@ matches the character @ literally
Upvotes: 1
Reputation: 7377
You need to cross check your allMails array for a given email address to ensure it's not in the list, however you can't easily check against allMails directly because it is a two-dimensional array.
I would add a single dimensional array purely for the purpose of cross-checking.
var n=threads.length;
var messages=thread.getMessages();
var getfrom = 0;
var allMails = [];
var cross_check = [];
for (var i=0; i<n; i++)
{
for (var j=0; j<messages.length; j++)
{
var message=messages[j];
getfrom = message.getFrom();
var first_name = getfrom.substring(0, getfrom.indexOf(" "));
var last_name = getfrom.substring(getfrom.indexOf(" ")+1, getfrom.indexOf(" <"));
var email_address = 0;
if (first_name == '' && last_name == '')
{
email_address = getfrom;
} else {
email_address = getfrom.substring(getfrom.indexOf("<")+1, getfrom.indexOf(">"));
}
if(email_address != my_email && cross_check.indexOf(email_address) == -1){
cross_check.push(email_address);
allMails.push([email_address]);
}
}
}
Logger.log(allMails);
sheet1.getRange(2, 3, n, 1).setValues(allMails);
Browser.msgBox("Operation complete");
See the documentation for the indexOf function, which explains why we check against -1, here:
https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/Array/indexOf
Upvotes: 1