Reputation: 459
I am using the script below to delete duplicate rows from a Google spreadsheet. The script was working fine, but as data is added to the spreadsheet daily, it now throws an "Exceeded maximum execution time" error. As I am new to scripting, I don't understand what the problem is.
Could someone help me solve it?
function Deleteduplicates() {
  var SpreadSheetKey = "My key";
  var sheetD = SpreadsheetApp.openById(SpreadSheetKey).getSheetByName("Daily");
  var sheetW = SpreadsheetApp.openById(SpreadSheetKey).getSheetByName("Weekly");
  var dataD = sheetD.getDataRange().getValues();
  var dataW = sheetW.getDataRange().getValues();
  //Daily
  var newDataD = new Array();
  for (i in dataD) {
    var row = dataD[i];
    var duplicate = false;
    for (j in newDataD) {
      if (row.join() == newDataD[j].join()) {
        duplicate = true;
      }
    }
    if (!duplicate) {
      newDataD.push(row);
    }
  }
  //Weekly
  var newDataW = new Array();
  for (i in dataW) {
    var row = dataW[i];
    var duplicate = false;
    for (j in newDataW) {
      if (row.join() == newDataW[j].join()) {
        duplicate = true;
      }
    }
    if (!duplicate) {
      newDataW.push(row);
    }
  }
  sheetD.clearContents();
  sheetW.clearContents();
  sheetD.getRange(1, 1, newDataD.length, newDataD[0].length).setValues(newDataD);
  sheetW.getRange(1, 1, newDataW.length, newDataW[0].length).setValues(newDataW);
}
Upvotes: 4
Views: 4142
Reputation: 3728
Conceptually, this should be quite a bit faster, though I have not tried it on a large data set. The first version leaves the rows in their original order. The second version is faster still, but leaves the rows sorted lexicographically by their column values, from the first column to the last.
function Deleteduplicates() {
  var SpreadSheetKey = "My key";
  var ss = SpreadsheetApp.openById(SpreadSheetKey);
  var sheetD = ss.getSheetByName("Daily");
  var sheetW = ss.getSheetByName("Weekly");
  var sheets = [sheetD, sheetW];
  var toSs = {};
  for (var s = 0; s < sheets.length; s++) {
    var data = sheets[s].getDataRange().getValues();
    for (var i = 0; i < data.length; i++) {
      // EDIT: remove commas from join("") for blank test.
      // Prepend the joined row (the duplicate key) and a fixed-width row
      // index, so the original order can be restored after sorting.
      data[i].unshift(data[i].join(""), (1000000 + i).toString());
    }
    data.sort();
    // remove blank rows -- Edit
    var blank = 0;
    while (blank < data.length && data[blank][0].trim().length == 0) { blank++; }
    if (blank > 0) data.splice(0, blank);
    // end Edit
    // After sorting, duplicates are adjacent; walk backwards and splice them out.
    for (var x = data.length - 1; x > 0; x--) {
      if (data[x][0] == data[x - 1][0]) {
        data.splice(x, 1);
      }
    }
    for (var i = 0; i < data.length; i++) {
      data[i].splice(0, 1); // drop the join() key
    }
    data.sort(); // re-sort on the index column to restore the original order
    for (var i = 0; i < data.length; i++) {
      data[i].splice(0, 1); // drop the index column
    }
    toSs[sheets[s].getSheetName()] = data;
  }
  for (var s = 0; s < sheets.length; s++) {
    var data = toSs[sheets[s].getSheetName()];
    sheets[s].clearContents();
    sheets[s].getRange(1, 1, data.length, data[0].length).setValues(data);
  }
}
The faster version, which leaves the rows sorted by the join() string created to test for duplicates:
function Deleteduplicates() {
  var SpreadSheetKey = "My key";
  var ss = SpreadsheetApp.openById(SpreadSheetKey);
  var sheetD = ss.getSheetByName("Daily");
  var sheetW = ss.getSheetByName("Weekly");
  var sheets = [sheetD, sheetW];
  var toSs = {};
  for (var s = 0; s < sheets.length; s++) {
    var data = sheets[s].getDataRange().getValues();
    for (var i = 0; i < data.length; i++) {
      // EDIT: remove commas from join("") for blank test
      data[i].unshift(data[i].join(""));
    }
    data.sort();
    // remove blank rows -- Edit
    var blank = 0;
    while (blank < data.length && data[blank][0].trim().length == 0) { blank++; }
    if (blank > 0) data.splice(0, blank);
    // end Edit
    // After sorting, duplicates are adjacent; walk backwards and splice them out.
    for (var x = data.length - 1; x > 0; x--) {
      if (data[x][0] == data[x - 1][0]) {
        data.splice(x, 1);
      }
    }
    for (var i = 0; i < data.length; i++) {
      data[i].splice(0, 1); // drop the join() key; rows stay sorted by it
    }
    toSs[sheets[s].getSheetName()] = data;
  }
  for (var s = 0; s < sheets.length; s++) {
    var data = toSs[sheets[s].getSheetName()];
    sheets[s].clearContents();
    sheets[s].getRange(1, 1, data.length, data[0].length).setValues(data);
  }
}
Edited per Henrique's comment.
Edited 5/8: Remove blank rows (2 edited areas marked).
Upvotes: 3
Reputation: 17752
There is no problem with your script. It is just exceeding the "maximum execution time" allowed for any script (which is currently 6 minutes).
To work around this limit you'll have to split the work into parts that each finish in under 6 minutes.
For example, your code clears duplicates from two sheets. Try creating two functions, one for each sheet, and run them separately, as sketched below.
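A minimal sketch of that split, reusing the sheet names and key from the question (deleteDuplicatesDaily, deleteDuplicatesWeekly, and the removeDuplicatesFrom helper are hypothetical names, not part of the original script):

function deleteDuplicatesDaily() {
  removeDuplicatesFrom("Daily");
}

function deleteDuplicatesWeekly() {
  removeDuplicatesFrom("Weekly");
}

// Hypothetical helper: runs the same duplicate check on a single sheet,
// so each top-level function does only half the work per execution.
function removeDuplicatesFrom(sheetName) {
  var sheet = SpreadsheetApp.openById("My key").getSheetByName(sheetName);
  var data = sheet.getDataRange().getValues();
  var newData = [];
  for (var i = 0; i < data.length; i++) {
    var duplicate = false;
    for (var j = 0; j < newData.length; j++) {
      if (data[i].join() == newData[j].join()) {
        duplicate = true;
        break; // stop scanning once a match is found
      }
    }
    if (!duplicate) newData.push(data[i]);
  }
  sheet.clearContents();
  sheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
}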
Also, some performance enhancements could bring the script back under 6 minutes. For example, I'm not sure joining each row is the best way, performance-wise, to compare arrays.
Creating a new array to re-set the data might not be optimal either. I'd probably go with a map lookup, which is constant-time per row, instead of the O(n^2) double-array check you're doing; see the sketch below.
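A rough sketch of that map idea (removeDuplicatesWithMap and the "\u0001" join separator are my assumptions; the separator just makes it unlikely that two different rows produce the same key):

function removeDuplicatesWithMap(sheet) {
  var data = sheet.getDataRange().getValues();
  var seen = {};    // plain object used as a hash map of row keys
  var newData = [];
  for (var i = 0; i < data.length; i++) {
    // join with an unlikely separator so ["a,b"] and ["a","b"] differ
    var key = data[i].join("\u0001");
    if (!seen.hasOwnProperty(key)) { // constant-time check, no inner loop
      seen[key] = true;
      newData.push(data[i]);
    }
  }
  sheet.clearContents();
  sheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
}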
Bottom line: this is a limitation you have to live with in Apps Script, and any solution anyone proposes is just a workaround that will also eventually fail if your data grows too big.
Upvotes: 2