Reputation: 55
I have around 300 Spreadsheets that I need to copy all data from each spreadsheet and merge into a Master Spreadsheet. I have a spreadsheet that lists all 300 spreadsheet Ids. This script works however its Very slow!
I also tried to manually enter all document Ids as a variable and it did not seem to make a difference.
Is there a better way to handle?
function combineData() {
const masterID = "ID";
const masterSheet = SpreadsheetApp.openById(masterID).getSheets()[0];
let targetSheets = docIds();
for (let i = 0, len = targetSheets.length; i < len; i++) {
let sSheet = SpreadsheetApp.openById(targetSheets[i]).getActiveSheet();
let sData = sSheet.getDataRange().getValues();
sData.shift() //Remove header row
if (sData.length > 0) { //Needed to add to remove errors on Spreadsheets with no data
let fRow = masterSheet.getRange("A" + (masterSheet.getLastRow())).getRow() + 1;
let filter = sData.filter(function (row) {
return row.some(function (cell) {
return cell !== ""; //If sheets have blank rows in between doesnt grab
})
})
masterSheet.getRange(fRow, 1, filter.length, filter[0].length).setValues(filter)
}
}
}
function docIds() {
let listOfId = SpreadsheetApp.openById('ID').getSheets()[0]; //list of 300 Spreadsheet IDs
let values = listOfID.getDataRange().getValues()
let arrayId = []
for (let i = 1, len = values.length; i < len; i++) {
let data = values[i];
let ssID = data[1];
arrayId.push(ssID)
}
return arrayId
}
Upvotes: 2
Views: 391
Reputation: 7616
Use the Sheets
API, depending on the data it is an order of magintude faster than the native SpreadsheetApp
. Add the Google Sheets API
under Services in the left pane of the Apps Script editor.
Here is a code snipped of how we use one or the other API:
if(gridData && gridHeight) {
let range = sheet.getRange(startRow, 1, gridHeight, gridData[0].length);
if(useSheetsAPI) {
try {
SpreadsheetApp.flush();
let valueRange = Sheets.newValueRange();
valueRange.values = gridData;
let idAndName = getSpreadsheetIdAndSheetNameByName_(sheetName);
let rangeA1 = idAndName.sheetName + '!' + range.getA1Notation();
let options = { valueInputOption: 'USER_ENTERED' };
let result = Sheets.Spreadsheets.Values.update(valueRange, idAndName.spreadsheetId, rangeA1, options);
debugLog_('sheetReplace(): Sheets.Spreadsheets.Values.update result: '+result);
} catch (err) {
Logger.log('sheetReplace() ERROR: %s', err.message);
return 'ERROR: sheetReplace() failed: ' + err.message;
}
} else {
range.setValues(gridData);
}
}
/**
* Get spreadsheet Id and sheet name by sheet name
*
* @param {string|null} name name of sheet, either "sheet_id:Tab Name", "Tab Name"
* @return {object} object object with spreadsheetId and sheetName
*/
function getSpreadsheetIdAndSheetNameByName_(name) {
let spreadsheetId = '';
if(name && name.length > 44 && name.indexOf(':') > 40) {
// assume format: "sheet_id:Tab Name"
spreadsheetId = name.replace(/:.*$/, '');
name = name.replace(/^.*?:/, '');
} else {
// assume format "Tab Name"
spreadsheetId = SpreadsheetApp.getActiveSpreadsheet().getId();
}
return { spreadsheetId: spreadsheetId, sheetName: name };
}
Also, I submitted an enhancement request for better performance, see https://issuetracker.google.com/issues/222337394 and vote for it.
Upvotes: 1
Reputation: 201338
I believe your goal is as follows.
In the current stage, unfortunately, there is no method for retrieving the values from multiple Spreadsheets, simultaneously. If the sample script is prepared, it is required to obtain the values from each spreadsheet in a loop. In this case, the process cost becomes high. I think that this might be the reason for your current issue.
In this answer, as another approach, I would like to propose the following flow.
By this flow, I thought that the process cost can be reduced. When this flow is reflected in a sample script, how about the following sample script?
Please set masterID
and ssId
. And, please enable Sheets API at Advanced Google services. And, please run myFunction
.
function myFunction() {
const masterID = "###"; // Please set the master Spreadsheet ID.
const ssId = "###"; // Please set the Spreadsheet ID including the Spreadsheet IDs you want to retrieve in column "B".
// Retrieve Spreadsheet IDs.
const sheet = SpreadsheetApp.openById(ssId).getSheets()[0];
const ssIds = sheet.getRange("B2:B" + sheet.getLastRow()).getDisplayValues().reduce((ar, [b]) => {
if (b) ar.push(b);
return ar;
}, []);
// Retrieve values from all Spreadsheets.
const workers = 50; // Please adjust this value.
const headers = { authorization: "Bearer " + ScriptApp.getOAuthToken() };
const reqs = [...Array(Math.ceil(ssIds.length / workers))].map(_ => ssIds.splice(0, workers).map(id => ({ url: `https://docs.google.com/spreadsheets/export?exportFormat=csv&id=${id}`, headers, muteHttpExceptions: true })));
const values = reqs.flatMap(r =>
UrlFetchApp.fetchAll(r).flatMap(rr => {
if (rr.getResponseCode() == 200) {
const [, ...val] = Utilities.parseCsv(rr.getContentText());
return val;
}
return [];
})
);
// Put values to the master sheet.
const masterSheet = SpreadsheetApp.openById(masterID).getSheets()[0];
Sheets.Spreadsheets.Values.update({ values }, masterID, `'${masterSheet.getSheetName()}'!A${masterSheet.getLastRow() + 1}`, { valueInputOption: "USER_ENTERED" });
// DriveApp.getFiles(); // This comment line is used for automatically detecting the scope for Drive API. So, please don't remove this line.
}
When this script is run,
ssId
.const workers = 50;
to const workers = 100;
, the values are retrieved from every 100 Spreadsheets. But, if an error occurs when this value is increased, please adjust the value.When I tested this script for 50 Spreadsheet, the processing time was about 20 seconds. But, I'm not sure about your actual situation. So, please test this script.
In your script, listOfID
is not declared. Please be careful about this.
Unfortunately, I cannot know your all Spreadsheets. So, if all values are more than 10,000,000 cells, an error occurs because of the maximum number of cells in a Spreadsheet. Please be careful about this.
If the number of values is large, an error might occur. At that time, please check my report.
Upvotes: 2
Reputation: 2140
The .setValues()
and .getValues()
function themselves already run quite heavily specially if you have large data in the sheet, and using it together with for loop will really cause it to be slow since it iterates over 1 by 1. How about changing the for loop to forEach()
Try:
function combineData() {
const masterID = "1aRQ7rW9tGF25xdmjAfOtT6HtyZKQq0_AIYOGSZMKOcA";
const masterSheet = SpreadsheetApp.openById(masterID).getSheetByName("Master");
let targetSheets = docIds();
targetSheets.forEach(function(x){
let sSheet = SpreadsheetApp.openById(x).getActiveSheet();
let sData = sSheet.getDataRange().getValues();
sData.shift() //Remove header row
if (sData.length > 0) { //Needed to add to remove errors on Spreadsheets with no data
let fRow = masterSheet.getRange("A" + (masterSheet.getLastRow())).getRow() + 1;
let filter = sData.filter(function (row) {
return row.some(function (cell) {
return cell !== ""; //If sheets have blank rows in between doesnt grab
})
})
masterSheet.getRange(fRow, 1, filter.length, filter[0].length).setValues(filter)
}
})
}
function docIds() {
let listOfId = SpreadsheetApp.openById('1aRQ7rW9tGF25xdmjAfOtT6HtyZKQq0_AIYOGSZMKOcA').getSheets()[0]; //list of 300 Spreadsheet IDs
let values = listOfId.getDataRange().getValues();
values.shift()
let arrayId = []
values.forEach(function(val){
let data = val;
let ssID = data[1];
arrayId.push(ssID)
})
return arrayId
}
Also here are some of the best practices to improve the performance of the script: Best Practices
More details on forEach:
Let me know if this helps!
Upvotes: 0