Reputation: 5811
GAS is quite powerful and you could write a full fledged web-app using a Google Sheet as the DB back-end. There are many reasons not to do this but I figure in some cases it is okay.
I think the biggest issue will be performance issues when looking for rows based on some criteria in a sheet with a lot of rows. I know there are many ways to "query" a sheet but I can't find reliable information on which is the fastest.
One of the complexities is that many people can edit a sheet which means there are a variable number of situations you'd have to account for. For the sake of simplicity, I want to assume the sheet:
=row()
)The most basic query is finding a row where a specific column equals some value.
Which method would be the fastest?
Upvotes: 6
Views: 5940
Reputation: 108
Consider using the CacheService. You can cache the results to avoid both having to fetch values from the spreadsheet and re-render the html/css.
For example, you could use something like this:
const cachedHtmlString = CacheService.getDocumentCache().get("yourKey")
if (cachedHtmlString) {
console.log("fetching from cache")
return HtmlService.createHtmlOutput(cachedHtmlString)
} else {
// do your normal stuff here
// and save the results to the cache before returning, eg
CacheService.getDocumentCache().put("yourKey", newHtmlString)
}
Remember to use Cache.remove("yourKey") if you update your sheet's data so you aren't serving old info to users.
IMTheNachoMan's answer is very helpful, especially if the Google Visualization API (GViz) suits your needs. Unfortunately, GViz requires you to parse the resulting json/csv/html and re-render it. In my testing, rendering the data into html/css is also a significant bottleneck, not just fetching the data from the sheet. GViz seems to be the best way to fetch the data from the sheet, but caching may help improve performance even more.
Upvotes: 0
Reputation: 7
function testIt() In case 3: you enter method3 incorrectly, you must enter method4
Upvotes: 1
Reputation: 5811
I have a sheet with ~19k rows and ~38 columns, filled with all sorts of unsorted real-world data. That is almost 700k rows so I figured it would be a good sheet to time a few methods and see which is the fastest.
UrlFetch
call to Google visualization query and don't provide last rowUrlFetch
call to Google visualization query and provide last rowHere are the my query functions.
function method1(spreadsheetID, sheetName, columnIndex, query)
{
// get the sheet values excluding header,
var rowValues = SpreadsheetApp.openById(spreadsheetID).getSheetByName(sheetName).getSheetValues(2, 1, -1, -1);
// loop through each row
for(var i = 0, numRows = rowValues.length; i < numRows; ++i)
{
// return it if found
if(rowValues[i][columnIndex] == query) return rowValues[i]
}
return false;
}
function method2(spreadsheetID, sheetName, columnIndex, query)
{
// get the sheet values excluding header
var rowValues = SpreadsheetApp.openById(spreadsheetID).getSheetByName(sheetName).getSheetValues(2, 1, -1, -1);
// sort it
rowValues.sort(function(a, b){
if(a[columnIndex] < b[columnIndex]) return -1;
if(a[columnIndex] > b[columnIndex]) return 1;
return 0;
});
// search using binary search
var foundRow = matrixBinarySearch(rowValues, columnIndex, query, 0, rowValues.length - 1);
// return if found
if(foundRow != -1)
{
return rowValues[foundRow];
}
return false;
}
function method3(spreadsheetID, sheetName, queryColumnLetterStart, queryColumnLetterEnd, queryColumnLetterSearch, query)
{
// SQL like query
myQuery = "SELECT * WHERE " + queryColumnLetterSearch + " = '" + query + "'";
// the query URL
// don't provide last row in range selection
var qvizURL = 'https://docs.google.com/spreadsheets/d/' + spreadsheetID + '/gviz/tq?tqx=out:json&headers=1&sheet=' + sheetName + '&range=' + queryColumnLetterStart + ":" + queryColumnLetterEnd + '&tq=' + encodeURIComponent(myQuery);
// fetch the data
var ret = UrlFetchApp.fetch(qvizURL, {headers: {Authorization: 'Bearer ' + ScriptApp.getOAuthToken()}}).getContentText();
// remove some crap from the return string
return JSON.parse(ret.replace("/*O_o*/", "").replace("google.visualization.Query.setResponse(", "").slice(0, -2));
}
function method4(spreadsheetID, sheetName, queryColumnLetterStart, queryColumnLetterEnd, queryColumnLetterSearch, query)
{
// find the last row in the sheet
var lastRow = SpreadsheetApp.openById(spreadsheetID).getSheetByName(sheetName).getLastRow();
// SQL like query
myQuery = "SELECT * WHERE " + queryColumnLetterSearch + " = '" + query + "'";
// the query URL
var qvizURL = 'https://docs.google.com/spreadsheets/d/' + spreadsheetID + '/gviz/tq?tqx=out:json&headers=1&sheet=' + sheetName + '&range=' + queryColumnLetterStart + "1:" + queryColumnLetterEnd + lastRow + '&tq=' + encodeURIComponent(myQuery);
// fetch the data
var ret = UrlFetchApp.fetch(qvizURL, {headers: {Authorization: 'Bearer ' + ScriptApp.getOAuthToken()}}).getContentText();
// remove some crap from the return string
return JSON.parse(ret.replace("/*O_o*/", "").replace("google.visualization.Query.setResponse(", "").slice(0, -2));
}
My binary search algorithm:
function matrixBinarySearch(matrix, columnIndex, query, firstIndex, lastIndex)
{
// find the value using binary search
// https://www.w3resource.com/javascript-exercises/javascript-array-exercise-18.php
// first make sure the query string is valid
// if it is less than the smallest value
// or larger than the largest value
// it is not valid
if(query < matrix[firstIndex][columnIndex] || query > matrix[lastIndex][columnIndex]) return -1;
// if its the first row
if(query == matrix[firstIndex][columnIndex]) return firstIndex;
// if its the last row
if(query == matrix[lastIndex][columnIndex]) return lastIndex;
// now start doing binary search
var middleIndex = Math.floor((lastIndex + firstIndex)/2);
while(matrix[middleIndex][columnIndex] != query && firstIndex < lastIndex)
{
if(query < matrix[middleIndex][columnIndex])
{
lastIndex = middleIndex - 1;
}
else if(query > matrix[middleIndex][columnIndex])
{
firstIndex = middleIndex + 1;
}
middleIndex = Math.floor((lastIndex + firstIndex)/2);
}
return matrix[middleIndex][columnIndex] == query ? middleIndex : -1;
}
This is the function I used to test them all:
// each time this function is called it will try one method
// the first time it is called it will try method1
// then method2, then method3, then method4
// after it does method4 it will start back at method1
// we will use script properties to save which method is next
// we also want to use the same query string for each batch so we'll save that in script properties too
function testIt()
{
// get the sheet where we're staving run times
var runTimesSheet = SpreadsheetApp.openById("...").getSheetByName("times");
// we want to see true speed tests and don't want server side caching so we a copy of our data sheet
// make a copy of our data sheet and get its ID
var tempSheetID = SpreadsheetApp.openById("...").copy("temp sheet").getId();
// get script properties
var scriptProperties = PropertiesService.getScriptProperties();
// the counter
var searchCounter = Number(scriptProperties.getProperty("searchCounter"));
// index of search list we want to query for
var searchListIndex = Number(scriptProperties.getProperty("searchListIndex"));
// if we're at 0 then we need to get the index of the query string
if(searchCounter == 0)
{
searchListIndex = Math.floor(Math.random() * searchList.length);
scriptProperties.setProperty("searchListIndex", searchListIndex);
}
// query string
var query = searchList[searchListIndex];
// save relevant data
var timerRow = ["method" + (searchCounter + 1), searchListIndex, query, 0, "", "", "", ""];
// run the appropriate method
switch(searchCounter)
{
case 0:
// start time
var start = (new Date()).getTime();
// run the query
var ret = method1(tempSheetID, "Extract", 1, query);
// end time
timerRow[3] = ((new Date()).getTime() - start) / 1000;
// if we found the row save its values in the timer output so we can confirm it was found
if(ret)
{
timerRow[4] = ret[0];
timerRow[5] = ret[1];
timerRow[6] = ret[2];
timerRow[7] = ret[3];
}
break;
case 1:
var start = (new Date()).getTime();
var ret = method2(tempSheetID, "Extract", 1, query);
timerRow[3] = ((new Date()).getTime() - start) / 1000;
if(ret)
{
timerRow[4] = ret[0];
timerRow[5] = ret[1];
timerRow[6] = ret[2];
timerRow[7] = ret[3];
}
break;
case 2:
var start = (new Date()).getTime();
var ret = method3(tempSheetID, "Extract", "A", "AL", "B", query);
timerRow[3] = ((new Date()).getTime() - start) / 1000;
if(ret.table.rows.length)
{
timerRow[4] = ret.table.rows[0].c[0].v;
timerRow[5] = ret.table.rows[0].c[1].v;
timerRow[6] = ret.table.rows[0].c[2].v;
timerRow[7] = ret.table.rows[0].c[3].v;
}
break;
case 3:
var start = (new Date()).getTime();
var ret = method3(tempSheetID, "Extract", "A", "AL", "B", query);
timerRow[3] = ((new Date()).getTime() - start) / 1000;
if(ret.table.rows.length)
{
timerRow[4] = ret.table.rows[0].c[0].v;
timerRow[5] = ret.table.rows[0].c[1].v;
timerRow[6] = ret.table.rows[0].c[2].v;
timerRow[7] = ret.table.rows[0].c[3].v;
}
break;
}
// delete the temp file
DriveApp.getFileById(tempSheetID).setTrashed(true);
// save run times
runTimesSheet.appendRow(timerRow);
// start back at 0 if we're the end
if(++searchCounter == 4) searchCounter = 0;
// save the search counter
scriptProperties.setProperty("searchCounter", searchCounter);
}
I have a global variable searchList
that is an array of various query strings -- some are in the sheet, some are not.
I ran testit
on a trigger to run every minute. After 152 iterations I had 38 batches. Looking at the result, this is what I see for each method:
| Method | Minimum Seconds | Maximum Seconds | Average Seconds |
|---------|-----------------|-----------------|-----------------|
| method1 | 8.24 | 36.94 | 11.86 |
| method2 | 9.93 | 23.38 | 14.09 |
| method3 | 1.92 | 5.48 | 3.06 |
| method4 | 2.20 | 11.14 | 3.36 |
So it appears that, at least for my data-set, is using Google visualization query is the fastest.
Upvotes: 14