blade_runner
blade_runner

Reputation: 25

How can I improve the performance of this Google Sheets custom function?

I have a Google Sheets project that uses this custom function 50-100 times, so I'm trying to make the function as efficient as possible. The function filters data that is written to the INPUT worksheet (via the Google API) and then draws a table for a subset of the data.

I have provided a working sample spreadsheet here: https://docs.google.com/spreadsheets/d/1KVjDl0Ix2bnlPqPEnsY4wh34MIPeayDPk3cq7DdU3g4/edit?usp=sharing

The 'META' worksheet is simply to trigger the custom function to run (i.e. mimic the INPUT sheet being populated via the Google API) by changing the value in cell A1, which is an argument for the custom function call.

The 'INPUT' sheet contains sample input data. The greyed out columns (Group name, Field name, Type, Concatenated Id, VLookup value) are not used by the function:

Instance Id Group name Group Id Field name Field Id Type Value File Id Role Concatenated Id VLookup value
Instance1A Widgets 91c7db0a-c52a-407d-869a-af8ba8bf8ba7 Field 1 09456c1a-abb4-4e81-94bd-7ce4c88afffc CURRENCY 100 Pilot_File Pilot 09456c1a-abb4-4e81-94bd-7ce4c88afffcInstance1A 100
Instance1A Widgets 91c7db0a-c52a-407d-869a-af8ba8bf8ba7 Field 2 474f6395-83a7-4c2b-aa5a-ceb00e200f8e CURRENCY 200 Pilot_File Pilot 474f6395-83a7-4c2b-aa5a-ceb00e200f8eInstance1A 200
Instance1A Widgets 91c7db0a-c52a-407d-869a-af8ba8bf8ba7 Field 3 ac64e001-fe85-400a-92e4-69cebf1c260d CURRENCY 300 Pilot_File Pilot ac64e001-fe85-400a-92e4-69cebf1c260dInstance1A 300
Instance1B Widgets 91c7db0a-c52a-407d-869a-af8ba8bf8ba7 Field 1 09456c1a-abb4-4e81-94bd-7ce4c88afffc CURRENCY 110 Pilot_File Pilot 09456c1a-abb4-4e81-94bd-7ce4c88afffcInstance1B 110
Instance1B Widgets 91c7db0a-c52a-407d-869a-af8ba8bf8ba7 Field 2 474f6395-83a7-4c2b-aa5a-ceb00e200f8e CURRENCY 220 Pilot_File Pilot 474f6395-83a7-4c2b-aa5a-ceb00e200f8eInstance1B 220
Instance1B Widgets 91c7db0a-c52a-407d-869a-af8ba8bf8ba7 Field 3 ac64e001-fe85-400a-92e4-69cebf1c260d CURRENCY 330 Pilot_File Pilot ac64e001-fe85-400a-92e4-69cebf1c260dInstance1B 330
Instance2A Widgets 91c7db0a-c52a-407d-869a-af8ba8bf8ba7 Field 1 09456c1a-abb4-4e81-94bd-7ce4c88afffc CURRENCY 1000 Co-PIlot_File Co-Pilot 09456c1a-abb4-4e81-94bd-7ce4c88afffcInstance2A 1000
Instance2A Widgets 91c7db0a-c52a-407d-869a-af8ba8bf8ba7 Field 2 474f6395-83a7-4c2b-aa5a-ceb00e200f8e CURRENCY 2000 Co-PIlot_File Co-Pilot 474f6395-83a7-4c2b-aa5a-ceb00e200f8eInstance2A 2000
Instance2A Widgets 91c7db0a-c52a-407d-869a-af8ba8bf8ba7 Field 3 ac64e001-fe85-400a-92e4-69cebf1c260d CURRENCY 3000 Co-PIlot_File Co-Pilot ac64e001-fe85-400a-92e4-69cebf1c260dInstance2A 3000

The 'TABLE_CONFIG' sheet contains configuration attributes for the results table. The greyed out column (Description) is not used by the function:

Field Id Description Desired table field column Group Id
09456c1a-abb4-4e81-94bd-7ce4c88afffc Field 1 1 91c7db0a-c52a-407d-869a-af8ba8bf8ba7
474f6395-83a7-4c2b-aa5a-ceb00e200f8e Field 2 2 91c7db0a-c52a-407d-869a-af8ba8bf8ba7
ac64e001-fe85-400a-92e4-69cebf1c260d Field 3 3 91c7db0a-c52a-407d-869a-af8ba8bf8ba7

The 'RESULTS_Pilot' and 'RESULTS_Co-Pilot' sheets are examples of how the custom function is called from various places throughout the spreadsheet. The headers are static. The function is called like this:

=getTable("91c7db0a-c52a-407d-869a-af8ba8bf8ba7", "TABLE_CONFIG", "Pilot", META!A1)

The results table must always be returned to display in the format shown in the sample:

Instance Id Field 1 Field 2 Field 3 File Id
Instance1A 100 200 300 Pilot_File
Instance1B 110 220 330 Pilot_File

I am fairly new to coding and very new to Google Apps Script. The custom function is working, but I am hoping for tips on speeding it up, or pointers where my code is inefficient or redundant.

Thank you.

// Filters INPUT by Group Id and Role. Called from getTable().
function filterInput(group, role) {
  var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("INPUT");
  var range = sheet.getDataRange();
  var values = range.getValues();
  var results = [];
  values.forEach(function (row) {
    if (row[2] === group && row[8] === role) {
      results.push(row);
    }
  });
  return results;
}

// Builds matrix. Called from getTable().
let generateMatrix = function (m, n, value) {
  let matrix = [];
  for (let i = 0; i < m; i++) {
    let row = [];
    for (let j = 0; j < n; j++) {
      row.push(value);
    }
    matrix.push(row);
  }
  return matrix;
};

// Main function called from RESULTS_Pilot and RESULTS_Co-Pilot worksheets
function getTable(groupUUID, configSheetName, role) {
  // Filter INPUT tab to get only rows for group and role
  values = filterInput(groupUUID, role);
  // If filtered INPUT contains 0 rows, return empty string
  if (values.length === 0) {
    Logger.log("There are no matching rows in INPUT.");
    return ("");
  }
  else {
    Logger.log("There are " + values.length + " filtered rows for the specified group and role.");
    var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName(configSheetName);
    var range = sheet.getDataRange();
    var configValues = range.getValues();
    var configFields = 0;
    var rowNum = 0;
    var uniqueInstanceRows = 0; // Need to find the number of unique instance rows (i.e. unique Instance Ids) to build table
    var colValues = []; // Create array with only first column values (Instance Ids) from filtered input rows
    for (i = 0; i < values.length; i++) {
      colValues.push(values[i][0]);
    }

    const unique = (value, index, self) => { return self.indexOf(value) === index; } // Get only unique Instance Ids from array
    var Unique_List = colValues.filter(unique);
    uniqueInstanceRows = Unique_List.length; // Will be used to build empty table using generateMatrix()
    Logger.log("# of unique Instance Id rows: " + uniqueInstanceRows);

    configValues.forEach(function (configRow) { // Count number of config fields for the group in TABLE_CONFIG worksheet
      if (configRow[3] === groupUUID && configRow[2] !== "") {
        configFields = configFields + 1;
      }
    })
    Logger.log("There are " + configFields + " config fields.");

    // Generate table structure
    table = generateMatrix(uniqueInstanceRows + 1, configFields, "");

    // Fill first column in results table with unique Instance Ids
    for (i = 0; i < Unique_List.length; i++) {
      table[i][0] = Unique_List[i];
    }

    // Generate a table to store Instance Id / File Id pairs 
    pairsTable = generateMatrix(Unique_List.length, 2, "");
    values.forEach(function (row) {
      for (i = 0; i < Unique_List.length; i++) {
        if (row[0] === Unique_List[i]) {
          pairsTable[i][0] = Unique_List[i];
          pairsTable[i][1] = row[7];
        }
      }
    })

    // Fill last column in results table with corresponding File Ids
    table.forEach(function (row, index) {
      for (i = 0; i < pairsTable.length; i++) {
        if (row[0] === pairsTable[i][0]) {
          table[index][configFields + 1] = pairsTable[i][1];
        }
      }
    })

    // Populate results table with remaining group field values
    values.forEach(function (row) {
      configValues.forEach(function (configRow) {
        if (row[4] === configRow[0] && configRow[2] !== "") {
          for (i = 0; i <= Unique_List.length; i++) {
            if (row[0] === Unique_List[i]) {
              rowNum = i;
            }
          }
          let val = row[6];
          table[rowNum][configRow[2]] = val;
        }
      })
    })
    // Return results table
    return table;
  }
}

Upvotes: 1

Views: 195

Answers (2)

doubleunary
doubleunary

Reputation: 18784

It is unclear why you would need a custom function for this when plain vanilla spreadsheet formulas seem to suffice. Try query() with a pivot clause, like this:

=query(INPUT!A1:K, "select A, max(G), H where I = 'Pilot' group by A, H pivot D", 1)

=query(INPUT!A1:K, "select A, max(G), H where I = 'Co-Pilot' group by A, H pivot D", 1)

These formulas exactly match the custom function results you show, and they should run much faster.

To reorder the data so that it falls into the column numbers given in the TABLE_CONFIG sheet, put the first query() formula in a new sheet in cell query!A1, and put these formulas in yet another sheet:

cell A1:

=arrayformula( 
  { 
    "Instance Id", 
    iferror( 
      vlookup( 
        sequence(1, max(TABLE_CONFIG!C2:C4) - 1, 2), 
        { TABLE_CONFIG!C2:C, TABLE_CONFIG!B2:B }, 
        2, false 
      ) 
    ), 
    "File Id" 
  } 
)

cell A2:

=arrayformula( 
  iferror( 
    hlookup( 
      A1:I1, 
      query!A1:E, 
      sequence(counta(query!A2:A), 1, 2), 
      false 
    ) 
  ) 
)

The formulas can be put together so that no helper sheet is needed, but that is left as an exercise for the reader. This is just to show that the data can be aggregated using plain vanilla spreadsheet formulas without resorting to scripting.

Upvotes: 1

BeRT2me
BeRT2me

Reputation: 13242

Using this nifty project AlaSQLGS~

We can use AlaSQL to simplify a lot of data manipulation functions that (imo) would be better done in a database than javascript/appscript.

Here's an example that you could modify that uses your data:

const alasql = AlaSQLGS.load()

function Update_Results() {
  const groupUUID = '91c7db0a-c52a-407d-869a-af8ba8bf8ba7';
  const role = 'Pilot';
  const inputSheetName = 'INPUT';
  const outputSheetName = 'RESULTS_Pilot';

  const ss = SpreadsheetApp.getActiveSpreadsheet();
  const inputSheet = ss.getSheetByName(inputSheetName);
  const outputSheet = ss.getSheetByName(outputSheetName)
  
  let data = inputSheet.getDataRange().getValues();
  data.shift() // Remove column names~
  
  // Create a Table and insert values into it.
  // I use \` so often, because you have Spaces and illegal sql column names.
  // Note that these are backticks, not single quotes.
  alasql(`
    CREATE TABLE data (
      \`Instance Id\` STRING,
      \`Group Id\` STRING,
      \`Field name\` STRING,
      \`Field Id\` STRING,
      \`Value\` INT,
      \`File Id\` STRING,
      \`Role\` STRING
    );
    SELECT [0] AS \`Instance Id\`
         , [2] AS \`Group Id\`
         , [3] AS \`Field name\`
         , [4] AS \`Field Id\`
         , [6] AS \`Value\`
         , [7] AS \`File Id\`
         , [8] AS \`Role\`
    INTO data
    FROM ?;
  `, [data])

  // Query the Table
  // MATRIX gives us an Array of Arrays instead of an Array of Objects.
  let output = alasql(`
    SELECT MATRIX
           \`Instance Id\`
         , \`Field name\` 
         , \`File Id\`
         , [Value]
      FROM data
     PIVOT (MAX([Value]) FOR \`Field name\`)
     WHERE \`Group Id\` = ?
       AND \`Role\` = ?
  `, [groupUUID, role]);
  
  // Add Column Names
  output.unshift(['Instance Id', 'File Id', ...output[0].slice(2).map((x,i) => `Field ${i+1}`)])
  
  // Insert the result into the sheet
  outputSheet.clearContents()
  outputSheet.getRange(1, 1, output.length, output[0].length).setValues(output);

  Logger.log(output)
}

Logger Output:

[[Instance Id, File Id, Field 1, Field 2, Field 3],
 [Instance1A, Pilot_File, 100.0, 200.0, 300.0],
 [Instance1B, Pilot_File, 110.0, 220.0, 330.0]]

GSheet Output:

Instance Id File Id Field 1 Field 2 Field 3
Instance1A Pilot_File 100 200 300
Instance1B Pilot_File 110 220 330

Upvotes: 0

Related Questions