Angelito Saliva
Angelito Saliva

Reputation: 47

Fetch xlsx file from email and copy file

I saw this script from one of the questions here, but can't make it work. It is meant to fetch an Excel file from my Gmail and copy it to a Google Sheet. Google Sheet file is here: https://docs.google.com/spreadsheets/d/1rED5zphZKp7JIK9qaTpHLKBSMsSiLAvhBAXc6Tn0zqI/edit#gid=301517543

Script:

function getRawData() {
  var threads = GmailApp.search('subject:myRentokil Report Delivery: Canpac | Pest Activity');
  var message = threads[0].getMessages()[0];
  var attachment = message.getAttachments()[0];

  var xlsxBlob = attachment[0]; // Is supposes that attachment[0] is the blob of xlsx file.
  var convertedSpreadsheetId = Drive.Files.insert({mimeType: MimeType.GOOGLE_SHEETS}, xlsxBlob).id;
  var sheet = SpreadsheetApp.openById(convertedSpreadsheetId).getSheets()[0]; // There is the data in 1st tab.
  var data = sheet.getDataRange().getValues();
  Drive.Files.remove(convertedSpreadsheetId); // Remove the converted file.

  var sheet = SpreadsheetApp.openById("1rED5zphZKp7JIK9qaTpHLKBSMsSiLAvhBAXc6Tn0zqI").getSheetByName("RawData");
  sheet.clearContents();
  var range = sheet.getRange(1, 1, data.length, data[0].length);range.setValues(data);

 }

Upvotes: 1

Views: 2724

Answers (2)

Csaba Csonka
Csaba Csonka

Reputation: 31

There is an even faster solution that eliminates the need of converting the xlsx file to Google Sheet and gets the data directly from the attachment xlsx file. The excel blob is first unzipped and then the resulted xml files are processed using string methods.

function getRawData() {
  var threads = GmailApp.search('subject:myRentokil Report Delivery: Canpac | Pest Activity');
  var message = threads[0].getMessages()[0];
  var attachment = message.getAttachments()[0];

  var xlsxBlob = attachment[0]; // Is supposes that attachment[0] is the blob of xlsx file.
  var data = var data = Object.values(parseMSExcelBlob(xlsxBlob))[0];

  var sheet = SpreadsheetApp.openById("1rED5zphZKp7JIK9qaTpHLKBSMsSiLAvhBAXc6Tn0zqI").getSheetByName("RawData");
  sheet.clearContents();
  var range = sheet.getRange(1, 1, data.length, data[0].length);range.setValues(data);

}



/**
* Parsing MS Excel files and returns values in JSON format.
*
* @param {BlobSource} blob the blob from MS Excel file
* @param {String[]} requiredSheets the array of required sheet names (if omitted returns all)
* @return {Object} Object of sheet names and values (2D arrays)
*/
function parseMSExcelBlob(blob, requiredSheets){
    var col_cache = {};
    var forbidden_chars = {
        "&lt;": "<",
        "&gt;": ">",
        "&amp;": "&",
        "&apos;": "'",
        "&quot;": '"'
    };
    
    blob.setContentType("application/zip");
    var parts = Utilities.unzip(blob);
    
    var relationships = {};
    for( var part of parts ){
        var part_name = part.getName();
        if( part_name === "xl/_rels/workbook.xml.rels" ){
            var txt = part.getDataAsString();
            var rels = breakUpString(txt, '<Relationship ', '/>');
            for( var i = 0; i < rels.length; i++ ){
                var rId = breakUpString(rels[i], 'Id="', '"')[0];
                var path = breakUpString(rels[i], 'Target="', '"')[0];
                relationships[rId] = "xl/" + path;
            }
        }
    }
    
    var worksheets = {};
    for( var part of parts ){
        var part_name = part.getName();
        if( part_name === "xl/workbook.xml" ){
            var txt = part.getDataAsString();
            var sheets = breakUpString(txt, '<sheet ', '/>');
            for( var i = 0; i < sheets.length; i++ ){
                var sh_name = breakUpString(sheets[i], 'name="', '"')[0];
                sh_name = decodeForbiddenChars(sh_name);
                var rId = breakUpString(sheets[i], 'r:id="', '"')[0];
                var path = relationships[rId];
                if( path.includes("worksheets") ){
                    worksheets[path] = sh_name;
                }
            }
        }
    }
    
    requiredSheets = Array.isArray(requiredSheets) && requiredSheets.length && requiredSheets || [];
    var worksheets_needed = [];
    for( var path in worksheets ){
        if( !requiredSheets.length || requiredSheets.includes(worksheets[path]) ){
            worksheets_needed.push(path);
        }
    }
    if( !worksheets_needed.length ) return {"Error": "Requested worksheets not found"};
    
    var sharedStrings = [];
    for( var part of parts ){
        var part_name = part.getName();
        if( part_name === "xl/sharedStrings.xml" ){
            var txt = part.getDataAsString();
            txt = txt.replace(/ xml:space="preserve"/g, "");
            sharedStrings = breakUpString(txt, '<si>', '</si>');
            for( var i = 0; i < sharedStrings.length; i++ ){
                var str = breakUpString(sharedStrings[i], '<t>', '</t>')[0];
                sharedStrings[i] = decodeForbiddenChars(str);
            }
        }
    }
    
    var result = {};
    for( var part of parts ){
        var part_name = part.getName();
        if( worksheets_needed.includes(part_name) ){
            var txt = part.getDataAsString();
            txt = txt.replace(/ xml:space="preserve"/g, "");
            var cells = breakUpString(txt, '<c ', '</c>');
            var tbl = [[]];
            for( var i = 0; i < cells.length; i++ ){
                var r = breakUpString(cells[i], 'r="', '"')[0];
                var t = breakUpString(cells[i], 't="', '"')[0];
                if( t === "inlineStr" ){
                    var data = breakUpString(cells[i], '<t>', '</t>')[0];
                    data = decodeForbiddenChars(data);
                }else if( t === "s" ){
                    var v = breakUpString(cells[i], '<v>', '</v>')[0];
                    var data = sharedStrings[v];
                }else{
                    var v = breakUpString(cells[i], '<v>', '</v>')[0];
                    var data = Number(v);
                }
                var row = r.replace(/[A-Z]/g, "") - 1;
                var col = colNum(r.replace(/[0-9]/g, "")) - 1;
                if( tbl[row] ){
                    tbl[row][col] = data;
                }else{
                    tbl[row] = [];
                    tbl[row][col] = data;
                }
            }
            var sh_name = worksheets[part_name];
            result[sh_name] = squareTbl(tbl);
        }
    }
    
    
    function decodeForbiddenChars(txt){
        if( !txt ) return txt;
        for( var char in forbidden_chars ){
            var regex = new RegExp(char,"g");
            txt = txt.replace(regex, forbidden_chars[char]);
        }
        return txt;
    }
    
    function breakUpString(str, start_patern, end_patern){
        var arr = [], raw = str.split(start_patern), i = 1, len = raw.length;
        while( i < len ){ arr[i - 1] = raw[i].split(end_patern, 1)[0]; i++ };
        return arr;
    }
    
    function colNum(char){
        if( col_cache[char] ) return col_cache[char];
        var alph = "ABCDEFGHIJKLMNOPQRSTUVWXYZ", i, j, result = 0;
        for( i = 0, j = char.length - 1; i < char.length; i++, j-- ){
            result += Math.pow(alph.length, j) * (alph.indexOf(char[i]) + 1);
        }
        col_cache[char] = result;
        return result;
    }
    
    function squareTbl(arr){
        var tbl = [];
        var x_max = 0;
        var y_max = arr.length;
        for( var y = 0; y < y_max; y++ ){
            arr[y] = arr[y] || [];
            if( arr[y].length > x_max ){ x_max = arr[y].length };
        }
        for( var y = 0; y < y_max; y++ ){
            var row = [];
            for( var x = 0; x < x_max; x++ ){
                row.push(arr[y][x] || arr[y][x] === 0 ? arr[y][x] : "");
            }
            tbl.push(row);
        }
        return tbl.length ? tbl : [[]];
    }
    
    
    return result;
}

Upvotes: 0

iansedano
iansedano

Reputation: 6481

Modified Script

function getRawData() {
    // This gets the threads
    var threads = GmailApp.search('subject:excel attachment');
    var message = threads[0].getMessages()[0]; // returns the first message from the first thread
    var attachment = message.getAttachments()[0]; // returns the first attachment
    var xlsxBlob = attachment; // This is a blob, yes.
    
    var convertedSpreadsheetId = Drive.Files.insert({mimeType: MimeType.GOOGLE_SHEETS}, xlsxBlob).id;
    var sheet = SpreadsheetApp.openById(convertedSpreadsheetId).getSheets()[0];
    var data = sheet.getDataRange().getValues(); // gets values from first tab
    
    // You were overwriting some variables here, so changed to dataSheet and dataRange
    var dataSheet = SpreadsheetApp.openById("1rED5zphZKp7JIK9qaTpHLKBSMsSiLAvhBAXc6Tn0zqI").getSheetByName("RawData");
    var dataRange = dataSheet.getRange(1, 1, data.length, data[0].length);
    dataRange.setValues(data);
    
    // Moved this line to the end because you were deleting it before using the data inside it.
    // Changed to trash instead of remove.
    Drive.Files.trash(convertedSpreadsheetId); // Remove the converted file.
 }
  • I commented your script to make the steps clearer.

  • There were some variables that were initialized twice so just changed those names.

  • The reason for the error is that remove does not exist as a method in the Drive API v2.

    enter image description here

    https://developers.google.com/drive/api/v2/reference/files/trash

    remove actually does seem to work and deletes the file, but it always returns the error. trash works as expected. If you want to skip the trash you can delete instead.

Upvotes: 1

Related Questions