Reputation: 165
In a Google Apps Script attached to a Google Sheet, I have the file ID of an Excel file. I want to read the worksheet names of that Excel file. The tutorials I've seen on conversion load the Excel file as a blob, write it to Drive as a Google Sheet, and then read it.
Is there a way to do this that does not create artifacts that I then need to delete? I am concerned about the following: safety if there's a bug (the wrong thing gets deleted), additional processing time (I need to process a long list of Excel files), and leftover artifacts if the script aborts unexpectedly between inserting and deleting.
Thank you!
Upvotes: 0
Views: 291
Reputation: 31
It is possible to get the data of your Excel file from Google Drive in the following way. Since every .xlsx file is just a zip archive of XML files, you can get the file blob and unzip it. You can then parse the XML files and get the data you need. This way you do not need to first convert the Excel file into a Google Sheet.
/**
 * Example entry point: reads an MS Excel file stored in Google Drive and
 * extracts the values of its "Orders" worksheet via parseMSExcelBlob.
 * A parser error is reported to the user as a toast on the active spreadsheet.
 */
function getDataFromDrive(){
  const spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
  // fetch the raw bytes of the MS Excel file kept in Google Drive
  const excelBlob = DriveApp.getFileById("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx").getBlob();
  // leave out the second argument to have every worksheet parsed
  const parsed = parseMSExcelBlob(excelBlob, ["Orders"]);
  // stop here and notify the user when the parser reported a problem
  if( parsed["Error"] ){
    return spreadsheet.toast(parsed["Error"], "Something went wrong 🙄", 10);
  }
  // the worksheet values as a 2D array
  const tbl = parsed["Orders"];
  // do your stuff
  // ...
}
/**
 * Parses an MS Excel (.xlsx) file and returns its cell values in JSON format.
 *
 * The blob is unzipped and the workbook, relationship, shared-string and
 * worksheet XML parts are scanned with plain string splitting (no XML parser).
 *
 * Cell values: shared strings, inline strings and formula strings (t="str")
 * are returned as text with XML entities decoded; every other cell is
 * converted with Number(). Missing cells and falsy values other than 0
 * (including NaN) are returned as "".
 *
 * @param {BlobSource} blob the blob from MS Excel file; its content type is
 *     set to "application/zip" as a side effect so that it can be unzipped
 * @param {String[]} requiredSheets the array of required sheet names (if omitted returns all)
 * @return {Object} Object of sheet names and values (2D arrays), or
 *     {"Error": "Requested worksheets not found"} when no worksheet matched
 */
function parseMSExcelBlob(blob, requiredSheets){
  const col_cache = new Map();
  // the five predefined XML entities, keyed by name without "&" and ";"
  const xml_entities = { lt: "<", gt: ">", amp: "&", apos: "'", quot: '"' };

  blob.setContentType("application/zip");
  const parts = Utilities.unzip(blob);

  // index the archive once instead of scanning it for every lookup
  const part_by_name = new Map();
  for( const part of parts ){
    part_by_name.set(part.getName(), part);
  }

  // relationship id -> archive path of the target part
  const relationships = new Map();
  for( const rel of breakUpString(readPart("xl/_rels/workbook.xml.rels"), '<Relationship ', '/>') ){
    const rId = breakUpString(rel, 'Id="', '"')[0];
    const target = breakUpString(rel, 'Target="', '"')[0];
    if( !rId || !target ) continue;
    // targets are normally relative to "xl/", but some writers emit
    // absolute paths such as "/xl/worksheets/sheet1.xml"
    relationships.set(rId, target.startsWith("/") ? target.slice(1) : "xl/" + target);
  }

  // archive path of each worksheet part -> sheet name
  const worksheets = {};
  for( const sheet of breakUpString(readPart("xl/workbook.xml"), '<sheet ', '/>') ){
    const sh_name = decodeForbiddenChars(breakUpString(sheet, 'name="', '"')[0]);
    const path = relationships.get(breakUpString(sheet, 'r:id="', '"')[0]);
    // skip sheets without a matching relationship and targets that are not worksheets
    if( path && path.includes("worksheets") ){
      worksheets[path] = sh_name;
    }
  }

  const wanted = Array.isArray(requiredSheets) ? requiredSheets : [];
  const worksheets_needed = new Set(
    Object.keys(worksheets).filter(path => !wanted.length || wanted.includes(worksheets[path]))
  );
  if( !worksheets_needed.size ) return {"Error": "Requested worksheets not found"};

  const shared_xml = readPart("xl/sharedStrings.xml").replace(/ xml:space="preserve"/g, "");
  const sharedStrings = breakUpString(shared_xml, '<si>', '</si>').map(readText);

  const result = {};
  // iterate in archive order so the result keys keep the order of the parts
  for( const part of parts ){
    const part_name = part.getName();
    if( !worksheets_needed.has(part_name) ) continue;
    const txt = part.getDataAsString().replace(/ xml:space="preserve"/g, "");
    const tbl = [[]];
    for( const cell of breakUpString(txt, '<c ', '</c>') ){
      // read attributes from the opening tag only, so that attributes of
      // nested or trailing elements (<f t="shared">, <row ht="15">) are ignored
      const head = cell.split(">", 1)[0];
      const ref = breakUpString(head, 'r="', '"')[0];
      const type = breakUpString(head, 't="', '"')[0];
      const raw = breakUpString(cell, '<v>', '</v>')[0];
      let data;
      if( type === "inlineStr" ){
        data = readText(cell);
      }else if( type === "s" ){
        data = sharedStrings[raw];
      }else if( type === "str" ){
        // cached text result of a formula
        data = decodeForbiddenChars(raw);
      }else{
        data = Number(raw);
      }
      const row = ref.replace(/[A-Z]/g, "") - 1;
      const col = colNum(ref.replace(/[0-9]/g, "")) - 1;
      if( !tbl[row] ) tbl[row] = [];
      tbl[row][col] = data;
    }
    result[worksheets[part_name]] = squareTbl(tbl);
  }
  return result;

  /** Returns the text of the named archive part, or "" when it is absent. */
  function readPart(name){
    const part = part_by_name.get(name);
    return part ? part.getDataAsString() : "";
  }

  /**
   * Returns the decoded text of a string item: all <t> runs joined together
   * (rich text is split into several runs), ignoring phonetic <rPh> runs.
   */
  function readText(xml){
    const visible = xml.replace(/<rPh[\s>][\s\S]*?<\/rPh>/g, "");
    return decodeForbiddenChars(breakUpString(visible, '<t>', '</t>').join(""));
  }

  /**
   * Decodes the predefined XML entities and numeric character references.
   * Done in a single pass so that e.g. "&amp;quot;" yields "&quot;" and is
   * not decoded a second time.
   */
  function decodeForbiddenChars(txt){
    if( !txt ) return txt;
    return txt.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|lt|gt|amp|apos|quot);/g, function(match, ref){
      if( ref[0] !== "#" ) return xml_entities[ref];
      const code = ref[1] === "x" ? Number.parseInt(ref.slice(2), 16) : Number.parseInt(ref.slice(1), 10);
      // leave references beyond the Unicode range untouched instead of throwing
      return code <= 0x10FFFF ? String.fromCodePoint(code) : match;
    });
  }

  /** Returns every substring of str found between start_patern and the next end_patern. */
  function breakUpString(str, start_patern, end_patern){
    return str.split(start_patern).slice(1).map(chunk => chunk.split(end_patern, 1)[0]);
  }

  /** Converts column letters ("A", "AB", ...) to a 1-based column number. */
  function colNum(char){
    if( col_cache.has(char) ) return col_cache.get(char);
    const alph = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    let num = 0;
    for( const letter of char ){
      num = num * alph.length + alph.indexOf(letter) + 1;
    }
    col_cache.set(char, num);
    return num;
  }

  /** Pads a sparse, ragged table to a rectangle, filling gaps and falsy non-zero values with "". */
  function squareTbl(arr){
    const rows = Array.from(arr, row => row || []);
    const width = rows.reduce((max, row) => Math.max(max, row.length), 0);
    const tbl = rows.map(row => {
      const out = [];
      for( let x = 0; x < width; x++ ){
        out.push(row[x] || row[x] === 0 ? row[x] : "");
      }
      return out;
    });
    return tbl.length ? tbl : [[]];
  }
}
Upvotes: 1
Reputation: 2261
To answer your question: the tutorials first convert the Excel file to a Google Sheet in order to interact with it (in your case, to gather the worksheet names) because neither the Google APIs nor Apps Script can work with the Excel file as raw data; Google needs to convert the file into something that its APIs can read.
A workaround is to use the Excel JavaScript API to read the information from the original Excel file. Because Apps Script is based on JavaScript, you can call external APIs from it, so you would effectively be using Apps Script as an IDE.
However, you can do the same with any other IDE that works with JavaScript.
There are some examples on how to list the worksheets using the Excel JavaScript API in this blog.
If you would like to keep using Google APIs and the built-in Google Apps Script services, you will need to convert the file to Google Sheets.
Updating Answer:
You can read more about the Excel Services API here.
Upvotes: 1