Reputation: 345
Good day all,
I'm trying to get a tar.gz attachment from Gmail, extract the file, and save it to Google Drive. It's a daily auto-generated report that arrives compressed because its raw size is over 25 MB.
I got this so far:
// Saves the attachments of the newest labelled mail from the last 24 hours to a
// Drive folder. Attachments ending in ".tar.gz" are additionally gunzipped, which
// leaves a .tar file next to the saved archive.
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Setup");
var gmailLabels = sheet.getRange("B2:B2").getValue(); //I have my Gmail Label stored here
var driveFolder = sheet.getRange("B5:B5").getValue(); //I have my GDrive folder name stored here
// apply label filter, search only last 24hrs mail
var filter = "has:attachment label:" + gmailLabels + " after:" + Utilities.formatDate(new Date(new Date().getTime()-1*(24*60*60*1000)), "GMT", "yyyy/MM/dd");
var threads = GmailApp.search(filter, 0, 1); // check only 1 email at a time
// Reuse the first folder with the configured name, or create it.
var folder = DriveApp.getFoldersByName(driveFolder);
if (folder.hasNext()) {
  folder = folder.next();
} else {
  folder = DriveApp.createFolder(driveFolder);
}
// The search may return no thread at all; threads[0] would then be undefined.
if (threads.length > 0) {
  var message = threads[0].getMessages()[0];
  var desc = message.getSubject() + " #" + message.getId();
  var att = message.getAttachments();
  for (var z = 0; z < att.length; z++) {
    var attName = att[z].getName();
    var fileType;
    // Classify by the END of the name. Checking ".tar.gz" first and anchoring both
    // patterns keeps a name such as "report.csv.tar.gz" from being treated as a CSV.
    if (/\.tar\.gz$/i.test(attName)) {
      fileType = "gzip";
    } else if (/\.csv$/i.test(attName)) {
      fileType = "csv";
    } else {
      // Unsupported attachment. skipLabel is not defined in this snippet, so the
      // thread is only labelled when the surrounding script provides it.
      if (typeof skipLabel !== "undefined") {
        threads[0].addLabel(skipLabel);
      }
      continue;
    }
    // save the file to GDrive
    var file = null;
    try {
      file = folder.createFile(att[z]);
      file.setDescription(desc);
    } catch (e) {
      Logger.log(e.toString());
    }
    // extract if gzip (skipped when the save above failed and there is no file)
    if (fileType == 'gzip' && file) {
      try {
        var ungzippedFile = Utilities.ungzip(file);
        var gz_file = folder.createFile(ungzippedFile);
        gz_file.setDescription(desc);
      } catch (e) {
        Logger.log(e.toString());
      }
    }
  }
}
Everything works fine, but the last step only decompresses the .gz layer and saves a .tar file to Drive. What can I do with it next? The .tar file contains a .csv file, which I need to extract and process afterwards.
I should probably add that I'm limited to using GAS (Google Apps Script) only.
Any help warmly appreciated.
Upvotes: 3
Views: 1820
Reputation: 201438
How about this answer? Unfortunately, at present Google Apps Script has no built-in method for extracting files from a tar file. Fortunately, the structure of tar data is described in the Wikipedia article on tar, so I implemented the extraction in Google Apps Script based on that structure.
Before you run this script, please set the file ID of the tar file in `run()`. Then, run `run()`.
/**
 * Extracts the file entries of an uncompressed tar archive.
 *
 * The archive is read as a sequence of 512-byte blocks. From each header block
 * only three fields are used: the path (100 bytes at offset 0), the size
 * (11 octal digits at offset 124) and the type flag (1 byte at offset 156).
 * Directory entries (type "5") are skipped; every other entry is returned with
 * its data. Reading stops at the first header block whose first byte is 0
 * (the end-of-archive padding) or at the end of the data, whichever comes first.
 *
 * @param {Blob} blob Tar data whose content type contains "application/x-tar".
 * @return {Object[]} One object per extracted entry:
 *     {fileInf: {filePath: string, fileSize: number, fileType: string}, file: Blob}.
 *     The blob is named after the last path segment. Empty for an empty archive.
 * @throws {Error} If the content type is not tar, or if a header or an entry's
 *     data is cut off or carries an unreadable size.
 */
function tarUnarchiver(blob) {
  var mimeType = blob.getContentType();
  if (!mimeType || mimeType.indexOf("application/x-tar") === -1) {
    throw new Error("Inputted blob is not mimeType of tar. mimeType of inputted blob is " + mimeType);
  }
  var baseChunkSize = 512;
  var bytes = blob.getBytes();
  var total = bytes.length;
  // Walk the array with an offset instead of repeatedly splicing from the front,
  // which is quadratic on large archives.
  var offset = 0;
  var res = [];

  // Decodes bytes[start, end) as text.
  function readString(start, end) {
    return Utilities.newBlob(bytes.slice(start, end)).getDataAsString();
  }

  // Reads the 100-byte path field starting at `start`, without its trailing NUL padding.
  function readPath(start) {
    var end = start + 100;
    while (end > start && bytes[end - 1] === 0) {
      end--;
    }
    return readString(start, end);
  }

  while (offset < total && bytes[offset] !== 0) {
    if (offset + baseChunkSize > total) {
      throw new Error("Truncated tar archive: incomplete header at byte " + offset);
    }
    var header = {
      filePath: readPath(offset),
      fileSize: parseInt(readString(offset + 124, offset + 124 + 11), 8),
      fileType: readString(offset + 156, offset + 156 + 1)
    };
    offset += baseChunkSize;

    // Directory entries have no data blocks; move on to the next header.
    if (header.fileType == "5") {
      continue;
    }
    if (isNaN(header.fileSize) || offset + header.fileSize > total) {
      throw new Error("Truncated or invalid tar archive: cannot read data of \"" + header.filePath + "\"");
    }

    var pathParts = header.filePath.split("/");
    var file = Utilities.newBlob(bytes.slice(offset, offset + header.fileSize))
      .setName(pathParts[pathParts.length - 1])
      .setContentTypeFromExtension();
    res.push({fileInf: header, file: file});

    // Entry data is padded to a whole number of blocks.
    offset += Math.ceil(header.fileSize / baseChunkSize) * baseChunkSize;
  }
  return res;
}
// Following function is a sample script for using tarUnarchiver().
// Please modify this to your situation.
/**
 * Sample driver for tarUnarchiver(): reads one Drive file (.tar.gz or .tar),
 * extracts its entries, creates each extracted file in Drive and logs the result.
 *
 * Only one file ID is needed. A gzip-compressed archive is detected from the
 * blob's content type or name and gunzipped first, so the two input variants
 * no longer both run (and overwrite each other) on every call.
 */
function run() {
  // Set this to the Drive file ID of either a .tar.gz file or a plain .tar file.
  var id = "### file ID of .tar.gz or .tar file ###";
  var source = DriveApp.getFileById(id).getBlob().setContentTypeFromExtension();

  var contentType = source.getContentType() || "";
  var name = source.getName() || "";
  var isGzip = contentType.indexOf("gzip") !== -1 || /\.(gz|tgz)$/i.test(name);

  // tarUnarchiver() requires the tar content type, so set it explicitly
  // rather than relying on the extension of the (possibly gunzipped) blob.
  var blob = (isGzip ? Utilities.ungzip(source) : source).setContentType("application/x-tar");

  // Extract files from a tar data.
  var res = tarUnarchiver(blob);

  // If you want to create the extracted files to Google Drive, please use the following script.
  res.forEach(function(e) {
    DriveApp.createFile(e.file);
  });

  // You can see the file information by below script.
  Logger.log(res);
}
If you want to use this in your script, how about the following, for example? It uses `tarUnarchiver()` from the script above. I'm not sure exactly how you want to use it, so please think of this as a sample.
// extract if gzip
if (fileType == 'gzip') {
  var ungzippedFile = Utilities.ungzip(file);
  try {
    // The gunzipped data is a tar archive; tarUnarchiver() checks for this content type.
    var tarBlob = ungzippedFile.setContentType("application/x-tar");
    var extracted = tarUnarchiver(tarBlob);
    // Save every extracted entry into the target folder.
    extracted.forEach(function(entry) {
      folder.createFile(entry.file);
    });
  } catch (err) {
    Logger.log(err.toString());
  }
}
In this sample, the blob of `ungzippedFile` (the tar data) is passed to `tarUnarchiver()`, and each extracted file is then created in the folder.
`tarUnarchiver()` also returns the header information of each entry (path, size and type). You can see it as the `fileInf` property of each element in the response.
Note that when this script is used, some limitations apply. These limitations are due to Google's specifications.
In my environment, I could confirm that the script worked. If it doesn't work for you, I apologize; in that case, could you provide a sample tar file? I would like to check it and modify the script.
Upvotes: 4