Reputation: 309
So, I'm trying to write a CSV-file importer using AngularJS on the frontend side and NodeJS for the backend. My problem is, that I'm not sure about the encoding of the incoming CSV files. Is there a way to automatically detect it?
I first tried to use FileReader.readAsDataURL() and do the detection in Node. But the file contents will be Base64-encoded, so I cannot do that (when I decode the file, I already need to know the encoding). If I use FileReader.readAsText(), I also need to know the encoding beforehand. I also cannot do it before initializing the FileReader, because the actual file object doesn't seem to include the file's contents.
My current code:
/**
 * Reads one file with a FileReader and, if it passes the size/type checks,
 * appends a descriptor for it to `scope.fileArray`. Once every outstanding
 * read has finished, the collected descriptors are emitted as a single
 * 'file-dropzone-drop-event'.
 *
 * Files whose name ends in ".csv" are read as text; everything else is read
 * as a data URL.
 *
 * @param {File} file - The file to read. Its `encoding` property is set to
 *   the value returned by `Encoding.detect`.
 */
generateFile = function (file) {
  // Declared locally so concurrent reads do not share one implicit global.
  const reader = new FileReader();
  // Anchored ".csv" only; the previous /\.csv+$/ also accepted ".csvv" etc.
  const csvExtension = /\.csv$/i;

  // Marks this read as finished and emits the batch when nothing is pending.
  const settle = function () {
    --scope.pending;
    if (scope.pending === 0 && scope.fileArray.length > 0) {
      scope.$emit('file-dropzone-drop-event', scope.fileArray);
      scope.fileArray = [];
    }
  };

  reader.onload = function (evt) {
    if (checkSize(file.size) && isTypeValid(file.type)) {
      scope.$apply(function () {
        scope.file = evt.target.result;
        // NOTE(review): for CSV files this runs on text that FileReader has
        // already decoded, so the detected encoding may not reflect the raw
        // bytes. Confirm what input Encoding.detect expects.
        file.encoding = Encoding.detect(scope.file);
        if (angular.isString(scope.fileName)) {
          // Was `name`, which is not defined in this function.
          scope.fileName = file.name;
        }
      });
      if (form) {
        form.$setDirty();
      }
      scope.fileArray.push({
        name: file.name,
        type: file.type,
        size: file.size,
        date: file.lastModified,
        encoding: file.encoding,
        file: scope.file,
      });
    }
    // Always settle, even for rejected files; otherwise one invalid file
    // keeps `scope.pending` above zero and the batch event never fires.
    settle();
  };

  // A failed read must also release its pending slot.
  reader.onerror = settle;

  if (csvExtension.test(file.name)) {
    reader.readAsText(file);
  } else {
    reader.readAsDataURL(file);
  }
  ++scope.pending;
};
Is this just impossible to do or what am I doing wrong? I even tried to solve this using FileReader.readAsArrayBuffer() and extract the file header from there, but this was way too complex for me and/or didn't seem to work.
Upvotes: 5
Views: 17111
Reputation: 373
You could try this:
$ npm install detect-file-encoding-and-language
And then detect the encoding like so:
// index.js
// Detects the encoding (and language) of a text file on disk and logs it.
const languageEncoding = require("detect-file-encoding-and-language");

const pathToFile = "/home/username/documents/my-text-file.txt";

languageEncoding(pathToFile)
  .then((fileInfo) => console.log(fileInfo))
  // Without a handler, an unreadable path becomes an unhandled rejection.
  .catch((err) => console.error(err));
// Possible result: { language: japanese, encoding: Shift-JIS, confidence: { language: 0.97, encoding: 1 } }
Upvotes: 3
Reputation: 1577
I suggest you open your CSV using FileReader.readAsBinaryString(). This is the trick: the result is a string holding the file's raw, undecoded bytes, so you can then detect the encoding using the jschardet library.
More info here: CSV encoding detection in javascript
Upvotes: 5