Reputation: 1321
I have a Node.js script that reads a file and then prints a paragraph to the console.
var fs = require('fs');
var math = require('mathjs');
// Text accumulated from the stream's 'data' events; read by clean().
var piece = "";
var path = "./divina_commedia.txt";
var stats = fs.statSync(path);
// Random byte offset at which to start reading. floor() keeps the offset
// strictly below stats.size, whereas round() could land exactly on the end
// of the file and produce an empty read.
var start = math.floor(math.random(stats.size));
// Read the byte range [start, start + 2000] decoded as UTF-8.
// Note the camel-case spelling: Node's option is `autoClose`.
var opt = { flags: 'r', encoding: 'utf8', autoClose: true, start: start, end: start + 2000 };
var input = fs.createReadStream(path, opt);
// Once the whole range has been delivered, print the excerpt.
input.on('end', clean);
input.on('data', store);
/**
 * 'data' handler: appends the received chunk to the module-level `piece`
 * accumulator.
 *
 * @param {string|Buffer} chunk - Data emitted by the stream.
 */
function store(chunk) {
  piece += chunk;
}
/**
 * 'end' handler: extracts sentences from the accumulated `piece` and prints
 * the first two, concatenated, to the console.
 *
 * A "sentence" here is an uppercase ASCII letter followed by everything up
 * to and including the next period. If fewer than two sentences are found,
 * only the ones available are printed (an empty line when there are none)
 * instead of throwing or printing "undefined".
 */
function clean() {
  // String.prototype.match returns null when nothing matches.
  const sentences = piece.match(/[A-Z][^\.]*\./g) || [];
  console.log(sentences.slice(0, 2).join(""));
}
// Sanity check: an accented character written literally in the script
// source is displayed by the terminal.
console.log("ò"); // <<-- this is printed on the terminal
The accented characters are not printed on the terminal. The terminal itself can display accented characters, though, as the last line of my script proves.
Upvotes: 0
Views: 335
Reputation: 40374
The problem is that your file isn't encoded in UTF-8; it is a windows-1252 encoded file.
Use iconv-lite to decode it.
var fs = require('fs');
var math = require('mathjs');
var iconv = require('iconv-lite');
// Text accumulated from the decoded stream's 'data' events; read by clean().
var piece = "";
var path = "./divina_commedia.txt";
// `start` must be defined before it is used in the options below: pick a
// random byte offset strictly inside the file.
var stats = fs.statSync(path);
var start = math.floor(math.random(stats.size));
var opt = {
  flags: 'r',
  autoClose: true, // camel-case: this is the spelling Node recognises
  start: start,
  end: start + 2000
  // No `encoding: 'utf8'` here: the raw bytes are handed to iconv-lite,
  // which does the decoding.
};
// Decode the windows-1252 bytes via iconv-lite.
var input = fs.createReadStream(path, opt)
  .pipe(iconv.decodeStream('win1252'));
input.on('end', clean);
input.on('data', store);
/**
 * 'data' handler for the decoded stream: appends each chunk to the
 * module-level `piece` string.
 *
 * @param {string|Buffer} chunk - Data emitted by the stream.
 */
function store(chunk) {
  piece += chunk;
}
/**
 * 'end' handler: prints the accumulated text to the console.
 *
 * `piece` starts as "" and is only ever extended by string concatenation,
 * so it is already a string here and needs no conversion.
 */
function clean() {
  console.log(piece); // ò printed correctly
}
Or you can just convert the file to utf8 beforehand and use your code.
Upvotes: 1