Reputation: 14796
I am trying to figure out the correct way to load a UTF-8 web page. Below are my failed attempts so far.
I should also mention that I tried using the iconv module, but that also failed (and I suspect that there should be a solution without it).
var request = require("request");
var http = require("http");
var fs = require("fs");
var url = "http://www.globes.co.il/portal/quotes/unlinked-fixed-rate-government-bonds";
// Attempt 1: Ask 'request' for utf8
request({url:url, encoding:'utf8'}, function(error, response, html) {
  // Report a failed request explicitly; without this guard the callback
  // would just print whatever 'html' happens to be when 'error' is set.
  if (error) {
    console.error('Request failed:', error);
    return;
  }
  console.log(html);
});
// Attempt 2: Ask 'request' for null
request({url:url, encoding:null}, function(error, response, html) {
  // Bail out on failure: calling html.toString() while 'error' is set would
  // throw a TypeError and hide the real cause.
  if (error) {
    console.error('Request failed:', error);
    return;
  }
  // 'html' is passed through toString('utf8') to decode it as UTF-8.
  console.log(html.toString('utf8'));
});
// Attempt 3: Avoid using 'request'
var options = {
  host: 'www.globes.co.il',
  path: '/portal/quotes/unlinked-fixed-rate-government-bonds'
};
http.get(options, function(res) {
  // Let the stream do the UTF-8 decoding. Converting each chunk separately
  // with chunk.toString('utf8') corrupts any multi-byte character that is
  // split across two chunks.
  res.setEncoding('utf8');
  res.on("data", function(chunk) {
    console.log(chunk);
  });
}).on('error', function(err) {
  // Without an 'error' listener a network failure crashes the process.
  console.error('HTTP request failed:', err);
});
Upvotes: 0
Views: 174
Reputation: 106726
It seems that despite the HTML having a reported charset of utf8, the HTTP response says the content charset is windows-1255.
Using iconv-lite to convert the binary response to UTF-8 works (tested on Linux):
var request = require('request');
var iconv = require('iconv-lite');

var url = 'http://www.globes.co.il/portal/quotes/unlinked-fixed-rate-government-bonds';

// encoding:null asks 'request' for the raw response bytes instead of a
// decoded string, so the charset conversion can be done explicitly below.
request({url:url, encoding:null}, function(error, response, body) {
  // Stop on a failed request rather than handing an undefined body to iconv.
  if (error) {
    console.error('Request failed:', error);
    return;
  }
  // Decode the raw bytes as windows-1255 to get a proper JavaScript string.
  var html = iconv.decode(body, 'win1255');
  console.log(html);
});
Upvotes: 1