DannyB
DannyB

Reputation: 14796

Loading a UTF-8 page

I am trying to figure out the correct way to load a UTF-8 web page. Below are my failed attempts so far.

I should also mention that:

Code:

var request = require("request");
var http = require("http");
var fs = require("fs");

var url = "http://www.globes.co.il/portal/quotes/unlinked-fixed-rate-government-bonds";

// Attempt 1: Ask 'request' for utf8
// 'request' decodes the response body as UTF-8 and passes it as a string.
request({url:url, encoding:'utf8'}, function(error, response, html) {
    if (error) {
        // Report a failed request instead of silently printing "undefined".
        console.error(error);
        return;
    }
    console.log(html);
});

// Attempt 2: Ask 'request' for null
// With encoding:null the body arrives as a raw Buffer, decoded here by hand.
request({url:url, encoding:null}, function(error, response, html) {
    if (error) {
        // On failure there is no body, so html.toString() would throw.
        console.error(error);
        return;
    }
    console.log(html.toString('utf8'));
});

// Attempt 3: Avoid using 'request'
var options = {
    host: 'www.globes.co.il',
    path: '/portal/quotes/unlinked-fixed-rate-government-bonds'
};

http.get(options, function(res) {
    // Collect the raw chunks and decode once at the end: a multi-byte
    // character may be split across two chunks, and decoding each chunk
    // on its own would corrupt it.
    var chunks = [];
    res.on("data", function(chunk) {
        chunks.push(chunk);
    });
    res.on("end", function() {
        console.log(Buffer.concat(chunks).toString('utf8'));
    });
}).on("error", function(err) {
    // Without this listener a connection failure is an uncaught exception.
    console.error(err);
});

Side by side: actual vs. expected:

Side by side actual vs expected

Upvotes: 0

Views: 174

Answers (1)

mscdex
mscdex

Reputation: 106726

It seems that despite the HTML having a reported charset of utf8, the HTTP response says the content charset is windows-1255.

Using iconv-lite to decode the raw binary response as windows-1255 works (tested on Linux):

var request = require('request'),
    iconv = require('iconv-lite');

var url = 'http://www.globes.co.il/portal/quotes/unlinked-fixed-rate-government-bonds';

// Fetch the page as a raw Buffer (encoding:null) and decode it by hand,
// so that 'request' does not decode the body with the wrong charset.
request({url:url, encoding:null}, function(error, response, html) {
  if (error) {
    // On failure there is no body to decode; report the error and stop.
    console.error(error);
    return;
  }
  html = iconv.decode(html, 'win1255');
  console.log(html);
});

Upvotes: 1

Related Questions