Reputation: 2996
I am reading an HTML page that is encoded in windows-1251 (win1251), but I can't render it correctly: the output shows garbled characters. With UTF-8 pages the same code works fine. How can I read and display a page that is not UTF-8? Thanks.
var charset = require('charset');
var iconv = require('iconv-lite');
var router = express.Router();
// accept POST request on the homepage
router.post('/', function (req, res) {
request(req.body.url, function (error, response, body) {
var result = [];
if (error || response.statusCode != 200) {
console.log(error);
} else {
console.log(charset(response.headers, body));
var enc = charset(response.headers, body);
if (enc != 'utf-8') {
body = iconv.decode(body, 'win1251');
console.log(body);
}
var $ = cheerio.load(body);
//get title
result.push("Title-> " + $("title").text());
Upvotes: 0
Views: 1981
Reputation: 2996
In case someone else runs into this problem: instead of the following line (which is fine too), I decoded the body with iconv-lite.
body = body.toString('utf8');
My code
var iconv = require('iconv-lite');
.......................
body = iconv.decode(body, 'win1251');
Upvotes: 0
Reputation: 106736
If you set `encoding: null` in your `request()` options, `body` will be a `Buffer` instead of a UTF-8 string. This will allow you to correctly convert the encoding to UTF-8.
Example:
request({url: req.body.url, encoding: null}, function (error, response, body) {
If the encoding of the body turns out to be UTF-8, you can simply do:
body = body.toString('utf8');
Upvotes: 2