Reputation: 43
There is a page i am trying to scrape using nodejs. I tried the same using python and i got success . But when i make the same http get request using node js request, i get a gibberish chunk as response . I am new to node js and i have no idea what went wrong .
Here is my code :
var request = require('request');
var options = {
url: 'myurlhere',
headers: {'User-Agent':' Secure_User',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language':' en-US,en;q=0.5',
'Accept-Encoding':' gzip, deflate'
}
};
function callback(error, response, body) {
if (!error && response.statusCode == 200) {
// var info = JSON.parse(body);
// console.log(info.stargazers_count + " Stars");
console.log(body);
}
}
request(options, callback);
Same code in python works :
import requests
headers= {'User-Agent':' Secure_User',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language':' en-US,en;q=0.5',
'Accept-Encoding':' gzip, deflate'
}
url1="myurl"
r1=requests.get(url=url1)
print r1.text
In python i get the exact html as response.In node js i get something like :
_��������f�,�%vL�=d��A�
Ђg���[�!�q)Ӆhr��+2�&�������)Jh��d���(�r��<-30>9a5@�{�F��7��O��Brg��E~BaU�S�V��B�SB!��ѡz�ո��a1��4�'W��ls'���z�$��z�1J7.���]�!��}Ρ�(�)�*wz�NB��p���&�C:��G,TM1�����_\�\2�s�
~���
-�P�T4�6A��+���+���@EU�T-��s���.�� K�:��z"�i-����z��S $�-4��[y��{|�Q
F��ٝ'
Њ�s
%��xx��]H�!��H({�H�,��^o0�S���?�����N�:f4qք�j��4��5V�yZ���G Xg�T���߬�X��e����W�B
5V{�jOي���A��[�*�<埸N�i<�p���]#�������,��2�@�!�8 Bۋs*
�EVM�����3��Qu�(T�PL�iӮՈ�`�x36*J������b�Du��=���%֣V/��u�**��b^��<O��AY�Nis�1�j�3_���֔d�����g���'0�^<{Є��ς?r�-� ��[�,ۆ�JPzI�C���r+�֑��e$���'KMJIK�y+��0�Ep�B�[����*���>��﮻�V3%��'%��ŏ"�S:ϥ*�`��4��S!�-ʳdj�J#����*8�}Jw����J�~]][} i����6�
���`:�խ���_Z� P�;�?wN
���/��~�H� �}s���/-TQ�>���O�0��~�>��بS�˾��M�pf�mF�]��\���jfr���BL㏾����#�h8?�L ���^�
���`�\ ��i��k&7�sd���x�tp�g����%��Z=�����UͰ�|"��y���'�x�,n��|H������%z���Qa�� lX��S���_���6G�;��kj5�~`�U�KY-�Q�
\j 0<�Ad�Ӻ��Cc��G�[umO�qk����$<����V])�'S��r؋D��L��t��4��g���
^=v;+�4q����(�)l)��7�+����Z����d���ьK�R8�rdB��y�.�?c7��T]�����И/��4���8�
M`��h�l�� ���
<Ɖ��V����LG���C���-��]�$�D�D��O���.殡�6�2�� ��
��ENYF��[Ѩ��?���=.���7&O��_܊?F��E�v�@у�S�����X5�O�d{�?�����z�U�����{���\� �O*��h�a�Ղ�r9ǒ:�S ����Ie�I�c�<ޓ�\�'��W��]���P��˒جc
~�Ǣci�-q�����+�K����w9P�(�&����Z���'L
����
KO9�����"�)-)���C@�GF���C�)���n�a�f�6�gR'��h��f��j��ݜ�N����t���k��l�i]�n�n.>�Y�g(�D�&Q L�A�K��v��-�}Ë��$���esf�s�m�ϝf���b%�z�#&�aS�5Ɩ,M�*�m�\,X�ʂ5�+�1���3��|+�p*�F�OlD^ۣ�V��<d�OTw�߬T����U����2a�D!G�̓8���p6��=�n���݆���&g������ma |y�`�80m�u,�t<�s��(&˸��Q��e��Tq6ey8��'��L'�����C�ׄȂ�0�;
�@,�kD�
Upvotes: 3
Views: 2022
Reputation: 106696
The response is most likely gzipped, since you've indicated to the server that you are willing to accept such a response.
However, request
does not automatically decompress these responses by default. You can verify that this is what's happening by checking the output of console.dir(response.headers)
. If this is the case, you can set gzip: true
in your request options to have body
contain the decompressed response instead.
Upvotes: 2