Reputation: 4510
I'm using Node.js to consume the Twitter Streaming API. Everything works fine except when I try to parse the JSON I get back. Here is an example of what I'm trying to parse:
{
"text": "NEWS Nº2559 (use google translator to read it): http://t.co/dF3ClUC",
"in_reply_to_user_id": null,
"in_reply_to_status_id": null,
"favorited": false,
"in_reply_to_status_id_str": null,
"id_str": "93748566299918337",
"in_reply_to_screen_name": null,
"in_reply_to_user_id_str": null,
"geo": null,
"source": "web",
"contributors": null,
"retweeted": false,
"retweet_count": 0,
"entities": {
"user_mentions": [],
"hashtags": [],
"urls": [
{
"display_url": "luxatenealibros.blogspot.com/2011/07/lux-at…",
"indices": [
48,
67
],
"expanded_url": "http://luxatenealibros.blogspot.com/2011/07/lux-atenea-news-n2559-cinderella-fables.html",
"url": "http://t.co/dF3ClUC"
}
]
},
"place": null,
"coordinates": null,
"user": {
"favourites_count": 0,
"profile_sidebar_fill_color": "efefef",
"profile_image_url": "http://a0.twimg.com/profile_images/983835547/logo_LUX_ATENEA_WEBZINE_normal.JPG",
"default_profile_image": false,
"show_all_inline_media": false,
"geo_enabled": false,
"profile_background_tile": true,
"screen_name": "LUXATENEAWEBZIN",
"id_str": "112305851",
"profile_link_color": "009999",
"url": null,
"description": "LUX ATENEA WEBZINE\u000d\u000aREVISTA CULTURAL GÓTICA ATIS&NYD\u000d\u000a",
"follow_request_sent": null,
"statuses_count": 3027,
"verified": false,
"profile_sidebar_border_color": "eeeeee",
"time_zone": null,
"contributors_enabled": false,
"profile_use_background_image": true,
"location": "",
"is_translator": false,
"lang": "es",
"profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme14/bg.gif",
"profile_background_color": "131516",
"protected": false,
"listed_count": 2,
"profile_background_image_url": "http://a1.twimg.com/images/themes/theme14/bg.gif",
"friends_count": 3,
"followers_count": 55,
"name": "LUX ATENEA WEBZINE",
"notifications": null,
"created_at": "Mon Feb 08 00:53:45 +0000 2010",
"id": 112305851,
"default_profile": false,
"following": null,
"utc_offset": null,
"profile_text_color": "333333",
"profile_image_url_https": "https://si0.twimg.com/profile_images/983835547/logo_LUX_ATENEA_WEBZINE_normal.JPG"
},
"truncated": false,
"id": 93748566299918340,
"created_at": "Wed Jul 20 18:26:14 +0000 2011"
}
jsonlint.com tells me that it is valid JSON, but I cannot parse it from Node.js. Any idea why?
Upvotes: 0
Views: 1413
Reputation: 4510
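I have found the problem: it comes from the user.description part and the characters \u000d & \u000a. When the JSON text is embedded in a JavaScript string literal, those escapes are turned into raw carriage-return and line-feed characters, which JSON does not allow unescaped inside a string. Here is what I did to make it work: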
// Sample tweet embedded as a JavaScript string literal. The literal must stay on
// a single line: a raw line break inside a quoted string is a SyntaxError.
// Because this is a JS literal, the \u000d and \u000a escapes in
// user.description are decoded by JavaScript into raw CR and LF characters
// before JSON.parse ever sees the text.
let test = '{"text": "NEWS Nº2559 (use google translator to read it): http://t.co/dF3ClUC","in_reply_to_user_id": null,"in_reply_to_status_id": null,"favorited": false,"in_reply_to_status_id_str": null,"id_str": "93748566299918337","in_reply_to_screen_name": null,"in_reply_to_user_id_str": null,"geo": null,"source": "web","contributors": null,"retweeted": false,"retweet_count": 0,"entities": {"user_mentions": [],"hashtags": [],"urls": [{"display_url": "luxatenealibros.blogspot.com/2011/07/lux-at…","indices": [48,67],"expanded_url": "http://luxatenealibros.blogspot.com/2011/07/lux-atenea-news-n2559-cinderella-fables.html","url": "http://t.co/dF3ClUC"}]},"place": null,"coordinates": null,"user": {"favourites_count": 0,"profile_sidebar_fill_color": "efefef","profile_image_url": "http://a0.twimg.com/profile_images/983835547/logo_LUX_ATENEA_WEBZINE_normal.JPG","default_profile_image": false,"show_all_inline_media": false,"geo_enabled": false,"profile_background_tile": true,"screen_name": "LUXATENEAWEBZIN","id_str": "112305851","profile_link_color": "009999","url": null,"description": "LUX ATENEA WEBZINE\u000d\u000aREVISTA CULTURAL GÓTICA ATIS&NYD\u000d\u000a","follow_request_sent": null,"statuses_count": 3027,"verified": false,"profile_sidebar_border_color": "eeeeee","time_zone": null,"contributors_enabled": false,"profile_use_background_image": true,"location": "","is_translator": false,"lang": "es","profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme14/bg.gif","profile_background_color": "131516","protected": false,"listed_count": 2,"profile_background_image_url": "http://a1.twimg.com/images/themes/theme14/bg.gif","friends_count": 3,"followers_count": 55,"name": "LUX ATENEA WEBZINE","notifications": null,"created_at": "Mon Feb 08 00:53:45 +0000 2010","id": 112305851,"default_profile": false,"following": null,"utc_offset": null,"profile_text_color": "333333","profile_image_url_https": "https://si0.twimg.com/profile_images/983835547/logo_LUX_ATENEA_WEBZINE_normal.JPG"},"truncated": false,"id": 93748566299918340,"created_at": "Wed Jul 20 18:26:14 +0000 2011"}';

// JSON forbids unescaped control characters inside string values, so the raw
// CR/LF characters make JSON.parse throw. Re-escape them (rather than deleting
// them) so the text becomes valid JSON again and the line breaks in
// user.description survive parsing. In this literal the only raw CR/LF
// characters are the ones inside the description string.
test = test.replace(/\r/g, '\\r').replace(/\n/g, '\\n');

// Note: the numeric "id" exceeds Number.MAX_SAFE_INTEGER and loses precision
// when parsed; use "id_str" when the exact tweet id is needed.
console.log(JSON.parse(test));
Upvotes: 0
Reputation: 120546
I noticed that
"id_str": "93748566299918337",
and
"id": 93748566299918340,
seem to be two different representations of the same data, but the number form seems to have lost some precision.
Is it possible that the JSON number parser is detecting a loss of precision due to the ID number literal being right up against the limit of the mantissa and bails on that?
JSON doesn't actually specify any semantics for numbers, and doesn't specify how lossy number parsers can be, but implementations might bail on numbers they can't represent.
E.g. only a JSON parser that can use a good bigint/bigdecimal representation, like python's, will be able to do something reasonable with { "foo": 1e500 }
whereas a JavaScript JSON parser (that represents numbers using its native number type) would probably turn that number into Infinity
which is not round-trippable via JSON.
Section 4 of RFC 4627 says
4 Parsers
... An implementation may set limits on the range of numbers.
EDIT:
The other clue I notice is in
"text": "NEWS Nº2559 ...",
^
which contains a non-ASCII character. If you're using Node.js and you're opening a file without specifying the correct encoding, the JSON parser might be assuming UTF-8 since RFC 4627 says
3 Encoding
JSON text SHALL be encoded in Unicode. The default encoding is UTF-8.
and if your file is not UTF-8 then that might lead to a byte sequence that is not valid in UTF-8 which would have to be rejected by the decoder.
Upvotes: 4