mravey
mravey

Reputation: 4510

JSON.parse not working with json from twitter streaming api

I'm using Node.js to consume the Twitter streaming API. Everything works fine except when I try to parse the JSON I get back. Here is an example of what I am trying to parse:

{
    "text": "NEWS Nº2559 (use google translator to read it): http://t.co/dF3ClUC",
    "in_reply_to_user_id": null,
    "in_reply_to_status_id": null,
    "favorited": false,
    "in_reply_to_status_id_str": null,
    "id_str": "93748566299918337",
    "in_reply_to_screen_name": null,
    "in_reply_to_user_id_str": null,
    "geo": null,
    "source": "web",
    "contributors": null,
    "retweeted": false,
    "retweet_count": 0,
    "entities": {
        "user_mentions": [],
        "hashtags": [],
        "urls": [
            {
                "display_url": "luxatenealibros.blogspot.com/2011/07/lux-at…",
                "indices": [
                    48,
                    67
                ],
                "expanded_url": "http://luxatenealibros.blogspot.com/2011/07/lux-atenea-news-n2559-cinderella-fables.html",
                "url": "http://t.co/dF3ClUC"
            }
        ]
    },
    "place": null,
    "coordinates": null,
    "user": {
        "favourites_count": 0,
        "profile_sidebar_fill_color": "efefef",
        "profile_image_url": "http://a0.twimg.com/profile_images/983835547/logo_LUX_ATENEA_WEBZINE_normal.JPG",
        "default_profile_image": false,
        "show_all_inline_media": false,
        "geo_enabled": false,
        "profile_background_tile": true,
        "screen_name": "LUXATENEAWEBZIN",
        "id_str": "112305851",
        "profile_link_color": "009999",
        "url": null,
        "description": "LUX ATENEA WEBZINE\u000d\u000aREVISTA CULTURAL GÓTICA ATIS&NYD\u000d\u000a",
        "follow_request_sent": null,
        "statuses_count": 3027,
        "verified": false,
        "profile_sidebar_border_color": "eeeeee",
        "time_zone": null,
        "contributors_enabled": false,
        "profile_use_background_image": true,
        "location": "",
        "is_translator": false,
        "lang": "es",
        "profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme14/bg.gif",
        "profile_background_color": "131516",
        "protected": false,
        "listed_count": 2,
        "profile_background_image_url": "http://a1.twimg.com/images/themes/theme14/bg.gif",
        "friends_count": 3,
        "followers_count": 55,
        "name": "LUX ATENEA WEBZINE",
        "notifications": null,
        "created_at": "Mon Feb 08 00:53:45 +0000 2010",
        "id": 112305851,
        "default_profile": false,
        "following": null,
        "utc_offset": null,
        "profile_text_color": "333333",
        "profile_image_url_https": "https://si0.twimg.com/profile_images/983835547/logo_LUX_ATENEA_WEBZINE_normal.JPG"
    },
    "truncated": false,
    "id": 93748566299918340,
    "created_at": "Wed Jul 20 18:26:14 +0000 2011"
}

jsonlint.com tells me that it is valid JSON, but I cannot parse it from Node.js. Any idea why?

Upvotes: 0

Views: 1413

Answers (2)

mravey
mravey

Reputation: 4510

I found the problem: it comes from the user.description field, which contains the characters \u000d and \u000a. Here is how I made it work:

// Sample tweet payload pasted into a JavaScript string literal.
//
// The literal is split with `+` because a quoted JavaScript string cannot
// contain a raw line break; writing it across two source lines without
// concatenation is a SyntaxError.
//
// NOTE: inside a JavaScript string literal the escapes \u000d and \u000a are
// decoded by the JavaScript parser into raw CR and LF characters. JSON does
// not allow unescaped control characters inside a string value, so passing
// the literal straight to JSON.parse throws.
var test = '{"text": "NEWS Nº2559 (use google translator to read it): http://t.co/dF3ClUC","in_reply_to_user_id": null,"in_reply_to_status_id": null,"favorited": false,"in_reply_to_status_id_str": null,"id_str": "93748566299918337","in_reply_to_screen_name": null,"in_reply_to_user_id_str": null,"geo": null,"source": "web","contributors": null,"retweeted": false,"retweet_count": 0,"entities": {"user_mentions": [],"hashtags": [],"urls": [{"display_url": "luxatenealibros.blogspot.com/2011/07/lux-at…","indices": [48,67],"expanded_url": "http://luxatenealibros.blogspot.com/2011/07/lux-atenea-news-n2559-cinderella-fables.html","url": "http://t.co/dF3ClUC"}]},"place": null,"coordinates": null,"user": {"favourites_count": 0,"profile_sidebar_fill_color": "efefef","profile_image_url": "http://a0.twimg.com/profile_images/983835547/logo_LUX_ATENEA_WEBZINE_normal.JPG","default_profile_image": false,"show_all_inline_media": false,"geo_enabled": false,"profile_background_tile": true,"screen_name": "LUXATENEAWEBZIN","id_str": "112305851","profile_link_color": "009999","url": null,"description": "LUX ATENEA WEBZINE\u000d\u000aREVISTA CULTURAL GÓTICA ATIS&NYD\u000d\u000a","follow_request_sent": null,"statuses_count": 3027,"verified": false,"profile_sidebar_border_color": "eeeeee","time_zone": null,"contributors_enabled": false,"profile_use_background_image": true,"location": "","is_translator": false,"lang": "es","profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme14/bg.gif","profile_background_color": "131516","protected": false,"listed_count": 2,"profile_background_image_url": "http://a1.twimg.com/images/themes/theme14/bg.gif","friends_count": 3,"followers_count": 55,"name": "LUX ATENEA WEBZINE","notifications": null,"created_at": "Mon Feb 08 00:53:45 +0000 2010","id": 112305851,"default_profile": false,"following": null,"utc_offset": null,"profile_text_color": "333333","profile_image_url_https": ' +
    '"https://si0.twimg.com/profile_images/983835547/logo_LUX_ATENEA_WEBZINE_normal.JPG"},"truncated": false,"id": 93748566299918340,"created_at": "Wed Jul 20 18:26:14 +0000 2011"}';

// Strip every raw LF and CR so the text becomes parseable JSON. This is a
// single pass equivalent to removing "\n" and then "\r". The line breaks that
// were inside user.description are dropped, not preserved.
test = test.replace(/[\r\n]/g, '');

console.log(JSON.parse(test));

Upvotes: 0

Mike Samuel
Mike Samuel

Reputation: 120546

I noticed that

"id_str": "93748566299918337",

and

"id":      93748566299918340,

seem to be two different representations of the same data, but the number form seems to have lost some precision.

Is it possible that the JSON number parser is detecting a loss of precision, due to the ID number literal being right up against the limit of the mantissa, and bailing on that?

JSON doesn't actually specify any semantics for numbers, and doesn't specify how lossy number parsers can be, but implementations might bail on numbers they can't represent.

E.g. only a JSON parser that can use a good bigint/bigdecimal representation, like Python's, will be able to do something reasonable with { "foo": 1e500 }, whereas a JavaScript JSON parser (that represents numbers using its native number type) would probably turn that number into Infinity, which is not round-trippable via JSON.

Section 4 of RFC 4627 says

4 Parsers

... An implementation may set limits on the range of numbers.

EDIT:

The other clue I notice is in

"text": "NEWS Nº2559 ...",
               ^

which contains a non-ASCII character. If you're using Node.js and you're opening a file without specifying the correct encoding, the JSON parser might be assuming UTF-8 since RFC 4627 says

3 Encoding

JSON text SHALL be encoded in Unicode. The default encoding is UTF-8.

and if your file is not UTF-8 then that might lead to a byte sequence that is not valid in UTF-8 which would have to be rejected by the decoder.

Upvotes: 4

Related Questions