Nick
Nick

Reputation: 1413

Sanitizing a string into JSON

I'm web scraping, and on a GET request the website returns a string like this:

jQuery18305426675335038453_1429531451051({"d":[{"__metadata":"cool"}]})

The whole code is here:

// Request URL, assembled from its query parameters (joined with "&") so each
// one can be read on its own line. The resulting string is unchanged.
var baseUrl = [
    "http://SOMEURL.COM?spatialFilter=nearby(52.47952651977539,-1.911009430885315,400)",
    "$select=*",
    "$top=200",
    "$format=json",
    "key=AjF8l9J6TH-WM5tkfFYdYE8NVUx9SFe4ya9aBaxKFFPBImvFWWHPOsRMSBesWblU",
    // Asks the server to wrap the JSON in a call to this callback name (JSONP).
    "jsonp=jQuery18305426675335038453_1429531451051",
    "_=1429532300821%20HTTP/1.1"
].join("&");

// CasperJS instance that drives the whole scrape. Images and plugins are
// switched off in the page settings; real-time log output is disabled
// (verbose: false) while the log level itself is set to 'debug'.
var casper = require('casper').create({
    logLevel: 'debug',
    verbose: false,
    pageSettings: {
        loadPlugins: false,
        loadImages: false
    }
});


// Module-level state for the script, declared in ONE `var` statement.
// Every declarator must be separated by a comma: a stray semicolon in the
// middle ends the statement and turns the remaining names into implicit
// globals (a ReferenceError in strict mode).
var fs = require('fs'),
    shopInfo,                            // filled in by the getData step
    savePath,                            // filled in by saveToFile()
    date = new Date(),
    secondsNow = date.getSeconds(),
    day = date.getDate(),
    minute = date.getMinutes(),
    month = date.getMonth() + 1,         // getMonth() is zero-based
    // Output file name: virginmedia-<month>-<day>-<minute>-<second>.txt
    fname = 'virginmedia-' + month + '-' + day + '-' + minute + '-' + secondsNow + '.txt';

/**
 * Writes the given text to `output/<fname>` below the working directory.
 *
 * Also records the resolved path in the file-level `savePath` variable.
 *
 * @param {string} finalData - Text to write; the file is opened in 'w' mode.
 */
function saveToFile(finalData) {
    var target = fs.pathJoin(fs.workingDirectory, 'output', fname);
    savePath = target;
    fs.write(target, finalData, 'w');
}


// Open baseUrl with a GET request whose Accept header asks for JSON.
casper.start(baseUrl, {
    method: 'get',
    headers: {
        'Accept': 'application/json'
    }
});
// Step: read the response body, strip the JSONP wrapper, parse the JSON and
// save one [latitude, longitude, store name] row per shop.
casper.then(function getData() {
    // Builds the output row for a single entry of `d.results`.
    function toRow(shop) {
        return [shop.Latitude, shop.Longitude, shop.EntityStoreName];
    }

    var rawData = this.getPageContent();

    // Drop the leading "callbackName(" and the trailing ")" so that only the
    // JSON text remains, then parse it.
    shopInfo = rawData;
    shopInfo = shopInfo
        .replace("jQuery18305426675335038453_1429531451051(", '')
        .replace(/\)$/, '');
    shopInfo = JSON.parse(shopInfo);

    var resultPack = shopInfo.d.results;
    var finalData = resultPack.map(toRow);

    saveToFile(JSON.stringify(finalData));
    casper.echo("\n Hello! I just returned " + finalData.length + " shops");
});
// Run the steps queued above (the start request, then getData).
casper.run();

In other words, valid JSON inside a function call! But I need only the JSON part.

Inside a browser I could easily make up a function with the same name that returns its own argument:

/**
 * Stand-in for the JSONP callback named in the request: it simply hands back
 * the payload the response passes to it.
 *
 * @param {*} payload - First argument of the JSONP call (the JSON data).
 * @returns {*} The same value, or `undefined` when called without arguments.
 */
function jQuery18305426675335038453_1429531451051(payload) {
  return payload;
}

But in CasperJS it just doesn't work. So my last option was to use a regex to extract the JSON string:

// Strip the JSONP wrapper: first the leading "callbackName(", then the
// closing ")" at the very end of the string.
shopInfo = shopInfo
    .replace("jQuery18305426675335038453_1429531451051(", '')
    .replace(/\)$/, '');

Is there any better way to do it?

Edit 1: From the comments I found out that it's actually JSONP, not JSON, and life got easy! I found my answer here after searching for JSONP instead.

Edit 2 : Another solution found in the comments: by changing the request, the website returns proper JSON on its own!

Upvotes: 0

Views: 1036

Answers (1)

Nick
Nick

Reputation: 1413

After reading the comments, here's the answer:

  1. That format is called JSONP, or JSON with padding. It's discussed on SO here.

  2. There's actually no need to go that way; the HTTP request can be changed so that it returns true JSON data. Just remove this part from the request: jsonp=jQuery18305426675335038453_1429531451051&_=1429532300821%20HTTP/1.1

Upvotes: 1

Related Questions