Howard
Howard

Reputation: 3758

Need some help scraping with CasperJS

This is what I currently have:

var fs = require('fs');

// Output file: <working directory>/CARD_DATA/championDecks.txt
var folderName = 'CARD_DATA';
var fileName = 'championDecks.txt';
var save = fs.pathJoin(fs.workingDirectory, folderName, fileName);

// Create the Casper instance once. clientScripts lists jquery.min.js so that
// functions run through evaluate() can use $ inside the page.
var casper = require('casper').create({
    clientScripts: ['jquery.min.js']
});

// URL of the page to scrape
var parseUrl = 'http://magic.wizards.com/en/events/coverage/mtgochamp14';

// scrape
/**
 * Reads the deck headings from the current page using jQuery ($).
 *
 * .text() is called on the whole matched set, so each entry is the combined
 * text of every matching element rather than one value per deck.
 *
 * @returns {Array} Two strings: the combined text of all `.deck-meta h4`
 *     elements, followed by the trimmed combined text of all
 *     `.deck-meta h5` elements.
 */
function getDeckData() {
    var result = [];

    result.push($('.deck-meta h4').text());
    result.push($('.deck-meta h5').text().trim());

    return result;
}

// Open the page, run getDeckData inside it, and write the result to the
// output file ('w' mode) followed by a newline.
casper.start(parseUrl, function() {
    var scraped = this.evaluate(getDeckData);
    var contents = scraped + '\n';

    fs.write(save, contents, 'w');
});

casper.run();

I'm trying to scrape http://magic.wizards.com/en/events/coverage/mtgochamp14 and save the data in a format similar to this:

{
    "event": "2014 Magic Online Championship",
    "deckName": "(Vintage) Magnus Lantto's Pyromancer Control",
    "deck": [
        "1 Dack Fayden",
        "3 Snapcaster Mage",
        "4 Young Pyromancer",
        "3 Cabal Therapy",
        "1 Demonic Tutor",
        "4 Gitaxian Probe",
        "1 Ponder",
        "3 Preordain",
        "1 Time Walk",
        "1 Treasure Cruise",
        "1 Ancestral Recall",
        "1 Brainstorm",
        "3 Dig Through Time",
        "4 Force of Will",
        "3 Gush",
        "3 Lightning Bolt",
        "4 Mental Misstep",
        "1 Pyroblast",
        "1 Black Lotus",
        "1 Mox Jet",
        "1 Mox Ruby",
        "1 Mox Sapphire",
        "3 Flooded Strand",
        "1 Island",
        "4 Scalding Tarn",
        "1 Strip Mine",
        "2 Underground Sea",
        "3 Volcanic Island"
    ],
    "sideboard": [
        "1 Pyroblast",
        "1 Dread of Night",
        "1 Electrickery",
        "4 Grafdigger's Cage",
        "4 Ingot Chewer",
        "1 Mountain",
        "1 Null Rod",
        "2 Pulverize"
    ],
    "event": "2014 Magic Online Championship",
    "deckName": "(MODERN) MAGNUS LANTTO'S ELF COMPANY",
    "deck": [ ... ],
    "sideboard": [ ... ]

    ...and so on...
}

I can't figure out how to get the data for each deck individually. This is what I'm currently getting:

(Vintage) Magnus Lantto's Pyromancer Control(Modern) Magnus Lantto's Elf Company(Standard) Magnus Lantto's Atarka Devotion(Vintage) Jasper de Jong's Mentor Control(Modern) Jasper de Jong's Melira and Company(Standard) Jasper de Jong's Green-White Devotion(Vintage) Aleksa Telarov's Delver(Modern) Aleksa Telarov's Burn(Standard) Aleksa Telarov's Jund Megamorph(Vintage) Antonio Del Moral León's Omni-Oath(Modern) Antonio Del Moral León's Splinter Twin(Standard) Antonio Del Moral León's Abzan Midrange,2014 Magic Online Championship                        
                      2014 Magic Online Championship                        
                      2014 Magic Online Championship                        
                      2014 Magic Online Championship                        
                      2014 Magic Online Championship                        
                      2014 Magic Online Championship                        
                      2014 Magic Online Championship                        
                      2014 Magic Online Championship                        
                      2014 Magic Online Championship                        
                      2014 Magic Online Championship                        
                      2014 Magic Online Championship                        
                      2014 Magic Online Championship

Can anyone provide a little wisdom to send me in the right direction?

Upvotes: 0

Views: 141

Answers (1)

Sebastian Nette
Sebastian Nette

Reputation: 7832

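Calling .text() on a jQuery set returns the combined text of every matched element, which is why all the deck names run together. Assuming there is an event for each deck name, keep the matched sets and read them one element at a time inside getDeckData: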

/**
 * Collects one record per deck from the current page using jQuery ($).
 *
 * Replaces the body of the question's getDeckData. Headings are paired by
 * position, so this assumes every `.deck-meta h4` has a matching
 * `.deck-meta h5` at the same index.
 *
 * @returns {Array<{deckName: string, event: string}>} One plain object per
 *     `.deck-meta h4` element, with surrounding whitespace trimmed.
 */
function getDeckData() {
    var meta = $('.deck-meta h4');
    var event = $('.deck-meta h5');
    var output = [];

    for (var i = 0; i < meta.length; i++) {
        output.push({
            // Wrap each element separately so .text() reads just that one.
            deckName: $(meta[i]).text().trim(),
            event: $(event[i]).text().trim()
        });
    }

    // The result is an array of objects; serialize it (e.g. with
    // JSON.stringify) before writing it to a file.
    return output;
}

Upvotes: 1

Related Questions