Frederik 84
Frederik 84

Reputation: 61

How to get attribute from tricky elements with Cheerio?

Hi, I'm trying to extract some info from a webpage and it's a bit tricky. The elements that I need info from look like this:

<div id="1449822" class="match_line score_row other_match e_true " data-
cntr="0" data-parent-competition="A-LEAGUE" data-note="Venue: Etihad Stadium. 
Turf: Natural. Capacity: 56,347. Distance: 1,667km. Sidelined Players: 
MELBOURNE VICTORY - AUSTIN MITCHELL, DENG THOMAS, NIGRO STEFAN (Injured). 
BRISBANE ROAR FC - BROWN COREY, DE VERE LUKE, O TOOLE CONNOR, THEO MICHAEL, 
CALETTI JOE, D AGOSTINO NICHOLAS (Injured)." data-competition-name="A-LEAGUE" 
data-league-type="LEAGUE" data-season="2017/2018" data-statustype="sched" 
data-ko="09:50" data-home-team="MELBOURNE VICTORY" data-away-team="BRISBANE 
ROAR FC" data-league-sort="11" data-correction="0" data-matchday="2018-02-09" 
data-game-status="Sched" data-league-code="41256" data-league-name="A-LEAGUE" 
data-country-name="AUSTRALIA" data-league-round="20" data-league-short="AL" 
data-home-id="28529" data-away-id="28531" data-ftr="false">

I'm especially interested in what's inside:

 data-season= 
 data-note=
 data-league-name=
 data-country-name=
 data-home-team=
 data-away-team=

But I'm unsure how to get this info. Here is what I have tried:

const http = require('http');
const request = require('request');
const cheerio = require('cheerio');

// Small HTTP server: for every incoming request it downloads the xscores
// soccer page, collects the text of each matching match row, and replies
// with those texts rendered as an HTML list.
http.createServer(function (req, res) {
  request('http://www.xscores.com/soccer', function (error, response, body) {
    // Always answer the client. Without this branch a failed or non-200
    // upstream fetch would leave the incoming request hanging forever.
    if (error || response.statusCode !== 200) {
      res.writeHead(502, {
        'Content-Type': 'text/plain'
      });
      res.end('Could not fetch http://www.xscores.com/soccer');
      return;
    }

    const $ = cheerio.load(body);
    let list_items = "";

    $('div.match_line.score_row.other_match.e_true').each(function (i, element) {
      // .text() yields the combined text of the row and of everything nested in it.
      const a = $(this).text();
      list_items += "<li>" + a + "</li>";
    });

    // Named separately from the downloaded `body` so the fetched page and the
    // generated page are not confused with each other.
    const html = "<ul>" + list_items + "</ul>";
    res.writeHead(200, {
      'Content-Type': 'text/html'
    });
    res.end(html);
  });
}).listen(8080);
console.log('Server is running at http://178.62.253.206:8080/');

However, it looks like the above code does not fetch what is inside this element, but rather fetches info from all the div elements that are under it. Here is what my code returned: http://178.62.253.206:8080/

10:50 SCH SHOW GAMES FROM AUSTRALIA AL MELBOURNE VICTORY 5 Â  Â  BRISBANE 
ROAR FC 7 Â  Â  Match Details

Any help with this matter would be much appreciated

frederik

Upvotes: 1

Views: 2932

Answers (1)

ckgupta
ckgupta

Reputation: 41

const cheerio = require('cheerio');

// Placeholder: replace with the HTML of the <div> you want to inspect.
const divElement = `your_div_element`;
const $ = cheerio.load(divElement);

// Print every attribute name/value pair of each selected element.
// .each() is used rather than .map() because nothing is being collected;
// the loop exists only for its console output.
$(divElement).each((index, element) => {
    // element.attribs holds the element's attributes keyed by attribute name.
    for (const [key, value] of Object.entries(element.attribs)) {
        console.log(key, ': ', value);
    }
});

I have created this simple script, which will help you get all the attribute names and their values; you can then filter out the ones that you are interested in.

Upvotes: 1

Related Questions