profiler
profiler

Reputation: 627

Parse table from html using nodejs and cheerio

I have a problem parsing an HTML table to JSON.

HTML table page:

  <!-- Sample page fragment: a table with class "testinformation" holding one
       two-column header row and two body rows whose cells each wrap a link. -->
  <div id="content">
    <h1>content-information</h1>
              <table class="testinformation">
        <thead>
            <tr>
                <th>hello</th>
                <th>test_text</th>
            </tr>
        </thead>
        <tbody>
            <tr>
                <td><a href="https://example.com">hello1</a></td>
                <td><a href="https://example.com/test_text">test_text</a></td>
            </tr>
            <tr>
                <td><a href="https://example.com">hello2</a></td>
                <td><a href="https://example.com/test_text2">test_text2</a></td>
            </tr>            
        </tbody>
    </table>
  </div>

Node.js/cheerio script, which is not working properly:

  // Fetch the page and parse the table with class "testinformation" using the
  // cheerio-tableparser plugin, then print the parsed result.
  const cheerio = require('cheerio');
  const cheerioTableparser = require('cheerio-tableparser');
  const request = require('request');

  request('https://correct-url.com', function (error, response, html) {
    if (error) {
      // Report the failure instead of silently printing nothing.
      console.error(error);
      return;
    }

    const $ = cheerio.load(html);
    // Adds the .parsetable() method to this cheerio instance.
    cheerioTableparser($);

    // The table is identified by its class, so the selector needs the leading
    // dot. A bare "testinformation" is an element-name selector, matches
    // nothing on this page, and makes parsetable() return an empty result.
    // NOTE(review): the cells wrap their text in <a> tags; the
    // cheerio-tableparser README describes a textMode argument
    // (parsetable(true, true, true)) for text-only output; confirm if needed.
    const data = $('.testinformation').parsetable();
    console.log(data);
  });

But the response is empty.

Upvotes: 0

Views: 2024

Answers (1)

Shoyeb Memon
Shoyeb Memon

Reputation: 1159

I will give you an example based on my work with cheerio; it might be helpful to you.

var cheerio = require('cheerio');
var request = require('request');

 /**
  * Downloads `url` and hands the inner HTML of each `#saleS` match to `callback`.
  *
  * @param {string} url - Address of the page to fetch.
  * @param {function(string): void} callback - Called once per matching element
  *   with that element's inner HTML; not called when the request fails or
  *   nothing matches.
  */
 function mainHtml(url, callback) {
   request(url, function (error, response, html) {
     console.log(url);
     if (error) {
       // Without this guard a failed request would hand `undefined` to
       // cheerio.load instead of reporting the real problem.
       console.error(error);
       return;
     }

     const $ = cheerio.load(html);
     $('#saleS').each(function (i, element) {
       callback($(element).html());
     });
   });
 }

I have made a callback-based function that targets the main div of the data I needed to scrape. The mainHtml() function passes that div's HTML to the callback, and I use it in other functions to retrieve data from it.

 /**
  * Loads an HTML fragment and reports the combined text of the children of
  * every `#resultBlockWrapper` match.
  *
  * @param {string} parsedHTML - HTML markup to search.
  * @param {function(string): void} callback - Invoked once per match with the
  *   text of that element's children.
  */
 function cardDiv(parsedHTML, callback) {
   const $ = cheerio.load(parsedHTML);
   const wrappers = $(' #resultBlockWrapper');

   wrappers.each((index, wrapper) => {
     const childText = $(wrapper).children().text();
     callback(childText);
   });
 }

In the cardDiv() function I retrieve the text of the children of the #resultBlockWrapper div inside #saleS, using the HTML produced by the mainHtml() function.

var express = require('express');
var app = express();
var router = express.Router();
var scraper = require('./scraper');

// GET /scrape: fetch the page, extract the card text and return it to the client.
// Assumes ./scraper exports mainHtml and cardDiv; verify against that module.
router.get('/scrape', function (req, res) {
  // This was `https: url = "..."`, which parses as a label named `https`
  // followed by an assignment to an undeclared (implicitly global) `url`.
  const url = 'https://www.example.com';

  scraper.mainHtml(url, function (parsedHTML) {
    scraper.cardDiv(parsedHTML, function (parsedData) {
      // The original log also printed `n`, which is never defined in this
      // handler and would throw a ReferenceError.
      console.log(parsedData);

      // The callback may fire more than once; answer only the first time so
      // the request does not hang and the response is not sent twice.
      if (!res.headersSent) {
        res.send(parsedData);
      }
    });
  });
});

Above is the API code. Kindly refer to the cheerio documentation for more examples.

Upvotes: 3

Related Questions