Reputation: 627
I have a problem parsing an HTML table to JSON.
HTML table page:
<div id="content">
<h1>content-information</h1>
<table class="testinformation">
<thead>
<tr>
<th>hello</th>
<th>test_text</th>
</tr>
</thead>
<tbody>
<tr>
<td><a href="https://example.com">hello1</a></td>
<td><a href="https://example.com/test_text">test_text</a></td>
</tr>
<tr>
<td><a href="https://example.com">hello2</a></td>
<td><a href="https://example.com/test_text2">test_text2</a></td>
</tr>
</tbody>
</table>
</div>
Node.js/cheerio script, which is not working properly:
var cheerio = require('cheerio'),
cheerioTableparser = require('cheerio-tableparser');
const request = require('request');
// Fetch the page, parse its HTML with cheerio and print the table's contents
// as returned by cheerio-tableparser's parsetable().
request('https://correct-url.com', function (error, response, html) {
  if (error) {
    // Surface request failures instead of silently printing nothing.
    console.error(error);
    return;
  }
  const $ = cheerio.load(html);
  // Registers the parsetable() method on cheerio selections.
  cheerioTableparser($);
  // A class selector needs a leading dot. The bare "testinformation" looked for a
  // <testinformation> element, matched nothing, and produced an empty result.
  const data = $(".testinformation").parsetable();
  console.log(data);
});
But the response is empty.
Upvotes: 0
Views: 2024
Reputation: 1159
I will give you an example based on my work with cheerio; it might be helpful to you.
var cheerio = require('cheerio');
var request = require('request');
/**
 * Requests `url` and passes the inner HTML of each element matching `#saleS`
 * to `callback`.
 *
 * If the request reports an error, the error is logged and `callback` is not
 * invoked (there is no HTML to parse).
 *
 * @param {string} url - Address of the page to fetch.
 * @param {function(string): void} callback - Called once per `#saleS` element
 *   with that element's inner HTML.
 */
function mainHtml(url, callback) {
  request(url, function (error, response, html) {
    console.log(url);
    if (error) {
      // On failure `html` is undefined; do not hand it to cheerio.load().
      console.error(error);
      return;
    }
    const $ = cheerio.load(html);
    $('#saleS').each(function (i, element) {
      callback($(element).html());
    });
  });
}
I have made a callback-based function that selects the main div containing the data I needed to scrape. The mainHtml() function passes that div's HTML to its callback, and I use it in other functions to retrieve data from it.
/**
 * Parses `parsedHTML` and, for every element matching `#resultBlockWrapper`,
 * passes the combined text of that element's children to `callback`.
 *
 * @param {string} parsedHTML - HTML fragment to search.
 * @param {function(string): void} callback - Called once per matching element
 *   with the text of its children.
 */
function cardDiv(parsedHTML, callback) {
  const $ = cheerio.load(parsedHTML);
  const wrappers = $(' #resultBlockWrapper');
  wrappers.each(function (index, node) {
    const childText = $(node).children().text();
    callback(childText);
  });
}
In the cardDiv() function I retrieve the data from the child div of the #saleS div, using the HTML provided by the mainHtml() function.
var express = require('express');
var app = express();
var router = express.Router();
var scraper = require('./scraper');
// GET /scrape: fetch the target page through the scraper module, log each
// extracted card text and return the collected texts as JSON.
router.get('/scrape', function (req, res) {
  // Declared locally: the original `https: url = ...` parsed as a label followed
  // by an assignment to an implicit global.
  const url = "https://www.example.com";
  scraper.mainHtml(url, function (parsedHTML) {
    const cards = [];
    scraper.cardDiv(parsedHTML, function (parsedData) {
      cards.push(parsedData);
      // Prefix each entry with its 1-based position; the original logged an
      // undeclared `n`, which throws a ReferenceError.
      console.log(cards.length + " " + parsedData);
    });
    // Assumes scraper.cardDiv invokes its callback synchronously (it iterates
    // with cheerio's .each), so `cards` is complete at this point.
    // mainHtml may call back more than once, so only answer the first time.
    // NOTE(review): if mainHtml never calls back (request error or no match),
    // no response is sent; confirm the desired behaviour for that case.
    if (!res.headersSent) {
      res.json(cards);
    }
  });
});
Above is the API code. Kindly refer to the cheerio documentation for more examples.
Upvotes: 3