Reputation: 13
I am running the code below to scrape data. However, it only scrapes the first element.
const cheerio = require('cheerio')
const jsonframe = require('jsonframe-cheerio')
const got = require('got');
/**
 * Downloads the CoinMarketCap "all" listing page, loads the response body
 * into cheerio, and logs the result of scraping `body` with the frame below.
 *
 * @returns {Promise<void>} Settles once the scraped result has been logged.
 */
async function scrapeCoinmarketCap() {
  const pageUrl = 'https://coinmarketcap.com/all/views/all/';
  const response = await got(pageUrl);

  const $ = cheerio.load(response.body);
  // Initialize the jsonframe plugin on this cheerio instance before scraping.
  jsonframe($);

  // Output key -> selector. The "@ href" suffix yields the link's href
  // rather than its text.
  const frame = {
    Coin: 'td.no-wrap.currency-name > a',
    url: 'td.no-wrap.currency-name > a @ href',
    Symbol: 'td.text-left.col-symbol',
    Price: 'td:nth-child(5) > a',
  };

  const scraped = $('body').scrape(frame, { string: true });
  console.log(scraped);
}
// Run the scraper. The async function returns a promise, so attach a
// rejection handler: a failed request or parse is logged and the process
// exits with a failing status instead of leaving the rejection unhandled.
scrapeCoinmarketCap().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
//Output -> only the first element
// {
// "Coin": "Bitcoin",
// "url": "/currencies/bitcoin/",
// "Symbol": "BTC",
// "Price": "$6122.67"
// }
Any suggestions as to what I am doing wrong?
Upvotes: 0
Views: 768
Reputation: 57214
`got` and `jsonframe-cheerio` are deprecated. Avoid them.
Here's how to do this with standard fetch and cheerio code (fetch is native in Node 18+, and cheerio has 7,870,278 weekly downloads at the current time, so it's 655,856 times more popular than jsonframe-cheerio's 12 weekly downloads).
const cheerio = require("cheerio"); // ^1.0.0-rc.12
const url = "<Your URL>";

// Download the page, then build one record per data row of the last table.
fetch(url)
  .then(response => {
    // A non-OK HTTP status is turned into a rejection so it reaches the
    // catch handler at the end of the chain.
    if (response.ok) {
      return response.text();
    }
    throw Error(response.statusText);
  })
  .then(markup => {
    const $ = cheerio.load(markup);
    // Only rows that contain at least one <td> are selected.
    const rows = $("table:last").find("tr:has(td)");
    const data = Array.from(rows, row => {
      const $row = $(row);
      return {
        coin: $row.find("td:nth-child(2)").text(),
        url: $row.find("td:nth-child(2) a").attr("href"),
        symbol: $row.find("td:nth-child(3)").text(),
        price: $row.find("td:nth-child(5)").text(),
      };
    });
    // Print only the first 20 records.
    console.log(data.slice(0, 20));
  })
  .catch(err => console.error(err));
Output:
[
{
coin: 'BTCBitcoin',
url: '/currencies/bitcoin/',
symbol: 'BTC',
price: '$70,271.61'
},
{
coin: 'ETHEthereum',
url: '/currencies/ethereum/',
symbol: 'ETH',
price: '$3,606.56'
},
{
// ...
Note that at the time of writing, prices are loaded asynchronously beyond the first 20 or so, so if you want those details, consider using a browser automation library like Puppeteer, or look into making API calls.
Upvotes: 0
Reputation: 5941
You can get all currency data with the List / Array pattern:
// Selectors for the fields of a single entry.
const rowFields = {
  Coin: "td.no-wrap.currency-name > a",
  url: "td.no-wrap.currency-name > a @ href",
  Symbol: "td.text-left.col-symbol",
  Price: "td:nth-child(5) > a",
};

// List / Array pattern: `_s` is the selector for the repeating element ("tr")
// and `_d` wraps the per-entry frame in an array so every match is collected
// under the `currency` key.
const frame = {
  currency: {
    _s: "tr",
    _d: [rowFields],
  },
};

const scraped = $('body').scrape(frame, { string: true });
console.log(scraped);
Upvotes: 1