Reputation: 4335
I want to parse HTML table values into a JSON object using CheerioJS.
I'm struggling to get to grips with the Cheerio API, and so far I have only come up with an awfully convoluted solution.
I am trying to parse this HTML:
<table summary="Account summary" class="accounts-table">
<thead>
<tr>
<th>Accounts</th>
<th>Total value</th>
<th>Available</th>
<th>Actions</th>
</tr>
</thead>
<tfoot>
<tr>
<td>
Total
</td>
<td>
£TOTALAMOUNT
</td>
<td>
£CASH
</td>
<td></td>
</tr>
</tfoot>
<tbody>
<tr>
<td style="white-space: normal">
<a href="https://awebsitehere.co.uk/account_summary/account/22" title="View your Stocks ISA"
class="product-name">
Stocks ISA
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/account_summary/account/22" title="View your Stocks ISA">
ISA-VAL
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/amount_available/account/22"
title="View cash summary for your Stocks ISA">
ISA-CASH
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/topup/account/22" title="Top up your Stocks ISA"
class="top-up-button">
Top up
</a>
<a href="https://awebsitehere.co.uk/topup/account/22l" title="Place a deal in your Stocks ISA"
class="deal-button">
</a>
</td>
</tr>
<tr>
<td style="white-space: normal">
<a href="https://awebsitehere.co.uk/account_summary/account/26" title="View your Junior ISA"
class="product-name">
Junior ISA
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/my-accounts/account_summary/account/26"
title="View your Junior ISA">
JUNIOR-VAL
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/my-accounts/amount_available/account/26"
title="View cash summary for your Junior ISA">
JUNIOR-CASH
</a>
</td>
<td>
</td>
</tr>
<tr>
<td style="white-space: normal">
<a href="https://awebsitehere.co.uk/my-accounts/account_summary/account/98"
title="View your Stocks Account" class="product-name">
Stocks Account
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/my-accounts/account_summary/account/98"
title="View your Stocks Account">
STOCKS-VAL
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/my-accounts/amount_available/account/98"
title="View cash summary for your stocks Account">
STOCKS-CASH
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/my-accounts/stock_and_fund_search/account/98/action/deal"
title="Place a deal in your stocks Account" class="deal-button">
<span style="padding-right:8px;">Deal</span>
</a>
</td>
</tr>
</tbody>
</table>
Into a JSON object that looks like this:
{
"accounts": {
"Stocks ISA": {
"investments":
"total value": isa-val,
"cash": isa-cash,
"link": "the href attached to this account"
},
"Junior ISA": {
"investments":
"total value": junior-val,
"cash": junior-cash,
"link": "the href attached to this account"
},
"stocks account": {
"investments":
"total value": stocks-val,
"cash": stocks-cash,
"link": "the href attached to this account"
}
}
}
This is the code that I've attempted so far, but I'm struggling to get my head round the HTML traversing.
const $ = cheerio.load(body);

// Visit every body row of the accounts table, then every cell within that row.
const accountRows = $('table[class="accounts-table"] tbody tr');
accountRows.each((rowIndex, rowElem) => {
  const cells = $(rowElem).children();
  cells.each((cellIndex, cellElem) => {
    // Trimmed text of the cell's child elements; it is computed but not used yet.
    const cellText = $(cellElem).children().text().trim();
  });
});
I would be very grateful to anyone who could point me in the right direction!
Upvotes: 0
Views: 432
Reputation: 30715
This ought to parse the HTML into the structure you're looking for:
// Parse the markup with Cheerio; `$` is the query function used by the code below.
// NOTE(review): `body` is presumably the fetched HTML string — confirm against the caller.
const $ = cheerio.load(body)
/**
 * Builds the summary entry for one account from a row of the accounts table.
 *
 * The trimmed text of every link inside the row's cells is collected in
 * document order: the first is used as the account name, the second as the
 * total value and the third as the cash figure. The text of any further links
 * in the row is ignored.
 *
 * @param {object} elem - Row element to search, as passed to the `.each` callback.
 * @returns {{name: (string|undefined), value: object}} The account name and its
 *   details (`investments`, `total value`, `cash`, `link`). `name` is undefined
 *   when the row has no links. A missing figure is returned as "" and `link`
 *   is null when no link in the row carries an href.
 */
function parseAccountRow(elem) {
  const linkTexts = [];
  let href = null;
  $("td a", elem).each((n, link) => {
    linkTexts.push($(link).text().trim());
    // Keep the first non-empty href. The trailing `|| null` stops an anchor
    // without an href attribute from turning the initial null into undefined.
    href = href || link.attribs.href || null;
  });

  // A row can hold fewer links than columns (e.g. an empty cell). Fall back to
  // "" so the output never contains the literal string "undefined".
  const figureAt = (index) => (linkTexts[index] || "").toLowerCase();

  return {
    name: linkTexts[0],
    value: {
      "investments": "",
      "total value": figureAt(1),
      cash: figureAt(2),
      link: href,
    },
  };
}
// Collect one entry per body row of the accounts table, keyed by account name.
const parsedObj = {};
const accountRows = $('table[class="accounts-table"] tbody tr');
accountRows.each((index, rowElem) => {
  const { name, value } = parseAccountRow(rowElem);
  parsedObj[name] = value;
});
console.log("Result:", parsedObj);
I'm getting the result below:
{
"Stocks ISA": {
"investments": "",
"total value": "isa-val",
"cash": "isa-cash",
"link": "https://awebsitehere.co.uk/account_summary/account/22"
},
"Junior ISA": {
"investments": "",
"total value": "junior-val",
"cash": "junior-cash",
"link": "https://awebsitehere.co.uk/account_summary/account/26"
},
"Stocks Account": {
"investments": "",
"total value": "stocks-val",
"cash": "stocks-cash",
"link": "https://awebsitehere.co.uk/my-accounts/account_summary/account/98"
}
}
Upvotes: 1