Reputation:
In the book Node.js The Right Way there is an example of getting an RDF file parsed. I cannot get this example working as described in the book.
The RDF parser is this:
"use strict";
const fs = require("fs");
const cheerio = require("cheerio");
/**
 * Reads an RDF file and extracts a small book record from it using cheerio.
 *
 * @param {string} filename - Path of the RDF file to read.
 * @param {function} callback - Node-style callback. Called with the read
 *   error as the only argument if the file cannot be read; otherwise called
 *   with `null` and an object holding `_id`, `title`, `authors`, `subjects`.
 */
module.exports = function(filename, callback) {
fs.readFile(filename, function(err, data) {
if (err) {
// Hand the read error to the caller and stop.
return callback(err);
}
// Parse the file contents so they can be queried with CSS-style selectors.
let $ = cheerio.load(data.toString());
// Callback for .map(): returns the text content of each matched element.
let collect = function(index, elem) {
return $(elem).text();
};
callback(null, {
// The rdf:about attribute value with the 'ebooks/' prefix removed.
_id: $('pgterms\\:ebook').attr('rdf:about').replace('ebooks/', ''),
title: $('dcterms\\:title').text(),
// NOTE(review): .map() is not followed by .get()/.toArray() here; in the
// output shown in this post the result is a cheerio collection (with
// options/_root/prevObject properties), not a plain array.
authors: $('pgterms\\:agent pgterms\\:name').map(collect),
// NOTE(review): in the output shown in this post this selector matched
// nothing (length: 0) even though the sample input has rdf:value siblings.
subjects: $('[rdf\\:resource$="/LCSH"] ~ rdf\\:value').map(collect)
});
});
};
The input RDF file is this:
<rdf:RDF>
<pgterms:ebook rdf:about="ebooks/132">
<dcterms:subject>
<rdf:Description>
<dcam:memberOf rdf:resource="http://purl.org/dc/terms/LCSH"/>
<rdf:value>Military art and science -- Early works to 1800</rdf:value>
<rdf:value>War -- Early works to 1800</rdf:value>
</rdf:Description>
</dcterms:subject>
<dcterms:title>The Art of War</dcterms:title>
</pgterms:ebook>
<pgterms:agent rdf:about="2009/agents/4349">
<pgterms:name>Sunzi (6th cent. BC)</pgterms:name>
</pgterms:agent>
<pgterms:agent rdf:about="2009/agents/5101">
<pgterms:name>Giles, Lionel</pgterms:name>
</pgterms:agent>
</rdf:RDF>
And I believe the output of the parser should look like this:
{
"_id": "132",
"title": "The Art of War",
"authors": [
"Sunzi (6th cent. BC)",
"Giles, Lionel"
],
"subjects": [
"Military art and science -- Early works to 1800",
"War -- Early works to 1800"
]
}
The code does run and parse the RDF file, but has a lot of extra "stuff" I'm not familiar with. I cannot tell whether I'm not using the example code right or whether there is a bug in the book's source code.
The command I use to run the parser is this:
node -e 'require("./lib/rdf-parser.js")("test/pg132.rdf", console.log)'
And I get the following output instead of the expected output above:
null { _id: '132',
title: 'The Art of War',
authors:
{ '0': 'Sunzi (6th cent. BC)',
'1': 'Giles, Lionel',
options:
{ withDomLvl1: true,
normalizeWhitespace: false,
xmlMode: false,
decodeEntities: true },
_root: { '0': [Object], options: [Object], length: 1, _root: [Circular] },
length: 2,
prevObject:
{ '0': [Object],
'1': [Object],
options: [Object],
_root: [Object],
length: 2,
prevObject: [Object] } },
subjects:
{ options:
{ withDomLvl1: true,
normalizeWhitespace: false,
xmlMode: false,
decodeEntities: true },
_root: { '0': [Object], options: [Object], length: 1, _root: [Circular] },
length: 0,
prevObject:
{ options: [Object],
_root: [Object],
length: 0,
prevObject: [Object] } } }
What is the problem?
Upvotes: 0
Views: 499
Reputation: 11
The new code for rdf-parser.js's last two lines should read:
authors: $('pgterms\\:agent pgterms\\:name').map(collect).get(),
subjects : $('[rdf\\:resource$="/LCSH"]').siblings('rdf\\:value').map(collect).toArray()
Also, as per the author at https://forums.pragprog.com/forums/301/topics/12439, the new example JSON file should read (note the change in the subjects section):
{
"id": "132",
"title": "",
"authors": [
"Sunzi, active 6th century B.C.",
"Giles, Lionel"
],
"subjects": [
"Military art and science -- Early works to 1800"
]
}
Upvotes: 0
Reputation: 143
I just debugged it and found that, for the authors, you need to call .get(): .map(collect).get()
And for the subjects, you need to call .siblings() instead of using the ~ selector: .siblings('rdf\\:value').map(collect).get()
I hope this helps
Cheers Ian
Upvotes: 1
Reputation: 1209
Cheerio has been updated since the book was released. It is probably easiest if you use an older version of cheerio.
npm install cheerio@<older-version>
Upvotes: 1