Reputation: 998
I have been trying to parse image URLs from various news RSS feeds, such as:
https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml
I am using the rss-parser library for this purpose, but its documentation and examples cover every item field except media content and images. Is there a proper way to parse images from all kinds of news-channel RSS feeds? My attempt is given below.
const Parser = require('rss-parser');
// Shared rss-parser instance. `customFields` lists extra XML elements to copy
// onto the parsed feed and items in addition to the library's defaults.
const parser = new Parser({
  customFields: {
    feed: ['author'],
    item: [
      'categories',
      'author',
      // News feeds such as the NYT one carry article images in
      // <media:content url="..." medium="image"> elements; keepArray retains
      // every such element of an item rather than only the first.
      ['media:content', 'media:content', { keepArray: true }]
    ]
  }
});
/**
 * Fetch and parse an RSS feed, returning normalized feed metadata and items.
 *
 * @param {string} url - Address of the RSS feed to fetch.
 * @param {string} [url_id=""] - Caller-supplied identifier, copied unchanged onto the returned feed.
 * @param {string} [department_id=""] - Caller-supplied identifier, copied unchanged onto the returned feed.
 * @returns {Promise<{feed: Object, items: Object[]}|false>} The parsed feed and
 *   its items, or `false` when fetching or parsing failed.
 */
rssUrlParser: async (url, url_id = "", department_id = "") => {
  // Pick an image URL out of an item's `media:content` data. This yields ""
  // unless the parser was configured to expose that element on items.
  const mediaImageUrl = (item) => {
    const media = item['media:content'];
    if (!media) {
      return "";
    }
    // Accept both a single node and an array of nodes.
    const entries = Array.isArray(media) ? media : [media];
    for (const entry of entries) {
      // xml2js-style nodes keep their XML attributes under `$`.
      const attrs = (entry && entry.$) || {};
      const isImage =
        attrs.medium === 'image' ||
        String(attrs.type || '').startsWith('image/') ||
        (!attrs.medium && !attrs.type);
      if (attrs.url && isImage) {
        return attrs.url;
      }
    }
    return "";
  };

  try {
    const result = await parser.parseURL(url);
    const feed = {
      "title": result.title || "",
      "link": result.link || "",
      "author": result.author || "",
      "description": result.description || "",
      // The channel image URL lives on the image object, not on the feed root.
      "image": (result.image && result.image.url) || "",
      "url_id": url_id,
      "department_id": department_id
    };
    const sourceItems = Array.isArray(result.items) ? result.items : [];
    // Build normalized copies instead of mutating the parser's own objects.
    const items = sourceItems.map(i => {
      const enclosure = i.enclosure || {};
      return Object.assign({}, i, {
        author: i.author || "",
        // Prefer the enclosure, then fall back to a media:content image.
        image: enclosure.url || mediaImageUrl(i) || "",
        categories: i.categories || [],
        enclosure
      });
    });
    return {
      feed,
      items
    };
  } catch (e) {
    // Keep the `false` contract for callers, but do not hide the cause.
    console.error(`rssUrlParser: failed to parse ${url}`, e);
    return false;
  }
}
Upvotes: 1
Views: 138
Reputation: 7267
You can try this. `so.xml` is just the XML content I got from your URL above.
const fs = require('fs')
const { transform } = require('camaro')
// Read the saved feed XML and print the URL of every <media:content>
// element whose medium is "image".
;(async () => {
  const xml = fs.readFileSync('so.xml', 'utf-8')
  // Template entry: [XPath selecting the nodes, XPath evaluated on each node].
  const template = {
    images: ['//media:content[@medium="image"]', '@url']
  }
  const result = await transform(xml, template)
  console.log(JSON.stringify(result, null, 4))
})().catch((err) => {
  // Report read/transform failures instead of leaving the rejection unhandled.
  console.error(err)
  process.exitCode = 1
})
output
{
"images": [
"https://static01.nyt.com/images/2019/09/16/world/16yemen2-promo/16yemen2-promo-moth-v2.jpg",
"https://static01.nyt.com/images/2019/09/16/us/politics/16dc-prexy/16dc-prexy-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/business/16UAW1/16UAW1-moth-v2.jpg",
"https://static01.nyt.com/images/2019/06/19/science/19xp-hair/19xp-hair-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/us/politics/00Dems-Age-promo/00Dems-Age-01-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/us/politics/16dc-trump1/16dc-trump1-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/us/politics/16warren-rally-new1/16warren-rally-new1-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/us/politics/16dc-coons1-promo/16dc-coons1-promo-moth-v2.jpg",
"https://static01.nyt.com/images/2019/09/17/pageoneplus/17readers-kavanaugh/17readers-kavanaugh-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/us/politics/15dc-marja1/00dc-marjah1-moth.jpg",
"https://static01.nyt.com/images/2019/09/17/world/17usbriefing_amcore/17usbriefing_israel_elections-moth.jpg",
"https://static01.nyt.com/images/2019/09/15/nyregion/17nytoday-1/13giuliani-sub1-moth.jpg",
"https://static01.nyt.com/images/2019/09/09/us/politics/16daily1/16daily1-moth-v2.jpg",
"https://static01.nyt.com/images/2019/09/16/business/16db-newsletter-saudi/merlin_160850271_c07da32b-53d5-4cec-9c29-1887c4f262c8-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/opinion/16Landau1/merlin_160480368_10a849bf-724c-4da6-a5d4-0a3d84d9e5ac-moth.jpg",
"https://static01.nyt.com/images/2019/09/17/opinion/17shehadeh3/17shehadeh3-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/opinion/16saudiarabia/16saudiarabia-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/opinion/16warzel-illo/16warzel-illo-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/opinion/16krugmanWeb/16krugmanWeb-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/opinion/16Bacevich1/16Bacevich1-moth-v2.jpg",
"https://static01.nyt.com/images/2019/09/16/opinion/16rallyWeb/16rallyWeb-moth.jpg",
"https://static01.nyt.com/images/2019/09/12/opinion/12sainato/12sainato-moth.jpg",
"https://static01.nyt.com/images/2019/09/15/opinion/15Leonhardt/merlin_160698273_634a6005-f3ef-44aa-8ccd-c066ae269432-moth.jpg",
"https://static01.nyt.com/images/2019/09/15/opinion/15Blow/15Blow-moth.jpg",
"https://static01.nyt.com/images/2019/09/11/multimedia/11xp-skate6/11xp-skate6-moth-v3.jpg",
"https://static01.nyt.com/images/2019/09/16/us/politics/16dc-judiciary/16dc-judiciary-moth.jpg",
"https://static01.nyt.com/images/2019/09/17/books/17Chrisrock/17Chrisrock-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/sports/16russia1/merlin_147710904_598b80f7-f69a-48ce-be03-3a3d3f212269-moth.jpg",
"https://static01.nyt.com/images/2019/09/17/realestate/17trump-jamaica1/merlin_126207707_09d87e42-6d3a-4946-9797-1026ac691fe4-moth.jpg",
"https://static01.nyt.com/images/2019/09/15/us/politics/15dc-guantanamo1/15dc-guantanamo1-moth.jpg",
"https://static01.nyt.com/images/2019/09/17/science/00ILSI4/00ILSI4-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/science/16PURDUE1/16PURDUE1-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/world/16Brexit-sub/16Brexit-sub-moth.jpg",
"https://static01.nyt.com/images/2019/09/09/smarter-living/00wc-flossing/00wc-flossing-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/smarter-living/16sl-phonelife/00sl-phonelife-moth.jpg",
"https://static01.nyt.com/images/2019/08/12/smarter-living/12sl-wrongthing/16sl-wrongthing-moth.jpg",
"https://static01.nyt.com/images/2019/09/12/magazine/12atwar-stargazing-2/12atwar-stargazing-2-moth.jpg",
"https://static01.nyt.com/images/2019/09/17/books/17bookbarry1/17bookbarry1-moth.jpg",
"https://static01.nyt.com/images/2019/09/10/arts/10lopez-answers1/merlin_160385682_2ceafcda-79e1-44c0-bc0b-b3d6a426faf9-moth.jpg",
"https://static01.nyt.com/images/2019/09/11/dining/06Kitchen-stew1/06Kitchen-stew1-moth.jpg",
"https://static01.nyt.com/images/2019/09/11/science/11GERMS-NURSINGHOMES1/merlin_160631244_6b1bdb0c-11fe-496d-8c3b-0e243ac1c889-moth.jpg",
"https://static01.nyt.com/images/2019/09/10/science/10LASKER1/10LASKER1-moth.jpg",
"https://static01.nyt.com/images/2019/09/17/science/10SCI-MATTER-SKULLS1/10SCI-MATTER-SKULLS1-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/travel/16italy-transumanza1/16italy-transumanza1-moth-v2.jpg",
"https://static01.nyt.com/images/2019/09/10/t-magazine/10tmag-pepper-slide-5DNI-copy/10tmag-pepper-slide-5DNI-copy-moth-v2.jpg",
"https://static01.nyt.com/images/2019/09/16/t-magazine/16tmag-romanandwilliams/16tmag-romanandwilliams-moth.jpg",
"https://static01.nyt.com/images/2019/09/16/sports/16nfl-learned-top/16nfl-learned-top-moth-v2.jpg"
]
}
Upvotes: 1