IbrahimMitko
IbrahimMitko

Reputation: 1207

Parsing an RSS feed containing CDATA using jQuery

I am trying to parse an RSS feed into an array but the feed is adding CDATA tags and combining certain elements.

My code below parses through the rss feed (url) and adds certain elements to an array. However when I look at the feed itself, it is combining multiple key elements in CDATA tags.

How do I parse through the CDATA tags to get usable xml fields?

Code

/**
 * Collects one plain record per <item> found in the feed.
 *
 * Note: `url` is not read anywhere in this body; the feed is taken from
 * `listXML`, which must already exist in an outer scope.
 *
 * @param {string} url - Feed location (unused by the visible code).
 * @returns {Array<{title: string, description: string, modified: string}>}
 *          One entry per <item>, in document order.
 */
buildXMLDoc = function (url) {
    return $(listXML).find('item').map(function () {
        var item = $(this);

        // Read each field once and reuse it for both the log and the record.
        var titleText = item.find("title").text();
        var pubDateText = item.find("pubDate").text();
        var descriptionText = item.find("description").text();

        console.log(titleText);
        console.log(pubDateText);
        console.log(descriptionText);

        return {title: titleText, description: descriptionText, modified: pubDateText};
    }).get(); // .get() unwraps the jQuery set into a plain array
};

XML

<?xml version="1.0" encoding="UTF-8"?>
<!--RSS generated by Microsoft SharePoint Foundation RSS Generator on 8/29/2017 10:23:18 AM -->
<?xml-stylesheet type="text/xsl" href="/_layouts/RssXslt.aspx?List=43aaf08e-0153-4f1d-9b46-e66bba563fde" version="1.0"?>
<rss version="2.0">
  <channel>
    <title>Webdocs: Test</title>
    <description>RSS feed for the Test list.</description>
    <lastBuildDate>Tue, 29 Aug 2017 14:23:18 GMT</lastBuildDate>
    <generator>Microsoft SharePoint Foundation RSS Generator</generator>
    <ttl>60</ttl>
    <language>en-US</language>
    <item>
      <title>Alternative Methods for Determining LCRs</title>
      <description><![CDATA[<div><b>Short Title:</b> Determining LCRs</div>
<div><b>Description:</b> <div class="ExternalClass6280076BC79848078688B86006BA554F"><p>​<span style="font-size:11.5pt;font-family:&quot;calibri&quot;, &quot;sans-serif&quot;">This project is a carryover from the 2017 effort to identify an alternative method for calculating the Locational Minimum Installed Capacity Requirements (LCRs). </span></p></div></div>
<div><b>Governance Process Status:</b> Progress</div>
<div><b>Topic State:</b> Open/Current</div>
<div><b>Updated Placeholder:</b> updated</div>
]]></description>
      <pubDate>Wed, 12 Jul 2017 13:41:06 GMT</pubDate>
    </item>

Console Log: The highlighted items are supposed to be separate elements.

Upvotes: 1

Views: 695

Answers (1)

gaetanoM
gaetanoM

Reputation: 42054

To get at the details inside the CDATA section, I suggest using jQuery's .contents() and picking out the relevant sub-sections by position. This will give wrong results if the positions change, but it is one possibility.

// Sample RSS feed inlined as a single string literal. The trailing backslash on
// each line is a line continuation, so no newline characters end up in the value.
// The sample stops right after the first </item>: <channel> and <rss> are never closed.
var listXML = '<?xml version="1.0" encoding="UTF-8"?>\
    <!--RSS generated by Microsoft SharePoint Foundation RSS Generator on 8/29/2017 10:23:18 AM -->\
<?xml-stylesheet type="text/xsl" href="/_layouts/RssXslt.aspx?List=43aaf08e-0153-4f1d-9b46-e66bba563fde" version="1.0"?>\
<rss version="2.0">\
        <channel>\
        <title>Webdocs: Test</title>\
<description>RSS feed for the Test list.</description>\
<lastBuildDate>Tue, 29 Aug 2017 14:23:18 GMT</lastBuildDate>\
<generator>Microsoft SharePoint Foundation RSS Generator</generator>\
<ttl>60</ttl>\
<language>en-US</language>\
<item>\
<title>Alternative Methods for Determining LCRs</title>\
<description><![CDATA[<div><b>Short Title:</b> Determining LCRs</div>\
<div><b>Description:</b> <div class="ExternalClass6280076BC79848078688B86006BA554F"><p>​<span style="font-size:11.5pt;font-family:&quot;calibri&quot;, &quot;sans-serif&quot;">This project is a carryover from the 2017 effort to identify an alternative method for calculating the Locational Minimum Installed Capacity Requirements (LCRs). </span></p></div></div>\
<div><b>Governance Process Status:</b> Progress</div>\
<div><b>Topic State:</b> Open/Current</div>\
<div><b>Updated Placeholder:</b> updated</div>\
    ]]></description>\
<pubDate>Wed, 12 Jul 2017 13:41:06 GMT</pubDate>\
</item>';

// Build one record per <item> from the nodes inside its <description>.
// Fields are picked purely by node position, so any change in the feed's
// markup will silently shift which values end up in which property.
var list = [];

$(listXML).find('item').each(function () {
    // Scope the lookup to the item being visited. Re-querying the whole feed
    // here would hand every iteration the same combined set of nodes.
    var cdat = $(this).find('description').contents();

    // Child nodes of the entries at positions 5 and 6, looked up once and reused.
    var partsAtFive = cdat.eq(5).contents();
    var partsAtSix = cdat.eq(6).contents();

    console.log(cdat.eq(1).text() + cdat.eq(2).text());
    console.log(partsAtFive.eq(0).text() + partsAtFive.eq(1).text());
    console.log(partsAtSix.eq(0).text() + partsAtSix.eq(1).text());

    list.push({
        title: cdat.eq(2).text(),
        description: partsAtFive.eq(1).text(),
        modified: partsAtSix.eq(1).text()
    });
});

console.log('list: ' + JSON.stringify(list));
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

A different approach is to take the description element's inner HTML, strip the opening CDATA marker from it, and convert the result to a jQuery object. You can then call find() on that object to select sub-elements.

// Sample RSS feed inlined as a single string literal. The trailing backslash on
// each line is a line continuation, so no newline characters end up in the value.
// The sample stops right after the first </item>: <channel> and <rss> are never closed.
var listXML = '<?xml version="1.0" encoding="UTF-8"?>\
    <!--RSS generated by Microsoft SharePoint Foundation RSS Generator on 8/29/2017 10:23:18 AM -->\
<?xml-stylesheet type="text/xsl" href="/_layouts/RssXslt.aspx?List=43aaf08e-0153-4f1d-9b46-e66bba563fde" version="1.0"?>\
<rss version="2.0">\
        <channel>\
        <title>Webdocs: Test</title>\
<description>RSS feed for the Test list.</description>\
<lastBuildDate>Tue, 29 Aug 2017 14:23:18 GMT</lastBuildDate>\
<generator>Microsoft SharePoint Foundation RSS Generator</generator>\
<ttl>60</ttl>\
<language>en-US</language>\
<item>\
<title>Alternative Methods for Determining LCRs</title>\
<description><![CDATA[<div><b>Short Title:</b> Determining LCRs</div>\
<div><b>Description:</b> <div class="ExternalClass6280076BC79848078688B86006BA554F"><p>​<span style="font-size:11.5pt;font-family:&quot;calibri&quot;, &quot;sans-serif&quot;">This project is a carryover from the 2017 effort to identify an alternative method for calculating the Locational Minimum Installed Capacity Requirements (LCRs). </span></p></div></div>\
<div><b>Governance Process Status:</b> Progress</div>\
<div><b>Topic State:</b> Open/Current</div>\
<div><b>Updated Placeholder:</b> updated</div>\
    ]]></description>\
<pubDate>Wed, 12 Jul 2017 13:41:06 GMT</pubDate>\
</item>';

// For each <item>, log the markup recovered from its <description>.
var list = [];

$(listXML).find('item').each(function () {
    // Use the description of the item being visited rather than re-parsing the
    // whole feed and always taking its first description.
    var descriptionEl = $(this).find('description')[0];
    if (!descriptionEl) {
        return; // item without a <description>: nothing to log, move on to the next item
    }

    // Remove the opening CDATA marker in the form it takes in the serialized
    // inner HTML, then re-parse the remainder so it can be queried with jQuery.
    var html = $(descriptionEl.innerHTML.replace('<!--[CDATA[', '')).html();
    console.log(html);
});
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

Upvotes: 1

Related Questions