SripadRaj
SripadRaj

Reputation: 1725

how to parse <media:thumbnail> tag from rss feed xml?

I am parsing a rss feed xml on android and was able to retrieve all other content but failed to retrieve the images in the feeds.

i am using sax parser.

The images are embedded in <media:thumbnail> tag. It looks like this.

<media:thumbnail>http://www.*some_website*.com/..../Client000040008200000082068.jpg</media:thumbnail>

i have seen many examples but they parse from the links embedded url, also seen links embedded like this

 <media:thumbnail="link here"></media:thumbnail>

are they both same? and how to parse such tags?

Please help me with answers.

Thank you.

EDIT

this is my rss handler

public void startDocument() throws SAXException {
    mFeed = new Feed();
}

public void endDocument() throws SAXException {
    Date now = new Date();
    //Date now = Calendar.getInstance().getTime();
    mFeed.setRefresh(now);
}

public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
    //Only consider elements from allowed third-party namespaces
    if (NAMESPACES.contains(uri)) {
        mSb = new StringBuffer();
        String value = localName.trim();

        if (value.equalsIgnoreCase("rss") ||  value.equalsIgnoreCase("rdf")) {
            isType = true;
        } else if (value.equalsIgnoreCase("feed")) {
            isType = true;
            isFeed = true;
        } else if (value.equalsIgnoreCase("channel")) {
            isFeed = true;
        } else if (value.equalsIgnoreCase("item") || value.equalsIgnoreCase("entry")) {
            mItem = new Item();
            isItem = true;
            mNbrItems++;
        } else if (value.equalsIgnoreCase("title"))
            isTitle = true;
        else if (value.equalsIgnoreCase("link")) {
            // Get attributes from link element for Atom format
            if (attributes != null) {
                // Enclosure for Atom format
                if (attributes.getValue("rel") != null && attributes.getValue("rel").equalsIgnoreCase("enclosure")) {
                    mEnclosure = new Enclosure();
                    mMimeAttribute = attributes.getValue("type");
                    isEnclosure = true;
                }
                mHrefAttribute = attributes.getValue("href");
            }
            isLink = true;
        } else if (value.equalsIgnoreCase("pubDate") || value.equalsIgnoreCase("published") || value.equalsIgnoreCase("date"))
            isPubdate = true;
        else if (value.equalsIgnoreCase("guid") || value.equalsIgnoreCase("id"))
            isGuid = true;
        else if (value.equalsIgnoreCase("description") || value.equalsIgnoreCase("summary"))
            isDescription = true;
        else if ((value.trim().equals("media:thumbnail"))) 
        {
        //  isImageLink = true;
            String attrValue = attributes.getValue("url");
            int tmp =Integer.parseInt(attrValue);
      //      tmp.setLink(attrValue);

            /*if (localName.trim().equals("thumbnail")) {          
                String thumbnail = attributes.getValue("url");            
            }*/
            }
        else if (value.equalsIgnoreCase("encoded") || value.equalsIgnoreCase("content"))
            isContent = true;
        else if (value.equalsIgnoreCase("source"))
            isSource = true;
        else if (value.equalsIgnoreCase("enclosure")) {
            // Enclosure for RSS format
            if (attributes != null) {
                mEnclosure = new Enclosure();
                mMimeAttribute = attributes.getValue("type");
                mHrefAttribute = attributes.getValue("url");
                isEnclosure = true;
            }
        }
    }
}

public void endElement(String uri, String localName, String qName) throws SAXException {
    //Only consider elements from allowed third-party namespaces
    if (NAMESPACES.contains(uri)) {
        String value = localName.trim();

        if (value.equalsIgnoreCase("rss")) {
            mFeed.setType(Feed.TYPE_RSS);
            isType = false;
        } else if (value.equalsIgnoreCase("feed")) {
            mFeed.setType(Feed.TYPE_ATOM);
            isType = false;
            isFeed = false;
        } else if (value.equalsIgnoreCase("RDF")) {
            mFeed.setType(Feed.TYPE_RDF);
            isType = false;
        } else if (value.equalsIgnoreCase("channel")) {
            isFeed = false;
        } else if (value.equalsIgnoreCase("item") || value.equalsIgnoreCase("entry")) {
            if (mNbrItems <= maxItems) {
                if (mItem.getGuid() == null)
                    mItem.setGuid(mItem.getLink().toString());
                mFeed.addItem(mItem);
            }
            isItem = false;
        } else if (value.equalsIgnoreCase("title") && !isSource) {
            if (isItem)
                mItem.setTitle(Html.fromHtml(mSb.toString().trim()).toString());
            else if (isFeed)
                mFeed.setTitle(Html.fromHtml(mSb.toString().trim()).toString());
            isTitle = false;
        } else if (value.equalsIgnoreCase("link") && !isSource) {
            if (isItem) {
                try {
                    if (isEnclosure) {
                        // Enclosure for Atom format
                        mEnclosure.setMime(mMimeAttribute);
                        mEnclosure.setURL(new URL(mHrefAttribute));
                        mItem.addEnclosure(mEnclosure);
                        mMimeAttribute = null;
                        isEnclosure = false;
                    } else if (mHrefAttribute != null)
                        mItem.setLink(new URL(mHrefAttribute));
                    else
                        mItem.setLink(new URL(mSb.toString().trim()));
                } catch(MalformedURLException mue) {
                    throw new SAXException(mue);
                }
            } else if (isFeed && mFeed.getHomePage() == null) {
                try {
                    if (mSb != null && mSb.toString() != "") // RSS
                        mFeed.setHomePage(new URL(mSb.toString().trim()));
                    else if (mMimeAttribute == "text/html") //Atom
                        mFeed.setHomePage(new URL(mHrefAttribute));
                } catch(MalformedURLException mue) {
                    throw new SAXException(mue);
                }
            }
            mHrefAttribute = null;
            isLink = false;
        } else if (value.equalsIgnoreCase("pubDate") || value.equalsIgnoreCase("published") || value.equalsIgnoreCase("date")) {
            if (isItem) {
                for (int i = 0; i < DATE_FORMATS.length; i++) {
                    try {
                        //String pattern = mSimpleDateFormats[i].toPattern();
                        mItem.setPubdate(mSimpleDateFormats[i].parse(mSb.toString().trim()));
                        break;
                    } catch (ParseException pe) {
                        if (i == DATE_FORMATS.length-1) {
                            throw new SAXException(pe);
                        }   
                    }
                }
            }
            isPubdate = false;
        } else if ((value.equalsIgnoreCase("guid") || value.equalsIgnoreCase("id")) && !isSource) {
            if (isItem)
                mItem.setGuid(mSb.toString().trim());
            isGuid = false;
        } else if (value.equalsIgnoreCase("description") || value.equalsIgnoreCase("summary")) {
            if (isItem)
                //mItem.setContent(Html.fromHtml(mSb.toString().trim()).toString());
                mItem.setContent(removeContentSpanObjects(mSb).toString().trim() + System.getProperty("line.separator" ));
            isDescription = false;

        } else if (value.equalsIgnoreCase("media:thumbnail") || value.equalsIgnoreCase("media:content")) {
                    if (isItem)
                        //mItem.setContent(Html.fromHtml(mSb.toString().trim()).toString());
                        System.out.println(removeContentSpanObjects(mSb).toString().trim());
                        mItem.setMimageLink(removeContentSpanObjects(mSb).toString().trim());
                        isImageLink = false;
        } else if (value.equalsIgnoreCase("encoded") || value.equalsIgnoreCase("content")) {
            if (isItem)
                //mItem.setContent(Html.fromHtml(mSb.toString().trim()).toString());
                mItem.setContent(removeContentSpanObjects(mSb).toString().trim() + System.getProperty("line.separator" ));
            isContent = false;
        } else if (value.equalsIgnoreCase("source"))
            isSource = false;
        else if (value.equalsIgnoreCase("enclosure")) {
            if (isItem) {
                try {
                    // Enclosure for RSS format
                    mEnclosure.setMime(mMimeAttribute);
                    mEnclosure.setURL(new URL(mHrefAttribute));
                    mItem.addEnclosure(mEnclosure);
                    mMimeAttribute = null;
                    mHrefAttribute = null;
                } catch(MalformedURLException mue) {
                    throw new SAXException(mue);
                }
            }
            isEnclosure = false;
        }
    }
}

public void characters(char[] ch, int start, int length) throws SAXException {
    if (isType || isTitle || isLink || isPubdate || isGuid || isDescription || isImageLink  || isContent)
        mSb.append(new String(ch, start, length));
}

public Feed handleFeed(URL url) throws IOException, SAXException, ParserConfigurationException {
    getParser().parse(new InputSource(url.openStream()));
    // Reordering the list of items, first item parsed (most recent) -> last item in the list
    Collections.reverse(mFeed.getItems());
    mFeed.setURL(url);
    if (mFeed.getHomePage() == null)
        mFeed.setHomePage(url);
    return mFeed;
}

private XMLReader getParser() throws SAXException, ParserConfigurationException {
    SAXParserFactory spf = SAXParserFactory.newInstance();
    SAXParser sp = spf.newSAXParser();
    XMLReader xr = sp.getXMLReader();
    xr.setContentHandler(this);
    return xr;

Upvotes: 0

Views: 2775

Answers (1)

Paresh Mayani
Paresh Mayani

Reputation: 128458

You need to use the qName.

From the doc:

localName - The local name (without prefix), or the empty string if Namespace processing is not being performed.

qName - The qualified name (with prefix), or the empty string if qualified names are not available.

The local name basically strips off the name space, so if you wanted, you could also do 'thumbnail' which would also match.

Upvotes: 0

Related Questions