Reputation: 1725
I am parsing a rss feed xml on android and was able to retrieve all other content but failed to retrieve the images in the feeds.
i am using sax parser.
The images are embedded in <media:thumbnail>
tag.
It looks like this.
<media:thumbnail>http://www.*some_website*.com/..../Client000040008200000082068.jpg</media:thumbnail>
i have seen many examples but they parse from the links embedded url, also seen links embedded like this
<media:thumbnail="link here"></media:thumbnail>
are they both same? and how to parse such tags?
Please help me with answers.
Thank you.
EDIT
this is my rss handler
public void startDocument() throws SAXException {
mFeed = new Feed();
}
public void endDocument() throws SAXException {
Date now = new Date();
//Date now = Calendar.getInstance().getTime();
mFeed.setRefresh(now);
}
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
//Only consider elements from allowed third-party namespaces
if (NAMESPACES.contains(uri)) {
mSb = new StringBuffer();
String value = localName.trim();
if (value.equalsIgnoreCase("rss") || value.equalsIgnoreCase("rdf")) {
isType = true;
} else if (value.equalsIgnoreCase("feed")) {
isType = true;
isFeed = true;
} else if (value.equalsIgnoreCase("channel")) {
isFeed = true;
} else if (value.equalsIgnoreCase("item") || value.equalsIgnoreCase("entry")) {
mItem = new Item();
isItem = true;
mNbrItems++;
} else if (value.equalsIgnoreCase("title"))
isTitle = true;
else if (value.equalsIgnoreCase("link")) {
// Get attributes from link element for Atom format
if (attributes != null) {
// Enclosure for Atom format
if (attributes.getValue("rel") != null && attributes.getValue("rel").equalsIgnoreCase("enclosure")) {
mEnclosure = new Enclosure();
mMimeAttribute = attributes.getValue("type");
isEnclosure = true;
}
mHrefAttribute = attributes.getValue("href");
}
isLink = true;
} else if (value.equalsIgnoreCase("pubDate") || value.equalsIgnoreCase("published") || value.equalsIgnoreCase("date"))
isPubdate = true;
else if (value.equalsIgnoreCase("guid") || value.equalsIgnoreCase("id"))
isGuid = true;
else if (value.equalsIgnoreCase("description") || value.equalsIgnoreCase("summary"))
isDescription = true;
else if ((value.trim().equals("media:thumbnail")))
{
// isImageLink = true;
String attrValue = attributes.getValue("url");
int tmp =Integer.parseInt(attrValue);
// tmp.setLink(attrValue);
/*if (localName.trim().equals("thumbnail")) {
String thumbnail = attributes.getValue("url");
}*/
}
else if (value.equalsIgnoreCase("encoded") || value.equalsIgnoreCase("content"))
isContent = true;
else if (value.equalsIgnoreCase("source"))
isSource = true;
else if (value.equalsIgnoreCase("enclosure")) {
// Enclosure for RSS format
if (attributes != null) {
mEnclosure = new Enclosure();
mMimeAttribute = attributes.getValue("type");
mHrefAttribute = attributes.getValue("url");
isEnclosure = true;
}
}
}
}
public void endElement(String uri, String localName, String qName) throws SAXException {
//Only consider elements from allowed third-party namespaces
if (NAMESPACES.contains(uri)) {
String value = localName.trim();
if (value.equalsIgnoreCase("rss")) {
mFeed.setType(Feed.TYPE_RSS);
isType = false;
} else if (value.equalsIgnoreCase("feed")) {
mFeed.setType(Feed.TYPE_ATOM);
isType = false;
isFeed = false;
} else if (value.equalsIgnoreCase("RDF")) {
mFeed.setType(Feed.TYPE_RDF);
isType = false;
} else if (value.equalsIgnoreCase("channel")) {
isFeed = false;
} else if (value.equalsIgnoreCase("item") || value.equalsIgnoreCase("entry")) {
if (mNbrItems <= maxItems) {
if (mItem.getGuid() == null)
mItem.setGuid(mItem.getLink().toString());
mFeed.addItem(mItem);
}
isItem = false;
} else if (value.equalsIgnoreCase("title") && !isSource) {
if (isItem)
mItem.setTitle(Html.fromHtml(mSb.toString().trim()).toString());
else if (isFeed)
mFeed.setTitle(Html.fromHtml(mSb.toString().trim()).toString());
isTitle = false;
} else if (value.equalsIgnoreCase("link") && !isSource) {
if (isItem) {
try {
if (isEnclosure) {
// Enclosure for Atom format
mEnclosure.setMime(mMimeAttribute);
mEnclosure.setURL(new URL(mHrefAttribute));
mItem.addEnclosure(mEnclosure);
mMimeAttribute = null;
isEnclosure = false;
} else if (mHrefAttribute != null)
mItem.setLink(new URL(mHrefAttribute));
else
mItem.setLink(new URL(mSb.toString().trim()));
} catch(MalformedURLException mue) {
throw new SAXException(mue);
}
} else if (isFeed && mFeed.getHomePage() == null) {
try {
if (mSb != null && mSb.toString() != "") // RSS
mFeed.setHomePage(new URL(mSb.toString().trim()));
else if (mMimeAttribute == "text/html") //Atom
mFeed.setHomePage(new URL(mHrefAttribute));
} catch(MalformedURLException mue) {
throw new SAXException(mue);
}
}
mHrefAttribute = null;
isLink = false;
} else if (value.equalsIgnoreCase("pubDate") || value.equalsIgnoreCase("published") || value.equalsIgnoreCase("date")) {
if (isItem) {
for (int i = 0; i < DATE_FORMATS.length; i++) {
try {
//String pattern = mSimpleDateFormats[i].toPattern();
mItem.setPubdate(mSimpleDateFormats[i].parse(mSb.toString().trim()));
break;
} catch (ParseException pe) {
if (i == DATE_FORMATS.length-1) {
throw new SAXException(pe);
}
}
}
}
isPubdate = false;
} else if ((value.equalsIgnoreCase("guid") || value.equalsIgnoreCase("id")) && !isSource) {
if (isItem)
mItem.setGuid(mSb.toString().trim());
isGuid = false;
} else if (value.equalsIgnoreCase("description") || value.equalsIgnoreCase("summary")) {
if (isItem)
//mItem.setContent(Html.fromHtml(mSb.toString().trim()).toString());
mItem.setContent(removeContentSpanObjects(mSb).toString().trim() + System.getProperty("line.separator" ));
isDescription = false;
} else if (value.equalsIgnoreCase("media:thumbnail") || value.equalsIgnoreCase("media:content")) {
if (isItem)
//mItem.setContent(Html.fromHtml(mSb.toString().trim()).toString());
System.out.println(removeContentSpanObjects(mSb).toString().trim());
mItem.setMimageLink(removeContentSpanObjects(mSb).toString().trim());
isImageLink = false;
} else if (value.equalsIgnoreCase("encoded") || value.equalsIgnoreCase("content")) {
if (isItem)
//mItem.setContent(Html.fromHtml(mSb.toString().trim()).toString());
mItem.setContent(removeContentSpanObjects(mSb).toString().trim() + System.getProperty("line.separator" ));
isContent = false;
} else if (value.equalsIgnoreCase("source"))
isSource = false;
else if (value.equalsIgnoreCase("enclosure")) {
if (isItem) {
try {
// Enclosure for RSS format
mEnclosure.setMime(mMimeAttribute);
mEnclosure.setURL(new URL(mHrefAttribute));
mItem.addEnclosure(mEnclosure);
mMimeAttribute = null;
mHrefAttribute = null;
} catch(MalformedURLException mue) {
throw new SAXException(mue);
}
}
isEnclosure = false;
}
}
}
public void characters(char[] ch, int start, int length) throws SAXException {
if (isType || isTitle || isLink || isPubdate || isGuid || isDescription || isImageLink || isContent)
mSb.append(new String(ch, start, length));
}
public Feed handleFeed(URL url) throws IOException, SAXException, ParserConfigurationException {
getParser().parse(new InputSource(url.openStream()));
// Reordering the list of items, first item parsed (most recent) -> last item in the list
Collections.reverse(mFeed.getItems());
mFeed.setURL(url);
if (mFeed.getHomePage() == null)
mFeed.setHomePage(url);
return mFeed;
}
private XMLReader getParser() throws SAXException, ParserConfigurationException {
SAXParserFactory spf = SAXParserFactory.newInstance();
SAXParser sp = spf.newSAXParser();
XMLReader xr = sp.getXMLReader();
xr.setContentHandler(this);
return xr;
Upvotes: 0
Views: 2775
Reputation: 128458
You need to use the qName.
From the doc:
localName - The local name (without prefix), or the empty string if Namespace processing is not being performed.
qName - The qualified name (with prefix), or the empty string if qualified names are not available.
The local name basically strips off the name space, so if you wanted, you could also do 'thumbnail' which would also match.
Upvotes: 0