Retrieve an XML attribute using ElementTree in Python

Question

I am trying to get an element's attribute, but all I get is None or an empty list, depending on how I try to retrieve it. Also, if anyone knows a better way to find a particular tag of the element, I would appreciate it. Here is the code; the spaced-out part is what should return the URL but doesn't.

import xml.etree.ElementTree as ET
import webbrowser,time,urllib.request
import tkinter as tk
import urllib

# webbrowser.get('windows-default').open_new('http://www.reddit.com/'+'r/blender')
# Root Tk window: the Application frame is created with it as master, and the
# QUIT button destroys it.
main = tk.Tk()
class Application(tk.Frame):
    """Tk frame that lists the thread titles of a reddit RSS feed.

    The feed is downloaded to ``rss.xml`` in the current working directory,
    parsed with ElementTree, and the item titles are shown in a scrollable
    listbox. A text entry plus button lets the user switch to another feed.
    """

    FEED_HOST = 'http://www.reddit.com'
    NO_DESCRIPTION = "There is no description. Maybe it's a link"

    def __init__(self, master=None):
        """Build the widgets and load the front-page feed."""
        tk.Frame.__init__(self, master)
        self.pack()
        self.createWidgets()
        self.initial()

    def createWidgets(self):
        """Create the entry, the two buttons, the listbox and its scrollbar."""
        self.send_entry = tk.Entry(self)
        self.send_entry.grid(row=0, column=0)

        # grid() returns None, so each widget is created first and laid out in
        # a separate call; chaining would store None in the attribute.
        self.change_sub = tk.Button(
            self,
            text='Change Subreddit',
            command=lambda: self.getXML(self.send_entry.get()),
        )
        self.change_sub.grid(row=0, column=2)

        self.lb_scrollY = tk.Scrollbar(self, orient=tk.VERTICAL)
        self.lb_scrollY.grid(row=1, column=1, sticky=tk.NS)
        self.thread_lb = tk.Listbox(self, yscrollcommand=self.lb_scrollY.set)
        self.lb_scrollY['command'] = self.thread_lb.yview
        self.thread_lb.grid(row=1, column=0)

        # `main` is the module-level root window.
        self.QUIT = tk.Button(self, text="QUIT", fg="red", command=main.destroy)
        self.QUIT.grid(row=2)

    def descStripper(self, desc):
        """Return the value of the first ``alt="..."`` attribute in *desc*.

        A placeholder message is returned when *desc* is empty/None or holds
        no ``alt="`` marker. If the closing quote is missing, everything
        after the marker is returned.
        """
        _, marker, rest = (desc or '').partition('alt="')
        if not marker:
            return self_no_description()
        # partition() yields the whole remainder when no closing quote exists,
        # so a truncated attribute no longer loses its last character.
        return rest.partition('"')[0]

    def lbPopulator(self, title, pub, link):
        """Replace the listbox contents with the strings in *title*.

        *pub* and *link* are accepted for the caller's convenience but are
        not displayed.
        """
        self.thread_lb.delete(0, tk.END)
        for item in title:
            self.thread_lb.insert(tk.END, item)

    def getXmlData(self):
        """Parse ``rss.xml`` and refresh the per-thread lists and the listbox.

        Fills ``threadTitleList``, ``threadPubDateList``, ``threadLinkList``
        and ``threadDescList`` from the children of every ``item`` element
        found two levels below the root. ``threadThumbNail`` is reset to an
        empty list and is not populated here.
        """
        self.threadPubDateList = []
        self.threadTitleList = []
        self.threadLinkList = []
        self.threadDescList = []
        self.threadThumbNail = []
        root = ET.parse('rss.xml').getroot()
        for channel in root:
            for SubChannel in channel:
                if SubChannel.tag != 'item':
                    continue
                for threadInfo in SubChannel:
                    if threadInfo.tag == 'title':
                        self.threadTitleList.append(threadInfo.text)
                    elif threadInfo.tag == 'pubDate':
                        # Drop the last six characters of the date string; an
                        # empty element has text None, so fall back to ''.
                        self.threadPubDateList.append((threadInfo.text or '')[:-6])
                    elif threadInfo.tag == 'link':
                        self.threadLinkList.append(threadInfo.text)
                    elif threadInfo.tag == 'description':
                        self.threadDescList.append(self.descStripper(threadInfo.text))




                    # Debug probe this question is about: prints the tag, the
                    # attribute dict and the 'url' attribute of the matched
                    # element in the http://search.yahoo.com/mrss/ namespace.
                    if threadInfo.tag == '{http://search.yahoo.com/mrss/}title':
                        print(threadInfo.tag)
                        print(threadInfo.attrib)
                        print(threadInfo.get('url'))




        self.lbPopulator(self.threadTitleList, self.threadPubDateList, self.threadLinkList)

    @staticmethod
    def _feedUrl(subreddit):
        """Return the RSS URL for *subreddit* (e.g. ``r/blender`` or ``/r/blender``).

        A leading slash is added when missing so the host and path are never
        glued together; an empty string maps to the front-page feed.
        """
        path = subreddit.strip()
        if not path.startswith('/'):
            path = '/' + path
        return Application.FEED_HOST + path + '.rss'

    def getXML(self, subreddit):
        """Download the feed for *subreddit* to ``rss.xml`` and reload the lists.

        An HTTP error from the server is reported on stdout and the current
        listbox contents are left untouched.
        """
        try:
            urllib.request.urlretrieve(self._feedUrl(subreddit), 'rss.xml')
            self.getXmlData()
        except urllib.error.HTTPError as err:
            print('Too many requests-Try again')

    def initial(self):
        """Load the front-page feed, retrying after three seconds on HTTP errors."""
        try:
            urllib.request.urlretrieve('http://www.reddit.com/.rss', 'rss.xml')
            self.getXmlData()
        except urllib.error.HTTPError as err:
            print('Too many requests-Trying again 3')
            # Re-running __init__ would drop the master and build a second set
            # of widgets, and sleeping would block the UI; schedule a retry of
            # this method on the Tk event loop instead.
            self.after(3000, self.initial)


def self_no_description():
    """Return the placeholder used when a description has no alt text."""
    return Application.NO_DESCRIPTION


# main.geometry("250x150")

# Build the UI inside the root window and hand control to the Tk event loop.
app = Application(master=main)
app.mainloop()

Here is the bit of the code that should return the thumbnail URL when given an XML file. The problem is in the last 'if' statement; the rest works fine.

def getXmlData(self):
    """Parse ``rss.xml`` and collect per-thread data on *self*.

    Fills ``threadTitleList``, ``threadPubDateList``, ``threadLinkList`` and
    ``threadDescList`` from the children of every ``item`` element found two
    levels below the root. ``threadThumbNail`` is reset to an empty list and
    is not populated here.
    """
    self.threadPubDateList = []
    self.threadTitleList = []
    self.threadLinkList = []
    self.threadDescList = []
    self.threadThumbNail = []
    root = ET.parse('rss.xml').getroot()
    for channel in root:
        for SubChannel in channel:
            if SubChannel.tag != 'item':
                continue
            for threadInfo in SubChannel:
                if threadInfo.tag == 'title':
                    self.threadTitleList.append(threadInfo.text)
                elif threadInfo.tag == 'pubDate':
                    # Drop the last six characters of the date string; an
                    # empty element has text None, so fall back to ''.
                    self.threadPubDateList.append((threadInfo.text or '')[:-6])
                elif threadInfo.tag == 'link':
                    self.threadLinkList.append(threadInfo.text)
                elif threadInfo.tag == 'description':
                    self.threadDescList.append(self.descStripper(threadInfo.text))
                # Debug probe this question is about: prints the tag, the
                # attribute dict and the 'url' attribute of the matched
                # element in the http://search.yahoo.com/mrss/ namespace.
                if threadInfo.tag == '{http://search.yahoo.com/mrss/}title':
                    print(threadInfo.tag)
                    print(threadInfo.attrib)
                    print(threadInfo.get('url'))

msnider · Accepted Answer

The only tag that has an attribute called url is the media:thumbnail tag. And as you pointed out, media is declared at the top with xmlns:media="http://search.yahoo.com/mrss/". This leads me to believe that your last if statement should be:

# media:thumbnail (namespace http://search.yahoo.com/mrss/) is the element that
# carries the 'url' attribute, so match on it instead of media:title.
if threadInfo.tag == '{http://search.yahoo.com/mrss/}thumbnail':
   print(threadInfo.tag)
   print(threadInfo.attrib)
   print(threadInfo.get('url'))

Which should produce the output:

{http://search.yahoo.com/mrss/}thumbnail
{'url': 'http://a.thumbs.redditmedia.com/cozEqqG9muj-tT3Z.jpg'}
http://a.thumbs.redditmedia.com/cozEqqG9muj-tT3Z.jpg

Retrieve an XML attribute using ElementTree in Python

Answers (1)

Related Questions