Finding and downloading images from websites using urllib2

Question

I'm writing a small program that, given an HTTP address, finds and downloads the images on that page. This is what I have so far:

import urllib2, html

class HTMLNode(object):
    """A node in a simple HTML tree.

    A node is either a text node or an element node. Text nodes carry the
    pseudo-tag ``'_text_'`` and store their text as a string in ``content``;
    element nodes store a list of child ``HTMLNode`` objects in ``content``.
    """

    def __init__(self, tag, attr, content, closed=True):
        """Store the node's fields.

        tag     -- element name, or '_text_' for a text node
        attr    -- mapping of attribute name to attribute value (strings)
        content -- text (text node) or list of child nodes (element node)
        closed  -- whether the element has children and a closing tag
        """
        self.tag = tag
        self.attr = attr
        self.content = content
        self.closed = closed

    def istext(self):
        """Return True if this node is a text node."""
        return self.tag == '_text_'

    def tostring(self):
        """Serialize this node and its descendants back to markup.

        Text nodes return their text unchanged. Elements return the opening
        tag with its attributes; closed elements are followed by their
        serialized children and the matching closing tag.
        """
        if self.istext():
            return self.content
        parts = ['<' + self.tag]
        for key, value in self.attr.items():
            parts.append(' ' + key + '="' + value + '"')
        parts.append('>')
        if self.closed:
            parts.extend(child.tostring() for child in self.content)
            # The closing tag was previously an empty string, which left
            # every closed element unterminated in the output.
            parts.append('</' + self.tag + '>')
        return ''.join(parts)

    def find_by_tag(self, tag):
        """Return every node in this subtree whose tag equals *tag*.

        The node itself is included when it matches. Results are in
        depth-first, document order. Text nodes are never descended into
        because their ``content`` is a string, not a list of nodes.
        """
        matches = [self] if self.tag == tag else []
        if not self.istext():
            for child in self.content:
                matches.extend(child.find_by_tag(tag))
        return matches

def imagegrab(url, filename=None):
    """Download the resource at *url* and save it to a local file.

    url      -- address of the resource to fetch
    filename -- local path to write to; when omitted, the last component
                of the URL's path is used (e.g. ``logo.png`` for
                ``http://example.com/img/logo.png``)

    Returns the path of the file that was written.

    Raises ValueError if no file name is given and none can be derived
    from the URL. Errors raised by ``urllib2.urlopen`` and ``open``
    propagate unchanged.
    """
    # Imported locally so this function only relies on the file's existing
    # top-level ``urllib2`` import.
    import posixpath
    import urlparse

    if filename is None:
        # The URL itself is not a usable local path (it contains '://' and
        # '/'), so keep only the final path component, without the query.
        filename = posixpath.basename(urlparse.urlsplit(url).path)
        if not filename:
            raise ValueError('cannot derive a file name from %r' % (url,))

    response = urllib2.urlopen(urllib2.Request(url))
    try:
        data = response.read()
    finally:
        response.close()

    # Open the file only after the download succeeded so that a failed
    # request does not leave an empty file behind.
    with open(filename, 'wb') as output:
        output.write(data)
    return filename

The missing link is finding a way to use the "find_by_tag" method of the HTMLNode class so that the program scans for the "<img>" tags and downloads the pictures from the website. Can anybody help me with that?

Finding and downloading images from websites using urllib2

Answers (1)

Related Questions