Russell Teapot
Russell Teapot

Reputation: 523

Type error when trying to use urllib.request, 'str' object is not callable

I'm having some issues with urllopen.rquest, I can't even say exactly what's going on, so I'll provide the code and the traceback. First, the code:

import re as rex
import urllib.request 
import os


#qui ci piazzo le classi per i vari siti

class Amazon:
    def __init__(self,manager,dati):
        self.manager=manager
        self.indirizzo=None
        self.datiAttuali=dati
        self.URLimmagine=None
        self.prezzo=None
        self.speseSped=None

        self.setURLimmagine()
        self.setPrezzoSpese()
        self.caricaDati()



    def setURLimmagine(self):
        patBlocco=rex.compile(r'<div id="imgTagWrapperId"[\s\S]*</div>')
        patUrl=rex.compile(r'http://ecx.images-amazon.com/images/I/[^.]{0,13}')
        blocco=rex.search(patBlocco,self.datiAttuali)
        if blocco==None:
            self.URLimmagine='ND'
        else:
            url=rex.search(patUrl,blocco.group())
            if url!=None:
                self.URLimmagine=url.group()+'.jpg'
            else:
                self.URLimmagine='blocco trovato,ND'

    def setPrezzoSpese(self):
        patBlocco=rex.compile(r'<div class="a-box"[\s\S]*</div>')
        blocco=rex.search(patBlocco,self.datiAttuali)
        if blocco!=None:

            patSpanPrezzo=rex.compile(r'<span class="a-color-price price3P[\s\S]*</span>')
            patSpanSped=rex.compile(r'<span class="a-size-small a-color-secondary shipping3P[\s\S]*</span>')
            spanPrezzo=rex.search(patSpanPrezzo,self.datiAttuali)
            spanSped=rex.search(patSpanSped,self.datiAttuali)
            if spanPrezzo!=None:
                patPrezzoText=rex.compile(r'EUR \w+,\w{0,2}')
                patSpedText=rex.compile(r'(EUR \w+,\w{0,2})|(Spedizione gratuita)')
                prezzoPulito=rex.search(patPrezzoText,spanPrezzo.group()).group()
                spedPulito=rex.search(patSpedText,spanSped.group()).group()
                self.prezzo=prezzoPulito
                self.speseSped=spedPulito
            else:
                patPrezzo=rex.compile(r'(<span id="priceblock_saleprice"[\s\S]*</span>)|(<span id="priceblock_ourprice"[\s\S]*</span>)')
                prezzo=rex.search(patPrezzo,self.datiAttuali)
                if prezzo!=None:
                    patPrezzoRifinito=rex.compile(r'(EUR \w+,\w{0,2})( - )?(EUR \w+,\w{0,2})?')
                    prezzoRifinito=rex.search(patPrezzoRifinito,prezzo.group())
                    prezzoRaffinato=prezzoRifinito.group()
                    self.prezzo=prezzoRaffinato
                    self.speseSped='ND'
                else:
                    self.prezzo='ND'
                    self.speseSped='ND'                

        else:

            patPrezzo=rex.compile(r'<span id="priceblock_ourprice"[\s\S]*</span>')
            prezzo=rex.search(patPrezzo,self.datiAttuali)
            if prezzo!=None:
                patPrezzoPulito=rex.compile(r'rice">[\s\S]{1,21}</span>')
                prezzoPulito=rex.search(patPrezzoPulito,prezzo.group())
                prezzoFinale=prezzoPulito.group()
                patPrezzoRifinito=rex.compile(r'(EUR \w+,\w{0,2})( - )?(EUR \w+,\w{0,2})?')
                prezzoRifinito=rex.search(patPrezzoRifinito,prezzoFinale)
                prezzoRaffinato=prezzoRifinito.group()
                self.prezzo=prezzoRaffinato
                self.speseSped='ND'
            else:
                self.prezzo='ND'
                self.speseSped='ND' 

    def caricaDati(self):
        self.manager.URLimmagine=self.URLimmagine
        self.manager.prezzo=self.prezzo
        self.manager.spedizione=self.speseSped

#qui la classe principale

class DBmanager:
    def __init__(self,nome):
        self.percorso=None
        self.nomeDB=nome
        self.siti={'amazon':lambda self,URL:Amazon(self,dati)}
        self.nomeSito=None #quando verrà passato un URL, per prima cosa verrà pescato il nome del sito, per poter chiamare la
                           #classe adeguata  
        self.ID=None
        self.URL=None
        self.sorgente=None
        self.URLimmagine=None
        self.prezzo=None
        self.spedizione=None
        self.descrizione='descrizione'
        self.contenuti=[]  #qui verranno posizionati gli atomi, creati dopo aver raccolto le info necessarie tramite la classe
                           #apposita


        self.setPercorso()
        self.inizializza(nome)


    def setPercorso(self):
        file=open('config.txt')
        percorso=file.readline()
        file.close()
        self.percorso=percorso[:len(percorso)-1] #questo serve a togliere il \n finale

    def setURL(self,URL):
        self.URL=URL
        pat=rex.compile=r'.+\.(.*)\.'
        trovaNome=rex.search(pat,self.URL)
        self.nomeSito=trovaNome.group(1)
        apri=urllib.request.urlopen(URL)
        self.sorgente=str(apri.read())



    def chiamaSito(self):
        self.siti[self.nomeSito](self,self.sorgente)

    def inizializza(self,nome):
        try:
            file=open(self.percorso+'DATABASE/'+nome+'.adb','r')
            file.close()
        except:
            file=open(self.percorso+'DATABASE/'+nome+'.adb','w')
            file.write('<<0>>\n') #se il file adb non esiste, crealo e scrivi l'ID generale
            file.close()
            os.mkdir(self.percorso+'DATABASE/'+nome) #crea la cartella che conterrà le immagini




def main():
    nome='piripacchio'
    URL=r'http://www.amazon.it/IMMACOLATA-PORCELLANA-SCULTURA-IMMACULATE-INMACULAD/dp/B016APLOEE/ref=sr_1_1?ie=UTF8&qid=1453325779&sr=8-1&keywords=madonna+statua'
    man=DBmanager(nome)
    man.setURL(URL)
    man.chiamaSito()


if __name__=='__main__':
    main()

The idea for this program is that the user enters an URL from Amazon or other sites like that and the program retrieves the image of the object, the sell price and (eventually) shipping fees. This is done by creating a sort of simple database (DBmanager will populate the list self.contenuti with some dictionaries containing the data). Now, DBmanager has to beahave exactly the same, no matter the site, and this is accomplished by creating specific classes (like the class Amazon) that will have the specific functions for retrieving the data (every site has is own structure, so I can't use the same regex) and simply putting them in the dictionary self.siti: whenever self.chiamaSito is called, the appropriate class (here I have only Amazon) will be called passing self.sorgente as an argument, and this will be the content of the site provided, converted into str, so the class Amazon can search the appropriate informations via regex.

This is for home uses, I already created a similar program that, with the data collected, writes a simple HTML file that is a sort of image gallery of all the objects of interest. What I want to do here, with DBmanager, is downloading the image so it will be displayed into a tkinter label when I will write the GUI. No HTML file needed, the whole thing will be GUI based, like a sort of database.

The problem is that, whatever URL I provide (here I'm doing it in the main() function, for debug purposes) I get this evil and nasty "thing":

File "C:\Users\Admin\pyproj\amazons\amazzone2_0.py", line 178, in <module>
  main()
File "C:\Users\Admin\pyproj\amazons\amazzone2_0.py", line 173, in main
  man.setURL(URL)
File "C:\Users\Admin\pyproj\amazons\amazzone2_0.py", line 148, in setURL
  apri=urllib.request.urlopen(URL)
File "C:\Python34\Lib\urllib\request.py", line 153, in urlopen
  return opener.open(url, data, timeout)
File "C:\Python34\Lib\urllib\request.py", line 440, in open
  req = Request(fullurl, data)
File "C:\Python34\Lib\urllib\request.py", line 258, in __init__
  self.full_url = url
File "C:\Python34\Lib\urllib\request.py", line 283, in full_url
  self._full_url, self.fragment = splittag(self._full_url)
File "C:\Python34\Lib\urllib\parse.py", line 952, in splittag
  _tagprog = re.compile('^(.*)#([^#]*)$')

builtins.TypeError: 'str' object is not callable

From what I understand, the problem is in urllib: for some reason the URL is not parsed correctly and I end up with the error. So, I tried to write this:

import urllib.request
sito=urllib.request.urlopen('http://www.amazon.it/Shaving-Factory-rasoio-professionali-singolo/dp/B003DRL6KK/ref=sr_1_2?ie=UTF8&qid=1453313892&sr=8-2&keywords=rasoio')

sorgente=sito.read()


print('yup')

This is wrote in another module, for testing purpose, there is only this piece of code. If I have it for its own, it works perfectly, with no error. I tried with other URLs, same thing: if I try the second piece of code, no errors; if I try to pass it to DBmanager, I got the error. What is going on?

Upvotes: 0

Views: 467

Answers (1)

Martijn Pieters
Martijn Pieters

Reputation: 1123410

You re-bound re.compile() to a string in the setURL method:

pat=rex.compile=r'.+\.(.*)\.'

This makes two assignments, one to pat and the same object to rex.compile (you imported the re module as rex).

You probably meant to call re.compile() there:

pat = rex.compile(r'.+\.(.*)\.')

Because you rebound re.compile any other code trying to use that function fails with the same exception; you can't call a string.

Upvotes: 2

Related Questions