Reputation: 9
Can anyone can help me figure out this problem?
Code
import re
import time
import requests
import pandas as pd
from retrying import retry
from concurrent.futures import ThreadPoolExecutor
start = time.clock()
plist=[]
for i in range(1,101):
j=44*(i-1)
plist.append(j)
listno=plist
datamsp=pd.DataFrame(columns=[])
while True:
@retry(stop_max_attempt_number=8)
def network_programming(num):
url='https://s.taobao.com/search?q=沙发&type=p&tmhkh5=&spm=a21wu.241046-global.a2227oh.d100&from=sea_1_suggest&catId=100&bcoffset=3&ntoffset=3&p4ppushleft=1%2C48&s='+str(num)
web=requests.get(url,headers=headers)
web=encoding ='utf-8'
return web
def multithreading():
number=listno
event=[]
with ThreadPoolExecutor(max_workers=10)as executor:
for result in executor.map (network_programming ,number,chunksize=10):
event.append(result)
return event
headers={'user-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
listpg=[]
event= multithreading()
for i in event:
json=re.findall('"auctions":(.*?),"recommendAuctions"',i.text)
if len(json):
table=pd.read_json(json[0])
datamsp=pd.concat([datamsp,table],axis=0,ignore_index=True)
pg=findall('"pageNum":(.*?),"p4pbottom_up"',i.text[0])
listpg.append(pg)
lists=[]
for a in listpg:
b=44*(int(a)-1)
lists.append(b)
listn=listno
listno=[]
for p in listn:
listno.append(p)
if len(listno)==0:
break
datamsp.to_excel('datamsp.xls',index=False)
end=time.clock()
print("time cost:",end-start,'s')
Error:
AttributeError Traceback (most recent call last)
<ipython-input-7-35c03bdc4fc9> in <module>
38 event= multithreading()
39 for i in event:
---> 40 json=re.findall('"auctions":(.*?),"recommendAuctions"',i.text)
41 if len(json):
42 table=pd.read_json(json[0])
AttributeError: 'str' object has no attribute 'text'
Upvotes: 0
Views: 457
Reputation: 330
In for i in event
block, i
is already a string. Change line 40 to json = re.findall('"auctions:(.*?),"recommendAuctions"', i)
Upvotes: 3