求教：用 Python 爬取淘宝页面的图片，网页已经爬取成功，但无法匹配到图片，不知道是什么问题，是正则表达式的问题吗？
import os
import re
import urllib.error
import urllib.request
# Search Taobao for a keyword and download the thumbnail of every listing
# found on the requested result pages.

# --- get access to taobao ---
words = "连衣裙"
keywords = urllib.request.quote(words)

# Fake browser header. Installing the opener globally makes both urlopen()
# and urlretrieve() below send this User-Agent.
headers = ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36")
opener = urllib.request.build_opener()
opener.addheaders = [headers]
urllib.request.install_opener(opener)

# Folder the pictures are written to; created up front because
# urlretrieve() does not create missing directories.
save_dir = 'E:/python/练习文件/results/taobao_pictures/'
os.makedirs(save_dir, exist_ok=True)

# Capture the complete protocol-relative image address (extension included).
# The previous pattern left ".jpg" outside the group, so the rebuilt URL had
# no extension, and its unescaped "." / ".*?" could match across fields.
pic_pattern = re.compile(r'"pic_url":"//([^"]+)"')

# --- crawl the result pages and the corresponding pictures ---
# NOTE(review): range(1, 3) requests offsets s=60 and s=120, i.e. it never
# asks for s=0 — confirm whether skipping the first page is intended.
for i in range(1, 3):
    url = 'http://s.taobao.com/list?&q=' + keywords + '&cat=16&style=grid&seller_type=taobao&bcoffset=0&s=' + str(i * 60)
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(url) as response:
        data = response.read().decode('utf-8', 'ignore')

    pics = pic_pattern.findall(data)
    if not pics:
        # NOTE(review): an empty result presumably means the server returned
        # something other than the listing data (for example a login or
        # verification page) — inspect `data` to confirm.
        print('page ' + str(i) + ': no "pic_url" entries found in the response')
        continue

    for j, thispic in enumerate(pics):
        thispicurl = "https://" + thispic
        # Keep the image's own extension (ignoring any query string) and
        # fall back to .jpg when the address has none.
        extension = os.path.splitext(thispic.split('?', 1)[0])[1] or '.jpg'
        # Separate page and picture index so names such as 1/11 and 11/1
        # cannot collide.
        file = os.path.join(save_dir, str(i) + '_' + str(j) + extension)
        try:
            urllib.request.urlretrieve(thispicurl, filename=file)
        except urllib.error.URLError as err:
            # One unreachable picture should not abort the whole crawl.
            print('failed to download ' + thispicurl + ': ' + str(err))