无法爬取淘宝商品的图片
夜445 2021-04-19 10:39:02 我按照教程编写了下面的代码,但仍然无法将淘宝商品的图片下载下来,到底是哪里出了问题?有没有人可以告诉我?
import os
import random
import re
import urllib.request
# Search keyword and its percent-encoded form for use in the query string.
keyname = "连衣裙"
key = urllib.request.quote(keyname)

# Pool of browser User-Agent strings that ua() chooses from.
uapools = [
    (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/78.0.3904.108 Safari/537.36"
    ),
    (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36"
    ),
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
]
def ua(uapools):
    """Install a global urllib opener that sends a randomly chosen User-Agent.

    The chosen User-Agent string is printed so each run shows which one
    was used.

    Args:
        uapools: Non-empty sequence of User-Agent strings to choose from.

    Raises:
        IndexError: If ``uapools`` is empty (raised by ``random.choice``).
    """
    thisua = random.choice(uapools)
    print(thisua)
    opener = urllib.request.build_opener()
    # Assigning addheaders replaces the opener's default header list, so the
    # chosen User-Agent is the only default header it sends.
    opener.addheaders = [("User-Agent", thisua)]
    # Install globally so that later urllib.request.urlopen() calls use it.
    urllib.request.install_opener(opener)
# Directory that receives the downloaded images.
save_dir = (
    "D:\\Program Files (x86)\\PyCharm Community Edition 2020.3.4\\date\\page\\"
    "taobao\\"
)
# urlretrieve() does not create missing directories, so make sure it exists.
os.makedirs(save_dir, exist_ok=True)

# Compiled once for all pages; each match captures an image URL without its
# leading "//".
pic_url_re = re.compile('"pic_url":"//(.*?)"')

for i in range(1, 10):
    # The "s" query parameter advances by 44 for each successive page.
    url = "https://s.taobao.com/search?q=" + key + "&s=" + str((i - 1) * 44)
    # Pick a fresh User-Agent before every page request.
    ua(uapools)
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        page = response.read().decode("utf-8", "ignore")

    imglist = pic_url_re.findall(page)
    if not imglist:
        # Author's note (translated): in testing, the pattern itself was fine
        # but imglist had length 0 at this point.
        # NOTE(review): presumably the server answered the cookieless request
        # with a login / anti-bot page rather than search results - confirm by
        # inspecting `page`; a logged-in Cookie header may be required.
        print("page", i, "- no pic_url entries found in a response of",
              len(page), "characters; skipping")
        continue

    for j, thisimg in enumerate(imglist):
        thisimgurl = "http://" + thisimg
        localfile = save_dir + str(i) + str(j) + ".jpg"
        try:
            urllib.request.urlretrieve(thisimgurl, filename=localfile)
        except OSError as err:
            # urllib's URLError/HTTPError derive from OSError; report the
            # failed image and keep going instead of aborting the whole run.
            print("failed to download", thisimgurl, "->", err)