61,115
社区成员
发帖
与我相关
我的任务
分享
import os
import urllib
import urllib2
import re
url = u"http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1470037956784_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E4%B8%AD%E5%B1%B1%E9%99%B5%E9%9F%B3%E4%B9%90%E5%8F%B0"
#url = u"https://lvyou.baidu.com/zhongshanlingyinyuetai/fengjing/"
outpath = "d:\\g"
def getHtml(url):
request = urllib2.Request(url)
webfile = urllib2.urlopen(request)
outhtml = webfile.read()
#print outhtml
return outhtml
def getImageList(html):
restr = ur'('
restr += ur'http:\/\/[^\s,"]*\.jpg'
restr += ur'|http:\/\/[^\s,"]*\.jpeg'
restr += ur'|http:\/\/[^\s,"]*\.png'
restr += ur'|http:\/\/[^\s,"]*\.gif'
restr += ur'|http:\/\/[^\s,"]*\.bmp'
restr += ur'|https:\/\/[^\s,"]*\.jpeg'
restr += ur'|https:\/\/[^\s,"]*\.jpeg'
restr += ur'|https:\/\/[^\s,"]*\.png'
restr += ur'|https:\/\/[^\s,"]*\.gif'
restr += ur'|https:\/\/[^\s,"]*\.bmp'
restr += ur')'
htmlurl = re.compile(restr)
imgList = re.findall(htmlurl, html)
print imgList
return imgList
def download(imgList, page):
x = 1
for imgurl in imgList:
filepathname = str(outpath + 'pic_%09d_%010d' % (page, x) + str(
os.path.splitext(urllib2.unquote(imgurl).decode('utf8').split('/')[-1])[1])).lower()
print '[Debug] Download file :' + imgurl + ' >> ' + filepathname
urllib.urlretrieve(imgurl, filepathname)
x += 1
def downImageNum(pagenum):
page = 1
pageNumber = pagenum
while (page <= pageNumber):
html = getHtml(url) # 获得url指向的html内容
imageList = getImageList(html) # 获得所有图片的地址,返回列表
download(imageList, page) # 下载所有的图片
page = page + 1
if __name__ == '__main__':
downImageNum(1)