37,719
社区成员
发帖
与我相关
我的任务
分享
# coding=UTF-8
from bs4 import BeautifulSoup
import requests
import os
import time
# Scrape and download the images referenced by a web page.
class Car():
    """Download every ``<img>`` referenced by one photo-list web page.

    Attributes (all set in ``__init__``):
        headers: HTTP headers sent with every request (a browser User-Agent).
        web_url: Page whose ``<img>`` tags are scanned by ``get_pic``.
        folder_path: Directory the downloaded images are written into.
    """

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0(Windows NT 6.1;WOW64;Trident/7.0;rv:11.0)like Gecko'}
        self.web_url = 'https://car.autohome.com.cn/photolist/series/28279/3696799.html#pvareaid=101467'
        # Raw string: the plain literal only worked because '\c' is not a
        # recognised escape sequence. The resulting value is unchanged.
        self.folder_path = r'D:\car'

    def request(self, url, timeout=30):
        """GET *url* with the configured headers and return the response.

        Args:
            url: Address to fetch.
            timeout: Seconds to wait on the server before ``requests`` gives
                up, so a stalled connection cannot hang the run forever.

        Returns:
            The ``requests`` response object.
        """
        return requests.get(url, headers=self.headers, timeout=timeout)

    def mkdir(self, path):
        """Create directory *path* (including parents) unless it exists.

        Surrounding whitespace in *path* is stripped before use.
        """
        path = path.strip()
        if os.path.exists(path):
            print(path, ' ready !')
            return
        print('开始创建名字为', path, '的文件夹')
        os.makedirs(path)
        print('创建文件夹成功')

    def save_img(self, url, name):
        """Download *url* and write its body to ``name + '.jpg'``.

        The file name is used as given, so a relative *name* is resolved
        against the current working directory. Responses with an HTTP error
        status are skipped instead of being written out as a broken image.

        Args:
            url: Address of the image.
            name: Output file name without the ``.jpg`` extension.
        """
        print('开始保存图片')
        img = self.request(url)
        # Pause after each request to avoid hammering the server.
        time.sleep(5)
        if not img.ok:
            print(url, ' skipped, HTTP status', img.status_code)
            return
        file_name = name + '.jpg'
        print('开始保存文件')
        # The with-block closes the file; no explicit close() is needed.
        with open(file_name, 'wb') as f:
            f.write(img.content)
        print(file_name, ' save ok ')

    def _absolute_url(self, src):
        """Turn an ``<img src>`` value into a fetchable URL.

        Protocol-relative values (``//host/path``) keep the ``http:`` prefix
        the scraper has always used. Anything else is resolved against
        ``web_url``, which leaves absolute URLs untouched and expands
        relative paths.
        """
        from urllib.parse import urljoin

        if src.startswith('//'):
            return 'http:' + src
        return urljoin(self.web_url, src)

    def get_pic(self):
        """Fetch ``web_url`` and save each image it references.

        Images are written into ``folder_path`` as ``<n>.jpg``, where *n* is
        the 1-based position of the ``<img>`` tag on the page. Tags without
        a ``src`` attribute, and sources that do not resolve to an http(s)
        URL (for example inline ``data:`` images), are skipped.
        """
        print('开始网页get请求')
        r = self.request(self.web_url)
        print('开始获取所有img标签')
        all_img = BeautifulSoup(r.text, 'lxml').find_all('img')
        print('开始创建文件夹')
        self.mkdir(self.folder_path)
        print('开始切换文件夹')
        # save_img writes relative file names, so make the target folder the
        # working directory first.
        os.chdir(self.folder_path)
        print(len(all_img))
        for i, img in enumerate(all_img, 1):
            src = img.get('src')
            if not src:
                # An <img> without src would otherwise raise KeyError and
                # abort the whole run.
                continue
            img_str = self._absolute_url(src)
            if not img_str.startswith(('http://', 'https://')):
                continue
            print('img:', img_str)
            self.save_img(img_str, str(i))
# Script entry: build the scraper and download every image found on the
# page configured in Car.__init__. This runs at import time as well.
car = Car()
car.get_pic()