不知道为什么保存不了图片。。。
import json
import re
from urllib.parse import urlencode
import pymongo as pymongo
from ppp import *
import lxml
import requests
from bs4 import BeautifulSoup
from requests import RequestException
from hashlib import md5
import os
client = pymongo.MongoClient(MONGO_URL)
db = client[MONGO_DB]
def get_page_index():
data={
'offset': '0',
'format': 'json',
'keyword':'四驱兄弟',
'autoload':'true',
'count': '20',
'cur_tab':'3',
'from':'gallery'
}
url = 'https://www.toutiao.com/search_content/?'+ urlencode(data)
response = requests.get(url)
try:
if response.status_code == 200:
return response.text
else:
print('请求索引页出错')
return None
except RequestException:
return None
def qwer(html):
data = json.loads(html)
for item in data['data']:
yield item.get('article_url')
def open(url):
response = requests.get(url)
try:
if response.status_code == 200:
return response.text
else:
return None
except RequestException:
print('请求详情页出错')
def ww(html):
soup = BeautifulSoup(html,'lxml')
title = soup.select('title')[0].get_text()
pattern = re.compile('JSON.parse\((.*?)\)', re.S)
image = re.search(pattern,html)
if image:
result = json.loads(image.group(1))
result = json.loads(result)
print(result)
for item in result.get('sub_images'):
url = item.get('url')
print('正在下载:',url)
download(url)
return {
'tltle':title,
'url':url
}
def download(it):
response = requests.get(it)
try:
if response.status_code == 200:
return save(response.content)
print(response.text)
else:
print('请求图片出错')
return None
except RequestException:
return None
def save(concent):
file_path = '{0}/{1}.{2}'.format(os.getcwd(), md5(concent).hexdigest(), 'jpg')
if not os.path.exists(file_path):
with open(file_path, 'wb') as f:
f.write(concent)
f.close()
'''
if os.path.exists(os.getcwd() + '\\images'):
file_path = '{0}\\images\\{1}.{2}'.format(os.getcwd(), md5(concent).hexdigest(), 'jpg')
else:
os.mkdir(os.getcwd() + '\\images')
file_path = '{0}\\images\\{1}.{2}'.format(os.getcwd(), md5(concent).hexdigest(), 'jpg')
if not os.path.exists(file_path):
with open(file_path, 'wb') as fp:
fp.write(concent)
'''
def save_mongo(result):
try:
if db[MONGO_TABLE].insert(result):
print('存储到Mongodb成功')
return True
else:
print('这条存储失败了')
except:
print('333333')
def main() :
html=get_page_index()
for url in qwer(html):
html=open(url)
url = ww(html)
if __name__ == '__main__':
main()
在函数save处出错了
TypeError: open() takes 1 positional argument but 2 were given
不是很理解。。。。