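# NOTE: leftover navigation text from the web page this snippet was copied
# from (not part of the program): "34 / community members / post /
# related to me / my tasks / share".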
import requests
from bs4 import BeautifulSoup
def _parse_rank_page(html):
    """Return a ``{title: rank_text}`` mapping for one ranking page.

    Titles are read from the first ``<a>`` inside each ``book-mid-info``
    element and ranks from the first node inside the first ``<span>`` of each
    ``book-img-box`` element; the two sequences are paired by position.
    Pairs with a missing anchor or span are skipped, and when a title occurs
    more than once on the page the first occurrence wins.
    """
    soup = BeautifulSoup(html, 'html.parser')
    info_boxes = soup.find_all(class_='book-mid-info')
    img_boxes = soup.find_all(class_='book-img-box')

    ranks = {}
    for info_box, img_box in zip(info_boxes, img_boxes):
        anchor = info_box.find('a')
        badge = img_box.span
        if anchor is None or badge is None:
            continue
        # setdefault keeps the first occurrence of a repeated title.
        ranks.setdefault(anchor.string, badge.next_element)
    return ranks


def crawler(book_list, max_pages=5):
    """Rank the requested books relative to each other using Qidian's list.

    Pages ``1..max_pages`` of ``https://www.qidian.com/rank/yuepiao`` are
    fetched in order until every title in ``book_list`` has been seen. The
    titles are then ordered by the rank shown on the site and renumbered
    ``1..n``.

    Args:
        book_list: Iterable of book titles (strings) to look up. Duplicate
            titles are ignored.
        max_pages: Number of ranking pages to scan before giving up.
            Defaults to 5.

    Returns:
        A dict mapping each requested title to its relative position
        (1 = best ranked among the requested titles). An empty dict is
        returned when ``book_list`` is empty or when at least one title was
        not found within ``max_pages`` pages.

    Raises:
        requests.RequestException: If a page cannot be fetched (including a
            timeout).
        ValueError, TypeError: If the rank text of a requested title cannot
            be converted to ``int``.
    """
    # De-duplicate while keeping the caller's order, so ties are resolved
    # in the order the titles were requested.
    wanted = list(dict.fromkeys(book_list))
    if not wanted:
        return {}

    headers = {
        # Mobile browser User-Agent; presumably chosen so the site serves
        # the expected markup -- not confirmed.
        "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
    }

    site_rank = {}
    for page in range(1, max_pages + 1):
        base_url = 'https://www.qidian.com/rank/yuepiao?style=1&page={}'.format(page)
        # A timeout keeps a stalled connection from hanging the caller forever.
        response = requests.get(base_url, headers=headers, timeout=10)
        response.encoding = 'utf-8'
        page_ranks = _parse_rank_page(response.text)

        for title in wanted:
            if title in page_ranks and title not in site_rank:
                site_rank[title] = int(page_ranks[title])

        # Stop as soon as everything is found; checking here (rather than
        # on the next iteration) also covers a hit on the very last page.
        if len(site_rank) == len(wanted):
            break

    if len(site_rank) < len(wanted):
        return {}

    ordered = sorted(site_rank, key=site_rank.get)
    return {title: position for position, title in enumerate(ordered, start=1)}
if __name__ == '__main__':
    # Demo run: look up three titles and print their relative ranking.
    book_list = [
        "大奉打更人",
        "明克街13号",
        "灵境行者",
    ]
    result = crawler(book_list)
    print(result)