# NOTE(review): the lines below appear to be pasted web-page residue
# (forum stats and navigation labels), not Python code. Preserved as a
# comment so the module can actually be imported:
# 37,719 | 社区成员 | 发帖 | 与我相关 | 我的任务 | 分享
import requests
from bs4 import BeautifulSoup
import urllib
from urllib import request
import time
import threading
from tkinter import *
import pymysql
from datetime import datetime
from tkinter.scrolledtext import ScrolledText
# Database connection parameters for logging download jobs.
# NOTE(review): credentials are hard-coded -- consider env vars or a config file.
host = 'localhost'
user = 'root'
password = '123456'
database = 'txt'
# PyMySQL removed positional connect() arguments in v1.0; keyword arguments
# work on every version and make the call unambiguous.
db = pymysql.connect(host=host, user=user, password=password, database=database)
cursor = db.cursor()
'''
申请http头
'''
# Browser-like HTTP request headers (Chrome 70 on Windows 10) sent with every
# request so the site treats us as a normal browser.
# NOTE(review): the 'cookie' value is a hard-coded session capture and will
# expire; requests may still work without it -- confirm against the site.
headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'zh,zh-TW;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6',
'cache-control': 'max-age=0',
'cookie': 'UM_distinctid=1669ab46d60cf-0b7dbd5850197f-b79193d-1fa400-1669ab46d6176b; width=85%25; CNZZDATA1259955010=2006150613-1540191850-https%253A%252F%252Fwww.baidu.com%252F%7C1541933446; Hm_lvt_17077e8d2ddd3bade67fd92a3bcbbc9f=1541930887,1541931811,1541933115,1541933565; Hm_lpvt_17077e8d2ddd3bade67fd92a3bcbbc9f=1541933565',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
}
# Novel scraper: fetch chapter index and chapter bodies from a biquge-style site.
class Downloader(object):
    """Download a novel chapter by chapter from its table-of-contents URL."""

    def __init__(self, target):
        """Store the novel's table-of-contents (index page) URL.

        target: URL of the chapter-index page.
        """
        self._target = target

    def get_url(self):
        """Fetch the index page and return the chapter anchor tags.

        Returns a (possibly empty) list of bs4 <a> tags; each tag's 'href'
        attribute is the chapter URL and .string its title.
        """
        req = requests.get(url=self._target)
        bf = BeautifulSoup(req.text, 'html5lib')
        content_url = bf.find('div', id='list')
        # FIX: the original re-parsed str(content_url) through a second
        # BeautifulSoup pass; content_url is already a Tag, so search it
        # directly.  The None guard preserves the old behavior (re-parsing
        # the string 'None' produced an empty result list).
        if content_url is None:
            return []
        return content_url.find_all('a')

    def get_txt(self, content_url, title):
        """Fetch one chapter page and return 'title' + newline + cleaned body text."""
        req = requests.get(url=content_url)
        bf = BeautifulSoup(req.text, 'html.parser')
        text_content = bf.find('div', id='content')
        # Rewrite the site's whitespace-based paragraph indentation into
        # newline-separated paragraphs.
        content = text_content.text.replace(' ', '\n\n ').replace(' ', '\n\n ').replace('\n\n', '\n')
        txt = title + '\n' + content
        return txt

    def write_txt(self, filename, txt):
        """Append txt to filename as UTF-8 (append mode so chapters accumulate)."""
        with open(filename, 'a', encoding='utf-8') as f:
            f.write(txt)
def search_novel(content):
    """Search the site for a novel by exact title.

    content: the novel title to look up (may contain Chinese characters).
    Returns the novel's homepage URL, or None when no exact match is found.
    """
    content_code = urllib.request.quote(content)  # percent-encode non-ASCII title
    url = 'https://www.biquge5200.cc/modules/article/search.php?searchkey=' + content_code
    # BUG FIX: the headers dict was passed as params=, which appended every
    # header as a query-string parameter instead of sending HTTP headers.
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "html.parser")
    # Only the first result table holds search hits; the first cell of each
    # row links to the novel page -- TODO confirm against the site's markup.
    for table in soup.find_all('table')[0:1]:
        for row in table.find_all('tr'):
            for cell in row.find_all('td')[0:1]:
                # Guard: header rows may have a <td> without an anchor.
                if cell.a is not None and cell.a.string == content:
                    return cell.a['href']
def downloading(content, d, a, i, filename):
    """Fetch chapter i via downloader d and append it to filename.

    content is accepted for signature compatibility with callers but is
    not used here.
    """
    chapter_link = a[i].get('href')
    chapter_title = a[i].string
    chapter_text = d.get_txt(chapter_link, chapter_title)
    d.write_txt(filename, chapter_text)
def doit(var):
    """Start-button callback: search for the novel named var and download it.

    Opens a progress window, spawns one daemon thread per chapter, then logs
    the start/end timestamps to the database once the spawn loop finishes.
    """
    content = var
    tk = Tk()
    tk.title("下载进度")
    tk.geometry('300x200')
    text = ScrolledText(tk, font=('微软雅黑', 10), fg='blue')
    text.grid()
    tk.update()
    now = datetime.now()
    time_begin = datetime.strftime(now, '%Y-%m-%d %H:%M:%S')
    target = search_novel(content)
    filename = content + '.txt'
    d = Downloader(target)
    a = d.get_url()
    length = len(a)
    # Skip the first 9 anchors: presumably site navigation / "latest
    # chapters" links before the real chapter list -- TODO confirm.
    for i in range(9, length):
        th = threading.Thread(target=downloading, args=(content, d, a, i, filename,))
        # FIX: Thread.setDaemon() is deprecated (removed in Python 3.12);
        # assign the daemon attribute instead.
        th.daemon = True
        th.start()
        # NOTE(review): 'ok' is shown when the thread STARTS, not when the
        # chapter finishes; daemon threads may be killed before they write.
        text.insert(END, a[i].string + 'ok' + '\n')
        time.sleep(0.001)
    now = datetime.now()
    time_end = datetime.strftime(now, '%Y-%m-%d %H:%M:%S')
    database(content, time_begin, time_end)
# Database logging helper.
def database(name, time_begin, time_end):
    """Record one download job (name, start time, end time) in the `text` table.

    NOTE(review): this closes the module-level cursor and connection, so it
    can only succeed once per process run -- confirm that is intentional.
    """
    # FIX: parameterized query instead of %-string formatting -- avoids SQL
    # injection and quoting bugs when the novel name contains quotes.
    cursor.execute("insert into text values (%s, %s, %s)", (name, time_begin, time_end))
    cursor.connection.commit()
    cursor.close()
    db.close()
def main():
    """Build the main Tk window: a title entry plus a start button."""
    root = Tk()
    root.title("TXT")
    root.geometry('300x100')
    Label(root, text="请输入小说名字:").grid(row=0, column=0)
    var = StringVar()
    e1 = Entry(root, textvariable=var)
    e1.grid(row=0, column=1)
    # BUG FIX: Button(...).grid(...) returns None, so the old code stored
    # None in its variable.  Keep the widget reference, then place it.
    start_button = Button(root, text="开始", width=10, command=lambda: doit(e1.get()))
    start_button.grid(row=2, column=1)
    root.mainloop()
"""
开始入口
"""
# Script entry point: launch the GUI only when run directly, not on import.
if __name__ == '__main__':
    main()