python 用mulitiprocessing多线程爬虫出现这个问题是为什么

x894724813 2018-10-10 01:21:42
多进程爬虫出现TypeError: cannot serialize '_io.BufferedReader' object


run.py 调用main.py:


import os
import sys
# 配置环境变量
BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, BASE_PATH)

if __name__ == '__main__':
from qianchengwuyou.core.spider.main import main
main()


main.py:

import multiprocessing
from qianchengwuyou.core.spider.log import flush_log # 清空日志
from qianchengwuyou.core.spider.search import search # 爬虫主程序
from qianchengwuyou.core.db.mysql import Create, Select # 数据库
from qianchengwuyou.core.analy.analy import Df # 数据分析

# 进程锁
lock = multiprocessing.Lock()

# 多进程。参数:函数、关键词、并发量
def concurrence(funct, kw, step):
# 连接数据库
db = Create(kw)
pool = [] # 进程池
for offset in range(step): # offset是页码偏移量
p = multiprocessing.Process(target=funct, args=(db, offset, step))
pool.append(p)
for p in pool:
p.start()
for p in pool:
p.join()

def analy(kw):
db = Select(kw)
table = db.select_all()
df = Df(table, kw)
df.write()

def main():
# 爬虫前清空日志
flush_log()
# 关键词
kw = input('关键词: ').strip()
print('1、爬虫\n2、分析\n3、爬虫+分析')
n = input('选择数字').strip()
if n == '1':
num = int(input('进程数').strip())
concurrence(search, kw, num)
elif n == '2':
analy(kw)
elif n == '3':
num = int(input('进程数').strip())
concurrence(search, kw, num)
analy(kw)
else:
print('退出')






出错原因:

C:\Users\xiach\PycharmProjects\python3\py\Scripts\python.exe C:/Users/xiach/PycharmProjects/python3/qianchengwuyou/launch/run.py
关键词: java
1、爬虫
2、分析
3、爬虫+分析
选择数字1
进程数1
连接成功

create table java(
url char(255) primary key,
post char(255) not null,
cp_name char(255) not null,
workplace char(255),
salary char(255),
kwd char(255),
exp char(255),
edu char(255),
cp_type char(255),
cp_scale char(255),
welfare char(255),
detail text(65535),
date_time timestamp
);

Traceback (most recent call last):
File "C:/Users/xiach/PycharmProjects/python3/qianchengwuyou/launch/run.py", line 11, in <module>
main()
File "C:\Users\xiach\PycharmProjects\python3\qianchengwuyou\core\spider\main.py", line 38, in main
concurrence(search, kw, num)
File "C:\Users\xiach\PycharmProjects\python3\qianchengwuyou\core\spider\main.py", line 19, in concurrence
p.start()
File "C:\Users\xiach\Anaconda3\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "C:\Users\xiach\Anaconda3\lib\multiprocessing\context.py", line 212, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Users\xiach\Anaconda3\lib\multiprocessing\context.py", line 313, in _Popen
return Popen(process_obj)
File "C:\Users\xiach\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 66, in __init__
reduction.dump(process_obj, to_child)
File "C:\Users\xiach\Anaconda3\lib\multiprocessing\reduction.py", line 59, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot serialize '_io.BufferedReader' object

Process finished with exit code 1
...全文
242 1 打赏 收藏 转发到动态 举报
写回复
用AI写文章
1 条回复
切换为时间正序
请发表友善的回复…
发表回复
x894724813 2018-10-10
  • 打赏
  • 举报
回复
我用的pycharm 和Anaconda3 ,python版本3.7

37,720

社区成员

发帖
与我相关
我的任务
社区描述
JavaScript,VBScript,AngleScript,ActionScript,Shell,Perl,Ruby,Lua,Tcl,Scala,MaxScript 等脚本语言交流。
社区管理员
  • 脚本语言(Perl/Python)社区
  • IT.BOB
加入社区
  • 近7日
  • 近30日
  • 至今

试试用AI创作助手写篇文章吧