missing 1 required positional argument :parser

qq_28615467 2017-10-08 10:56:44
按照python数据处理这本书的介绍利用slate.py正在对pdf文件内容进行解析,现在当运行cmd时 会出现这个错误 。
missing 1 required positional argument :parser

import sys
PYTHON_3 = sys.version_info[0] == 3
if PYTHON_3:
from io import StringIO
else:
from StringIO import StringIO
from pdfminer.pdfpage import PDFPage


from pdfminer.pdfparser import PDFParser
from pdfminer.pdfinterp import PDFResourceManager
from pdfminer.pdfinterp import PDFPageInterpreter as PI
from pdfminer.layout import LAParams
from pdfminer.converter import TextConverter
# the internal API has changed between versions upstream,
# allow both here..
try:
from pdfminer.pdfparser import PDFDocument
except ImportError:
from pdfminer.pdfdocument import PDFDocument
try:
from pdfminer.pdfparser import PDFPage
except ImportError:
from pdfminer.pdfpage import PDFPage
import utils

__all__ = ['PDF']

class PDFPageInterpreter(PI):
def process_page(self, page):
if hasattr(self, 'debug'):
if 1 <= self.debug:
print >>stderr, 'Processing page: %r' % page
(x0,y0,x1,y1) = page.mediabox
if page.rotate == 90:
ctm = (0,-1,1,0, -y0,x1)
elif page.rotate == 180:
ctm = (-1,0,0,-1, x1,y1)
elif page.rotate == 270:
ctm = (0,1,-1,0, y1,-x0)
else:
ctm = (1,0,0,1, -x0,-y0)
self.device.outfp.seek(0)
self.device.outfp.truncate(0)
self.device.begin_page(page, ctm)
self.render_contents(page.resources, page.contents, ctm=ctm)
self.device.end_page(page)
return self.device.outfp.getvalue()

class PDF(list):
def __init__(self, file, password='', just_text=1, check_extractable=True, char_margin=1.0, line_margin=0.1, word_margin=0.1):
self.parser = PDFParser(file)
self.laparams = LAParams(char_margin=char_margin, line_margin=line_margin, word_margin=word_margin)

if PYTHON_3:
self.doc = PDFDocument()
self.parser.set_document(self.doc)
self.doc.set_parser(self.parser)
self.doc.initialize(password)
else:
self.doc = PDFDocument(self.parser, password)

if not check_extractable or self.doc.is_extractable:
self.resmgr = PDFResourceManager()
self.device = TextConverter(self.resmgr, outfp=StringIO(), laparams=self.laparams)
self.interpreter = PDFPageInterpreter(
self.resmgr, self.device)

if PYTHON_3:
page_generator = self.doc.get_pages()
else:
page_generator = PDFPage.create_pages(self.doc)

for page in page_generator:
self.append(self.interpreter.process_page(page))
self.metadata = self.doc.info
if just_text:
self._cleanup()

def _cleanup(self):
"""
Frees lots of non-textual information, such as the fonts
and images and the objects that were needed to parse the
PDF.
"""
self.device = None
self.doc = None
self.parser = None
self.resmgr = None
self.interpreter = None

def text(self, clean=True):
"""
Returns the text of the PDF as a single string.
Options:

:clean:
Removes misc cruft, like lots of whitespace.
"""
if clean:
return utils.normalise_whitespace(''.join(self).replace('\n', ' '))
else:
return ''.join(self)





import slate

pdf = 'EN-FINAL Table 9.pdf'

with open(pdf) as f:
doc = slate.PDF(f)

for page in doc[:2]:
print (type(page))
...全文
1323 回复 打赏 收藏 转发到动态 举报
写回复
用AI写文章
回复
切换为时间正序
请发表友善的回复…
发表回复
Transformer发轫于NLP(自然语言处理),并跨界应用到CV(计算机视觉)领域。目前已成为深度学习的新范式,影响力和应用前景巨大。  本课程对Transformer的原理和PyTorch代码进行精讲,来帮助大家掌握其详细原理和具体实现。  原理精讲部分包括:注意力机制和自注意力机制、Transformer的架构概述、Encoder的多头注意力(Multi-Head Attention)、Encoder的位置编码(Positional Encoding)、残差链接、层规范化(Layer Normalization)、FFN(Feed Forward Network)、Transformer的训练及性能、Transformer的机器翻译工作流程。   代码精讲部分使用Jupyter Notebook对Transformer的PyTorch代码进行逐行解读,包括:安装PyTorch、Transformer的Encoder代码解读、Transformer的Decoder代码解读、Transformer的超参设置代码解读、Transformer的训练示例(人为随机数据)代码解读、Transformer的训练示例(德语-英语机器翻译)代码解读。相关课程: 《Transformer原理与代码精讲(PyTorch)》https://edu.csdn.net/course/detail/36697《Transformer原理与代码精讲(TensorFlow)》https://edu.csdn.net/course/detail/36699《ViT(Vision Transformer)原理与代码精讲》https://edu.csdn.net/course/detail/36719《DETR原理与代码精讲》https://edu.csdn.net/course/detail/36768《Swin Transformer实战目标检测:训练自己的数据集》https://edu.csdn.net/course/detail/36585《Swin Transformer实战实例分割:训练自己的数据集》https://edu.csdn.net/course/detail/36586《Swin Transformer原理与代码精讲》 https://download.csdn.net/course/detail/37045

37,720

社区成员

发帖
与我相关
我的任务
社区描述
JavaScript,VBScript,AngleScript,ActionScript,Shell,Perl,Ruby,Lua,Tcl,Scala,MaxScript 等脚本语言交流。
社区管理员
  • 脚本语言(Perl/Python)社区
  • IT.BOB
加入社区
  • 近7日
  • 近30日
  • 至今

试试用AI创作助手写篇文章吧