37,736
社区成员
发帖
与我相关
我的任务
分享
def _read_url(url):
    """Return the raw response body of *url*, always closing the connection."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def body(url1, url2):
    """Fetch a GBK-encoded HTML page and return the text html2txt extracts.

    ``url1`` is tried first. If opening or reading it raises, ``url2`` is
    fetched instead; an error while fetching ``url2`` propagates to the caller.

    The downloaded bytes are decoded from GBK before being fed to the
    ``html2txt`` parser, and the parser's ``text`` attribute is returned.
    NOTE(review): the parser receives already-decoded text, so the result is
    presumably unicode and must not be decoded again by callers -- confirm
    against html2txt.
    """
    try:
        raw = _read_url(url1)
    except Exception:
        # Deliberate best-effort fallback: any failure on url1 switches to url2.
        raw = _read_url(url2)
    page = unicode(raw, "gbk")
    parser = html2txt()
    parser.feed(page)
    parser.close()
    return parser.text
def cn2juhao(txt):
    """Normalise sentence endings so each sentence ends with '。' and a newline.

    Every occurrence of '。', '!' or '……' in *txt* is replaced by '。'
    followed by a line break, and the resulting text is returned. Text
    without any of these markers is returned unchanged.

    The pattern and replacement are unicode literals so that they match
    unicode input under Python 2 as well as str input under Python 3.
    """
    # re.sub returns a new string and never modifies its argument, so the
    # result must be returned (the original discarded it and returned txt).
    return re.sub(u'(。|!|……)', u'。\n', txt)
<type 'unicode'>
# body() already returns decoded (unicode) text, so hand it to cr2n directly.
# Wrapping it in unicode(..., "gbk") again raises
# "TypeError: decoding Unicode is not supported".
danju = cr2n(body(url1, url2))
TypeError: decoding Unicode is not supported