37,717
社区成员
发帖
与我相关
我的任务
分享
# Collect the HTML of the first post body: every line from the one holding the
# opening '<td class="t_msgfont" id="postmessage' tag through the first later
# line that contains '</table>'. Lines are concatenated without separators.
# Expects `html` (the page source as a str) to be defined before this point.
#
# NOTE(review): the nesting below is reconstructed; the pasted snippet had
# lost its indentation.
#
# Original author's note (translated): "the result kept only the last `line`
# value and nothing before it; a print added below this line never seemed to
# run, yet stepping through in the debugger everything looked normal."
# Likely cause: html.split('\n') leaves a trailing '\r' on every line of CRLF
# input, so printing the joined text makes a terminal overwrite itself and
# show only the final line. splitlines() strips all line terminators.
found = False
post_lines = []
for line in html.splitlines():
    if found:
        post_lines.append(line)
        if '</table>' in line:
            # End of the post's table: stop scanning.
            break
    elif '<td class="t_msgfont" id="postmessage' in line:
        # Start marker: begin collecting from this line onward. Using elif
        # ensures a later marker line cannot reset what was already collected.
        post_lines.append(line)
        found = True
# Always bound, and empty when the marker never appears.
post_html = ''.join(post_lines)
import re

# Extract every post body from the saved page 'zz.html' and print them as a
# list of HTML fragments.
#
# NOTE(review): the layout below is reconstructed; the pasted snippet had lost
# its indentation. The print is assumed to run once, after all matches are
# collected.

# re.S lets '.' match newlines so one match can span several lines; the
# non-greedy '.*?' ends each match at the first '</table>' after the marker.
pattern = re.compile(r'<td class="t_msgfont" id="postmessage.*?</table>', re.S)

# The file is only read, so plain 'r' is enough; the original 'r+' mode also
# required write permission on the file.
with open('zz.html', 'r') as f:
    html = f.read()

# findall already returns a list of the matched strings (the pattern has no
# capture groups), so no manual append loop is needed.
post_html = pattern.findall(html)
print(post_html)