37,721
社区成员
发帖
与我相关
我的任务
分享
<body>
<div class="nav">fdsafkdsajkfdslajkfldsajfklsdajlfdslafjdsalfjdskalfkdsajlkfds</div>
<div class="content">
<p>fdaskfdska</p>
<p>fdaskfdska</p>
<p>fdaskfdska</p>
<p>fdaskfdska</p>
<p>fdaskfdska</p>
<!-- Goal: match everything inside the element whose class is "content" [tags included] -->
</div>
<div class="bottom">fdsafkdsajkfdslajkfldsajfklsdajlfdslafjdsalfjdskalfkdsajlkfds</div>
</body>
def get_qsykcontent(path):
    '''
    Extract the content section from a local HTML file.

    Reads the file at ``path`` as UTF-8, collects the inner HTML (tags
    included) of every ``<div ... class="content" ...>`` element, prints
    the list of matches and returns it.

    @path  path of the HTML file to read
    @return list of matched inner-HTML strings (empty when nothing matches)

    NOTE: each match ends at the first ``</div>`` after the opening tag, so
    a content block that itself contains nested ``<div>`` elements is cut
    short; use a real HTML parser for such documents.
    '''
    # Local import keeps the function self-contained.
    # NOTE(review): the author reported an error on the original pattern
    # line; the cause is not visible here (possibly a missing `re` import)
    # -- confirm.
    import re

    with open(path, "rb") as rsread:
        kisa = rsread.read().decode('utf8')

    # re.S makes "." match newlines as well; without it the match stopped at
    # the end of the opening tag's line. The non-greedy group stops at the
    # first closing </div> instead of running to the end of the document.
    pasn = re.compile(r'<div[^>]*\bclass="content"[^>]*>(.*?)</div>', re.S)
    result = pasn.findall(kisa)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(result)
    return result
<body>
<div class="nav">fdsafkdsajkfdslajkfldsajfklsdajlfdslafjdsalfjdskalfkdsajlkfds</div>
<div class="content">
<p>fdaskfdska</p>
<p>fdaskfdska</p>
<p>fdaskfdska</p>
<p>fdaskfdska</p>
<p>fdaskfdska</p>
<!-- Goal: match everything inside the element whose class is "content" [tags included] -->
</div><!-- the match should extend only up to this point -->
<div class="bottom">fdsafkdsajkfdslajkfldsajfklsdajlfdslafjdsalfjdskalfkdsajlkfds</div>
</body>
谢谢
# Sample page: three sibling <div> elements, of which only the one with
# class "content" is wanted.
Str = '''
<body>
<div class="nav">fdsafkdsajkfdslajkfldsajfklsdajlfdslafjdsalfjdskalfkdsajlkfds</div>
<div class="content">
<p>fdaskfdska</p>
<p>fdaskfdska</p>
<p>fdaskfdska</p>
<p>fdaskfdska</p>
<p>fdaskfdska</p>
</div>
<div class="bottom">fdsafkdsajkfdslajkfldsajfklsdajlfdslafjdsalfjdskalfkdsajlkfds</div>
</body>
'''

# Cut the page at every closing </div> and print the piece that holds the
# opening tag of the content block. The printed piece still starts with
# that opening tag, and the closing </div> is consumed by the split.
for one in Str.split("</div>"):
    if '''<div class="content">''' in one:
        # Parenthesised single-argument print works on Python 2 and 3.
        print(one)
>>>
<div class="content">
<p>fdaskfdska</p>
<p>fdaskfdska</p>
<p>fdaskfdska</p>
<p>fdaskfdska</p>
<p>fdaskfdska</p>
# Read the file as UTF-8 and collect the inner HTML (tags included) of every
# <div ... class="content" ...> element.
# `path` and `re` are used here without being defined in this fragment; they
# must be provided by the surrounding code.
with open(path, "rb") as rsread:
    kisa = rsread.read().decode('utf8')
# re.S makes "." match newlines too, so the match can span the multi-line
# content block; the non-greedy group stops at the first closing </div>.
# NOTE(review): the author flagged the original pattern line as raising an
# error; the cause is not visible in this fragment -- confirm.
pasn = re.compile(r'<div[^>]*\bclass="content"[^>]*>(.*?)</div>', re.S)
result = pasn.findall(kisa)