37,719
社区成员
发帖
与我相关
我的任务
分享
<div class="c">
<a href="/u/">
user
</a>
[所需内容]
<span class="cc">
bbb
</span>
<span class="ct">
ccc
</span>
</div>
from bs4 import BeautifulSoup as bs
import bs4
# Sample markup: the wanted content is everything that sits between the
# first <a> and the first following <span> among the direct children of
# <div class="c">.
html = """
<div class="c">
<a href="/u/">
user
</a>
[所需内容]<h2>title</h2>
<span class="cc">
bbb
</span>
<span class="ct">
ccc
</span>
</div>
"""

soup = bs(html, 'html.parser')
div = soup.find('div', class_='c')

# Direct children only: tags and the text nodes between them.
all_contents = div.contents

# Debug dump of every direct child together with its position.
for i, child in enumerate(all_contents):
    print(i, '----', child)

# Position of the first <a>; None (rather than 0) when there is none, so a
# missing anchor cannot be confused with "anchor is the very first child".
index_a = next(
    (i for i, child in enumerate(all_contents) if child.name == 'a'),
    None,
)

# Without an anchor the wanted region starts at the beginning of the div.
start = 0 if index_a is None else index_a + 1

# First <span> located *after* the anchor; None when there is none, in which
# case the slice below runs to the end of the div instead of being empty.
index_span = next(
    (i for i in range(start, len(all_contents))
     if all_contents[i].name == 'span'),
    None,
)
print(index_a, index_span)

content = all_contents[start:index_span]
print(content)

# Keep tags untouched; keep text nodes only when they are not pure
# whitespace, with surrounding whitespace removed.
want_content = []
for node in content:
    if isinstance(node, bs4.element.Tag):
        want_content.append(node)
        continue
    stripped = node.strip()
    if stripped:
        want_content.append(stripped)
print(want_content)
--------------------------------打印结果-------------------------------------
0 ----
1 ---- <a href="/u/">
user
</a>
2 ----
[所需内容]
3 ---- <h2>title</h2>
4 ----
5 ---- <span class="cc">
bbb
</span>
6 ----
7 ---- <span class="ct">
ccc
</span>
8 ----
1 5
['\n [所需内容]', <h2>title</h2>, '\n']
['[所需内容]', <h2>title</h2>]