443
社区成员
发帖
与我相关
我的任务
分享
# Install the dependencies first: pip install requests bs4
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# 2023-02-23
import requests
from bs4 import BeautifulSoup
# Fetch the "hot" headlines shown on the CSDN home page and print each
# headline's title and link, one per line.
url = 'https://www.csdn.net/?spm=1001.2100.3001.4476'
headers = {
    # Browser-like User-Agent sent with the request.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.50'
}
# Bound the request: without a timeout a stalled connection would block the
# script indefinitely.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
html = response.text

# Parse the HTML.
soup = BeautifulSoup(html, 'html.parser')

# Find the title area: every element carrying the `headswiper-item` class.
article_tags = soup.select('.headswiper-item')

# Extract the title and link of each article.
results = []
for tag in article_tags:
    # First <a> that is a direct child of a `.headswiper-item` element.
    anchor = tag.select_one('.headswiper-item > a')
    if anchor is None:
        continue
    # Skip anchors without an href rather than raising KeyError.
    link = anchor.get('href')
    if link is None:
        continue
    results.append((anchor.get_text().strip(), link))

for title, link in results:
    print(f'{title}、{link}')