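# 37,743 社区成员 (community members) -- apparently page text captured along with the snippet; not part of the program.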




import requests
from bs4 import BeautifulSoup
import re
class CsdnHelper:
    """Log in to CSDN through a ``requests`` session and list blog posts.

    One session is created per instance and reused for every request, so
    cookies set while logging in are sent with the later blog-list requests.
    """

    # Login page: fetched first to read the hidden form fields, then used as
    # the POST target for the credentials.
    csdn_login_url = 'https://passport.csdn.net/account/login?ref=toolbar'
    # Headers installed on every session created by this class.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'DNT': '1',
        'Referer': 'https://passport.csdn.net/account/login?ref=toolbar',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,ja;q=0.4',
    }

    def __init__(self):
        """Create the HTTP session and give it its own copy of ``headers``."""
        self._session = requests.session()
        # Assign a copy rather than the class dict itself: otherwise changing
        # a header on one session would silently change the shared class
        # attribute (and therefore every other instance).
        self._session.headers = dict(CsdnHelper.headers)

    def login(self, username: str, password: str) -> None:
        """Submit the login form and print the ``UserNick`` cookie value.

        Args:
            username: Account name sent in the ``username`` form field.
            password: Password sent in the ``password`` form field.

        Raises:
            Exception: If the login response carries no ``UserNick`` cookie.
            ValueError: If the login page lacks the expected form or fields.
        """
        form_data = self._prepare_login_form_data(username, password)
        response = self._session.post(CsdnHelper.csdn_login_url, data=form_data)
        # The response body is deliberately not printed: it is a full HTML
        # page and only the cookie decides whether the login worked.
        if 'UserNick' not in response.cookies:
            raise Exception('登录失败')
        print(response.cookies['UserNick'])

    def _prepare_login_form_data(self, username: str, password: str) -> dict:
        """Fetch the login page and build the form payload for ``login``.

        The hidden ``lt``, ``execution`` and ``_eventId`` inputs of the form
        with id ``fm1`` are copied into the payload next to the credentials.
        Each payload key is read from the input of the same name.

        Raises:
            ValueError: If the form or one of the hidden inputs is missing.
        """
        response = self._session.get(CsdnHelper.csdn_login_url)
        login_page = BeautifulSoup(response.text, 'lxml')
        login_form = login_page.find('form', id='fm1')
        if login_form is None:
            raise ValueError("login form with id 'fm1' not found on the login page")
        return {
            'username': username,
            'password': password,
            'lt': self._hidden_field(login_form, 'lt'),
            'execution': self._hidden_field(login_form, 'execution'),
            '_eventId': self._hidden_field(login_form, '_eventId'),
        }

    @staticmethod
    def _hidden_field(form, name: str) -> str:
        """Return the ``value`` of the ``<input name=...>`` inside ``form``."""
        field = form.find('input', attrs={'name': name})
        if field is None or not field.has_attr('value'):
            raise ValueError(f"login form has no input named {name!r} with a value")
        return field['value']

    def _get_blog_count(self) -> tuple:
        """Read the post and page totals from the post-list page.

        Returns:
            ``(blog_count, page_count)`` as integers, parsed from the first
            ``<span>`` inside the ``div.page_nav`` element.

        Raises:
            ValueError: If the pager element or the expected text is missing.
        """
        blog_base_url = 'http://write.blog.csdn.net/postlist/'
        response = self._session.get(blog_base_url)
        blog_page = BeautifulSoup(response.text, 'lxml')
        page_nav = blog_page.find('div', class_='page_nav')
        if page_nav is None or page_nav.span is None:
            raise ValueError("pager element 'div.page_nav > span' not found on the post list page")
        summary = page_nav.span.get_text()
        # search() instead of match(): the counter is found even when the
        # span text has leading whitespace or other text in front of it.
        result = re.search(r'(\d+)条 共(\d+)页', summary)
        if result is None:
            raise ValueError(f'unexpected pager text: {summary!r}')
        return (int(result.group(1)), int(result.group(2)))

    def print_blogs(self, blog_user: str = 'u011054333') -> None:
        """Print the title and URL of every post, one list page at a time.

        Args:
            blog_user: Account id that appears in the article URLs
                (``http://blog.csdn.net/<blog_user>/article/details/<id>``).
                Only links matching this id are printed.
        """
        _, page_count = self._get_blog_count()
        # Compiled once for all pages; the dots and the user id are escaped so
        # they match literally.
        article_link = re.compile(
            r'http://blog\.csdn\.net/' + re.escape(blog_user) + r'/article/details/(\d+)'
        )
        for index in range(1, page_count + 1):
            url = f'http://write.blog.csdn.net/postlist/0/0/enabled/{index}'
            response = self._session.get(url)
            page = BeautifulSoup(response.text, 'lxml')
            print(f'----------第{index}页----------')
            for link in page.find_all('a', href=article_link):
                blog_name = link.string
                blog_url = link['href']
                print(f'文章名称:{blog_name} 文章链接:{blog_url}')
if __name__ == '__main__':
    # Script entry point: log in once with credentials supplied at run time.
    # Imported here because only the entry point needs them.
    import getpass
    import os

    csdn_helper = CsdnHelper()
    # Credentials come from the environment when set, otherwise from an
    # interactive prompt; getpass keeps the password off the screen.
    username = os.environ.get('CSDN_USERNAME') or input('CSDN username: ')
    password = os.environ.get('CSDN_PASSWORD') or getpass.getpass('CSDN password: ')
    csdn_helper.login(username, password)