想用python爬新闻老是出错,python小白,求大神指教
# coding:utf-8
# 引入相关模块
import requests
from bs4 import BeautifulSoup
from requests_html import HTMLSession
url ='https://gy.house.ifeng.com//news'
wbdata = requests.get(url).text
# 对获取到的文本进行解析
soup = BeautifulSoup(wbdata,'lxml')
# 从解析文件中通过select选择器定位指定的元素,返回一个列表
news_titles = soup.select('body > div.w1180.mb30 > div.content.clearfix > div.newsList.clearfix.fl > div.newsDetail > a')
# 对返回的列表进行遍历
for n in news_titles:
# 提取出标题和链接信息
title = n.get_text()
link = n.get("href")
date = {'标题':"".join(title.split()),'链接':link}
date1={"".join(title.split())}
session = HTMLSession()
r = session.get(date1)
title1 = {r.html.find('body > div.w1180.mb30 > div.content.clearfix > div.article-content.fl > div.article > div.title', first=True)}
context1 = r.html.find('body > div.w1180.mb30 > div.content.clearfix > div.article-content.fl > div.article > div.content-info>p', first=True)
print(title1.text)
print(context1.text)
用这个代码取https://gy.house.ifeng.com//news新闻网的文章老是出错,python小白,求大神指教
错误代码:"C:\Program Files\Python38\python.exe" C:/Users/sikezx-all/PycharmProjects/PythonTest/Test1.py
Traceback (most recent call last):
File "C:\Program Files\Python38\lib\site-packages\requests\models.py", line 379, in prepare_url
scheme, auth, host, port, path, query, fragment = parse_url(url)
File "C:\Program Files\Python38\lib\site-packages\urllib3\util\url.py", line 392, in parse_url
return six.raise_from(LocationParseError(source_url), None)
File "<string>", line 3, in raise_from
urllib3.exceptions.LocationParseError: Failed to parse: {'前三季度全国累计新增减税降费超1.78万亿元10月30日,国家税务总局召开新闻发布会,介绍今年前三季度税务部门落实减税降费、组织税收收入、深化“放管服”改革、优化税收营商环境等情况人民日报2019-11-01企业创新联合科技作用'}
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/sikezx-all/PycharmProjects/PythonTest/Test1.py", line 71, in <module>
r = session.get(date1)
File "C:\Program Files\Python38\lib\site-packages\requests\sessions.py", line 546, in get
return self.request('GET', url, **kwargs)
File "C:\Program Files\Python38\lib\site-packages\requests\sessions.py", line 519, in request
prep = self.prepare_request(req)
File "C:\Program Files\Python38\lib\site-packages\requests\sessions.py", line 452, in prepare_request
p.prepare(
File "C:\Program Files\Python38\lib\site-packages\requests\models.py", line 313, in prepare
self.prepare_url(url, params)
File "C:\Program Files\Python38\lib\site-packages\requests\models.py", line 381, in prepare_url
raise InvalidURL(*e.args)
requests.exceptions.InvalidURL: Failed to parse: {'前三季度全国累计新增减税降费超1.78万亿元10月30日,国家税务总局召开新闻发布会,介绍今年前三季度税务部门落实减税降费、组织税收收入、深化“放管服”改革、优化税收营商环境等情况人民日报2019-11-01企业创新联合科技作用'}
Process finished with exit code 1