环境:win10+python 3.4版本,已调为默认uft-8编码。
源代码(
放其它人电脑上正常执行,在本机报错):
#!/usr/python3
import re
import urllib.request
def gethtml(url):
page=urllib.request.urlopen(url)
html=page.read()
return html
def getimg(html):
reg = r'src="(.*?\.jpg)"'
img=re.compile(reg)
html=html.decode('utf-8') #python3
imglist=re.findall(img,html)
x = 0
for imgurl in imglist:
urllib.request.urlretrieve(imgurl,'%s.jpg'%x)
x = x+1
html=gethtml("http://news.ifeng.com/a/20161115/50258273_0.shtml")
print(getimg(html))
报错内容:
UnicodeEncodeError: 'ascii' codec can't encode characters in position 65-69: ordinal not in range(128)
C:\Users\www55\PycharmProjects\untitled1\venv\Scripts\python.exe D:\python\Soft\pycharm2017pjb\pycharm-professional-2017\pycharm-professional-2017.3.4\helpers\pydev\pydev_run_in_console.py 56826 56827 D:/python/study/22.py
import sys; print('Python %s on %s' % (sys.version, sys.platform))
sys.path.extend(['C:\\Users\\www55\\PycharmProjects\\untitled1', 'D:/python/study'])
Running D:/python/study/22.py
Traceback (most recent call last):
File "D:\python\Soft\pycharm2017pjb\pycharm-professional-2017\pycharm-professional-2017.3.4\helpers\pydev\pydev_run_in_console.py", line 53, in run_file
pydev_imports.execfile(file, globals, locals) # execute the script
File "D:\python\Soft\pycharm2017pjb\pycharm-professional-2017\pycharm-professional-2017.3.4\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "D:/python/study/22.py", line 24, in <module>
print(getImg(html))
File "D:/python/study/22.py", line 20, in getImg
urllib.request.urlretrieve(imgurl,'%s.jpg'%x)
File "D:\python\IDE\lib\urllib\request.py", line 248, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "D:\python\IDE\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "D:\python\IDE\lib\urllib\request.py", line 526, in open
response = self._open(req, data)
File "D:\python\IDE\lib\urllib\request.py", line 544, in _open
'_open', req)
File "D:\python\IDE\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "D:\python\IDE\lib\urllib\request.py", line 1361, in https_open
context=self._context, check_hostname=self._check_hostname)
File "D:\python\IDE\lib\urllib\request.py", line 1318, in do_open
encode_chunked=req.has_header('Transfer-encoding'))
File "D:\python\IDE\lib\http\client.py", line 1239, in request
self._send_request(method, url, body, headers, encode_chunked)
File "D:\python\IDE\lib\http\client.py", line 1250, in _send_request
self.putrequest(method, url, **skips)
File "D:\python\IDE\lib\http\client.py", line 1117, in putrequest
self._output(request.encode('ascii'))
UnicodeEncodeError: 'ascii' codec can't encode characters in position 65-69: ordinal not in range(128)