37,719
社区成员
发帖
与我相关
我的任务
分享
>>> req = urllib2.Request('http://www.tudou.com/programs/view/kS03BynGs8Q')
>>> req.add_header('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:5.0)')
>>> page = urllib2.urlopen(req)
>>> data = page.read()
>>> print data
ヒ
>>> print len(data)
7202
import urllib2
import StringIO
import gzip
# Fetch the page and print its HTML, decompressing the body first when the
# server answers with a gzip-compressed payload.
url = 'http://www.tudou.com/programs/view/kS03BynGs8Q'
req = urllib2.Request(url)
req.add_header('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:5.0)')
response = urllib2.urlopen(req)
try:
    content = response.read()
    # .get() with a default avoids a KeyError when the server sends no
    # Content-Encoding header at all (i.e. the body is not compressed).
    encoding = response.headers.get("Content-Encoding", "")
finally:
    # Release the connection even if reading the body fails.
    response.close()

# Header values are compared case-insensitively and may carry whitespace.
if encoding.strip().lower() == 'gzip':
    # GzipFile needs a file-like object, so wrap the in-memory bytes.
    html = gzip.GzipFile(fileobj=StringIO.StringIO(content)).read()
else:
    html = content
print(html)