37,742
社区成员
发帖
与我相关
我的任务
分享
#coding:utf-8
import sys
# Python 2-only workaround: reloading sys makes sys.setdefaultencoding
# callable again (it is hidden from the module after interpreter startup),
# and the call below switches the process-wide default codec used for
# implicit str <-> unicode conversions to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
import urllib2
import chardet
def getWebContent2(url, timeout=20):
    """Fetch *url* and return its body decoded to a unicode string.

    The character set is guessed by chardet from the raw response bytes.
    The URL, the guessed encoding and the raw body are echoed to stdout
    as debugging output.

    Args:
        url: Absolute URL to download.
        timeout: Timeout in seconds handed to ``urllib2.urlopen``.

    Returns:
        The response body as unicode text.  Bytes that cannot be decoded
        with the chosen codec are substituted rather than raising.

    Raises:
        Whatever ``urllib2.urlopen`` / ``read`` raise on network failure;
        those errors are not caught here.
    """
    print(url)
    req = urllib2.urlopen(url, timeout=timeout)
    try:
        content = req.read()
    finally:
        # Release the connection even when reading the body fails.
        req.close()

    # Detect once and reuse the answer instead of re-scanning the body
    # for every use of the encoding name.
    encoding = chardet.detect(content)['encoding']
    # %-formatting keeps this line working when the guess is None.
    print("code:%s" % encoding)
    print(content)

    if encoding is None or encoding == 'ISO-8859-2':
        # An ISO-8859-2 guess is treated as UTF-8, as in the original code
        # (presumably a workaround for a chardet misdetection -- confirm).
        # A failed detection (None) takes the same UTF-8 route.
        encoding = 'utf-8'

    try:
        return content.decode(encoding, 'replace')
    except LookupError:
        # chardet reported a charset name Python has no codec for.
        return content.decode('utf-8', 'replace')
if __name__ == "__main__":
    # Manual smoke test: download the hao123 home page and print its text.
    print("Debug:test qs_getweb.py")
    host = "hao123.com"
    page_text = getWebContent2("http://www.%s" % (host,))
    print(page_text)
#coding:utf-8
import urllib2
import chardet
def getWebContent2(url, timeout=40, fallback_encoding='gbk'):
    """Fetch *url* and return its body as a UTF-8 encoded byte string.

    chardet guesses the charset of the raw bytes.  A body already in
    UTF-8 is returned untouched; anything else is decoded with
    *fallback_encoding* and re-encoded as UTF-8.  The URL is echoed to
    stdout.

    Args:
        url: Absolute URL to download.
        timeout: Timeout in seconds handed to ``urllib2.urlopen``.
        fallback_encoding: Codec used for bodies not detected as UTF-8.
            Defaults to GBK, the encoding the original author observed
            on hao123.

    Returns:
        The response body as UTF-8 bytes.  Bytes that are invalid in the
        source codec are dropped silently.

    Raises:
        Whatever ``urllib2.urlopen`` / ``read`` raise on network failure;
        those errors are not caught here.
    """
    print(url)
    req = urllib2.urlopen(url, timeout=timeout)
    try:
        content = req.read()
    finally:
        # Release the connection even when reading the body fails.
        req.close()

    detected = chardet.detect(content)['encoding']
    # The guess may be None and its letter case is not fixed, so compare
    # on a lower-cased, never-None name.
    normalized = (detected or '').lower()

    if normalized == 'utf-8':
        return content
    if normalized == 'utf-8-sig':
        # UTF-8 with a byte-order mark: drop the BOM instead of pushing
        # the bytes through the fallback codec, which would corrupt them.
        return content.decode('utf-8-sig', 'ignore').encode('utf-8')

    # Everything else is assumed to be in the fallback codec (hao123 was
    # seen to be served as GBK).
    return content.decode(fallback_encoding, 'ignore').encode('utf-8')
if __name__ == "__main__":
    # Manual smoke test: download the hao123 home page and print the
    # UTF-8 result.  The full URL is spelled out literally here.
    print("Debug:test qs_getweb.py")
    target = "http://www.hao123.com"
    print(getWebContent2(target))