37,720
社区成员
发帖
与我相关
我的任务
分享
# Fragment of a Python 2 function body: it contains a `return`, but the `def`
# line is not shown and the original indentation appears to have been lost in
# this paste.
# It builds a Baidu search URL for `search_str`, fetches the page, extracts the
# reported result count with a regex and returns the first count >= 10000.
user_agent = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"
headers = {"User-Agent":user_agent}
baseUrl = 'http://www.baidu.com/s'
word = search_str # search keyword
data = {'wd':word}
# URL-encode the query parameters (Python 2 `urllib.urlencode`).
data = urllib.urlencode(data)
url = baseUrl+'?'+data
# Pose as a browser by sending a User-Agent header
try :
qingqiu = urllib2.Request(url,headers=headers);
# NOTE(review): urlopen is called without a timeout argument here, so how long
# it may block depends on the global socket default.
xaingying = urllib2.urlopen(qingqiu)
# Extract the result count: group 2 of the pattern is the text between the
# "百度为您找到相关结果约" prefix and the following "个".
items = re.findall('(百度为您找到相关结果约)([0-9].*?)(个)',xaingying.read(), re.S)
for item in items:
# NOTE(review): the name `str` shadows the built-in `str` type.
str = item[1];
# Strip thousands separators before parsing the count as a base-10 integer.
num = int(str.replace(',', ''),10)
if num >= 10000:
return num
else:
continue
# print xaingying.read()
# Python 2 `except Type, name` syntax; only URLError is handled here.
except urllib2.URLError,e:
# Print whichever of `code` / `reason` the error object carries.
if hasattr(e,"code"):
print e.code
if hasattr(e,"reason"):
print e.reason
# Revised version of the same Python 2 function-body fragment (the `def` line
# is not shown and the original indentation appears to have been lost).
# It builds a Baidu search URL for `search_str`, fetches the page with a
# 10-second timeout, extracts the reported result count with a regex and
# returns the first count >= 10000.
user_agent = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"
headers = {"User-Agent":user_agent}
baseUrl = 'http://www.baidu.com/s'
word = search_str # search keyword
data = {'wd':word}
# URL-encode the query parameters (Python 2 `urllib.urlencode`).
data = urllib.urlencode(data)
url = baseUrl+'?'+data
# Pose as a browser by sending a User-Agent header
try :
qingqiu = urllib2.Request(url,headers=headers);
xaingying = urllib2.urlopen(qingqiu, timeout=10) # set a timeout (seconds)
# Extract the result count: group 2 of the pattern is the text between the
# "百度为您找到相关结果约" prefix and the following "个".
items = re.findall('(百度为您找到相关结果约)([0-9].*?)(个)',xaingying.read(), re.S)
for item in items:
str1 = item[1]; # `str` is a built-in name, so it is best not to use it as a variable name
# Strip thousands separators before parsing the count as a base-10 integer.
num = int(str1.replace(',', ''),10)
if num >= 10000:
return num
else:
continue
# print xaingying.read()
# Python 2 `except Type, name` syntax for the URL-specific errors.
except urllib2.URLError,e:
# Print whichever of `code` / `reason` the error object carries.
if hasattr(e,"code"):
print e.code
if hasattr(e,"reason"):
print e.reason
except Exception as e: # catch every other exception; can be omitted if you are sure only URLError can occur
print e