37,720
社区成员
发帖
与我相关
我的任务
分享
#coding:utf-8
import urllib2
import re
# Browser-like User-Agent string and the header dict built from it; the
# dict is passed as the request headers when QSBK.getPage builds its request.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {
    'User-Agent': user_agent,
}
class QSBK:
    """Fetch one qiushibaike.com "hot" page and print its content blocks.

    getPage downloads the raw page body; getContent extracts the inner
    text of every ``<div class="content">`` element and prints each one.
    """

    def getPage(self):
        """Download the first hot page and return the raw response body.

        The request is sent with the module-level ``headers`` dict.
        Any error raised by urllib2 propagates to the caller.
        """
        url = 'http://www.qiushibaike.com/hot/page/1'
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        try:
            return response.read()
        finally:
            # Release the connection even when read() raises.
            response.close()

    def getContent(self):
        """Print the captured body of every ``<div class="content">`` block.

        One match is printed per ``print`` call, in document order.
        Nothing is printed when the page has no matching div.
        """
        pattern = r'<div class="content">(.*?)</div>'
        page = self.getPage()
        # re.DOTALL lets '.' match newlines, so a div whose text spans
        # several lines is captured instead of being skipped.
        content = re.findall(pattern, page, re.DOTALL)
        for c in content:
            # With a single capture group findall yields plain strings;
            # indexing with c[0] would print only the first character
            # (and raise IndexError on an empty match).
            print(c)
# Script entry: build the scraper and print every extracted content block.
qsbk = QSBK()
qsbk.getContent()
# NOTE: an orphaned fragment used to follow here. It re-ran
# re.findall(pattern, page, re.DOTALL | re.IGNORECASE | re.MULTILINE) and
# printed each match with print(c). `pattern` and `page` are local to
# QSBK.getContent and undefined at module scope, so it raised NameError
# after the call above had already run. Those flags and the print(c) loop
# belong inside getContent, not at the end of the script.