37,721
社区成员
发帖
与我相关
我的任务
分享
import lxml.html
import urllib
# Fetch pages Book1.htm .. Book5.htm and, for every table row found, print
# "<first cell text>,<difference of two numeric cells>" (or "None" when the
# numeric cells cannot be read).
#
# NOTE(review): the original indentation was lost; the nesting below is
# reconstructed from the statement order - confirm against the author's intent.
BASE_URL = "http://www.sdzk.gov.cn/score/gaokao/2010/100804/Book"
PAGE_COUNT = 5

# Layout of the flat <td> list, as encoded by the original index arithmetic:
# rows start at cell 26 and repeat every 11 cells.
FIRST_ROW_CELL = 26
CELLS_PER_ROW = 11
# Offsets (from the row's first cell) of the two integers that are subtracted.
# Presumably "planned" and "submitted" counts - verify against the page.
PLAN_OFFSET = 6
POST_OFFSET = 8

for num in range(1, PAGE_COUNT + 1):
    final_url = BASE_URL + str(num) + ".htm"

    page = urllib.urlopen(final_url)
    try:
        # Do NOT name this variable `str`: rebinding the builtin made
        # str(num) fail with "object is not callable" from the second page on.
        html_text = page.read().decode("gb2312")
    finally:
        page.close()

    doc = lxml.html.fromstring(html_text)
    name_ele = doc.cssselect("td")

    # Stop early enough that i + POST_OFFSET is always a valid index, so a
    # truncated trailing row is skipped instead of raising IndexError.
    last_start = len(name_ele) - POST_OFFSET
    for i in range(FIRST_ROW_CELL, last_start, CELLS_PER_ROW):
        try:
            plan = int(name_ele[i + PLAN_OFFSET].text)
            post = int(name_ele[i + POST_OFFSET].text)
        except (TypeError, ValueError):
            # TypeError: the cell has no text (None); ValueError: the text is
            # not an integer. Either way the row has no usable numbers.
            print("None")
        else:
            # Single parenthesised argument: prints identically whether
            # `print` is the Python 2 statement or a function.
            print("%s,%d" % (name_ele[i].text, plan - post))