37,719
社区成员
发帖
与我相关
我的任务
分享
import urllib
import urllib2
import cookielib
import re
# Regular expression that captures the value of the hidden "_CSRF_TOKEN"
# form field from a page's HTML. It is used with re.findall, so the result
# is a list with one captured value per matching <input> element.
# The pattern is literal about spacing, quoting and attribute order, so any
# change in the page markup will make it stop matching.
CSRF_TOKEN_RE = '<input type="hidden" name="_CSRF_TOKEN" value="(.*?)" />'
# Commented-out sample URL, not used anywhere in the visible code.
# NOTE(review): looks like a contract-award notice detail page on the same
# site -- confirm before relying on it.
#url='https://www.gldpcms.gov.hk/etb_prod/jsp_public/cn/scn00203.jsp?CONTRACT_AWARD_NOTICE_AWARD_ID=13425'
def readSecondPage(CSRF_TOKEN_ID):
    """Fetch the scn00202.jsp page using a CSRF token and print its body.

    Args:
        CSRF_TOKEN_ID: Sequence of token strings, as returned by
            ``re.findall``. Only the first element is used.

    Raises:
        IndexError: If ``CSRF_TOKEN_ID`` is empty (no token was found).
        urllib2.URLError: If the HTTP request fails.

    The request goes through ``urllib2.urlopen``, so it uses whatever opener
    has been installed globally with ``urllib2.install_opener``.
    """
    # Echo the raw match list first; handy when diagnosing a failed match.
    print(CSRF_TOKEN_ID)
    if not CSRF_TOKEN_ID:
        # Same exception type the bare ``[0]`` lookup would raise, but with
        # a message that says what actually went wrong.
        raise IndexError(
            'no _CSRF_TOKEN value was found; cannot request the second page')

    # Percent-encode the token so characters such as '&', '+', '=' or '#'
    # cannot corrupt the query string.
    token = urllib.quote(CSRF_TOKEN_ID[0], safe='')
    url = ('https://www.gldpcms.gov.hk/etb_prod/jsp_public/cn/scn00202.jsp'
           '?_CSRF_TOKEN=' + token +
           '&ACTION=&MODE=&BO_STATE=0&UPDATE_MODE=&NAV_ID=&WINDOW_NAME='
           '&LAST_ACTION=&NAVLINK_REDIRECT=&URL_USED='
           '&SHOW_REFINE_SEARCH=Y&SHOW_ADVANCED_SEARCH=Y')

    request = urllib2.Request(url)
    response = urllib2.urlopen(request)
    # urllib2 responses are not context managers in Python 2, so close the
    # connection explicitly even if reading or printing fails.
    try:
        print(response.read())
    finally:
        response.close()
def main():
    """Load the first page, extract its CSRF token and fetch the second page.

    Steps:
      1. Install a global urllib2 opener backed by a cookie jar, so cookies
         set by the first response are sent on later ``urllib2.urlopen``
         calls (including the one made by ``readSecondPage``).
      2. GET scn00201.jsp with browser-like User-Agent and Referer headers.
      3. Extract every ``_CSRF_TOKEN`` hidden-field value with
         ``CSRF_TOKEN_RE`` and pass the list to ``readSecondPage``.

    Exits with an error message if the page contains no CSRF token.
    """
    firsturl = "https://www.gldpcms.gov.hk/etb_prod/jsp_public/cn/scn00201.jsp"

    # Cookie-aware opener, installed globally for all urllib2.urlopen calls.
    cj = cookielib.LWPCookieJar()
    cookie_support = urllib2.HTTPCookieProcessor(cj)
    opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
    urllib2.install_opener(opener)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31',
        # The Referer is the first page itself.
        'Referer': firsturl,
    }
    request = urllib2.Request(firsturl, None, headers)
    response = urllib2.urlopen(request)
    # Close the connection explicitly; urllib2 responses do not support
    # the ``with`` statement in Python 2.
    try:
        page = response.read()
    finally:
        response.close()

    CSRF_TOKEN_ID = re.findall(CSRF_TOKEN_RE, page)
    if not CSRF_TOKEN_ID:
        # Fail here with a clear message instead of an IndexError deep
        # inside readSecondPage.
        raise SystemExit('No _CSRF_TOKEN field found in ' + firsturl)
    readSecondPage(CSRF_TOKEN_ID)


if __name__ == "__main__":
    main()