新人请教个网页爬虫request的问题

Andrew_2015_3 2017-02-24 12:42:50

from bs4 import BeautifulSoup
import requests
# Fetch a TripAdvisor "Saves" page with a hand-supplied Cookie header and print
# the HTML that the server returns.
url_slave = 'https://www.tripadvisor.cn/Saves/631371'

# NOTE(review): the Cookie value below is presumably copied from a logged-in
# browser session. It is a credential: it expires, and it should not be
# committed or shared publicly.
headers = {
    # Mobile (Android / Nexus 5) Chrome user agent, so the mobile site is served.
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Mobile Safari/537.36',
    # The cookie must be ONE logical line: a raw line break inside a quoted
    # string is a SyntaxError, and an HTTP header value may not contain
    # newlines. Adjacent string literals inside the parentheses are joined by
    # Python at compile time, so the value sent is a single "; "-separated line.
    'Cookie': (
        'hideSmartbanner=1; ServerPool=C; TAUnique=%1%enc%3AxPLXK5CpiZ68z7U7iMMiGOyua9NW2XrUIuzNXcyLuPsBSxJpMbQxvQ%3D%3D; TASSK=enc%3AAK5KB5cv4kDFcQTFkBMH0CW9ntpuOi8Gn11qJjPCDuORv1Zf2Uvmzg%2FsH4SNCDX9v4idv8G462kSQxT9EeimD5QqI6AZRS2NLhiJgANTYJTL%2BeIBl3U4FeFkkq3kIZihZQ%3D%3D; TAPD=tripadvisor.cn; __gads=ID=20fe73b5b62b4ed5:T=1487854581:S=ALNI_MaTivfcJvaKPi6O_Mfnv4k9dadZBA; MobileLastViewedList=%1%%2FAttractions-g60763-Activities-New_York_City_New_York.html; bdshare_firstime=1487861859740; SecureLogin2=3.4%3AACsqYBxAwAUB6ukEbpLPR8fh%2BRQFGSnaPsIzQ5IXaotWCzid%2FViASGYzMJrxx10XRYDP0M%2BiVTofNMJn0Uy44pi8FjQYVM5fi%2BNfz%2FUxrDwR22kAiXSwdHkd6NSIZZsek%2F2UUomlQhI0%2BPNRHhSA24wSJ6N%2F8Lkk7hPyMcrE2Ni9vbm5np0cRxCW8s%2Bv7eUauq%2B8ZygYCsUNSt8f6s6UX9c%3D; TAAuth2=%1%3%3Ae27cdd8730d872e4411b9ed49a0ef850%3AAHz2H5de3LB%2FqzIqJ45VrnZ5CHVnknVZn3rCMbLxqyIb8%2FO6yOnPHTVmb1y9d8RM6uS%2Fk1BUNfaTiF7tO3UJZCdBAgd3%2FwReSUfCNjeBtxzkFWWAuTMMDl6bpeEFQQ4UeNyxPa2g3KfXdUgOxDOx8x2gwZxVcyiKJ15HlZ4D6kR%2FhLBP7pAWZ3sNptKoqEVxhFNhtuUD9u18yWQMDR8pvrd7mm%2FfkBL8FGNIR5M1TC9M; _jzqckmp=1; interstitialCounter=%1%-1; taMobileRV=%1%%7B%2210021%22%3A%5B9846506%2C548804%2C532140%5D%2C%2210028%22%3A%5B60763%5D%7D; TATravelInfo=V2*A.2*MG.-1*HP.2*FL.3*RVL.9846506_54l532132_54l548804_54l1687489_54l60763_54l532140_54; VRMCID=%1%V1*id.13091*llp.%2F*e.1488470625701; _smt_uid=58af07cc.178632d1; _jzqx=1.1487861953.1487865838.3.jzqsr=tripadvisor%2Ecn|jzqct=/attraction_review-g60763-d548804-reviews-new_york_university-new_york_city_new_york%2Ehtml.jzqsr=tripadvisor%2Ecn|jzqct=/; CommercePopunder=SuppressAll*1487865840756; '
        'CM=%1%HanaPersist%2C%2C-1%7CPremiumMobSess%2C%2C-1%7Ct4b-pc%2C%2C-1%7CHanaSession%2C%2C-1%7CRCPers%2C%2C-1%7CWShadeSeen%2C%2C-1%7CFtrPers%2C%2C-1%7CTheForkMCCPers%2C%2C-1%7CHomeASess%2C2%2C-1%7CPremiumSURPers%2C%2C-1%7CPremiumMCSess%2C%2C-1%7Csesscoestorem%2C%2C-1%7CCpmPopunder_1%2C1%2C1487948317%7CCCSess%2C%2C-1%7CCpmPopunder_2%2C1%2C-1%7CViatorMCPers%2C%2C-1%7Csesssticker%2C%2C-1%7C%24%2C%2C-1%7CPremiumORSess%2C%2C-1%7Ct4b-sc%2C%2C-1%7CMC_IB_UPSELL_IB_LOGOS2%2C%2C-1%7Cb2bmcpers%2C%2C-1%7CMC_IB_UPSELL_IB_LOGOS%2C%2C-1%7CPremMCBtmSess%2C%2C-1%7CPremiumSURSess%2C%2C-1%7CLaFourchette+Banners%2C%2C-1%7Csess_rev%2C%2C-1%7Csessamex%2C%2C-1%7Cperscoestorem%2C%2C-1%7CPremiumRRSess%2C%2C-1%7CSaveFtrPers%2C%2C-1%7CTheForkRRSess%2C%2C-1%7Cpers_rev%2C%2C-1%7CMetaFtrSess%2C%2C-1%7Cmds%2C%2C-1%7CRBAPers%2C%2C-1%7CWAR_RESTAURANT_FOOTER_PERSISTANT%2C%2C-1%7CFtrSess%2C%2C-1%7CHomeAPers%2C%2C-1%7CPremiumMobPers%2C%2C-1%7CRCSess%2C%2C-1%7CLaFourchette+MC+Banners%2C%2C-1%7Cbookstickcook%2C%2C-1%7Csh%2C%2C-1%7CLastPopunderId%2C137-1859-null%2C-1%7Cpssamex%2C%2C-1%7CTheForkMCCSess%2C%2C-1%7CCCPers%2C%2C-1%7CWAR_RESTAURANT_FOOTER_SESSION%2C%2C-1%7Cb2bmcsess%2C%2C-1%7CViatorMCSess%2C%2C-1%7CPremiumMCPers%2C%2C-1%7CPremiumRRPers%2C%2C-1%7CPremMCBtmPers%2C%2C-1%7CTheForkRRPers%2C%2C-1%7CSaveFtrSess%2C%2C-1%7CPremiumORPers%2C%2C-1%7CRBASess%2C%2C-1%7Cbookstickpers%2C%2C-1%7Cperssticker%2C%2C-1%7CMetaFtrPers%2C%2C-1%7C; TAReturnTo=%1%%2FSmartDeals-g60763-d532140-Shakespeare_Garden-New_York_City_New_York-Hotel-Deals.html; ki_t=1487854590010%3B1487854590010%3B1487865880428%3B1%3B12; ki_r=; _qzja=1.219742380.1487861952985.1487863792125.1487865838026.1487865838026.1487866834142..0.0.5.3; _qzjb=1.1487865838025.2.0.0.0; _qzjc=1; _qzjto=2.1.0; _jzqa=1.582079789026251500.1487861953.1487863792.1487865838.3; _jzqc=1; _jzqb=1.2.10.1487865838.1; '
        'TASession=%1%V2ID.1552E06EC220901F2DB370E127D4582D*SQ.103*MC.13091*LR.https%3A%2F%2Ftripadvisor%5C.woqu%5C.com%2Fcenter%2Forder*LP.%2F*LS.MobileSaves*GR.81*TCPAR.39*TBR.1*EXEX.22*ABTR.47*PPRP.40*PHTB.20*FS.21*CPU.41*HS.popularity*ES.popularity*AS.popularity*DS.5*SAS.popularity*FPS.oldFirst*TS.5F82355CD9B40773E273344DA1319BC9*LF.zhCN*FA.1*DF.0*IR.3*OD.zh*MS.-1*RMS.-1*FLO.60763*TRA.true*LD.532140; TAUD=LA-1487854566680-1*LG-12273513-2.0.F.*LD-12273514-.....; roybatty=TNI1625!AH5Fe2NE26OgdrJylTdT3bnfWZKGi11ZxM9MRUWw5IT5yUZSN3BJ99EvnKOVumbSEo%2BLWXJJAM3amQQAuROImJEU9%2BsuSHNy7EHKzdLrgOsJzJ%2F0WaGpRQKnO5Eo4M87azo%2FHzJLKfVnkDhsLb9Lvt89EmO31U9S25SQgef5HAAe%2C1; Hm_lvt_2947ca2c006be346c7a024ce1ad9c24a=1487854583; Hm_lpvt_2947ca2c006be346c7a024ce1ad9c24a=1487866844'
    ),
}

# A timeout keeps the script from hanging forever if the server never answers.
web_data = requests.get(url_slave, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of printing an error page as if it were the
# requested content.
web_data.raise_for_status()
print(web_data.text)

问题是:我想获取某网页登录后才能看到的 HTML,但程序拿到的 HTML 为什么始终和浏览器里看到的页面对不上?
...全文
103 点赞 收藏 1
写回复
1 条回复
切换为时间正序
当前发帖距今超过3年,不再开放新的回复
发表回复
屎克螂 2017-02-24
网站可能是认 session 不认 cookie。你得找到登入接口,用程序登入。你的代码应该类似这样:
s = requests.session()
s.post('xxx/login', {'user':''})
s.get('xxx/index')
回复
相关推荐
发帖
脚本语言
创建于2007-08-27

3.7w+

社区成员

JavaScript,VBScript,AngelScript,ActionScript,Shell,Perl,Ruby,Lua,Tcl,Scala,MaxScript 等脚本语言交流。
申请成为版主
帖子事件
创建了帖子
2017-02-24 12:42
社区公告

CSDN 脚本语言社区接受专栏投稿(专栏会在顶部创建专属你的栏目),投稿需满足以下要求:

  • 脚本语言技术相关;
  • 文章持续更新,保持活跃;
  • 内容清晰明了,干货为主;
  • 文章排版有序,有条有理。

本社区开通招聘专栏,发布招聘信息请联系版主,发布者需要保证招聘信息真实有效,CSDN 平台和版主不对招聘内容负责!

联系方式:私聊版主、发送邮件、QQ联系等均可: