import scrapy
from scrapy.http import Request,FormRequest
class DbSpider(scrapy.Spider):
    """Log in to Douban and print data scraped from a profile page.

    Flow: fetch the login page, submit the login form found in it, then
    print the ``<title>`` text and the first ``div.note`` text node of the
    page the login redirects to.

    ``form_email``, ``form_password`` and ``redir`` are plain class
    attributes, so they can be overridden per run with Scrapy spider
    arguments, e.g. ``scrapy crawl db -a form_email=... -a form_password=...``.
    """

    name = 'db'
    allowed_domains = ['douban.com']
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"}

    # start_urls is intentionally not defined: the crawl is seeded by
    # start_requests() below so that the login page is fetched first.

    # NOTE(security): real credentials should not live in source control.
    # The literals are kept only as backward-compatible defaults; prefer
    # passing them in as spider arguments.
    form_email = "746337002@qq.com"
    form_password = "fydxxn2055"
    redir = "https://www.douban.com/people/180702197/"

    def start_requests(self):
        """Return the initial request for the login page.

        This must be named ``start_requests`` (plural): that is the hook
        Scrapy looks for when seeding the crawl.

        Returns:
            A one-element list holding the login-page ``Request``.
        """
        return [Request(
            'https://accounts.douban.com/login',
            # Send the same browser User-Agent as the form submission does.
            headers=self.header,
            callback=self.parse,
            # 'cookiejar' tags the cookie session; the same value is passed
            # along by parse() so the login request shares these cookies.
            meta={'cookiejar': 1},
        )]

    # Backward-compatible alias for the original (misspelled) method name.
    start_request = start_requests

    def parse(self, response):
        """Submit the login form contained in the login page.

        Args:
            response: The login-page response; its ``meta`` must carry the
                ``cookiejar`` key set by ``start_requests``.

        Returns:
            A one-element list holding the login ``FormRequest``, whose
            response is handled by ``next``.
        """
        print('此时没有验证码')
        data = {
            "form_email": self.form_email,
            "form_password": self.form_password,
            "redir": self.redir,
        }
        print("登陆中")
        return [FormRequest.from_response(
            response,
            meta={"cookiejar": response.meta["cookiejar"]},
            headers=self.header,
            formdata=data,
            callback=self.next,
        )]

    def next(self, response):
        """Print the page title and the first note text of the response.

        A value that cannot be found is reported through the spider logger
        instead of raising ``IndexError``.

        Args:
            response: The response received after the login form was
                submitted.
        """
        print("此时已经登录完成并爬取了个人中心的数据")
        title = response.xpath('/html/head/title/text()').extract_first()
        note = response.xpath('//div[@class="note"]/text()').extract_first()
        for label, value in (("title", title), ("note", note)):
            if value is None:
                # Nothing matched, e.g. the login failed or the layout changed.
                self.logger.warning("No %s found on %s", label, response.url)
            else:
                print(value)