scrapy框架爬虫翻页问题

qq_44388663 2019-04-19 05:15:39

哪位大神帮我看看，我有scrapy框架里的rules规则爬虫，为什么翻不了页，爬出来的数据是空的，爬的是腾讯招聘网

下面是代码

from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

class Txzp1Spider(CrawlSpider):
name = 'txzp1'
# allowed_domains = ['hr.tencent.com']
start_urls = ['https://hr.tencent.com/position.php?lid=&tid=&keywords=java&start=0#a']

rules = (
Rule(LinkExtractor(allow=r'position.php?lid=&tid=&keywords=java&start=\d#a'),follow=True),
Rule(LinkExtractor(allow=r'position_detail.php?id=\d+&keywords=java&tid=0&lid=0'),
callback="parse_detail", follow=False),
)

def parse_detail(self, response):
print("===========")
title = response.xpath("//tr[@class='h']/td/text()").get()
region = response.xpath("//tr[@class='c bottomline']/td[1]/text()").get()
position_type = response.xpath("//tr[@class='c bottomline']/td[2]/text()").get()
number = response.xpath("//tr[@class='c bottomline']/td[3]/text()").get()
duty = response.xpath(
"//table[@class='tablelist textl']//tr[@class='c'][1]//ul[@class='squareli']/li/text()").getall()
yaoqiu = response.xpath(
"//table[@class='tablelist textl']//tr[@class='c'][2]//ul[@class='squareli']/li/text()").getall()
item = {"title": title, "position_type": position_type, "number": number, "region": region, "duty": duty,
"yaoqiu": yaoqiu}
print(item)