37,719
社区成员
发帖
与我相关
我的任务
分享
from datetime import datetime
from scrapy.spiders import CrawlSpider, Rule
from scrapy.shell import inspect_response
from tencent_app.items import TencentAppItem
import time
import scrapy
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
class Yingyongbao(CrawlSpider):
name = 'yingyongbao_app'
def __init__(self, *args, **kwargs):
    """Set up the search start URLs and launch the Selenium browser.

    Args:
        *args: Positional arguments forwarded unchanged to the base
            spider constructor.
        **kwargs: Keyword arguments forwarded unchanged to the base
            spider constructor, so that construction keeps working when
            the caller supplies spider arguments.
    """
    # Forward constructor arguments instead of rejecting them: the former
    # zero-argument signature raised TypeError whenever any were supplied.
    super(Yingyongbao, self).__init__(*args, **kwargs)
    # Search keywords; one start URL is generated per keyword.
    keywords = ['qq']
    self.start_urls = [
        'http://sj.qq.com/myapp/search.htm?kw=%s' % keyword
        for keyword in keywords
    ]
    # URL prefix ending in "apkName="; it is only stored here, not used.
    self.url = 'http://sj.qq.com/myapp/detail.htm?apkName='
    # Opens a Firefox session that parse() drives through self.driver.
    # NOTE(review): no matching self.driver.quit() is visible in this
    # excerpt -- confirm the browser is shut down when the spider closes.
    self.driver = webdriver.Firefox()
# Open ``response.url`` in the Selenium browser and keep clicking one page
# element until an attempt raises.
# NOTE(review): indentation was lost in this copy of the file, and the method
# may continue beyond the lines shown here -- confirm against the original.
# NOTE(review): Scrapy's documentation warns against overriding ``parse`` in a
# CrawlSpider subclass because CrawlSpider uses it internally -- confirm that
# this override is intended.
def parse(self,response):
# Created empty; not referenced again in the lines visible here.
url_set = set()
# Load the URL of the Scrapy response into the Selenium-controlled browser.
self.driver.get(response.url)
# Repeat until waiting for, locating, or clicking the element raises.
while True:
try:
# Wait (timeout argument 2, i.e. seconds) for the anchor at this absolute
# XPath to be found.
wait = WebDriverWait(self.driver, 2)
wait.until(lambda driver: driver.find_element_by_xpath('/html/body/div[3]/div[3]/div[2]/a'))
#time.sleep(1)
# Click the <i> child of that anchor. The variable name suggests a
# "next page" / "load more" control -- TODO confirm against the live page.
next_page = self.driver.find_element_by_xpath('/html/body/div[3]/div[3]/div[2]/a/i')
next_page.click()
# Bare except: any exception at all (a wait timeout included) ends the loop.
except:
break