github上下载到本地的代码如何执行

qq_32289115 2017-08-22 06:33:23
# -*- coding: utf-8 -*-

import time
import scrapy
import selenium
from selenium import webdriver
from selenium.webdriver.firefox.webdriver import FirefoxProfile
from amazon.items import AmazonItem

class AmazonSpider(scrapy.Spider):
    """Collect reviewer names and e-mail addresses from Amazon's top-reviewers list.

    Scrapy fetches the paginated listing (``review/top-reviewers?page=N``);
    every reviewer link found on a listing page is then opened in a
    Selenium-driven Firefox session that was signed in during ``__init__``.
    One ``AmazonItem`` is yielded per reviewer.

    Run the spider through Scrapy from the project root, e.g.
    ``scrapy crawl amazon_1``.  Executing this file directly fails on
    ``from amazon.items import AmazonItem`` because the ``amazon`` package
    is not importable from inside ``amazon/spiders``.

    NOTE(review): the copy of this class that was recovered had lost its
    indentation and, apparently, every bracketed expression (list literals,
    subscripts, XPath predicates).  Places where a value had to be
    reconstructed are marked with NOTE(review) below and must be checked
    against the upstream project.
    """

    # Shard number and shard size.  Only ``num`` is used at the moment (in
    # the spider name); the formula kept below shows how ``start`` / ``end``
    # were meant to be derived from them.
    num = 1
    pages = 100

    # start = (num - 1) * pages + 1
    # end = num * pages + 1
    start = 0 + 1
    end = 1000 + 1

    # Number of the listing page currently being processed.
    i = start

    name = 'amazon_' + str(num)

    # Listing URL without the page number; shared by __init__ and parse.
    _LISTING_URL = 'http://www.amazon.com/review/top-reviewers?page='

    def __init__(self, *args, **kwargs):
        """Configure the crawl and open a signed-in Firefox session.

        Positional and keyword arguments are forwarded to ``scrapy.Spider``
        so that spider arguments supplied by Scrapy are accepted.
        """
        super(AmazonSpider, self).__init__(*args, **kwargs)

        # NOTE(review): both right-hand sides were missing from the recovered
        # source.  These values are rebuilt from the URLs that parse()
        # requests - confirm against the upstream project.
        self.start_urls = [self._LISTING_URL + str(self.start)]
        self.allowed_domains = ['amazon.com']

        # self.profile = FirefoxProfile('/home/romitas/.mozilla/firefox/68h3udd9.AmazonScraper')
        # The profile has to live on the instance: the driver below is built
        # from ``self.profile``, and a local variable here would leave that
        # attribute undefined.
        self.profile = FirefoxProfile()
        self.profile.set_preference('network.protocol-handler.external.mailto', False)
        self.driver = webdriver.Firefox(self.profile)
        self.driver_login()

    def parse(self, response):
        """Handle one listing page.

        Yields an ``AmazonItem`` for each reviewer link on the page, then a
        ``scrapy.Request`` for the next listing page while ``self.i`` has not
        passed ``self.end``.
        """
        # Imported here so the block does not depend on an edit to the
        # file-level import list.
        from selenium.common.exceptions import WebDriverException

        for reviewer in response.xpath('//tr/td/a'):
            listed_names = reviewer.xpath('b/text()').extract()
            hrefs = reviewer.xpath('@href').extract()
            if not hrefs:
                # An anchor without an href cannot lead to a reviewer page.
                continue

            # extract() returns a list; concatenating it to a string raises
            # TypeError, so the first match is taken explicitly.
            name = listed_names[0] if listed_names else ''
            rev_url = 'http://www.amazon.com' + hrefs[0]

            self.driver.get(rev_url)

            # NOTE(review): this locator probably carried a predicate that was
            # lost; as written it matches the first <a> on the page.
            email_xpath = '//a'
            email = ''

            try:
                email_link = self.driver.find_element_by_xpath(email_xpath)
                email_link.click()
                time.sleep(1)  # give the page time to update after the click
            except WebDriverException:
                # Best effort: a reviewer without a usable link is recorded
                # with '-' instead of aborting the crawl.
                email = '-'

            sel = scrapy.Selector(text=self.driver.page_source)

            if email != '-':
                emails = sel.xpath(email_xpath + '/text()').extract()
                email = emails[0] if emails else '-'
                # NOTE(review): the original nesting of this lookup was lost.
                # The profile-page heading is preferred here, with the name
                # from the listing kept as a fallback.
                headings = sel.xpath('//h1/text()').extract()
                if headings:
                    name = headings[0]

            item = AmazonItem()
            # NOTE(review): field names inferred from the variable names;
            # confirm them against amazon/items.py.
            item['name'] = name
            item['email'] = email

            yield item

        self.i += 1
        if self.i <= self.end:
            yield scrapy.Request(self._LISTING_URL + str(self.i), callback=self.parse)

    def driver_login(self):
        """Open the Amazon sign-in page in the driver and submit the login form."""
        login_url = "https://www.amazon.com/ap/signin?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.com%2Freview%2Ftop-reviewers%2F%3Fref_%3Dnav_signin"
        self.driver.get(login_url)

        # NOTE(review): the three locators below are identical, so all of
        # them resolve to the first <input> on the page.  Each presumably had
        # its own predicate that was lost - restore them before relying on
        # this method.
        login = self.driver.find_element_by_xpath('//input')
        password = self.driver.find_element_by_xpath('//input')

        submit = self.driver.find_element_by_xpath('//input')

        # Sure won't work this way.
        # In order to login you have to put your own Amazon email/password here
        login.send_keys('<tr########com>')
        password.send_keys('<######>')
        submit.click()

    def parse_reviewer(self):
        """Scrape the reviewer page that is currently loaded in the driver.

        Yields a single ``AmazonItem``.  Nothing in this class calls the
        method.  Unlike ``parse`` it does not catch a failed element lookup,
        so the driver's exception propagates to the caller.
        """
        time.sleep(5)  # wait before touching the page

        # NOTE(review): locator probably lost its predicate (same as in parse).
        email_xpath = '//a'

        email_link = self.driver.find_element_by_xpath(email_xpath)
        email_link.click()

        sel = scrapy.Selector(text=self.driver.page_source)

        emails = sel.xpath(email_xpath + '/text()').extract()
        headings = sel.xpath('//h1/text()').extract()

        item = AmazonItem()
        # NOTE(review): field names inferred from the variable names;
        # confirm them against amazon/items.py.
        item['name'] = headings[0] if headings else ''
        item['email'] = emails[0] if emails else '-'

        yield item



运行后提示找不到模块(No module named 'amazon')。
这个模块应该在哪里呢?
 RESTART: C:\Users\IBM\Desktop\AmazonReviewersScraper-master\amazon\spiders\amazon_spider_1.py 
Traceback (most recent call last):
File "C:\Users\IBM\Desktop\AmazonReviewersScraper-master\amazon\spiders\amazon_spider_1.py", line 8, in <module>
from amazon.items import AmazonItem
ModuleNotFoundError: No module named 'amazon'


怎么解决呢?
其他的
...全文
2395 2 打赏 收藏 转发到动态 举报
写回复
用AI写文章
2 条回复
切换为时间正序
请发表友善的回复…
发表回复
Steven·简谈 2019-05-21
  • 打赏
  • 举报
回复
看看项目有没有完整下载吧。如果是自己定义的模块,项目文件夹里面会有的。
tianfang 2017-08-23
  • 打赏
  • 举报
回复
你不会把 GitHub 的 URL 贴出来吗?让别人怎么找?项目地址: https://github.com/Romitas/AmazonReviewersScraper 你把项目完整下载了吗?AmazonItem 就是在 items 中定义的: https://github.com/Romitas/AmazonReviewersScraper/blob/master/amazon/items.py

37,720

社区成员

发帖
与我相关
我的任务
社区描述
JavaScript,VBScript,AngelScript,ActionScript,Shell,Perl,Ruby,Lua,Tcl,Scala,MAXScript 等脚本语言交流。
社区管理员
  • 脚本语言(Perl/Python)社区
  • IT.BOB
加入社区
  • 近7日
  • 近30日
  • 至今

试试用AI创作助手写篇文章吧