关于 Python requests 库 "Max retries exceeded with url" 的问题

qq_33411563 2017-06-06 02:00:15

一个多线程批量下载煎蛋网上图片的爬虫，先上代码

import random

import requests

import threading

import Queue

import bs4

import time

class SpiderMan(threading.Thread):
    """Worker thread that crawls page numbers taken from a shared queue.

    Each worker keeps pulling page numbers until the queue is drained,
    calls ``spider(page)`` for every one, then prints the finish time.
    """

    def __init__(self, queue):
        """Remember the shared work queue.

        :param queue: a ``Queue.Queue`` holding the page numbers to crawl.
        """
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Process pages until the queue is empty, then print the end time."""
        while True:
            try:
                # Non-blocking fetch. Testing empty() and then calling a
                # blocking get() is a check-then-act race: another worker can
                # grab the last item in between, leaving this thread blocked
                # forever and the caller's join() hanging.
                page = self.queue.get_nowait()
            except Queue.Empty:
                break

            spider(page)
            # Brief pause between pages.
            time.sleep(0.1)

        print(time.ctime())

def spider(page):
    """Fetch one jandan.net/ooxx listing page and download its images.

    Every ``<img>`` on the page that has a usable ``src`` is handed to
    ``DownloadJpg``. A network failure while fetching the listing page is
    printed and the page is skipped, so the calling worker thread survives.

    :param page: page number substituted into the listing URL.
    """
    print(page)

    url = ("http://jandan.net/ooxx/page-%s#comments") % (page)
    print(url)

    myheader = {
        'Host': 'jandan.net',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate',
    }

    try:
        # A timeout keeps a stalled connection from hanging the worker.
        rr = requests.get(url, headers=myheader, timeout=30)
    except requests.RequestException as e:
        # e.g. "Max retries exceeded with url": report and skip this page
        # rather than letting the exception kill the thread.
        print(str(e))
        return

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    mydom = bs4.BeautifulSoup(rr.content, "html.parser")

    for index, img in enumerate(mydom.find_all("img")):
        src = img.get('src')
        if not src:
            # <img> without a src attribute: nothing to download.
            continue
        if src.startswith('//'):
            # Protocol-relative URL: add the scheme.
            src = "http:" + src
        if not src.startswith('http'):
            # Relative or data: URLs cannot be fetched as-is.
            continue
        # page + position is unique per image; a random letter plus the
        # current second collides when many threads save at once.
        DownloadJpg(src, "%s_%d" % (page, index))



def main(first_page=1, last_page=95, threadnum=50):
    """Queue up the page numbers and crawl them with a pool of worker threads.

    :param first_page: first page number to crawl (inclusive).
    :param last_page: last page number to crawl (inclusive).
    :param threadnum: number of ``SpiderMan`` worker threads to start.

    The defaults reproduce the original fixed setup: pages 1-95, 50 threads.
    Returns once every worker has finished.
    """
    queue = Queue.Queue()
    # Fill the queue before starting any worker so none sees it empty early.
    for page in range(first_page, last_page + 1):
        queue.put(page)

    threadlist = []
    for _ in range(threadnum):
        worker = SpiderMan(queue)
        threadlist.append(worker)
        worker.start()

    # Wait for all workers to drain the queue.
    for worker in threadlist:
        worker.join()

def DownloadJpg(url, name):
    """Download ``url`` and save it as ``img1/<name>.gif`` (best effort).

    :param url: absolute URL of the image to fetch.
    :param name: file name (without extension) to save under ``img1``.

    Any failure (network error, bad HTTP status, disk error) is printed and
    swallowed so that one bad image does not abort the rest of the crawl.
    """
    import os  # local import keeps this function self-contained

    target_dir = "img1"
    try:
        # A timeout keeps a stalled download from blocking the worker forever.
        f = requests.get(url, timeout=30)
        print(url)
        # Do not save an HTTP error page as if it were an image.
        f.raise_for_status()

        if not os.path.isdir(target_dir):
            try:
                os.makedirs(target_dir)
            except OSError:
                # Another thread may have created it in the meantime.
                if not os.path.isdir(target_dir):
                    raise

        # os.path.join instead of a hard-coded backslash, which only works
        # as a directory separator on Windows.
        path = os.path.join(target_dir, "%s.gif" % (name,))
        with open(path, "wb") as code:
            code.write(f.content)
    except Exception as e:
        # Deliberately broad: downloading is best effort per image.
        print(str(e))





# Start the crawl only when this file is executed as a script.
if __name__ == "__main__":
    main()