python如何异步读取网页?

kenshu 2017-09-18 03:24:24
新手,谢谢!
我要等4个网页一起读完,再进行下一步,怎么做?

getHtml(ls_swap) #这里如何变成异步?


------------------------------
import urllib.request
import datetime
import time

# Counter that main() polls until it reaches 4 (intended to count finished page fetches).
total_return = 0

def getHtml(url):
    """Download ``url``, print its raw body and count the fetch as finished.

    Increments the module-level ``total_return`` once the page has been read.
    Errors raised while opening or reading the URL propagate to the caller
    and leave the counter unchanged. Returns None.
    """
    global total_return

    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(url) as page:
        html = page.read()

    print(html)  # process the page here

    # main() waits on this counter; without the increment it would wait forever.
    # NOTE(review): if this ends up running in several threads, guard the
    # increment with a lock.
    total_return += 1
    return

def main(key1,key2):
    """Fetch every site in ``ls_site`` and continue only once all are done.

    ``key1`` and ``key2`` are accepted but not used in this function.
    Each URL gets the current local time (YYYYmmddHHMMSS) as its query
    string. Completion is detected by polling the module-level
    ``total_return`` counter, which the fetches are expected to increment.
    """
    global total_return
    ls_site = ["www.1.com", "www.2.com", "www.3.com", "www.4.com"]

    # With no time tuple, strftime formats the current local time.
    timeString = time.strftime("%Y%m%d%H%M%S")

    for site in ls_site:
        ls_swap = "http://" + site + "/?" + timeString
        getHtml(ls_swap)  # blocking call: how can this be made asynchronous?

    # Wait until every fetch above has completed.
    while total_return < len(ls_site):
        time.sleep(1)

    # Carry on with the follow-up work here.


...全文
340 3 打赏 收藏 转发到动态 举报
写回复
用AI写文章
3 条回复
切换为时间正序
请发表友善的回复…
发表回复
张强1990 2017-09-18
  • 打赏
  • 举报
回复
from concurrent.futures import ThreadPoolExecutor
import requests

# Sample workload: the same URL repeated 16 times.
urls = ['http://httpbin.org'] * 16
# Thread pool limited to 4 worker threads.
threadpool = ThreadPoolExecutor(max_workers=4)

def get_webpage(url, timeout=10):
    """Fetch ``url`` with requests, print the response object and return it.

    ``timeout`` (seconds) is passed to ``requests.get`` so that a stalled
    server cannot block a pool worker forever.
    """
    r = requests.get(url, timeout=timeout)
    print(r)
    return r

# Keep the futures so each response can be read later via future.result().
futures = [threadpool.submit(get_webpage, url) for url in urls]

# Block until every submitted download has finished, then release the workers;
# code placed after this line runs only once all pages have been fetched.
threadpool.shutdown(wait=True)
kenshu 2017-09-18
  • 打赏
  • 举报
回复
非常感谢,结贴。
混沌鳄鱼 2017-09-18
  • 打赏
  • 举报
回复

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import urllib.request
import time
from threading import Thread


class ThreadWithResult(Thread):
    """Thread that keeps the return value of the callable it runs.

    ``result`` starts as None and is overwritten when ``run`` finishes.
    """

    def __init__(self, func, args=()):
        super().__init__()
        self.result = None
        self.func, self.args = func, args

    def run(self):
        """Call ``func`` with ``args`` and remember what it returns."""
        target, positional = self.func, self.args
        self.result = target(*positional)

    def get_result(self):
        """Return the stored value (None until ``run`` has stored one)."""
        return self.result


def getHtml(url):
    """Fetch ``url``, print its raw body, and always return 1.

    Best-effort: a failed download is reported on stdout instead of being
    raised, so the returned value is 1 whether or not the fetch succeeded.
    """
    print(url)
    try:
        # The with-block closes the response even if read() raises.
        with urllib.request.urlopen(url, timeout=10) as page:
            html = page.read()
    except Exception as exc:
        # Runs as a thread target: report the failure rather than hide it.
        print("failed to fetch {}: {!r}".format(url, exc))
    else:
        print(html)  # process the page here

    single_result = 1   # result of the work above
    return single_result


def main(key1, key2):
    """Print both keys, fetch four sites in parallel threads, print the total.

    Each URL carries the current local time (YYYYmmddHHMMSS) as its query
    string. The printed total is the sum of the per-thread results.
    """
    for key in (key1, key2):
        print(key)

    hosts = ["www.1.com", "www.2.com", "www.3.com", "www.4.com"]
    stamp = time.strftime("%Y%m%d%H%M%S")

    # One worker thread per site.
    workers = [
        ThreadWithResult(func=getHtml, args=("http://{}/?{}".format(host, stamp),))
        for host in hosts
    ]
    for worker in workers:
        worker.start()

    # Results are final once every worker has been joined.
    for worker in workers:
        worker.join()

    print(sum(worker.get_result() for worker in workers))

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main('key1', 'key2')

37,719

社区成员

发帖
与我相关
我的任务
社区描述
JavaScript,VBScript,AngelScript,ActionScript,Shell,Perl,Ruby,Lua,Tcl,Scala,MaxScript 等脚本语言交流。
社区管理员
  • 脚本语言(Perl/Python)社区
  • IT.BOB
加入社区
  • 近7日
  • 近30日
  • 至今

试试用AI创作助手写篇文章吧