需求:asyncio异步下载30张图片 VS for循环下载30张图片
代码:
common.py
import os
import sys
import time
import random
import aiohttp
import requests
def read_txt_file(file_path, source_dir=DATA_DIR):
    """Read a text file and return its lines with surrounding whitespace removed.

    ``file_path`` is tried exactly as given first. If nothing exists at that
    location it is joined onto ``source_dir`` and tried again.

    Args:
        file_path: Path of the file, or a name relative to ``source_dir``.
        source_dir: Fallback directory used when ``file_path`` does not exist
            on its own.

    Returns:
        A list containing one stripped string per line of the file.

    Raises:
        ValueError: If the file exists at neither location.
    """
    resolved_path = file_path
    if not os.path.exists(resolved_path):
        # Fall back to looking the name up inside the data directory.
        resolved_path = os.path.join(source_dir, file_path)
    if not os.path.exists(resolved_path):
        raise ValueError(f"file path does not exists. {resolved_path}")
    with open(resolved_path) as fp:
        return [raw_line.strip() for raw_line in fp]
def download_image_to_local(img_url, timeout=30):
    """Download one image synchronously and save it under ``OUTPUT_DIR``.

    The file name is the last ``/``-separated segment of the URL.

    Args:
        img_url: URL of the image. Falsy values (``None``, ``""``) are
            skipped and nothing is downloaded.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    if not img_url:
        return
    image_name = img_url.split('/')[-1]
    output_path = os.path.join(OUTPUT_DIR, image_name)
    # Without an explicit timeout requests can wait forever on a stalled server.
    resp = requests.get(img_url, timeout=timeout)
    # Fail loudly instead of saving an HTTP error page as an "image" file.
    resp.raise_for_status()
    with open(output_path, 'wb') as pf:
        pf.write(resp.content)
async def download_image_to_local_v2(img_url):
    """Coroutine that downloads one image with the synchronous ``requests`` API.

    Although declared ``async``, the body contains no ``await``:
    ``requests.get`` is an ordinary synchronous call, so this coroutine runs
    from start to finish without yielding while the request is in flight.

    Args:
        img_url: URL of the image. Falsy values (``None``, ``""``) are
            skipped and nothing is downloaded.
    """
    if img_url:
        # The file name is whatever follows the final "/" in the URL.
        image_name = img_url.rsplit('/', 1)[-1]
        output_path = os.path.join(OUTPUT_DIR, image_name)
        response = requests.get(img_url)
        with open(output_path, 'wb') as image_file:
            image_file.write(response.content)
async def download_image_to_local_v3(img_url):
    """Download one image asynchronously with aiohttp and save it under ``OUTPUT_DIR``.

    The file name is the last ``/``-separated segment of the URL.

    Args:
        img_url: URL of a single image. Falsy values (``None``, ``""``) are
            skipped and nothing is downloaded.

    Raises:
        aiohttp.ClientError: If the request fails or the server answers with
            a 4xx/5xx status.
    """
    # Same guard as the synchronous helpers, so an empty URL is not sent to aiohttp.
    if not img_url:
        return
    image_name = img_url.split('/')[-1]
    output_path = os.path.join(OUTPUT_DIR, image_name)
    async with aiohttp.ClientSession() as session:
        async with session.get(img_url) as resp:
            # Fail loudly instead of saving an HTTP error page as an "image" file.
            resp.raise_for_status()
            content = await resp.content.read()
    # The body is fully read at this point, so the connection is no longer needed.
    with open(output_path, 'wb') as pf:
        pf.write(content)
main.py
import os
import sys
PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(PROJECT_DIR)
import time
import asyncio
from utils.common import (download_image_to_local, read_txt_file, download_image_to_local_v2,
download_image_to_local_v3)
async def download_images(images, task_name):
    """Download a batch of images one by one with the synchronous helper.

    The body contains no ``await``: every ``download_image_to_local`` call is
    a plain synchronous call made from inside the coroutine.

    Args:
        images: Iterable of image URLs.
        task_name: Label printed after each download.
    """
    # NOTE(review): indentation was lost in the pasted source; the progress
    # print is assumed to run once per image. Confirm against the original.
    for image_url in images:
        download_image_to_local(image_url)
        print(f"task {task_name}")
async def download_images_v2(images, task_name):
    """Download a batch of images by awaiting ``download_image_to_local_v2`` per URL.

    Args:
        images: Iterable of image URLs.
        task_name: Label printed after each download.
    """
    # NOTE(review): indentation was lost in the pasted source; the progress
    # print is assumed to run once per image. Confirm against the original.
    for image_url in images:
        await download_image_to_local_v2(image_url)
        print(f"task:{task_name}")
async def download_images_v3(images, task_name):
    """Download a batch of images with the aiohttp-based helper.

    ``download_image_to_local_v3`` takes a single URL, so the batch is walked
    and each URL is awaited in turn rather than handing over the whole list.

    Args:
        images: Iterable of image URLs.
        task_name: Label printed when the batch starts and when it ends.
    """
    print(f"begin task:{task_name}")
    for img in images:
        await download_image_to_local_v3(img)
    print(f"end task:{task_name}")
async def download_images_with_multi_coroutings(image_file_name="image_urls.txt"):
    """Split the first 30 URLs into three tasks that run ``download_images``.

    The tasks are created first, then awaited in order, with the wall-clock
    time printed before and after the waits.

    Args:
        image_file_name: Name of the text file with one image URL per line,
            read via ``read_txt_file``.
    """
    images = read_txt_file(image_file_name)
    batches = (
        (images[0:10], "task_1"),
        (images[10:20], "task_2"),
        (images[20:30], "task_3"),
    )
    pending = [
        asyncio.create_task(download_images(batch, label))
        for batch, label in batches
    ]
    print(f"begin time {time.strftime('%X')}")
    for task in pending:
        await task
    print(f"end time {time.strftime('%X')}")
async def download_images_with_multi_coroutings_v2(image_file_name="image_urls.txt"):
    """Split the first 30 URLs into three tasks that run ``download_images_v2``.

    The tasks are created first, then awaited in order, with the wall-clock
    time printed before and after the waits.

    Args:
        image_file_name: Name of the text file with one image URL per line,
            read via ``read_txt_file``.
    """
    images = read_txt_file(image_file_name)
    batches = (
        (images[0:10], "task_1"),
        (images[10:20], "task_2"),
        (images[20:30], "task_3"),
    )
    pending = [
        asyncio.create_task(download_images_v2(batch, label))
        for batch, label in batches
    ]
    print(f"begin time {time.strftime('%X')}")
    for task in pending:
        await task
    print(f"end time {time.strftime('%X')}")
def download_images_with_forloop(image_file_name="image_urls.txt"):
    """Download every URL in the file sequentially, as the synchronous baseline.

    The wall-clock time is printed before the first download and after the
    last one.

    Args:
        image_file_name: Name of the text file with one image URL per line,
            read via ``read_txt_file``.
    """
    images = read_txt_file(image_file_name)
    print(f"begin time {time.strftime('%X')}")
    for img in images:
        # The imported helper is ``download_image_to_local``; the previous
        # spelling ``download_images_to_local`` raised NameError.
        download_image_to_local(img)
    print(f"end time {time.strftime('%X')}")
async def download_images_with_multi_coroutings_v3(image_file_name="image_urls.txt"):
    """Run three ``download_images_v3`` batches together via ``asyncio.gather``.

    The first 30 URLs are split into batches of ten. The gathered results are
    printed, bracketed by the wall-clock time before and after.

    Args:
        image_file_name: Name of the text file with one image URL per line,
            read via ``read_txt_file``.
    """
    images = read_txt_file(image_file_name)
    print(f"begin time {time.strftime('%X')}")
    batch_coroutines = [
        download_images_v3(images[0:10], "task_1"),
        download_images_v3(images[10:20], "task_2"),
        download_images_v3(images[20:30], "task_3"),
    ]
    results = await asyncio.gather(*batch_coroutines)
    print(results)
    print(f"end time {time.strftime('%X')}")
if __name__ == "__main__":
    # Run every strategy back to back so their begin/end timestamps can be
    # compared: the coroutine variants first, then the plain for loop.
    separator = "******************\n"
    asyncio.run(download_images_with_multi_coroutings())
    print(separator)
    for run_variant in (
        download_images_with_multi_coroutings_v2,
        download_images_with_multi_coroutings_v3,
    ):
        asyncio.run(run_variant())
    print(separator)
    download_images_with_forloop()
备注:
测试结果:
只有 download_images_with_multi_coroutings_v3 这个函数真正用到了 asyncio 的并发;download_images_with_multi_coroutings 和 download_images_with_multi_coroutings_v2 并没有并发执行,实际上仍然是同步(串行)执行。查了一下,原因在于 requests.get 是一个阻塞调用:它在等待网络响应期间不会把控制权交还给事件循环,其他协程因此得不到调度。也就是说,要用 asyncio 实现真正的异步,调用链上的每个 I/O 环节都必须是可以 await 的异步操作(例如用 aiohttp 代替 requests)才可以。