How to Use aiohttp in Python

Date: 2023-05-11 10:38

1. Definition

aiohttp is an asynchronous HTTP networking library built on asyncio. It provides both a server side and a client side.
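
The examples in this article focus on the client. For completeness, here is a minimal sketch of the server side using aiohttp's web module; the route and handler names are illustrative only:

from aiohttp import web

# Illustrative handler: responds to GET / with a plain-text greeting
async def handle(request):
    return web.Response(text='Hello, aiohttp!')

app = web.Application()
app.add_routes([web.get('/', handle)])

if __name__ == '__main__':
    # Starts a blocking server, by default on http://localhost:8080
    web.run_app(app)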

2. Basic Usage

import aiohttp
import asyncio

async def fetch(session, url):
    # Declare an async-capable context manager
    async with session.get(url) as response:
        # response.text() returns a coroutine, so it must be awaited
        return await response.text(), response.status

async def main():
    # Declare an async-capable context manager
    async with aiohttp.ClientSession() as session:
        html, status = await fetch(session, 'https://cuiqingcai.com')
        print(f'html: {html[:100]}...')
        print(f'status: {status}')

if __name__ == '__main__':
    # On Python 3.7 and later, there is no need to obtain an event loop
    # explicitly; asyncio.run(main()) can replace the startup call below
    asyncio.get_event_loop().run_until_complete(main())

3. Request Types

session.post('http://httpbin.org/post', data=b'data')
session.put('http://httpbin.org/put', data=b'data')
session.delete('http://httpbin.org/delete')
session.head('http://httpbin.org/get')
session.options('http://httpbin.org/get')
session.patch('http://httpbin.org/patch', data=b'data')
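
Each of these calls returns an awaitable request context, so in practice they are used inside async with, just like session.get above. As a hedged sketch, form data and JSON payloads can be passed with the data and json keyword arguments (the URL and payload here are illustrative):

import aiohttp
import asyncio

async def main():
    async with aiohttp.ClientSession() as session:
        # Send form-encoded data via the data argument
        async with session.post('http://httpbin.org/post',
                                data={'name': 'germey'}) as response:
            print(await response.text())
        # Send a JSON body via the json argument
        async with session.post('http://httpbin.org/post',
                                json={'name': 'germey'}) as response:
            print(await response.text())

if __name__ == '__main__':
    asyncio.run(main())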

4. Response Fields

print('status:', response.status)       # Status code
print('headers:', response.headers)     # Response headers
print('body:', await response.text())   # Response body as text
print('bytes:', await response.read())  # Response body as raw bytes
print('json:', await response.json())   # Response body parsed as JSON
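
One caveat worth knowing: response.json() checks the Content-Type header and raises aiohttp.ContentTypeError if the server does not declare JSON. Passing content_type=None disables that check. A minimal sketch (the URL is illustrative):

import aiohttp
import asyncio

async def main():
    async with aiohttp.ClientSession() as session:
        async with session.get('https://httpbin.org/get') as response:
            # content_type=None skips the Content-Type check, so the body
            # is parsed as JSON regardless of what the server declares
            data = await response.json(content_type=None)
            print('json:', data)

if __name__ == '__main__':
    asyncio.run(main())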

5. Timeout Settings

import aiohttp
import asyncio

async def main():
    # Set a total timeout of 1 second
    timeout = aiohttp.ClientTimeout(total=1)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get('https://httpbin.org/get') as response:
            print('status:', response.status)

if __name__ == '__main__':
    asyncio.get_event_loop().run_until_complete(main())
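
If the limit is exceeded, aiohttp raises asyncio.TimeoutError. A minimal sketch of catching it, carrying over the 1-second limit and URL from the example above:

import aiohttp
import asyncio

async def main():
    timeout = aiohttp.ClientTimeout(total=1)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        try:
            async with session.get('https://httpbin.org/get') as response:
                print('status:', response.status)
        except asyncio.TimeoutError:
            # Raised when the request exceeds the configured timeout
            print('request timed out')

if __name__ == '__main__':
    asyncio.run(main())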

6. Concurrency Limiting

import asyncio
import aiohttp

# Set the maximum concurrency to 5
CONCURRENCY = 5
semaphore = asyncio.Semaphore(CONCURRENCY)
URL = 'https://www.baidu.com'
session = None

async def scrape_api():
    async with semaphore:
        print('scraping', URL)
        async with session.get(URL) as response:
            await asyncio.sleep(1)
            return await response.text()

async def main():
    global session
    session = aiohttp.ClientSession()
    scrape_index_tasks = [asyncio.ensure_future(scrape_api()) for _ in range(10000)]
    await asyncio.gather(*scrape_index_tasks)
    # Close the session to avoid an unclosed-session warning
    await session.close()

if __name__ == '__main__':
    asyncio.get_event_loop().run_until_complete(main())
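
A note on the design: the semaphore is acquired around the entire request, so at most CONCURRENCY requests are in flight at any moment even though 10000 tasks are created up front. As a hedged variant, the same pattern can be written without module-level globals by passing the session and semaphore as parameters (a sketch; the names and the smaller task count are illustrative):

import asyncio
import aiohttp

CONCURRENCY = 5
URL = 'https://www.baidu.com'

async def scrape_api(session, semaphore):
    # At most CONCURRENCY coroutines can hold the semaphore at once
    async with semaphore:
        async with session.get(URL) as response:
            return await response.text()

async def main():
    semaphore = asyncio.Semaphore(CONCURRENCY)
    # The session is closed automatically when the block exits
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.ensure_future(scrape_api(session, semaphore))
                 for _ in range(100)]
        await asyncio.gather(*tasks)

if __name__ == '__main__':
    asyncio.run(main())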

7. Practical Example

import asyncio
import aiohttp
import logging
import json

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s: %(message)s')

INDEX_URL = 'https://dynamic5.scrape.center/api/book/?limit=18&offset={offset}'
DETAIL_URL = 'https://dynamic5.scrape.center/api/book/{id}'
PAGE_SIZE = 18
PAGE_NUMBER = 100
CONCURRENCY = 5

semaphore = asyncio.Semaphore(CONCURRENCY)
session = None

async def scrape_api(url):
    async with semaphore:
        try:
            logging.info('scraping %s', url)
            async with session.get(url) as response:
                return await response.json()
        except aiohttp.ClientError:
            logging.error('error occurred while scraping %s', url, exc_info=True)

async def scrape_index(page):
    url = INDEX_URL.format(offset=PAGE_SIZE * (page - 1))
    return await scrape_api(url)

async def main():
    global session
    session = aiohttp.ClientSession()
    scrape_index_tasks = [asyncio.ensure_future(scrape_index(page))
                          for page in range(1, PAGE_NUMBER + 1)]
    results = await asyncio.gather(*scrape_index_tasks)
    logging.info('results %s', json.dumps(results, ensure_ascii=False, indent=2))
    # Close the session to avoid an unclosed-session warning
    await session.close()

if __name__ == '__main__':
    asyncio.get_event_loop().run_until_complete(main())
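
Note that DETAIL_URL is defined but never used in the listing above. A natural extension is to collect the book ids from the index results and fetch each detail page through the same scrape_api helper. A hedged sketch, continuing from the listing above (it assumes each index response is a JSON object with a results list whose items carry an id field, as the scrape.center API returns):

# Continuing from the listing above
async def scrape_detail(id):
    url = DETAIL_URL.format(id=id)
    return await scrape_api(url)

async def main():
    global session
    session = aiohttp.ClientSession()
    scrape_index_tasks = [asyncio.ensure_future(scrape_index(page))
                          for page in range(1, PAGE_NUMBER + 1)]
    results = await asyncio.gather(*scrape_index_tasks)
    # Collect ids from all index pages, skipping failed requests
    ids = [item.get('id') for index_data in results if index_data
           for item in index_data.get('results', [])]
    scrape_detail_tasks = [asyncio.ensure_future(scrape_detail(id)) for id in ids]
    await asyncio.gather(*scrape_detail_tasks)
    await session.close()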

That concludes this walkthrough of how to use aiohttp in Python. For more, see other related articles on Gxl网!
