python脚本

为什么httpx比requests快？深度剖析Python HTTP客户端的性能之争

发表于 2025年05月01日 ｜ 阅读 534 ｜ 评论 0

大家好，我是何三，80后老猿，独立开发者

在Python生态中，requests库长期占据HTTP客户端的主导地位，但近年来httpx异军突起，凭借其出色的性能表现赢得了众多开发者的青睐。今天我们就来深入探讨httpx比requests快的真正原因，并通过实际代码演示来验证这一结论。

同步与异步的本质区别

想象一下你在一家咖啡厅点单：requests就像是一个固执的服务员，每次只处理一个顾客的订单，必须等这个顾客完全拿到咖啡后才接待下一位；而httpx的异步客户端（AsyncClient）则像是一个高效的服务员团队，可以同时处理多个顾客的订单，在等待咖啡制作的过程中就能服务其他顾客。需要说明的是，httpx同样提供与requests类似的同步接口，单个请求本身并不会因此变快；两者的性能差距主要来自异步并发。

import time
import requests
import httpx
import asyncio

# 同步请求示例
def sync_requests(urls, timeout=10.0):
    """Fetch each URL one after another with ``requests`` and time the run.

    Args:
        urls: Iterable of URL strings to GET sequentially.
        timeout: Seconds to wait on each request before giving up, so a
            stalled server cannot hang the whole measurement.

    Returns:
        Elapsed time in seconds for the complete loop, printing included.

    Raises:
        requests.RequestException: If any request fails or times out.
    """
    # perf_counter is monotonic; the wall clock can be adjusted while the
    # requests are in flight and distort the measured interval.
    start = time.perf_counter()
    for url in urls:
        response = requests.get(url, timeout=timeout)
        print(f"获取 {url}，状态码: {response.status_code}")
    return time.perf_counter() - start

# 异步请求示例
async def async_httpx(urls):
    """Fetch all URLs concurrently with one ``httpx.AsyncClient`` and time it.

    Args:
        urls: Iterable of URL strings to GET concurrently.

    Returns:
        Elapsed time in seconds, covering client setup, all requests and
        the printing of each status line.

    Raises:
        httpx.HTTPError: If any request fails; ``asyncio.gather`` propagates
            the first failure.
    """
    # Materialize once so the URLs can be paired with their responses below.
    urls = list(urls)
    # perf_counter is monotonic, so clock adjustments cannot skew the result.
    start = time.perf_counter()
    # requests follows redirects by default and httpx does not; enable it so
    # both clients do the same work and report comparable status codes.
    async with httpx.AsyncClient(follow_redirects=True) as client:
        responses = await asyncio.gather(*(client.get(url) for url in urls))
        # gather keeps input order, so zip lines each response up with the
        # URL that was requested (response.url may differ after a redirect).
        for url, response in zip(urls, responses):
            print(f"获取 {url}，状态码: {response.status_code}")
    return time.perf_counter() - start

# URLs exercised by the demos: three ordinary sites plus two httpbin
# endpoints with a built-in delay (the slow cases the article points at).
test_urls = [
    "https://www.example.com",
    "https://www.python.org",
    "https://www.deepseek.com",
    *(f"https://httpbin.org/delay/{seconds}" for seconds in (1, 2)),
]

# Run the sequential requests version and print its elapsed seconds.
print("同步requests耗时:", sync_requests(test_urls))

# Run the async httpx version on a fresh event loop and print its elapsed seconds.
print("异步httpx耗时:", asyncio.run(async_httpx(test_urls)))

运行这段代码你会发现，httpx的异步版本完成所有请求的时间远少于requests的同步版本，特别是当URL中包含延迟较高的端点时（如httpbin.org/delay/2），差异更加明显。

为什么异步更快？深入I/O等待

计算机执行网络请求时，大部分时间都花在了等待网络传输上，CPU实际上是空闲的。同步请求就像是在火车站排队买票，即使前面的人正在查询复杂的行程，你也只能干等着；而异步请求则像是网上购票，提交请求后你可以去做其他事情，等结果返回时再处理。

import httpx
import asyncio

async def fetch_with_deepseek(query, timeout=60.0):
    """Send one chat-completion request to the DeepSeek API.

    Args:
        query: User message text placed in the single ``user`` message.
        timeout: Seconds allowed for the request. httpx's default of 5
            seconds is easily exceeded by a model generating a reply, so a
            longer default is used here.

    Returns:
        The decoded JSON body of the response.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status
            (for example when the API key placeholder was not replaced).
        httpx.HTTPError: On transport failures or timeouts.
    """
    async with httpx.AsyncClient(timeout=timeout) as client:
        # Example call to the DeepSeek chat-completions endpoint.
        response = await client.post(
            "https://api.deepseek.com/v1/chat/completions",
            json={
                "model": "deepseek-chat",
                "messages": [{"role": "user", "content": query}]
            },
            headers={"Authorization": "Bearer YOUR_API_KEY"}
        )
        # Fail here with a clear HTTP error instead of returning an error
        # body that callers would trip over when indexing into it.
        response.raise_for_status()
        return response.json()

# 同时发起多个DeepSeek API请求
async def multi_deepseek_queries():
    """Send three sample prompts to DeepSeek concurrently.

    Returns:
        The list produced by ``asyncio.gather``: one ``fetch_with_deepseek``
        result per prompt, in the order the prompts are listed.
    """
    prompts = (
        "解释Python中的GIL",
        "如何优化Python性能",
        "异步编程有什么优势",
    )
    # Build one coroutine per prompt, then await them all together.
    pending = map(fetch_with_deepseek, prompts)
    return await asyncio.gather(*pending)

# Run the concurrent DeepSeek queries and print the text of each reply.
results = asyncio.run(multi_deepseek_queries())
for result in results:
    # Reads the first choice's message content from each response dict.
    # NOTE(review): assumes every response carries a 'choices' list; an
    # error body without it would raise here. Confirm against the API.
    print(result['choices'][0]['message']['content'])

在这个DeepSeek API调用示例中，异步方式可以同时发起多个查询请求，而不必等待前一个完成，这在需要聚合多个API结果时尤其有用。

requests多线程 vs httpx异步

你可能会想：requests配合多线程不也能实现并发吗？确实可以，但两者的实现机制和资源消耗大不相同。

import requests
import threading
import time
import httpx
import asyncio

# requests多线程方式
def requests_with_threads(urls, timeout=10.0):
    """Fetch all URLs concurrently, one ``threading.Thread`` per URL.

    Args:
        urls: Iterable of URL strings to GET.
        timeout: Seconds to wait on each request before giving up, so a
            stalled server cannot keep ``join`` waiting forever.

    Returns:
        Elapsed time in seconds, covering thread start-up, all requests
        and the printing of each result line.
    """
    # perf_counter is monotonic, so clock adjustments cannot skew the result.
    start = time.perf_counter()
    urls = list(urls)
    # One pre-allocated slot per URL: each worker writes only its own index,
    # so the report follows input order instead of completion order.
    outcomes = [None] * len(urls)

    def fetch(index, url):
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            # Keep the failure so it shows up in the report rather than
            # only as a traceback printed from the worker thread.
            outcomes[index] = exc
        else:
            outcomes[index] = response.status_code

    threads = [
        threading.Thread(target=fetch, args=(index, url))
        for index, url in enumerate(urls)
    ]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    for url, outcome in zip(urls, outcomes):
        if isinstance(outcome, int):
            print(f"获取 {url}，状态码: {outcome}")
        else:
            # An exception instance, or None if the worker died unexpectedly.
            print(f"请求失败: {url} ({outcome})")

    return time.perf_counter() - start

# Comparison run: time the threaded requests version and the async httpx
# version on the same URL list and print each elapsed time in seconds.
print("requests多线程耗时:", requests_with_threads(test_urls))
print("httpx异步耗时:", asyncio.run(async_httpx(test_urls)))

虽然多线程requests也能实现并发，但每个线程都需要独立的系统资源，线程切换也有开销。而httpx的异步模型在单个线程内通过事件循环管理所有请求，资源利用率更高，特别是在高并发场景下优势更明显。

正确使用asyncio的最佳实践

要充分发挥httpx的异步优势，必须正确使用asyncio。常见错误包括混用同步代码、不恰当的任务管理等。

# 正确的httpx异步使用方式
async def proper_async_fetch():
    """Fetch ten httpbin URLs with a shared client and bounded concurrency.

    One ``httpx.AsyncClient`` with a 10-second timeout serves every request,
    a semaphore caps the number of requests in flight at 5, and failures are
    collected by ``asyncio.gather(..., return_exceptions=True)`` instead of
    aborting the batch. One line is printed per request.
    """
    # Share a single client instance across all requests.
    async with httpx.AsyncClient(timeout=httpx.Timeout(10.0)) as client:
        # Cap concurrency: at most 5 requests run at the same time.
        semaphore = asyncio.Semaphore(5)

        async def limited_get(url):
            """Return the decoded JSON body, or the raw text if not JSON."""
            async with semaphore:
                response = await client.get(url)
                content_type = response.headers.get('content-type', '')
                if 'application/json' in content_type:
                    return response.json()
                return response.text

        urls = [f"https://httpbin.org/get?id={i}" for i in range(10)]
        results = await asyncio.gather(
            *(limited_get(url) for url in urls),
            return_exceptions=True,
        )

        for result in results:
            if isinstance(result, Exception):
                print(f"请求失败: {result}")
            elif isinstance(result, dict):
                print(f"获取数据: {result.get('args', {}).get('id')}")
            else:
                # limited_get returns plain text for non-JSON bodies (and
                # JSON may decode to a non-dict); calling .get on those
                # would raise AttributeError, so report them separately.
                print(f"获取到非JSON数据: {str(result)[:80]}")

# Run the example coroutine to completion on a fresh event loop.
asyncio.run(proper_async_fetch())

这段代码展示了几个最佳实践：使用上下文管理器管理Client实例、通过Semaphore控制并发量、正确处理异常、合理设置超时等。

性能对比的量化分析

为了更客观地比较性能，我们设计一个量化测试：

import statistics
import matplotlib.pyplot as plt

async def benchmark():
    """Time 100 GETs to a 1-second-delay endpoint with three strategies.

    Each strategy (sequential ``requests``, ``requests`` on a 10-thread
    pool, and ``httpx.AsyncClient`` with ``asyncio.gather``) is run for 5
    rounds. The mean round time of each is printed and drawn as a bar chart.

    The two ``requests`` phases are blocking calls, so they stall the event
    loop this coroutine runs on while they execute.

    Raises:
        requests.RequestException: If a blocking request fails or times out.
        httpx.HTTPError: If an async request fails or times out.
    """
    # Imported locally so this snippet does not rely on a file-level import.
    from concurrent.futures import ThreadPoolExecutor

    test_url = "https://httpbin.org/delay/1"
    num_requests = 100
    num_rounds = 5
    num_threads = 10  # the thread count advertised in the chart label
    request_timeout = 30.0  # seconds; keeps one stalled request from hanging a round

    def blocking_get(_index=None):
        # The argument is ignored; it only lets pool.map drive this function.
        requests.get(test_url, timeout=request_timeout)

    # Sequential requests.
    sync_times = []
    for _ in range(num_rounds):
        start = time.perf_counter()
        for _ in range(num_requests):
            blocking_get()
        sync_times.append(time.perf_counter() - start)

    # requests on a pool of exactly num_threads worker threads.
    thread_times = []
    for _ in range(num_rounds):
        start = time.perf_counter()
        with ThreadPoolExecutor(max_workers=num_threads) as pool:
            # list() drains the iterator so worker exceptions surface here.
            list(pool.map(blocking_get, range(num_requests)))
        thread_times.append(time.perf_counter() - start)

    # Async httpx: all requests issued concurrently on one client.
    async def run_async():
        async with httpx.AsyncClient(timeout=request_timeout) as client:
            await asyncio.gather(
                *(client.get(test_url) for _ in range(num_requests))
            )

    async_times = []
    for _ in range(num_rounds):
        start = time.perf_counter()
        # Await directly: asyncio.run() raises RuntimeError when called from
        # a coroutine that is already running on an event loop.
        await run_async()
        async_times.append(time.perf_counter() - start)

    sync_mean = statistics.mean(sync_times)
    thread_mean = statistics.mean(thread_times)
    async_mean = statistics.mean(async_times)

    # Report the mean round time of each strategy.
    print(f"同步requests平均耗时: {sync_mean:.2f}s")
    print(f"多线程requests平均耗时: {thread_mean:.2f}s")
    print(f"异步httpx平均耗时: {async_mean:.2f}s")

    # Bar chart of the three means.
    # NOTE(review): the Chinese labels presumably need a CJK-capable
    # matplotlib font to render correctly — confirm on the target machine.
    plt.bar(['同步requests', '多线程requests(10)', '异步httpx'],
            [sync_mean, thread_mean, async_mean])
    plt.ylabel('完成100个请求的总时间(s)')
    plt.title('HTTP客户端性能对比')
    plt.show()

# Entry point: run the benchmark coroutine on a new event loop.
asyncio.run(benchmark())