# Import the urllib3 module.
import urllib3

# A PoolManager object handles all the details of connection pooling
# and thread safety.
http = urllib3.PoolManager()

# Requests are issued through request().
robots_url = 'http://httpbin.org/robots.txt'
r = http.request('GET', robots_url)
print(r.data)  # b'User-agent: *\nDisallow: /deny\n'
import json

import urllib3

http = urllib3.PoolManager()
# Send one form field with a POST request, then print the response body
# after decoding it as UTF-8 and parsing it as JSON.
r = http.request('POST', 'https://httpbin.org/post', fields={'hello': 'world'})
body_text = r.data.decode('utf-8')
print(json.loads(body_text))
status、data 和 headers 属性:
import urllib3
http = urllib3.PoolManager()
r = http.request('GET', "http://httpbin.org/ip")

# Response status code.
print(r.status)  # 200

# Response body (raw bytes).
print(r.data)  # b'{\n "origin": "120.239.165.180"\n}\n'

# Response headers.
print(r.headers)  # HTTPHeaderDict({'Date': 'Fri, 22 Apr 2022 02:34:03 GMT', 'Content-Type': 'application/json', 'Content-Length': '34', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true'})
JSON 内容可以通过对响应的 data 属性进行解码和反序列化来加载:
import json

import urllib3

http = urllib3.PoolManager()
r = http.request('GET', 'https://httpbin.org/ip')
# r.data is bytes: decode it to text first, then deserialize the JSON.
decoded_body = r.data.decode('utf-8')
json_data = json.loads(decoded_body)
print(json_data)  # {'origin': '120.239.165.180'}
响应的 data 属性始终是表示响应内容的字节字符串:
import json

import urllib3

http = urllib3.PoolManager()
# Fetch a binary payload; r.data holds it as a bytes object.
r = http.request('GET', 'http://httpbin.org/bytes/8')
print(r.data)  # b'\xa6\xe6\xb6\x7f\xc1\xd7\xbb9'
有时需要用 io.TextIOWrapper 或类似的对象直接读取 HTTPResponse 数据。要让这两个接口配合工作,需要将 auto_close 属性设置为 False。默认情况下,读取完所有字节后 HTTP 响应会被关闭,将 auto_close 设为 False 会禁用该行为:
import io

import urllib3

http = urllib3.PoolManager()
# preload_content=False leaves the body unread so it can be consumed
# through a file-like wrapper.
r = http.request('GET', 'https://www.qq.com', preload_content=False)
# Keep the response open after its last byte has been read, so that
# io.TextIOWrapper can keep working with it.
r.auto_close = False
# The loop body must be indented; without it the snippet does not parse.
for line in io.TextIOWrapper(r):
    print(line)
import json

import urllib3

http = urllib3.PoolManager()
# Attach a custom header to the request through the headers argument.
custom_headers = {'X-Something': 'value'}
r = http.request('GET', 'https://httpbin.org/headers', headers=custom_headers)
# Print the 'headers' entry of the JSON response.
json_data_headers = json.loads(r.data.decode('utf-8'))['headers']
print(json_data_headers)
A、对于 GET、HEAD 和 DELETE 请求,只需将查询参数以字典形式通过 request() 的 fields 参数传入。
import json

import urllib3

http = urllib3.PoolManager()
# For a GET request the fields dictionary is sent as query parameters.
query = {'get_key': 'get_value'}
r = http.request('GET', 'https://httpbin.org/get', fields=query)
json_data_args = json.loads(r.data.decode('utf-8'))['args']
print(json_data_args)  # {'get_key': 'get_value'}
import json

import urllib3

http = urllib3.PoolManager()
# The same fields argument on a POST request: the 'args' entry of the
# response comes back empty (see the printed result below).
r = http.request('POST', 'https://httpbin.org/post', fields={'post_key': 'post_value'})
parsed = json.loads(r.data.decode('utf-8'))
json_data_form = parsed['args']
print(json_data_form)  # {}
B、对于 POST 和 PUT 请求,需要手动将查询参数编码到 URL 中。
import json
from urllib.parse import urlencode

import urllib3

http = urllib3.PoolManager()
# Encode the query parameters by hand and append them to the URL.
encoded_args = urlencode({'arg': 'value'})
url = 'https://httpbin.org/post?' + encoded_args
r = http.request('POST', url)
response_json = json.loads(r.data.decode('utf-8'))
print(response_json['args'])  # {'arg': 'value'}
对于 PUT 和 POST 请求,urllib3 会自动对传给 request() 的 fields 参数中的字典进行表单编码:
import json

import urllib3

http = urllib3.PoolManager()
# On a POST request the fields dictionary shows up under the 'form'
# entry of the response (see the printed result below).
form_fields = {'post_key': 'post_value'}
r = http.request('POST', 'https://httpbin.org/post', fields=form_fields)
json_data_form = json.loads(r.data.decode('utf-8'))['form']
print(json_data_form)  # {'post_key': 'post_value'}
import json

import urllib3

http = urllib3.PoolManager()

# Serialize the payload to JSON and encode it to bytes for the body.
data = {'attribute': 'value'}
encoded_data = json.dumps(data).encode('utf-8')
print(encoded_data)  # b'{"attribute": "value"}'

# Declare the body's media type explicitly through the Content-Type header.
headers = {'Content-Type': 'application/json'}
r = http.request('POST', 'https://httpbin.org/post', body=encoded_data, headers=headers)
response_json = json.loads(r.data.decode('utf-8'))
print(response_json['json'])  # {'attribute': 'value'}
import json

import urllib3

http = urllib3.PoolManager()
# Read the whole file as text; the body of the with-statement must be
# indented, otherwise the snippet does not parse.
with open('example.txt', mode='r', encoding='utf-8') as fp:
    file_data = fp.read()
r = http.request(
    'POST', 'https://httpbin.org/post',
    fields={
        # A file field is specified as a (file_name, file_data) tuple.
        'filefield': ('example.txt', file_data),
    })
json_data_files = json.loads(r.data.decode('utf-8'))['files']
print(json_data_files)  # {'filefield': '..contents of the text file, with line breaks shown as "\n"..'}
import json

import urllib3

http = urllib3.PoolManager()
# Read the whole file as text; the body of the with-statement must be
# indented, otherwise the snippet does not parse.
with open('example.txt', mode='r', encoding='utf-8') as fp:
    file_data = fp.read()
r = http.request(
    'POST', 'https://httpbin.org/post',
    fields={
        # The third tuple element specifies the file's MIME type.
        'filefield': ('example.txt', file_data, 'text/plain'),
    })
json_data_files = json.loads(r.data.decode('utf-8'))['files']
print(json_data_files)  # {'filefield': '..contents of the text file, with line breaks shown as "\n"..'}
import json

import urllib3

http = urllib3.PoolManager()
# Read the image as raw bytes; the body of the with-statement must be
# indented, otherwise the snippet does not parse.
with open('img001.jpg', mode='rb') as fp:
    binary_data = fp.read()
r = http.request(
    'POST', 'https://httpbin.org/post',
    # To send binary data, pass it directly as the body argument.
    body=binary_data,
    headers={
        # Content-Type states the type of the file being sent.
        'Content-type': 'image/jpeg',
    },
)
json_data = json.loads(r.data.decode('utf-8'))['data']
print(json_data)
import urllib3

http = urllib3.PoolManager()
delay_url = 'https://httpbin.org/delay/3'

# A request that exceeds the 4.0 second timeout is aborted with an error.
r = http.request('GET', delay_url, timeout=4.0)
print(r)  # <urllib3.response.HTTPResponse>

# A request that exceeds the 2.5 second timeout is aborted with an error.
r1 = http.request('GET', delay_url, timeout=2.5)
print(r1)  # MaxRetryError caused by ReadTimeoutError
# Python version: 3.6
# -*- coding:utf-8 -*-
import urllib3

http = urllib3.PoolManager()

# Only the connect phase is limited here.
connect_only = urllib3.Timeout(connect=1.0)
r = http.request('GET', 'http://httpbin.org/delay/3', timeout=connect_only)
print(r)  # <urllib3.response.HTTPResponse>

# Separate limits for the connect and read phases.
connect_and_read = urllib3.Timeout(connect=1.0, read=2.0)
r1 = http.request('GET', 'http://httpbin.org/delay/3', timeout=connect_and_read)
print(r1)  # MaxRetryError caused by ReadTimeoutError
import urllib3

# A timeout given to the PoolManager applies to the requests made through it.
http = urllib3.PoolManager(timeout=3.0)

# Same idea: requests share this timeout, here with separate connect
# and read limits.
pool_timeout = urllib3.Timeout(connect=1.0, read=2.0)
http = urllib3.PoolManager(timeout=pool_timeout)
import urllib3

http = urllib3.PoolManager()
# The method is request(), not requests(); the misspelled name raises
# AttributeError on a PoolManager.
r = http.request('GET', 'http://httpbin.org/ip', retries=10)  # retry up to 10 times
import urllib3

http = urllib3.PoolManager()

# With retries disabled, a failed connection surfaces directly as a
# NewConnectionError instead of being retried:
# r = http.request('GET', 'http://nxdomain.example.com', retries=False)
# print(r.status)  # raises NewConnectionError

# With retries=False the redirect response itself is returned.
redirect_url = 'https://httpbin.org/redirect/1'
r1 = http.request('GET', redirect_url, retries=False)
print(r1.status)  # 302
允许的重定向次数不足以到达最终页面时会报错。
比如:下方的代码 redirect=2 表示最多允许重定向 2 次,而实际需要重定向 3 次(还差 1 次),故报错。
# Python version: 3.6
# -*- coding:utf-8 -*-
import urllib3

http = urllib3.PoolManager()
# Retry(5, redirect=2): 5 retries in total, of which at most 2 may be
# redirects, so the redirect chain of this URL is not followed to its end.
retry_policy = urllib3.Retry(5, redirect=2)
r = http.request('GET', 'http://httpbin.org/redirect/3', retries=retry_policy)
print(r.status)  # fails with MaxRetryError
允许的重定向次数足够到达最终页面时不会报错。
比如:下方的代码 redirect=3 表示最多允许重定向 3 次,实际共需要重定向 3 次,故没有报错。
# Python version: 3.6
# -*- coding:utf-8 -*-
import urllib3

http = urllib3.PoolManager()
# Retry(5, redirect=3): 5 retries in total, of which at most 3 may be
# redirects, which is enough to follow this URL's redirects to the end.
retry_policy = urllib3.Retry(5, redirect=3)
r = http.request('GET', 'http://httpbin.org/redirect/3', retries=retry_policy)
print(r.status)  # 200
分析上方的重定向
Retry(5, redirect=3):总共重试 5 次,但仅限于 3 次重定向
重定向由 http://httpbin.org/redirect/3 重定向到 http://httpbin.org/get,中间重定向3次,次数少了会报错
-------------------------------------
次数 重定向到(指定的网页)
1 http://httpbin.org/redirect/2
2 http://httpbin.org/redirect/1
3 http://httpbin.org/get
import urllib3

http = urllib3.PoolManager()
# Allow at most 2 redirects and pass raise_on_redirect=False; the
# request then yields the 302 response shown below.
retry_policy = urllib3.Retry(redirect=2, raise_on_redirect=False)
r = http.request('GET', 'http://httpbin.org/redirect/3', retries=retry_policy)
print(r.status)  # 302
import urllib3

# Disable retries at the PoolManager level.
http = urllib3.PoolManager(retries=False)
import urllib3

# Fine-grained control: set the total number of retries and the number
# of redirects at the PoolManager level.
pool_retries = urllib3.Retry(5, redirect=2)
http = urllib3.PoolManager(retries=pool_retries)
import urllib3

http = urllib3.PoolManager()
# With retries disabled the connection failure is raised directly and
# can be caught. The try/except bodies must be indented, otherwise the
# snippet does not parse.
try:
    http.request('GET', 'nx.example.com', retries=False)
except urllib3.exceptions.NewConnectionError as e:
    print('连接失败!', e)
更改 urllib3 记录器的日志级别
# The logging module must be imported before its logger API can be used.
import logging

# Raise the "urllib3" logger's level to WARNING so that lower-severity
# records are no longer emitted.
logging.getLogger("urllib3").setLevel(logging.WARNING)
import urllib3

# When making requests to many different hosts, raising num_pools may
# improve performance, at the cost of more memory and sockets.
http = urllib3.PoolManager(num_pools=50)
import urllib3

# Use one of the two forms below.
http = urllib3.PoolManager(maxsize=10)
# A connection pool is created for a host name, not for a URL; HTTPS
# hosts use HTTPSConnectionPool.
http = urllib3.HTTPSConnectionPool('cn.bing.com', maxsize=10)
在处理大型响应时,最好将响应内容流式传输
import urllib3

http = urllib3.PoolManager()
# preload_content=False defers reading the body, so it can be consumed
# piece by piece instead of being loaded into memory at once.
r = http.request(
    'GET',
    'https://httpbin.org/bytes/1024',
    preload_content=False)
# stream() iterates over the response content in chunks; the loop body
# must be indented, otherwise the snippet does not parse.
for chunk in r.stream(32):
    print(chunk)
# Return the connection to the pool so that it can be reused.
r.release_conn()
import urllib3

http = urllib3.PoolManager()
# preload_content=False defers reading the body so that it can be read
# like a file object.
response = http.request(
    'GET',
    'https://httpbin.org/bytes/1024',
    preload_content=False)
print(response.read(4))  # b'\xae\x95\n\xc2'
# Return the connection to the pool so that it can be reused.
response.release_conn()
read() 将阻塞,直到有更多响应数据可用。
import urllib3
import io

http = urllib3.PoolManager()
# preload_content=False defers reading the body so that the response
# can be wrapped in a buffered reader.
r = http.request(
    'GET',
    'https://httpbin.org/bytes/1024',
    preload_content=False)
reader = io.BufferedReader(r, 8)
first_bytes = reader.read(4)
print(first_bytes)  # b'\xcec\x1f\r'
# Release the connection.
r.release_conn()
import codecs
import json

import urllib3

http = urllib3.PoolManager()
# Stream reader class that decodes UTF-8 bytes into text.
reader = codecs.getreader('utf-8')
r = http.request(
    'GET',
    'http://httpbin.org/ip',
    preload_content=False)
# Wrap the unread response in the decoding reader and parse JSON from it.
text_stream = reader(r)
print(json.load(text_stream))  # {'origin': '120.239.165.180'}