gzip — 读写 GNU zip 压缩文件
优质
小牛编辑
133浏览
2023-12-01
写压缩文件
# gzip_write.py
import gzip
import io
import os
# Write a short UTF-8 text file through gzip compression, then report the
# size and detected MIME type of the resulting archive.
outfilename = 'example.txt.gz'

# gzip.open(..., 'wb') yields a binary stream; TextIOWrapper layers the
# str -> UTF-8 bytes encoding on top of it.
with gzip.open(outfilename, 'wb') as output:
    with io.TextIOWrapper(output, encoding='utf-8') as enc:
        enc.write('Contents of the example file go here.\n')

# Both context managers have exited here, so the archive is closed before
# its size on disk is measured.
print(outfilename, 'contains', os.stat(outfilename).st_size,
      'bytes')
os.system('file -b --mime {}'.format(outfilename))
# gzip_compresslevel.py
import gzip
import io
import os
import hashlib
def get_hash(data):
    """Return the MD5 digest of *data* as a hexadecimal string.

    *data* is passed straight to ``hashlib.md5``, so it must be a
    bytes-like object.
    """
    return hashlib.md5(data).hexdigest()
# Compare the output size and checksum produced by each gzip compression
# level (0-9) for the same input text.

# Read the sample text and repeat it 1024 times. The context manager closes
# the file deterministically instead of leaving that to garbage collection.
with open('lorem.txt', 'r', encoding='utf-8') as source:
    data = source.read() * 1024
cksum = get_hash(data.encode('utf-8'))

print('Level Size Checksum')
print('----- ---------- ---------------------------------')
# NOTE: len(data) counts characters; it equals the UTF-8 byte count only
# when the text is pure ASCII.
print('data {:>10} {}'.format(len(data), cksum))

for i in range(0, 10):
    filename = 'compress-level-{}.gz'.format(i)
    with gzip.open(filename, 'wb', compresslevel=i) as output:
        with io.TextIOWrapper(output, encoding='utf-8') as enc:
            enc.write(data)

    # The archive is closed at this point, so its size and contents on
    # disk are final.
    size = os.stat(filename).st_size
    with open(filename, 'rb') as compressed:
        cksum = get_hash(compressed.read())
    print('{:>5d} {:>10d} {}'.format(i, size, cksum))
# gzip_writelines.py
import gzip
import io
import itertools
import os
# Write the same line ten times with writelines(), then dump the
# decompressed archive with the external `gzcat` command.
with gzip.open('example_lines.txt.gz', 'wb') as output:
    with io.TextIOWrapper(output, encoding='utf-8') as enc:
        # writelines() accepts any iterable of strings; itertools.repeat
        # supplies the line lazily, ten times.
        enc.writelines(
            itertools.repeat('The same line, over and over.\n',
                             10)
        )

# Runs after the archive has been closed.
os.system('gzcat example_lines.txt.gz')
读压缩数据
# gzip_read.py
import gzip
import io
# Read a gzip-compressed file back as text: gzip.open(..., 'rb') yields the
# decompressed bytes and TextIOWrapper decodes them as UTF-8.
with gzip.open('example.txt.gz', 'rb') as input_file:
    with io.TextIOWrapper(input_file, encoding='utf-8') as dec:
        print(dec.read())
# gzip_seek.py
import gzip
# Demonstrate seeking inside a compressed file: read everything, then seek
# to an offset and check that a partial read matches the same slice of the
# full contents.
with gzip.open('example.txt.gz', 'rb') as input_file:
    print('Entire file:')
    all_data = input_file.read()
    print(all_data)

    # Reference value: bytes 5..14 of the decompressed data.
    expected = all_data[5:15]

    # rewind to beginning
    input_file.seek(0)

    # move ahead 5 bytes (positions refer to the uncompressed stream)
    input_file.seek(5)
    print('Starting at position 5 for 10 bytes:')
    partial = input_file.read(10)
    print(partial)

    print()
    print(expected == partial)
处理流
# gzip_BytesIO.py
import gzip
from io import BytesIO
import binascii
# Compress and decompress entirely in memory by pointing GzipFile at a
# BytesIO buffer instead of a file on disk.
uncompressed_data = b'The same line, over and over.\n' * 10
print('UNCOMPRESSED:', len(uncompressed_data))
print(uncompressed_data)

buf = BytesIO()
with gzip.GzipFile(mode='wb', fileobj=buf) as f:
    f.write(uncompressed_data)

# Fetch the bytes only after the GzipFile has been closed, so that all of
# the compressed output has been written to the buffer.
compressed_data = buf.getvalue()
print('COMPRESSED:', len(compressed_data))
print(binascii.hexlify(compressed_data))

inbuffer = BytesIO(compressed_data)
with gzip.GzipFile(mode='rb', fileobj=inbuffer) as f:
    # Request exactly as many bytes as were originally written.
    reread_data = f.read(len(uncompressed_data))

print('\nREREAD:', len(reread_data))
print(reread_data)