# 有时候 PDF 文件太大不好处理，以下代码会把文件夹下的每个 PDF 各自拆分成三等份。
from PyPDF2 import PdfFileReader, PdfFileWriter
import os
def cut_pdf(pdf, diff, pages, count, parts=3):
    """Build a writer holding one slice of ``pdf``.

    Args:
        pdf: Reader object exposing ``getPage(index)``.
        diff: Number of pages per slice (total pages divided by ``parts``).
        pages: Total number of pages in ``pdf``.
        count: Zero-based index of the slice to extract.
        parts: Total number of slices the document is divided into.
            Defaults to 3, which matches the original hard-coded behavior.

    Returns:
        A ``PdfFileWriter`` containing pages ``count * diff`` up to the end
        of the slice; the last slice runs to the final page.
    """
    pdf_writer = PdfFileWriter()
    start = count * diff
    # The page count may not be a multiple of ``parts``, so the final slice
    # absorbs whatever remains instead of stopping at a multiple of ``diff``.
    end = pages if count == parts - 1 else diff * (count + 1)
    # Clamp to the document length: a rounded ``diff`` can overshoot the
    # last page for some page/part combinations.
    for i in range(start, min(end, pages)):
        pdf_writer.addPage(pdf.getPage(i))
    return pdf_writer
def get_pdfpath(root='./data_pdf/'):
    """Collect the paths of all PDF files under ``root``.

    Args:
        root: Directory to search recursively. Defaults to ``'./data_pdf/'``,
            the folder the original implementation always scanned.

    Returns:
        A list of paths (directory joined with file name) for every file
        whose name ends with ``.pdf``, compared case-insensitively. The list
        is empty when ``root`` does not exist or holds no PDF files.
    """
    pdfpath = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            # Skip non-PDF files so the reader is never handed other formats.
            if filename.lower().endswith('.pdf'):
                pdfpath.append(os.path.join(dirpath, filename))
    return pdfpath
# Driver: split every PDF found under ``filepath`` into three output files
# named ``<original path>_temp1.pdf`` .. ``<original path>_temp3.pdf``.
filepath = './data_pdf/'
pdfpath = get_pdfpath(filepath)
for path in pdfpath:  # iterate over the collected PDF paths
    # Open the source explicitly in binary mode so the handle is closed when
    # done; it stays open while the slices are written because the writer may
    # still read page content from the source stream.
    with open(path, 'rb') as source:
        pdf = PdfFileReader(source)  # read the PDF
        pages = len(pdf.pages)  # total number of pages
        # Pages per part, rounded to the nearest integer (not rounded up);
        # the last part takes whatever remains.
        diff = round(pages / 3)
        for i in range(0, 3):  # three parts, hence range(0, 3)
            pdf_writer = cut_pdf(pdf, diff, pages, i)
            # PDF output is bytes, so the target must be opened in binary mode.
            with open('{}_temp{}.pdf'.format(path, i + 1), 'wb') as page_data:
                pdf_writer.write(page_data)