Reading PDF tables with Python

佟和安
2023-12-01
import csv
import re
from pathlib import Path

import pdfplumber

# Extract every table from one PDF in the "input" directory and write all of
# their rows to a CSV file of the same base name in the "output" directory.
file_name = "your_fileb.pdf"
path = Path("input") / file_name
# with_suffix swaps only the extension; str.replace("pdf", "csv") would also
# rewrite any other "pdf" occurring in the file name.
output_path = Path("output") / Path(file_name).with_suffix(".csv")
output_path.parent.mkdir(parents=True, exist_ok=True)

# The PDF is opened first so a missing input does not leave an empty CSV
# behind. newline="" hands line-ending control to the csv module. The context
# managers close both files even if extraction fails part-way through.
with pdfplumber.open(path) as pdf, open(output_path, "w", newline="") as output:
    writer = csv.writer(output)

    for index, page in enumerate(pdf.pages, start=1):
        # Progress indicator on stdout, one line per page.
        print("page ", index)

        for pdf_table in page.extract_tables():
            for row in pdf_table:
                # Empty cells arrive as None and are written as empty fields.
                # Line breaks inside a cell are flattened to spaces so each
                # table row stays on a single CSV line. csv.writer quotes any
                # cell that itself contains a comma or a quote character.
                writer.writerow(
                    ["" if cell is None else cell.replace("\n", " ") for cell in row]
                )
 类似资料:

相关阅读

相关文章

相关问答