当前位置: 首页 > 工具软件 > PDFBox > 使用案例 >

PDF读取(PDFBox)

商畅
2023-12-01

package com.tanling.act;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;

import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;

public class PdfReader {
    public PdfReader(){}
    /**
     * 得到pdf文本
     * @param filePath    文件路径
     * @return
     *
     * @author 谭领
     */
    public String getTextFromPDF(String filePath) {//@date 2008-8-22
        //pdf文本
        String result = null;
        //pdf输入
        FileInputStream is = null;  
        PDDocument document = null;  
        try {  
            is = new FileInputStream(filePath);  
            PDFParser parser = new PDFParser(is);  
            parser.parse();  
            document = parser.getPDDocument();  
            PDFTextStripper stripper = new PDFTextStripper();  
            result = stripper.getText(document);  
        } catch (FileNotFoundException e) {  
            // TODO Auto-generated catch block  
            e.printStackTrace();  
        } catch (IOException e) {  
            // TODO Auto-generated catch block  
            e.printStackTrace();  
        } finally {  
            if (is != null) {  
                try {  
                    is.close();  
                } catch (IOException e) {  
                    // TODO Auto-generated catch block  
                    e.printStackTrace();  
                }  
            }  
            if (document != null) {  
                try {  
                    document.close();  
                } catch (IOException e) {  
                    // TODO Auto-generated catch block  
                    e.printStackTrace();  
                }  
            }  
        }  
        return result;  
    }  
}

 类似资料: