引入依赖
<!-- Core PDFBox library: provides PDDocument and PDFTextStripper. -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.1</version>
</dependency>
<!-- FontBox is released together with PDFBox; keep its version identical to
     the pdfbox version so mismatched classes do not end up on the classpath. -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>fontbox</artifactId>
    <version>2.0.1</version>
</dependency>
<!-- JempBox (XMP metadata) comes from the 1.8.x release line.
     NOTE(review): confirm it is still needed alongside pdfbox 2.x. -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>jempbox</artifactId>
    <version>1.8.11</version>
</dependency>
使用
try {
InputStream stream = httpUtil.getStream(url);
PDDocument helloDocument = PDDocument.load(stream);
PDFTextStripper textStripper = new PDFTextStripper();
System.out.println(textStripper.getText(helloDocument));
} catch (IOException e) {
logger.error("读取文件数据失败,参考信息:" + e.getMessage());
}
参考文章