所需要的maven汇总:
<repositories> <repository> <id>com.e-iceblue</id> <url>https://repo.e-iceblue.cn/repository/maven-public/</url> </repository> </repositories> <dependencies> <dependency> <groupId>net.sourceforge.jexcelapi</groupId> <artifactId>jxl</artifactId> <version>2.6.12</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.15</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.15</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml-schemas</artifactId> <version>3.15</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>3.15</version> </dependency> <dependency> <groupId>e-iceblue</groupId> <artifactId>spire.presentation.free</artifactId> <version>3.9.0</version> </dependency> <dependency> <groupId>commons-io</groupId> <artifactId>commons-io</artifactId> <version>2.4</version> </dependency> <dependency> <groupId>org.projectlombok</groupId> <artifactId>lombok</artifactId> <version>1.18.22</version> </dependency> </dependencies>
Pptx:
import com.spire.presentation.Presentation;
import content.Content;
import org.apache.poi.sl.usermodel.Shape;
import org.apache.poi.sl.usermodel.Slide;
import org.apache.poi.sl.usermodel.SlideShow;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFTable;
import org.apache.poi.xslf.usermodel.XSLFTableCell;
import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;
/**
 * Extracts the text runs, table cell text and embedded images of a
 * PowerPoint {@code .pptx} presentation.
 */
public class ReadPptx {

    /**
     * Reads a {@code .pptx} file, prints its text to standard output and writes
     * every embedded image next to the given path prefix.
     *
     * <p>The returned text has the layout the original implementation produced:
     * each slide contributes {@code "!!"}, each shape a single space followed by
     * its text runs, then a line break, the marker {@code "表格:"} and the table
     * cells (cells separated by a space, rows terminated by a comma). The first
     * three characters and the last character of that raw text are dropped.
     *
     * @param fileName  path of the presentation to read
     * @param limit     maximum number of characters to print; {@code 0} (or any
     *                  non-positive value) prints the whole text
     * @param readPhoto path prefix the image files are written under; the files
     *                  are named {@code <readPhoto>PPTXphoto<i>.jpg}
     * @return the complete extracted text (never truncated by {@code limit}), or
     *         an empty string when the text could not be extracted
     */
    public static String PPTXutil(String fileName, int limit, String readPhoto) {
        String str = "";
        try {
            str = extractText(fileName);
            printLimited(str, limit);
            saveImages(fileName, readPhoto);
        } catch (Exception e) {
            // Best effort, like the sibling readers: report the failure and
            // return whatever text was extracted so far.
            e.printStackTrace();
        }
        return str;
    }

    /** Collects the shape text and the table text of every slide into one string. */
    private static String extractText(String fileName) throws IOException {
        StringBuilder sb = new StringBuilder();
        // One parse of the file serves both passes; both resources are closed
        // even when parsing fails.
        try (InputStream is = new FileInputStream(fileName);
             XMLSlideShow slideShow = new XMLSlideShow(is)) {
            for (XSLFSlide slide : slideShow.getSlides()) {
                CTSlide rawSlide = slide.getXmlObject();
                CTGroupShape tree = rawSlide.getCSld().getSpTree();
                sb.append("!!");
                for (CTShape shape : tree.getSpArray()) {
                    sb.append(" ");
                    CTTextBody body = shape.getTxBody();
                    if (body == null) {
                        continue;
                    }
                    for (CTTextParagraph paragraph : body.getPArray()) {
                        for (CTRegularTextRun run : paragraph.getRArray()) {
                            sb.append(run.getT());
                        }
                    }
                }
            }
            sb.append("\n");
            sb.append("表格:");
            for (XSLFSlide slide : slideShow.getSlides()) {
                for (Object shape : slide.getShapes()) {
                    if (shape instanceof XSLFTable) {
                        appendTable(sb, (XSLFTable) shape);
                    }
                }
            }
        }
        // Kept from the original output format: drop the first three characters
        // (normally the leading "!! ") and the final character. The buffer always
        // holds at least the four-character "\n表格:" marker, so neither call can
        // go out of range.
        sb.delete(0, 3);
        sb.delete(sb.length() - 1, sb.length());
        return sb.toString();
    }

    /** Appends the cell texts of one table: a space after each cell, a comma after each row. */
    private static void appendTable(StringBuilder sb, XSLFTable table) {
        int rowSize = table.getNumberOfRows();
        int columnSize = table.getNumberOfColumns();
        for (int rowNum = 0; rowNum < rowSize; rowNum++) {
            for (int columnNum = 0; columnNum < columnSize; columnNum++) {
                XSLFTableCell cell = table.getCell(rowNum, columnNum);
                // A row may hold fewer cells than the table has columns; in that
                // case there is no cell object to read text from.
                if (cell != null) {
                    sb.append(cell.getText());
                }
                sb.append(" ");
            }
            sb.append(",");
        }
    }

    /** Prints the whole text, or only its first {@code limit} characters when that is shorter. */
    private static void printLimited(String text, int limit) {
        if (limit <= 0 || limit >= text.length()) {
            System.out.println(text);
        } else {
            System.out.println(text.substring(0, limit));
        }
    }

    /** Writes every image of the presentation to {@code <readPhoto>PPTXphoto<i>.jpg}. */
    private static void saveImages(String fileName, String readPhoto) throws Exception {
        Presentation ppt = new Presentation();
        ppt.loadFromFile(fileName);
        for (int i = 0; i < ppt.getImages().getCount(); i++) {
            BufferedImage image = ppt.getImages().get(i).getImage();
            // The path is concatenated rather than used as a format pattern so a
            // '%' inside readPhoto cannot break it.
            // NOTE(review): the data is PNG-encoded although the file is named .jpg;
            // both are kept unchanged so existing consumers keep finding the files.
            ImageIO.write(image, "PNG", new File(readPhoto + "PPTX" + "photo" + i + ".jpg"));
        }
    }

    public static void main(String[] args) {
        // fileName is the path of the file to read; readPhoto is the path prefix
        // the extracted images are saved under.
        PPTXutil("E://ss//666.pptx", 0, "src//main//java//photo//");
    }
}
Ppt:
import com.spire.presentation.Presentation;
import content.Content;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
/**
 * Extracts the text and the embedded images of a legacy PowerPoint
 * {@code .ppt} presentation.
 */
public class ReadPpt {

    /**
     * Reads a {@code .ppt} file, prints its text to standard output and writes
     * every embedded image next to the given path prefix.
     *
     * @param fileName  path of the presentation to read
     * @param limit     maximum number of characters to keep; {@code 0} (or any
     *                  non-positive value) keeps the whole text
     * @param readPhoto path prefix the image files are written under; the files
     *                  are named {@code <readPhoto>pptphoto<i>.jpg}
     * @return the extracted text, cut to at most {@code limit} characters when a
     *         positive limit is given; an empty string when extraction failed
     */
    public static String PPTutil(String fileName, int limit, String readPhoto) {
        String str = "";
        try {
            // try-with-resources closes both the extractor and the stream, and
            // never touches an extractor that failed to construct (the previous
            // finally block dereferenced a null extractor when the file was missing).
            try (InputStream in = new FileInputStream(fileName);
                 PowerPointExtractor extractor = new PowerPointExtractor(in)) {
                str = extractor.getText();
            }
            // Only truncate when the text is actually longer than the limit;
            // substring(0, limit) would throw otherwise.
            if (limit > 0 && limit < str.length()) {
                str = str.substring(0, limit);
            }
            System.out.println(str);

            Presentation photo = new Presentation();
            photo.loadFromFile(fileName);
            for (int i = 0; i < photo.getImages().getCount(); i++) {
                BufferedImage image = photo.getImages().get(i).getImage();
                // The path is concatenated rather than used as a format pattern so
                // a '%' inside readPhoto cannot break it.
                // NOTE(review): the data is PNG-encoded although the file is named
                // .jpg; both are kept unchanged so existing consumers keep working.
                ImageIO.write(image, "PNG", new File(readPhoto + "ppt" + "photo" + i + ".jpg"));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return str;
    }

    public static void main(String[] args) throws IOException {
        // fileName is the path of the file to read; readPhoto is the path prefix
        // the extracted images are saved under.
        PPTutil("E:\\ss\\555.ppt", 0, "src//main//java//photo//");
    }
}
Doc:
import content.Content;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Extracts the text and the embedded pictures of a legacy Word {@code .doc}
 * document.
 */
public class ReadDoc {

    /**
     * Reads a {@code .doc} file, prints its text to standard output and writes
     * every picture found in its character runs into the given directory.
     *
     * @param fileName  path of the document to read
     * @param limit     maximum number of characters to keep; {@code 0} (or any
     *                  non-positive value) keeps the whole text
     * @param readPhoto directory the pictures are written to; the files are
     *                  named {@code <readPhoto>/DocPhoto<i>.jpg}
     * @return the extracted text, cut to at most {@code limit} characters when a
     *         positive limit is given; an empty string when extraction failed
     */
    public static String wordutil(String fileName, int limit, String readPhoto) {
        String str = "";
        try {
            // Text pass: the extractor and its stream are closed as soon as the
            // text has been read.
            try (InputStream textIn = new FileInputStream(new File(fileName));
                 WordExtractor wex = new WordExtractor(textIn)) {
                str = wex.getText();
            }
            // Only truncate when the text is actually longer than the limit;
            // substring(0, limit) would throw otherwise.
            if (limit > 0 && limit < str.length()) {
                str = str.substring(0, limit);
            }
            System.out.println(str);

            // Picture pass: walk the character runs and export each picture.
            try (InputStream docIn = new FileInputStream(fileName)) {
                HWPFDocument doc = new HWPFDocument(docIn);
                Range range = doc.getRange();
                byte[] dataStream = doc.getDataStream();
                PicturesTable pTable = new PicturesTable(doc, dataStream, new byte[1024]);
                int numChar = range.numCharacterRuns();
                int saved = 0;
                for (int j = 0; j < numChar; ++j) {
                    CharacterRun cRun = range.getCharacterRun(j);
                    if (!pTable.hasPicture(cRun)) {
                        continue;
                    }
                    Picture picture = pTable.extractPicture(cRun, true);
                    // Each output stream is closed right after its picture is
                    // written instead of being left open.
                    try (OutputStream out =
                             new FileOutputStream(readPhoto + "/" + "DocPhoto" + saved + ".jpg")) {
                        picture.writeImageContent(out);
                    }
                    saved++;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return str;
    }

    public static void main(String[] args) {
        // fileName is the path of the file to read; readPhoto is the directory
        // the extracted pictures are saved in.
        wordutil("E:\\ss\\333.doc", 0, "src/main/java/photo");
    }
}
Docx:
package word;
import content.Content;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
/**
 * Extracts the text and the embedded pictures of a Word {@code .docx}
 * document.
 */
public class ReadDocx {

    /** Pictures whose data is not larger than this many bytes are not exported. */
    private static final int MIN_PICTURE_BYTES = 300;

    /**
     * Reads a {@code .docx} file, prints its text to standard output and writes
     * its pictures next to the given path prefix.
     *
     * @param fileName  path of the document to read
     * @param limit     maximum number of characters to print; {@code 0} (or any
     *                  non-positive value) prints the whole text
     * @param readPhoto path prefix the picture files are written under; the
     *                  files are named {@code <readPhoto>Docxphoto<i>.jpg}, where
     *                  {@code i} is the picture's position among all pictures
     * @return the complete extracted text (never truncated by {@code limit}), or
     *         an empty string when the document could not be read
     */
    public static String wordutil(String fileName, int limit, String readPhoto) {
        String str = "";
        // Stream and document are closed exactly once, on success and on failure.
        try (FileInputStream fis = new FileInputStream(new File(fileName));
             XWPFDocument xdoc = new XWPFDocument(fis)) {
            XWPFWordExtractor extractor = new XWPFWordExtractor(xdoc);
            str = extractor.getText();
            // Print a prefix only when the text is longer than the limit;
            // substring(0, limit) would throw otherwise.
            if (limit > 0 && limit < str.length()) {
                System.out.println(str.substring(0, limit));
            } else {
                System.out.println(str);
            }

            int index = 0;
            for (XWPFPictureData pic : xdoc.getAllPictures()) {
                byte[] data = pic.getData();
                if (data.length > MIN_PICTURE_BYTES) {
                    try (FileOutputStream fos =
                             new FileOutputStream(readPhoto + "Docx" + "photo" + index + ".jpg")) {
                        fos.write(data);
                    }
                }
                // The index advances for skipped pictures too, so file names keep
                // matching the picture's position in the document's picture list.
                index++;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return str;
    }

    public static void main(String[] args) {
        // fileName is the path of the file to read; readPhoto is the path prefix
        // the extracted pictures are saved under.
        wordutil("E:\\ss\\222.Docx", 0, "src/main/java/photo/");
    }
}
Xlsx:
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * Dumps the cell contents of every sheet of an Excel {@code .xlsx} workbook
 * into one string.
 */
public class XlsxReader {

    /**
     * Reads all sheets of an {@code .xlsx} workbook and prints the result to
     * standard output.
     *
     * <p>The result is a bracketed, space-separated token list: every row
     * contributes a line-break token, every non-null cell contributes its
     * {@code toString()} text followed by a {@code "|"} token, e.g.
     * {@code "[\n a | b |]"}.
     *
     * @param fileUrl path of the workbook to read
     * @return the dumped cell contents, or an empty string when the workbook
     *         could not be read
     */
    public static String searchExcelXlsx(String fileUrl) {
        String str = "";
        File file = new File(fileUrl);
        try (InputStream is = new FileInputStream(file.getAbsolutePath());
             XSSFWorkbook hb = new XSSFWorkbook(is)) {
            // Iterate over the sheets. The previous code used the number of cell
            // styles here, which made getSheetAt fail as soon as the index passed
            // the real sheet count.
            int sheetCount = hb.getNumberOfSheets();
            System.out.println("表单数量:" + sheetCount);

            StringBuilder out = new StringBuilder("[");
            for (int i = 0; i < sheetCount; i++) {
                Sheet sheet = hb.getSheetAt(i);
                int firstrow = sheet.getFirstRowNum();
                int lastrow = sheet.getLastRowNum();
                for (int j = firstrow; j <= lastrow; j++) {
                    appendToken(out, "\n");
                    Row row = sheet.getRow(j);
                    if (row == null) {
                        continue;
                    }
                    int firstcell = row.getFirstCellNum();
                    int lastcell = row.getLastCellNum();
                    for (int k = firstcell; k < lastcell; k++) {
                        Cell cell = row.getCell(k);
                        if (cell != null) {
                            appendToken(out, cell.toString());
                            appendToken(out, "|");
                        }
                    }
                }
            }
            out.append("]");
            str = out.toString();
            System.out.println(str);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return str;
    }

    /**
     * Appends one token, separated from the previous one by a single space.
     * Building the text directly keeps commas that belong to cell values, which
     * the earlier list-to-string-then-strip-commas approach removed.
     */
    private static void appendToken(StringBuilder out, String token) {
        // Length 1 means only the opening bracket has been written so far.
        if (out.length() > 1) {
            out.append(' ');
        }
        out.append(token);
    }

    public static void main(String[] args) {
        // The argument is the path of the file to read.
        searchExcelXlsx("E://ss//999.xlsx");
    }
}
Xls:
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;
import java.io.*;
/**
 * Dumps the cell contents of every sheet of a legacy Excel {@code .xls}
 * workbook into one string.
 */
public class XlsReader {

    /**
     * Reads all sheets of an {@code .xls} workbook row by row and prints the
     * result to standard output. Every cell's content is followed by one space.
     *
     * @param fileName path of the workbook to read
     * @param limit    maximum number of characters to keep; {@code 0} (or any
     *                 non-positive value) keeps the whole text
     * @return the cell contents, cut to at most {@code limit} characters when a
     *         positive limit is given; an empty string when the workbook could
     *         not be opened
     */
    public static String excelutil(String fileName, int limit) {
        StringBuilder builder = new StringBuilder();
        File file = new File(fileName);
        Workbook wb = null;
        try (InputStream is = new FileInputStream(file.getAbsolutePath())) {
            wb = Workbook.getWorkbook(is);
            int sheetCount = wb.getNumberOfSheets();
            for (int index = 0; index < sheetCount; index++) {
                Sheet sheet = wb.getSheet(index);
                for (int i = 0; i < sheet.getRows(); i++) {
                    for (int j = 0; j < sheet.getColumns(); j++) {
                        builder.append(sheet.getCell(j, i).getContents());
                        builder.append(" ");
                    }
                }
            }
        } catch (IOException | BiffException e) {
            e.printStackTrace();
        } finally {
            if (wb != null) {
                wb.close();
            }
        }

        String str = builder.toString();
        // Only truncate when the text is actually longer than the limit; the
        // previous unconditional substring(0, limit) threw an uncaught
        // StringIndexOutOfBoundsException for short or unreadable workbooks.
        if (limit > 0 && limit < str.length()) {
            str = str.substring(0, limit);
        }
        System.out.println(str);
        return str;
    }

    public static void main(String[] args) {
        // fileName is the path of the file to read.
        excelutil("E:\\ss\\111.xls", 0);
    }
}
Rtf:
import javax.swing.text.DefaultStyledDocument; import javax.swing.text.rtf.RTFEditorKit; import java.io.File; import java.io.FileInputStream; import java.io.InputStream; public class ReadRtf { public static String RTFutil(String fileName) { String str = null; File file = new File(fileName); try { DefaultStyledDocument styledDoc = new DefaultStyledDocument(); InputStream is = new FileInputStream(file); new RTFEditorKit().read(is, styledDoc, 0); str = new String(styledDoc.getText(0,styledDoc.getLength()).getBytes("ISO8859_1"),"GBK"); } catch (Exception e) { e.printStackTrace(); } System.out.println(str); return str; } public static void main(String[] args) { RTFutil("E://ss//777.rtf"); }