POI操作latex公式转mathml生成word文档

冀望
2023-12-01

POI操作latex公式转mathml生成word文档

转发请注明出处,谢谢!

 

我的上一篇文章已经讲解了用 iText 操作的方法,这篇就直接上干货。效果图、注意事项、功能说明及 iText 的操作方法,请见:

http://blog.csdn.net/qq_35834998/article/details/79170388 (该文章与本文紧密相关)

 

 

Itext,POI操作---项目中所有的依赖

<!-- https://mvnrepository.com/artifact/commons-codec/commons-codec -->
<dependency>
   <groupId>commons-codec</groupId>
   <artifactId>commons-codec</artifactId>
   <version>1.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/dom4j/dom4j -->
<dependency>
   <groupId>dom4j</groupId>
   <artifactId>dom4j</artifactId>
   <version>1.6.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/stax/stax-api -->
<dependency>
   <groupId>stax</groupId>
   <artifactId>stax-api</artifactId>
   <version>1.0.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/xml-apis/xml-apis -->
<dependency>
   <groupId>xml-apis</groupId>
   <artifactId>xml-apis</artifactId>
   <version>1.4.01</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.xmlbeans/xmlbeans -->
<dependency>
   <groupId>org.apache.xmlbeans</groupId>
   <artifactId>xmlbeans</artifactId>
   <version>2.6.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.lowagie/itext -->
<dependency>
   <groupId>com.lowagie</groupId>
   <artifactId>itext</artifactId>
   <version>2.1.7</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.itextpdf/itext-asian -->
<!-- NOTE(review): the original listing had no <version> here, which only resolves when a
     parent POM / dependencyManagement section supplies one. 5.2.0 is assumed; confirm it
     against the project, or drop the element again if the version is managed elsewhere. -->
<dependency>
   <groupId>com.itextpdf</groupId>
   <artifactId>itext-asian</artifactId>
   <version>5.2.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.lowagie/itext-rtf -->
<dependency>
   <groupId>com.lowagie</groupId>
   <artifactId>itext-rtf</artifactId>
   <version>2.1.7</version>
</dependency>
<dependency>
   <groupId>fmath</groupId>
   <artifactId>fmath-mathml-java</artifactId>
   <version>3.1</version>
</dependency>
<dependency>
   <groupId>org.scilab.forge</groupId>
   <artifactId>jlatexmath</artifactId>
   <version>1.0.6</version>
</dependency>

<dependency>
   <groupId>jdom</groupId>
   <artifactId>jdom-jar</artifactId>
   <version>2.0.6</version>
</dependency>

<!-- All Apache POI modules are kept on the same release (3.16). The listing previously also
     declared poi-scratchpad 3.9, which duplicated the 3.16 declaration below with a
     conflicting version; that duplicate has been removed. -->
<dependency>
   <groupId>org.apache.poi</groupId>
   <artifactId>poi-ooxml</artifactId>
   <version>3.16</version>
</dependency>
<dependency>
   <groupId>org.apache.poi</groupId>
   <artifactId>poi-scratchpad</artifactId>
   <version>3.16</version>
</dependency>
<dependency>
   <groupId>org.apache.poi</groupId>
   <artifactId>ooxml-schemas</artifactId>
   <version>1.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jdom/jdom2 -->
<dependency>
   <groupId>org.jdom</groupId>
   <artifactId>jdom2</artifactId>
   <version>2.0.6</version>
</dependency>

<dependency>
   <groupId>org.apache.commons</groupId>
   <artifactId>commons-lang3</artifactId>
   <version>3.0</version>
</dependency>

<!-- Dependency required for converting LaTeX to MathML -->
<dependency>
   <groupId>uk.ac.ed.ph.snuggletex</groupId>
   <artifactId>snuggletex-core</artifactId>
   <version>1.2.2</version>
</dependency>

 

直接上代码 :

/**
 * Spring Boot test that writes questions (stem, options and analysis) into a Word
 * {@code .docx} file with Apache POI.
 *
 * <p>Text segments that look like LaTeX are converted to MathML, transformed to Office Math
 * (OMML) with the {@code MML2OMML.XSL} stylesheet and embedded as native Word equations.
 * When that conversion fails the formula is rendered to a PNG and embedded as a picture.
 */
@RunWith(SpringRunner.class)
@SpringBootTest
@Slf4j
public class TextTest {

    /** Directory that receives the generated document and the intermediate PNG files. */
    private static final String OUTPUT_DIR = "E:\\题库文档\\test06\\";

    /**
     * Matches one formula written between {@code \[} and {@code \]}, after every backslash in
     * the text has been doubled (see {@link #splitFormulas(String)}). Group 1 is the formula body.
     */
    private static final Pattern FORMULA_PATTERN = Pattern.compile("\\\\\\\\\\[(.*?)\\\\\\\\\\]");

    /** A segment containing at least one lower-case letter is treated as a LaTeX formula. */
    private static final Pattern LOWER_CASE_PATTERN = Pattern.compile("\\p{Lower}");

    @Autowired
    private QuesService quesService;

    /** SnuggleTeX engine; not referenced by any active code path in this class. */
    private SnuggleEngine engine = new SnuggleEngine();

    /** Stylesheet from the local Microsoft Office installation, used by {@link #getOMML(String)}. */
    static File stylesheet = new File("C:\\Program Files\\Microsoft Office\\root\\Office16\\MML2OMML.XSL");
    static TransformerFactory tFactory = TransformerFactory.newInstance();
    static StreamSource stylesource = new StreamSource(stylesheet);

    /**
     * Transforms a MathML string into an Office Math object.
     *
     * @param mathML the MathML markup to transform
     * @return the first {@code oMath} element of the transformation result, with the
     *         "Cambria Math" font set on every math run
     * @throws TransformerException if the XSL transformation fails
     * @throws IOException          if closing the intermediate reader/writer fails
     * @throws XmlException         if the transformation result cannot be parsed
     */
    static CTOMath getOMML(String mathML) throws TransformerException, IOException, XmlException {
        Transformer transformer = tFactory.newTransformer(stylesource);

        String ooML;
        try (StringReader reader = new StringReader(mathML);
             StringWriter writer = new StringWriter()) {
            transformer.transform(new StreamSource(reader), new StreamResult(writer));
            ooML = writer.toString();
        }

        CTOMathPara ctOMathPara = CTOMathPara.Factory.parse(ooML);
        CTOMath ctOMath = ctOMathPara.getOMathArray(0);

        // For making this work with Office 2007 Word also, special font settings are necessary.
        XmlCursor xmlcursor = ctOMath.newCursor();
        try {
            while (xmlcursor.hasNextToken()) {
                XmlCursor.TokenType tokentype = xmlcursor.toNextToken();
                if (tokentype.isStart() && xmlcursor.getObject() instanceof CTR) {
                    CTR cTR = (CTR) xmlcursor.getObject();
                    CTFonts fonts = cTR.addNewRPr2().addNewRFonts();
                    fonts.setAscii("Cambria Math");
                    fonts.setHAnsi("Cambria Math");
                }
            }
        } finally {
            // Cursors hold references into the XML store and must be released explicitly.
            xmlcursor.dispose();
        }

        return ctOMath;
    }

    /**
     * Exports a fixed list of questions to {@code text.docx}: for each question one paragraph
     * for the stem, one for the options and one for the analysis.
     */
    @Test
    public void test() throws Exception {
        List<String> stringList = Arrays.asList("01162767-2f7d-458d-8945-877a81fd3361",
                                                "0040110f-2c26-4865-b999-c8754f5803e3",
                                                "04e03596-eb46-4acc-9446-e492f1e664ba",
                                                "01ab3b66-81ce-47ee-946e-51247190a9b1",
                                                "03275325-4dbb-4d7b-a700-602156faaa2e",
                                                "200b40af-9b46-4f76-9856-e77007ff83ca",
                                                "0188832e-53f2-4223-bfc7-c9083adc572c"
                                                );

        XWPFDocument document = new XWPFDocument();
        try {
            // Running question number printed in front of each stem.
            int number = 0;
            for (String id : stringList) {
                Ques ques = quesService.getQuesFromId(id);

                // Stem paragraph: segments are written in their original order.
                XWPFParagraph paragraph = newSpacedParagraph(document);
                writeSegments(paragraph, document, titleGroup(++number, ques));

                // Options paragraph.
                paragraph = newSpacedParagraph(document);
                String options = ques.getOptions();
                String[] splitOptions = options == null ? new String[0] : options.split("\\|\\|\\|");
                for (int i = 0; i < splitOptions.length; i++) {
                    // createRun() already appends the run to the paragraph.
                    paragraph.createRun().setText(chooseTag(i));
                    if (splitOptions[i].contains("http")) {
                        // The option is an image URL: download it and embed the picture.
                        String image = download(splitOptions[i], OUTPUT_DIR + i + ".png");
                        writeImage(paragraph, document, image);
                    } else {
                        writeSegments(paragraph, document, optionsGroup(splitOptions[i]));
                    }
                }

                // Analysis paragraph.
                paragraph = newSpacedParagraph(document);
                writeSegments(paragraph, document, analysisGroup(ques));
            }

            // The stream is closed even when writing fails.
            try (FileOutputStream out = new FileOutputStream(OUTPUT_DIR + "text.docx")) {
                document.write(out);
            }
        } finally {
            document.close();
        }
    }

    /** Creates a new paragraph at the end of the document with a spacing-after of 200. */
    private XWPFParagraph newSpacedParagraph(XWPFDocument document) {
        XWPFParagraph paragraph = document.createParagraph();
        paragraph.setSpacingAfter(200);
        return paragraph;
    }

    /** Writes every segment, in order, into the paragraph via {@link #splitWrite}. */
    private void writeSegments(XWPFParagraph paragraph, XWPFDocument document, String[] segments) {
        for (int i = 0; i < segments.length; i++) {
            splitWrite(paragraph, document, i, segments);
        }
    }

    /**
     * Downloads a sample image from a fixed URL and stores it as a PNG file.
     */
    @Test
    public void testCreateParagraphWithImg() throws Exception {
        BufferedImage read = ImageIO.read(new URL("https://img0.xuehuilema.com/70e93c38-0a35-4944-8d82-a9c00c451d46.png"));
        System.out.println(read);
        if (read == null) {
            // ImageIO.read returns null when no registered reader understands the data.
            throw new IOException("Downloaded data is not a readable image");
        }
        ImageIO.write(read, "png", new File(OUTPUT_DIR + "123.png"));
    }

    /**
     * Writes one segment into the paragraph. A segment without lower-case letters is written as
     * plain text with all {@code $} characters removed. Any other segment is treated as LaTeX
     * and written as a Word equation, falling back to a rendered PNG when the conversion fails.
     *
     * @param paragraph target paragraph
     * @param document  document owning the paragraph (needed for picture numbering)
     * @param i         index of the segment in {@code split}; also used in the PNG file name
     * @param split     all segments of the current text
     */
    private void splitWrite(XWPFParagraph paragraph, XWPFDocument document, Integer i, String[] split) {
        String segment = split[i];
        if (!LOWER_CASE_PATTERN.matcher(segment).find()) {
            paragraph.createRun().setText(segment.replace("$", ""));
            return;
        }
        try {
            latex2mathMl(paragraph, segment);
        } catch (Exception e) {
            log.warn("MathML conversion failed for segment [{}]; falling back to an image", segment, e);
            try {
                String image = latexImage(segment, OUTPUT_DIR + i + ".png");
                writeImage(paragraph, document, image);
            } catch (Exception e1) {
                // Best effort: the segment is left out, the rest of the document is still produced.
                log.error("Could not render segment [{}] as an image; segment skipped", segment, e1);
            }
        }
    }

    /**
     * Image-only alternative to {@link #splitWrite}: segments treated as LaTeX are always
     * rendered to a PNG and embedded as a picture; other segments are written unchanged.
     */
    private void splitWriteImageLatex(XWPFParagraph paragraph, XWPFDocument document, Integer i, String[] split) throws Exception {
        String segment = split[i];
        if (!LOWER_CASE_PATTERN.matcher(segment).find()) {
            paragraph.createRun().setText(segment);
        } else {
            String image = latexImage(segment, OUTPUT_DIR + i + ".png");
            writeImage(paragraph, document, image);
        }
    }

    /**
     * Converts a LaTeX formula to MathML, then to OMML, and appends it to the paragraph.
     */
    private void latex2mathMl(XWPFParagraph paragraph, String latex) throws XmlException, TransformerException, IOException {
        String mathML = fmath.conversion.ConvertFromLatexToMathML.convertToMathML(latex);
        // Declare the MathML namespace on the root element before the XSL transformation.
        mathML = mathML.replaceFirst("<math ", "<math xmlns=\"http://www.w3.org/1998/Math/MathML\" ");
        // Convert first, so that no empty oMath element is added when the conversion throws.
        CTOMath converted = getOMML(mathML);
        CTP ctp = paragraph.getCTP();
        ctp.addNewOMath().set(converted);
    }

    /**
     * Sets the horizontal and vertical alignment of a paragraph; {@code null} arguments leave
     * the corresponding setting untouched.
     */
    public void setParagraphAlignInfo(XWPFParagraph paragraph,
                                      ParagraphAlignment pAlign, TextAlignment vAlign) {
        if (pAlign != null) {
            paragraph.setAlignment(pAlign);
        }
        if (vAlign != null) {
            paragraph.setVerticalAlignment(vAlign);
        }
    }

    /**
     * Sets paragraph spacing. Per the original author's note: one line = 100, one point = 20.
     *
     * @param isSpace     whether the before/after values are applied
     * @param before      spacing before the paragraph, or {@code null} to skip
     * @param after       spacing after the paragraph, or {@code null} to skip
     * @param beforeLines spacing before in lines, or {@code null} to skip
     * @param afterLines  spacing after in lines, or {@code null} to skip
     * @param isLine      whether the line-spacing values are applied
     * @param line        line spacing value, or {@code null} to skip
     * @param lineValue   line spacing rule, or {@code null} to skip
     */
    public void setParagraphSpacingInfo(XWPFParagraph paragraph, boolean isSpace,
                                        String before, String after, String beforeLines, String afterLines,
                                        boolean isLine, String line, STLineSpacingRule.Enum lineValue) {
        CTPPr pPPr = getParagraphCTPPr(paragraph);
        CTSpacing pSpacing = pPPr.getSpacing() != null ? pPPr.getSpacing()
                : pPPr.addNewSpacing();
        if (isSpace) {
            if (before != null) {
                pSpacing.setBefore(new BigInteger(before));
            }
            if (after != null) {
                pSpacing.setAfter(new BigInteger(after));
            }
            if (beforeLines != null) {
                pSpacing.setBeforeLines(new BigInteger(beforeLines));
            }
            if (afterLines != null) {
                pSpacing.setAfterLines(new BigInteger(afterLines));
            }
        }
        if (isLine) {
            if (line != null) {
                pSpacing.setLine(new BigInteger(line));
            }
            if (lineValue != null) {
                pSpacing.setLineRule(lineValue);
            }
        }
    }

    /**
     * Returns the paragraph properties ({@code CTPPr}) of the paragraph, creating them when
     * absent. Returns {@code null} when the paragraph has no underlying {@code CTP}.
     */
    public CTPPr getParagraphCTPPr(XWPFParagraph p) {
        CTPPr pPPr = null;
        if (p.getCTP() != null) {
            if (p.getCTP().getPPr() != null) {
                pPPr = p.getCTP().getPPr();
            } else {
                pPPr = p.getCTP().addNewPPr();
            }
        }
        return pPPr;
    }

    /**
     * Returns a run of the paragraph.
     *
     * @param isInsert  {@code true} to always create a new run; {@code false} to reuse the
     *                  first existing run (a run is created when the paragraph has none)
     * @param isNewLine {@code true} to add a break to the returned run
     */
    public XWPFRun getOrAddParagraphFirstRun(XWPFParagraph paragraph, boolean isInsert,
                                             boolean isNewLine) {
        XWPFRun pRun;
        if (isInsert) {
            pRun = paragraph.createRun();
        } else if (paragraph.getRuns() != null && paragraph.getRuns().size() > 0) {
            pRun = paragraph.getRuns().get(0);
        } else {
            pRun = paragraph.createRun();
        }
        if (isNewLine) {
            pRun.addBreak();
        }
        return pRun;
    }

    /**
     * Sets the text (when not blank), the font family and the font size of a run.
     *
     * @param fontSize decimal string passed to {@link BigInteger#BigInteger(String)}
     */
    public void setParagraphRunFontInfo(XWPFParagraph paragraph, XWPFRun pRun,
                                        String content, String fontFamily, String fontSize) {
        CTRPr pRpr = getRunCTRPr(paragraph, pRun);
        if (StringUtils.isNotBlank(content)) {
            pRun.setText(content);
        }
        // Font family.
        CTFonts fonts = pRpr.isSetRFonts() ? pRpr.getRFonts() : pRpr
                .addNewRFonts();
        fonts.setAscii(fontFamily);
        fonts.setEastAsia(fontFamily);
        fonts.setHAnsi(fontFamily);
        // Font size.
        CTHpsMeasure sz = pRpr.isSetSz() ? pRpr.getSz() : pRpr.addNewSz();
        sz.setVal(new BigInteger(fontSize));
        CTHpsMeasure szCs = pRpr.isSetSzCs() ? pRpr.getSzCs() : pRpr
                .addNewSzCs();
        szCs.setVal(new BigInteger(fontSize));
    }

    /**
     * Returns the run properties ({@code CTRPr}) of the run, creating them when absent. When
     * the run has no underlying {@code CTR}, a new run with properties is added to the paragraph.
     */
    public CTRPr getRunCTRPr(XWPFParagraph p, XWPFRun pRun) {
        CTRPr pRpr;
        if (pRun.getCTR() != null) {
            pRpr = pRun.getCTR().getRPr();
            if (pRpr == null) {
                pRpr = pRun.getCTR().addNewRPr();
            }
        } else {
            pRpr = p.getCTP().addNewR().addNewRPr();
        }
        return pRpr;
    }

    /**
     * Builds the inline-drawing XML for a picture and appends it to a new run of the paragraph.
     *
     * @param blipId    relationship id of the picture data
     * @param id        numeric id used in the picture and drawing names
     * @param width     picture width in pixels
     * @param height    picture height in pixels
     * @param paragraph target paragraph; when {@code null} the drawing is created in a new,
     *                  otherwise unused document
     * @throws IllegalStateException if the generated picture XML cannot be parsed
     */
    private void createPicture(String blipId, int id, int width, int height,
                              XWPFParagraph paragraph) {
        // Pixel-to-EMU factor; presumably 914400 EMU per inch at 96 dpi.
        final int EMU = 9525;
        width *= EMU;
        height *= EMU;
        if (paragraph == null) {
            XWPFDocument document = new XWPFDocument();
            paragraph = document.createParagraph();
        }
        CTInline inline = paragraph.createRun().getCTR().addNewDrawing()
                .addNewInline();
        String picXml = ""
                + "<a:graphic xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\">"
                + "   <a:graphicData uri=\"http://schemas.openxmlformats.org/drawingml/2006/picture\">"
                + "      <pic:pic xmlns:pic=\"http://schemas.openxmlformats.org/drawingml/2006/picture\">"
                + "         <pic:nvPicPr>" + "            <pic:cNvPr id=\""
                + id
                + "\" name=\"img_"
                + id
                + "\"/>"
                + "            <pic:cNvPicPr/>"
                + "         </pic:nvPicPr>"
                + "         <pic:blipFill>"
                + "            <a:blip r:embed=\""
                + blipId
                + "\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"/>"
                + "            <a:stretch>"
                + "               <a:fillRect/>"
                + "            </a:stretch>"
                + "         </pic:blipFill>"
                + "         <pic:spPr>"
                + "            <a:xfrm>"
                + "               <a:off x=\"0\" y=\"0\"/>"
                + "               <a:ext cx=\""
                + width
                + "\" cy=\""
                + height
                + "\"/>"
                + "            </a:xfrm>"
                + "            <a:prstGeom prst=\"rect\">"
                + "               <a:avLst/>"
                + "            </a:prstGeom>"
                + "         </pic:spPr>"
                + "      </pic:pic>"
                + "   </a:graphicData>" + "</a:graphic>";
        XmlToken xmlToken;
        try {
            xmlToken = XmlToken.Factory.parse(picXml);
        } catch (XmlException xe) {
            // Continuing with a null token would only fail later with a less useful error.
            throw new IllegalStateException("Generated picture XML could not be parsed", xe);
        }
        inline.set(xmlToken);
        inline.setDistT(0);
        inline.setDistB(0);
        inline.setDistL(0);
        inline.setDistR(0);
        CTPositiveSize2D extent = inline.addNewExtent();
        extent.setCx(width);
        extent.setCy(height);
        CTNonVisualDrawingProps docPr = inline.addNewDocPr();
        docPr.setId(id);
        docPr.setName("docx_img_ " + id);
        docPr.setDescr("docx Picture");
    }

    /**
     * Downloads an image and stores it locally as a PNG file.
     *
     * @param url             image URL; a leading {@code http://} is upgraded to {@code https://}
     * @param savePathAndName target file path
     * @return {@code savePathAndName}
     * @throws IOException if the downloaded data is not a readable image
     */
    private String download(String url, String savePathAndName) throws Exception {
        // Only the scheme is rewritten. A blanket replaceAll("http", "https") would turn an
        // https URL into "httpss://..." and also corrupt any "http" inside the path or query.
        if (url.startsWith("http://")) {
            url = "https://" + url.substring("http://".length());
        }
        BufferedImage read = ImageIO.read(new URL(url));
        if (read == null) {
            // ImageIO.read returns null when no registered reader understands the data.
            throw new IOException("Not a readable image: " + url);
        }
        ImageIO.write(read, "png", new File(savePathAndName));
        return savePathAndName;
    }

    /**
     * Splits a text into alternating plain-text and formula segments.
     *
     * <p>Every backslash is doubled first, then each formula written as {@code \[ ... \]} is
     * replaced by its body wrapped in {@code $...$}, and the text is split at the formula
     * boundaries. The regex replacement collapses the doubled backslashes inside a formula back
     * to single ones; backslashes in plain-text segments stay doubled.
     *
     * @param text the text to split; {@code null} yields an empty array
     * @return the segments in document order
     */
    private String[] splitFormulas(String text) {
        if (text == null) {
            return new String[0];
        }
        String escaped = text.replaceAll("\\\\", "\\\\\\\\");
        // Mark every formula position, then substitute the formulas one by one in order.
        String marked = FORMULA_PATTERN.matcher(escaped).replaceAll("@@@");
        Matcher mather = FORMULA_PATTERN.matcher(escaped);
        while (mather.find()) {
            String formula = mather.group(1);
            marked = marked.replaceFirst("@@@", "@@\\$" + formula + "\\$@@");
        }
        return marked.split("@@");
    }

    /**
     * Builds the segments of the question stem, prefixed with {@code "<number>."}.
     */
    private String[] titleGroup(Integer number, Ques ques) {
        return splitFormulas(number + "." + ques.getTitle());
    }

    /**
     * Returns the label written in front of the option at the given index: A for 0, B for 1,
     * C for 2 and D for every other index.
     */
    private String chooseTag(Integer i) {
        switch (i) {
            case 0:
                return "A:";
            case 1:
                return "        B:";
            case 2:
                return "        C:";
            default:
                return "        D:";
        }
    }

    /**
     * Builds the segments of the question analysis.
     */
    private String[] analysisGroup(Ques ques) {
        return splitFormulas(ques.getAnalysis());
    }

    /**
     * Builds the segments of a single option.
     */
    private String[] optionsGroup(String option) {
        return splitFormulas(option);
    }

    /**
     * Renders a LaTeX formula to a PNG file with a white background.
     *
     * @param formulaStr the LaTeX source
     * @param path       target file path
     * @return {@code path}
     */
    private String latexImage(String formulaStr, String path) throws IOException, BadElementException {
        TeXFormula tf = new TeXFormula(formulaStr);
        TeXIcon ti = tf.createTeXIcon(TeXConstants.STYLE_DISPLAY, 15);
        BufferedImage bimg = new BufferedImage(ti.getIconWidth(), ti.getIconHeight(), BufferedImage.TYPE_4BYTE_ABGR);
        Graphics2D g2d = bimg.createGraphics();
        try {
            g2d.setColor(Color.white);
            g2d.fillRect(0, 0, ti.getIconWidth(), ti.getIconHeight());
            JLabel jl = new JLabel();
            jl.setForeground(new Color(0, 0, 0));
            ti.paintIcon(jl, g2d, 0, 0);
        } finally {
            // Release the graphics context once painting is done.
            g2d.dispose();
        }
        ImageIO.write(bimg, "png", new File(path));
        return path;
    }

    /**
     * Embeds a PNG file into the paragraph at the image's pixel size.
     *
     * @param paragraph target paragraph
     * @param document  document used to obtain the next picture number
     * @param image     path of the PNG file
     * @throws IOException if the file cannot be read as an image
     */
    private void writeImage(XWPFParagraph paragraph, XWPFDocument document, String image) throws IOException, InvalidFormatException {
        File file = new File(image);
        BufferedImage read = ImageIO.read(file);
        if (read == null) {
            // ImageIO.read returns null when no registered reader understands the file.
            throw new IOException("Not a readable image: " + image);
        }

        String blipId;
        try (FileInputStream in = new FileInputStream(file)) {
            blipId = paragraph.getDocument().addPictureData(in, Document.PICTURE_TYPE_PNG);
        }
        createPicture(blipId,
                document.getNextPicNameNumber(Document.PICTURE_TYPE_PNG), read.getWidth(), read.getHeight(), paragraph);
    }

}

 

如果在操作中遇到任何问题,欢迎留言,博主会及时回复,并给出意见和解决办法。博客中写得不好的地方,还望谅解!

 

 

 

 

 类似资料: