问题：

使用PDFBox将PDF转换为PDF/A

甘学潞

2023-03-14

目前我正在尝试将PDF转换为PDF/A。

但是，我不确定能否转换色彩空间，有没有办法做到这一点？

这是我目前的代码：

// Fragment: expects an already loaded PDDocument named "doc" in scope.
// It copies the document information entries into an XMP packet, declares
// PDF/A-3 level A in that packet and attaches an ICC output intent to the catalog.

// Print the document information entries for inspection.
PDDocumentInformation info = doc.getDocumentInformation();
System.out.println("Page Count=" + doc.getNumberOfPages());
System.out.println("Title=" + info.getTitle());
System.out.println("Author=" + info.getAuthor());
System.out.println("Subject=" + info.getSubject());
System.out.println("Keywords=" + info.getKeywords());
System.out.println("Creator=" + info.getCreator());
System.out.println("Producer=" + info.getProducer());
System.out.println("Creation Date=" + info.getCreationDate());
System.out.println("Modification Date=" + info.getModificationDate());
System.out.println("Trapped=" + info.getTrapped());


PDDocumentCatalog cat = doc.getDocumentCatalog();
XMPMetadata xmp = XMPMetadata.createXMPMetadata();

// createAndAdd...() already registers the schema on xmp, so it is not passed
// to xmp.addSchema() again; doing so would register the same schema twice.
PDFAIdentificationSchema pdfaid = xmp.createAndAddPFAIdentificationSchema();
pdfaid.setConformance("A");
pdfaid.setPart(3);
pdfaid.setAboutAsSimple(null);

// Information entries may be absent (null); such values are skipped instead of
// being handed to the XMP setters.
// NOTE(review): Subject and Keywords are printed above but not copied into the
// XMP packet - confirm whether the validator expects them to match.
DublinCoreSchema dublinCoreSchema = xmp.createAndAddDublinCoreSchema();
if (info.getTitle() != null) {
    dublinCoreSchema.setTitle(info.getTitle());
}
if (info.getAuthor() != null) {
    dublinCoreSchema.addCreator(info.getAuthor());
}


AdobePDFSchema adobePDFSchema = xmp.createAndAddAdobePDFSchema();
if (info.getProducer() != null) {
    adobePDFSchema.setProducer(info.getProducer());
}


XMPBasicSchema xmpBasicSchema = xmp.createAndAddXMPBasicSchema();
if (info.getCreator() != null) {
    xmpBasicSchema.setCreatorTool(info.getCreator());
}
if (info.getCreationDate() != null) {
    xmpBasicSchema.setCreateDate(info.getCreationDate());
}
if (info.getModificationDate() != null) {
    xmpBasicSchema.setModifyDate(info.getModificationDate());
}

// Serialize the XMP packet into memory so it can be attached to the catalog below.
XmpSerializer serializer = new XmpSerializer();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
serializer.serialize(xmp, baos, true);


InputStream colorProfile = PdfConverter.class.getResourceAsStream("/sRGBColorSpaceProfile.icm");
if (colorProfile == null) {
    // getResourceAsStream() returns null when the resource is not on the classpath.
    throw new IllegalStateException("Color profile /sRGBColorSpaceProfile.icm not found on the classpath");
}
PDOutputIntent oi;
try {
    oi = new PDOutputIntent(doc, colorProfile);
} finally {
    // The profile stream is no longer needed once the output intent has been built.
    colorProfile.close();
}

oi.setInfo("sRGB IEC61966-2.1");
oi.setOutputCondition("sRGB IEC61966-2.1");
oi.setOutputConditionIdentifier("sRGB IEC61966-2.1");
oi.setRegistryName("http://www.color.org");


// The output intent is attached to the catalog only; this fragment does not
// modify the content or resources of any page.
cat.addOutputIntent(oi);
PDMetadata metadata = new PDMetadata(doc);
metadata.importXMPMetadata(baos.toByteArray());
cat.setMetadata(metadata);

色彩空间已经添加，但在验证时我得到以下错误：

2.3.2 : Unexpected key in Graphic object definition, The ColorSpace is unknown

每个页面/元素都会反复出现这个错误。

我能做些什么来解决这个问题吗？比如转换色彩空间，或者使用其他库？

袁智明

2023-03-14

我找到了一个将PDF转换为PDF/A的技巧，步骤如下：

填写PDF表单
将其转换为图像
按照PDFBox网站上的说明创建一个有效的PDF/A文档
将前面生成的图像放入该文档中

在这个例子中，我使用了 OoPdfFormExample.pdf，它可以在网上轻松找到。

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.preflight.Format;
import org.apache.pdfbox.preflight.PreflightDocument;
import org.apache.pdfbox.preflight.ValidationResult;
import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
import org.apache.pdfbox.preflight.parser.PreflightParser;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.tools.imageio.ImageIOUtil;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
import org.apache.xmpbox.type.BadFieldValueException;
import org.apache.xmpbox.xml.XmpSerializer;

import javax.xml.transform.TransformerException;
import java.awt.image.BufferedImage;
import java.io.*;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Random;

/**
 * Example that produces a PDF/A-1b file from a PDF form by rasterizing it:
 * the form is filled and flattened, rendered to a PNG image, and that image is
 * drawn onto a page of a newly created document carrying XMP metadata and an
 * ICC output intent. The result is then checked with PDFBox Preflight.
 */
public class CreatePDFAFile {

    /** Parent directory under which a randomly named working directory is created. */
    private static final String OUTPUT_DIR = "tmp";
    /** Name separator of the default file system, used when building file names. */
    static String separator = FileSystems.getDefault().getSeparator();


    /**
     * Runs the whole pipeline on {@code template/OoPdfFormExample.pdf}.
     *
     * @param args unused
     * @throws IOException if any step fails to read or write a file
     */
    public static void main(String[] args) throws IOException {
        Path tmpDir = getRandomPath();
        String fileInput = fillForm("template/OoPdfFormExample.pdf", tmpDir);
        String image = PDF2Image(fileInput, tmpDir);
        String pdfa = createPDFA(image, tmpDir);
        checkPDFAValidation(pdfa);
    }

    /**
     * Sets the first form field to {@code "TEST"}, flattens the form and saves
     * the document as {@code FillForm.pdf} inside {@code path}.
     * A document without an AcroForm is saved unchanged.
     *
     * @param formTemplate path of the PDF form to load
     * @param path         directory that receives the output file
     * @return the name of the written file
     * @throws IOException if the template cannot be loaded or the result cannot be saved
     */
    private static String fillForm(String formTemplate, Path path) throws IOException {
        String fileOut = path + separator + "FillForm.pdf";
        try (PDDocument pdfDocument = PDDocument.load(new File(formTemplate))) {
            PDAcroForm acroForm = pdfDocument.getDocumentCatalog().getAcroForm();
            // Every form operation stays inside the null check: the catalog
            // yields null when the document has no AcroForm.
            if (acroForm != null) {
                if (!acroForm.getFields().isEmpty()) {
                    acroForm.getField(acroForm.getFields().get(0).getFullyQualifiedName()).setValue("TEST");
                }
                acroForm.refreshAppearances();
                acroForm.flatten();
            }
            pdfDocument.save(fileOut);
        }
        return fileOut;
    }

    /**
     * Renders every page of {@code fileInput} at 300 DPI (RGB) and writes each
     * one as {@code image-<page>.png} inside {@code path}.
     *
     * <p>An {@link IOException} is reported on {@code System.err} and not
     * rethrown; the value returned in that case is whatever file name had been
     * assigned so far.
     *
     * @param fileInput path of the PDF to render
     * @param path      directory that receives the images
     * @return the name of the last image file assigned, or an empty string if
     *         no page was processed
     */
    public static String PDF2Image(String fileInput, Path path) {
        String fileName = "";
        try (final PDDocument document = PDDocument.load(new File(fileInput))) {
            PDFRenderer pdfRenderer = new PDFRenderer(document);
            for (int page = 0; page < document.getNumberOfPages(); ++page) {
                BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
                fileName = path + separator + "image-" + page + ".png";
                ImageIOUtil.writeImage(bim, fileName, 300);
            }
        } catch (IOException e) {
            System.err.println("Exception while trying to create pdf document - " + e);
        }
        return fileName;
    }

    /**
     * Creates a one-page document with an embedded font, XMP metadata declaring
     * PDF/A part 1 conformance B, an ICC output intent read from
     * {@code template/sRGB.icc}, and the given image drawn on the page. The
     * document is saved as {@code FinalPDFAFile.pdf} inside {@code path}.
     *
     * @param imagePath path of the image to place on the page
     * @param path      directory that receives the output file
     * @return the name of the written file
     * @throws IOException              if the font, profile or image cannot be read, or the
     *                                  document cannot be saved
     * @throws IllegalStateException    if the loaded font is not embedded
     * @throws IllegalArgumentException if the XMP metadata cannot be built or serialized
     */
    public static String createPDFA(String imagePath, Path path) throws IOException {
        try (PDDocument doc = new PDDocument()) {
            PDPage page = new PDPage();
            doc.addPage(page);
            PDFont font = PDType0Font.load(doc, new File("template" + separator + "LiberationSans-Regular.ttf"));
            if (!font.isEmbedded()) {
                throw new IllegalStateException("PDF/A compliance requires that all fonts used for"
                        + " text rendering in rendering modes other than rendering mode 3 are embedded.");
            }
            // Writes an empty text run with the embedded font.
            try (PDPageContentStream contents = new PDPageContentStream(doc, page)) {
                contents.beginText();
                contents.setFont(font, 12);
                contents.newLineAtOffset(100, 700);
                contents.showText("");
                contents.endText();
            }

            // add XMP metadata
            XMPMetadata xmp = XMPMetadata.createXMPMetadata();

            String fileName = path + separator + "FinalPDFAFile.pdf";
            try {
                DublinCoreSchema dc = xmp.createAndAddDublinCoreSchema();
                dc.setTitle(fileName);

                PDFAIdentificationSchema id = xmp.createAndAddPFAIdentificationSchema();
                id.setPart(1);
                id.setConformance("B");

                XmpSerializer serializer = new XmpSerializer();
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                serializer.serialize(xmp, baos, true);

                PDMetadata metadata = new PDMetadata(doc);
                metadata.importXMPMetadata(baos.toByteArray());
                doc.getDocumentCatalog().setMetadata(metadata);
            } catch (BadFieldValueException | TransformerException e) {
                throw new IllegalArgumentException(e);
            }

            // sRGB output intent; the profile stream is closed as soon as the
            // intent has been built and registered.
            // NOTE(review): the descriptive strings are left empty here - confirm
            // whether real values (e.g. the profile name) should be supplied.
            try (InputStream colorProfile = new FileInputStream(new File("template/sRGB.icc"))) {
                PDOutputIntent intent = new PDOutputIntent(doc, colorProfile);
                intent.setInfo("");
                intent.setOutputCondition("");
                intent.setOutputConditionIdentifier("");
                intent.setRegistryName("");
                doc.getDocumentCatalog().addOutputIntent(intent);
            }

            PDImageXObject pdImage = PDImageXObject.createFromFile(imagePath, doc);

            // Append to the existing page content and draw the image at one fifth
            // of its pixel dimensions, offset 20 units from the page origin.
            try (PDPageContentStream contentStream = new PDPageContentStream(doc, page, PDPageContentStream.AppendMode.APPEND, true, true)) {
                float scale = 1 / 5f;
                contentStream.drawImage(pdImage, 20, 20, pdImage.getWidth() * scale, pdImage.getHeight() * scale);
            }
            doc.save(fileName);
            return fileName;
        }
    }

    /**
     * Validates {@code fileName} against PDF/A-1b with Preflight and prints the
     * outcome, including every reported error, to {@code System.out}.
     *
     * @param fileName path of the file to validate
     * @throws IOException if the file cannot be parsed or validated
     */
    private static void checkPDFAValidation(String fileName) throws IOException {

        ValidationResult result = null;
        PreflightParser parser = new PreflightParser(fileName);
        try {

            parser.parse(Format.PDF_A1B);
            // try-with-resources closes the document even when validate() throws.
            try (PreflightDocument document = parser.getPreflightDocument()) {
                document.validate();
                // Get validation result
                result = document.getResult();
            }

        } catch (SyntaxValidationException e) {
            // The exception carries the validation result describing the syntax problem.
            result = e.getResult();
        }

        if (result.isValid()) {
            System.out.println("The file " + fileName + " is a valid PDF/A-1b file");
        } else {
            System.out.println("The file " + fileName + " is not valid, error(s) :");
            for (ValidationResult.ValidationError error : result.getErrorsList()) {
                System.out.println(error.getErrorCode() + " : " + error.getDetails());
            }
        }

    }

    /**
     * Creates a working directory with a random name under {@link #OUTPUT_DIR}.
     *
     * @return the path of the created directory
     * @throws IOException if the directory cannot be created
     */
    private static Path getRandomPath() throws IOException {
        String path = generateRandom();
        Path tmpDir = Paths.get(OUTPUT_DIR + separator + path + separator);
        // createDirectories also creates OUTPUT_DIR itself when it does not exist yet.
        Files.createDirectories(tmpDir);
        return tmpDir;
    }

    /**
     * Builds a random 17-character name from upper-case letters and digits.
     *
     * @return the generated name
     */
    private static String generateRandom() {
        String aToZ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890";
        Random rand = new Random();
        StringBuilder res = new StringBuilder();
        for (int i = 0; i < 17; i++) {
            int randIndex = rand.nextInt(aToZ.length());
            res.append(aToZ.charAt(randIndex));
        }
        return res.toString();
    }

}

使用PDFBox将PDF转换为PDF/A

共有1个答案

相关问答

相关文章

相关阅读

相关工具

相关文档