使用java在pdf内部展平矢量图形并提取

我试图获取嵌入在PDF文件中的图像的大小(宽度和深度)。 PDF中的图像都是高分辨率矢量图像。

  • 我尝试使用PDFBox。 对于普通图形,PDFBox库可以完美地提取图像。 但是,当遇到矢量图像时,它会将不同的图层提取为不同的图像。
  • 我也读过有关iText的内容。 但是iText可以将整个页面转换为光栅化图像。 然而,我的PDF页面实际上包含多个图像,我需要分别提取/获取所有这些图像的大小。

我在这里附上我的PDFBox图像提取代码。 请告诉我,如何将一个矢量图像作为一个完整的图像提取,而不是拆分成多个图层。

我的代码如下:

package com.abp.pdf.util; import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import java.util.Iterator; import java.util.List; import java.util.Map; import javax.imageio.ImageIO; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.encryption.AccessPermission; import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectForm; import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage; public class ExtractImages { private int imageCounter = 1; private ExtractImages() { } public static void main(String[] args) throws Exception { ExtractImages extractor = new ExtractImages(); extractor.extractImages(args); } private void extractImages(String[] args) throws Exception { String pdfFile = null; String password = ""; String prefix = null; boolean addKey = false; boolean useNonSeqParser = true; pdfFile = "/home/suvankar/Resources/myfile.pdf"; if (prefix == null && pdfFile.length() > 4) { prefix = pdfFile.substring(0, pdfFile.lastIndexOf("/") + 1) + "extracted/images" + pdfFile.substring(pdfFile.lastIndexOf("/"), pdfFile.length() - 4); } PDDocument document = null; try { if (useNonSeqParser) { document = PDDocument.loadNonSeq(new File(pdfFile), null, password); } else { document = PDDocument.load(pdfFile); if (document.isEncrypted()) { StandardDecryptionMaterial spm = new StandardDecryptionMaterial( password); document.openProtection(spm); } } AccessPermission ap = document.getCurrentAccessPermission(); if (!ap.canExtractContent()) { throw new IOException( "Error: You do not have permission to extract images."); } List pages = document.getDocumentCatalog().getAllPages(); Iterator iter = pages.iterator(); while (iter.hasNext()) { PDPage page = (PDPage) iter.next(); PDResources resources = 
page.getResources(); processResources(resources, prefix, addKey); } } finally { if (document != null) { document.close(); } } } private void processResources(PDResources resources, String prefix, boolean addKey) throws IOException { if (resources == null) { return; } Map xobjects = resources.getXObjects(); if (xobjects != null) { Iterator xobjectIter = xobjects.keySet().iterator(); while (xobjectIter.hasNext()) { String key = xobjectIter.next(); PDXObject xobject = xobjects.get(key); // write the images if (xobject instanceof PDXObjectImage) { PDXObjectImage image = (PDXObjectImage) xobject; String name = null; if (addKey) { name = getUniqueFileName(prefix + "_" + key, image.getSuffix()); } else { name = getUniqueFileName(prefix, image.getSuffix()); } System.out.println("Writing image:" + name + "\nHeight - "+ image.getHeight() + "\nWidth - " + image.getWidth()); // name="extracted/images/" + name; /*BufferedImage ib= image.getRGBImage(); File outputfile = new File(name + "-buffered.jpg"); ImageIO.write(ib, "jpeg", outputfile);*/ image.write2file(name); } // maybe there are more images embedded in a form object else if (xobject instanceof PDXObjectForm) { PDXObjectForm xObjectForm = (PDXObjectForm) xobject; PDResources formResources = xObjectForm.getResources(); processResources(formResources, prefix, addKey); } } } } private String getUniqueFileName(String prefix, String suffix) { String uniqueName = null; File f = null; while (f == null || f.exists()) { uniqueName = prefix + "-" + imageCounter; f = new File(uniqueName + "." + suffix); imageCounter++; } return uniqueName; } /** * This will print the usage requirements and exit. 
*/ private static void usage() { System.err .println("Usage: java org.apache.pdfbox.ExtractImages [OPTIONS] \n" + " -password  Password to decrypt document\n" + " -prefix  Image prefix(default to pdf name)\n" + " -addkey add the internal image key to the file name\n" + " -nonSeq Enables the new non-sequential parser\n" + "  The PDF document to use\n"); System.exit(1); } }