Java:使用apache POI如何将ms word文件转换为pdf?

通过使用apache POI如何将ms word文件转换为pdf

我使用了以下代码,但它无法正常工作并且报错。我想我可能导入了错误的类?

 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.OutputStream;
 // NOTE(review): this Document comes from POI's hslf.record package, yet below it is
 // handed to PdfWriter.getInstance(...) and used via open()/newPage()/add()/close().
 // It looks like a PDF-library Document class was intended here - confirm.
 import org.apache.poi.hslf.record.Document;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.extractor.WordExtractor;
 // NOTE(review): this Paragraph is POI's hwpf model class, but below it is constructed
 // from a String and added to the output document - presumably the PDF library's
 // Paragraph was intended. Confirm.
 import org.apache.poi.hwpf.usermodel.Paragraph;
 import org.apache.poi.hwpf.usermodel.Range;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 // NOTE(review): PdfWriter is used in main() but no import for it appears in this snippet.

 /**
  * Code from the question: reads the paragraphs of a .doc file with POI and tries to
  * write them into a PDF. Kept as posted (it is the failing example being asked about).
  */
 public class TestCon {

     /**
      * Reads /document/test2.doc and attempts to write its paragraph text to
      * /document/test.pdf.
      *
      * @param args not used
      */
     public static void main(String[] args) {
         // TODO Auto-generated method stub
         POIFSFileSystem fs = null;
         Document document = new Document();

         try {
             System.out.println("Starting the test");
             fs = new POIFSFileSystem(new FileInputStream("/document/test2.doc"));

             HWPFDocument doc = new HWPFDocument(fs);
             WordExtractor we = new WordExtractor(doc);

             OutputStream file = new FileOutputStream(new File("/document/test.pdf"));

             PdfWriter writer = PdfWriter.getInstance(document, file);

             Range range = doc.getRange();
             document.open();
             writer.setPageEmpty(true);
             document.newPage();
             writer.setPageEmpty(true);

             String[] paragraphs = we.getParagraphText();
             for (int i = 0; i < paragraphs.length; i++) {

                 // pr is only referenced by the commented-out formatting lines below.
                 org.apache.poi.hwpf.usermodel.Paragraph pr = range.getParagraph(i);
                 // CharacterRun run = pr.getCharacterRun(i);
                 // run.setBold(true);
                 // run.setCapitalized(true);
                 // run.setItalic(true);
                 // Strip the line terminator from the extracted paragraph text.
                 paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", "");
                 System.out.println("Length:" + paragraphs[i].length());
                 System.out.println("Paragraph" + i + ": " + paragraphs[i].toString());

                 // add the paragraph to the document
                 document.add(new Paragraph(paragraphs[i]));
             }

             System.out.println("Document testing completed");
         } catch (Exception e) {
             System.out.println("Exception during test");
             e.printStackTrace();
         } finally {
             // close the document
             document.close();
         }
     }
 }

问题已解决,可以正常工作的代码如下:

 import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.OutputStream; import com.lowagie.text.Document; import com.lowagie.text.DocumentException; import com.lowagie.text.Paragraph; import com.lowagie.text.pdf.PdfWriter; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.hwpf.usermodel.Range; import org.apache.poi.poifs.filesystem.POIFSFileSystem; public class TestCon { /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub POIFSFileSystem fs = null; Document document = new Document(); try { System.out.println("Starting the test"); fs = new POIFSFileSystem(new FileInputStream("D:/Resume.doc")); HWPFDocument doc = new HWPFDocument(fs); WordExtractor we = new WordExtractor(doc); OutputStream file = new FileOutputStream(new File("D:/test.pdf")); PdfWriter writer = PdfWriter.getInstance(document, file); Range range = doc.getRange(); document.open(); writer.setPageEmpty(true); document.newPage(); writer.setPageEmpty(true); String[] paragraphs = we.getParagraphText(); for (int i = 0; i < paragraphs.length; i++) { org.apache.poi.hwpf.usermodel.Paragraph pr = range.getParagraph(i); // CharacterRun run = pr.getCharacterRun(i); // run.setBold(true); // run.setCapitalized(true); // run.setItalic(true); paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", ""); System.out.println("Length:" + paragraphs[i].length()); System.out.println("Paragraph" + i + ": " + paragraphs[i].toString()); // add the paragraph to the document document.add(new Paragraph(paragraphs[i])); } System.out.println("Document testing completed"); } catch (Exception e) { System.out.println("Exception during test"); e.printStackTrace(); } finally { // close the document document.close(); } } } 

以下代码对我有用:

 Public class DocToPdfConverter{ public static void main(String[] args) { String k=null; OutputStream fileForPdf =null; try { String fileName="/document/test2.doc"; //Below Code is for .doc file if(fileName.endsWith(".doc")) { HWPFDocument doc = new HWPFDocument(new FileInputStream( fileName)); WordExtractor we=new WordExtractor(doc); k = we.getText(); fileForPdf = new FileOutputStream(new File( "/document/DocToPdf.pdf")); we.close(); } //Below Code for else if(fileName.endsWith(".docx")) { XWPFDocument docx = new XWPFDocument(new FileInputStream( fileName)); // using XWPFWordExtractor Class XWPFWordExtractor we = new XWPFWordExtractor(docx); k = we.getText(); fileForPdf = new FileOutputStream(new File( "/document/DocxToPdf.pdf")); we.close(); } Document document = new Document(); PdfWriter.getInstance(document, fileForPdf); document.open(); document.add(new Paragraph(k)); document.close(); fileForPdf.close(); } catch (Exception e) { e.printStackTrace(); } } } 

这对我有用:

资料来源:http://www.programcreek.com/java-api-examples/index.php?api=org.apache.poi.xwpf.converter.pdf.PdfConverter

 package pdf;

 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.OutputStream;

 import org.apache.poi.xwpf.converter.pdf.PdfConverter;
 import org.apache.poi.xwpf.converter.pdf.PdfOptions;
 import org.apache.poi.xwpf.usermodel.XWPFDocument;

 /**
  * Converts a .docx file to PDF using the XWPF PdfConverter.
  */
 public class PDF {

     /**
      * Converts the input .docx to the output PDF.
      *
      * @param args optional; when exactly two arguments are given they are used as the
      *             input and output paths, otherwise D:/TEST.docx and D:/TEST.pdf
      * @throws Exception if the input cannot be read or the conversion fails
      */
     public static void main(String[] args) throws Exception {
         String inputFile = "D:/TEST.docx";
         String outputFile = "D:/TEST.pdf";
         if (args != null && args.length == 2) {
             inputFile = args[0];
             outputFile = args[1];
         }
         System.out.println("inputFile:" + inputFile + ",outputFile:" + outputFile);

         // try-with-resources: both streams are closed even if conversion throws.
         try (FileInputStream in = new FileInputStream(inputFile)) {
             XWPFDocument document = new XWPFDocument(in);
             File outFile = new File(outputFile);
             try (OutputStream out = new FileOutputStream(outFile)) {
                 // null is passed as in the original sample.
                 // NOTE(review): presumably the converter falls back to default
                 // options when given null - confirm.
                 PdfOptions options = null;
                 PdfConverter.getInstance().convert(document, out, options);
             }
         }
     }
 }

这里有几个步骤:

  1. 使用POI将Word文档读取为一种与格式无关的中间表示
  2. 将这种与格式无关的中间表示转换为PDF
  3. 写PDF

我不知道POI是否会为你做第2步。 我推荐别的东西,比如iText。

作为旁注,还可以直接从Word / Excel内容流中即时读取内容,而不是从文件系统中读取内容并将其序列化到磁盘,例如从CMIS存储库中检索内容时:

例如

  //HWPFDocument docx = new HWPFDocument(fs); HWPFDocument docx = new HWPFDocument(doc.getContentStream().getStream()); 

(doc的类型为org.apache.chemistry.opencmis.client.api.Document ,在这种情况下,我调整了您的代码,通过opencmis从Alfresco存储库中检索word文件并将其转换为PDF)

HTH

作为对Kushagra答案的补充,以下是更新后的Maven依赖项:

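   <dependency>
       <groupId>fr.opensagres.xdocreport</groupId>
       <artifactId>fr.opensagres.xdocreport.converter.docx.xwpf</artifactId>
       <version>2.0.1</version>
   </dependency>
   <dependency>
       <groupId>fr.opensagres.xdocreport</groupId>
       <artifactId>fr.opensagres.xdocreport.converter</artifactId>
       <version>2.0.1</version>
   </dependency>
   <dependency>
       <groupId>fr.opensagres.xdocreport</groupId>
       <artifactId>fr.opensagres.poi.xwpf.converter.pdf</artifactId>
       <version>2.0.1</version>
   </dependency>
   <dependency>
       <groupId>fr.opensagres.xdocreport</groupId>
       <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
       <version>2.0.1</version>
   </dependency>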