如何使用iText java读取PDF中的表格?

我对使用Java处理PDF不太了解。我想使用iText Java库读取PDF文件中的表格,应该如何进行?

您可以从内容流中提取文本,但对于普通PDF,结果将是纯文本(没有任何结构)。 如果页面上有表格,则该表格不会被识别为表格。 您将获得内容和一些空白区域,但这不是表格结构! 只有带标记的PDF(Tagged PDF),才能从中获得XML文件。 如果PDF包含被识别为表格标记的标记,则这将反映在所得到的XML文件中。


要从PDF文件中读取表格内容,您只需使用任何API(我使用的是iText的PdfTextExtractor.getTextFromPage())将PDF转换为文本文件,然后通过Java程序读取该txt文件。 读取完之后,主要任务就完成了。 接下来您必须过滤出所需的数据,可以通过反复使用String类的split方法来执行此操作,直到找到所需的记录。

下面是我的代码,其中我从PDF文件中提取了部分记录并将其写入.CSV文件。 您可以在此处查看PDF文件: http://www.cea.nic.in/reports/monthly/generation_rep/actual/jan13/opm_02.pdf

 public static void genrateCsvMonth_Region(String pdfpath, String csvpath) { try { String line = null; // Appending Header in CSV file... BufferedWriter writer1 = new BufferedWriter(new FileWriter(csvpath, true)); writer1.close(); // Checking whether file is empty or not.. BufferedReader br = new BufferedReader(new FileReader(csvpath)); if ((line = br.readLine()) == null) { BufferedWriter writer = new BufferedWriter(new FileWriter( csvpath, true)); writer.append("REGION,"); writer.append("YEAR,"); writer.append("MONTH,"); writer.append("THERMAL,"); writer.append("NUCLEAR,"); writer.append("HYDRO,"); writer.append("TOTAL\n"); writer.close(); } // Reading the pdf file.. PdfReader reader = new PdfReader(pdfpath); BufferedWriter writer = new BufferedWriter(new FileWriter(csvpath, true)); // Extracting records from page into String.. String page = PdfTextExtractor.getTextFromPage(reader, 1); // Extracting month and Year from String.. String period1[] = page.split("PEROID"); String period2[] = period1[0].split(":"); String month[] = period2[1].split("-"); String period3[] = month[1].split("ENERGY"); String year[] = period3[0].split("VIS"); // Extracting Northen region String northen[] = page.split("NORTHEN REGION"); String nthermal1[] = northen[0].split("THERMAL"); String nthermal2[] = nthermal1[1].split(" "); String nnuclear1[] = northen[0].split("NUCLEAR"); String nnuclear2[] = nnuclear1[1].split(" "); String nhydro1[] = northen[0].split("HYDRO"); String nhydro2[] = nhydro1[1].split(" "); String ntotal1[] = northen[0].split("TOTAL"); String ntotal2[] = ntotal1[1].split(" "); // Appending filtered data into CSV file.. 
writer.append("NORTHEN" + ","); writer.append(year[0] + ","); writer.append(month[0] + ","); writer.append(nthermal2[4] + ","); writer.append(nnuclear2[4] + ","); writer.append(nhydro2[4] + ","); writer.append(ntotal2[4] + "\n"); // Extracting Western region String western[] = page.split("WESTERN"); String wthermal1[] = western[1].split("THERMAL"); String wthermal2[] = wthermal1[1].split(" "); String wnuclear1[] = western[1].split("NUCLEAR"); String wnuclear2[] = wnuclear1[1].split(" "); String whydro1[] = western[1].split("HYDRO"); String whydro2[] = whydro1[1].split(" "); String wtotal1[] = western[1].split("TOTAL"); String wtotal2[] = wtotal1[1].split(" "); // Appending filtered data into CSV file.. writer.append("WESTERN" + ","); writer.append(year[0] + ","); writer.append(month[0] + ","); writer.append(wthermal2[4] + ","); writer.append(wnuclear2[4] + ","); writer.append(whydro2[4] + ","); writer.append(wtotal2[4] + "\n"); // Extracting Southern Region String southern[] = page.split("SOUTHERN"); String sthermal1[] = southern[1].split("THERMAL"); String sthermal2[] = sthermal1[1].split(" "); String snuclear1[] = southern[1].split("NUCLEAR"); String snuclear2[] = snuclear1[1].split(" "); String shydro1[] = southern[1].split("HYDRO"); String shydro2[] = shydro1[1].split(" "); String stotal1[] = southern[1].split("TOTAL"); String stotal2[] = stotal1[1].split(" "); // Appending filtered data into CSV file.. 
writer.append("SOUTHERN" + ","); writer.append(year[0] + ","); writer.append(month[0] + ","); writer.append(sthermal2[4] + ","); writer.append(snuclear2[4] + ","); writer.append(shydro2[4] + ","); writer.append(stotal2[4] + "\n"); // Extracting eastern region String eastern[] = page.split("EASTERN"); String ethermal1[] = eastern[1].split("THERMAL"); String ethermal2[] = ethermal1[1].split(" "); String ehydro1[] = eastern[1].split("HYDRO"); String ehydro2[] = ehydro1[1].split(" "); String etotal1[] = eastern[1].split("TOTAL"); String etotal2[] = etotal1[1].split(" "); // Appending filtered data into CSV file.. writer.append("EASTERN" + ","); writer.append(year[0] + ","); writer.append(month[0] + ","); writer.append(ethermal2[4] + ","); writer.append(" " + ","); writer.append(ehydro2[4] + ","); writer.append(etotal2[4] + "\n"); // Extracting northernEastern region String neestern[] = page.split("NORTH"); String nethermal1[] = neestern[2].split("THERMAL"); String nethermal2[] = nethermal1[1].split(" "); String nehydro1[] = neestern[2].split("HYDRO"); String nehydro2[] = nehydro1[1].split(" "); String netotal1[] = neestern[2].split("TOTAL"); String netotal2[] = netotal1[1].split(" "); writer.append("NORTH EASTERN" + ","); writer.append(year[0] + ","); writer.append(month[0] + ","); writer.append(nethermal2[4] + ","); writer.append(" " + ","); writer.append(nehydro2[4] + ","); writer.append(netotal2[4] + "\n"); writer.close(); } catch (IOException ioe) { ioe.printStackTrace(); } } 


 package com.geek.tutorial.itext.table;

 import java.io.FileOutputStream;

 import com.lowagie.text.pdf.PdfPTable;
 import com.lowagie.text.pdf.PdfPCell;
 import com.lowagie.text.pdf.PdfWriter;
 import com.lowagie.text.Document;
 import com.lowagie.text.Paragraph;

 /**
  * Minimal iText example: writes {@code SimplePDFTable.pdf} containing a single
  * two-column table whose cells hold the strings "1" through "6".
  */
 public class SimplePDFTable {

     /**
      * Generates {@code SimplePDFTable.pdf} in the current working directory.
      *
      * <p>The output stream is opened in a try-with-resources block so that the
      * file handle is released even when building the document fails part-way.
      *
      * @throws Exception if the file cannot be opened or the document cannot be built
      */
     public SimplePDFTable() throws Exception {
         Document document = new Document();
         try (FileOutputStream out = new FileOutputStream("SimplePDFTable.pdf")) {
             PdfWriter.getInstance(document, out);
             document.open();

             PdfPTable table = new PdfPTable(2); // Code 1

             // Code 2
             table.addCell("1");
             table.addCell("2");

             // Code 3
             table.addCell("3");
             table.addCell("4");

             // Code 4
             table.addCell("5");
             table.addCell("6");

             // Code 5
             document.add(table);
             document.close();
         }
     }

     /**
      * Runs the example; any failure is printed to standard output.
      *
      * @param args unused
      */
     public static void main(String[] args) {
         try {
             new SimplePDFTable();
         } catch (Exception e) {
             System.out.println(e);
         }
     }
 }