1. 程式人生 > java實現pdf轉word(文字)

java實現pdf轉word(文字)

end write 適合 common start -i void comm fbo

1:添加依賴

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven project descriptor for the PDF text-extraction example. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Project coordinates. -->
    <groupId>pdfToWord</groupId>
    <artifactId>pdfToWord</artifactId>
    <version>1.0-SNAPSHOT</version>
   <dependencies>
       <!-- Logging facade. NOTE(review): presumably also pulled in transitively by PDFBox;
            confirm whether the explicit declaration is required. -->
       <dependency>
           <groupId>commons-logging</groupId>
           <artifactId>commons-logging</artifactId>
           <version>1.2</version>
       </dependency>
       <!-- FontBox from the PDFBox project; its version matches pdfbox-tools below. -->
       <dependency>
           <groupId>org.apache.pdfbox</groupId>
           <artifactId>fontbox</artifactId>
           <version>2.0.11</version>
       </dependency>
       <!-- JBIG2 ImageIO plugin. Not referenced by the conversion code in this article;
            presumably only matters for PDFs containing JBIG2-compressed images (TODO confirm). -->
       <dependency>
           <groupId>com.levigo.jbig2</groupId>
           <artifactId>levigo-jbig2-imageio</artifactId>
           <version>2.0</version>
       </dependency>
       <!-- The conversion code imports org.apache.pdfbox.pdmodel.PDDocument and
            org.apache.pdfbox.text.PDFTextStripper. NOTE(review): those classes are expected to
            arrive transitively through this artifact; verify, or depend on `pdfbox` directly. -->
       <dependency>
           <groupId>org.apache.pdfbox</groupId>
           <artifactId>pdfbox-tools</artifactId>
           <version>2.0.11</version>
       </dependency>
       <!-- NOTE(review): not referenced by the conversion code in this article; confirm it is needed. -->
       <dependency>
           <groupId>commons-io</groupId>
           <artifactId>commons-io</artifactId>
           <version>2.6</version>
       </dependency>
   </dependencies>
    
</project>

2:編寫轉換的方法

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import java.io.*;
import java.nio.charset.StandardCharsets;

/**
 * Extracts the text of a PDF and stores it in a file with a {@code .doc} extension.
 *
 * <p>The output is the plain UTF-8 text produced by {@link PDFTextStripper}; no Word
 * formatting is generated.
 *
 * @author Angin
 * @date 2019/3/18 0018.
 */
public class PdfToWord {
    /** Extension given to the generated output file. */
    private static final String OUTPUT_EXTENSION = ".doc";

    /**
     * Extracts the text of every page of the given PDF and writes it, UTF-8 encoded, next to
     * the source file under the same name with a {@code .doc} extension.
     *
     * <p>An {@link IOException} raised while reading or writing is reported through
     * {@link Throwable#printStackTrace()} and not rethrown; {@code "end.."} is printed to
     * standard output in either case.
     *
     * @param pdfPath path of the PDF file to read
     */
    public void convertText(String pdfPath) {
        String docPath = toDocPath(pdfPath);
        // try-with-resources closes the writer and then the document even when loading or
        // extraction fails; the output file is only opened once the PDF has loaded.
        try (PDDocument doc = PDDocument.load(new File(pdfPath));
             Writer writer = new OutputStreamWriter(
                     new FileOutputStream(docPath), StandardCharsets.UTF_8)) {
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setSortByPosition(true);
            stripper.setStartPage(1);
            stripper.setEndPage(doc.getNumberOfPages());
            stripper.writeText(doc, writer);
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("end..");
    }

    /**
     * Derives the output path by replacing the file extension of {@code pdfPath} with
     * {@code .doc}, or appending {@code .doc} when the file name has no extension.
     *
     * <p>Only a dot located after the last path separator counts as the start of the
     * extension, so dots in directory names or earlier in the file name are left alone.
     */
    private static String toDocPath(String pdfPath) {
        int lastSeparator = Math.max(pdfPath.lastIndexOf('/'), pdfPath.lastIndexOf('\\'));
        int extensionDot = pdfPath.lastIndexOf('.');
        String basePath = extensionDot > lastSeparator
                ? pdfPath.substring(0, extensionDot)
                : pdfPath;
        return basePath + OUTPUT_EXTENSION;
    }
}

3:main方法中進行測試

/**
 * Command-line entry point for trying out {@link PdfToWord}.
 *
 * @author Angin
 * @date 2019/3/18 0018.
 */
public class MainClass {
    /** PDF that is converted when no path is supplied on the command line. */
    private static final String DEFAULT_PDF_PATH = "E:\\pdfToWord.pdf";

    /**
     * Converts a single PDF with {@link PdfToWord#convertText(String)}.
     *
     * @param args optional; when present, {@code args[0]} is the path of the PDF to convert,
     *             otherwise {@link #DEFAULT_PDF_PATH} is used
     */
    public static void main(String[] args) {
        String pdfPath = args.length > 0 ? args[0] : DEFAULT_PDF_PATH;
        PdfToWord converter = new PdfToWord();
        converter.convertText(pdfPath);
    }
}

此方法只適合文字型(文檔型)的 PDF 轉換;如果 PDF 的內容是圖片(例如掃描件),則無法擷取其中的文字,轉換後的檔案無法讀取。另外請注意,輸出的 .doc 檔內容實際上是 UTF-8 純文字,並不包含 Word 的排版格式。

java實現pdf轉word(文字)