1. 程式人生 > >Java實現Word轉PDF方案選擇

Java實現Word轉PDF方案選擇

Java實現Word轉PDF方案選擇

      很多應用場景中都會涉及到Word轉PDF,但Word轉PDF的方案在網上一搜一大把,讓人眼花繚亂,筆者踩過無數的坑後,最終總結出以下三種方案

  • OpenOffice實現Word轉PDF
  • docx2pdf實現Word轉PDF
  • itext+POI實現Word轉PDF
方案
OpenOffice實現Word轉PDF
這種方案在Windows中可行,且非常簡便,但它完全依賴於OpenOffice,想在Linux中實現,顯然不是一個好的方案,筆者嘗試過Linux中裝OpenOffice,但令人髮指的是居然還需要裝GUI!

Java程式碼


import java.io.File;
import java.io.IOException;
import java.net.ConnectException;

import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter; /** * maven包 <dependency> <groupId>com.artofsolving</groupId> <artifactId>jodconverter-maven-plugin</artifactId> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> </dependency> * * */
public class OfficetoPdfUtil { public static void createPDF(String sourceFile, String destFile) { // String OpenOffice_HOME = "D:/Program Files/OpenOffice.org 3";// // 這裡是OpenOffice的安裝目錄,C:\Program Files (x86)\OpenOffice 4 String OpenOffice_HOME = "D:\\openoffice\\newgay\\"; Process pro = null; try { String command = OpenOffice_HOME + "program\\soffice.exe -headless -accept=\"socket,host=127.0.0.1,port=8300;urp;StarOffice.ServiceManager\" -nofirststartwizard"; pro = Runtime.getRuntime().exec(command); File inputFile = new File(sourceFile); // 如果目標路徑不存在, 則新建該路徑 File outputFile = new File(destFile); if (!outputFile.getParentFile().exists()) { outputFile.getParentFile().mkdirs(); } // connect to an OpenOffice.org instance running on port 8100 OpenOfficeConnection connection = new SocketOpenOfficeConnection( "127.0.0.1", 8300); connection.connect(); // convert DocumentConverter converter = new OpenOfficeDocumentConverter( connection); converter.convert(inputFile, outputFile); // close the connection connection.disconnect(); pro.destroy(); } catch (ConnectException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public static void main(String[] args) { createPDF("C:\\Users\\Administrator\\Desktop\\test.doc", "C:\\Users\\Administrator\\Desktop\\test.pdf"); } }
docx2pdf實現Word轉PDF
這種方案在Windows和Linux中都可用,但有一點需要注意,它只支援07以上的Word轉換,很不幸,筆者的Word模版是03的,只能用第三種方案了。事實上,這種方案中也依賴了Itext的包

Java程式碼


import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;

import org.apache.poi.xwpf.converter.pdf.PdfConverter;
import org.apache.poi.xwpf.converter.pdf.PdfOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

	/**
	 *      2007.docx 可用      
			maven包
			<dependency>
				<groupId>org.apache.poi</groupId>
				<artifactId>poi</artifactId>
				<version>3.15</version>
			</dependency>
			<dependency>
				<groupId>org.apache.poi</groupId>
				<artifactId>poi-ooxml</artifactId>
				<version>3.15</version>
			</dependency>
			<dependency>
				<groupId>org.apache.poi</groupId>
				<artifactId>poi-scratchpad</artifactId>
				<version>3.15</version>
			</dependency>
			<dependency>  
				<groupId>fr.opensagres.xdocreport</groupId>  
				<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>  
				<version>1.0.5</version>  
			</dependency>
			<dependency>
				<groupId>fr.opensagres.xdocreport</groupId>
				<artifactId>org.apache.poi.xwpf.converter.core</artifactId>
				<version>1.0.6</version>
			</dependency>
			<dependency>
				<groupId>fr.opensagres.xdocreport</groupId>
				<artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
				<version>1.0.6</version>
			</dependency>


			<dependency>
				<groupId>org.xhtmlrenderer</groupId>
				<artifactId>flying-saucer-pdf</artifactId>
				<version>9.1.16</version>
			</dependency>
			<dependency>
				<groupId>org.jsoup</groupId>
				<artifactId>jsoup</artifactId>
				<version>1.11.3</version>
			</dependency>
			<dependency>
				<groupId>com.itextpdf.tool</groupId>
				<artifactId>xmlworker</artifactId>
				<version>5.5.13</version>
			</dependency>
			<dependency>
				<groupId>fr.opensagres.xdocreport</groupId>
				<artifactId>fr.opensagres.xdocreport.document</artifactId>
				<version>1.0.5</version>
			</dependency>
	 * 
	 */
public class WordToPDF {
	/**
	 * Converts a .docx document to PDF using the converter's default options.
	 *
	 * <p>Delegates to
	 * {@link #wordConverterToPdf(InputStream, OutputStream, PdfOptions, Map)}
	 * with {@code null} options.
	 *
	 * @param source
	 *            stream of the Word document; must be in .docx format
	 * @param target
	 *            stream the PDF is written to
	 * @param params
	 *            intended placeholder replacements; currently not applied
	 * @throws Exception
	 *             if the document cannot be read or converted
	 */
	public static void wordConverterToPdf(InputStream source,
			OutputStream target, Map<String, String> params) throws Exception {
		wordConverterToPdf(source, target, null, params);
	}

	/**
	 * Converts a .docx document to PDF.
	 *
	 * <p>Neither stream is closed by this method; the caller owns them.
	 *
	 * @param source
	 *            stream of the Word document; must be in .docx format
	 * @param target
	 *            stream the PDF is written to
	 * @param options
	 *            conversion options, e.g.
	 *            {@code PdfOptions.create().fontEncoding("windows-1250")}
	 * @param params
	 *            intended placeholder replacements; NOTE: this implementation
	 *            never reads the map, so no substitution takes place
	 * @throws Exception
	 *             if the document cannot be read or converted
	 */
	public static void wordConverterToPdf(InputStream source, OutputStream target, PdfOptions options,
			Map<String, String> params) throws Exception {
		XWPFDocument docx = new XWPFDocument(source);
		PdfConverter.getInstance().convert(docx, target, options);
	}

	/** Demo entry point converting a sample .docx on the desktop to PDF. */
	public static void main(String[] args) {
		String filepath = "C:\\Users\\Administrator\\Desktop\\test.docx";
		String outpath = "C:\\Users\\Administrator\\Desktop\\test.pdf";

		// try-with-resources guarantees both files are closed even when the
		// conversion throws.
		try (InputStream source = new FileInputStream(filepath);
				OutputStream target = new FileOutputStream(outpath)) {
			Map<String, String> params = new HashMap<String, String>();

			PdfOptions options = PdfOptions.create();

			wordConverterToPdf(source, target, options, params);
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

}

itext+POI實現Word轉PDF
這種方案也就是先解析Word,然後將內容轉換到PDF中,是三種方案中最繁瑣的選擇,但如果你的Word模板不是07以上的,而線上環境是Linux,這是最後的選擇了

word解析


import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Bookmark;
import org.apache.poi.hwpf.usermodel.Bookmarks;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;

/**
 * Sample helpers for inspecting a binary Word (.doc) document with POI HWPF:
 * bookmarks, paragraphs, sections, tables and list items.
 */
public class WordUtil {
	/** Demo entry point; prints any failure to stderr. */
	public static void main(String[] args) {
		try {
//			InputStream is = null;
//	        BufferedImage src = null;
//	        int ret = -1;
//	        
//	        is = new FileInputStream(new File("C:\\Users\\Administrator\\Desktop\\timg.png"));
//            src = javax.imageio.ImageIO.read(is);
//            System.out.println(src.getHeight());
//            System.out.println(src.getWidth());
//            is.close();
			new WordUtil().testReadByDoc();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Opens the sample .doc file and prints its bookmark information.
	 *
	 * @throws Exception if the file cannot be opened or parsed
	 */
	public void testReadByDoc() throws Exception {
		InputStream is = new FileInputStream("C:\\Users\\Administrator\\Desktop\\100007zldlwts.doc");
		try {
			HWPFDocument doc = new HWPFDocument(is);
			// Print bookmark information
			this.printInfo(doc.getBookmarks(), doc);
			// Print the document text
//	      System.out.println(doc.getDocumentText());  
//	      Range range = doc.getRange();  
//	      this.insertInfo(range);  
//	      this.printInfo(range);  
//	      // Read tables
//	      this.readTable(range);  
//	      // Read lists
//	      this.readList(range);  
//	      // Delete a range
//	      Range r1 = new Range(0, 1048, doc);  
//	      r1.delete();// deleted in memory only; write the document back to persist it

//	      Write the current HWPFDocument to an output stream
//	      doc.write(new FileOutputStream("C:\\Users\\Administrator\\Desktop\\100006bzsupdated.doc"));  
		} finally {
			// Close the stream even when parsing or printing fails.
			this.closeStream(is);
		}
	}

	/**
	 * Closes the input stream, printing (not rethrowing) any I/O error.
	 *
	 * @param is stream to close; may be null
	 */
	private void closeStream(InputStream is) {
		if (is != null) {
			try {
				is.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}

	/**
	 * Prints the bookmark count and, for each bookmark, its name, start and
	 * end offsets, and the text it covers with " FORMTEXT " removed.
	 *
	 * @param bookmarks bookmarks of the document
	 * @param doc       document the bookmark offsets refer to
	 */
	private void printInfo(Bookmarks bookmarks, HWPFDocument doc) {
		int count = bookmarks.getBookmarksCount();
		System.out.println("書籤數量:" + count);
		for (int i = 0; i < count; i++) {
			Bookmark bookmark = bookmarks.getBookmark(i);
			System.out.println("書籤" + (i + 1) + "的名稱是:" + bookmark.getName());
			System.out.println("開始位置:" + bookmark.getStart());
			System.out.println("結束位置:" + bookmark.getEnd());
			String text = new Range(bookmark.getStart(), bookmark.getEnd(), doc).text();
			// NOTE(review): the second replaceAll has an empty pattern and is a no-op
			// as written; presumably it originally stripped a non-printable field
			// marker character that was lost — confirm against the original file.
			System.out.println(text.replace(" FORMTEXT ", "").replaceAll("", ""));
		}
	}

	/**
	 * Prints the trimmed text of every cell of every table in the range.
	 *
	 * @param range range to scan for tables
	 */
	private void readTable(Range range) {
		// Iterate over the tables inside the range.
		TableIterator tableIter = new TableIterator(range);
		while (tableIter.hasNext()) {
			Table table = tableIter.next();
			int rowNum = table.numRows();
			for (int j = 0; j < rowNum; j++) {
				TableRow row = table.getRow(j);
				int cellNum = row.numCells();
				for (int k = 0; k < cellNum; k++) {
					TableCell cell = row.getCell(k);
					// Print the cell text
					System.out.println(cell.text().trim());
				}
			}
		}
	}

	/**
	 * Prints every paragraph of the range that is part of a list.
	 *
	 * @param range range whose paragraphs are inspected
	 */
	private void readList(Range range) {
		int num = range.numParagraphs();
		for (int i = 0; i < num; i++) {
			Paragraph para = range.getParagraph(i);
			if (para.isInList()) {
				System.out.println("list: " + para.text());
			}
		}
	}

	/**
	 * Prints the paragraphs of the range, then the margins, page height and
	 * text of each section. After printing the last paragraph, "Hello" is
	 * inserted after it via {@link #insertInfo(Range)}.
	 *
	 * @param range range to print
	 */
	private void printInfo(Range range) {
		// Number of paragraphs
		int paraNum = range.numParagraphs();
		System.out.println(paraNum);
		for (int i = 0; i < paraNum; i++) {
			//this.insertInfo(range.getParagraph(i));
			System.out.println("段落" + (i + 1) + ":" + range.getParagraph(i).text());
			if (i == (paraNum - 1)) {
				this.insertInfo(range.getParagraph(i));
			}
		}
		int secNum = range.numSections();
		System.out.println(secNum);
		for (int i = 0; i < secNum; i++) {
			Section section = range.getSection(i);
			System.out.println(section.getMarginLeft());
			System.out.println(section.getMarginRight());
			System.out.println(section.getMarginTop());
			System.out.println(section.getMarginBottom());
			System.out.println(section.getPageHeight());
			System.out.println(section.text());
		}
	}

	/**
	 * Inserts "Hello" after the range. The change is made in memory only.
	 *
	 * @param range range to append to
	 */
	private void insertInfo(Range range) {
		range.insertAfter("Hello");
	}

}

pdf生成


import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Chunk;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Font;
import com.itextpdf.text.Image;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Phrase;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.ColumnText;
import com.itextpdf.text.pdf.PdfPageEventHelper;
import com.itextpdf.text.pdf.PdfTemplate;
import com.itextpdf.text.pdf.PdfWriter;

public class PDFUtil {
	public static void main(String[] args) throws Exception {
		OutputStream out = new FileOutputStream(new File("C:\\Users\\Administrator\\Desktop\\test.pdf"));

		BaseFont bfChinese = BaseFont.createFont("STSongStd-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED); 
		Font BlodFont = new Font(bfChinese, 12, Font.NORMAL, BaseColor.BLACK);
		
		Document document = new Document(PageSize.A4);
		// 寫入器
		PdfWriter writer = PdfWriter.getInstance(document, out);
		document.open();
		
		Paragraph paragraphRemark = new Paragraph();
		paragraphRemark.setFirstLineIndent(2f);
	    paragraphRemark.add(new Chunk("特別提醒:", BlodFont));
	    paragraphRemark.add(Chunk.NEWLINE);
	    document.add(paragraphRemark);
	    
	    Image img = Image.getInstance("C:\\Users\\Administrator\\Desktop\\圖片1.png");
	    img.scaleToFit(400f,200f);
	    document.add(img);
	    
	    HeaderFoot headerFoot = new HeaderFoot("Title");
        writer.setPageEvent(headerFoot);