1. 程式人生 > java-poi3.17讀取word文字及圖片

java-poi3.17讀取word文字及圖片

package per.qy.dexter.fileoperate;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import java.util.UUID;

import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.junit.Test;

/**
 * Demonstrates reading the text and the embedded pictures of a Word file with
 * Apache POI: {@link HWPFDocument} for the binary {@code .doc} format and
 * {@link XWPFDocument} for the OOXML {@code .docx} format.
 */
public class WordTest {

	/**
	 * Reads a hard-coded Word file, prints its text to standard output and
	 * writes every embedded picture to disk under a random UUID-based name.
	 * <p>
	 * If the file does not exist (or is not a regular file) the method returns
	 * without doing anything. I/O failures while reading the document or
	 * writing a picture are rethrown as {@link IllegalStateException} so the
	 * test fails instead of passing silently.
	 *
	 * @throws IllegalStateException if the document cannot be read or a
	 *                               picture cannot be written
	 */
	@Test
	public void testWord() {
		// Point this at a ".doc" file instead to exercise the HWPF branch.
		String path = "D:\\temp\\temp\\test.docx";
		File file = new File(path);
		if (!file.exists() || !file.isFile()) {
			return;
		}

		String content = null;
		InputStream is = null;
		HWPFDocument doc = null;
		XWPFDocument docx = null;
		POIXMLTextExtractor extractor = null;
		try {
			is = new FileInputStream(file);
			if (hasExtension(path, ".doc")) {
				doc = new HWPFDocument(is);

				// Text content of the document.
				content = doc.getDocumentText();

				// Pictures embedded in the document.
				PicturesTable picturesTable = doc.getPicturesTable();
				List<Picture> pictures = picturesTable.getAllPictures();
				for (Picture picture : pictures) {
					// Write the picture to disk; always release the file handle,
					// even when the write fails.
					OutputStream out = new FileOutputStream(
							new File("D:\\temp\\" + UUID.randomUUID() + "." + picture.suggestFileExtension()));
					try {
						picture.writeImageContent(out);
					} finally {
						out.close();
					}
				}
			} else if (hasExtension(path, ".docx")) {
				docx = new XWPFDocument(is);
				extractor = new XWPFWordExtractor(docx);

				// Text content of the document.
				content = extractor.getText();

				// Pictures embedded in the document.
				List<XWPFPictureData> pictures = docx.getAllPictures();
				for (XWPFPictureData picture : pictures) {
					byte[] data = picture.getData();
					// Write the picture to disk; always release the file handle,
					// even when the write fails.
					FileOutputStream out = new FileOutputStream(
							"D:\\temp\\temp\\" + UUID.randomUUID() + picture.getFileName());
					try {
						out.write(data);
					} finally {
						out.close();
					}
				}
			} else {
				System.out.println("此檔案不是word檔案!");
			}
			// Nothing was extracted for an unsupported file type; avoid printing "null".
			if (content != null) {
				System.out.println(content);
			}
		} catch (IOException e) {
			// Covers FileNotFoundException as well; keep the cause so the failure is diagnosable.
			throw new IllegalStateException("Failed to process Word file: " + path, e);
		} finally {
			// Close each resource independently so one failing close() does not
			// prevent the remaining resources from being released.
			closeQuietly(doc);
			closeQuietly(extractor);
			closeQuietly(docx);
			closeQuietly(is);
		}
	}

	/**
	 * Tells whether {@code path} ends with {@code extension}, ignoring case.
	 *
	 * @param path      file path to inspect
	 * @param extension extension including the leading dot, e.g. {@code ".docx"}
	 * @return {@code true} if the path ends with the extension; {@code false}
	 *         otherwise, including when the path is shorter than the extension
	 */
	private static boolean hasExtension(String path, String extension) {
		// regionMatches returns false for a negative offset, so short paths are safe.
		return path.regionMatches(true, path.length() - extension.length(), extension, 0, extension.length());
	}

	/**
	 * Closes {@code resource} if it is non-null, reporting (not propagating) a
	 * failure so that cleanup of other resources can continue.
	 *
	 * @param resource resource to close; may be {@code null}
	 */
	private static void closeQuietly(Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			System.err.println("Failed to close " + resource.getClass().getSimpleName() + ": " + e.getMessage());
		}
	}

}