1. 程式人生 > >Java讀取本地文件內容支援文件格式有(.doc+.docx+.txt+.xls+.xlsx)

Java 讀取本地文件內容(支援的文件格式:.doc、.docx、.txt、.xls、.xlsx)

友情提示:為了方便,整理了匯入本地文件功能,包含標題中幾種格式文件,以備不時之需。

一、所需jar包

           這裡只介紹在 pom 檔案中引入 jar 包的配置,如下:

                <dependency>
   		 	<groupId>org.apache.poi</groupId>
    		        <artifactId>poi</artifactId>
    		        <version>3.17</version>
		</dependency>
		
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-scratchpad</artifactId>
			<version>3.17</version>
		</dependency>

		
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-ooxml</artifactId>
			<version>3.17</version>
		</dependency>
		
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-ooxml-schemas</artifactId>
			<version>3.17</version>
		</dependency>

二、測試程式碼

package com.cdvcloud.thread.file;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.POIXMLDocument;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.springframework.util.StringUtils;

/**
 * Extracts plain text from local documents.
 *
 * <p>Supported formats, selected by file extension (case-insensitive):
 * {@code .txt}, {@code .doc}, {@code .docx}, {@code .xls} and {@code .xlsx}.
 * All readers are best-effort: failures are printed to stderr and reported to
 * the caller through the return value instead of an exception.
 */
public class FileImport {

	/** Charset used for .txt files when the caller does not supply one. */
	private static final String DEFAULT_CHARSET = "GB2312";

	/**
	 * Demo entry point: reads one sample file of each supported format and
	 * prints the extracted text.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String path="C:/Users/test/Desktop/ceshi2.docx";
		String path2="C:/Users/test/Desktop/test2.xlsx";
		String path3="C:/Users/test/Desktop/test.xls";
		String path4="C:/Users/test/Desktop/test.txt";
		String path5="C:/Users/test/Desktop/ceshi.doc";

		String string = importFile(path,null);
		String string2 = importFile(path2,"GB2312");
		String string3 = importFile(path3,null);
		String string4 = importFile(path4,null);
		String string5 = importFile(path5,null);
		System.out.println("docx===>>"+string);
		System.out.println("xlsx===>>"+string2);
		System.out.println("xls===>>"+string3);
		System.out.println("txt===>>"+string4);
		System.out.println("doc===>>"+string5);
	}

	/**
	 * Single entry point: dispatches to the reader matching the file extension.
	 *
	 * @param path    path of the file to read
	 * @param charset charset name used for .txt files; {@code null} or empty
	 *                selects the default (GB2312). Ignored by the other formats.
	 * @return the extracted text, or {@code null} when {@code path} is empty,
	 *         the extension is not supported, or the reader failed
	 */
	static String importFile(String path,String charset){
		if (StringUtils.isEmpty(path)) {
			return null;
		}
		String resTextString = null;
		try {
			// When there is no '.', lastIndexOf yields -1 and the whole path is
			// taken as the "extension", which simply matches nothing below.
			String type = path.substring(path.lastIndexOf(".")+1);
			// Compare case-insensitively so that e.g. "REPORT.XLSX" is accepted.
			if ("txt".equalsIgnoreCase(type)) {
				resTextString = importTxt(path,charset);
			}else if ("doc".equalsIgnoreCase(type)) {
				resTextString = importWord3(path);
			}else if ("docx".equalsIgnoreCase(type)) {
				resTextString = importWord7(path);
			}else if ("xls".equalsIgnoreCase(type)||"xlsx".equalsIgnoreCase(type)) {
				resTextString = importExcel(path, charset);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return resTextString;
	}

	/**
	 * Reads the text of a Word 97-2003 (.doc) document.
	 *
	 * @param path path of the .doc file
	 * @return the document text, or {@code null} if the file could not be read
	 */
	static String importWord3(String path){
		// try-with-resources closes both the document and the underlying
		// stream, including when parsing fails half-way.
		try (FileInputStream inputStream = new FileInputStream(path);
				HWPFDocument doc = new HWPFDocument(inputStream)) {
			return doc.getText().toString();
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		}
	}

	/**
	 * Reads the text of a Word 2007+ (.docx) document.
	 *
	 * @param path path of the .docx file
	 * @return the document text, or {@code null} if the file could not be read
	 */
	static String importWord7(String path){
		String text = null;
		OPCPackage openPackage = null;
		try {
			openPackage = POIXMLDocument.openPackage(path);
			XWPFWordExtractor word = new XWPFWordExtractor(openPackage);
			text = word.getText();
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			if (openPackage != null) {
				// The package was opened read/write; close() would save it back
				// over the source file. revert() releases it WITHOUT saving,
				// which is what a read-only import needs.
				openPackage.revert();
			}
		}
		return text;
	}

	/**
	 * Reads a plain-text file.
	 *
	 * <p>Lines are joined with {@code '\n'}; no trailing newline is appended.
	 *
	 * @param path    path of the text file
	 * @param charset charset name to decode with; {@code null} or empty selects
	 *                the default (GB2312)
	 * @return the file content (empty string for an empty file), or
	 *         {@code null} if the file could not be read or the charset name is
	 *         not supported
	 */
	static String importTxt(String path,String charset){
		if (StringUtils.isEmpty(charset)) {
			charset = DEFAULT_CHARSET;
		}
		StringBuilder content = new StringBuilder();
		// The stream is declared as its own resource so it is still closed when
		// the InputStreamReader constructor rejects the charset name.
		try (InputStream in = new FileInputStream(path);
				BufferedReader br = new BufferedReader(new InputStreamReader(in, charset))) {
			String line;
			boolean firstLine = true;
			while ((line = br.readLine()) != null) {
				if (!firstLine) {
					content.append('\n');
				}
				content.append(line);
				firstLine = false;
			}
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		}
		return content.toString();
	}

	/**
	 * Reads every sheet of an Excel workbook (.xls or .xlsx) as text.
	 *
	 * @param path    path of the workbook; its extension selects the parser
	 * @param charset unused; kept so existing callers keep compiling
	 * @return the workbook content as produced by {@link #getExcelVal(Workbook)},
	 *         or an empty string when the extension is not an Excel one or the
	 *         file could not be read
	 */
	static String importExcel(String path,String charset){
		String type = path.substring(path.lastIndexOf(".")+1);
		boolean legacyFormat = "xls".equalsIgnoreCase(type);
		if (!legacyFormat && !"xlsx".equalsIgnoreCase(type)) {
			return "";
		}
		// Resources are closed in reverse order: workbook first, then stream.
		try (InputStream is = new FileInputStream(path);
				Workbook wb = legacyFormat ? new HSSFWorkbook(is) : new XSSFWorkbook(is)) {
			return getExcelVal(wb);
		} catch (Exception e) {
			e.printStackTrace();
			return "";
		}
	}

	/**
	 * Renders all sheets of a workbook as text.
	 *
	 * <p>Within a row, cells are separated by a single space and every row ends
	 * with {@code '\n'}. The number of columns of a sheet is taken from its
	 * first row (index 0); a sheet without that row is skipped. Rows that do
	 * not exist in the file are skipped.
	 *
	 * @param wb the workbook to read; not closed by this method
	 * @return the concatenated text of all sheets, possibly empty
	 */
	public static String getExcelVal(Workbook wb){
		StringBuilder out = new StringBuilder();
		int sheetNum = wb.getNumberOfSheets();
		for (int m = 0; m < sheetNum; m++) {
			Sheet sheet = wb.getSheetAt(m);
			Row firstRow = sheet.getRow(0);
			if (firstRow == null) {
				continue;
			}
			// getLastCellNum() is "last cell index + 1" (or -1 for a row without
			// cells), so gaps in the first row do not shrink the column count.
			int colnum = firstRow.getLastCellNum();
			// getLastRowNum() is an index, hence the inclusive upper bound. Using
			// it instead of the physical row count keeps rows after a blank row.
			int lastRowIndex = sheet.getLastRowNum();
			for (int i = 0; i <= lastRowIndex; i++) {
				Row row = sheet.getRow(i);
				if (row == null) {
					continue;
				}
				for (int j = 0; j < colnum; j++) {
					out.append(String.valueOf(getCellFormatValue(row.getCell(j))));
					out.append(j < colnum-1 ? " " : "\n");
				}
			}
		}
		return out.toString();
	}

	/**
	 * Converts a cell to a printable value according to its type.
	 *
	 * <p>Formula cells are read through their cached result, so a formula that
	 * evaluates to text or a boolean is handled like a plain cell of that type.
	 *
	 * @param cell the cell to read; may be {@code null}
	 * @return a {@code java.util.Date} for date-formatted numeric cells, the
	 *         value as a {@code String} for other numeric, text and boolean
	 *         cells, and an empty string for {@code null}, blank and error cells
	 */
	@SuppressWarnings("deprecation")
	public static Object getCellFormatValue(Cell cell){
		if (cell == null) {
			return "";
		}
		int type = cell.getCellType();
		if (type == Cell.CELL_TYPE_FORMULA) {
			// Dispatch on what the formula evaluated to; calling a numeric
			// accessor on a text-valued formula would throw.
			type = cell.getCachedFormulaResultType();
		}
		switch (type) {
			case Cell.CELL_TYPE_NUMERIC:
				// Dates are stored as numbers and only differ by cell format.
				if (DateUtil.isCellDateFormatted(cell)) {
					return cell.getDateCellValue();
				}
				return String.valueOf(cell.getNumericCellValue());
			case Cell.CELL_TYPE_STRING:
				return cell.getRichStringCellValue().getString();
			case Cell.CELL_TYPE_BOOLEAN:
				return String.valueOf(cell.getBooleanCellValue());
			default:
				return "";
		}
	}
}

【注】讀取 txt 文件時若出現亂碼,請將 charset 引數設定為該檔案實際使用的編碼(例如 UTF-8 或 GBK);未指定 charset 時,程式預設使用 GB2312。