程式人生 > doc,docx,pdf,ppt等檔案型別讀取方法

doc,docx,pdf,ppt等檔案型別讀取方法

//讀取ppt檔案
    public String readPPT(String file) throws IOException {
        String re="";
        InputStream is = null;
        try{
            is = new FileInputStream(new File(file));
            PowerPointExtractor extractor = new PowerPointExtractor(is);
            re = extractor.getText();
        }catch
(Exception e){ System.out.println("讀取ppt出錯"+e.toString()); }finally { is.close(); } return re; } //讀取pptx檔案 public String readPPT2007(String file) throws IOException { OPCPackage opc = null; String re = ""; try{ opc = POIXMLDocument.openPackage(file); re = new
XSLFPowerPointExtractor(opc).getText(); } catch (Exception e) { System.out.println("讀取pptx出錯"+e.toString()); }finally { opc.close(); } return re; } // 讀取pdf檔案 public String readPDF(String file) throws IOException { String result = null
; FileInputStream is = null; PDDocument document = null; try{ is = new FileInputStream(file); PDFParser parser = new PDFParser(is); parser.parse(); document = parser.getPDDocument(); PDFTextStripper stripper = new PDFTextStripper(); result = stripper.getText(document); } catch (Exception e) { e.printStackTrace(); }finally { if(is != null){ is.close(); } if (document != null){ document.close(); } } return result; } // 讀取doc檔案 public String readWord(String file){ String result =""; WordExtractor wordExtractor = null; try{ wordExtractor = new WordExtractor(new FileInputStream(file)); result = wordExtractor.getText(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return result; } // 讀取docx檔案 public String readDocx(String file) throws IOException { String result = ""; OPCPackage opc = null; try{ opc = POIXMLDocument.openPackage(file); result = new XWPFWordExtractor(opc).getText(); } catch (Exception e) { e.printStackTrace(); }finally { opc.close(); } return result; } // 讀取xls檔案 public String readEXCEL(String file){ StringBuilder content = new StringBuilder(); HSSFWorkbook workbook = null; try{ workbook = new HSSFWorkbook(new FileInputStream(file)); for(int numSheets = 0;numSheets <workbook.getNumberOfSheets();numSheets++){ if(null != workbook.getSheetAt(numSheets)){ HSSFSheet sheet = workbook.getSheetAt(numSheets); for(int rowNumOfSheet = 0;rowNumOfSheet <= sheet.getLastRowNum();rowNumOfSheet++ ){ HSSFRow row = sheet.getRow(rowNumOfSheet); for (short cellNumOfRow = 0; cellNumOfRow <=row.getLastCellNum();cellNumOfRow ++){ HSSFCell cell = row.getCell(cellNumOfRow); if(this.convertCellHSSFCell(cell).length() > 0){ content.append(this.convertCellHSSFCell(cell)); } } content.append("\n"); } } } } catch (Exception e) { e.printStackTrace(); } return content.toString(); } private String convertCellHSSFCell(HSSFCell cell){ NumberFormat format = NumberFormat.getInstance(); format.setGroupingUsed(false); String cellValue 
= ""; if(cell == null){ return cellValue; } switch (cell.getCellType()){ case HSSFCell.CELL_TYPE_NUMERIC: cellValue = format.format(cell.getNumericCellValue()); break; case HSSFCell.CELL_TYPE_STRING: cellValue = cell.getStringCellValue(); break; case HSSFCell.CELL_TYPE_BLANK: cellValue = cell.getStringCellValue(); break; case HSSFCell.CELL_TYPE_BOOLEAN: cellValue = Boolean.valueOf(cell.getBooleanCellValue()).toString(); break; case HSSFCell.CELL_TYPE_ERROR: cellValue = String.valueOf(cell.getErrorCellValue()); break; default: cellValue = ""; } return cellValue.trim(); } // 讀取xlsx檔案 public String readEXCEL2007(String file) throws IOException { XSSFWorkbook workbook=null; StringBuilder content = new StringBuilder(); try{ workbook = new XSSFWorkbook(file); for (int numSheets = 0; numSheets < workbook.getNumberOfSheets(); numSheets++) { if (null != workbook.getSheetAt(numSheets)) { XSSFSheet aSheet = workbook.getSheetAt(numSheets);// 獲得一個sheet for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet.getLastRowNum(); rowNumOfSheet++) { if (null != aSheet.getRow(rowNumOfSheet)) { XSSFRow aRow = aSheet.getRow(rowNumOfSheet); // 獲得一個行 for (short cellNumOfRow = 0; cellNumOfRow <= aRow.getLastCellNum(); cellNumOfRow++) { if (null != aRow.getCell(cellNumOfRow)) { XSSFCell aCell = aRow.getCell(cellNumOfRow);// 獲得列值 if (this.convertCellXHSSFCell(aCell).length() > 0) { content.append(this.convertCellXHSSFCell(aCell)); } } content.append("\n"); } } } } } } catch(Exception ex){ System.out.println("讀取excel出錯"+ex.toString()); } return content.toString(); } private String convertCellXHSSFCell(XSSFCell aCell) { NumberFormat formater = NumberFormat.getInstance(); formater.setGroupingUsed(false); String cellValue = ""; if (aCell == null) { return cellValue; } switch (aCell.getCellType()) { case HSSFCell.CELL_TYPE_NUMERIC: cellValue = formater.format(aCell.getNumericCellValue()); break; case HSSFCell.CELL_TYPE_STRING: cellValue = aCell.getStringCellValue(); break; case 
HSSFCell.CELL_TYPE_BLANK: cellValue = aCell.getStringCellValue(); break; case HSSFCell.CELL_TYPE_BOOLEAN: cellValue = Boolean.valueOf(aCell.getBooleanCellValue()).toString(); break; case HSSFCell.CELL_TYPE_ERROR: cellValue = String.valueOf(aCell.getErrorCellValue()); break; default: cellValue = ""; } return cellValue.trim(); }

所需 jar 包的 Maven 座標：

        <!-- Apache POI core and OOXML modules, both at version 3.9. -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.9</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.9</version>
        </dependency>
        <!-- NOTE(review): org.apache.poi.xwpf.converter.pdf is declared again further down
             with version 1.0.5; confirm which version is intended and keep only one. -->
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
       <artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
            <version>1.0.6</version>
        </dependency>

        <!-- NOTE(review): org.apache.poi.xwpf.converter.core is declared again further down
             with version 1.0.5; confirm which version is intended and keep only one. -->
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
            <version>1.0.6</version>
        </dependency>

        <dependency>
            <groupId>org.apache.directory.studio</groupId>
            <artifactId>org.apache.commons.collections</artifactId>
            <version>3.2.1</version>
        </dependency>

        <!-- Second declaration of converter.core (1.0.5); the earlier one uses 1.0.6. -->
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
            <version>1.0.5</version>
        </dependency>

        <!-- Second declaration of converter.pdf (1.0.5); the earlier one uses 1.0.6. -->
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
            <version>1.0.5</version>
        </dependency>

        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
            <version>1.0.5</version>
        </dependency>

        <!-- NOTE(review): pdfbox is 1.8.13 while pdfbox-tools below is 2.0.8;
             confirm that mixing these two version lines is intended. -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>1.8.13</version>
        </dependency>

        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox-tools</artifactId>
            <version>2.0.8</version>
        </dependency>