54一個大文件夾下所有文件的讀取和檢索！支持英文

阿新 • • 發佈：2017-05-05

data time ++ 檢索 pdm () new textfield ocx

package lld;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;

import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
/**
* @author xinghl
*
*/
public class IndexManager{
// Lazily created shared instance that getManager() hands out.
private static IndexManager indexManager;
// Text extracted from the file currently being indexed; createIndex resets it to "" around each file.
private static String content="";
// Folder holding the Lucene index; opened with FSDirectory.open and wiped by main() before indexing.
private static String INDEX_DIR = "E:\\uploadFolders";
// Root folder whose files main() passes to createIndex.
private static String DATA_DIR = "E:\\uploadFolder";
// Lucene analyzer, (re)assigned by searchIndex and createIndex on every call.
private static Analyzer analyzer = null;
// Lucene directory handle for INDEX_DIR, (re)assigned by searchIndex and createIndex.
private static Directory directory = null;
// Writer assigned by createIndex; closeWriter() closes it when it is non-null.
private static IndexWriter indexWriter = null;

/**
 * Returns the shared {@code IndexManager}, creating it on first use.
 *
 * <p>The accessor is {@code static} so that the singleton can be obtained
 * without already holding an instance. Existing calls made through an
 * instance reference still compile. The lazy initialisation is not
 * synchronised.
 *
 * @return the single shared index manager object
 */
public static IndexManager getManager(){
    if(indexManager == null){
        indexManager = new IndexManager();
    }
    return indexManager;
}
/**
 * Reads a text file line by line using the platform default charset.
 *
 * <p>Every line is prefixed with {@code "\n"}, so a non-empty file yields a
 * result that starts with a newline. Failures are printed to stderr and
 * whatever was read up to that point is returned.
 *
 * @param file the file to read
 * @return the file content, or {@code ""} if nothing could be read
 */
public static String txt2String(File file){
    StringBuilder text = new StringBuilder();
    BufferedReader reader = null;
    try{
        reader = new BufferedReader(new FileReader(file));
        String line;
        while((line = reader.readLine()) != null){
            text.append('\n').append(line);
        }
    }catch(Exception e){
        e.printStackTrace();
    }finally{
        // Close here so the handle is released even when readLine() fails.
        if(reader != null){
            try{
                reader.close();
            }catch(IOException e){
                e.printStackTrace();
            }
        }
    }
    return text.toString();
}
/**
 * Extracts the text of a legacy Word ({@code .doc}) file via POI's HWPF API.
 *
 * <p>Failures are printed to stderr and an empty string is returned.
 *
 * @param file the .doc file to read
 * @return the document text, or {@code ""} if it could not be read
 */
public static String doc2String(File file){
    String result = "";
    FileInputStream fis = null;
    try{
        fis = new FileInputStream(file);
        HWPFDocument doc = new HWPFDocument(fis);
        Range range = doc.getRange();
        String text = range.text();
        if(text != null){
            result = text;
        }
    }catch(Exception e){
        e.printStackTrace();
    }finally{
        // Release the file handle even when the document cannot be parsed.
        if(fis != null){
            try{
                fis.close();
            }catch(IOException e){
                e.printStackTrace();
            }
        }
    }
    return result;
}
/**
 * Extracts the cell contents of the first sheet of an {@code .xls} workbook
 * using JExcelApi.
 *
 * <p>Cells are visited row by row and each value is followed by one space.
 * Only sheet 0 is read. Failures are printed to stderr and whatever was
 * collected up to that point is returned.
 *
 * @param file the .xls file to read
 * @return the concatenated cell contents, or {@code ""} if nothing could be read
 */
public static String xls2String(File file){
    StringBuilder text = new StringBuilder();
    InputStream instream = null;
    jxl.Workbook readwb = null;
    try{
        instream = new FileInputStream(file);
        readwb = Workbook.getWorkbook(instream);
        // Sheet indexes start at 0; only the first sheet is read.
        Sheet readsheet = readwb.getSheet(0);
        int rsColumns = readsheet.getColumns();
        int rsRows = readsheet.getRows();
        for (int i = 0; i < rsRows; i++) {
            for (int j = 0; j < rsColumns; j++) {
                // jxl addresses cells as (column, row).
                Cell cell = readsheet.getCell(j, i);
                text.append(cell.getContents()).append(' ');
            }
        }
    }catch(Exception e){
        e.printStackTrace();
    }finally{
        // Free the workbook's memory and the file handle on every path.
        if(readwb != null){
            readwb.close();
        }
        if(instream != null){
            try{
                instream.close();
            }catch(IOException e){
                e.printStackTrace();
            }
        }
    }
    return text.toString();
}
/**
 * Placeholder for {@code .xlsx} extraction: no parsing is implemented, so
 * the file is ignored and an empty string is always returned.
 *
 * @param file the .xlsx file (currently unused)
 * @return always {@code ""}
 */
public static String xlsx2String(File file){
    return "";
}
/**
 * Converts an HSSF cell to text: boolean and numeric cells are rendered with
 * {@code String.valueOf}; every other cell type is read as a string cell.
 *
 * @param hssfCell the cell to convert
 * @return the textual form of the cell value
 */
private static String getValue(HSSFCell hssfCell) {
    switch (hssfCell.getCellType()) {
        case HSSFCell.CELL_TYPE_BOOLEAN:
            return String.valueOf(hssfCell.getBooleanCellValue());
        case HSSFCell.CELL_TYPE_NUMERIC:
            return String.valueOf(hssfCell.getNumericCellValue());
        default:
            return String.valueOf(hssfCell.getStringCellValue());
    }
}
/*public static String pdf2String(File file){
String result = null;
FileInputStream is = null;

PDDocument document = null;
try {
is = new FileInputStream(file);
PDFParser parser = new PDFParser(is);
parser.parse();
document = parser.getPDDocument();
PDFTextStripper stripper = new PDFTextStripper();
result = stripper.getText(document);
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (is != null) {
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (document != null) {
try {
document.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return result;
} */
/**
 * Extracts the text of a Word 2007+ ({@code .docx}) file via POI's XWPF API.
 *
 * <p>Failures are printed to stderr and an empty string is returned.
 *
 * @param file the .docx file to read
 * @return the document text, or {@code ""} if it could not be read
 */
public static String docx2String(File file){
    String result = "";
    FileInputStream fis = null;
    try {
        fis = new FileInputStream(file);
        XWPFDocument xdoc = new XWPFDocument(fis);
        XWPFWordExtractor extractor = new XWPFWordExtractor(xdoc);
        result = extractor.getText();
    }catch(Exception e){
        e.printStackTrace();
    }finally{
        // Release the file handle even when the document cannot be parsed.
        if(fis != null){
            try{
                fis.close();
            }catch(IOException e){
                e.printStackTrace();
            }
        }
    }
    return result;
}
/**
 * Searches the "content" field of the index in {@code INDEX_DIR} and prints
 * the stored filename, content and path of up to 1000 hits, followed by the
 * elapsed time.
 *
 * <p>Nothing is returned; results go to stdout. Failures (missing index,
 * unparsable query, I/O errors) are printed to stderr. The index reader and
 * directory are closed on every path.
 *
 * @param text the query string, in Lucene classic query parser syntax
 */
public static void searchIndex(String text){
    Date date1 = new Date();
    Directory opened = null;
    DirectoryReader ireader = null;
    try{
        opened = FSDirectory.open(new File(INDEX_DIR));
        directory = opened;
        analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        ireader = DirectoryReader.open(opened);
        IndexSearcher isearcher = new IndexSearcher(ireader);
        QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer);
        Query query = parser.parse(text);
        ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            Document hitDoc = isearcher.doc(hits[i].doc);
            System.out.println("_____________查看索引_______________");
            System.out.println(hitDoc.get("filename"));
            System.out.println(hitDoc.get("content"));
            System.out.println(hitDoc.get("path"));
            System.out.println("_____________查看索引________________");
        }
    }catch(Exception e){
        e.printStackTrace();
    }finally{
        // Close in reverse order of opening, whether or not the search succeeded.
        if(ireader != null){
            try{
                ireader.close();
            }catch(IOException e){
                e.printStackTrace();
            }
        }
        if(opened != null){
            try{
                opened.close();
            }catch(IOException e){
                e.printStackTrace();
            }
        }
    }
    Date date2 = new Date();
    System.out.println("查看索引-----耗時：" + (date2.getTime() - date1.getTime()) + "ms\n");
}
/**
 * Indexes every file found under {@code path} into the Lucene index stored
 * in {@code INDEX_DIR}.
 *
 * <p>One document is added per file, with the stored fields "filename",
 * "content" and "path". A single {@code IndexWriter} is used for the whole
 * run and is always closed in {@code finally}. The index write lock is
 * therefore released even when a file fails, so one bad file no longer
 * blocks all the files after it.
 *
 * @param path the directory whose files (including sub-directories) are indexed
 * @return {@code true} if every file was added and the index was committed and
 *         closed without error; {@code false} if any step failed
 */
public static boolean createIndex(String path){
    Date date1 = new Date();
    List<File> fileList = getFileList(path);
    boolean success = true;
    Directory opened = null;
    IndexWriter writer = null;
    try{
        File indexFile = new File(INDEX_DIR);
        if (!indexFile.exists()) {
            indexFile.mkdirs();
        }
        analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        opened = FSDirectory.open(indexFile);
        directory = opened;
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);
        writer = new IndexWriter(opened, config);
        indexWriter = writer;
        for (File file : fileList) {
            content = extractContent(file);
            System.out.println("name :"+file.getName());
            try{
                Document document = new Document();
                document.add(new TextField("filename", file.getName(), Store.YES));
                document.add(new TextField("content", content, Store.YES));
                document.add(new TextField("path", file.getPath(), Store.YES));
                writer.addDocument(document);
                // Report completion only after the document was actually added.
                System.out.println("_____________索引建立完畢_______________");
            }catch(Exception e){
                // A single bad file must not abort the remaining files.
                success = false;
                e.printStackTrace();
            }finally{
                content = "";
            }
        }
        writer.commit();
    }catch(Exception e){
        success = false;
        e.printStackTrace();
    }finally{
        // Closing the writer releases write.lock; do it on every path.
        if(writer != null){
            try{
                writer.close();
            }catch(Exception e){
                success = false;
                e.printStackTrace();
            }
        }
        if(opened != null){
            try{
                opened.close();
            }catch(IOException e){
                e.printStackTrace();
            }
        }
    }
    Date date2 = new Date();
    System.out.println("創建索引-----耗時：" + (date2.getTime() - date1.getTime()) + "ms\n");
    return success;
}

/**
 * Picks the extractor matching the file's extension (case-insensitive) and
 * returns the extracted text; unsupported extensions yield {@code ""}.
 * PDF extraction is disabled in this class, so .pdf files are indexed with
 * empty content.
 */
private static String extractContent(File file){
    String name = file.getName();
    String type = name.substring(name.lastIndexOf(".")+1);
    if("txt".equalsIgnoreCase(type)){
        return txt2String(file);
    }else if("doc".equalsIgnoreCase(type)){
        return doc2String(file);
    }else if("xls".equalsIgnoreCase(type)){
        return xls2String(file);
    }else if("xlsx".equalsIgnoreCase(type)){
        return xlsx2String(file);
    }else if("docx".equalsIgnoreCase(type)){
        return docx2String(file);
    }
    return "";
}
/**
 * Demo entry point: wipes and recreates the index folder, indexes everything
 * under {@code DATA_DIR}, then runs a sample search for "love".
 *
 * @param args unused
 */
public static void main(String[] args){
    File indexFolder = new File(INDEX_DIR);
    // The folder is recreated whatever deleteDir reports.
    deleteDir(indexFolder);
    indexFolder.mkdir();
    createIndex(DATA_DIR);
    searchIndex("love");
}
/**
 * Result of the most recent {@link #getFileList(String)} call, kept for code
 * that reads this field directly.
 */
static List<File> FileList = new ArrayList<File>();

/**
 * Recursively collects every regular file below a directory.
 *
 * <p>Each call starts from an empty list, so calling the method repeatedly
 * no longer accumulates duplicates from earlier calls. Sub-directories are
 * traversed through their absolute path. The resulting list is printed once
 * to stdout and also stored in {@code FileList}.
 *
 * @param dirPath the directory to scan
 * @return the files found; empty if {@code dirPath} is not a readable directory
 */
public static List<File> getFileList(String dirPath) {
    List<File> found = new ArrayList<File>();
    collectFiles(new File(dirPath), found);
    FileList = found;
    System.out.println(found);
    return found;
}

/** Appends all non-directory entries below {@code dir} to {@code sink}, depth first. */
private static void collectFiles(File dir, List<File> sink) {
    File[] entries = dir.listFiles();
    if (entries == null) {
        // Not a directory, or it could not be listed.
        return;
    }
    for (File entry : entries) {
        if (entry.isDirectory()) {
            collectFiles(entry.getAbsoluteFile(), sink);
        } else {
            sink.add(entry);
        }
    }
}
//return fileList;
/*for (File file : files) {
if (isTxtFile(file.getName())) {
fileList.add(file);
}
}*/
//System.out.println("111111111111111111111111111111111111111111111111111111111111");
//System.out.println(fileList);
//System.out.println("\n");
//return fileList;
//}
/**
 * Tells whether a file name has one of the supported extensions:
 * txt, xls, xlsx, doc, docx or pdf.
 *
 * <p>Only the text after the last dot is considered and the comparison
 * ignores case, so {@code "REPORT.DOCX"} is accepted while
 * {@code "notes.txt.bak"} is not. A name that is {@code null}, has no dot,
 * or starts with its only dot (such as {@code ".txt"}) is rejected.
 *
 * @param fileName the file name to test
 * @return {@code true} if the extension is supported, otherwise {@code false}
 */
public static boolean isTxtFile(String fileName) {
    if (fileName == null) {
        return false;
    }
    int dot = fileName.lastIndexOf('.');
    if (dot <= 0) {
        return false;
    }
    String extension = fileName.substring(dot + 1);
    String[] supported = {"txt", "xls", "xlsx", "doc", "docx", "pdf"};
    for (int i = 0; i < supported.length; i++) {
        if (supported[i].equalsIgnoreCase(extension)) {
            return true;
        }
    }
    return false;
}
/**
 * Closes the shared index writer if one has been assigned; does nothing otherwise.
 *
 * @throws Exception whatever the writer's {@code close()} throws
 */
public static void closeWriter() throws Exception {
    if (indexWriter == null) {
        return;
    }
    indexWriter.close();
}
/**
 * Deletes a file, or a directory together with everything below it.
 *
 * <p>A directory whose listing cannot be obtained ({@code listFiles()}
 * returns {@code null}) is handled without throwing.
 *
 * @param file the file or directory to delete
 * @return {@code true} if the path no longer exists afterwards (including the
 *         case where it never existed); {@code false} if it could not be
 *         removed or {@code file} is {@code null}
 */
public static boolean deleteDir(File file){
    if(file == null){
        return false;
    }
    if(file.isDirectory()){
        File[] children = file.listFiles();
        if(children != null){
            for(int i = 0; i < children.length; i++){
                deleteDir(children[i]);
            }
        }
    }
    file.delete();
    // Report the observable outcome rather than an unconditional true.
    return !file.exists();
}
/**
 * Reads every sheet of an {@code .xls} workbook into a table of strings.
 *
 * <p>Rows before {@code ignoreRows} are skipped on each sheet, as are missing
 * rows and rows whose first column is blank. Dates are formatted as
 * {@code yyyy-MM-dd}, other numbers with the pattern {@code "0"}, booleans as
 * {@code "Y"}/{@code "N"}, and every value is right-trimmed. Each row array is
 * as long as the widest row seen so far, so earlier rows may be shorter than
 * later ones.
 *
 * @param file       the .xls file to read
 * @param ignoreRows number of leading rows to skip per sheet; negative values
 *                   are treated as 0
 * @return one {@code String[]} per retained row, in sheet and row order
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException           if the workbook cannot be read
 */
public static String[][] getData(File file, int ignoreRows)
        throws FileNotFoundException, IOException {
    List<String[]> result = new ArrayList<String[]>();
    int rowSize = 0;
    // Formatters are loop-invariant; build them once per call.
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
    DecimalFormat numberFormat = new DecimalFormat("0");
    int firstRow = Math.max(ignoreRows, 0);
    BufferedInputStream in = new BufferedInputStream(new FileInputStream(file));
    try {
        POIFSFileSystem fs = new POIFSFileSystem(in);
        HSSFWorkbook wb = new HSSFWorkbook(fs);
        for (int sheetIndex = 0; sheetIndex < wb.getNumberOfSheets(); sheetIndex++) {
            HSSFSheet st = wb.getSheetAt(sheetIndex);
            for (int rowIndex = firstRow; rowIndex <= st.getLastRowNum(); rowIndex++) {
                HSSFRow row = st.getRow(rowIndex);
                if (row == null) {
                    continue;
                }
                // Kept from the original: one spare slot beyond getLastCellNum().
                int tempRowSize = row.getLastCellNum() + 1;
                if (tempRowSize > rowSize) {
                    rowSize = tempRowSize;
                }
                String[] values = new String[rowSize];
                Arrays.fill(values, "");
                boolean hasValue = false;
                for (int columnIndex = 0; columnIndex <= row.getLastCellNum(); columnIndex++) {
                    String value = cellText(row.getCell(columnIndex), dateFormat, numberFormat);
                    // A blank first column marks the row as empty: drop it.
                    if (columnIndex == 0 && value.trim().equals("")) {
                        break;
                    }
                    values[columnIndex] = rightTrim(value);
                    hasValue = true;
                }
                if (hasValue) {
                    result.add(values);
                }
            }
        }
    } finally {
        // Release the file handle even when the workbook is unreadable.
        in.close();
    }
    return result.toArray(new String[result.size()][]);
}

/** Renders one cell as text; a missing, blank or error cell yields {@code ""}. */
private static String cellText(HSSFCell cell, SimpleDateFormat dateFormat,
        DecimalFormat numberFormat) {
    if (cell == null) {
        return "";
    }
    switch (cell.getCellType()) {
        case HSSFCell.CELL_TYPE_STRING:
            return cell.getStringCellValue();
        case HSSFCell.CELL_TYPE_NUMERIC:
            if (HSSFDateUtil.isCellDateFormatted(cell)) {
                Date date = cell.getDateCellValue();
                return date != null ? dateFormat.format(date) : "";
            }
            return numberFormat.format(cell.getNumericCellValue());
        case HSSFCell.CELL_TYPE_FORMULA:
            return formulaText(cell);
        case HSSFCell.CELL_TYPE_BOOLEAN:
            return cell.getBooleanCellValue() ? "Y" : "N";
        case HSSFCell.CELL_TYPE_BLANK:
        case HSSFCell.CELL_TYPE_ERROR:
        default:
            return "";
    }
}

/**
 * Reads a formula cell's cached result: the string value when it is non-empty,
 * otherwise the numeric value. POI throws {@code IllegalStateException} when
 * the cached result has a different type, so each read is guarded.
 */
private static String formulaText(HSSFCell cell) {
    try {
        String text = cell.getStringCellValue();
        if (text != null && !text.equals("")) {
            return text;
        }
    } catch (IllegalStateException notText) {
        // Cached result is not a string; fall through to the numeric read.
    }
    try {
        return cell.getNumericCellValue() + "";
    } catch (IllegalStateException notNumeric) {
        return "";
    }
}
/**
 * Strips trailing space characters (U+0020 only) from a string.
 * Tabs, newlines and other whitespace are left in place.
 *
 * @param str the string to process; may be {@code null}
 * @return the string without trailing spaces, or {@code ""} for {@code null}
 */
public static String rightTrim(String str) {
    if (str == null) {
        return "";
    }
    int end = str.length();
    while (end > 0 && str.charAt(end - 1) == ' ') {
        end--;
    }
    return str.substring(0, end);
}
}

54一個大文件夾下所有文件的讀取和檢索！支持英文

data time ++ 檢索 pdm () new textfield ocx package lld; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel

54一個大文件夾下所有文件的讀取和檢索！支持英文

54一個大文件夾下所有文件的讀取和檢索！支持英文

bat複製文件夾下所有文件到另一個目錄

C# 把一個文件夾下所有文件複製到另一個文件夾下把一個文件夾下所有文件刪除（轉）

Linux C 讀取文件夾下所有文件（包括子文件夾）的文件名(轉)

C# 刪除此文件夾及此文件夾下所有文件

python中讀取某個路徑文件夾下所有文件--listdir（）

C# 遞歸查找文件夾下所有文件和子文件夾的所有文件

[轉載]python文件和目錄操作方法大全（含更改文件夾下所有文件名稱的實例）

使用cmd命令刪除文件夾下所有文件

CentOS下遞歸遍歷文件夾下所有文件，查找指定字符

批量複製windows文件夾下所有文件名

遍歷文件夾下所有文件的路徑

PHP遍歷文件夾下所有文件

SHELL遞歸遍歷文件夾下所有文件

python查看文件夾下所有文件

DOS批處理器移動指定數量文件到一個臨時文件夾，上傳到linux服務器，並刪除臨時文件夾下的文件

Cocos2dx 遍歷文件夾下所有的文件（草稿）

JAVA獲取文件夾下所有的文件

【Linux】Linux下統計當前文件夾下的文件個數、目錄個數

ubuntu 用shell腳本實現將當前文件夾下全部文件夾中的某一類文件複製到同一文件夾下

54一個大文件夾下所有文件的讀取和檢索！支持英文

相關推薦