關於 POI 讀取 Excel 2003 表格出現記憶體溢位問題的解決方案
Apache POI 是解析 Excel 表格的 Java 開源框架,上手比較容易,因此受到大部分開發者的喜愛。但是我在做專案時遇到了記憶體溢位的問題,網上找了很多解決方法都不盡如人意,因此自己想辦法解決了該問題。現把之前會發生記憶體溢位的程式碼和優化後的程式碼放出來以供參考。注:我這裡是讀取 Excel 表格的資料,並從中取出我所需要的資料。
DateUtil 類:只要 Excel 表格的列數和資料量過大,就會導致記憶體溢位
package com.rskytech.am.util;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.*;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.*;
/**
* @author lizehua
* @date 2018-07-5 17:31 pm
* @DESC 處理車臺數據的時間按精確到微妙時間,
* 並把訊號對應的時間和值寫入到csv檔案中
* @package com.rskytech.am.util.DateUtil
*/
public class DateUtil {
/**
* 讀取execl表格方法
* (1)判斷屬於那個型別的xls檔案
* (2)根據檔案型別的不同調用不同的處理execl方法
* @param file execl檔案的路徑
* @return
* @throws IOException
*/
public static List<List<Object>> readExcel(File file) throws IOException {
String fileName = file.getName();
String extension = fileName.lastIndexOf(".") == -1 ? "" : fileName.substring(fileName.lastIndexOf(".") + 1);
Workbook hwb;
//判斷execl檔案檔案字尾是否為.xls
if ("xls".equals(extension)) {
hwb = new HSSFWorkbook(new FileInputStream(file));
return readExcel(file, hwb);
} else if ("xlsx".equals(extension)) {
//讀取.xlsx檔案
hwb = new XSSFWorkbook(new FileInputStream(file));
return readExcel(file, hwb);
} else {
//讀取的檔案不合法
throw new IOException("不支援的檔案型別");
}
}
/**
* 讀取 office excel
* (1)讀取execl資料取出空行資料只保留有資料的行數
* (2)取出行資料,然後根據行資料取出該行的所有列的資料
* @throws IOException
* @throws FileNotFoundException
*/
private static List<List<Object>> readExcel(File file, Workbook hwb) throws IOException {
List<List<Object>> list = new LinkedList<List<Object>>();
Sheet sheet = hwb.getSheetAt(0);
Object value = "";
Row row = null;
Row lastRow = null;
Cell cell = null;
for (int i = sheet.getFirstRowNum(); i <= sheet.getLastRowNum() + 1; i++) {
row = sheet.getRow(i);
lastRow = sheet.getRow(sheet.getLastRowNum());
lastRow.getPhysicalNumberOfCells();
if (row == null) {
continue;
}
List<Object> linked = new LinkedList<Object>();
for (int j = 0; j <= lastRow.getLastCellNum(); j++) {
cell = row.getCell(j);
if (cell == null) {
linked.add("");
continue;
}
// 格式化 number String 字元
DecimalFormat df = new DecimalFormat("0");
// 格式化日期字串
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
// 格式化數字
DecimalFormat nf = new DecimalFormat("0");
switch (cell.getCellType()) {
case XSSFCell.CELL_TYPE_STRING:
value = cell.getStringCellValue();
break;
case XSSFCell.CELL_TYPE_NUMERIC:
if ("@".equals(cell.getCellStyle().getDataFormatString())) {
value = df.format(cell.getNumericCellValue());
} else if ("General".equals(cell.getCellStyle().getDataFormatString())) {
value = cell.getNumericCellValue();
} else {
value = sdf.format(HSSFDateUtil.getJavaDate(cell.getNumericCellValue()));
}
break;
case XSSFCell.CELL_TYPE_BOOLEAN:
value = cell.getBooleanCellValue();
break;
case XSSFCell.CELL_TYPE_BLANK:
value = "";
break;
default:
value = cell.toString();
}
linked.add(value);
}
list.add(linked);
}
return list;
}
/**
* 獲得ececl的開始時間並處理格式化
* (1)獲得開始時間所在的列,然後用"-"分隔成年月日和小時,分鐘,秒,毫秒的字串陣列
* (2)取出(1)中的陣列的第一個元素用"/"分隔成為一個新的資料,然後把新的資料各個元素取出來按照順序組合成正確的年日月
* (3)取出(1)中的第二個元素用“:”分隔成為一個字串陣列,這是陣列的第一個元素是如16h35,第二個元素是1如18,456
* (4)取出(3)中陣列第一個元素用"h"分隔,然後拼接成小時,分鐘的格式,如“16:35"
* (5)取出(3)中陣列第二個元素用”."分隔注意要轉化
* (6)把(2)中處理好的年月日和(4),(5)中處理好的資料拼接成所要的時間格式
* @param execlFIlePath execl檔案的路徑
* @return
*/
public static String getStartTime(String execlFIlePath) throws IOException {
List<List<Object>> execlData = readExcel(new File(execlFIlePath));
//獲得開始時間所在行數的資料
String rowData = execlData.get(1).get(1).toString();
//用-分割得到時間資料的年月日和小時,分鐘秒的資料
String[] splitData = rowData.split("-");
//格式化年月日的資料
String[] date = splitData[0].split("/");
String deal_date = date[date.length - 1].trim() + "-" + date[1] + "-" + date[0];
//格式化小時分鐘秒的資料
String[] time = splitData[1].split(":");
String[] hourAndMount = time[0].split("h");
String deal_time = hourAndMount[0] + ":" + hourAndMount[1] + ":" + time[1].split("\\.")[0] + "." + time[1].split("\\.")[1];
//處理後得到所要的正確的時間
String start_time = deal_date + "" + deal_time;
return start_time;
}
/**
* 格式化各個訊號的時間
* (1)迴圈取出各個訊號對應的沒有格式化的時間包括微妙的資料存放相應的集合中
* (2)迴圈(1)中的兩個集合拼接資料,並存放到最終結果的集合
* @param execlFilePath execl檔案的路徑
* @return
*/
public static List<String> formatDate(String execlFilePath) throws IOException {
String start_time = getStartTime(execlFilePath);
List<List<Object>> execlData = readExcel(new File(execlFilePath));
String date = start_time.split(" ")[0];
List<String> deal_times = new ArrayList<String>();
//存放取出微妙值的集合
List<String> usList = new ArrayList<String>();
//存放處理含有微妙的集合
List<String> after_deal_times = new ArrayList<String>();
//迴圈取出小時,分鐘,秒,毫秒,微妙的值
for (int j = 7; j < execlData.size(); j++) {
for (int i = 0; i < execlData.get(j).size(); i++) {
if (i % 4 == 0) {
String deal_get_time_split[] = execlData.get(j).get(i).toString().split(":");
String deal_hour_minute[] = deal_get_time_split[0].split("h");
String deal_hour_minute_ = deal_hour_minute[0] + ":" + deal_hour_minute[1];
String deal_seconds_millis[] = deal_get_time_split[1].split(",");
String deal_seonds_millis_ = deal_seconds_millis[0] + "," + deal_seconds_millis[1];
String deal_time = date + " " + deal_hour_minute_ + ":" + deal_seonds_millis_;
deal_times.add(deal_time);
usList.add(execlData.get(j).get(i + 1).toString());
}
}
}
//迴圈取出對應的值,然後把微秒的值乘以1000拼接
for (int i = 0; i < deal_times.size(); i++) {
for (int j = 0; j < usList.size(); j++) {
//保證一一對應
if (i == j) {
String us = String.valueOf(Double.parseDouble(usList.get(i)) * 1000);
//把得到的微妙先轉為double型別的資料,再把double資料轉為String資料,去掉.0
after_deal_times.add(String.format(deal_times.get(i).toString() + "," + us.substring(0, us.lastIndexOf("."))));
}
}
}
return after_deal_times;
}
/**
* 讀取execl表格資料,並把對應的訊號名的值和時間存入到csv檔案中
* (1)讀取execl檔案並獲得資料,同時呼叫getStartTime()方法獲得開始時間
* (2)讀取含有訊號名的資料,並取出訊號名,分別拼接成"訊號名_time,訊號名_value",並寫入檔案中
* (3)取出訊號名下對應的時間的值並處理精確到毫秒級別,加上年月日和相應的訊號值並按行寫入檔案
* @param execlFilePath 要被讀取的execl檔案路徑
* @param csvFilePath 要寫如csv檔案的路徑
*/
public static String writExeclToCsvFile(String execlFilePath, String csvFilePath) throws IOException {
//讀取execl檔案獲取每行每列的資料
List<List<Object>> write_execl_data = readExcel(new File(execlFilePath));
String start_time = getStartTime(execlFilePath);
String date = start_time.split(" ")[0];
//建立寫入流物件,並以UTF-8字元編碼格式寫入
OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(csvFilePath), "UTF-8");
//對應訊號名
String sign_name = "";
//拼接訊號名下對應的時間和值,並用","分割開
List<Object> sign_names = write_execl_data.get(2);
for (int i = 0; i < sign_names.size(); i++) {
if (i % 4 == 0) {
String sign_name_ = sign_names.get(i).toString();
sign_name += sign_name_ + "_" + "time" + "," + sign_name_ + "_" + "value" + ",";
}
}
//寫入訊號名對應的時間值和該訊號名的值,並換行寫入,寫入檔案,並去掉最後一個","
writer.write(sign_name.substring(0, sign_name.lastIndexOf(",")) + "\n");
writer.flush();
//迴圈取出小時,分鐘,秒,毫秒,微妙的值
for (int j = 7; j < write_execl_data.size(); j++) {
//要寫入檔案的資料,按","分割開
String line_data = "";
for (int i = 0; i < write_execl_data.get(j).size(); i++) {
if (i % 4 == 0) {
//處理時間,不要把毫秒用","隔開,以後會影響讀取csv檔案資料的準確性
String deal_get_time_split[] = write_execl_data.get(j).get(i).toString().split(":");
String deal_hour_minute[] = deal_get_time_split[0].split("h");
String deal_hour_minute_ = deal_hour_minute[0] + ":" + deal_hour_minute[1];
String deal_seconds_millis[] = deal_get_time_split[1].split(",");
String deal_seonds_millis_ = deal_seconds_millis[0] + "." + deal_seconds_millis[1];
// line_data += write_execl_data.get(j).get(i).toString() + "," + write_execl_data.get(j).get(i + 2).toString() + ",";
line_data += date + " " + deal_hour_minute_ + ":" + deal_seonds_millis_ + "," + write_execl_data.get(j).get(i + 2).toString() + ",";
}
}
//寫入檔案,並去掉最後一個","
writer.write(line_data.substring(0, line_data.lastIndexOf(",")) + "\n");
writer.flush();
}
writer.close();
String csvFileName = execlFilePath.split("/")[execlFilePath.split("/").length -1].split("\\.")[0] + ".csv";
System.out.println("寫入csv檔名是======>: " + csvFileName);
return csvFileName;
}
public static void main(String[] args) {
String filePath = "/home/lizehua/poiTest/20180330-第一次試驗-試驗階段.xls";
String csvFilePath = "/home/lizehua/carCsv/2018-03-30_data.csv";
try {
// formatDate(filePath);
writExeclToCsvFile(filePath, csvFilePath);
} catch (IOException e) {
e.printStackTrace();
}
}
}
下面貼上優化後不會出現記憶體溢位的類(XlsToCsv):
package com.rskytech.am.util;
import org.apache.poi.hssf.eventusermodel.*;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.*;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class XlsToCsv implements HSSFListener {
private int minColumns;
private POIFSFileSystem fs;
private PrintStream output;
private int lastRowNumber;
private int lastColumnNumber;
public long time_cha;
public long getTime_cha() {
return time_cha;
}
public void setTime_cha(long time_cha) {
this.time_cha = time_cha;
}
/**
* Should we output the formula, or the value it has?
*/
private boolean outputFormulaValues = true;
/**
* For parsing Formulas
*/
private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener;
private HSSFWorkbook stubWorkbook;
// Records we pick up as we process
private SSTRecord sstRecord;
private FormatTrackingHSSFListener formatListener;
/**
* So we known which sheet we're on
*/
private int sheetIndex = -1;
private BoundSheetRecord[] orderedBSRs;
private ArrayList boundSheetRecords = new ArrayList();
// For handling formulas with string results
private int nextRow;
private int nextColumn;
private boolean outputNextStringRecord;
private String d = "";
private final String OUTPUT_CHARSET = "UTF-8";
// private final String OUTPUT_CHARSET = "GBK";
private int count = 0;
public static String deal_time = "";
/**
* Creates a new XLS -> CSV converter
*
* @param fs The POIFSFileSystem to process
* @param output The PrintStream to output the CSV to
* @param minColumns The minimum number of columns to output, or -1 for no minimum
*/
public XlsToCsv(POIFSFileSystem fs, PrintStream output, int minColumns) {
this.fs = fs;
this.output = output;
this.minColumns = minColumns;
}
public XlsToCsv(String inputFilePath, String outputFilePath) throws Exception {
fs = new POIFSFileSystem(new FileInputStream(inputFilePath));
output = new PrintStream(outputFilePath, OUTPUT_CHARSET);
minColumns = -1;
}
/**
* Creates a new XLS -> CSV converter
*
* @param filename The file to process
* @param minColumns The minimum number of columns to output, or -1 for no minimum
* @throws IOException
* @throws FileNotFoundException
*/
public XlsToCsv(String filename, int minColumns) throws IOException, FileNotFoundException {
this(new POIFSFileSystem(new FileInputStream(filename)), System.out, minColumns);
}
public XlsToCsv() {
}
public XlsToCsv(long time_cha) {
this.time_cha = time_cha;
}
/**
* Initiates the processing of the XLS file to CSV
*/
public void process() throws IOException {
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
formatListener = new FormatTrackingHSSFListener(listener);
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
if (outputFormulaValues) {
request.addListenerForAllRecords(formatListener);
} else {
workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}
factory.processWorkbookEvents(request, fs);
}
/**
* Main HSSFListener method, processes events, and outputs the CSV as the
* file is processed.
*/
public void processRecord(Record record) {
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;
String sign = "";
switch (record.getSid()) {
case BoundSheetRecord.sid:
//boundSheetRecords.add(record);
break;
case BOFRecord.sid:
BOFRecord br = (BOFRecord) record;
if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
// Create sub workbook if required
if (workbookBuildingListener != null && stubWorkbook == null) {
stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
}
// Output the worksheet name
// Works by ordering the BSRs by the location of
// their BOFRecords, and then knowing that we
// process BOFRecords in byte offset order
sheetIndex++;
if (orderedBSRs == null) {
orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
}
}
break;
case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;
case BlankRecord.sid:
BlankRecord brec = (BlankRecord) record;
thisRow = brec.getRow();
thisColumn = brec.getColumn();
// thisStr = "";
break;
case BoolErrRecord.sid:
BoolErrRecord berec = (BoolErrRecord) record;
thisRow = berec.getRow();
thisColumn = berec.getColumn();
// thisStr = "";
break;
case FormulaRecord.sid:
FormulaRecord frec = (FormulaRecord) record;
thisRow = frec.getRow();
thisColumn = frec.getColumn();
if (outputFormulaValues) {
if (Double.isNaN(frec.getValue())) {
// Formula result is a string
// This is stored in the next record
outputNextStringRecord = true;
nextRow = frec.getRow();
nextColumn = frec.getColumn();
} else {
thisStr = formatListener.formatNumberDateCell(frec);
}
} else {
// thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
thisStr = HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression());
}
break;
case StringRecord.sid:
if (outputNextStringRecord) {
// String for formula
StringRecord srec = (StringRecord) record;
thisStr = srec.getString();
thisRow = nextRow;
thisColumn = nextColumn;
outputNextStringRecord = false;
}
break;
case LabelRecord.sid:
LabelRecord lrec = (LabelRecord) record;
thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
// thisStr = '"' + lrec.getValue() + '"';
thisStr = lrec.getValue();
break;
case LabelSSTRecord.sid:
LabelSSTRecord lsrec = (LabelSSTRecord) record;
thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if (sstRecord == null) {
thisStr = '"' + "(No SST Record, can't identify string)" + '"';
} else if (sstRecord.getString(lsrec.getSSTIndex()).toString().contains("30/03/2018 - 16h34:18.440")) {
// thisStr = '"' + sstRecord.getString(lsrec.getSSTIndex()).toString() + '"';
thisStr = sstRecord.getString(lsrec.getSSTIndex()).toString();
//用-分割得到時間資料的年月日和小時,分鐘秒的資料
String[] splitData = thisStr.split("-");
//格式化年月日的資料
String[] date = splitData[0].split("/");
//格式化小時分鐘秒的資料
String[] time = splitData[1].split(":");
String[] hourAndMount = time[0].split("h");
String hours = hourAndMount[0] + ":" + hourAndMount[1] + ":" + time[1].split("\\.")[0] + "." + time[1].split("\\.")[1];
d = date[date.length - 1].trim() + "-" + date[1] + "-" + date[0];
deal_time = d + " " + hours;
thisStr = "";
} else {
//處理時間格式
thisStr = sstRecord.getString(lsrec.getSSTIndex()).toString();
// System.out.println("------->" + thisStr);
if (thisStr.contains("h")) {
String deal_get_time_split[] = thisStr.split(":");
String deal_hour_minute[] = deal_get_time_split[0].split("h");
String deal_hour_minute_ = deal_hour_minute[0] + ":" + deal_hour_minute[1];
String deal_seconds_millis[] = deal_get_time_split[1].split(",");
String deal_seonds_millis_ = deal_seconds_millis[0] + "." + deal_seconds_millis[1];
String deal_date = d + " " + deal_hour_minute_ + ":" + deal_seonds_millis_;
// thisStr = d + " " + deal_hour_minute_ + ":" + deal_seonds_millis_;
try {
long t = SystemConstants.stdMSsdf.parse(deal_date).getTime() + this.getTime_cha();
// System.out.println("時間差粗例=========> " + t);
String s = SystemConstants.stdMSsdf.format(new Date(t));
System.out.println("s===" + s);
thisStr = s;
} catch (ParseException e) {
e.printStackTrace();
}
} else if (thisStr.contains("Sign")) {
//處理訊號的訊號名。包括訊號產生時間和對應的值
sign += thisStr + "_time," + thisStr + "_value";
thisStr = sign;
} else {
thisStr = "";
}
}
break;
case NoteRecord.sid:
NoteRecord nrec = (NoteRecord) record;
thisRow = nrec.getRow();
thisColumn = nrec.getColumn();
thisStr = '"' + "(TODO)" + '"';
break;
case NumberRecord.sid:
NumberRecord numrec = (NumberRecord) record;
thisRow = numrec.getRow();
thisColumn = numrec.getColumn();
// Format
thisStr = formatListener.formatNumberDateCell(numrec);
break;
case RKRecord.sid:
RKRecord rkrec = (RKRecord) record;
thisRow = rkrec.getRow();
thisColumn = rkrec.getColumn();
thisStr = '"' + "(TODO)" + '"';
break;
default:
break;
}
// Handle new row
if (thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}
// Handle missing column
if (record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
thisRow = mc.getRow();
thisColumn = mc.getColumn();
thisStr = "";
}
// If we got something to print out, do so
if (thisStr != null) {
if (thisStr == "") {
} else {
// System.out.println("thisColumn :" + thisColumn + " thisStr: " + thisStr);
if (thisColumn > 0) {
// System.out.println(thisStr);
// if (thisStr.contains("0.0") || thisStr.equalsIgnoreCase("0") || thisStr.equalsIgnoreCase("100")||thisStr.equalsIgnoreCase("2")){
//控制那一列不被寫入到csv檔案中,具體以thisStr來決定
if ((thisColumn -1) % 4 == 0){
// System.out.println("us=====>" + thisStr );
thisStr = "";
} else {
output.print(',');
}
}
output.print(thisStr);
}
}
// Update column and row count
if (thisRow > -1)
lastRowNumber = thisRow;
if (thisColumn > -1)
lastColumnNumber = thisColumn;
// Handle end of row
if (record instanceof LastCellOfRowDummyRecord) {
if (((LastCellOfRowDummyRecord) record).getRow() > 7) {
// Print out any missing commas if needed
if (minColumns > 0) {
// Columns are 0 based
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
for (int i = lastColumnNumber; i < (minColumns); i++) {
output.print(',');
}
}
// We're onto a new row
lastColumnNumber = -1;
// End the row
output.println();
}
}
}
public static void main(String[] args) throws Exception {
long star = System.currentTimeMillis();
System.out.println("開始讀取的時間是======》:: " + star);
String inputPath2 = "/home/java/data/fault_database/20180330-第一次試驗-試驗階段.xls";
String outputPath2 = "/home/java/data/fault_database/data.csv";
// XlsToCsv.time_cha=2000;
XlsToCsv xls2csv = new XlsToCsv(inputPath2, outputPath2);
xls2csv.setTime_cha(2000);
xls2csv.process();
System.out.println(XlsToCsv.deal_time);
long end = System.currentTimeMillis();
System.out.println("總過用時==========>: " + (end - star));
}
}