1. 程式人生 > >java 從zip包中抽取檔案

java 從zip包中抽取檔案

專案需求

  • 從多個資料夾中讀取zip包
  • 解壓zip包
  • 讀取zip包中的資料
  • 把讀取的資料寫入到txt檔案中

Util.java

import java.io.*;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.*;


/**
 * Helpers for locating zip archives below a root directory, extracting the
 * JSON records stored in their text entries, and appending records to text files.
 *
 * <p>A "record line" is a single line of the form {@code [{...},{...}]}: the first
 * and last characters are dropped and the remainder is split on {@code ",{"}.
 */
public class Util {
    /** Suffix identifying an archive; callers re-append it to rebuild the full path. */
    private static final String ZIP_SUFFIX = ".zip";

    /**
     * Lists the zip archives found one level below {@code path}.
     *
     * <p>Only regular files whose name ends with {@code ".zip"} are reported, and
     * only that trailing suffix is stripped. (The previous
     * {@code replaceAll(".zip", "")} treated the argument as a regular expression
     * and also removed text such as the "yzip" in "myzip.zip".)
     *
     * @param path absolute path of the root directory
     * @return entries of the form {@code <subdirectory><separator><archive name without .zip>};
     *         empty when the root is missing, unreadable, or has no children
     */
    public List<String> getFileName(String path) {
        List<String> zipFileName = new ArrayList<String>();
        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] children = new File(path).listFiles();
        if (children == null || children.length == 0) {
            System.out.println("This package is empty");
            return zipFileName;
        }
        for (File dir : children) {
            if (!dir.isDirectory()) {
                continue;
            }
            File[] candidates = dir.listFiles();
            if (candidates == null) {
                continue;
            }
            for (File candidate : candidates) {
                String name = candidate.getName();
                if (candidate.isFile() && name.endsWith(ZIP_SUFFIX)) {
                    String baseName = name.substring(0, name.length() - ZIP_SUFFIX.length());
                    zipFileName.add(dir.getName() + File.separator + baseName);
                }
            }
        }
        return zipFileName;
    }

    /**
     * Reads the first line of each {@code .txt} entry in the archive and splits it
     * into JSON records. When several {@code .txt} entries contain a line, the
     * records of the last such entry are returned.
     *
     * @param path path of the zip archive
     * @return the JSON records, or an empty list when no {@code .txt} entry has a line
     * @throws Exception if the archive cannot be opened or read; a missing file now
     *         surfaces as {@link FileNotFoundException} instead of being printed and
     *         followed by a failure on the null stream
     */
    public List<String> getZipWithAnt(String path) throws Exception {
        List<String> records = new ArrayList<String>();
        try (ZipInputStream zis = openZip(path)) {
            for (ZipEntry entry = zis.getNextEntry(); entry != null; entry = zis.getNextEntry()) {
                if (!entry.getName().endsWith(".txt")) {
                    continue;
                }
                // A fresh reader per entry: a shared reader would keep buffered
                // characters from the previous entry and return them here.
                String firstLine = newEntryReader(zis).readLine();
                if (firstLine != null) {
                    records = parseRecords(firstLine);
                }
            }
        }
        System.out.println(path);
        return records;
    }

    /**
     * Reads every line of every entry whose name ends with {@code "txt"} and
     * collects the JSON records of all of them. Scanning stops at the first other
     * entry whose name starts with {@code "attach"}.
     *
     * <p>Entries are read from a single {@link ZipInputStream}; the previous version
     * also opened a {@code ZipFile} that was never closed, and it discarded every
     * second line because of an extra {@code readLine()} call inside the loop.
     *
     * @param path path of the archive, in the form {@code xxx.zip}
     * @return the collected records, or {@code null} when an I/O error occurs
     */
    public List<String> getZipInnerFile(String path) {
        try (ZipInputStream zis = openZip(path)) {
            List<String> records = new ArrayList<String>();
            ZipEntry entry;
            while ((entry = zis.getNextEntry()) != null) {
                String entryName = entry.getName();
                if (entryName.endsWith("txt")) {
                    BufferedReader reader = newEntryReader(zis);
                    String line;
                    while ((line = reader.readLine()) != null) {
                        records.addAll(parseRecords(line));
                    }
                } else if (entryName.startsWith("attach")) {
                    break;
                }
            }
            return records;
        } catch (IOException e) {
            System.err.println("Failed to read " + path + ": " + e);
            return null;
        } finally {
            System.out.println(path);
        }
    }

    /**
     * Appends {@code json} followed by {@code "\r\n"} to the file at {@code path},
     * creating the file and any missing parent directories first.
     *
     * @param json text to append
     * @param path path of the target text file
     * @throws IOException if the parent directory cannot be created or the write fails
     */
    public void writeToTxt(String json, String path) throws IOException {
        File parent = new File(path).getAbsoluteFile().getParentFile();
        // Re-check isDirectory() after mkdirs() in case something else created it meanwhile.
        if (parent != null && !parent.isDirectory() && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Cannot create directory " + parent);
        }
        try (BufferedWriter out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(path, true)))) {
            out.write(json + "\r\n");
        }
    }

    /** Opens the archive for sequential reading, decoding entry names as GBK. */
    private static ZipInputStream openZip(String path) throws FileNotFoundException {
        Charset entryNames = Charset.forName("GBK");
        return new ZipInputStream(new BufferedInputStream(new FileInputStream(path)), entryNames);
    }

    /**
     * Wraps the current entry of {@code zis} in a line reader that uses the default
     * charset. The reader must not be closed: closing it would close the whole archive.
     */
    private static BufferedReader newEntryReader(ZipInputStream zis) {
        return new BufferedReader(new InputStreamReader(zis));
    }

    /**
     * Splits one record line into its JSON records. Lines shorter than two
     * characters cannot hold the enclosing pair of characters and yield no records.
     */
    private static List<String> parseRecords(String line) {
        List<String> records = new ArrayList<String>();
        if (line.length() < 2) {
            return records;
        }
        String[] parts = line.substring(1, line.length() - 1).split(",\\{");
        for (int i = 0; i < parts.length; i++) {
            // The split consumes the "{" that opens every record after the first.
            records.add(i == 0 ? parts[i] : "{" + parts[i]);
        }
        return records;
    }
}

Collect.java

import java.io.File;
import java.io.IOException;
import java.util.List;

/**
 * Command-line entry point: scans a root directory for zip archives, extracts the
 * JSON records from each one, and appends them to numbered text files under
 * {@code <root>/json}, starting a new file after every 10000 records.
 */
public class Collect {
    /** Root directory scanned when no command-line argument is supplied. */
    private static final String DEFAULT_ROOT = "/data/app/sh/notes";

    /** Name of the output subdirectory created below the root. */
    private static final String OUTPUT_DIR = "json";

    /** Maximum number of records appended to one output file. */
    private static final int RECORDS_PER_FILE = 10000;

    /**
     * Runs the extraction. A failure on one archive or one record is reported on
     * standard error and processing continues with the next item, so a single bad
     * input does not stop the whole run.
     *
     * @param args optional; {@code args[0]} overrides the default root directory
     */
    public static void main(String[] args){
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_ROOT;
        Util util = new Util();
        String name = "part-r-0000"; // file name prefix of the JSON output files

        File outputDir = new File(path, OUTPUT_DIR);
        if (!outputDir.isDirectory() && !outputDir.mkdirs() && !outputDir.isDirectory()) {
            System.err.println("Cannot create output directory: " + outputDir);
            return;
        }

        List<String> zipfilename = util.getFileName(path);
        int count = 0;
        int n = 0;
        // path + separator + entry + ".zip" is the absolute path of each archive.
        for (String file : zipfilename) {
            // The output directory lies below the scanned root; never treat its
            // files as input archives.
            if (file.startsWith(OUTPUT_DIR + File.separator)) {
                continue;
            }
            String fullFileName = path + File.separator + file + ".zip";

            List<String> innerJsonFile;
            try {
                innerJsonFile = util.getZipWithAnt(fullFileName); // the JSON records
            } catch (Exception ex) {
                // Report and move on instead of dropping the failure silently.
                System.err.println("Skipping " + fullFileName + ": " + ex);
                continue;
            }

            for (String json : innerJsonFile) {
                // Roll over to the next output file once the current one is full.
                if (count >= RECORDS_PER_FILE) {
                    count = 0;
                    n++;
                }
                String target = path + File.separator + OUTPUT_DIR + File.separator
                        + name + n + ".txt";
                try {
                    util.writeToTxt(file + "\t" + json, target);
                } catch (IOException e) {
                    System.err.println("Failed to write to " + target + ": " + e);
                } finally {
                    // As before, a failed write still counts toward the file quota.
                    count++;
                }
            }
        }
    }
}
本次實驗遇到的問題:不要在 main 函式上宣告 throws 把異常直接往外拋。異常一旦沒有被捕獲而傳到 main 之外,JVM 會終止程式,後面的 zip 包就不會再被處理;應該在迴圈內捕獲異常,記錄後繼續處理下一個檔案。
注:中間遇到的其他問題在本人其他部落格已寫明。