程式人生 > hadoop javaapi讀取資料夾下的資料

hadoop javaapi讀取資料夾下的資料

導包:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.net.URI;

程式碼:

只需要複製貼上

改改output和input就能用了,如果要讀取檔案的話,直接使用readHdfsFile()方法即可。

public class HdfsApp {

    /** Local destination file; contents of each HDFS file are appended to it. */
    private static String output="F:/Tags/jiemianshouye";
    /** HDFS directory whose files will be read. */
    private static String input="hdfs://192.168.163.120:8020/user/hive/external/jiemianhomepage/dt=2018-12-12";

    /**
     * Returns a {@link FileSystem} handle resolved from the given path's URI.
     *
     * @param direPath an HDFS (or local) path/URI string
     * @throws Exception if the file system cannot be obtained
     */
    private static FileSystem getFileSystem(String direPath) throws Exception {
        Configuration configuration = new Configuration();
        return FileSystem.get(URI.create(direPath), configuration);
    }

    /**
     * Reads one HDFS file and appends its bytes to the local {@code output} file.
     *
     * <p>Fix: the original never closed the {@code FileOutputStream} (and passed
     * {@code close=false} to {@code copyBytes}), leaking the handle and risking
     * lost buffered bytes. try-with-resources now closes both streams.
     *
     * @param filePath full HDFS path of the file to read
     */
    private static void readHdfsFile(String filePath) {
        Path path = new Path(filePath);
        try (FSDataInputStream in = getFileSystem(filePath).open(path);
             OutputStream out = new FileOutputStream(output, true)) {
            // Streams are closed by try-with-resources, so copyBytes must not close them.
            IOUtils.copyBytes(in, out, 4096, false);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Lists every entry under {@code direPath}, printing its name, path and size,
     * and appends the contents of each regular file to the local output file.
     *
     * <p>Fix: the original also called {@code readHdfsFile} on subdirectories,
     * which failed with a stack trace; directories are now skipped for reading.
     *
     * @param direPath HDFS directory to enumerate
     * @throws Exception if listing the directory fails
     */
    public static void getDirectoryFromHdfs(String direPath) throws Exception {
        FileSystem fs = getFileSystem(direPath);
        try {
            FileStatus[] filelist = fs.listStatus(new Path(direPath));
            for (int i = 0; i < filelist.length; i++) {
                System.out.println("_________________第" + i + "個檔案" + "____________________");
                FileStatus fileStatus = filelist[i];
                System.out.println("Name:" + fileStatus.getPath().getName());
                System.out.println("Path:" + fileStatus.getPath());
                // Only regular files can be opened and copied; skip directories.
                if (fileStatus.isFile()) {
                    readHdfsFile(fileStatus.getPath().toString());
                }
                System.out.println("size:" + fileStatus.getLen());
                System.out.println("_________________第" + i + "個檔案" + "____________________");
            }
        } finally {
            fs.close();
        }
    }

    /** Entry point: copies every file in {@code input} into the local {@code output} file. */
    public static void main(String[] args) throws Exception {
        getDirectoryFromHdfs(input);
    }
}

希望能幫到有需要的朋友