1. 程式人生 > Hadoop HDFS Java API

Hadoop HDFS Java API

Hadoop HDFS Java

[toc]


Hadoop HDFS Java API

本文整理了使用 Java API 操作 HDFS 的一些常用做法,完整代碼如下:

package com.uplooking.bigdata.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Walk-through of common HDFS operations via the Hadoop {@link FileSystem} API,
 * written as JUnit tests that run against the cluster configured in {@link #setUp()}.
 *
 * <ul>
 *   <li>List directory contents: {@code listStatus}</li>
 *   <li>Read a file: {@code open}</li>
 *   <li>Create a directory: {@code mkdirs}</li>
 *   <li>Create a file: {@code create}</li>
 *   <li>Delete a file or directory: {@code delete}</li>
 *   <li>Show where a file's blocks are stored: {@code getFileBlockLocations}</li>
 * </ul>
 */
public class HDFSTest {
    /** Handle to the HDFS instance; assigned in {@link #setUp()}, closed in {@link #cleanUp()}. */
    private FileSystem fs;
    /** Formats modification times as {@code yyyy-MM-dd HH:mm} for the listing output. */
    private final DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm");

    /**
     * Acquires the {@link FileSystem} handle used by every test.
     *
     * @throws Exception if the URI is malformed or the file system cannot be obtained
     */
    @Before
    public void setUp() throws Exception {
        URI uri = new URI("hdfs://uplooking01:9000");
        Configuration configuration = new Configuration();
        fs = FileSystem.get(uri, configuration);
    }

    /**
     * Lists the contents of the root directory with {@code listStatus}, imitating:
     * <pre>
     * $ hdfs dfs -ls /
     * -rw-r--r--   1 uplooking supergroup         28 2018-02-28 12:29 /hello
     * drwxr-xr-x   - uplooking supergroup          0 2018-02-28 12:31 /output
     * drwx------   - uplooking supergroup          0 2018-02-28 12:31 /tmp
     * </pre>
     * Columns printed: type + permissions, replication, owner, group, size,
     * modification time, path.
     *
     * @throws IOException if the listing cannot be read
     */
    @Test
    public void testList() throws IOException {
        FileStatus[] fileStatuses = fs.listStatus(new Path("/"));
        for (FileStatus fileStatus : fileStatuses) {
            // Entry type and replication both depend on whether this is a regular file:
            // non-files are shown as "d" with "-" in the replication column.
            boolean isFile = fileStatus.isFile();
            String prefix = isFile ? "-" : "d";
            String replication = isFile ? String.valueOf(fileStatus.getReplication()) : "-";

            // Permission string assembled from the user / group / other actions.
            FsPermission permission = fileStatus.getPermission();
            String acl = permission.getUserAction().SYMBOL
                    + permission.getGroupAction().SYMBOL
                    + permission.getOtherAction().SYMBOL;

            // Owner and group of the entry.
            String owner = fileStatus.getOwner();
            String group = fileStatus.getGroup();

            // Size in bytes (part of the `hdfs dfs -ls` output being imitated).
            long len = fileStatus.getLen();
            // Last modification time, formatted.
            String mTime = df.format(new Date(fileStatus.getModificationTime()));
            // Full path of the entry.
            Path path = fileStatus.getPath();

            // Formatted output, one line per entry.
            System.out.println(prefix + acl + "\t" + replication + "\t" + owner + "  " + group
                    + "\t" + len + "\t" + mTime + "\t" + path);
        }
    }

    /**
     * Reads a file with {@code open} and copies its content to standard output.
     *
     * @throws IOException if the file cannot be opened or read
     */
    @Test
    public void testOpen() throws IOException {
        // try-with-resources guarantees the HDFS input stream is closed even if copying fails.
        try (FSDataInputStream fis = fs.open(new Path("hdfs://uplooking01:9000/hello"))) {
            // Alternative 1: read manually through a byte buffer.
            /* byte[] bytes = new byte[1024];
            int len = 0;
            while ((len = fis.read(bytes)) != -1) {
                System.out.println(new String(bytes, 0, len));
            }*/

            // Alternative 2: read line by line through a BufferedReader.
            /*BufferedReader br = new BufferedReader(new InputStreamReader(fis));
            String line = null;
            while ((line = br.readLine()) != null) {
                System.out.println(line);
            }*/

            // Alternative 3 (used here): let Hadoop's IOUtils do the copy.
            // The last argument is false so that System.out is NOT closed by the copy.
            IOUtils.copyBytes(fis, System.out, 1024, false);
        }
    }

    /**
     * Creates a directory (including missing parents) with {@code mkdirs}.
     *
     * @throws IOException if the request to the file system fails
     */
    @Test
    public void testMkdir() throws IOException {
        boolean ret = fs.mkdirs(new Path("/input/hdfs"));
        System.out.println(ret ? "創建目錄成功" : "創建目錄失敗");
    }

    /**
     * Creates a file with {@code create} and writes two lines into it.
     *
     * @throws IOException if the file cannot be created or written
     */
    @Test
    public void testCreate() throws IOException {
        // The second argument is the overwrite flag. The single-argument create(Path)
        // overwrites by default; passing false here asks HDFS not to replace an existing file.
        try (FSDataOutputStream fos = fs.create(new Path("/input/hdfs/word.txt"), false)) {
            // Encode explicitly as UTF-8 instead of relying on the platform default charset.
            fos.write("hello\n".getBytes(StandardCharsets.UTF_8));
            fos.write("xpleaf\n".getBytes(StandardCharsets.UTF_8));
        }
    }

    /**
     * Deletes a file or directory with {@code delete}.
     *
     * @throws IOException if the request to the file system fails
     */
    @Test
    public void testDelete() throws IOException {
        // The second argument controls recursive deletion (relevant when deleting a directory).
        boolean ret = fs.delete(new Path("/input/hdfs/word.txt"), false);
        System.out.println(ret ? "刪除成功" : "刪除失敗");
    }

    /**
     * Shows where the blocks of a file are stored, using {@code getFileBlockLocations}.
     *
     * @throws IOException if the file status or block locations cannot be fetched
     */
    @Test
    public void testLocations() throws IOException {
        Path path = new Path("/hadoop-2.6.4.tar.gz");
        FileStatus fileStatus = fs.getFileStatus(path);
        // Arguments: file path, start offset, length of the range to inspect (whole file here).
        BlockLocation[] locations = fs.getFileBlockLocations(path, 0, fileStatus.getLen());
        // Print each location individually; printing the array reference itself would only
        // show its type and identity hash.
        for (BlockLocation location : locations) {
            System.out.println(location);
        }
        /*
         * Sample output recorded by the original author:
         *   0,134217728,uplooking01          (block starting at offset 0, 128 MB, stored on uplooking01)
         *   134217728,61798247,uplooking01   (block starting at 128 MB, about 59 MB - the remainder -
         *                                     stored on uplooking01)
         * Both blocks live on uplooking01 because that Hadoop environment was pseudo-distributed.
         */
    }

    /**
     * Releases the file system handle.
     *
     * @throws IOException if closing the file system fails
     */
    @After
    public void cleanUp() throws IOException {
        // fs stays null when setUp() failed; guard so the original failure is not masked by an NPE.
        if (fs != null) {
            fs.close();
        }
    }
}

Hadoop HDFS Java API