Hadoop HDFS Java API
阿新 • 發佈:2018-02-28
Hadoop HDFS Java API
Hadoop HDFS Java API
主要是Java操作HDFS的一些常用代碼,下面直接給出代碼:
package com.uplooking.bigdata.hdfs; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.io.IOUtils; import org.junit.After; import org.junit.Before; import org.junit.Test; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.URI; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; /** * 列出目錄的內容:listStatus * 讀取文件:open * 創建目錄:mkdirs * 創建文件:create * 刪除文件或目錄:delete * 顯示文件存儲位置:getFileBlockLocations */ public class HDFSTest { private FileSystem fs; private DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm"); /** * 初始化資源 * @throws Exception */ @Before public void setUp() throws Exception { URI uri = new URI("hdfs://uplooking01:9000"); Configuration configuration = new Configuration(); fs = FileSystem.get(uri, configuration); } /** * 列出目錄的內容:listStatus * 模仿: * $ hdfs dfs -ls / * -rw-r--r-- 1 uplooking supergroup 28 2018-02-28 12:29 /hello * drwxr-xr-x - uplooking supergroup 0 2018-02-28 12:31 /output * drwx------ - uplooking supergroup 0 2018-02-28 12:31 /tmp * * @throws IOException */ @Test public void testList() throws IOException { FileStatus[] fileStatuses = fs.listStatus(new Path("/")); for (FileStatus fileStatus : fileStatuses) { // 先定義好需要判斷才能確定的項 String prefix = "d"; String repliaction = "-"; // 獲取文件類型 if (fileStatus.isFile()) { prefix = "-"; } // 獲取權限列表 FsPermission permission = fileStatus.getPermission(); String uacl = permission.getUserAction().SYMBOL; String gacl = permission.getGroupAction().SYMBOL; String oacl = permission.getOtherAction().SYMBOL; String acl = uacl + gacl + oacl; // 獲取復制因子數 if (fileStatus.isFile()) { repliaction = fileStatus.getReplication() + ""; } // 獲取文件屬主 String owner = fileStatus.getOwner(); // 獲取文件屬組 String group = fileStatus.getGroup(); // 獲取文件大小 long len = fileStatus.getLen(); // 獲取文件修改時間 String mTime = df.format(new 
Date(fileStatus.getModificationTime())); // 獲取文件路徑 Path path = fileStatus.getPath(); // 格式化輸出 System.out.println(prefix + acl + "\t" + repliaction + "\t" + owner + " " + group + "\t" + mTime + "\t" + path); } } /** * 讀取文件:open * * @throws IOException */ @Test public void testOpen() throws IOException { FSDataInputStream fis = fs.open(new Path("hdfs://uplooking01:9000/hello")); // 方式1: /* byte[] bytes = new byte[1024]; int len = 0; while ((len = fis.read(bytes)) != -1) { System.out.println(new String(bytes, 0, len)); } fis.close();*/ // 方式2: /*BufferedReader br = new BufferedReader(new InputStreamReader(fis)); String line = null; while ((line = br.readLine()) != null) { System.out.println(line); } fis.close();*/ // 方式3: IOUtils.copyBytes(fis, System.out, 1024, false); } /** * 創建目錄:mkdirs * * @throws IOException */ @Test public void testMkdir() throws IOException { boolean ret = fs.mkdirs(new Path("/input/hdfs")); System.out.println(ret ? "創建目錄成功" : "創建目錄失敗"); } /** * 創建文件:create * * @throws IOException */ @Test public void testCreate() throws IOException { // 第二個參數為是否覆蓋,Files are overwritten by default FSDataOutputStream fos = fs.create(new Path("/input/hdfs/word.txt"), false); fos.write("hello\n".getBytes()); fos.write("xpleaf\n".getBytes()); fos.close(); } /** * 刪除文件或目錄:delete * * @throws IOException */ @Test public void testDelete() throws IOException { // 第二個參數為是否遞歸刪除(當刪除目錄時) boolean ret = fs.delete(new Path("/input/hdfs/word.txt"), false); System.out.println(ret ? 
"刪除成功" : "刪除失敗"); } /** * 顯示文件存儲位置:getFileBlockLocations * * @throws IOException */ @Test public void testLocations() throws IOException { Path path = new Path("/hadoop-2.6.4.tar.gz"); FileStatus fileStatus = fs.getFileStatus(path); // 參數分別為:文件路徑 偏移起始位置 文件長度 BlockLocation[] locations = fs.getFileBlockLocations(path, 0, fileStatus.getLen()); System.out.println(locations); for (BlockLocation location : locations) { System.out.println(location); } /** * 0,134217728,uplooking01 (偏移量從0開始,大小為128MB的塊存儲在節點uplooking01上) 134217728,61798247,uplooking01 (偏移量從128M開始,大小為59M的塊(就是剩余大小)存儲在節點uplooking01上) 可以看到,兩個塊都只存在uplooking01上的,這是因為這裏的hadoop環境是偽分布式的 */ } /** * 釋放資源 * @throws IOException */ @After public void cleanUp() throws IOException { fs.close(); } }
Hadoop HDFS Java API