Hadoop學習筆記一(通過Java API 操作HDFS,檔案上傳、下載)
package demo.hdfs;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
public class TestDemo {

    @Test
    public void test1() throws Exception {
        // List the entries of an HDFS directory and print basic attributes.
        // Configure the NameNode (the HDFS master node).
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.146.111:9000");
        // Obtain an HDFS client from the configuration; try-with-resources
        // guarantees it is closed (the original never closed it).
        try (FileSystem fs = FileSystem.get(conf)) {
            // Status of every entry directly under /tools.
            FileStatus[] list = fs.listStatus(new Path("/tools"));
            for (FileStatus status : list) {
                // Output strings kept exactly as the original
                // ("目錄" = directory, "檔案" = file).
                System.out.println(status.isDirectory() ? "目錄" : "檔案");
                System.out.println(status.getAccessTime());
            }
        }
    }

    @Test
    public void test2() throws Exception {
        // Print the block locations of a file stored on HDFS.
        // Configure the NameNode (the HDFS master node).
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.146.111:9000");
        try (FileSystem fs = FileSystem.get(conf)) {
            // First fetch the file's status, then ask for the block
            // locations spanning the whole file length.
            FileStatus fileStatus = fs.getFileStatus(new Path("/tools/hadoop-2.7.3.tar.gz"));
            BlockLocation[] list = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
            for (BlockLocation bl : list) {
                // Hosts holding replicas of this block.
                System.out.println(Arrays.toString(bl.getHosts()));
                // Block names (DataNode host:port identifiers).
                System.out.println(Arrays.toString(bl.getNames()));
            }
        }
    }
}
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
首先分析HDFS檔案上傳的過程
1、建立與Namenode 的通訊,配置Namenode 的資訊
2、得到HDFS的客戶端,傳入Namenode的配置資訊
3、開啟一個輸入流,建立一個輸出流
4、使用工具類 IOUtils.copyBytes(in, out, 1024) 完成複製(注意:此三參數版本不會自動關閉流,複製完成後仍需自行關閉輸入/輸出流)
package demo.hdfs;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;
public class TestUpload {

    @Test
    public void testUpload1() throws Exception {
        // Upload a local file to HDFS by manually pumping bytes through a buffer.
        // Configure the NameNode (the HDFS master node).
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.146.111:9000");
        // try-with-resources closes the client and both streams even when the
        // copy throws (the original only closed them on the success path).
        try (FileSystem fs = FileSystem.get(conf);
             InputStream in = new FileInputStream("f:\\temp\\hadoop-2.7.3.tar.gz");
             OutputStream out = fs.create(new Path("/tools/a.tar.gz"))) {
            byte[] buffer = new byte[1024];
            int len;
            // Read chunks from the local file and write them to HDFS.
            while ((len = in.read(buffer)) > 0) {
                out.write(buffer, 0, len);
            }
            out.flush();
        }
    }

    @Test
    public void testUpload2() throws Exception {
        // Upload using Hadoop's IOUtils helper instead of a manual copy loop.
        // Configure the NameNode (the HDFS master node).
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.146.111:9000");
        try (FileSystem fs = FileSystem.get(conf);
             InputStream in = new FileInputStream("f:\\temp\\hadoop-2.7.3.tar.gz");
             OutputStream out = fs.create(new Path("/tools/b.tar.gz"))) {
            // copyBytes(in, out, bufSize) does NOT close the streams itself,
            // so the try-with-resources above is required (the original leaked
            // both streams here).
            IOUtils.copyBytes(in, out, 1024);
        }
    }
}
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
package demo.hdfs;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;
public class TestDownload {

    @Test
    public void testDownload1() throws Exception {
        // Download /tools/a.tar.gz from HDFS to the local disk with a manual
        // buffered copy loop. (Renamed from the misleading "testUpload1";
        // JUnit discovers tests via @Test, so nothing else breaks.)
        // Configure the NameNode (the HDFS master node).
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.146.111:9000");
        // try-with-resources closes the client and both streams even when the
        // copy throws (the original only closed them on the success path).
        try (FileSystem fs = FileSystem.get(conf);
             InputStream input = fs.open(new Path("/tools/a.tar.gz"));
             OutputStream output = new FileOutputStream("f:\\temp\\xyz.tar.gz")) {
            byte[] buffer = new byte[1024];
            int len;
            // Read chunks from HDFS and write them to the local file.
            while ((len = input.read(buffer)) > 0) {
                output.write(buffer, 0, len);
            }
            output.flush();
        }
    }

    @Test
    public void testDownload2() throws Exception {
        // Download using Hadoop's IOUtils helper instead of a manual loop.
        // (Renamed from the misleading "testUpload2".)
        // Configure the NameNode (the HDFS master node).
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.146.111:9000");
        try (FileSystem fs = FileSystem.get(conf);
             InputStream input = fs.open(new Path("/tools/a.tar.gz"));
             OutputStream output = new FileOutputStream("f:\\temp\\mmmm.tar.gz")) {
            // copyBytes(in, out, bufSize) does NOT close the streams itself,
            // so the try-with-resources above is required (the original leaked
            // both streams here).
            IOUtils.copyBytes(input, output, 1024);
        }
    }
}