1. 程式人生 > >大資料(HBase-程式設計java api)

大資料(HBase-程式設計java api)

開發環境搭建步驟

1:解壓下載下來的hbase的安裝包

2:配置windows的hosts檔案,檔案位置:C:\Windows\System32\drivers\etc\hosts
    配置虛擬機器的ip對應的主機名
        比如:
    192.168.153.115 hm02
    192.168.153.116 hs0201
    192.168.153.117 hs0202

3:下載hbase叢集中的hbase-site.xml檔案
    通過遠端工具的ftp功能來進行下載

4:使用開發工具建立hbase客戶端工程

    1)將hbase-site.xml以及log4j.properties檔案放入工程的classpath(例如src目錄下)
    2)將解壓後的hbase的lib目錄下的jar包匯入工程
    3)寫一個測試程式並執行,在執行之前確保我們的hbase的叢集已經啟動

HbaseJavaTest.java

package com.hbase;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Collection of small HBase client examples (put, get, scan, delete, alter
 * table, drop table) that all share one cluster connection.
 *
 * <p>The examples work against a table named {@code people} with a column
 * family {@code cinfo}; that table is expected to exist before the data
 * examples are run.
 */
public class HbaseJavaTest {
	/** Table used by the examples that do not take a table name. */
	private static final TableName PEOPLE = TableName.valueOf("people");
	/** Column family written and read by the data examples. */
	private static final byte[] CINFO = Bytes.toBytes("cinfo");
	/** Column qualifier written and read by the data examples. */
	private static final byte[] NAME = Bytes.toBytes("name");

	/* Shared client state */
	private static final Configuration conf;
	private static final Connection conn;
	static {
		conf = HBaseConfiguration.create();
		// ZooKeeper quorum of the HBase cluster. These are the host names that
		// the hosts file maps and that hbase-site.xml lists for the quorum.
		conf.set("hbase.zookeeper.quorum", "hm02,hs0201,hs0202");
		try {
			conn = ConnectionFactory.createConnection(conf);
		} catch (IOException e) {
			// Fail fast: without a connection every example below would only
			// die later with an unrelated NullPointerException.
			throw new ExceptionInInitializerError(e);
		}
	}

	/**
	 * Inserts a single row ({@code p00001}) with one {@code cinfo:name} cell.
	 *
	 * @throws Exception if the table cannot be opened or the put fails
	 */
	public static void addOneData() throws Exception {
		try (Table ht = conn.getTable(PEOPLE)) {
			Put put = new Put(Bytes.toBytes("p00001"));
			put.addColumn(CINFO, NAME, Bytes.toBytes("xiaoshi"));
			ht.put(put);
		}
		System.out.println("===========資料插入成功===========");
	}

	/**
	 * Inserts {@code count} rows in one batch. Row keys are
	 * {@code "Ande0000" + i} and values {@code "xiaoshi" + i} for
	 * {@code i} in {@code [0, count)}.
	 *
	 * @param count number of rows to insert; values &lt;= 0 insert nothing
	 * @throws Exception if the table cannot be opened or the batch put fails
	 */
	public static void addBatchData(int count) throws Exception {
		List<Put> puts = new ArrayList<>(Math.max(count, 0));
		for (int i = 0; i < count; i++) {
			Put put = new Put(Bytes.toBytes("Ande0000" + i));
			put.addColumn(CINFO, NAME, Bytes.toBytes("xiaoshi" + i));
			puts.add(put);
		}
		try (Table ht = conn.getTable(PEOPLE)) {
			ht.put(puts);
		}
		System.out.println("===========資料插入成功===========");
	}

	/**
	 * Prints name, block size and version settings of every column family of
	 * the given table.
	 *
	 * @param tableName name of the table to describe
	 * @throws Exception if the table descriptor cannot be read
	 */
	public static void getTableDesc(String tableName) throws Exception {
		try (Table ht = conn.getTable(TableName.valueOf(tableName))) {
			HTableDescriptor td = ht.getTableDescriptor();
			for (HColumnDescriptor hd : td.getColumnFamilies()) {
				System.out.println("name : " + hd.getNameAsString() +
						" blocksize : " + hd.getBlocksize() +
						" minVers : " + hd.getMinVersions() +
						" maxVers : " + hd.getMaxVersions() +
						" defVers : " + HColumnDescriptor.DEFAULT_VERSIONS);
			}
		}
	}

	/**
	 * Scans the whole table and prints every cell of every row.
	 *
	 * @param tableName name of the table to scan
	 * @throws Exception if the scan fails
	 */
	public static void scanTable(String tableName) throws Exception {
		// The scanner holds server-side resources, so it is closed together
		// with the table handle.
		try (Table ht = conn.getTable(TableName.valueOf(tableName));
				ResultScanner scanner = ht.getScanner(new Scan())) {
			for (Result res : scanner) {
				printCells(res);
			}
		}
	}

	/**
	 * Scans the rows from {@code Ande0000500} to {@code Ande0000600} and
	 * prints the {@code cinfo:name} value of each row.
	 *
	 * @throws Exception if the scan fails
	 */
	public static void scanForRange() throws Exception {
		Scan scan = new Scan(Bytes.toBytes("Ande0000500"), Bytes.toBytes("Ande0000600"));
		try (Table ht = conn.getTable(PEOPLE);
				ResultScanner scanner = ht.getScanner(scan)) {
			for (Result rs : scanner) {
				System.out.println(Bytes.toString(rs.getValue(CINFO, NAME)));
			}
		}
	}

	/**
	 * Fetches one row by key and prints all of its cells, or a notice when the
	 * row does not exist.
	 *
	 * @param rowkey key of the row to fetch
	 * @throws Exception if the get fails
	 */
	public static void getForRowKey(String rowkey) throws Exception {
		try (Table ht = conn.getTable(PEOPLE)) {
			Result result = ht.get(new Get(Bytes.toBytes(rowkey)));
			if (result == null || result.size() == 0) {
				System.out.println("沒有這個rowkey");
				return;
			}
			printCells(result);
		}
	}

	/**
	 * Deletes the whole row with the given key.
	 *
	 * @param rowkey key of the row to delete
	 * @throws Exception if the delete fails
	 */
	public static void deleteRow(String rowkey) throws Exception {
		try (Table ht = conn.getTable(PEOPLE)) {
			ht.delete(new Delete(Bytes.toBytes(rowkey)));
		}
	}

	/**
	 * Adds the column family {@code age} to the {@code people} table. The
	 * table is disabled for the change and enabled again afterwards.
	 *
	 * @throws Exception if any of the admin operations fails
	 */
	public static void alterTableAddCls() throws Exception {
		try (Admin admin = conn.getAdmin()) {
			admin.disableTable(PEOPLE);
			try {
				// Fetch the current descriptor, extend it, and push it back.
				HTableDescriptor td = admin.getTableDescriptor(PEOPLE);
				td.addFamily(new HColumnDescriptor(Bytes.toBytes("age")));
				admin.modifyTable(PEOPLE, td);
			} finally {
				// Re-enable even when the change failed, so a failed alter
				// does not leave the table unusable.
				admin.enableTable(PEOPLE);
			}
		}
		System.out.println("====新增列族成功====");
	}

	/**
	 * Drops the {@code people} table if it exists.
	 *
	 * @throws Exception if any of the admin operations fails
	 */
	public static void deleteTable() throws Exception {
		try (Admin admin = conn.getAdmin()) {
			if (admin.tableExists(PEOPLE)) {
				admin.disableTable(PEOPLE);
				admin.deleteTable(PEOPLE);
				System.out.println("刪除表成功");
			}
		}
	}

	/** Prints row key, family, qualifier, timestamp and value of every cell in {@code row}. */
	private static void printCells(Result row) {
		for (Cell cell : row.listCells()) {
			System.out.println("================================================");
			System.out.println("行鍵:rowkey ===========" + Bytes.toString(row.getRow()));
			System.out.println("列族:columnFam ========" + Bytes.toString(CellUtil.cloneFamily(cell)));
			System.out.println("列:column ============" + Bytes.toString(CellUtil.cloneQualifier(cell)));
			System.out.println("時間戳:timestamp =======" + cell.getTimestamp());
			System.out.println("值:value ==============" + Bytes.toString(CellUtil.cloneValue(cell)));
		}
	}

	/**
	 * Entry point. Uncomment the example to run; the shared connection is
	 * closed when the example finishes or fails.
	 */
	public static void main(String[] args) throws Exception {
		try {
			// Print the table description
//			getTableDesc("people") ;
			// Insert a single row
//			addOneData() ;
			// Insert a batch of rows
//			addBatchData(10000) ;
			// Scan the whole table
//			scanTable("people") ;
			// Scan a row-key range
//			scanForRange() ;
			// Fetch a single row
//			getForRowKey("Ande0000500") ;
			// Add a column family
//			alterTableAddCls() ;
		} finally {
			conn.close();
		}
	}
}

HbaseTest.java

package com.dongnao;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

/**
 * Creates the {@code people} table with a single column family {@code cinfo}
 * (at most 3 versions per cell). If the table already exists it is dropped
 * and created again, so the program always ends with a fresh, empty table.
 */
public class HbaseTest {

	public static void main(String[] args) throws Exception {
		/*
		 * Configuration carries the client settings, e.g. the ZooKeeper
		 * address and port.
		 */
		Configuration conf = HBaseConfiguration.create();
		conf.set("hbase.zookeeper.quorum", "hm02,hs0201,hs0202");

		/*
		 * HTableDescriptor  describes a table
		 * TableName         wraps the table name string in the type HBase expects
		 * HColumnDescriptor describes a column family (versions, compression, ...)
		 */
		TableName tableName = TableName.valueOf("people");
		HTableDescriptor table = new HTableDescriptor(tableName);
		HColumnDescriptor cf = new HColumnDescriptor("cinfo");
		cf.setMaxVersions(3);
		table.addFamily(cf);

		/*
		 * Connection is the link to HBase; Admin is the interface for
		 * creating, deleting, enabling and disabling tables (the deprecated
		 * class is HBaseAdmin). Both are closed automatically, also on failure.
		 */
		try (Connection conn = ConnectionFactory.createConnection(conf);
				Admin admin = conn.getAdmin()) {
			if (admin.tableExists(tableName)) {
				// A table must be disabled before it can be deleted.
				admin.disableTable(tableName);
				admin.deleteTable(tableName);
				System.out.println("該表已經存在,刪除成功!");
			}
			// Always create, so the success message below is truthful even
			// when an old table had to be removed first.
			admin.createTable(table);
		}
		System.out.println("===表建立成功===");
	}

}

hbase-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-->
<configuration>
	<!-- Root directory of HBase data: an HDFS path on host hm02, port 9000. -->
	<property>
		<name>hbase.rootdir</name>
		<value>hdfs://hm02:9000/hbase</value>
	</property>
	<!-- true = the cluster runs in distributed mode. -->
	<property>
		<name>hbase.cluster.distributed</name>
		<value>true</value>
	</property>
	<!-- Comma-separated ZooKeeper hosts; the client machine must be able to
	     resolve these names (see the hosts file entries). -->
	<property>
		<name>hbase.zookeeper.quorum</name>
		<value>hm02,hs0201,hs0202</value>
	</property>
</configuration>

hosts

# Copyright (c) 1993-2009 Microsoft Corp.
#
# This is a sample HOSTS file used by Microsoft TCP/IP for Windows.
#
# This file contains the mappings of IP addresses to host names. Each
# entry should be kept on an individual line. The IP address should
# be placed in the first column followed by the corresponding host name.
# The IP address and the host name should be separated by at least one
# space.
#
# Additionally, comments (such as these) may be inserted on individual
# lines or following the machine name denoted by a '#' symbol.
#
# For example:
#
#      102.54.94.97     rhino.acme.com          # source server
#       38.25.63.10     x.acme.com              # x client host

# localhost name resolution is handled within DNS itself.
#	127.0.0.1       localhost
#	::1             localhost

192.168.204.1 windows10.microdone.cn
192.168.153.112 hm
192.168.153.113 hs01
192.168.153.114 hs02

192.168.153.115 hm02
192.168.153.116 hs0201
192.168.153.117 hs0202