1. 程式人生 > >通過MapReduce把Hive表資料匯入到HBase

通過MapReduce把Hive表資料匯入到HBase

由於Hive查詢速度比較慢,進行了表分區使用Impala也是很滿意,所以為了公司業務展示,需要測試使用HBase的查詢速度怎麼樣,頭一件事就是把HIVE的資料匯入到HBase中,搜了半天也沒搜到到底該怎麼搞,也有說能用Sqoop的,可是沒找到資料,只好自己用MapReduce實現。

話不多說,邏輯很簡單,只是用了Map,直接上程式碼。

public class Hive2HBase {

    /**
     * Mapper
     */
    static class ImportMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable
, Put> {
@Override public void map(LongWritable offset, Text value, Context context) { String[] splited = value.toString().split("\t"); if (splited.length != 4) return; try { byte[] rowkey = Bytes.toBytes(splited[0
]);// id作為rowkey Put put = new Put(rowkey); // 為了省事直接列名為log1...log4 for (int j = 0; j < splited.length; j++) { put.addColumn(Bytes.toBytes(HConfiguration.colFamily), Bytes.toBytes("log" + j), Bytes.toBytes(splited[j])); } context.write(new
ImmutableBytesWritable(rowkey), put); } catch (NumberFormatException e) { System.out.println("出錯了" + e.getMessage()); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } } /** * Main * * @param args * @throws Exception */ public static void main(String[] args) throws Exception { Configuration configuration = new Configuration(); // 設定zookeeper configuration.set("hbase.zookeeper.quorum", HConfiguration.hbase_zookeeper_quorum); configuration.set("hbase.zookeeper.property.clientPort", "2181"); // 設定hbase表名稱 configuration.set(TableOutputFormat.OUTPUT_TABLE, HConfiguration.tableName); // 將該值改大,防止hbase超時退出 configuration.set("dfs.client.socket-timeout", "180000"); MRDriver myDriver = MRDriver.getInstance(); try { //建立表 myDriver.createTableIfExistDelete(HConfiguration.tableName, HConfiguration.colFamily); } catch (Exception e) { e.printStackTrace(); } Job job = new Job(configuration, "HBaseBatchImport"); job.setJarByClass(Hive2HBase.class); job.setMapperClass(ImportMapper.class); // 設定map的輸出,不設定reduce的輸出型別 job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(Writeable.class); job.setNumReduceTasks(0); job.setInputFormatClass(TextInputFormat.class); // 不再設定輸出路徑,而是設定輸出格式型別 job.setOutputFormatClass(TableOutputFormat.class); // hive表路徑 FileInputFormat.setInputPaths(job, "hdfs://172.*.*.2:8022/user/hive/warehouse/sample_07"); job.waitForCompletion(true); } }