程式人生 > hive常用sql整理

hive常用sql整理

Hive常用的sql整理,方便快速查詢使用

1.建立Hive表

-- Create an ORC-format partitioned table.
-- FIX: the original had a trailing comma after the last column
-- (insert_timestamp timestamp,) which is a parse error in Hive DDL.
-- NOTE: the original also declared ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001';
-- that clause is dropped here because ORC is a binary columnar format and the
-- text field delimiter is ignored by the ORC serde — removing it does not change
-- the resulting table.
CREATE TABLE IF NOT EXISTS edw_applications.dws_test_table (
  cid                    string,     -- NOTE(review): presumably a customer/client id — confirm with upstream
  event_code             int,
  event_date             string,    -- stored as string, not DATE; kept as-is for compatibility
  house_id               bigint,
  house_project_id       int,
  event_interval         int,
  event_weight_score     double,
  interval_decay_factor  double,
  event_score            double,
  event_times            bigint,
  load_job_number        string,
  load_job_name          string,
  insert_timestamp       timestamp
)
PARTITIONED BY (dt string)          -- dt is the partition column; not part of the column list above
STORED AS ORC;

-- Create a new table copying only the schema (columns, partition spec,
-- storage format) of an existing table; no data is copied.
create table edw_applications.dws_test_table_002 like edw_applications.dws_test_table;

-- Drop the table; IF EXISTS makes this idempotent (safe to re-run).
drop table if exists edw_applications.dws_test_table;

2.資料表匯入匯出

-- Export table data to a local (non-HDFS) directory as \001-delimited text.
-- WARNING: OVERWRITE deletes any existing contents of the target directory.
insert overwrite local directory '/data/hadoop/test/dws_test_table' row format delimited fields terminated by '\001'   
select * from edw_applications.dws_test_table;

-- Load files already on HDFS into a Hive table.
load data inpath '/src/dws_test_table/*' into table dws_test_table;            -- HDFS path; Hive MOVES the files into the table directory
-- NOTE(review): dws_test_table is created as STORED AS ORC above; LOAD DATA does
-- not convert formats, so the loaded files must already be ORC — verify source format.

-- Load files from the local filesystem into a Hive table.
load data local inpath '/home/xubc/dws_test_table/*' into table dws_test_table;  -- local path; files are COPIED up to the table directory

3.分割槽操作

-- Add a partition (no-op if it already exists).
-- NOTE: '${dt}' is a substitution variable filled in at run time
-- (e.g. by hivevar / the calling scheduler) — not a literal value.
alter table edw_applications.dws_test_table add if not exists partition(dt = '${dt}');

-- Drop a partition and its data (no-op if it does not exist).
alter table edw_applications.dws_test_table drop if exists partition(dt = '${dt}');

-- Remove all rows from one partition but keep the partition itself.
truncate table edw_applications.dws_test_table partition(dt = '${dt}');

-- Insert with OVERWRITE: replaces the contents of the target partition.
-- NOTE(review): select * relies on the source table's column order matching
-- the target exactly — confirm dws_test_table_001's schema before reuse.
insert overwrite table edw_applications.dws_test_table partition(dt = '${dt}') 
  select * from edw_applications.dws_test_table_001;         -- overwrite partition

insert into edw_applications.dws_test_table partition(dt = '${dt}') 
  select * from edw_applications.dws_test_table_001;         -- append rows

4.新增udf函式

-- Register the hive-contrib jar for the current session (two alternatives):
add jar /home/xubc/hive-contrib-1.2.0.jar;       -- jar on the local filesystem
add jar hdfs://localhost:8010/user/data_user/hive-contrib-1.2.0.jar;    -- jar on HDFS

-- Expose the contrib UDF as row_sequence for this session only (TEMPORARY:
-- the function disappears when the session ends).
create temporary function row_sequence as 'org.apache.hadoop.hive.contrib.udf.UDFRowSequence';

-- CTAS: snapshot one partition into a new table, prepending a generated id column.
-- NOTE(review): UDFRowSequence numbers rows per task, so ids are presumably only
-- unique/ordered within a single mapper — confirm before relying on global ordering.
create table edw_applications.tmp_dws_test_table_20161218_local as
select row_sequence() as id, t.* from edw_applications.dws_test_table t where dt= '20161218';

5. insert插入多條資料

-- Insert multiple rows via UNION ALL of single-row SELECTs.
-- Per the original author, this route avoids mojibake (garbled characters)
-- when inserting Chinese text.
 insert into ic_edw_applications.ic_dim_edw_tag_init (tag_type,tag_name,data_source)
   select 'room_tag', '1房',       'manual import'   union all
   select 'room_tag', '2房',       'manual import'   union all
   select 'room_tag', '3房',       'manual import'   union all
   select 'room_tag', '4房',       'manual import'   union all
   select 'room_tag', '5房',       'manual import'   union all
   select 'room_tag', '6房',       'manual import' ;

-- INSERT ... VALUES alternative: per the original author, fine for non-Chinese
-- data but prone to encoding corruption with Chinese text (hence the
-- UNION ALL variant above).
  insert into ic_edw_applications.ic_dim_edw_tag_init (tag_type,tag_name,data_source)
   values
   ('room_tag', '1房',       'manual import'), 
   ('room_tag', '2房',       'manual import'),
   ('room_tag', '3房',       'manual import'), 
   ('room_tag', '4房',       'manual import'),
   ('room_tag', '5房',       'manual import'),
   ('room_tag', '6房',       'manual import') ;

-- Replace the full contents of up.dim_event_code using Hive's STACK UDTF:
-- STACK(4, v1..v40) pivots the 40 literals that follow into 4 rows of
-- 10 columns each (the first argument is the row count).
-- NOTE(review): the target table's schema is not visible here; the 10
-- positional values per row must match dim_event_code's column order — confirm.
insert overwrite table up.dim_event_code
SELECT a.*
FROM
  (SELECT STACK( 4, 
                 1, '瀏覽', 10001, '詳情_PV',       '文章瀏覽', '', 0.1, 4, 1, current_timestamp,
                 1, '瀏覽', 10002, '詳情_下方點贊', '文章點贊', '', 0.8, 4, 1, current_timestamp,
                 1, '瀏覽', 10003, '詳情_分享成功', '文章分享', '', 1.0, 4, 1, current_timestamp,
                 1, '瀏覽', 10004, 'H5分享按鈕',       '文章分享', '', 1.0, 4, 1, current_timestamp 
                 )
) a;