Hive 常用 SQL 整理
阿新 • • 發佈:2019-01-26
整理 Hive 常用的 SQL 語句，方便快速查詢使用。
1.建立Hive表
-- Create an ORC-format table partitioned by dt.
-- Note: there is no comma after the last column; a trailing comma before the
-- closing parenthesis is a parse error.
-- No ROW FORMAT DELIMITED clause is given: field delimiters only matter for
-- text storage, whereas ORC uses its own binary columnar layout.
CREATE TABLE IF NOT EXISTS edw_applications.dws_test_table (
    cid                   STRING,
    event_code            INT,
    event_date            STRING,
    house_id              BIGINT,
    house_project_id      INT,
    event_interval        INT,
    event_weight_score    DOUBLE,
    interval_decay_factor DOUBLE,
    event_score           DOUBLE,
    event_times           BIGINT,
    load_job_number       STRING,
    load_job_name         STRING,
    insert_timestamp      TIMESTAMP
)
PARTITIONED BY (dt STRING)
STORED AS ORC;

-- Create a new table with the same structure as an existing one.
CREATE TABLE edw_applications.dws_test_table_002
LIKE edw_applications.dws_test_table;

-- Drop the table.
DROP TABLE IF EXISTS edw_applications.dws_test_table;
2.資料表匯入匯出
-- Export the table's rows to a directory on the local filesystem,
-- writing fields separated by the \001 control character.
INSERT OVERWRITE LOCAL DIRECTORY '/data/hadoop/test/dws_test_table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
SELECT *
FROM edw_applications.dws_test_table;

-- Load files that already live on HDFS into a Hive table.
-- The path is an HDFS path and the files are moved into the table.
-- NOTE(review): the table name is unqualified here, so it resolves against the
-- current database; if the target is a partitioned table, a PARTITION clause
-- may be required -- confirm against the intended target.
LOAD DATA INPATH '/src/dws_test_table/*'
INTO TABLE dws_test_table;

-- Load files from the local filesystem into a Hive table (the path is a local path).
LOAD DATA LOCAL INPATH '/home/xubc/dws_test_table/*'
INTO TABLE dws_test_table;
3.分割槽操作
-- ${dt} is a placeholder for the partition value; presumably it is substituted
-- by the caller before the statement runs -- confirm how the script is invoked.

-- Add the partition unless it already exists.
ALTER TABLE edw_applications.dws_test_table
    ADD IF NOT EXISTS PARTITION (dt = '${dt}');

-- Drop the partition if it exists.
ALTER TABLE edw_applications.dws_test_table
    DROP IF EXISTS PARTITION (dt = '${dt}');

-- Empty the partition's data.
TRUNCATE TABLE edw_applications.dws_test_table PARTITION (dt = '${dt}');

-- Insert data, overwriting the partition's current contents.
INSERT OVERWRITE TABLE edw_applications.dws_test_table PARTITION (dt = '${dt}')
SELECT *
FROM edw_applications.dws_test_table_001;

-- Insert data, appending to the partition's current contents.
INSERT INTO edw_applications.dws_test_table PARTITION (dt = '${dt}')
SELECT *
FROM edw_applications.dws_test_table_001;
4.新增 UDF 函式
-- Register the contrib jar from a local path.
ADD JAR /home/xubc/hive-contrib-1.2.0.jar;

-- Register the contrib jar from a file stored on HDFS.
ADD JAR hdfs://localhost:8010/user/data_user/hive-contrib-1.2.0.jar;

-- Bind the name row_sequence to the UDF class for use as a temporary function.
CREATE TEMPORARY FUNCTION row_sequence
    AS 'org.apache.hadoop.hive.contrib.udf.UDFRowSequence';

-- Build a new table from the dt = '20161218' rows, with the value returned by
-- row_sequence() as an extra leading id column.
CREATE TABLE edw_applications.tmp_dws_test_table_20161218_local AS
SELECT
    row_sequence() AS id,
    t.*
FROM edw_applications.dws_test_table AS t
WHERE dt = '20161218';
5.使用 INSERT 插入多條資料
-- Multi-row insert built by chaining literal SELECTs with UNION ALL;
-- this is the form used here for rows that contain Chinese text.
INSERT INTO ic_edw_applications.ic_dim_edw_tag_init (tag_type, tag_name, data_source)
SELECT 'room_tag', '1房', 'manual import'
UNION ALL
SELECT 'room_tag', '2房', 'manual import'
UNION ALL
SELECT 'room_tag', '3房', 'manual import'
UNION ALL
SELECT 'room_tag', '4房', 'manual import'
UNION ALL
SELECT 'room_tag', '5房', 'manual import'
UNION ALL
SELECT 'room_tag', '6房', 'manual import';
-- Multi-row insert using a VALUES list. Per the author's note, this form is
-- meant for non-Chinese data, which is less prone to garbled characters.
-- NOTE(review): the sample rows below nevertheless contain Chinese text --
-- confirm the client/session encoding before relying on this form.
INSERT INTO ic_edw_applications.ic_dim_edw_tag_init (tag_type, tag_name, data_source)
VALUES
    ('room_tag', '1房', 'manual import'),
    ('room_tag', '2房', 'manual import'),
    ('room_tag', '3房', 'manual import'),
    ('room_tag', '4房', 'manual import'),
    ('room_tag', '5房', 'manual import'),
    ('room_tag', '6房', 'manual import');
-- Overwrite up.dim_event_code with four literal rows.
-- STACK(4, ...) arranges the 40 values that follow into 4 rows of 10 columns,
-- so each line of values below becomes one row.
INSERT OVERWRITE TABLE up.dim_event_code
SELECT seed.*
FROM (
    SELECT STACK(
        4,
        1, '瀏覽', 10001, '詳情_PV', '文章瀏覽', '', 0.1, 4, 1, CURRENT_TIMESTAMP,
        1, '瀏覽', 10002, '詳情_下方點贊', '文章點贊', '', 0.8, 4, 1, CURRENT_TIMESTAMP,
        1, '瀏覽', 10003, '詳情_分享成功', '文章分享', '', 1.0, 4, 1, CURRENT_TIMESTAMP,
        1, '瀏覽', 10004, 'H5分享按鈕', '文章分享', '', 1.0, 4, 1, CURRENT_TIMESTAMP
    )
) AS seed;