
Hive: Creating Tables and Inserting Data (Demo)


Create an internal (managed) table:

create table student(id int comment "student id", name string comment "student name", age int comment "student age")
comment "student info table"
row format delimited fields terminated by ",";
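
A minimal check that the table works; the local file path and its contents are assumptions for illustration, not from the original (the bucketed-table insert further down also reads from this table):

-- /root/student.txt (hypothetical sample, comma-separated):
-- 1,zhangsan,20
-- 2,lisi,21
load data local inpath "/root/student.txt" into table student;
select * from student;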

Create an external table; only the metadata lives in Hive, the data stays at the given HDFS location:

create external table student_ext(id int comment "student id", name string comment "student name", age int comment "student age")
comment "student info table"
row format delimited fields terminated by ","
location "/user/hive/student_ext";

Create an external table partitioned by city:

create external table student_ptn(id int comment "student id", name string comment "student name", age int comment "student age")
comment "student info table"
partitioned by (city string)
row format delimited fields terminated by ","
location "/user/hive/student_ptn";

Dynamic-partition settings (a current value can be checked by running, e.g., set hive.exec.dynamic.partition;):

set hive.exec.dynamic.partition=true;            -- enable dynamic partitioning (default: false)
set hive.exec.dynamic.partition.mode=nonstrict;  -- allow all partitions to be dynamic; in strict mode at least one static partition is required
set hive.exec.max.dynamic.partitions=100000;     -- error if more dynamic partitions than this are created
set hive.exec.max.dynamic.partitions.pernode=100000;

With these set, the partition value for city is taken from the last column of each select:

insert into table student_ptn partition(city) select 6,"yangdong",29,"beijing";
insert into table student_ptn partition(city) select 2,"limei",22,"chongqing";
insert into table student_ptn partition(city) select 3,"wangxing",25,"beijing";
insert into table student_ptn partition(city) select 4,"chenming",22,"beijing";
insert into table student_ptn partition(city) select 5,"xiali",26,"chongqing";
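
To confirm the dynamically created partitions and query one of them:

show partitions student_ptn;
select * from student_ptn where city="beijing";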

Create a bucketed table: rows are hashed on id into 2 buckets, each sorted by id:

create external table student_bck(id int comment "student id", name string comment "student name", age int comment "student age")
comment "student info table"
clustered by(id) sorted by(id asc) into 2 buckets
row format delimited fields terminated by ","
location "/user/hive/student_bck";

insert into table student_bck
select * from student;
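
Buckets can then be sampled directly, e.g. reading only the first of the two buckets:

select * from student_bck tablesample(bucket 1 out of 2 on id);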

Create a table with complex column types (array, map, struct):

create table cdt(
id int,
name string,
work_location array<string>,
piaofang map<string,bigint>,
address struct<location:string,zipcode:int,phone:string,value:int>)
row format delimited
fields terminated by "\t"
collection items terminated by ","
map keys terminated by ":"
lines terminated by "\n";
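
A minimal sketch of loading and querying the complex types; the file path and sample row are assumptions for illustration:

-- /root/cdt.txt, one hypothetical row (\t marks a real tab):
-- 1\thuangbo\tbeijing,shanghai\ta:99999999999,b:2000\tbeijing,100000,13900000000,1
load data local inpath "/root/cdt.txt" into table cdt;
select work_location[0], piaofang["a"], address.zipcode from cdt;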

Load JSON strings into the table json; each line below is one record:
{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}
{"movie":"661","rate":"3","timeStamp":"978302109","uid":"1"}
{"movie":"914","rate":"3","timeStamp":"978301968","uid":"1"}
{"movie":"3408","rate":"4","timeStamp":"978300275","uid":"1"}
{"movie":"2355","rate":"5","timeStamp":"978824291","uid":"1"}
{"movie":"1197","rate":"3","timeStamp":"978302268","uid":"1"}
{"movie":"1287","rate":"5","timeStamp":"978302039","uid":"1"}
{"movie":"2804","rate":"5","timeStamp":"978300719","uid":"1"}
{"movie":"594","rate":"4","timeStamp":"978302268","uid":"1"}

CREATE TABLE json(
data string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'hdfs://hmaster:9000/user/hive/warehouse/plusorg.db/json'
TBLPROPERTIES (
'transient_lastDdlTime'='1542008332');

load data local inpath "/root/json.txt" into table json;
The built-in function get_json_object parses a single field out of a JSON column:

select get_json_object(data, '$.movie') as movie from json;

json_tuple(jsonStr, k1, k2, ...) is a built-in function for parsing JSON strings: it takes a JSON string plus a set of keys k1, k2, ... and returns a tuple of the corresponding values. It is more efficient than get_json_object because multiple keys are extracted in a single call:

select
b.b_movie,
b.b_rate,
b.b_timeStamp,
b.b_uid
from json a
lateral view json_tuple(a.data, 'movie', 'rate', 'timeStamp', 'uid') b as b_movie, b_rate, b_timeStamp, b_uid;

Create a typed table and fill it by parsing the JSON column:

create table rate(movie int, rate int, unixtime int, userid int) row format delimited fields
terminated by '\t';

insert into table rate select
get_json_object(data, '$.movie') as movie,
get_json_object(data, '$.rate') as rate,
get_json_object(data, '$.timeStamp') as unixtime,
get_json_object(data, '$.uid') as userid
from json;

from_unixtime formats the epoch seconds as a readable timestamp:

select from_unixtime(unixtime, 'yyyy/MM/dd HH:mm:ss') from rate;

Create the target table for the transformed rows:

create table lastjsontable(movie int, rate int, utime date, userid int) row format delimited
fields terminated by '\t';

Add the Python script so Hive can invoke it; the path is the absolute path on the server running the session. TRANSFORM streams the selected columns to the script as tab-separated lines on stdin and reads tab-separated result lines from its stdout:
add file /home/pythoncode/WeekdayMapper.py;

insert into table lastjsontable
select
transform(movie, rate, unixtime, userid)  -- input columns (base table)
using 'python WeekdayMapper.py'           -- clean each row with the script
as (movie, rate, utime, userid)           -- output columns (target table)
from rate;
