1. 程式人生 > >hive中判斷A表時間欄位是否在B表的兩個時間欄位中及求訂單中間休息時間

hive中判斷A表時間欄位是否在B表的兩個時間欄位中及求訂單中間休息時間

問題:在hive中,A表中有一個時間欄位,型別為string,格式為2018-12-06 13:34:12;在B表中有欄位start_time和end_time,型別為string,格式為2018-12-06 13:34:12。現需要將兩表按id(本例中為車牌號plate_num)關聯後新增一個標記欄位(當A表的時間在B表的start_time和end_time之間就標記為1,不在區間內就標記為0)。B表中還有一個訂單id,string型別。

解決方法:使用join和case when then和lead視窗函式
先在hive中建立兩張表並插入模擬資料:

-- Create the two demo tables (all columns are plain strings).
-- A: one observed time per license plate.
create table A (
    plate_num string,
    time      string
);

-- B: orders; each order has a license plate plus a start and an end time.
create table B (
    order_id   string,
    plate_num  string,
    start_time string,
    end_time   string
);

-- Load the sample rows.
-- One multi-row insert per table: every single-row "insert ... values" is a
-- separate Hive statement (typically its own job and output file), so batching
-- the rows loads the same data with two statements instead of fifteen.
insert into A values
    ('瀘A88888', '2018-04-02 09:34:12'),
    ('瀘A66666', '2018-04-02 12:34:12'),
    ('瀘A00000', '2018-04-02 10:34:12'),
    ('瀘A22222', '2018-04-02 15:34:12');

insert into B values
    ('201812060001', '瀘A88888', '2018-04-02 09:00:12', '2018-04-02 10:00:12'),
    ('201812060002', '瀘A66666', '2018-04-02 09:10:12', '2018-04-02 10:00:12'),
    ('201812060003', '瀘A99999', '2018-04-02 09:15:12', '2018-04-02 09:55:12'),
    ('201812060004', '瀘A88888', '2018-04-02 10:05:12', '2018-04-02 10:30:12'),
    ('201812060005', '瀘A22222', '2018-04-02 10:10:12', '2018-04-02 10:44:12'),
    ('201812060006', '瀘A99999', '2018-04-02 10:15:12', '2018-04-02 10:34:12'),
    ('201812060007', '瀘A66666', '2018-04-02 10:20:12', '2018-04-02 10:30:12'),
    ('201812060008', '瀘A66666', '2018-04-02 10:35:12', '2018-04-02 10:40:12'),
    ('201812060009', '瀘A00000', '2018-04-02 10:35:12', '2018-04-02 10:45:12'),
    ('201812060010', '瀘A99999', '2018-04-02 10:36:12', '2018-04-02 10:45:12'),
    ('201812060011', '瀘A22222', '2018-04-02 10:50:12', '2018-04-02 11:20:12');

使用case when then判斷表A的時間是否在表B的開始時間和結束時間之間:在區間內標記為1,不在區間內標記為0

-- Flag query.
-- Method 1: convert every time string to epoch seconds with unix_timestamp()
-- and compare the resulting numbers.
-- Produces one row per (A row, B row) pair sharing a plate_num; sign is 1 when
-- A.time lies inside [B.start_time, B.end_time] (both ends inclusive), else 0.
select
    A.plate_num  as plate_num,
    A.time       as time,
    B.start_time as start_time,
    B.end_time   as end_time,
    case
        when unix_timestamp(A.time) >= unix_timestamp(B.start_time)
         and unix_timestamp(A.time) <= unix_timestamp(B.end_time)
        then 1
        else 0
    end          as sign
from A
inner join B
    on A.plate_num = B.plate_num;

-- 結果:
OK
瀘A88888	2018-04-02 09:34:12	2018-04-02 09:00:12	2018-04-02 10:00:12	1
瀘A22222	2018-04-02 15:34:12	2018-04-02 10:50:12	2018-04-02 11:20:12	0
瀘A88888	2018-04-02 09:34:12	2018-04-02 10:05:12	2018-04-02 10:30:12	0
瀘A22222	2018-04-02 15:34:12	2018-04-02 10:10:12	2018-04-02 10:44:12	0
瀘A00000	2018-04-02 10:34:12	2018-04-02 10:35:12	2018-04-02 10:45:12	0
瀘A66666	2018-04-02 12:34:12	2018-04-02 09:10:12	2018-04-02 10:00:12	0
瀘A66666	2018-04-02 12:34:12	2018-04-02 10:20:12	2018-04-02 10:30:12	0
瀘A66666	2018-04-02 12:34:12	2018-04-02 10:35:12	2018-04-02 10:40:12	0
Time taken: 9.907 seconds, Fetched: 8 row(s)


-- Method 2: compare the time strings directly, with no timestamp conversion.
-- This relies on every value being zero-padded 'yyyy-MM-dd HH:mm:ss' text, for
-- which lexicographic order equals chronological order.
-- Produces one row per (A row, B row) pair sharing a plate_num; sign is 1 when
-- A.time lies inside [B.start_time, B.end_time] (both ends inclusive), else 0.
select
    A.plate_num  as plate_num,
    A.time       as time,
    B.start_time as start_time,
    B.end_time   as end_time,
    case
        when A.time >= B.start_time
         and A.time <= B.end_time
        then 1
        else 0
    end          as sign
from A
inner join B
    on A.plate_num = B.plate_num;

-- 結果:  
OK
瀘A88888	2018-04-02 09:34:12	2018-04-02 09:00:12	2018-04-02 10:00:12	1
瀘A22222	2018-04-02 15:34:12	2018-04-02 10:50:12	2018-04-02 11:20:12	0
瀘A88888	2018-04-02 09:34:12	2018-04-02 10:05:12	2018-04-02 10:30:12	0
瀘A22222	2018-04-02 15:34:12	2018-04-02 10:10:12	2018-04-02 10:44:12	0
瀘A00000	2018-04-02 10:34:12	2018-04-02 10:35:12	2018-04-02 10:45:12	0
瀘A66666	2018-04-02 12:34:12	2018-04-02 09:10:12	2018-04-02 10:00:12	0
瀘A66666	2018-04-02 12:34:12	2018-04-02 10:20:12	2018-04-02 10:30:12	0
瀘A66666	2018-04-02 12:34:12	2018-04-02 10:35:12	2018-04-02 10:40:12	0
Time taken: 0.985 seconds, Fetched: 8 row(s)

求訂單中間的休息時間,使用lead的視窗函式

-- Rest time between consecutive orders of the same plate, in seconds.
-- For each order, interval_time = (start of the plate's next order) - (end of
-- this order). The last order of each plate has no successor, so its
-- interval_time is NULL.
select
    t.order_id,
    t.plate_num,
    t.start_time,
    t.end_time,
    unix_timestamp(t.next_start_time) - unix_timestamp(t.end_time) as interval_time
from (
    select
        order_id,
        plate_num,
        start_time,
        end_time,
        -- Partition by plate and order by start time ascending, then take the
        -- next order's start time. No default is passed, so lead() returns NULL
        -- past the end of the partition; this avoids feeding an integer 0 into
        -- a string column and relying on unix_timestamp('0') failing to parse.
        lead(start_time) over (partition by plate_num order by start_time) as next_start_time
    from B
) t;

-- 結果
OK
201812060009	瀘A00000	2018-04-02 10:35:12	2018-04-02 10:45:12	NULL
201812060005	瀘A22222	2018-04-02 10:10:12	2018-04-02 10:44:12	360
201812060011	瀘A22222	2018-04-02 10:50:12	2018-04-02 11:20:12	NULL
201812060002	瀘A66666	2018-04-02 09:10:12	2018-04-02 10:00:12	1200
201812060007	瀘A66666	2018-04-02 10:20:12	2018-04-02 10:30:12	300
201812060008	瀘A66666	2018-04-02 10:35:12	2018-04-02 10:40:12	NULL
201812060001	瀘A88888	2018-04-02 09:00:12	2018-04-02 10:00:12	300
201812060004	瀘A88888	2018-04-02 10:05:12	2018-04-02 10:30:12	NULL
201812060003	瀘A99999	2018-04-02 09:15:12	2018-04-02 09:55:12	1200
201812060006	瀘A99999	2018-04-02 10:15:12	2018-04-02 10:34:12	120
201812060010	瀘A99999	2018-04-02 10:36:12	2018-04-02 10:45:12	NULL
Time taken: 9.289 seconds, Fetched: 11 row(s)