hive OVER(PARTITION BY)函式用法





   over(order by salary) 按照salary排序進行累計;只寫order by而不指定視窗範圍時,預設視窗為 range between unbounded preceding and current row
   over(partition by deptno)按照部門分割槽

   over(partition by deptno order by salary)


over(order by salary range between 5 preceding and 5 following):視窗範圍為salary值在當前行的值減5到加5之間的所有行。


--sum(s)over(order by s range between 2 preceding and 2 following) 表示對s值在當前行的值減2到加2範圍內的所有行求和

 select name,class,s, sum(s)over(order by s range between 2 preceding and 2 following) mm from t2
adf        3        45        45  --45加2減2即43到47,但是s在這個範圍內只有45
asdf       3        55        55
cfe        2        74        74
3dd        3        78        158 --78在76到80範圍內有78,80,求和得158
fda        1        80        158
gds        2        92        92
ffd        1        95        190
dss        1        95        190
ddd        3        99        198

gf         3        99        198

  over(order by salary rows between 5 preceding and 5 following):視窗範圍為當前行前後各移動5行。


--sum(s)over(order by s rows between 2 preceding and 2 following)表示對當前行及其前兩行、後兩行(共最多5行)求和
-- Row-based frame: for each row, add up s over the current row plus up to
-- two physical rows before it and two after it (in order of s).
SELECT
    name,
    class,
    s,
    SUM(s) OVER (
        ORDER BY s
        ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
    ) AS mm
FROM t2
adf        3        45        174  (45+55+74=174)
asdf       3        55        252   (45+55+74+78=252)
cfe        2        74        332    (74+55+45+78+80=332)
3dd        3        78        379    (78+74+55+80+92=379)
fda        1        80        419
gds        2        92        440
ffd        1        95        461
dss        1        95        480
ddd        3        99        388
gf         3        99        293

over(order by salary range between unbounded preceding and unbounded following)或者 over(order by salary rows between unbounded preceding and unbounded following):視窗不做限制




cfe        2        74
dss        1        95
ffd        1        95
fda        1        80
gds        2        92
gf         3        99
ddd        3        99
adf        3        45
asdf       3        55
3dd        3        78

-- Top scorer(s) of each class.
-- RANK() gives tied rows the same rank, so every row tied for first place
-- is returned. The ranking query has to be an inline view (in parentheses):
-- the computed rank mm can only be filtered by an outer WHERE.
SELECT
    name,
    class,
    s,
    mm
FROM (
    SELECT
        name,
        class,
        s,
        RANK() OVER (PARTITION BY class ORDER BY s DESC) AS mm
    FROM t2
) ranked
WHERE mm = 1;
dss        1        95        1
ffd        1        95        1
gds        2        92        1
gf         3        99        1
ddd        3        99        1 

-- Exactly one top row per class.
-- ROW_NUMBER() numbers tied rows differently, so only one row per class gets
-- mm = 1. ORDER BY s alone does not decide which of the tied rows that is.
-- The numbering query has to be an inline view (in parentheses) so the outer
-- WHERE can filter on mm.
SELECT
    name,
    class,
    s,
    mm
FROM (
    SELECT
        name,
        class,
        s,
        ROW_NUMBER() OVER (PARTITION BY class ORDER BY s DESC) AS mm
    FROM t2
) numbered
WHERE mm = 1;
1        95        1  --95有兩名但是隻顯示一個
2        92        1
3        99        1 --99有兩名但也只顯示一個

-- RANK() within each class, highest s first: tied rows share a rank and the
-- rank after a tie is skipped (1, 1, 3, ...).
SELECT
    name,
    class,
    s,
    RANK() OVER (
        PARTITION BY class
        ORDER BY s DESC
    ) AS mm
FROM t2
dss        1        95        1
ffd        1        95        1
fda        1        80        3 --直接就跳到了第三
gds        2        92        1
cfe        2        74        2
gf         3        99        1
ddd        3        99        1
3dd        3        78        3
asdf       3        55        4
adf        3        45        5
-- DENSE_RANK() within each class, highest s first: tied rows share a rank
-- and the next rank follows without a gap (1, 1, 2, ...).
SELECT
    name,
    class,
    s,
    DENSE_RANK() OVER (
        PARTITION BY class
        ORDER BY s DESC
    ) AS mm
FROM t2
dss        1        95        1
ffd        1        95        1
fda        1        80        2 --連續排序(仍為2)
gds        2        92        1
cfe        2        74        2
gf         3        99        1
ddd        3        99        1
3dd        3        78        2
asdf       3        55        3
adf        3        45        4

-- Running total of s within each class, highest score first.
-- No frame is written, so rows tied on s are treated as peers and are added
-- in together (e.g. two rows with s = 95 both show 190).
SELECT
    name,
    class,
    s,
    SUM(s) OVER (
        PARTITION BY class
        ORDER BY s DESC
    ) AS mm
FROM t2
dss        1        95        190  --由於兩個95都是第一名,所以累加時是兩個第一名的相加
ffd        1        95        190 
fda        1        80        270  --第一名加上第二名的
gds        2        92        92
cfe        2        74        166
gf         3        99        198
ddd        3        99        198
3dd        3        78        276
asdf       3        55        331
adf        3        45        376

first_value() over()和last_value() over()的使用  


-- Lowest (low) and highest (high) res_type per opr_id.
-- LAST_VALUE is given an explicit frame covering the whole partition, so it
-- sees the last row of the opr_id rather than stopping at the current row.
SELECT
    opr_id,
    res_type,
    FIRST_VALUE(res_type) OVER (
        PARTITION BY opr_id
        ORDER BY res_type
    ) AS low,
    LAST_VALUE(res_type) OVER (
        PARTITION BY opr_id
        ORDER BY res_type
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS high
FROM rm_circuit_route
WHERE opr_id IN (
    '000100190000000000021311',
    '000100190000000000021355',
    '000100190000000000021339'
)
ORDER BY opr_id;


注:rows BETWEEN unbounded preceding AND unbounded following 的使用

--取last_value時不使用rows BETWEEN unbounded preceding AND unbounded following的結果

-- Counter-example: the same query with no explicit frame on LAST_VALUE.
-- With ORDER BY and no frame, the default frame ends at the current row
-- (and its peers), so "high" is just the current row's res_type, not the
-- last res_type of the whole opr_id partition.
SELECT
    opr_id,
    res_type,
    FIRST_VALUE(res_type) OVER (
        PARTITION BY opr_id
        ORDER BY res_type
    ) AS low,
    LAST_VALUE(res_type) OVER (
        PARTITION BY opr_id
        ORDER BY res_type
    ) AS high
FROM rm_circuit_route
WHERE opr_id IN (
    '000100190000000000021311',
    '000100190000000000021355',
    '000100190000000000021339'
)
ORDER BY opr_id;


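如果不使用 rows BETWEEN unbounded preceding AND unbounded following,視窗預設只到當前行(range between unbounded preceding and current row);由於視窗內按res_type排序,last_value取到的就是當前行(及與其res_type相同的行)的res_type,因此取出的不是該電路(opr_id)整個分割槽中最後一行記錄的型別,而是以當前res_type為範圍提取的值。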

在first_value和last_value中ignore nulls的使用 資料如下:


取出該電路的第一條記錄,加上ignore nulls後,如果第一條記錄中被判斷的那個欄位是空的,則跳過該記錄,取下一條非空的記錄,結果如下所示:



--lag() over()函式用法(取出前n行資料)
-- LAG demo: for each row, in order of name, return the id of the row one
-- position earlier.
-- The five sample rows are combined with UNION ALL, and the CTE is closed
-- with ")" before the main query; without both the statement does not parse.
WITH a AS (
    SELECT 1 AS id, 'a' AS name FROM dual
    UNION ALL
    SELECT 2 AS id, 'b' AS name FROM dual
    UNION ALL
    SELECT 3 AS id, 'c' AS name FROM dual
    UNION ALL
    SELECT 4 AS id, 'd' AS name FROM dual
    UNION ALL
    SELECT 5 AS id, 'e' AS name FROM dual
)
SELECT
    id,
    name,
    -- Third argument is the value used when there is no earlier row.
    -- NULL replaces the original '' so the default matches the numeric id
    -- (Oracle treats '' as NULL, so the result is unchanged there).
    LAG(id, 1, NULL) OVER (ORDER BY name) AS prev_id
FROM a;

--lead() over()函式用法(取出後N行資料)

-- LEAD demo: for each row, in order of name, return the id of the row one
-- position later.
-- The five sample rows are combined with UNION ALL, and the CTE is closed
-- with ")" before the main query; without both the statement does not parse.
WITH a AS (
    SELECT 1 AS id, 'a' AS name FROM dual
    UNION ALL
    SELECT 2 AS id, 'b' AS name FROM dual
    UNION ALL
    SELECT 3 AS id, 'c' AS name FROM dual
    UNION ALL
    SELECT 4 AS id, 'd' AS name FROM dual
    UNION ALL
    SELECT 5 AS id, 'e' AS name FROM dual
)
SELECT
    id,
    name,
    -- Third argument is the value used when there is no later row.
    -- NULL replaces the original '' so the default matches the numeric id
    -- (Oracle treats '' as NULL, so the result is unchanged there).
    LEAD(id, 1, NULL) OVER (ORDER BY name) AS next_id
FROM a;

--ratio_to_report(a)函式用法 Ratio_to_report() 括號中就是分子,over() 括號中就是分母
-- RATIO_TO_REPORT demo, partitioned: each row's a divided by the sum of a
-- over the rows that share the same a value.
-- The CTE is closed with ")" before the main query (missing in the original),
-- and is named vals so it is not confused with its own column a.
WITH vals AS (
    SELECT 1 AS a FROM dual
    UNION ALL
    SELECT 1 AS a FROM dual
    UNION ALL
    SELECT 1 AS a FROM dual
    UNION ALL
    SELECT 2 AS a FROM dual
    UNION ALL
    SELECT 3 AS a FROM dual
    UNION ALL
    SELECT 4 AS a FROM dual
    UNION ALL
    SELECT 4 AS a FROM dual
    UNION ALL
    SELECT 5 AS a FROM dual
)
SELECT
    a,
    RATIO_TO_REPORT(a) OVER (PARTITION BY a) AS b
FROM vals
ORDER BY a;

-- RATIO_TO_REPORT demo, unpartitioned: with an empty OVER () the denominator
-- is the sum of a over all rows, so b is each row's share of the grand total.
-- The CTE is closed with ")" before the main query (missing in the original),
-- and is named vals so it is not confused with its own column a.
WITH vals AS (
    SELECT 1 AS a FROM dual
    UNION ALL
    SELECT 1 AS a FROM dual
    UNION ALL
    SELECT 1 AS a FROM dual
    UNION ALL
    SELECT 2 AS a FROM dual
    UNION ALL
    SELECT 3 AS a FROM dual
    UNION ALL
    SELECT 4 AS a FROM dual
    UNION ALL
    SELECT 4 AS a FROM dual
    UNION ALL
    SELECT 5 AS a FROM dual
)
SELECT
    a,
    RATIO_TO_REPORT(a) OVER () AS b
FROM vals
ORDER BY a;

-- RATIO_TO_REPORT demo after grouping: rows are first collapsed to one row
-- per distinct a, then b is each distinct value's share of the sum of those
-- distinct values.
-- The CTE is closed with ")" before the main query (missing in the original),
-- and is named vals so it is not confused with its own column a.
WITH vals AS (
    SELECT 1 AS a FROM dual
    UNION ALL
    SELECT 1 AS a FROM dual
    UNION ALL
    SELECT 1 AS a FROM dual
    UNION ALL
    SELECT 2 AS a FROM dual
    UNION ALL
    SELECT 3 AS a FROM dual
    UNION ALL
    SELECT 4 AS a FROM dual
    UNION ALL
    SELECT 4 AS a FROM dual
    UNION ALL
    SELECT 5 AS a FROM dual
)
SELECT
    a,
    RATIO_TO_REPORT(a) OVER () AS b
FROM vals
GROUP BY a
ORDER BY a;

percent_rank用法 計算方法:(所在組排名序號-1)除以(該組所有的行數-1),如下所示自己計算的pr1與通過percent_rank函式得到的值是一樣的:
-- Hand-computed percent rank (pr1) beside the built-in PERCENT_RANK (pr2).
-- pr1 = (rank within department - 1) / (rows in department - 1).
SELECT
    a.deptno,
    CASE
        WHEN b.n > 1 THEN (a.r - 1) / (b.n - 1)
        -- One-row department: PERCENT_RANK returns 0 there, and the plain
        -- formula would divide by zero.
        ELSE 0
    END AS pr1,
    PERCENT_RANK() OVER (PARTITION BY a.deptno ORDER BY a.sal) AS pr2
FROM (
    -- sal is projected because the outer PERCENT_RANK orders by a.sal;
    -- the original inline view omitted it, making a.sal unresolvable.
    SELECT
        deptno,
        sal,
        RANK() OVER (PARTITION BY deptno ORDER BY sal) AS r  -- rank within the department
    FROM emp
) a
INNER JOIN (
    -- number of employees in each department
    SELECT
        deptno,
        COUNT(*) AS n
    FROM emp
    GROUP BY deptno
) b
    ON a.deptno = b.deptno;


cume_dist函式 計算方法:所在組排名序號除以該組所有的行數,但是如果存在並列情況,則分子需加上(並列的個數-1),如下所示:
-- Hand-computed cumulative distribution (pr1) beside the built-in
-- CUME_DIST (pr2).
-- pr1 = (rank within department + number of rows tied at that salary - 1)
--       / rows in department.
SELECT
    a.deptno,
    (a.r + c.rn - 1) / b.n AS pr1,
    CUME_DIST() OVER (PARTITION BY a.deptno ORDER BY a.sal) AS pr2
FROM (
    -- sal is projected because both the join to c and the outer CUME_DIST
    -- use a.sal; the original inline view omitted it.
    SELECT
        deptno,
        sal,
        RANK() OVER (PARTITION BY deptno ORDER BY sal) AS r
    FROM emp
) a
INNER JOIN (
    -- number of employees in each department
    SELECT
        deptno,
        COUNT(*) AS n
    FROM emp
    GROUP BY deptno
) b
    ON a.deptno = b.deptno
INNER JOIN (
    -- rn = how many employees of the department share the same salary
    SELECT
        ranked.deptno,
        ranked.sal,
        ranked.r,
        COUNT(*) AS rn
    FROM (
        SELECT
            deptno,
            sal,
            RANK() OVER (PARTITION BY deptno ORDER BY sal) AS r
        FROM emp
    ) ranked
    GROUP BY
        ranked.deptno,
        ranked.r,
        ranked.sal
) c
    -- Plain inner join: the original "(+)" on deptno alone was cancelled by
    -- the non-outer a.sal = c.sal predicate.
    ON a.deptno = c.deptno
   AND a.sal = c.sal;

-- PERCENTILE_CONT function. Meaning: takes a percentile (a value on the same
-- scale as the PERCENT_RANK result) and returns the value at that position,
-- interpolating between the two neighbouring rows when the position falls
-- between them.
-- 0.7 percentile of sal within each department (PERCENTILE_CONT), shown
-- beside each employee's PERCENT_RANK for comparison.
SELECT
    ename,
    PERCENTILE_CONT(0.7) WITHIN GROUP (ORDER BY sal)
        OVER (PARTITION BY deptno) AS "Percentile_Cont",
    PERCENT_RANK() OVER (
        PARTITION BY deptno
        ORDER BY sal
    ) AS "Percent_Rank"
FROM emp
WHERE deptno IN (30, 60);


-- 0.6 percentile of sal within each department (PERCENTILE_CONT), shown
-- beside each employee's PERCENT_RANK for comparison.
SELECT
    ename,
    PERCENTILE_CONT(0.6) WITHIN GROUP (ORDER BY sal)
        OVER (PARTITION BY deptno) AS "Percentile_Cont",
    PERCENT_RANK() OVER (
        PARTITION BY deptno
        ORDER BY sal
    ) AS "Percent_Rank"
FROM emp
WHERE deptno IN (30, 60);

-- PERCENTILE_DISC function. Description: returns an actual data value that
-- corresponds to the given distribution percentile. The distribution
-- percentile is computed as in CUME_DIST; when no row matches it exactly,
-- the next value above that distribution value is taken.


-- 0.7 percentile of sal within each department (PERCENTILE_DISC), shown
-- beside each employee's CUME_DIST for comparison.
SELECT
    ename,
    PERCENTILE_DISC(0.7) WITHIN GROUP (ORDER BY sal)
        OVER (PARTITION BY deptno) AS "Percentile_Disc",
    CUME_DIST() OVER (
        PARTITION BY deptno
        ORDER BY sal
    ) AS "Cume_Dist"
FROM emp
WHERE deptno IN (30, 60);