1. 程式人生 > >nulls first & nulls last 對索引影響

nulls first & nulls last 對索引影響

-- 當我們需要排序欄位時,比如order by name,如果name欄位定義時沒有not null時,就有可能涉及到null值的排序
-- 如果不注意,可能會造成隱藏的bug,pg預設null是無窮大,在升序時排在最後面,當然在排序時也可以指定 nulls first 或 nulls last
-- 具體使用方法在此不再贅述,本文主要講的是在建立索引時指定 nulls first 或 nulls last 對查詢的影響


-- 基礎建立索引語法如下
CREATE INDEX [ name ] ON table_name ( column_name [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...] )
    
-- 在升序或降序時的預設值
-- NULLS FIRST
Specifies that nulls sort before non-nulls. This is the default when DESC is specified.
-- NULLS LAST
Specifies that nulls sort after non-nulls. This is the default when DESC is not specified.



-- Build the test table t with one million rows produced by generate_series.
--   id          : the series value n (1 .. 1000000)
--   name        : 'rudy' concatenated with n
--   password    : n concatenated with 'password'
--   create_date : now() shifted forward by n seconds
create table t as
select
    n as id,
    'rudy' || n as name,
    n || 'password' as password,
    now() + (n || ' second')::interval as create_date
from generate_series(1, 1000000) as n;

postgres=# update t set name = null where mod(id,2)=0;
UPDATE 500000
-- 構造隨機的null值,本次測試取對半
postgres=# create table t1 as select * from t; 
SELECT 1000000
postgres=# create index on t(name);
CREATE INDEX
postgres=# create index on t1(name nulls first);
CREATE INDEX

-- 在不指定 nulls first 時,由於pg預設null無限大,故當排序為升序時,t表能夠使用索引,t1表卻是使用全表掃描
postgres=# explain verbose select * from t where id>1000 order by name limit 10;
                                        QUERY PLAN                                         
-------------------------------------------------------------------------------------------
 Limit  (cost=0.42..1.26 rows=10 width=33)
   Output: id, name, password, create_date
   ->  Index Scan using t_name_idx on public.t  (cost=0.42..83387.28 rows=999027 width=33)
         Output: id, name, password, create_date
         Filter: (t.id > 1000)
(5 rows)


postgres=# explain verbose select * from t1 where id>1000 order by name limit 10;
                                  QUERY PLAN                                   
-------------------------------------------------------------------------------
 Limit  (cost=40949.92..40949.94 rows=10 width=33)
   Output: id, name, password, create_date
   ->  Sort  (cost=40949.92..43447.52 rows=999041 width=33)
         Output: id, name, password, create_date
         Sort Key: t1.name
         ->  Seq Scan on public.t1  (cost=0.00..19361.00 rows=999041 width=33)
               Output: id, name, password, create_date
               Filter: (t1.id > 1000)
(8 rows)


-- 在指定 nulls first 時,由於t1表建立索引時null值放在最前面,而且排序欄位為升序,所以t1表使用索引,t表使用全表掃描
postgres=# explain verbose select * from t where id>1000 order by name nulls first limit 10; 
                                  QUERY PLAN                                  
------------------------------------------------------------------------------
 Limit  (cost=46095.61..46095.64 rows=10 width=33)
   Output: id, name, password, create_date
   ->  Sort  (cost=46095.61..48593.18 rows=999027 width=33)
         Output: id, name, password, create_date
         Sort Key: t.name NULLS FIRST
         ->  Seq Scan on public.t  (cost=0.00..24507.00 rows=999027 width=33)
               Output: id, name, password, create_date
               Filter: (t.id > 1000)
(8 rows)


postgres=# explain verbose select * from t1 where id>1000 order by name nulls first limit 10;
                                         QUERY PLAN                                          
---------------------------------------------------------------------------------------------
 Limit  (cost=0.42..0.98 rows=10 width=33)
   Output: id, name, password, create_date
   ->  Index Scan using t1_name_idx on public.t1  (cost=0.42..55921.45 rows=999041 width=33)
         Output: id, name, password, create_date
         Filter: (t1.id > 1000)
(5 rows)



-- 綜上,為什麼會出現這樣的結果?因為我們預設建立的btree索引,其葉子節點的資料是有序排列的,當建立索引不指定nulls first時,pg預設把null值放在葉子節點的最後
-- 如果排序時只是order by name,未指定nulls first,pg只需要根據索引順序返回需要的資料即可;否則,如果order by name nulls first,pg如果使用索引,
-- 就必須先在葉子末尾節點返回null值的資料,再從葉子起始節點開始返回資料(假設需要返回10條資料,null值為5條,非null值有5條),很明顯此時使用索引不是高效的