LightDB 22.1 在原先基础上新增了多种优化器提示,包括use_hash_aggregation,semijoin/antijoin,swap_join_inputs。
使用此优化器提示可以控制group by的算法,目前group by有两种算法,一种为排序,另一种为哈希, 通过使用use_hash_aggregation
可以控制group by使用hash算法,使用no_use_hash_aggregation
控制group by不使用hash算法,也即使用排序算法。示例如下:
lightdb@postgres=# create table test1 (key1 int primary key, key2 int);
CREATE TABLE
lightdb@postgres=# EXPLAIN (COSTS false) select max(id) from t1 where id>1 group by id order by id;
QUERY PLAN
-------------------------------------------
GroupAggregate
Group Key: id
-> Index Only Scan using t1_pkey on t1
Index Cond: (id > 1)
(4 rows)
lightdb@postgres=# EXPLAIN (COSTS false) select/*+ use_hash_aggregation*/ max(id) from t1 where id>1 group by id order by id;
QUERY PLAN
------------------------------------
Sort
Sort Key: id
-> HashAggregate
Group Key: id
-> Seq Scan on t1 @"lt#0"
Filter: (id > 1)
(6 rows)
lightdb@postgres=# EXPLAIN (COSTS false) select max(id) from t1 where id>1 group by id;
QUERY PLAN
--------------------------
HashAggregate
Group Key: id
-> Seq Scan on t1
Filter: (id > 1)
(4 rows)
lightdb@postgres=# EXPLAIN (COSTS false) select/*+ no_use_hash_aggregation*/ max(id) from t1 where id>1 group by id;
QUERY PLAN
---------------------------------------------------
GroupAggregate
Group Key: id
-> Index Only Scan using t1_pkey on t1 @"lt#0"
Index Cond: (id > 1)
(4 rows)
lightdb@postgres=#
此优化器提示用来控制exists/in子链接join时使用或不使用semijoin,并可以控制seimjoin的算法,是走nestloop、hash还是merge join。示例如下:
lightdb@postgres=# create table test1 (key1 int primary key, key2 int);
CREATE TABLE
lightdb@postgres=# create table test2 (key1 int primary key, key2 int);
CREATE TABLE
lightdb@postgres=#
lightdb@postgres=# EXPLAIN (COSTS false) select * from test1 where exists (select * from test2 where test1.key1=test2.key1);
QUERY PLAN
----------------------------------------
Hash Join
Hash Cond: (test1.key1 = test2.key1)
-> Seq Scan on test1
-> Hash
-> Seq Scan on test2
(5 rows)
lightdb@postgres=# EXPLAIN (COSTS false) select * from test1 where exists (select /*+semijoin*/* from test2 where test1.key1=test2.key1);
QUERY PLAN
---------------------------------------------------------
Merge Semi Join
Merge Cond: (test1.key1 = test2.key1)
-> Index Scan using test1_pkey on test1 @"lt#1"
-> Index Only Scan using test2_pkey on test2 @"lt#0"
(4 rows)
lightdb@postgres=# EXPLAIN (COSTS false) select * from test1 where exists (select /*+hash_sj*/* from test2 where test1.key1=test2.key1);
QUERY PLAN
----------------------------------------
Hash Semi Join
Hash Cond: (test1.key1 = test2.key1)
-> Seq Scan on test1 @"lt#1"
-> Hash
-> Seq Scan on test2 @"lt#0"
(5 rows)
lightdb@postgres=#
lightdb@postgres=# create table tt1 (id int, t int, name varchar(255));
CREATE TABLE
lightdb@postgres=# create table tt2 (id int , salary int);
CREATE TABLE
lightdb@postgres=# create index idx_t1_id on tt1(id);
CREATE INDEX
lightdb@postgres=# create index idx_t2_id on tt2(id);
CREATE INDEX
lightdb@postgres=#
lightdb@postgres=# EXPLAIN (COSTS false) select * from tt1 where exists (select * from tt2 where tt1.id=tt2.id);
QUERY PLAN
----------------------------------------------
Nested Loop Semi Join
-> Seq Scan on tt1
-> Index Only Scan using idx_t2_id on tt2
Index Cond: (id = tt1.id)
(4 rows)
lightdb@postgres=# EXPLAIN (COSTS false) select * from tt1 where exists (select /*+no_semijoin*/* from tt2 where tt1.id=tt2.id);
QUERY PLAN
-------------------------------------------
Hash Join
Hash Cond: (tt1.id = tt2.id)
-> Seq Scan on tt1 @"lt#1"
-> Hash
-> HashAggregate
Group Key: tt2.id
-> Seq Scan on tt2 @"lt#0"
(7 rows)
lightdb@postgres=#
此优化器提示用来控制not exists/not in子链接使用antijoin时的算法,是走nestloop、hash还是merge join。示例如下:
lightdb@postgres=# create table tt1 (id int, t int, name varchar(255));
CREATE TABLE
lightdb@postgres=# create table tt2 (id int , salary int);
CREATE TABLE
lightdb@postgres=# EXPLAIN (COSTS false) select * from test1 where not exists (select 1 from test2 where test1.key1=test2.key1);
QUERY PLAN
----------------------------------------
Hash Anti Join
Hash Cond: (test1.key1 = test2.key1)
-> Seq Scan on test1
-> Hash
-> Seq Scan on test2
(5 rows)
lightdb@postgres=# EXPLAIN (COSTS false) select * from test1 where not exists (select/*+hash_aj*/ 1 from test2 where test1.key1=test2.key1);
QUERY PLAN
----------------------------------------
Hash Anti Join
Hash Cond: (test1.key1 = test2.key1)
-> Seq Scan on test1 @"lt#1"
-> Hash
-> Seq Scan on test2 @"lt#0"
(5 rows)
lightdb@postgres=# EXPLAIN (COSTS false) select * from test1 where not exists (select/*+nl_aj*/ 1 from test2 where test1.key1=test2.key1);
QUERY PLAN
---------------------------------------------------------
Nested Loop Anti Join
-> Seq Scan on test1 @"lt#1"
-> Index Only Scan using test2_pkey on test2 @"lt#0"
Index Cond: (key1 = test1.key1)
(4 rows)
lightdb@postgres=# EXPLAIN (COSTS false) select * from test1 where not exists (select/*+merge_aj*/ 1 from test2 where test1.key1=test2.key1);
QUERY PLAN
---------------------------------------------------------
Merge Anti Join
Merge Cond: (test1.key1 = test2.key1)
-> Index Scan using test1_pkey on test1 @"lt#1"
-> Index Only Scan using test2_pkey on test2 @"lt#0"
(4 rows)
lightdb@postgres=#
此优化器提示用来指定hash join的外表(驱动表)。示例如下:
lightdb@postgres=# create table test1 (key1 int primary key, key2 int);
CREATE TABLE
lightdb@postgres=# create table test2 (key1 int primary key, key2 int);
CREATE TABLE
lightdb@postgres=# EXPLAIN (COSTS false) select * from test1, test2 where test1.key1 = test2.key1;
QUERY PLAN
----------------------------------------
Hash Join
Hash Cond: (test1.key1 = test2.key1)
-> Seq Scan on test1
-> Hash
-> Seq Scan on test2
(5 rows)
lightdb@postgres=# EXPLAIN (COSTS false) select /*+swap_join_inputs(test2)*/* from test1, test2 where test1.key1 = test2.key1;
LOG: pg_hint_plan:
used hint:
swap_join_inputs(test2@lt#0)
not used hint:
duplication hint:
error hint:
QUERY PLAN
----------------------------------------
Hash Join
Hash Cond: (test2.key1 = test1.key1)
-> Seq Scan on test2 @"lt#0"
-> Hash
-> Seq Scan on test1 @"lt#0"
(5 rows)
lightdb@postgres=#
需要注意的是,如果SQL原先不为hashjoin, 使用了此hint后会变为hashjoin。示例如下:
lightdb@postgres=# create table t_1(key1 int not null);
CREATE TABLE
lightdb@postgres=# create table t_2(key1 int not null);
CREATE TABLE
lightdb@postgres=# explain select /*swap_join_inputs(t_2)*/* from t_1,t_2 where t_1.key1=t_2.key1;
QUERY PLAN
-------------------------------------------------------------------
Merge Join (cost=359.57..860.00 rows=32512 width=8)
Merge Cond: (t_1.key1 = t_2.key1)
-> Sort (cost=179.78..186.16 rows=2550 width=4)
Sort Key: t_1.key1
-> Seq Scan on t_1 (cost=0.00..35.50 rows=2550 width=4)
-> Sort (cost=179.78..186.16 rows=2550 width=4)
Sort Key: t_2.key1
-> Seq Scan on t_2 (cost=0.00..35.50 rows=2550 width=4)
(8 rows)
lightdb@postgres=# explain select /*+swap_join_inputs(t_2)*/* from t_1,t_2 where t_1.key1=t_2.key1;
LOG: pg_hint_plan:
used hint:
swap_join_inputs(t_2@lt#0)
not used hint:
duplication hint:
error hint:
QUERY PLAN
---------------------------------------------------------------------------
Hash Join (cost=67.38..1247.18 rows=32512 width=8)
Hash Cond: (t_2.key1 = t_1.key1)
-> Seq Scan on t_2 @"lt#0" (cost=0.00..35.50 rows=2550 width=4)
-> Hash (cost=35.50..35.50 rows=2550 width=4)
-> Seq Scan on t_1 @"lt#0" (cost=0.00..35.50 rows=2550 width=4)
(5 rows)
lightdb@postgres=#
ey1)
-> Seq Scan on t_2 @“lt#0” (cost=0.00…35.50 rows=2550 width=4)
-> Hash (cost=35.50…35.50 rows=2550 width=4)
-> Seq Scan on t_1 @“lt#0” (cost=0.00…35.50 rows=2550 width=4)
(5 rows)
lightdb@postgres=#