os: centos 7.4
db: postgresql 10.10
pg_pathman: 1.5
pg_pathman是postgresql管理分区插件,postgresql 9.6、10 的内置分区管理也一直都在完善。使用哪种方式来管理,用户自己决定。不过pg_pathman 确实很方便。
由于pg_pathman使用了custom scan provider api,所以只支持PostgreSQL 9.5以及以上的版本
The pg_pathman module provides optimized partitioning mechanism and functions to manage partitions.
The extension is compatible with:
PostgreSQL 9.5, 9.6, 10;
Postgres Pro Standard 9.5, 9.6;
Postgres Pro Enterprise;
创建新表
peiybdb=# create table tmp_hash(
id int not null,
name text not null,
insert_timestamp timestamp with time zone not null
);
分区设置
peiybdb=# select create_hash_partitions(
'tmp_hash'::regclass, -- 主表oid
'id', -- 分区字段,一定要not null约束
20, -- 分区表数量
false -- 不立即将数据从主表迁移到分区
);
peiybdb=# select set_enable_parent('tmp_hash'::regclass, false);
插入数据
peiybdb=# insert into tmp_hash
select id,
md5(id::text),
current_date - mod(id,1000)
from generate_series(1,1000000) as id;
peiybdb=# select count(*) from tmp_hash;
count
---------
1000000
(1 row)
peiybdb=# select count(*) from only tmp_hash;
count
-------
0
(1 row)
查看执行计划
peiybdb=# explain select * from tmp_hash where id = 99999;
QUERY PLAN
----------------------------------------------------------------------
Append (cost=0.00..1129.05 rows=264 width=44)
-> Seq Scan on tmp_hash_4 (cost=0.00..1129.05 rows=264 width=44)
Filter: (id = 99999)
(3 rows)
使用了id=99999,被认为常量,进行了分区裁剪
peiybdb=# explain select * from tmp_hash where id in ( 99999 , 100000 );
QUERY PLAN
--------------------------------------------------------------------
Append (cost=0.00..2183.44 rows=4 width=44)
-> Seq Scan on tmp_hash_0 (cost=0.00..1090.75 rows=2 width=45)
Filter: (id = ANY ('{99999,100000}'::integer[]))
-> Seq Scan on tmp_hash_4 (cost=0.00..1092.69 rows=2 width=45)
Filter: (id = ANY ('{99999,100000}'::integer[]))
(5 rows)
同样进行了分区裁剪
peiybdb=# explain select * from tmp_hash where id between 99999 and 100000;
QUERY PLAN
-----------------------------------------------------------------------------------
Gather (cost=1000.00..19181.53 rows=20 width=44)
Workers Planned: 1
-> Append (cost=0.00..18179.53 rows=20 width=44)
-> Parallel Seq Scan on tmp_hash_0 (cost=0.00..907.29 rows=1 width=45)
Filter: ((id >= 99999) AND (id <= 100000))
-> Parallel Seq Scan on tmp_hash_1 (cost=0.00..905.60 rows=1 width=45)
Filter: ((id >= 99999) AND (id <= 100000))
-> Parallel Seq Scan on tmp_hash_2 (cost=0.00..915.11 rows=1 width=45)
Filter: ((id >= 99999) AND (id <= 100000))
-> Parallel Seq Scan on tmp_hash_3 (cost=0.00..911.22 rows=1 width=45)
Filter: ((id >= 99999) AND (id <= 100000))
-> Parallel Seq Scan on tmp_hash_16 (cost=0.00..911.72 rows=1 width=45)
Filter: ((id >= 99999) AND (id <= 100000))
-> Parallel Seq Scan on tmp_hash_17 (cost=0.00..912.84 rows=1 width=45)
Filter: ((id >= 99999) AND (id <= 100000))
-> Parallel Seq Scan on tmp_hash_18 (cost=0.00..911.69 rows=1 width=45)
Filter: ((id >= 99999) AND (id <= 100000))
-> Parallel Seq Scan on tmp_hash_19 (cost=0.00..903.44 rows=1 width=45)
Filter: ((id >= 99999) AND (id <= 100000))
(43 rows)
没有分区裁剪
模拟已有表,且里面有大量数据
peiybdb=# create table tmp_hash_his(
id int not null,
name text not null,
insert_timestamp timestamp with time zone not null
);
peiybdb=# insert into tmp_hash_his
select id,
md5(id::text),
current_date - mod(id,1000)
from generate_series(1,2000000) as id;
默认创建20个分区。
peiybdb=# select create_hash_partitions(
'tmp_hash_his'::regclass, -- 主表oid
'id', -- 分区字段,一定要not null约束
20, -- 分区表数量
false -- 不立即将数据从主表迁移到分区
);
执行非堵塞迁移
peiybdb=# select partition_table_concurrently(
'tmp_hash_his'::regclass, -- 主表OID
1000, -- 一个事务批量迁移多少记录
1.0 -- 获得行锁失败时,休眠多久再次获取,重试60次退出任务
);
查看迁移进度
peiybdb=# select * from pathman_concurrent_part_tasks;
userid | pid | dbid | relid | processed | status
----------+------+--------+--------------+-----------+---------
postgres | 6057 | 148454 | tmp_hash_his | 71000 | working
(1 row)
peiybdb=# select * from pathman_concurrent_part_tasks;
userid | pid | dbid | relid | processed | status
--------+-----+------+-------+-----------+--------
(0 rows)
禁用主表
peiybdb=# select set_enable_parent('tmp_hash_his'::regclass, false);
迁移后,查看数据
peiybdb=# select count(*) from tmp_hash_his;
count
-------
2000000
(1 row)
peiybdb=# select count(*) from only tmp_hash_his;
count
-------
0
(1 row)
建议
参考:
https://github.com/postgrespro/pg_pathman
https://github.com/postgrespro/pg_pathman/wiki
https://github.com/postgrespro/pg_pathman_build