Skip to content

Compatible tools

Dmitry Ivanov edited this page Oct 23, 2017 · 4 revisions

Here's a list of tools that have been tested with pg_pathman:

Important: If you use pg_pathman 1.4.7 and older, you should disable pg_pathman.override_copy before dumping a database. Otherwise produced dump file will contain duplicate rows.

# dump whole database 'db'
pg_10/bin/pg_dump db -Fc > db_dump.bin

# restore database 'db' to 'db_copy'
pg_restore -d db_copy db_dump.bin
# backup whole cluster in tar format
pg_basebackup -D db_backup -Ft -z -P

Add a few lines to postgresql.conf

shared_preload_libraries = 'online_analyze, pg_pathman'

# adjust these settings
online_analyze.enable = on
online_analyze.verbose = on
# online_analyze.threshold = 50
# online_analyze.min_interval = 1
# online_analyze.scale_factor = 0.1
# online_analyze.table_type = 'temporary'

Now, let's take a look at some INSERT statements:

create table pt (id int not null);
select create_hash_partitions('pt', 'id', 4);

insert into pt values (1);
INFO:  analyzing "public.pt"
INFO:  "pt": scanned 0 of 0 pages, containing 0 live rows and 0 dead rows; 0 rows in sample, 0 estimated total rows
INFO:  analyzing "public.pt" inheritance tree
INFO:  "pt_2": scanned 1 of 1 pages, containing 1 live rows and 0 dead rows; 1 rows in sample, 1 estimated total rows
INFO:  analyze "pt" took 0.00 seconds
INSERT 0 1

insert into pt select generate_series(1, 3);
INFO:  analyzing "public.pt"
INFO:  "pt": scanned 0 of 0 pages, containing 0 live rows and 0 dead rows; 0 rows in sample, 0 estimated total rows
INFO:  analyzing "public.pt" inheritance tree
INFO:  "pt_2": scanned 1 of 1 pages, containing 3 live rows and 0 dead rows; 3 rows in sample, 3 estimated total rows
INFO:  "pt_3": scanned 1 of 1 pages, containing 1 live rows and 0 dead rows; 1 rows in sample, 1 estimated total rows
INFO:  analyze "pt" took 0.00 seconds
INSERT 0 3

pg_repack is a PostgreSQL extension which lets you remove bloat from tables and indexes, and optionally restore the physical order of clustered indexes. Unlike CLUSTER and VACUUM FULL it works online, without holding an exclusive lock on the processed tables during processing. pg_repack is efficient to boot, with performance comparable to using CLUSTER directly.

To repack a whole database, use -C pg_pathman option:

create extension pg_pathman;
create extension pg_repack;

create table a(id int primary key not null);
select create_range_partitions('a', 'id', 1, 10, 4);

create table b(id int primary key not null);
select create_hash_partitions('b', 'id', 3);
# repack all tables of database 'db'
pg_repack db -C pg_pathman
INFO: repacking table "a"
INFO: repacking table "a_1"
INFO: repacking table "a_2"
INFO: repacking table "a_3"
INFO: repacking table "a_4"
INFO: repacking table "b"
INFO: repacking table "b_0"
INFO: repacking table "b_1"
INFO: repacking table "b_2"

Otherwise pg_repack will deadlock on table pathman_config:

# repack all tables of database 'db'
pg_repack db
INFO: repacking table "pathman_config"
..... # nothing happens at all

To repack a single partitioned table, use -I option:

create extension pg_pathman;
create extension pg_repack;

/* create a table partitioned by hash */
create table pt (id int primary key not null);
select create_hash_partitions('pt', 'id', 4);
# repack table 'pt' of database 'db'
pg_repack db -I pt
INFO: repacking table "pt"
INFO: repacking table "pt_0"
INFO: repacking table "pt_1"
INFO: repacking table "pt_2"
INFO: repacking table "pt_3"