From 7c834db6e3edc056b0f40dd912ff04205484893f Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Thu, 16 Jul 2015 00:36:40 +0200 Subject: [PATCH] Warn against pre-existing indexes. Pre-existing indexes will reduce data loading performance, and it's generally better to DROP the indexes prior to the load and CREATE them again once the load is done. See #251 for an example of that. In this patch we just add a WARNING about the situation; the next patch will also add support for a new WITH clause option that lets pgloader take care of the DROP/CREATE dance around the data loading. --- src/package.lisp | 3 +++ src/pgsql/queries.lisp | 25 +++++++++++++++++++++++++ src/pgsql/schema.lisp | 18 ++++++++++-------- src/sources/csv/csv.lisp | 10 +++++++++- 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/src/package.lisp b/src/package.lisp index 7914211d..9ee18575 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -178,6 +178,7 @@ #:list-databases #:list-tables #:list-columns + #:list-indexes #:list-tables-cols #:list-tables-and-fkeys #:list-reserved-keywords @@ -244,6 +245,8 @@ (:use #:cl #:pgloader.params #:pgloader.utils #:pgloader.connection #:pgloader.sources #:pgloader.queue) + (:import-from #:pgloader.pgsql + #:list-indexes) (:export #:*csv-path-root* #:csv-connection #:specs diff --git a/src/pgsql/queries.lisp b/src/pgsql/queries.lisp index 0d3c0b3f..2dd85e43 100644 --- a/src/pgsql/queries.lisp +++ b/src/pgsql/queries.lisp @@ -189,6 +189,31 @@ where c.oid = '~:[~*~a~;~a.~a~]'::regclass and attnum > 0 order by attnum" schema schema table-name) :column))) +(defun list-indexes (pgconn table-name) + "List all indexes for TABLE-NAME in SCHEMA."
+ (with-pgsql-connection (pgconn) + (loop :for (index-name table-name table-oid primary sql) + :in (pomo:query (format nil " +select i.relname, + indrelid::regclass, + indrelid, + indisprimary, + pg_get_indexdef(indexrelid) + from pg_index x + join pg_class i ON i.oid = x.indexrelid + where indrelid = '~@[~a.~]~a'::regclass" + (when (typep table-name 'cons) + (car table-name)) + (typecase table-name + (cons (cdr table-name)) + (string table-name)))) + :collect (make-pgsql-index :name index-name + :table-name table-name + :table-oid table-oid + :primary primary + :columns nil + :sql sql)))) + (defun list-reserved-keywords (pgconn) "Connect to PostgreSQL DBNAME and fetch reserved keywords." (handler-case diff --git a/src/pgsql/schema.lisp b/src/pgsql/schema.lisp index 03196d52..faffe22b 100644 --- a/src/pgsql/schema.lisp +++ b/src/pgsql/schema.lisp @@ -263,7 +263,7 @@ ;;; ;;; Index support ;;; -(defstruct pgsql-index name table-name table-oid primary unique columns) +(defstruct pgsql-index name table-name table-oid primary unique columns sql) (defgeneric index-table-name (index) (:documentation @@ -293,18 +293,20 @@ (values ;; ensure good concurrency here, don't take the ACCESS EXCLUSIVE ;; LOCK on the table before we have the index done already - (format nil "CREATE UNIQUE INDEX ~a ON ~a (~{~a~^, ~});" - index-name table-name cols) + (or (pgsql-index-sql index) + (format nil "CREATE UNIQUE INDEX ~a ON ~a (~{~a~^, ~});" + index-name table-name cols)) (format nil "ALTER TABLE ~a ADD PRIMARY KEY USING INDEX ~a;" table-name index-name))) (t - (format nil "CREATE~:[~; UNIQUE~] INDEX ~a ON ~a (~{~a~^, ~});" - (pgsql-index-unique index) - index-name - table-name - cols))))) + (or (pgsql-index-sql index) + (format nil "CREATE~:[~; UNIQUE~] INDEX ~a ON ~a (~{~a~^, ~});" + (pgsql-index-unique index) + index-name + table-name + cols)))))) ;;; ;;; Parallel index building. 
diff --git a/src/sources/csv/csv.lisp b/src/sources/csv/csv.lisp index 374fd54a..a49599f2 100644 --- a/src/sources/csv/csv.lisp +++ b/src/sources/csv/csv.lisp @@ -179,7 +179,15 @@ (*state* (or *state* (pgloader.utils:make-pgstate))) (lp:*kernel* (make-kernel 2)) (channel (lp:make-channel)) - (queue (lq:make-queue :fixed-capacity *concurrent-batches*))) + (queue (lq:make-queue :fixed-capacity *concurrent-batches*)) + (indexes (list-indexes (target-db csv) + (target csv)))) + + ;; issue a performance warning against pre-existing indexes + (when indexes + (log-message :warning "Target table ~s has ~d indexes defined against it." + (target csv) (length indexes)) + (log-message :warning "That could impact loading performance badly")) (with-stats-collection ((target csv) :dbname (db-name (target-db csv))