diff --git a/cmd/keeper/cmd/keeper.go b/cmd/keeper/cmd/keeper.go index ba2f1352e..ec4c9a53b 100644 --- a/cmd/keeper/cmd/keeper.go +++ b/cmd/keeper/cmd/keeper.go @@ -1547,6 +1547,7 @@ func (p *PostgresKeeper) postgresKeeperSM(pctx context.Context) { } needsReload := false + changedParams := pgParameters.Diff(pgm.CurParameters()) if !pgParameters.Equals(pgm.CurParameters()) { log.Infow("postgres parameters changed, reloading postgres instance") @@ -1588,6 +1589,23 @@ func (p *PostgresKeeper) postgresKeeperSM(pctx context.Context) { if err := pgm.Reload(); err != nil { log.Errorw("failed to reload postgres instance", err) } + + clusterSpec := cd.Cluster.DefSpec() + automaticPgRestartEnabled := *clusterSpec.AutomaticPgRestart + + if automaticPgRestartEnabled { + needsRestart, err := pgm.IsRestartRequired(changedParams) + if err != nil { + log.Errorw("Failed to checked if restart is required", err) + } + + if needsRestart { + log.Infow("Restarting postgres") + if err := pgm.Restart(true); err != nil { + log.Errorw("Failed to restart postgres instance", err) + } + } + } } // If we are here, then all went well and we can update the db generation and save it locally diff --git a/doc/cluster_spec.md b/doc/cluster_spec.md index 758a2eaec..5afc415eb 100644 --- a/doc/cluster_spec.md +++ b/doc/cluster_spec.md @@ -36,7 +36,8 @@ Some options in a running cluster specification can be changed to update the des | pitrConfig | configuration for initMode of type "pitr" | if initMode is "pitr" | PITRConfig | | | standbyConfig | standby config when the cluster is a standby cluster | if role is "standby" | StandbyConfig | | | pgParameters | a map containing the postgres server parameters and their values. The parameters value don't have to be quoted and single quotes don't have to be doubled since this is already done by the keeper when writing the postgresql.conf file | no | map[string]string | | -| pgHBA | a list containing additional pg_hba.conf entries. They will be added to the pg_hba.conf generated by stolon. **NOTE**: these lines aren't validated so if some of them are wrong postgres will refuse to start or, on reload, will log a warning and ignore the updated pg_hba.conf file | no | []string | null. Will use the default behiavior of accepting connections from all hosts for all dbs and users with md5 password authentication | +| pgHBA | a list containing additional pg_hba.conf entries. They will be added to the pg_hba.conf generated by stolon. **NOTE**: these lines aren't validated so if some of them are wrong postgres will refuse to start or, on reload, will log a warning and ignore the updated pg_hba.conf file | no | []string | null. Will use the default behiavior of accepting connections from all hosts for all dbs and users with md5 password authentication | +| automaticPgRestart | restart postgres automatically after changing the pgParameters that requires restart. Refer `pending_restart` in [pg_settings](https://www.postgresql.org/docs/9.5/static/view-pg-settings.html) | no | bool | false | #### ExistingConfig diff --git a/doc/postgres_parameters.md b/doc/postgres_parameters.md index 578b8f18a..68def26a6 100644 --- a/doc/postgres_parameters.md +++ b/doc/postgres_parameters.md @@ -48,3 +48,7 @@ To disable this behavior just set `mergePgParameters` to false in the cluster sp Since postgresql.auto.conf overrides postgresql.conf parameters, changing some of them with ALTER SYSTEM could break the cluster (parameters managed by stolon could be overridden) and make pg parameters different between the instances. To avoid this stolon disables the execution of ALTER SYSTEM commands making postgresql.auto.conf a symlink to /dev/null. When an ALTER SYSTEM command is executed it'll return an error. + +## Restart postgres on changing some pg parameters + +There are some pg parameters which requires postgres restart to take effect after changing. For example, changing `max_connections` will not take effect till the underlying postgres is restarted. This is disabled by default and can be enabled using the clusterSpecification `automaticPgRestart`. This is achieved using `pending_restart` in [pg_settings](https://www.postgresql.org/docs/9.5/static/view-pg-settings.html) for postgres 9.5 and above and the `context` column of `pg_settings` for lower versions (<= 9.4). \ No newline at end of file diff --git a/internal/cluster/cluster.go b/internal/cluster/cluster.go index 517567a80..dbacae135 100644 --- a/internal/cluster/cluster.go +++ b/internal/cluster/cluster.go @@ -69,6 +69,7 @@ const ( DefaultMergePGParameter = true DefaultRole ClusterRole = ClusterRoleMaster DefaultSUReplAccess SUReplAccessMode = SUReplAccessAll + DefaultAutomaticPgRestart = false ) const ( @@ -283,6 +284,8 @@ type ClusterSpec struct { // Additional pg_hba.conf entries // we don't set omitempty since we want to distinguish between null or empty slice PGHBA []string `json:"pgHBA"` + // Enable automatic pg restart when pg parameters that requires restart changes + AutomaticPgRestart *bool `json:"automaticPgRestart"` } type ClusterStatus struct { @@ -391,6 +394,9 @@ func (os *ClusterSpec) WithDefaults() *ClusterSpec { v := DefaultRole s.Role = &v } + if s.AutomaticPgRestart == nil { + s.AutomaticPgRestart = BoolP(DefaultAutomaticPgRestart) + } return s } diff --git a/internal/common/common.go b/internal/common/common.go index fe1dc8346..8e72666b1 100644 --- a/internal/common/common.go +++ b/internal/common/common.go @@ -73,6 +73,23 @@ func (s Parameters) Equals(is Parameters) bool { return reflect.DeepEqual(s, is) } +// Diff returns the list of pgParameters changed(newly added, existing deleted and value changed) +func (s Parameters) Diff(newParams Parameters) []string { + var changedParams []string + for k, v := range newParams { + if val, ok := s[k]; !ok || v != val { + changedParams = append(changedParams, k) + } + } + + for k := range s { + if _, ok := newParams[k]; !ok { + changedParams = append(changedParams, k) + } + } + return changedParams +} + // WriteFileAtomicFunc atomically writes a file, it achieves this by creating a // temporary file and then moving it. writeFunc is the func that will write // data to the file. diff --git a/internal/common/common_test.go b/internal/common/common_test.go new file mode 100644 index 000000000..5855a1f62 --- /dev/null +++ b/internal/common/common_test.go @@ -0,0 +1,44 @@ +// Copyright 2018 Sorint.lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied +// See the License for the specific language governing permissions and +// limitations under the License. + +package common_test + +import ( + "testing" + + "github.com/sorintlab/stolon/internal/common" + "github.com/sorintlab/stolon/internal/util" +) + +func TestDiffReturnsChangedParams(t *testing.T) { + var curParams common.Parameters = map[string]string{ + "max_connections": "100", + "shared_buffers": "10MB", + "huge": "off", + } + + var newParams common.Parameters = map[string]string{ + "max_connections": "200", + "shared_buffers": "10MB", + "work_mem": "4MB", + } + + expectedDiff := []string{"max_connections", "huge", "work_mem"} + + diff := curParams.Diff(newParams) + + if !util.CompareStringSliceNoOrder(expectedDiff, diff) { + t.Errorf("Expected diff is %v, but got %v", expectedDiff, diff) + } +} diff --git a/internal/postgresql/postgresql.go b/internal/postgresql/postgresql.go index de44350ae..c7e0ab4a4 100644 --- a/internal/postgresql/postgresql.go +++ b/internal/postgresql/postgresql.go @@ -924,3 +924,20 @@ func (p *Manager) OlderWalFile() (string, error) { return "", nil } + +// IsRestartRequired returns if a postgres restart is necessary +func (p *Manager) IsRestartRequired(changedParams []string) (bool, error) { + maj, min, err := p.BinaryVersion() + if err != nil { + return false, fmt.Errorf("Error fetching pg version for checking IsRestartRequired, %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), p.requestTimeout) + defer cancel() + + if maj == 9 && min < 5 { + return isRestartRequiredUsingPgSettingsContext(ctx, p.localConnParams, changedParams) + } else { + return isRestartRequiredUsingPendingRestart(ctx, p.localConnParams) + } +} diff --git a/internal/postgresql/utils.go b/internal/postgresql/utils.go index ac6516700..c92078586 100644 --- a/internal/postgresql/utils.go +++ b/internal/postgresql/utils.go @@ -27,7 +27,7 @@ import ( "os" - _ "github.com/lib/pq" + "github.com/lib/pq" ) const ( @@ -436,6 +436,56 @@ func getConfigFilePGParameters(ctx context.Context, connParams ConnParams) (comm return pgParameters, nil } +func isRestartRequiredUsingPendingRestart(ctx context.Context, connParams ConnParams) (bool, error) { + isRestartRequired := false + db, err := sql.Open("postgres", connParams.ConnString()) + if err != nil { + return isRestartRequired, err + } + defer db.Close() + + rows, err := query(ctx, db, "select count(*) > 0 from pg_settings where pending_restart;") + if err != nil { + return isRestartRequired, err + } + defer rows.Close() + if rows.Next() { + if err := rows.Scan(&isRestartRequired); err != nil { + return isRestartRequired, err + } + } + + return isRestartRequired, nil +} + +func isRestartRequiredUsingPgSettingsContext(ctx context.Context, connParams ConnParams, changedParams []string) (bool, error) { + isRestartRequired := false + db, err := sql.Open("postgres", connParams.ConnString()) + if err != nil { + return isRestartRequired, err + } + defer db.Close() + + stmt, err := db.Prepare("select count(*) > 0 from pg_settings where context = 'postmaster' and name = ANY($1)") + + if err != nil { + return false, err + } + + rows, err := stmt.Query(pq.Array(changedParams)) + if err != nil { + return isRestartRequired, err + } + defer rows.Close() + if rows.Next() { + if err := rows.Scan(&isRestartRequired); err != nil { + return isRestartRequired, err + } + } + + return isRestartRequired, nil +} + func ParseBinaryVersion(v string) (int, int, error) { // extact version (removing beta*, rc* etc...) regex, err := regexp.Compile(`.* \(PostgreSQL\) ([0-9\.]+).*`) diff --git a/tests/integration/config_test.go b/tests/integration/config_test.go index 047a5690e..1a232a5e0 100644 --- a/tests/integration/config_test.go +++ b/tests/integration/config_test.go @@ -462,3 +462,127 @@ func TestAdditionalReplicationSlots(t *testing.T) { t.Fatalf("unexpected err: %v", err) } } + +func TestAutomaticPgRestart(t *testing.T) { + t.Parallel() + + dir, err := ioutil.TempDir("", "") + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + defer os.RemoveAll(dir) + + tstore, err := NewTestStore(t, dir) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if err := tstore.Start(); err != nil { + t.Fatalf("unexpected err: %v", err) + } + if err := tstore.WaitUp(10 * time.Second); err != nil { + t.Fatalf("error waiting on store up: %v", err) + } + storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port) + defer tstore.Stop() + + clusterName := uuid.NewV4().String() + + storePath := filepath.Join(common.StorePrefix, clusterName) + + sm := store.NewKVBackedStore(tstore.store, storePath) + automaticPgRestart := true + pgParameters := map[string]string{"max_connections": "100"} + + initialClusterSpec := &cluster.ClusterSpec{ + InitMode: cluster.ClusterInitModeP(cluster.ClusterInitModeNew), + AutomaticPgRestart: &automaticPgRestart, + PGParameters: pgParameters, + } + + initialClusterSpecFile, err := writeClusterSpec(dir, initialClusterSpec) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + ts, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile)) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if err := ts.Start(); err != nil { + t.Fatalf("unexpected err: %v", err) + } + defer ts.Stop() + + tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if err := tk.Start(); err != nil { + t.Fatalf("unexpected err: %v", err) + } + defer tk.Stop() + + if err := WaitClusterPhase(sm, cluster.ClusterPhaseNormal, 60*time.Second); err != nil { + t.Fatalf("unexpected err: %v", err) + } + if err := tk.WaitDBUp(60 * time.Second); err != nil { + t.Fatalf("unexpected err: %v", err) + } + + err = StolonCtl(clusterName, tstore.storeBackend, storeEndpoints, "update", "--patch", `{ "pgParameters" : { "max_connections": "150" } }`) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + + // Wait for restart to happen + time.Sleep(10 * time.Second) + + rows, err := tk.Query("select setting from pg_settings where name = 'max_connections'") + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + defer rows.Close() + + if rows.Next() { + var maxConnections int + err = rows.Scan(&maxConnections) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + + if maxConnections != 150 { + t.Errorf("expected max_connections %d is not equal to actual %d", 150, maxConnections) + } + } + + // Allow users to opt out + err = StolonCtl(clusterName, tstore.storeBackend, storeEndpoints, "update", "--patch", `{ "automaticPgRestart" : false }`) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + + err = StolonCtl(clusterName, tstore.storeBackend, storeEndpoints, "update", "--patch", `{ "pgParameters" : { "max_connections": "200" } }`) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + + // Restart should not happen, but waiting in case it restarts + time.Sleep(10 * time.Second) + + rows, err = tk.Query("select setting from pg_settings where name = 'max_connections'") + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + defer rows.Close() + + if rows.Next() { + var maxConnections int + err = rows.Scan(&maxConnections) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + + if maxConnections != 150 { + t.Errorf("expected max_connections %d is not equal to actual %d", 150, maxConnections) + } + } +}