Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

WIP: The Great Configuration Variables Exodus (deprecation) #150

Merged
merged 28 commits into from
May 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
b5c2c5e
The Great Configuration Variables Exodus (deprecation)
Apr 13, 2017
1747989
deprecating DatabaselessMode__experimental
Apr 13, 2017
268660a
deprecating DatabaselessMode__experimental
Apr 13, 2017
5667309
deperecated BufferBinlogEvents
Apr 13, 2017
c4f30b2
deprecating BinlogFileHistoryDays
Apr 13, 2017
dac461f
deprecating MaintenanceOwner
Apr 13, 2017
63cb66c
deprecating ReadLongRunningQueries
Apr 13, 2017
54e6d14
deprecating DiscoveryPollSeconds
Apr 13, 2017
d79a4ef
deprecating ActiveNodeExpireSeconds
Apr 13, 2017
9db2688
deprecating AuditPageSize
Apr 13, 2017
632f0b6
deprecating AuditPurgeDays
Apr 13, 2017
4b2bedc
deprecating SlaveStartPostWaitMilliseconds
Apr 14, 2017
0288031
listing down retired config variables; this will be used for issuing …
Apr 15, 2017
3b8cbd6
Merge branch 'master' into config-variables-exodus
Apr 18, 2017
f1c41eb
Merge branch 'master' into config-variables-exodus
Apr 19, 2017
1077bfa
Merge branch 'master' into config-variables-exodus
Apr 20, 2017
fb425c7
AuthenticationMethod default to none
Apr 20, 2017
bc4dbd1
Merge branch 'master' into config-variables-exodus
Apr 22, 2017
9e1cd18
MySQLTopologyMaxPoolConnections is now a constant
Apr 23, 2017
27038e1
deprecated configuration list updated
Apr 23, 2017
c409bd8
MaintenancePurgeDays is a constant
Apr 23, 2017
eb5f349
MaintenanceExpireMinutes is a constant
Apr 23, 2017
1a8fcb0
deprecated HttpTimeoutSeconds
Apr 23, 2017
7962922
DiscoverAgentInstance is deprecated. New logic works as if it were 't…
Apr 23, 2017
6a896b0
PseudoGTIDCoordinatesHistoryHeuristicMinutes is a constant
Apr 23, 2017
db9f76b
Merge branch 'config-variables-exodus' of github.com:github/orchestra…
Apr 23, 2017
2e543dc
Merge branch 'master' into config-variables-exodus
May 21, 2017
4c16482
fixed merge
May 21, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 0 additions & 14 deletions conf/orchestrator-sample.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
"MySQLTopologySSLCAFile": "",
"MySQLTopologySSLSkipVerify": true,
"MySQLTopologyUseMutualTLS": false,
"MySQLTopologyMaxPoolConnections": 3,
"DatabaselessMode__experimental": false,
"MySQLOrchestratorHost": "127.0.0.1",
"MySQLOrchestratorPort": 3306,
"MySQLOrchestratorDatabase": "orchestrator",
Expand All @@ -25,14 +23,11 @@
"MySQLOrchestratorUseMutualTLS": false,
"MySQLConnectTimeoutSeconds": 1,
"DefaultInstancePort": 3306,
"SlaveStartPostWaitMilliseconds": 1000,
"DiscoverByShowSlaveHosts": true,
"InstancePollSeconds": 5,
"ReadLongRunningQueries": true,
"UnseenInstanceForgetHours": 240,
"SnapshotTopologiesIntervalHours": 0,
"InstanceBulkOperationsWaitTimeoutSeconds": 10,
"ActiveNodeExpireSeconds": 5,
"HostnameResolveMethod": "default",
"MySQLHostnameResolveMethod": "@@hostname",
"SkipBinlogServerUnresolveCheck": true,
Expand All @@ -41,15 +36,10 @@
"ReasonableReplicationLagSeconds": 10,
"ProblemIgnoreHostnameFilters": [],
"VerifyReplicationFilters": false,
"MaintenanceOwner": "orchestrator",
"ReasonableMaintenanceReplicationLagSeconds": 20,
"MaintenanceExpireMinutes": 10,
"MaintenancePurgeDays": 365,
"CandidateInstanceExpireMinutes": 60,
"AuditLogFile": "",
"AuditToSyslog": false,
"AuditPageSize": 20,
"AuditPurgeDays": 365,
"RemoveTextFromHostnameDisplay": ".mydomain.com:3306",
"ReadOnly": false,
"AuthenticationMethod": "",
Expand Down Expand Up @@ -91,19 +81,15 @@
"StatusEndpoint": "/api/status",
"StatusSimpleHealth": true,
"StatusOUVerify": false,
"HttpTimeoutSeconds": 60,
"AgentPollMinutes": 60,
"AgentAutoDiscover": false,
"UnseenAgentForgetHours": 6,
"StaleSeedFailMinutes": 60,
"SeedAcceptableBytesDiff": 8192,
"PseudoGTIDPattern": "",
"PseudoGTIDPatternIsFixedSubstring": false,
"PseudoGTIDMonotonicHint": "asc:",
"DetectPseudoGTIDQuery": "",
"PseudoGTIDCoordinatesHistoryHeuristicMinutes": 2,
"BinlogEventsChunkSize": 10000,
"BufferBinlogEvents": true,
"SkipBinlogEventsContaining": [],
"ReduceReplicationAnalysisCount": true,
"FailureDetectionPeriodBlockMinutes": 60,
Expand Down
2 changes: 0 additions & 2 deletions conf/orchestrator-simple.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
"#": "-----",
"DiscoverByShowSlaveHosts": true,
"InstancePollSeconds": 5,
"ReadLongRunningQueries": true,
"HostnameResolveMethod": "default",
"MySQLHostnameResolveMethod": "@@hostname",
"ReasonableReplicationLagSeconds": 10,
Expand Down Expand Up @@ -48,7 +47,6 @@
"PseudoGTIDPatternIsFixedSubstring": false,
"PseudoGTIDMonotonicHint": "asc:",
"DetectPseudoGTIDQuery": "",
"PseudoGTIDCoordinatesHistoryHeuristicMinutes": 2,
"#": "-----",
"#": "Recovery related config:",
"#": "-----",
Expand Down
10 changes: 0 additions & 10 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ The following is an incomplete list of configuration parameters. "Complete" is a
* `MySQLTopologyUser` (string), credentials for replication topology servers (masters & replicas)
* `MySQLTopologyPassword` (string), credentials for replication topology servers (masters & replicas)
* `MySQLTopologyCredentialsConfigFile` (string), as an alternative to providing `MySQLTopologyUser`, `MySQLTopologyPassword`, name of file in `my.cnf`-like format where credentials are stored.
* `MySQLTopologyMaxPoolConnections` (int), Max concurrent connections on any topology instance
* `MySQLOrchestratorHost` (string), hostname for backend MySQL server
* `MySQLOrchestratorPort` (uint), port for backend MySQL server
* `MySQLOrchestratorDatabase` (string), name of backend MySQL server schema
Expand All @@ -21,25 +20,18 @@ The following is an incomplete list of configuration parameters. "Complete" is a
* `SkipOrchestratorDatabaseUpdate` (bool), When false, orchestrator will attempt to create & update all tables in backend database; when true, this is skipped. It makes sense to skip on command-line invocations and to enable for http or occasional invocations, or just after upgrades
* `ReplicaLagQuery` (string), custom query to check on replica lag (e.g. heartbeat table). If unprovided,
replica's `Seconds_Behind_Master` is used.
* `SlaveStartPostWaitMilliseconds` (int), Time to wait after `START SLAVE` before re-reading instance (give replica chance to connect to master)
* `DiscoverByShowSlaveHosts` (bool), Attempt `SHOW SLAVE HOSTS` before `SHOW PROCESSLIST`
* `InstancePollSeconds` (uint), Number of seconds between instance reads
* `UnseenInstanceForgetHours` (uint), Number of hours after which an unseen instance is forgotten
* `DiscoveryPollSeconds` (uint), Auto/continuous discovery of instances sleep time between polls
* `DiscoveryQueueMaxStatisticsSize` (int), Maximum number of secondly discovery queue statistic entries to keep
* `InstanceBulkOperationsWaitTimeoutSeconds` (uint), Time to wait on a single instance when doing bulk (many instances) operation
* `ActiveNodeExpireSeconds` (uint), Maximum time to wait for active node to send keepalive before attempting to take over as active node.
* `HostnameResolveMethod` (string), Type of hostname resolve method (either `"none"` or `"cname"`)
* `ExpiryHostnameResolvesMinutes` (int), Number of minutes after which a hostname resolve expires (hostname resolves are cached for up to this number of minutes)
* `RejectHostnameResolvePattern` (string), Regexp pattern for resolved hostname that will not be accepted (not cached, not written to db). This is done to avoid storing wrong resolves due to network glitches.
* `ReasonableReplicationLagSeconds` (int), Above this value is considered a problem
* `VerifyReplicationFilters` (bool), Include replication filters check before approving topology refactoring (e.g. `orchestrator` will not allow placing a non-filtered replica under a filtered one)
* `MaintenanceOwner` (string), (Default) name of maintenance owner to use if none provided
* `ReasonableMaintenanceReplicationLagSeconds` (int), Above this value move-up and move-below are blocked
* `MaintenanceExpireMinutes` (int), Minutes after which a maintenance flag is considered stale and is cleared
* `MaintenancePurgeDays` (int), Days after which maintenance entries are purged from the database
* `AuditLogFile` (string), Name of log file for audit operations. Disabled when empty.
* `AuditPageSize` (int), Number of entries in an audit page
* `RemoveTextFromHostnameDisplay` (string), Text to strip off the hostname on cluster/clusters pages. Save pixels (e.g. `mycompany.com`)
* `ReadOnly` (bool) When `"true"`, no write operations (e.g. stopping a replica, repointing replicas, discovering) are allowed
* `AuthenticationMethod` (string), type of authentication. Either empty (no authentication, default), `"basic"`, `"multi"` or `"proxy"`. See [Security](#security) section.
Expand Down Expand Up @@ -71,15 +63,13 @@ The following is an incomplete list of configuration parameters. "Complete" is a
* `StatusEndpoint` (string), Override the status endpoint. Defaults to `/api/status`
* `StatusSimpleHealth` (bool), If true, calling the status endpoint will use the simplified health check
* `StatusOUVerify` (bool), If true, try to verify OUs when Mutual TLS is on. Defaults to false
* `HttpTimeoutSeconds` (int), HTTP GET request timeout (when connecting to _orchestrator-agent_)
* `AgentPollMinutes` (uint), interval at which *orchestrator* contacts agents for brief status update
* `UnseenAgentForgetHours` (uint), time without contact after which an agent is forgotten
* `StaleSeedFailMinutes` (uint), time after which a seed with no state update is considered to be failed
* `PseudoGTIDPattern` (string), Pattern to look for in binary logs that makes for a unique entry (pseudo GTID). When empty, Pseudo-GTID based refactoring is disabled.
* `PseudoGTIDMonotonicHint` (string), Optional, substring in Pseudo-GTID entry which indicates Pseudo-GTID entries are expected to be monotonically increasing
* `DetectPseudoGTIDQuery` (string), Optional query which is used to authoritatively decide whether pseudo gtid is enabled on instance
* `BinlogEventsChunkSize` (int), Chunk size (X) for `SHOW BINLOG|RELAYLOG EVENTS LIMIT ?,X` statements. Smaller means less locking and more work to be done. Recommendation: keep `10000` or below, due to locking issues.
* `BufferBinlogEvents` (bool), Should we used buffered read on `SHOW BINLOG|RELAYLOG EVENTS` -- releases the database lock sooner (recommended).
* `RecoveryPeriodBlockSeconds` (int), The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on the same instance as well as flapping
* `RecoveryIgnoreHostnameFilters` ([]string), Recovery analysis will completely ignore hosts matching given patterns
* `RecoverMasterClusterFilters` ([]string), Only do master recovery on clusters matching these regexp patterns (of course the ``.*`` pattern matches everything)
Expand Down
2 changes: 1 addition & 1 deletion docs/developers.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ Replace `orch_host` with hostname or orchestrator machine (or do your wildcards

Go to the `Discovery` page at `http://localhost:3000/web/discover`. Type in a hostname & port for a known MySQL instance, preferably one that is part of a larger topology (again I like using _MySQLSandbox_ for such test environments). Submit it.

Depending on your configuration (`DiscoveryPollSeconds`, `InstancePollSeconds`) this may take a few seconds to a minute for
Depending on your configuration (`InstancePollSeconds`) this may take a few seconds to a minute for
`orchestrator` to fully scan the replication topology this instance belongs to, and present it under the [clusters dashboard](http://localhost:3000/web/clusters/).

If you've made it this far, you've done 90% of the work. You may consider configuring Pseudo GTID queries, DC awareness etc. See
Expand Down
6 changes: 2 additions & 4 deletions go/agent/agent_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func InitHttpClient() {
return
}

httpTimeout := time.Duration(time.Duration(config.Config.HttpTimeoutSeconds) * time.Second)
httpTimeout := time.Duration(time.Duration(config.AgentHttpTimeoutSeconds) * time.Second)
dialTimeout := func(network, addr string) (net.Conn, error) {
return net.DialTimeout(network, addr, httpTimeout)
}
Expand Down Expand Up @@ -120,9 +120,7 @@ func SubmitAgent(hostname string, port int, token string) (string, error) {
}

// Try to discover topology instances when an agent submits
if config.Config.AgentAutoDiscover {
DiscoverAgentInstance(hostname, port)
}
go DiscoverAgentInstance(hostname, port)

return hostname, err
}
Expand Down
10 changes: 5 additions & 5 deletions go/app/prompt.go
Original file line number Diff line number Diff line change
Expand Up @@ -713,19 +713,19 @@ Cheatsheet:
to possible chaos). Locks are placed in the backend database, and so multiple orchestrator instances are safe.
Operations automatically acquire locks and release them. This command manually acquires a lock, and will
block other operations on the instance until lock is released.
Note that orchestrator automatically assumes locks to be expired after MaintenanceExpireMinutes (in config).
Note that orchestrator automatically assumes locks to be expired after MaintenanceExpireMinutes (hard coded value).
Examples:

orchestrator -c begin-maintenance -i instance.to.lock.com --duration=3h --reason="load testing; do not disturb"
accepted duration format: 10s, 30m, 24h, 3d, 4w

orchestrator -c begin-maintenance -i instance.to.lock.com --reason="load testing; do not disturb"
--duration not given; default to config's MaintenanceExpireMinutes
--duration not given; default to MaintenanceExpireMinutes (hard coded value)

end-maintenance
Remove maintenance lock; such lock may have been gained by an explicit begin-maintenance command implicitly
by a topology change. You should generally only remove locks you have placed manually; orchestrator will
automatically expire locks after MaintenanceExpireMinutes (in config).
automatically expire locks after MaintenanceExpireMinutes (hard coded value).
Example:

orchestrator -c end-maintenance -i locked.instance.com
Expand All @@ -737,14 +737,14 @@ Cheatsheet:
Downtime is different than maintanence in that it places no lock (mainenance uses an exclusive lock on the instance).
It is OK to downtime an instance that is already downtimed -- the new begin-downtime command will override whatever
previous downtime attributes there were on downtimes instance.
Note that orchestrator automatically assumes downtime to be expired after MaintenanceExpireMinutes (in config).
Note that orchestrator automatically assumes downtime to be expired after MaintenanceExpireMinutes (hard coded value).
Examples:

orchestrator -c begin-downtime -i instance.to.downtime.com --duration=3h --reason="dba handling; do not do recovery"
accepted duration format: 10s, 30m, 24h, 3d, 4w

orchestrator -c begin-downtime -i instance.to.lock.com --reason="dba handling; do not do recovery"
--duration not given; default to config's MaintenanceExpireMinutes
--duration not given; default to MaintenanceExpireMinutes (hard coded value)

end-downtime
Indicate an instance is no longer downtimed. Typically you should not need to use this since
Expand Down
4 changes: 0 additions & 4 deletions go/cmd/orchestrator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ func main() {
debug := flag.Bool("debug", false, "debug mode (very verbose)")
stack := flag.Bool("stack", false, "add stack trace upon error")
config.RuntimeCLIFlags.SkipBinlogSearch = flag.Bool("skip-binlog-search", false, "when matching via Pseudo-GTID, only use relay logs. This can save the hassle of searching for a non-existend pseudo-GTID entry, for example in servers with replication filters.")
config.RuntimeCLIFlags.Databaseless = flag.Bool("databaseless", false, "EXPERIMENTAL! Work without backend database")
config.RuntimeCLIFlags.SkipUnresolve = flag.Bool("skip-unresolve", false, "Do not unresolve a host name")
config.RuntimeCLIFlags.SkipUnresolveCheck = flag.Bool("skip-unresolve-check", false, "Skip/ignore checking an unresolve mapping (via hostname_unresolve table) resolves back to same hostname")
config.RuntimeCLIFlags.Noop = flag.Bool("noop", false, "Dry run; do not perform destructing operations")
Expand Down Expand Up @@ -112,9 +111,6 @@ func main() {
} else {
config.Read("/etc/orchestrator.conf.json", "conf/orchestrator.conf.json", "orchestrator.conf.json")
}
if *config.RuntimeCLIFlags.Databaseless {
config.Config.DatabaselessMode__experimental = true
}
if config.Config.Debug {
log.SetLevel(log.DEBUG)
}
Expand Down
1 change: 0 additions & 1 deletion go/config/cli_flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ type CLIFlags struct {
SkipUnresolve *bool
SkipUnresolveCheck *bool
BinlogFile *string
Databaseless *bool
GrabElection *bool
Version *bool
Statement *string
Expand Down
Loading