Skip to content

Commit

Permalink
slack-vitess-r14.0.5-dsdefense: Add flag to select tx throttler tab…
Browse files Browse the repository at this point in the history
…let type (vitessio#12174) (#95)

* Add flag to select tx throttler tablet type (vitessio#12174)

Signed-off-by: Tim Vaillancourt <tim@timvaillancourt.com>

* Remove mistaken git add

Signed-off-by: Tim Vaillancourt <tim@timvaillancourt.com>

---------

Signed-off-by: Tim Vaillancourt <tim@timvaillancourt.com>
  • Loading branch information
timvaillancourt committed Jul 5, 2023
1 parent a67fd1c commit d7a37b3
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 39 deletions.
1 change: 1 addition & 0 deletions config/tablet/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ cacheResultFields: true # enable-query-plan-field-caching
# enable-tx-throttler
# tx-throttler-config
# tx-throttler-healthcheck-cells
# tx-throttler-tablet-types
# enable_transaction_limit
# enable_transaction_limit_dry_run
# transaction_limit_per_user
Expand Down
9 changes: 7 additions & 2 deletions doc/ReplicationLagBasedThrottlingOfTransactions.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,13 @@ If this is not specified a [default](https://github.com/vitessio/vitess/tree/mai
* *tx-throttler-healthcheck-cells*

A comma separated list of datacenter cells. The throttler will only monitor
the non-RDONLY replicas found in these cells for replication lag.
the replicas found in these cells for replication lag.

* *tx-throttler-tablet-types*

A comma separated list of tablet types. The throttler will only monitor tablets
with these types. Only `replica` and/or `rdonly` types are supported. The default
is `replica`.

# Caveats and Known Issues
* The throttler keeps trying to explore the maximum rate possible while keeping
Expand All @@ -39,4 +45,3 @@ lag limit may occasionally be slightly violated.

* Transactions are considered homogeneous. There is currently no support
for specifying how `expensive` a transaction is.

2 changes: 2 additions & 0 deletions go/flags/endtoend/vttablet.txt
Original file line number Diff line number Diff line change
Expand Up @@ -941,6 +941,8 @@ max_rate_approach_threshold: 0.9
Default priority assigned to queries that lack priority information.
--tx-throttler-healthcheck-cells value
Synonym to -tx_throttler_healthcheck_cells
--tx-throttler-tablet-types value
A comma-separated list of tablet types. Only tablets of this type are monitored for replication lag by the transaction throttler. Supported types are replica and/or rdonly. (default replica)
--tx_throttler_config string
The configuration of the transaction throttler as a text formatted throttlerdata.Configuration protocol buffer message (default target_replication_lag_sec: 2
max_replication_lag_sec: 10
Expand Down
46 changes: 23 additions & 23 deletions go/vt/mysqlctl/rice-box.go

Large diffs are not rendered by default.

40 changes: 36 additions & 4 deletions go/vt/vttablet/tabletserver/tabletenv/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ import (
"vitess.io/vitess/go/vt/dbconfigs"
"vitess.io/vitess/go/vt/log"
"vitess.io/vitess/go/vt/throttler"
"vitess.io/vitess/go/vt/topo/topoproto"
"vitess.io/vitess/go/vt/vterrors"

topodatapb "vitess.io/vitess/go/vt/proto/topodata"
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
)

// These constants represent values for various config parameters.
Expand Down Expand Up @@ -135,6 +140,7 @@ func init() {
flagutil.DualFormatStringVar(&currentConfig.TxThrottlerConfig, "tx_throttler_config", defaultConfig.TxThrottlerConfig, "The configuration of the transaction throttler as a text formatted throttlerdata.Configuration protocol buffer message")
flagutil.DualFormatStringListVar(&currentConfig.TxThrottlerHealthCheckCells, "tx_throttler_healthcheck_cells", defaultConfig.TxThrottlerHealthCheckCells, "A comma-separated list of cells. Only tabletservers running in these cells will be monitored for replication lag by the transaction throttler.")
flag.IntVar(&currentConfig.TxThrottlerDefaultPriority, "tx-throttler-default-priority", defaultConfig.TxThrottlerDefaultPriority, "Default priority assigned to queries that lack priority information.")
topoproto.TabletTypeListVar(&currentConfig.TxThrottlerTabletTypes, "tx-throttler-tablet-types", "A comma-separated list of tablet types. Only tablets of this type are monitored for replication lag by the transaction throttler. Supported types are replica and/or rdonly. (default replica)")

flag.BoolVar(&enableHotRowProtection, "enable_hot_row_protection", false, "If true, incoming transactions for the same row (range) will be queued and cannot consume all txpool slots.")
flag.BoolVar(&enableHotRowProtectionDryRun, "enable_hot_row_protection_dry_run", false, "If true, hot row protection is not enforced but logs if transactions would have been queued.")
Expand Down Expand Up @@ -241,6 +247,12 @@ func Init() {
if *txLogHandler != "" {
TxLogger.ServeLogs(*txLogHandler, streamlog.GetFormatter(TxLogger))
}

// HACK: set default ("replica") here because topoproto.TabletTypeListVar(...) defaults dont work
// and topoproto.TabletTypeListFlag doesn't exist in v14
if len(currentConfig.TxThrottlerTabletTypes) == 0 {
currentConfig.TxThrottlerTabletTypes = []topodatapb.TabletType{topodatapb.TabletType_REPLICA}
}
}

// TabletConfig contains all the configuration for query service
Expand Down Expand Up @@ -289,10 +301,11 @@ type TabletConfig struct {
TwoPCCoordinatorAddress string `json:"-"`
TwoPCAbandonAge Seconds `json:"-"`

EnableTxThrottler bool `json:"-"`
TxThrottlerConfig string `json:"-"`
TxThrottlerHealthCheckCells []string `json:"-"`
TxThrottlerDefaultPriority int `json:"-"`
EnableTxThrottler bool `json:"-"`
TxThrottlerConfig string `json:"-"`
TxThrottlerHealthCheckCells []string `json:"-"`
TxThrottlerDefaultPriority int `json:"-"`
TxThrottlerTabletTypes []topodatapb.TabletType `json:"-"`

EnableLagThrottler bool `json:"-"`

Expand Down Expand Up @@ -397,6 +410,9 @@ func (c *TabletConfig) Verify() error {
if err := c.verifyTransactionLimitConfig(); err != nil {
return err
}
if err := c.verifyTxThrottlerConfig(); err != nil {
return err
}
if v := c.HotRowProtection.MaxQueueSize; v <= 0 {
return fmt.Errorf("-hot_row_protection_max_queue_size must be > 0 (specified value: %v)", v)
}
Expand Down Expand Up @@ -442,6 +458,22 @@ func (c *TabletConfig) verifyTransactionLimitConfig() error {
return nil
}

// verifyTxThrottlerConfig checks the TxThrottler related config for sanity.
func (c *TabletConfig) verifyTxThrottlerConfig() error {
if len(c.TxThrottlerTabletTypes) == 0 {
return vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, "--tx-throttler-tablet-types must be defined when transaction throttler is enabled")
}
for _, tabletType := range c.TxThrottlerTabletTypes {
switch tabletType {
case topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY:
continue
default:
return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "unsupported tablet type %q", tabletType)
}
}
return nil
}

// Some of these values are for documentation purposes.
// They actually get overwritten during Init.
var defaultConfig = TabletConfig{
Expand Down
42 changes: 42 additions & 0 deletions go/vt/vttablet/tabletserver/tabletenv/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ import (

"vitess.io/vitess/go/cache"
"vitess.io/vitess/go/vt/dbconfigs"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
"vitess.io/vitess/go/vt/vterrors"
"vitess.io/vitess/go/yaml2"
)

Expand Down Expand Up @@ -240,6 +243,11 @@ func TestFlags(t *testing.T) {
assert.Equal(t, want.DB, currentConfig.DB)
assert.Equal(t, want, currentConfig)

// HACK: expect default ("replica") only after .Init() because topoproto.TabletTypeListVar(...)
// defaults dont work and it's fixed later in .Init()
Init()
want.TxThrottlerTabletTypes = []topodatapb.TabletType{topodatapb.TabletType_REPLICA}

// Simple Init.
Init()
want.OlapReadPool.IdleTimeoutSeconds = 1800
Expand Down Expand Up @@ -363,3 +371,37 @@ func TestFlags(t *testing.T) {
want.SanitizeLogMessages = true
assert.Equal(t, want, currentConfig)
}

func TestVerifyTxThrottlerConfig(t *testing.T) {
{
// default config (replica)
Init()
assert.Nil(t, currentConfig.verifyTxThrottlerConfig())
assert.Equal(t, []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, currentConfig.TxThrottlerTabletTypes)
}
{
// replica + rdonly (allowed)
Init()
currentConfig.TxThrottlerTabletTypes = []topodatapb.TabletType{
topodatapb.TabletType_REPLICA,
topodatapb.TabletType_RDONLY,
}
assert.Nil(t, currentConfig.verifyTxThrottlerConfig())
}
{
// no tablet types
Init()
currentConfig.TxThrottlerTabletTypes = []topodatapb.TabletType{}
err := currentConfig.verifyTxThrottlerConfig()
assert.NotNil(t, err)
assert.Equal(t, vtrpcpb.Code_FAILED_PRECONDITION, vterrors.Code(err))
}
{
// disallowed tablet type
Init()
currentConfig.TxThrottlerTabletTypes = []topodatapb.TabletType{topodatapb.TabletType_DRAINED}
err := currentConfig.verifyTxThrottlerConfig()
assert.NotNil(t, err)
assert.Equal(t, vtrpcpb.Code_INVALID_ARGUMENT, vterrors.Code(err))
}
}
24 changes: 15 additions & 9 deletions go/vt/vttablet/tabletserver/txthrottler/tx_throttler.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ type txThrottlerConfig struct {
// healthCheckCells stores the cell names in which running vttablets will be monitored for
// replication lag.
healthCheckCells []string

tabletTypes []topodatapb.TabletType
}

// txThrottlerState holds the state of an open TxThrottler object.
Expand Down Expand Up @@ -214,6 +216,7 @@ func tryCreateTxThrottler(env tabletenv.Env, topoServer *topo.Server) (*txThrott

return newTxThrottler(env, topoServer, &txThrottlerConfig{
enabled: true,
tabletTypes: env.Config().TxThrottlerTabletTypes,
throttlerConfig: &throttlerConfig,
healthCheckCells: healthCheckCells,
})
Expand Down Expand Up @@ -312,6 +315,7 @@ func newTxThrottlerState(topoServer *topo.Server, config *txThrottlerConfig, tar
return nil, err
}
result := &txThrottlerState{
config: config,
throttler: t,
}
result.healthCheck = healthCheckFactory()
Expand Down Expand Up @@ -362,16 +366,18 @@ func (ts *txThrottlerState) deallocateResources() {
ts.throttler = nil
}

// StatsUpdate is part of the LegacyHealthCheckStatsListener interface.
// StatsUpdate updates the health of a tablet with the given healthcheck.
func (ts *txThrottlerState) StatsUpdate(tabletStats *discovery.LegacyTabletStats) {
// Ignore PRIMARY and RDONLY stats.
// We currently do not monitor RDONLY tablets for replication lag. RDONLY tablets are not
// candidates for becoming primary during failover, and it's acceptable to serve somewhat
// stale date from these.
// TODO(erez): If this becomes necessary, we can add a configuration option that would
// determine whether we consider RDONLY tablets here, as well.
if tabletStats.Target.TabletType != topodatapb.TabletType_REPLICA {
if ts.config.tabletTypes == nil {
return
}
ts.throttler.RecordReplicationLag(time.Now(), tabletStats)

// Monitor tablets for replication lag if they have a tablet
// type specified by the --tx_throttler_tablet_types flag.
for _, expectedTabletType := range ts.config.tabletTypes {
if tabletStats.Target.TabletType == expectedTabletType {
ts.throttler.RecordReplicationLag(time.Now(), tabletStats)
return
}
}
}
3 changes: 2 additions & 1 deletion go/vt/vttablet/tabletserver/txthrottler/tx_throttler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,9 @@ func TestEnabledThrottler(t *testing.T) {
config := tabletenv.NewDefaultConfig()
config.EnableTxThrottler = true
config.TxThrottlerHealthCheckCells = []string{"cell1", "cell2"}
env := tabletenv.NewEnv(config, t.Name())
config.TxThrottlerTabletTypes = []topodatapb.TabletType{topodatapb.TabletType_REPLICA}

env := tabletenv.NewEnv(config, t.Name())
throttler, err := tryCreateTxThrottler(env, ts)
assert.Nil(t, err)
throttler.InitDBConfig(&querypb.Target{
Expand Down

0 comments on commit d7a37b3

Please sign in to comment.