Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

*: support MockAutoScaler and AWSAutoScaler #40729

Merged
merged 31 commits into from
Feb 1, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
06dd82c
*: support fetch topo from AutoScaler
guo-shaoge Jan 12, 2023
c7b60f3
fix batch_coprocessor
guo-shaoge Jan 12, 2023
2b1badd
Merge branch 'master' of github.com:pingcap/tidb into autoscaler
guo-shaoge Jan 12, 2023
50439a7
fix rpcCtx.Meta nil
guo-shaoge Jan 13, 2023
ba260d0
tmp safe AWSTopoFetcher
guo-shaoge Jan 15, 2023
f3cdf84
Merge branch 'master' of github.com:pingcap/tidb into autoscaler
guo-shaoge Jan 15, 2023
95ad661
add AWSTopoFetcher
guo-shaoge Jan 17, 2023
bdb6b51
batch update same with(cse: 88bfe6e6ef4062b517e04d47bbc09751cc0bca0d)
guo-shaoge Jan 19, 2023
51916c9
fix review
guo-shaoge Jan 19, 2023
dac3d54
fix typo
guo-shaoge Jan 19, 2023
114af33
fix bazel_lint
guo-shaoge Jan 19, 2023
d280e17
refine topo fetcher err msg(cse:70feb371993e38ed92556ce602793b5a53e32…
guo-shaoge Jan 20, 2023
7c132a3
fix fmt
guo-shaoge Jan 20, 2023
61700de
remove using reflect
guo-shaoge Jan 28, 2023
1734140
update TestASType related code
guo-shaoge Jan 28, 2023
dbcad3a
update batch of trivial fix
guo-shaoge Jan 28, 2023
a092fdd
Merge branch 'master' of github.com:pingcap/tidb into autoscaler
guo-shaoge Jan 28, 2023
d6617ab
Merge branch 'master' of github.com:pingcap/tidb into autoscaler
guo-shaoge Jan 31, 2023
45fa4f7
Merge branch 'master' into autoscaler
bestwoody Jan 31, 2023
338229b
change config name: cluster-name -> autoscaler-cluster-id
guo-shaoge Jan 31, 2023
00f6aa6
Merge branch 'autoscaler' of github.com:guo-shaoge/tidb into autoscaler
guo-shaoge Jan 31, 2023
dfaf69c
update config.toml.example
guo-shaoge Jan 31, 2023
fabf2ac
Merge branch 'master' of github.com:pingcap/tidb into autoscaler
guo-shaoge Feb 1, 2023
92fed8f
Merge branch 'master' into autoscaler
guo-shaoge Feb 1, 2023
ed0cf4c
fix unit-test
guo-shaoge Feb 1, 2023
856cabd
Merge branch 'master' into autoscaler
guo-shaoge Feb 1, 2023
582ba44
fix comment
guo-shaoge Feb 1, 2023
3fe60b5
Merge branch 'master' into autoscaler
ti-chi-bot Feb 1, 2023
e345bd9
Merge branch 'autoscaler' of github.com:guo-shaoge/tidb into autoscaler
guo-shaoge Feb 1, 2023
e93ff8a
Merge branch 'master' into autoscaler
guo-shaoge Feb 1, 2023
1396d1a
Merge branch 'master' into autoscaler
ti-chi-bot Feb 1, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ go_library(
"//br/pkg/streamhelper/config",
"//parser/terror",
"//util/logutil",
"//util/tiflashcompute",
"//util/tikvutil",
"//util/versioninfo",
"@com_github_burntsushi_toml//:toml",
Expand Down
56 changes: 55 additions & 1 deletion config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"os"
"os/user"
"path/filepath"
"reflect"
"sort"
"strings"
"sync"
Expand All @@ -35,6 +36,7 @@ import (
logbackupconf "github.com/pingcap/tidb/br/pkg/streamhelper/config"
"github.com/pingcap/tidb/parser/terror"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/tiflashcompute"
"github.com/pingcap/tidb/util/tikvutil"
"github.com/pingcap/tidb/util/versioninfo"
tikvcfg "github.com/tikv/client-go/v2/config"
Expand Down Expand Up @@ -93,6 +95,8 @@ const (
DefAuthTokenRefreshInterval = time.Hour
// EnvVarKeyspaceName is the system env name for keyspace name.
EnvVarKeyspaceName = "KEYSPACE_NAME"
// ConfigKeyspaceFieldName is the struct field name Config.KeyspaceName.
ConfigKeyspaceFieldName = "KeyspaceName"
)

// Valid config maps
Expand Down Expand Up @@ -287,7 +291,14 @@ type Config struct {
Plugin Plugin `toml:"plugin" json:"plugin"`
MaxServerConnections uint32 `toml:"max-server-connections" json:"max-server-connections"`
RunDDL bool `toml:"run-ddl" json:"run-ddl"`
DisaggregatedTiFlash bool `toml:"disaggregated-tiflash" json:"disaggregated-tiflash"`

// These config is related to disaggregated-tiflash mode.
DisaggregatedTiFlash bool `toml:"disaggregated-tiflash" json:"disaggregated-tiflash"`
TiFlashComputeAutoScalerType string `toml:"autoscaler-type" json:"autoscaler-type"`
TiFlashComputeAutoScalerAddr string `toml:"autoscaler-addr" json:"autoscaler-addr"`
IsTiFlashComputeFixedPool bool `toml:"is-tiflashcompute-fixed-pool" json:"is-tiflashcompute-fixed-pool"`
ClusterName string `toml:"cluster-name" json:"cluster-name"`

// TiDBMaxReuseChunk indicates max cached chunk num
TiDBMaxReuseChunk uint32 `toml:"tidb-max-reuse-chunk" json:"tidb-max-reuse-chunk"`
// TiDBMaxReuseColumn indicates max cached column num
Expand Down Expand Up @@ -998,6 +1009,10 @@ var defaultConf = Config{
EnableGlobalKill: true,
TrxSummary: DefaultTrxSummary(),
DisaggregatedTiFlash: false,
TiFlashComputeAutoScalerType: tiflashcompute.DefASStr,
TiFlashComputeAutoScalerAddr: tiflashcompute.DefAWSAutoScalerAddr,
IsTiFlashComputeFixedPool: false,
ClusterName: "",
TiDBMaxReuseChunk: 64,
TiDBMaxReuseColumn: 256,
}
Expand Down Expand Up @@ -1028,6 +1043,34 @@ func StoreGlobalConfig(config *Config) {
tikvcfg.StoreGlobalConfig(&cfg)
}

// GetClusterName returns clusterName, which is KeyspaceName or ClusterName.
func GetClusterName() (string, error) {
c := GetGlobalConfig()
var keyspaceName string
clusterName := c.ClusterName

// TODO: Delete using reflect when keyspace code is merged.
v := reflect.ValueOf(c).FieldByName(ConfigKeyspaceFieldName)
if v.IsValid() {
if v.Kind() != reflect.String {
terror.MustNil(errors.New("config.KeyspaceName should be String type"))
}
keyspaceName = v.String()
}
if keyspaceName != "" && clusterName != "" {
return "", errors.Errorf("config.KeyspaceName(%s) and config.ClusterName(%s) are not empty both", keyspaceName, clusterName)
}
if keyspaceName == "" && clusterName == "" {
return "", errors.Errorf("config.KeyspaceName and config.ClusterName are both empty")
}

res := keyspaceName
if res == "" {
res = clusterName
}
return res, nil
}

// removedConfig contains items that are no longer supported.
// they might still be in the config struct to support import,
// but are not actively used.
Expand Down Expand Up @@ -1312,6 +1355,17 @@ func (c *Config) Valid() error {
return fmt.Errorf("stats-load-queue-size should be [%d, %d]", DefStatsLoadQueueSizeLimit, DefMaxOfStatsLoadQueueSizeLimit)
}

// Check tiflash_compute topo fetch is valid.
if c.DisaggregatedTiFlash {
if tiflashcompute.GetAutoScalerType(c.TiFlashComputeAutoScalerType) == tiflashcompute.InvalidASType {
return fmt.Errorf("invalid AutoScaler type, expect %s, %s or %s, got %s",
tiflashcompute.MockASStr, tiflashcompute.AWSASStr, tiflashcompute.GCPASStr, c.TiFlashComputeAutoScalerType)
}
if c.TiFlashComputeAutoScalerAddr == "" {
return fmt.Errorf("autoscaler-addr cannot be empty when disaggregated-tiflash mode is true")
}
}

// test log level
l := zap.NewAtomicLevel()
return l.UnmarshalText([]byte(c.Log.Level))
Expand Down
16 changes: 16 additions & 0 deletions config/config.toml.example
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,22 @@ enable-enum-length-limit = true
# command can be forwarded to the right TiDB instance to execute.
enable-global-kill = true

# disaggregated-tiflash indicates whether TiDB is in disaggregated tiflash mode, if true, MPP will runs on tiflash_compute nodes.
disaggregated-tiflash = false

# autoscaler-type indicates which type of AutoScaler will be used. Possible values are: mock, aws, gcp.
# Only meaningful when disaggregated-tiflash is true
autoscaler-type = "aws"

# autoscaler-addr is the host of AutoScaler, Only meaningful when disaggregated-tiflash is true.
autoscaler-addr = "tiflash-autoscale-lb.tiflash-autoscale.svc.cluster.local:8081"

# cluster-name is the name of cluster. DON NOT set this if config.KeyspaceName is already set.
cluster-name = ""

# is-tiflashcompute-fixed-pool controls whether autoscaler use fixed shared pool(free user) or not.
is-tiflashcompute-fixed-pool = false

[log]
# Log level: debug, info, warn, error, fatal.
level = "info"
Expand Down
1 change: 1 addition & 0 deletions executor/tiflashtest/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ go_test(
"//testkit",
"//testkit/external",
"//util/israce",
"//util/tiflashcompute",
"@com_github_pingcap_errors//:errors",
"@com_github_pingcap_failpoint//:failpoint",
"@com_github_pingcap_kvproto//pkg/metapb",
Expand Down
22 changes: 13 additions & 9 deletions executor/tiflashtest/tiflash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"github.com/pingcap/tidb/testkit"
"github.com/pingcap/tidb/testkit/external"
"github.com/pingcap/tidb/util/israce"
"github.com/pingcap/tidb/util/tiflashcompute"
"github.com/stretchr/testify/require"
"github.com/tikv/client-go/v2/testutils"
)
Expand Down Expand Up @@ -1258,24 +1259,30 @@ func TestDisaggregatedTiFlash(t *testing.T) {
config.UpdateGlobal(func(conf *config.Config) {
conf.DisaggregatedTiFlash = true
})
defer config.UpdateGlobal(func(conf *config.Config) {
conf.DisaggregatedTiFlash = false
})
err := tiflashcompute.InitGlobalTopoFetcher(tiflashcompute.TestASStr, "", "", false)
require.NoError(t, err)

store := testkit.CreateMockStore(t, withMockTiFlash(2))
tk := testkit.NewTestKit(t, store)
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("create table t(c1 int)")
tk.MustExec("alter table t set tiflash replica 1")
tb := external.GetTableByName(t, tk, "test", "t")
err := domain.GetDomain(tk.Session()).DDL().UpdateTableReplicaInfo(tk.Session(), tb.Meta().ID, true)
err = domain.GetDomain(tk.Session()).DDL().UpdateTableReplicaInfo(tk.Session(), tb.Meta().ID, true)
require.NoError(t, err)
tk.MustExec("set @@session.tidb_isolation_read_engines=\"tiflash\"")

err = tk.ExecToErr("select * from t;")
require.Contains(t, err.Error(), "Please check tiflash_compute node is available")
require.Contains(t, err.Error(), "Cannot find proper topo from AutoScaler")

config.UpdateGlobal(func(conf *config.Config) {
conf.DisaggregatedTiFlash = false
})
tk.MustQuery("select * from t;").Check(testkit.Rows())
err = tiflashcompute.InitGlobalTopoFetcher(tiflashcompute.AWSASStr, "", "", false)
require.NoError(t, err)
err = tk.ExecToErr("select * from t;")
require.Contains(t, err.Error(), "[util:1815]Internal : get tiflash_compute topology failed")
}

func TestDisaggregatedTiFlashQuery(t *testing.T) {
Expand Down Expand Up @@ -1304,9 +1311,6 @@ func TestDisaggregatedTiFlashQuery(t *testing.T) {
require.NoError(t, err)
tk.MustExec("set @@session.tidb_isolation_read_engines=\"tiflash\"")

needCheckTiFlashComputeNode := "false"
failpoint.Enable("github.com/pingcap/tidb/planner/core/testDisaggregatedTiFlashQuery", fmt.Sprintf("return(%s)", needCheckTiFlashComputeNode))
defer failpoint.Disable("github.com/pingcap/tidb/planner/core/testDisaggregatedTiFlashQuery")
tk.MustExec("explain select max( tbl_1.col_1 ) as r0 , sum( tbl_1.col_1 ) as r1 , sum( tbl_1.col_8 ) as r2 from tbl_1 where tbl_1.col_8 != 68 or tbl_1.col_3 between null and 939 order by r0,r1,r2;")

tk.MustExec("set @@tidb_partition_prune_mode = 'static';")
Expand Down
1 change: 0 additions & 1 deletion planner/core/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ go_library(
"//sessiontxn/staleread",
"//statistics",
"//statistics/handle",
"//store/driver/backoff",
"//table",
"//table/tables",
"//table/temptable",
Expand Down
2 changes: 2 additions & 0 deletions planner/core/fragment.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ type mppAddr struct {
addr string
}

var _ kv.MPPTaskMeta = &mppAddr{}

func (m *mppAddr) GetAddress() string {
return m.addr
}
Expand Down
19 changes: 0 additions & 19 deletions planner/core/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import (

"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/config"
"github.com/pingcap/tidb/domain"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/expression/aggregation"
Expand All @@ -50,7 +49,6 @@ import (
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/statistics/handle"
"github.com/pingcap/tidb/store/driver/backoff"
"github.com/pingcap/tidb/table"
"github.com/pingcap/tidb/table/tables"
"github.com/pingcap/tidb/table/temptable"
Expand All @@ -67,7 +65,6 @@ import (
"github.com/pingcap/tidb/util/plancodec"
"github.com/pingcap/tidb/util/set"
"github.com/pingcap/tidb/util/size"
"github.com/tikv/client-go/v2/tikv"
)

const (
Expand Down Expand Up @@ -692,13 +689,6 @@ func (ds *DataSource) setPreferredStoreType(hintInfo *tableHintInfo) {
ds.preferStoreType = 0
return
}
if config.GetGlobalConfig().DisaggregatedTiFlash && !isTiFlashComputeNodeAvailable(ds.ctx) {
// TiFlash is in disaggregated mode, need to make sure tiflash_compute node is available.
errMsg := "No available tiflash_compute node"
warning := ErrInternal.GenWithStack(errMsg)
ds.ctx.GetSessionVars().StmtCtx.AppendWarning(warning)
return
}
for _, path := range ds.possibleAccessPaths {
if path.StoreType == kv.TiFlash {
ds.preferStoreType |= preferTiFlash
Expand All @@ -716,15 +706,6 @@ func (ds *DataSource) setPreferredStoreType(hintInfo *tableHintInfo) {
}
}

func isTiFlashComputeNodeAvailable(ctx sessionctx.Context) bool {
bo := backoff.NewBackofferWithVars(context.Background(), 5000, nil)
stores, err := ctx.GetStore().(tikv.Storage).GetRegionCache().GetTiFlashComputeStores(bo.TiKVBackoffer())
if err != nil || len(stores) == 0 {
return false
}
return true
}

func resetNotNullFlag(schema *expression.Schema, start, end int) {
for i := start; i < end; i++ {
col := *schema.Columns[i]
Expand Down
24 changes: 2 additions & 22 deletions planner/core/planbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
"unsafe"

"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/bindinfo"
"github.com/pingcap/tidb/config"
"github.com/pingcap/tidb/domain"
Expand Down Expand Up @@ -1453,8 +1452,6 @@ func filterPathByIsolationRead(ctx sessionctx.Context, paths []*util.AccessPath,
isolationReadEngines := ctx.GetSessionVars().GetIsolationReadEngines()
availableEngine := map[kv.StoreType]struct{}{}
var availableEngineStr string
var outputComputeNodeErrMsg bool
noTiFlashComputeNode := config.GetGlobalConfig().DisaggregatedTiFlash && !isTiFlashComputeNodeAvailable(ctx)
for i := len(paths) - 1; i >= 0; i-- {
// availableEngineStr is for warning message.
if _, ok := availableEngine[paths[i].StoreType]; !ok {
Expand All @@ -1464,20 +1461,7 @@ func filterPathByIsolationRead(ctx sessionctx.Context, paths []*util.AccessPath,
}
availableEngineStr += paths[i].StoreType.Name()
}
_, exists := isolationReadEngines[paths[i].StoreType]
// Prune this path if:
// 1. path.StoreType doesn't exists in isolationReadEngines or
// 2. TiFlash is disaggregated and the number of tiflash_compute node is zero.
shouldPruneTiFlashCompute := noTiFlashComputeNode && exists && paths[i].StoreType == kv.TiFlash
failpoint.Inject("testDisaggregatedTiFlashQuery", func(val failpoint.Value) {
// Ignore check if tiflash_compute node number.
// After we support disaggregated tiflash in test framework, can delete this failpoint.
shouldPruneTiFlashCompute = val.(bool)
})
if shouldPruneTiFlashCompute {
outputComputeNodeErrMsg = true
}
if (!exists && paths[i].StoreType != kv.TiDB) || shouldPruneTiFlashCompute {
if _, ok := isolationReadEngines[paths[i].StoreType]; !ok && paths[i].StoreType != kv.TiDB {
paths = append(paths[:i], paths[i+1:]...)
}
}
Expand All @@ -1486,11 +1470,7 @@ func filterPathByIsolationRead(ctx sessionctx.Context, paths []*util.AccessPath,
if len(paths) == 0 {
helpMsg := ""
if engineVals == "tiflash" {
if outputComputeNodeErrMsg {
helpMsg = ". Please check tiflash_compute node is available"
} else {
helpMsg = ". Please check tiflash replica or ensure the query is readonly"
}
helpMsg = ". Please check tiflash replica or ensure the query is readonly"
}
err = ErrInternal.GenWithStackByArgs(fmt.Sprintf("No access path for table '%s' is found with '%v' = '%v', valid values can be '%s'%s.", tblName.String(),
variable.TiDBIsolationReadEngines, engineVals, availableEngineStr, helpMsg))
Expand Down
8 changes: 0 additions & 8 deletions session/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -3340,14 +3340,6 @@ func BootstrapSession(store kv.Storage) (*domain.Domain, error) {
return nil, err
}

if config.GetGlobalConfig().DisaggregatedTiFlash {
// Invalid client-go tiflash_compute store cache if necessary.
err = dom.WatchTiFlashComputeNodeChange()
if err != nil {
return nil, err
}
}

if err = extensionimpl.Bootstrap(context.Background(), dom); err != nil {
return nil, err
}
Expand Down
4 changes: 4 additions & 0 deletions store/copr/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ go_library(
"//util/mathutil",
"//util/memory",
"//util/paging",
"//util/tiflashcompute",
"//util/trxevents",
"@com_github_dgraph_io_ristretto//:ristretto",
"@com_github_gogo_protobuf//proto",
Expand All @@ -42,6 +43,7 @@ go_library(
"@com_github_pingcap_kvproto//pkg/mpp",
"@com_github_pingcap_log//:log",
"@com_github_pingcap_tipb//go-tipb",
"@com_github_stathat_consistent//:consistent",
"@com_github_tikv_client_go_v2//config",
"@com_github_tikv_client_go_v2//error",
"@com_github_tikv_client_go_v2//metrics",
Expand Down Expand Up @@ -75,6 +77,7 @@ go_test(
"//kv",
"//store/driver/backoff",
"//testkit/testsetup",
"//util/logutil",
"//util/paging",
"@com_github_pingcap_errors//:errors",
"@com_github_pingcap_kvproto//pkg/coprocessor",
Expand All @@ -86,5 +89,6 @@ go_test(
"@com_github_tikv_client_go_v2//tikv",
"@com_github_tikv_client_go_v2//tikvrpc",
"@org_uber_go_goleak//:goleak",
"@org_uber_go_zap//:zap",
],
)
Loading