From bb8ba31c0c0716012ce903893505ed6cead06f33 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 21 Mar 2024 12:57:44 +0800 Subject: [PATCH 1/4] tests/api: reduce TestRegionsWithKillRequest test time (#7953) ref tikv/pd#7930 Signed-off-by: lhy1024 Co-authored-by: disksing --- server/api/region_test.go | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/server/api/region_test.go b/server/api/region_test.go index 8c0d78abd4a..e10bfbd1af0 100644 --- a/server/api/region_test.go +++ b/server/api/region_test.go @@ -23,6 +23,7 @@ import ( "net/http" "net/url" "sort" + "sync" "testing" "time" @@ -333,13 +334,28 @@ func TestRegionsWithKillRequest(t *testing.T) { url := fmt.Sprintf("%s%s/api/v1/regions", addr, apiPrefix) mustBootstrapCluster(re, svr) regionCount := 100000 + + // create data + var wg sync.WaitGroup + tasks := make(chan int, regionCount) + for w := 0; w < 16; w++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := range tasks { + r := core.NewTestRegionInfo(uint64(i+2), 1, + []byte(fmt.Sprintf("%09d", i)), + []byte(fmt.Sprintf("%09d", i+1)), + core.SetApproximateKeys(10), core.SetApproximateSize(10)) + mustRegionHeartbeat(re, svr, r) + } + }() + } for i := 0; i < regionCount; i++ { - r := core.NewTestRegionInfo(uint64(i+2), 1, - []byte(fmt.Sprintf("%09d", i)), - []byte(fmt.Sprintf("%09d", i+1)), - core.SetApproximateKeys(10), core.SetApproximateSize(10)) - mustRegionHeartbeat(re, svr, r) + tasks <- i } + close(tasks) + wg.Wait() ctx, cancel := context.WithCancel(context.Background()) req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, http.NoBody) From 89a80875bd6933d52176a7b0f264ab5324ac12bf Mon Sep 17 00:00:00 2001 From: Hu# Date: Thu, 21 Mar 2024 14:15:43 +0800 Subject: [PATCH 2/4] tests/server: move `TestCheckClusterID` out of `leaderServerTestSuite` (#7950) ref tikv/pd#7930 which can use parallel test Signed-off-by: husharp Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- server/server_test.go | 94 +++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/server/server_test.go b/server/server_test.go index 32f5d0646bc..b2b15962fdc 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -88,7 +88,7 @@ func (suite *leaderServerTestSuite) TearDownSuite() { } } -func (suite *leaderServerTestSuite) newTestServersWithCfgs( +func newTestServersWithCfgs( ctx context.Context, cfgs []*config.Config, re *require.Assertions, @@ -135,52 +135,6 @@ func (suite *leaderServerTestSuite) newTestServersWithCfgs( return svrs, cleanup } -func (suite *leaderServerTestSuite) TestCheckClusterID() { - re := suite.Require() - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cfgs := NewTestMultiConfig(assertutil.CheckerWithNilAssert(re), 2) - for i, cfg := range cfgs { - cfg.DataDir = fmt.Sprintf("/tmp/test_pd_check_clusterID_%d", i) - // Clean up before testing. - testutil.CleanServer(cfg.DataDir) - } - originInitial := cfgs[0].InitialCluster - for _, cfg := range cfgs { - cfg.InitialCluster = fmt.Sprintf("%s=%s", cfg.Name, cfg.PeerUrls) - } - - cfgA, cfgB := cfgs[0], cfgs[1] - // Start a standalone cluster. - svrsA, cleanA := suite.newTestServersWithCfgs(ctx, []*config.Config{cfgA}, re) - defer cleanA() - // Close it. - for _, svr := range svrsA { - svr.Close() - } - - // Start another cluster. - _, cleanB := suite.newTestServersWithCfgs(ctx, []*config.Config{cfgB}, re) - defer cleanB() - - // Start previous cluster, expect an error. - cfgA.InitialCluster = originInitial - mockHandler := CreateMockHandler(re, "127.0.0.1") - svr, err := CreateServer(ctx, cfgA, nil, mockHandler) - re.NoError(err) - - etcd, err := embed.StartEtcd(svr.etcdCfg) - re.NoError(err) - urlsMap, err := types.NewURLsMap(svr.cfg.InitialCluster) - re.NoError(err) - tlsConfig, err := svr.cfg.Security.ToTLSConfig() - re.NoError(err) - err = etcdutil.CheckClusterID(etcd.Server.Cluster().ID(), urlsMap, tlsConfig) - re.Error(err) - etcd.Close() - testutil.CleanServer(cfgA.DataDir) -} - func (suite *leaderServerTestSuite) TestRegisterServerHandler() { re := suite.Require() cfg := NewTestSingleConfig(assertutil.CheckerWithNilAssert(re)) @@ -330,3 +284,49 @@ func TestIsPathInDirectory(t *testing.T) { path = filepath.Join(directory, fileName) re.False(isPathInDirectory(path, directory)) } + +func TestCheckClusterID(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + cfgs := NewTestMultiConfig(assertutil.CheckerWithNilAssert(re), 2) + for i, cfg := range cfgs { + cfg.DataDir = fmt.Sprintf("/tmp/test_pd_check_clusterID_%d", i) + // Clean up before testing. + testutil.CleanServer(cfg.DataDir) + } + originInitial := cfgs[0].InitialCluster + for _, cfg := range cfgs { + cfg.InitialCluster = fmt.Sprintf("%s=%s", cfg.Name, cfg.PeerUrls) + } + + cfgA, cfgB := cfgs[0], cfgs[1] + // Start a standalone cluster. + svrsA, cleanA := newTestServersWithCfgs(ctx, []*config.Config{cfgA}, re) + defer cleanA() + // Close it. + for _, svr := range svrsA { + svr.Close() + } + + // Start another cluster. + _, cleanB := newTestServersWithCfgs(ctx, []*config.Config{cfgB}, re) + defer cleanB() + + // Start previous cluster, expect an error. + cfgA.InitialCluster = originInitial + mockHandler := CreateMockHandler(re, "127.0.0.1") + svr, err := CreateServer(ctx, cfgA, nil, mockHandler) + re.NoError(err) + + etcd, err := embed.StartEtcd(svr.etcdCfg) + re.NoError(err) + urlsMap, err := types.NewURLsMap(svr.cfg.InitialCluster) + re.NoError(err) + tlsConfig, err := svr.cfg.Security.ToTLSConfig() + re.NoError(err) + err = etcdutil.CheckClusterID(etcd.Server.Cluster().ID(), urlsMap, tlsConfig) + re.Error(err) + etcd.Close() + testutil.CleanServer(cfgA.DataDir) +} From 7ec0058913c2c8d4176c7cd92cb34a85f700b422 Mon Sep 17 00:00:00 2001 From: Hu# Date: Thu, 21 Mar 2024 15:02:14 +0800 Subject: [PATCH 3/4] mcs: fix micro-service topo's display (#7956) close tikv/pd#7957 Signed-off-by: husharp --- pkg/mcs/scheduling/server/server.go | 4 +++- pkg/mcs/tso/server/server.go | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 738140612b8..47a7cf9962b 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -20,6 +20,7 @@ import ( "net/http" "os" "os/signal" + "path/filepath" "runtime" "strconv" "sync" @@ -413,7 +414,8 @@ func (s *Server) startServer() (err error) { // different service modes provided by the same pd-server binary bs.ServerInfoGauge.WithLabelValues(versioninfo.PDReleaseVersion, versioninfo.PDGitHash).Set(float64(time.Now().Unix())) bs.ServerMaxProcsGauge.Set(float64(runtime.GOMAXPROCS(0))) - deployPath, err := os.Executable() + execPath, err := os.Executable() + deployPath := filepath.Dir(execPath) if err != nil { deployPath = "" } diff --git a/pkg/mcs/tso/server/server.go b/pkg/mcs/tso/server/server.go index bac93ddfb6b..f5f46a29504 100644 --- a/pkg/mcs/tso/server/server.go +++ b/pkg/mcs/tso/server/server.go @@ -20,6 +20,7 @@ import ( "net/http" "os" "os/signal" + "path/filepath" "runtime" "strconv" "sync" @@ -368,7 +369,8 @@ func (s *Server) startServer() (err error) { s.serverLoopCtx, s.serverLoopCancel = context.WithCancel(s.Context()) legacySvcRootPath := endpoint.LegacyRootPath(s.clusterID) tsoSvcRootPath := endpoint.TSOSvcRootPath(s.clusterID) - deployPath, err := os.Executable() + execPath, err := os.Executable() + deployPath := filepath.Dir(execPath) if err != nil { deployPath = "" } From 955d30ab97c71b669badc69e09d21dcec2cdbb0e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 21 Mar 2024 17:20:13 +0800 Subject: [PATCH 4/4] metrics: fix alloc id, current tso and patrol panel (#7961) close tikv/pd#7959 Signed-off-by: lhy1024 --- metrics/grafana/pd.json | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/metrics/grafana/pd.json b/metrics/grafana/pd.json index 15a4b0bfc43..30014a959dc 100644 --- a/metrics/grafana/pd.json +++ b/metrics/grafana/pd.json @@ -1738,7 +1738,7 @@ "tableColumn": "idalloc", "targets": [ { - "expr": "max(pd_cluster_id{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"idalloc\"})", + "expr": "pd_cluster_id{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"idalloc\"}!=0", "format": "time_series", "hide": false, "instant": true, @@ -2284,7 +2284,7 @@ "tableColumn": "tso", "targets": [ { - "expr": "max(pd_cluster_tso{type=\"tso\", dc=\"global\"})", + "expr": "pd_cluster_tso{type=\"tso\", dc=\"global\"}!=0", "format": "time_series", "instant": true, "interval": "", @@ -2588,7 +2588,7 @@ "tableColumn": "tso", "targets": [ { - "expr": "max(pd_cluster_tso{type=\"tso\", dc=\"global\"})", + "expr": "pd_cluster_tso{type=\"tso\", dc=\"global\"}!=0", "format": "time_series", "instant": true, "interval": "", @@ -7895,6 +7895,7 @@ "targets": [ { "expr": "pd_checker_patrol_regions_time{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"} != 0", + "legendFormat": "{{instance}}", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -8474,14 +8475,14 @@ "refId": "A" }, { - "expr": "rate(pd_schedule_scatter_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"fail\"}[1m]*60)", + "expr": "rate(pd_schedule_scatter_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"fail\"}[1m])*60", "format": "time_series", "intervalFactor": 2, "legendFormat": "fail", "refId": "B" }, { - "expr": "rate(pd_schedule_scatter_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"success\"}[1m]*60)", + "expr": "rate(pd_schedule_scatter_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"success\"}[1m])*60", "format": "time_series", "intervalFactor": 2, "legendFormat": "success", @@ -9296,7 +9297,7 @@ "steppedLine": false, "targets": [ { - "expr": "etcd_mvcc_db_total_size_in_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=\"pd\"}", + "expr": "etcd_mvcc_db_total_size_in_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\".*pd.*\"}", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -9304,7 +9305,7 @@ "refId": "A" }, { - "expr": "etcd_mvcc_db_total_size_in_use_in_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=\"pd\"}", + "expr": "etcd_mvcc_db_total_size_in_use_in_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\".*pd.*\"}", "format": "time_series", "hide": false, "intervalFactor": 1,