diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml index e2bf99c026f..3ccb2635e9f 100644 --- a/.github/workflows/check.yaml +++ b/.github/workflows/check.yaml @@ -8,21 +8,11 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 20 steps: - - uses: actions/setup-go@v3 - with: - go-version: '1.21' - name: Checkout code - uses: actions/checkout@v3 - - name: Restore cache - uses: actions/cache@v3 + uses: actions/checkout@v4 + - uses: actions/setup-go@v5 with: - path: | - ~/go/pkg/mod - ~/.cache/go-build - **/.dashboard_download_cache - key: ${{ runner.os }}-golang-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-golang + go-version: '1.21' - name: Make Check run: | SWAGGER=1 make build diff --git a/.github/workflows/label.yaml b/.github/workflows/label.yaml index 5ff2b895528..00438d26b63 100644 --- a/.github/workflows/label.yaml +++ b/.github/workflows/label.yaml @@ -7,7 +7,7 @@ jobs: add_labels: runs-on: ubuntu-latest steps: - - uses: actions/github-script@v4 + - uses: actions/github-script@v7 name: Add labels with: script: | diff --git a/.github/workflows/pd-docker-image.yaml b/.github/workflows/pd-docker-image.yaml index 2a04c030016..5beaa66c156 100644 --- a/.github/workflows/pd-docker-image.yaml +++ b/.github/workflows/pd-docker-image.yaml @@ -15,10 +15,10 @@ jobs: strategy: fail-fast: true steps: - - uses: actions/setup-go@v3 + - name: Checkout code + uses: actions/checkout@v4 + - uses: actions/setup-go@v5 with: go-version: '1.21' - - name: Checkout code - uses: actions/checkout@v3 - name: Make run: make docker-image diff --git a/.github/workflows/pd-tests.yaml b/.github/workflows/pd-tests.yaml index 517a2c480e7..223187737e0 100644 --- a/.github/workflows/pd-tests.yaml +++ b/.github/workflows/pd-tests.yaml @@ -7,6 +7,7 @@ on: - release-5.* - release-6.* - release-7.* + - release-8.* pull_request: branches: - master @@ -14,6 +15,7 @@ on: - release-5.* - release-6.* - release-7.* + - release-8.* concurrency: group: ${{ github.ref }}-${{ github.workflow }} cancel-in-progress: true @@ -23,54 +25,59 @@ jobs: strategy: fail-fast: true matrix: - worker_id: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + include: + - worker_id: 1 + name: 'Unit Test(1)' + - worker_id: 2 + name: 'Unit Test(2)' + - worker_id: 3 + name: 'Tools Test' + - worker_id: 4 + name: 'Client Integration Test' + - worker_id: 5 + name: 'TSO Integration Test' + - worker_id: 6 + name: 'MicroService Integration Test' outputs: - job-total: 13 + job-total: 6 steps: - - uses: actions/setup-go@v3 - with: - go-version: '1.21' - name: Checkout code - uses: actions/checkout@v3 - - name: Restore cache - uses: actions/cache@v3 + uses: actions/checkout@v4 + - uses: actions/setup-go@v5 with: - path: | - ~/go/pkg/mod - ~/.cache/go-build - **/.tools - **/.dashboard_download_cache - key: ${{ runner.os }}-go-${{ matrix.worker_id }}-${{ hashFiles('**/go.sum') }} - - name: Make Test + go-version: '1.21' + - name: ${{ matrix.name }} env: WORKER_ID: ${{ matrix.worker_id }} - WORKER_COUNT: 13 - JOB_COUNT: 10 # 11, 12, 13 are for other integrations jobs run: | - make ci-test-job JOB_COUNT=$(($JOB_COUNT)) JOB_INDEX=$WORKER_ID + make ci-test-job JOB_INDEX=$WORKER_ID mv covprofile covprofile_$WORKER_ID - sed -i "/failpoint_binding/d" covprofile_$WORKER_ID - name: Upload coverage result ${{ matrix.worker_id }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: cover-reports + name: cover-reports-${{ matrix.worker_id }} path: covprofile_${{ matrix.worker_id }} report-coverage: needs: chunks 
runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Download chunk report - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: - name: cover-reports + pattern: cover-reports-* + merge-multiple: true - name: Merge env: TOTAL_JOBS: ${{needs.chunks.outputs.job-total}} - run: for i in $(seq 1 $TOTAL_JOBS); do cat covprofile_$i >> covprofile; done + run: | + for i in $(seq 1 $TOTAL_JOBS); do cat covprofile_$i >> covprofile; done + sed -i "/failpoint_binding/d" covprofile + # only keep the first line(`mode: atomic`) of the coverage profile + sed -i '2,${/mode: atomic/d;}' covprofile - name: Send coverage - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v4.2.0 with: token: ${{ secrets.CODECOV }} file: ./covprofile diff --git a/.github/workflows/tso-consistency-test.yaml b/.github/workflows/tso-consistency-test.yaml index 570cbbc5da8..3cb24898a10 100644 --- a/.github/workflows/tso-consistency-test.yaml +++ b/.github/workflows/tso-consistency-test.yaml @@ -8,10 +8,10 @@ jobs: tso-consistency-test: runs-on: ubuntu-latest steps: - - uses: actions/setup-go@v3 + - name: Checkout code + uses: actions/checkout@v4 + - uses: actions/setup-go@v5 with: go-version: '1.21' - - name: Checkout code - uses: actions/checkout@v3 - name: Make TSO Consistency Test run: make test-tso-consistency diff --git a/.github/workflows/tso-function-test.yaml b/.github/workflows/tso-function-test.yaml index ee7679602f5..13fd6fe7df6 100644 --- a/.github/workflows/tso-function-test.yaml +++ b/.github/workflows/tso-function-test.yaml @@ -6,12 +6,14 @@ on: - release-5.* - release-6.* - release-7.* + - release-8.* pull_request: branches: - master - release-5.* - release-6.* - release-7.* + - release-8.* concurrency: group: ${{ github.ref }}-${{ github.workflow }} cancel-in-progress: true @@ -19,10 +21,10 @@ jobs: tso-function-test: runs-on: ubuntu-latest steps: - - uses: actions/setup-go@v3 + - name: Checkout code + uses: actions/checkout@v4 + - uses: actions/setup-go@v5 with: go-version: '1.21' - - name: Checkout code - uses: actions/checkout@v3 - name: Make TSO Function Test run: make test-tso-function diff --git a/.gitignore b/.gitignore index 748d24872b6..b9be6099e24 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ coverage.xml coverage *.txt go.work* +embedded_assets_handler.go diff --git a/.golangci.yml b/.golangci.yml index 9843142b4ab..e938c24cc59 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -12,7 +12,8 @@ linters: - gosec - bodyclose - testifylint - disable: + - gofmt + - revive - errcheck linters-settings: gocritic: @@ -41,3 +42,171 @@ linters-settings: - require-error - suite-dont-use-pkg - suite-extra-assert-call + disable: + - float-compare + - go-require + gofmt: + # https://golangci-lint.run/usage/linters/#gofmt + # disable for faster check + simplify: false + rewrite-rules: + - pattern: "interface{}" + replacement: "any" + revive: + ignore-generated-header: false + severity: error + confidence: 0.8 + rules: + - name: atomic + severity: warning + exclude: [""] + disabled: false + - name: blank-imports + severity: warning + exclude: [""] + disabled: false + - name: confusing-naming + severity: warning + disabled: false + exclude: [""] + - name: confusing-results + severity: warning + disabled: false + exclude: [""] + - name: context-as-argument + severity: warning + disabled: false + exclude: [""] + arguments: + - allowTypesBefore: "*testing.T,*github.com/user/repo/testing.Harness" + - name: datarace
+ severity: warning + disabled: false + exclude: [""] + - name: defer + severity: warning + disabled: false + exclude: [""] + arguments: + - ["call-chain", "loop"] + - name: dot-imports + severity: warning + disabled: false + exclude: [""] + - name: duplicated-imports + severity: warning + disabled: false + exclude: [""] + - name: empty-block + severity: warning + disabled: false + exclude: [""] + - name: empty-lines + severity: warning + disabled: false + exclude: [""] + - name: error-return + severity: warning + disabled: false + exclude: [""] + - name: error-strings + severity: warning + disabled: false + exclude: [""] + - name: error-naming + severity: warning + disabled: false + exclude: [""] + - name: exported + severity: warning + disabled: false + exclude: [""] + arguments: + - "checkPrivateReceivers" + - "sayRepetitiveInsteadOfStutters" + - name: identical-branches + severity: warning + disabled: false + exclude: [""] + - name: if-return + severity: warning + disabled: false + exclude: [""] + - name: modifies-parameter + severity: warning + disabled: false + exclude: [""] + - name: optimize-operands-order + severity: warning + disabled: false + exclude: [""] + - name: package-comments + severity: warning + disabled: false + exclude: [""] + - name: range + severity: warning + disabled: false + exclude: [""] + - name: range-val-in-closure + severity: warning + disabled: false + exclude: [""] + - name: range-val-address + severity: warning + disabled: false + exclude: [""] + - name: receiver-naming + severity: warning + disabled: false + exclude: [""] + - name: indent-error-flow + severity: warning + disabled: false + exclude: [""] + - name: superfluous-else + severity: warning + disabled: false + exclude: [""] + - name: unnecessary-stmt + severity: warning + disabled: false + exclude: [""] + - name: unreachable-code + severity: warning + disabled: false + exclude: [""] + - name: unused-parameter + severity: warning + disabled: false + exclude: [""] + arguments: + - allowRegex: "^_" + - name: unused-receiver + severity: warning + disabled: false + exclude: [""] + - name: useless-break + severity: warning + disabled: false + exclude: [""] + - name: var-naming + severity: warning + disabled: false + exclude: [""] + - name: waitgroup-by-value + severity: warning + disabled: false + exclude: [""] +issues: + exclude-rules: + - path: (_test\.go|pkg/mock/.*\.go|tests/.*\.go) + linters: + - errcheck + # following path will enable in the future + - path: (pd-analysis|pd-api-bench|pd-backup|pd-ctl|pd-heartbeat-bench|pd-recover|pd-simulator|pd-tso-bench|pd-ut|regions-dump|stores-dump) + linters: + - errcheck + - path: (pkg/tso/admin.go|pkg/schedule/schedulers/split_bucket.go|server/api/plugin_disable.go|server/api/plugin_disable.go|server/api/operator.go|server/api/region.go|pkg/schedule/schedulers/balance_leader.go|server/api/.*\.go|pkg/replication/replication_mode.go|pkg/storage/endpoint/gc_safe_point.go|server/.*\.go|pkg/schedule/schedulers/.*\.go|pkg/syncer/server.go) + linters: + - errcheck diff --git a/Makefile b/Makefile index bb955f19271..dca00012114 100644 --- a/Makefile +++ b/Makefile @@ -80,7 +80,7 @@ BUILD_BIN_PATH := $(ROOT_PATH)/bin build: pd-server pd-ctl pd-recover -tools: pd-tso-bench pd-heartbeat-bench regions-dump stores-dump pd-api-bench +tools: pd-tso-bench pd-heartbeat-bench regions-dump stores-dump pd-api-bench pd-ut PD_SERVER_DEP := ifeq ($(SWAGGER), 1) @@ -92,7 +92,9 @@ ifneq ($(DASHBOARD_DISTRIBUTION_DIR),) endif PD_SERVER_DEP += dashboard-ui -pd-server: 
${PD_SERVER_DEP} +pre-build: ${PD_SERVER_DEP} + +pd-server: pre-build GOEXPERIMENT=$(BUILD_GOEXPERIMENT) CGO_ENABLED=$(BUILD_CGO_ENABLED) go build $(BUILD_FLAGS) -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -tags "$(BUILD_TAGS)" -o $(BUILD_BIN_PATH)/pd-server cmd/pd-server/main.go pd-server-failpoint: @@ -103,10 +105,9 @@ pd-server-failpoint: pd-server-basic: SWAGGER=0 DASHBOARD=0 $(MAKE) pd-server -.PHONY: build tools pd-server pd-server-basic +.PHONY: pre-build build tools pd-server pd-server-basic # Tools - pd-ctl: cd tools && GOEXPERIMENT=$(BUILD_GOEXPERIMENT) CGO_ENABLED=$(BUILD_TOOL_CGO_ENABLED) go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/pd-ctl pd-ctl/main.go pd-tso-bench: @@ -125,8 +126,12 @@ regions-dump: cd tools && CGO_ENABLED=0 go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/regions-dump regions-dump/main.go stores-dump: cd tools && CGO_ENABLED=0 go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/stores-dump stores-dump/main.go +pd-ut: pd-xprog + cd tools && GOEXPERIMENT=$(BUILD_GOEXPERIMENT) CGO_ENABLED=$(BUILD_TOOL_CGO_ENABLED) go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/pd-ut pd-ut/ut.go pd-ut/coverProfile.go +pd-xprog: + cd tools && GOEXPERIMENT=$(BUILD_GOEXPERIMENT) CGO_ENABLED=$(BUILD_TOOL_CGO_ENABLED) go build -tags xprog -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/xprog pd-ut/xprog.go -.PHONY: pd-ctl pd-tso-bench pd-recover pd-analysis pd-heartbeat-bench simulator regions-dump stores-dump pd-api-bench +.PHONY: pd-ctl pd-tso-bench pd-recover pd-analysis pd-heartbeat-bench simulator regions-dump stores-dump pd-api-bench pd-ut #### Docker image #### @@ -165,25 +170,27 @@ SHELL := env PATH='$(PATH)' GOBIN='$(GO_TOOLS_BIN_PATH)' $(shell which bash) install-tools: @mkdir -p $(GO_TOOLS_BIN_PATH) - @which golangci-lint >/dev/null 2>&1 || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(GO_TOOLS_BIN_PATH) v1.55.2 + @which golangci-lint >/dev/null 2>&1 || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(GO_TOOLS_BIN_PATH) v1.56.2 @grep '_' tools.go | sed 's/"//g' | awk '{print $$2}' | xargs go install .PHONY: install-tools #### Static checks #### -check: install-tools tidy static generate-errdoc +check: tidy static generate-errdoc -static: install-tools +static: install-tools pre-build @ echo "gofmt ..." @ gofmt -s -l -d $(PACKAGE_DIRECTORIES) 2>&1 | awk '{ print } END { if (NR > 0) { exit 1 } }' @ echo "golangci-lint ..." @ golangci-lint run --verbose $(PACKAGE_DIRECTORIES) --allow-parallel-runners - @ echo "revive ..." - @ revive -formatter friendly -config revive.toml $(PACKAGES) - @ for mod in $(SUBMODULES); do cd $$mod && $(MAKE) static && cd $(ROOT_PATH) > /dev/null; done +# Because CI downloads the dashboard code and runs gofmt, we can't add this check into static now. +fmt: + @ echo "gofmt ..." + @ gofmt -s -l -w -r 'interface{} -> any' -d $(PACKAGE_DIRECTORIES) 2>&1 | awk '{ print } END { if (NR > 0) { exit 1 } }' + tidy: @ go mod tidy git diff go.mod go.sum | cat @@ -218,6 +225,13 @@ failpoint-disable: install-tools #### Test #### +ut: pd-ut + @$(FAILPOINT_ENABLE) + # only run unit tests + ./bin/pd-ut run --ignore tests --race + @$(CLEAN_UT_BINARY) + @$(FAILPOINT_DISABLE) + PACKAGE_DIRECTORIES := $(subst $(PD_PKG)/,,$(PACKAGES)) TEST_PKGS := $(filter $(shell find . 
-iname "*_test.go" -exec dirname {} \; | \ sort -u | sed -e "s/^\./github.com\/tikv\/pd/"),$(PACKAGES)) @@ -238,9 +252,9 @@ basic-test: install-tools go test $(BASIC_TEST_PKGS) || { $(FAILPOINT_DISABLE); exit 1; } @$(FAILPOINT_DISABLE) -ci-test-job: install-tools dashboard-ui +ci-test-job: install-tools dashboard-ui pd-ut @$(FAILPOINT_ENABLE) - ./scripts/ci-subtask.sh $(JOB_COUNT) $(JOB_INDEX) + ./scripts/ci-subtask.sh $(JOB_COUNT) $(JOB_INDEX) || { $(FAILPOINT_DISABLE); exit 1; } @$(FAILPOINT_DISABLE) TSO_INTEGRATION_TEST_PKGS := $(PD_PKG)/tests/server/tso @@ -296,6 +310,8 @@ split: clean: failpoint-disable clean-test clean-build +CLEAN_UT_BINARY := find . -name '*.test.bin'| xargs rm -f + clean-test: # Cleaning test tmp... rm -rf /tmp/test_pd* @@ -303,6 +319,7 @@ clean-test: rm -rf /tmp/test_etcd* rm -f $(REAL_CLUSTER_TEST_PATH)/playground.log go clean -testcache + @$(CLEAN_UT_BINARY) clean-build: # Cleaning building files... diff --git a/OWNERS b/OWNERS new file mode 100644 index 00000000000..5911dfd3b66 --- /dev/null +++ b/OWNERS @@ -0,0 +1,26 @@ +# See the OWNERS docs at https://go.k8s.io/owners +approvers: + - AndreMouche + - binshi-bing + - bufferflies + - CabinfeverB + - Connor1996 + - disksing + - huachaohuang + - HunDunDM + - HuSharp + - JmPotato + - lhy1024 + - nolouch + - overvenus + - qiuyesuifeng + - rleungx + - siddontang + - Yisaer + - zhouqiang-cl +reviewers: + - BusyJay + - howardlau1999 + - Luffbee + - shafreeck + - xhebox diff --git a/OWNERS_ALIASES b/OWNERS_ALIASES new file mode 100644 index 00000000000..516a466c91e --- /dev/null +++ b/OWNERS_ALIASES @@ -0,0 +1,6 @@ +# Sort the member alphabetically. +aliases: + sig-critical-approvers-config: + - easonn7 + - kevin-xianliu + - niubell diff --git a/client/Makefile b/client/Makefile index dae53222d92..3e8f6b0d383 100644 --- a/client/Makefile +++ b/client/Makefile @@ -12,20 +12,30 @@ # See the License for the specific language governing permissions and # limitations under the License. -GO_TOOLS_BIN_PATH := $(shell pwd)/../.tools/bin +ROOT_PATH := $(shell pwd)/.. +GO_TOOLS_BIN_PATH := $(ROOT_PATH)/.tools/bin PATH := $(GO_TOOLS_BIN_PATH):$(PATH) SHELL := env PATH='$(PATH)' GOBIN='$(GO_TOOLS_BIN_PATH)' $(shell which bash) default: static tidy test -test: - CGO_ENABLE=1 go test ./... -race -cover +test: failpoint-enable + CGO_ENABLED=1 go test ./... -v -tags deadlock -race -cover || { $(MAKE) failpoint-disable && exit 1; } + $(MAKE) failpoint-disable -basic-test: - CGO_ENABLE=1 go test ./... +basic-test: failpoint-enable + CGO_ENABLED=1 go test ./... || { $(MAKE) failpoint-disable && exit 1; } + $(MAKE) failpoint-disable ci-test-job: - CGO_ENABLED=1 go test ./... -race -covermode=atomic -coverprofile=covprofile -coverpkg=../... github.com/tikv/pd/client + if [ -f covprofile ]; then rm covprofile; fi + CGO_ENABLED=1 go test ./... -v -tags deadlock -race -cover -covermode=atomic -coverprofile=covprofile -coverpkg=../... + +failpoint-enable: + cd $(ROOT_PATH) && $(MAKE) failpoint-enable + +failpoint-disable: + cd $(ROOT_PATH) && $(MAKE) failpoint-disable install-tools: cd .. && $(MAKE) install-tools @@ -35,8 +45,6 @@ static: install-tools @ gofmt -s -l -d . 2>&1 | awk '{ print } END { if (NR > 0) { exit 1 } }' @ echo "golangci-lint ..." @ golangci-lint run -c ../.golangci.yml --verbose ./... --allow-parallel-runners - @ echo "revive ..." - @ revive -formatter friendly -config ../revive.toml ./... 
tidy: @ go mod tidy diff --git a/client/client.go b/client/client.go index 8efdd16693c..0916b57dd6e 100644 --- a/client/client.go +++ b/client/client.go @@ -69,16 +69,10 @@ type GlobalConfigItem struct { PayLoad []byte } -// Client is a PD (Placement Driver) RPC client. -// It should not be used after calling Close(). -type Client interface { - // GetClusterID gets the cluster ID from PD. - GetClusterID(ctx context.Context) uint64 +// RPCClient is a PD (Placement Driver) RPC and related mcs client which can only call RPC. +type RPCClient interface { // GetAllMembers gets the members Info from PD GetAllMembers(ctx context.Context) ([]*pdpb.Member, error) - // GetLeaderAddr returns current leader's address. It returns "" before - // syncing leader from server. - GetLeaderAddr() string // GetRegion gets a region and its leader Peer from PD by key. // The region may expire after split. Caller is responsible for caching and // taking care of region change. @@ -133,17 +127,12 @@ type Client interface { StoreGlobalConfig(ctx context.Context, configPath string, items []GlobalConfigItem) error // WatchGlobalConfig returns a stream with all global config and updates WatchGlobalConfig(ctx context.Context, configPath string, revision int64) (chan []GlobalConfigItem, error) - // UpdateOption updates the client option. - UpdateOption(option DynamicOption, value interface{}) error // GetExternalTimestamp returns external timestamp GetExternalTimestamp(ctx context.Context) (uint64, error) // SetExternalTimestamp sets external timestamp SetExternalTimestamp(ctx context.Context, timestamp uint64) error - // GetServiceDiscovery returns ServiceDiscovery - GetServiceDiscovery() ServiceDiscovery - // TSOClient is the TSO client. TSOClient // MetaStorageClient is the meta storage client. @@ -154,6 +143,24 @@ type Client interface { GCClient // ResourceManagerClient manages resource group metadata and token assignment. ResourceManagerClient +} + +// Client is a PD (Placement Driver) RPC client. +// It should not be used after calling Close(). +type Client interface { + RPCClient + + // GetClusterID gets the cluster ID from PD. + GetClusterID(ctx context.Context) uint64 + // GetLeaderURL returns current leader's URL. It returns "" before + // syncing leader from server. + GetLeaderURL() string + // GetServiceDiscovery returns ServiceDiscovery + GetServiceDiscovery() ServiceDiscovery + + // UpdateOption updates the client option. + UpdateOption(option DynamicOption, value any) error + // Close closes the client. Close() } @@ -303,7 +310,7 @@ func (k *serviceModeKeeper) close() { fallthrough case pdpb.ServiceMode_PD_SVC_MODE: if k.tsoClient != nil { - k.tsoClient.Close() + k.tsoClient.close() } case pdpb.ServiceMode_UNKNOWN_SVC_MODE: } @@ -390,7 +397,7 @@ func createClientWithKeyspace( ctx: clientCtx, cancel: clientCancel, keyspaceID: keyspaceID, - svrUrls: addrsToUrls(svrAddrs), + svrUrls: svrAddrs, tlsCfg: tlsCfg, option: newOption(), } @@ -405,6 +412,9 @@ func createClientWithKeyspace( nil, keyspaceID, c.svrUrls, c.tlsCfg, c.option) if err := c.setup(); err != nil { c.cancel() + if c.pdSvcDiscovery != nil { + c.pdSvcDiscovery.Close() + } return nil, err } @@ -437,12 +447,12 @@ func NewAPIContextV1() APIContext { } // GetAPIVersion returns the API version. -func (apiCtx *apiContextV1) GetAPIVersion() (version APIVersion) { +func (*apiContextV1) GetAPIVersion() (version APIVersion) { return V1 } // GetKeyspaceName returns the keyspace name. 
-func (apiCtx *apiContextV1) GetKeyspaceName() (keyspaceName string) { +func (*apiContextV1) GetKeyspaceName() (keyspaceName string) { return "" } @@ -459,7 +469,7 @@ func NewAPIContextV2(keyspaceName string) APIContext { } // GetAPIVersion returns the API version. -func (apiCtx *apiContextV2) GetAPIVersion() (version APIVersion) { +func (*apiContextV2) GetAPIVersion() (version APIVersion) { return V2 } @@ -506,7 +516,7 @@ func newClientWithKeyspaceName( updateTokenConnectionCh: make(chan struct{}, 1), ctx: clientCtx, cancel: clientCancel, - svrUrls: addrsToUrls(svrAddrs), + svrUrls: svrAddrs, tlsCfg: tlsCfg, option: newOption(), } @@ -525,12 +535,15 @@ func newClientWithKeyspaceName( return nil } - // Create a PD service discovery with null keyspace id, then query the real id wth the keyspace name, + // Create a PD service discovery with null keyspace id, then query the real id with the keyspace name, // finally update the keyspace id to the PD service discovery for the following interactions. c.pdSvcDiscovery = newPDServiceDiscovery( clientCtx, clientCancel, &c.wg, c.setServiceMode, updateKeyspaceIDCb, nullKeyspaceID, c.svrUrls, c.tlsCfg, c.option) if err := c.setup(); err != nil { c.cancel() + if c.pdSvcDiscovery != nil { + c.pdSvcDiscovery.Close() + } return nil, err } log.Info("[pd] create pd client with endpoints and keyspace", @@ -578,7 +591,7 @@ func (c *client) setup() error { } // Register callbacks - c.pdSvcDiscovery.AddServingAddrSwitchedCallback(c.scheduleUpdateTokenConnection) + c.pdSvcDiscovery.AddServingURLSwitchedCallback(c.scheduleUpdateTokenConnection) // Create dispatchers c.createTokenDispatcher() @@ -614,12 +627,22 @@ func (c *client) setServiceMode(newMode pdpb.ServiceMode) { log.Info("[pd] changing service mode", zap.String("old-mode", c.serviceMode.String()), zap.String("new-mode", newMode.String())) + c.resetTSOClientLocked(newMode) + oldMode := c.serviceMode + c.serviceMode = newMode + log.Info("[pd] service mode changed", + zap.String("old-mode", oldMode.String()), + zap.String("new-mode", newMode.String())) +} + +// Reset a new TSO client. +func (c *client) resetTSOClientLocked(mode pdpb.ServiceMode) { // Re-create a new TSO client. var ( newTSOCli *tsoClient newTSOSvcDiscovery ServiceDiscovery ) - switch newMode { + switch mode { case pdpb.ServiceMode_PD_SVC_MODE: newTSOCli = newTSOClient(c.ctx, c.option, c.pdSvcDiscovery, &pdTSOStreamBuilderFactory{}) @@ -642,11 +665,11 @@ func (c *client) setServiceMode(newMode pdpb.ServiceMode) { log.Warn("[pd] intend to switch to unknown service mode, just return") return } - newTSOCli.Setup() + newTSOCli.setup() // Replace the old TSO client. oldTSOClient := c.tsoClient c.tsoClient = newTSOCli - oldTSOClient.Close() + oldTSOClient.close() // Replace the old TSO service discovery if needed. oldTSOSvcDiscovery := c.tsoSvcDiscovery // If newTSOSvcDiscovery is nil, that's expected, as it means we are switching to PD service mode and @@ -657,11 +680,6 @@ func (c *client) setServiceMode(newMode pdpb.ServiceMode) { // We are switching from API service mode to PD service mode, so delete the old tso microservice discovery. oldTSOSvcDiscovery.Close() } - oldMode := c.serviceMode - c.serviceMode = newMode - log.Info("[pd] service mode changed", - zap.String("old-mode", oldMode.String()), - zap.String("new-mode", newMode.String())) } func (c *client) getTSOClient() *tsoClient { @@ -670,6 +688,13 @@ func (c *client) getTSOClient() *tsoClient { return c.tsoClient } +// ResetTSOClient resets the TSO client, only for test. 
+func (c *client) ResetTSOClient() { + c.Lock() + defer c.Unlock() + c.resetTSOClientLocked(c.serviceMode) +} + func (c *client) getServiceMode() pdpb.ServiceMode { c.RLock() defer c.RUnlock() @@ -688,9 +713,9 @@ func (c *client) GetClusterID(context.Context) uint64 { return c.pdSvcDiscovery.GetClusterID() } -// GetLeaderAddr returns the leader address. -func (c *client) GetLeaderAddr() string { - return c.pdSvcDiscovery.GetServingAddr() +// GetLeaderURL returns the leader URL. +func (c *client) GetLeaderURL() string { + return c.pdSvcDiscovery.GetServingURL() } // GetServiceDiscovery returns the client-side service discovery object @@ -699,7 +724,7 @@ func (c *client) GetServiceDiscovery() ServiceDiscovery { } // UpdateOption updates the client option. -func (c *client) UpdateOption(option DynamicOption, value interface{}) error { +func (c *client) UpdateOption(option DynamicOption, value any) error { switch option { case MaxTSOBatchWaitInterval: interval, ok := value.(time.Duration) @@ -710,6 +735,9 @@ func (c *client) UpdateOption(option DynamicOption, value interface{}) error { return err } case EnableTSOFollowerProxy: + if c.getServiceMode() != pdpb.ServiceMode_PD_SVC_MODE { + return errors.New("[pd] tso follower proxy is only supported in PD service mode") + } enable, ok := value.(bool) if !ok { return errors.New("[pd] invalid value type for EnableTSOFollowerProxy option, it should be bool") @@ -750,7 +778,7 @@ func (c *client) GetAllMembers(ctx context.Context) ([]*pdpb.Member, error) { // follower pd client and the context which holds forward information. func (c *client) getClientAndContext(ctx context.Context) (pdpb.PDClient, context.Context) { serviceClient := c.pdSvcDiscovery.GetServiceClient() - if serviceClient == nil { + if serviceClient == nil || serviceClient.GetClientConn() == nil { return nil, ctx } return pdpb.NewPDClient(serviceClient.GetClientConn()), serviceClient.BuildGRPCTargetContext(ctx, true) @@ -767,7 +795,7 @@ func (c *client) getRegionAPIClientAndContext(ctx context.Context, allowFollower } } serviceClient = c.pdSvcDiscovery.GetServiceClient() - if serviceClient == nil { + if serviceClient == nil || serviceClient.GetClientConn() == nil { return nil, ctx } return serviceClient, serviceClient.BuildGRPCTargetContext(ctx, !allowFollower) @@ -778,31 +806,52 @@ func (c *client) GetTSAsync(ctx context.Context) TSFuture { } func (c *client) GetLocalTSAsync(ctx context.Context, dcLocation string) TSFuture { - defer trace.StartRegion(ctx, "GetLocalTSAsync").End() - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("GetLocalTSAsync", opentracing.ChildOf(span.Context())) - ctx = opentracing.ContextWithSpan(ctx, span) + defer trace.StartRegion(ctx, "pdclient.GetLocalTSAsync").End() + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.GetLocalTSAsync", opentracing.ChildOf(span.Context())) + defer span.Finish() } - req := tsoReqPool.Get().(*tsoRequest) - req.requestCtx = ctx - req.clientCtx = c.ctx - tsoClient := c.getTSOClient() - req.start = time.Now() - req.dcLocation = dcLocation + return c.dispatchTSORequestWithRetry(ctx, dcLocation) +} - if tsoClient == nil { - req.done <- errs.ErrClientGetTSO.FastGenByArgs("tso client is nil") - return req - } +const ( + dispatchRetryDelay = 50 * time.Millisecond + dispatchRetryCount = 2 +) - if err := tsoClient.dispatchRequest(dcLocation, req); err != nil { - // Wait for a while and try again - time.Sleep(50 * 
time.Millisecond) - if err = tsoClient.dispatchRequest(dcLocation, req); err != nil { - req.done <- err +func (c *client) dispatchTSORequestWithRetry(ctx context.Context, dcLocation string) TSFuture { + var ( + retryable bool + err error + req *tsoRequest + ) + for i := 0; i < dispatchRetryCount; i++ { + // Do not delay for the first time. + if i > 0 { + time.Sleep(dispatchRetryDelay) + } + // Get the tsoClient each time, as it may be initialized or switched during the process. + tsoClient := c.getTSOClient() + if tsoClient == nil { + err = errs.ErrClientGetTSO.FastGenByArgs("tso client is nil") + continue + } + // Get a new request from the pool if it's nil or not from the current pool. + if req == nil || req.pool != tsoClient.tsoReqPool { + req = tsoClient.getTSORequest(ctx, dcLocation) + } + retryable, err = tsoClient.dispatchRequest(req) + if !retryable { + break } } + if err != nil { + if req == nil { + return newTSORequestFastFail(err) + } + req.tryDone(err) + } return req } @@ -879,9 +928,9 @@ func handleRegionResponse(res *pdpb.GetRegionResponse) *Region { return r } -func (c *client) GetRegionFromMember(ctx context.Context, key []byte, memberURLs []string, opts ...GetRegionOption) (*Region, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetRegionFromMember", opentracing.ChildOf(span.Context())) +func (c *client) GetRegionFromMember(ctx context.Context, key []byte, memberURLs []string, _ ...GetRegionOption) (*Region, error) { + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.GetRegionFromMember", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -918,8 +967,8 @@ func (c *client) GetRegionFromMember(ctx context.Context, key []byte, memberURLs } func (c *client) GetRegion(ctx context.Context, key []byte, opts ...GetRegionOption) (*Region, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetRegion", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.GetRegion", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -956,8 +1005,8 @@ func (c *client) GetRegion(ctx context.Context, key []byte, opts ...GetRegionOpt } func (c *client) GetPrevRegion(ctx context.Context, key []byte, opts ...GetRegionOption) (*Region, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetPrevRegion", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.GetPrevRegion", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -994,8 +1043,8 @@ func (c *client) GetPrevRegion(ctx context.Context, key []byte, opts ...GetRegio } func (c *client) GetRegionByID(ctx context.Context, regionID uint64, opts ...GetRegionOption) (*Region, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetRegionByID", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.GetRegionByID", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -1032,8 +1081,8 @@ func (c *client) 
GetRegionByID(ctx context.Context, regionID uint64, opts ...Get } func (c *client) ScanRegions(ctx context.Context, key, endKey []byte, limit int, opts ...GetRegionOption) ([]*Region, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.ScanRegions", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.ScanRegions", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -1107,8 +1156,8 @@ func handleRegionsResponse(resp *pdpb.ScanRegionsResponse) []*Region { } func (c *client) GetStore(ctx context.Context, storeID uint64) (*metapb.Store, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetStore", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.GetStore", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -1151,8 +1200,8 @@ func (c *client) GetAllStores(ctx context.Context, opts ...GetStoreOption) ([]*m opt(options) } - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetAllStores", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.GetAllStores", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -1178,8 +1227,8 @@ func (c *client) GetAllStores(ctx context.Context, opts ...GetStoreOption) ([]*m } func (c *client) UpdateGCSafePoint(ctx context.Context, safePoint uint64) (uint64, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.UpdateGCSafePoint", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.UpdateGCSafePoint", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -1209,8 +1258,8 @@ func (c *client) UpdateGCSafePoint(ctx context.Context, safePoint uint64) (uint6 // determine the safepoint for multiple services, it does not trigger a GC // job. Use UpdateGCSafePoint to trigger the GC job if needed. 
func (c *client) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.UpdateServiceGCSafePoint", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.UpdateServiceGCSafePoint", opentracing.ChildOf(span.Context())) defer span.Finish() } @@ -1239,8 +1288,8 @@ func (c *client) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, } func (c *client) ScatterRegion(ctx context.Context, regionID uint64) error { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.ScatterRegion", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.ScatterRegion", opentracing.ChildOf(span.Context())) defer span.Finish() } return c.scatterRegionsWithGroup(ctx, regionID, "") @@ -1273,16 +1322,16 @@ func (c *client) scatterRegionsWithGroup(ctx context.Context, regionID uint64, g } func (c *client) ScatterRegions(ctx context.Context, regionsID []uint64, opts ...RegionsOption) (*pdpb.ScatterRegionResponse, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.ScatterRegions", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.ScatterRegions", opentracing.ChildOf(span.Context())) defer span.Finish() } return c.scatterRegionsWithOptions(ctx, regionsID, opts...) } func (c *client) SplitAndScatterRegions(ctx context.Context, splitKeys [][]byte, opts ...RegionsOption) (*pdpb.SplitAndScatterRegionsResponse, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.SplitAndScatterRegions", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.SplitAndScatterRegions", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -1309,8 +1358,8 @@ func (c *client) SplitAndScatterRegions(ctx context.Context, splitKeys [][]byte, } func (c *client) GetOperator(ctx context.Context, regionID uint64) (*pdpb.GetOperatorResponse, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetOperator", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.GetOperator", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -1332,8 +1381,8 @@ func (c *client) GetOperator(ctx context.Context, regionID uint64) (*pdpb.GetOpe // SplitRegions split regions by given split keys func (c *client) SplitRegions(ctx context.Context, splitKeys [][]byte, opts ...RegionsOption) (*pdpb.SplitRegionsResponse, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.SplitRegions", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.SplitRegions", opentracing.ChildOf(span.Context())) defer span.Finish() } start := 
time.Now() @@ -1396,33 +1445,14 @@ func (c *client) scatterRegionsWithOptions(ctx context.Context, regionsID []uint return resp, nil } -func addrsToUrls(addrs []string) []string { - // Add default schema "http://" to addrs. - urls := make([]string, 0, len(addrs)) - for _, addr := range addrs { - if strings.Contains(addr, "://") { - urls = append(urls, addr) - } else { - urls = append(urls, "http://"+addr) - } - } - return urls -} - -// IsLeaderChange will determine whether there is a leader change. -func IsLeaderChange(err error) bool { - if err == errs.ErrClientTSOStreamClosed { - return true - } - errMsg := err.Error() - return strings.Contains(errMsg, errs.NotLeaderErr) || - strings.Contains(errMsg, errs.MismatchLeaderErr) || - strings.Contains(errMsg, errs.NotServedErr) -} +const ( + httpSchemePrefix = "http://" + httpsSchemePrefix = "https://" +) func trimHTTPPrefix(str string) string { - str = strings.TrimPrefix(str, "http://") - str = strings.TrimPrefix(str, "https://") + str = strings.TrimPrefix(str, httpSchemePrefix) + str = strings.TrimPrefix(str, httpsSchemePrefix) return str } diff --git a/client/client_test.go b/client/client_test.go index fda334a9ef2..76cded79053 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -63,6 +63,12 @@ func TestUpdateURLs(t *testing.T) { re.Equal(getURLs([]*pdpb.Member{members[1], members[3], members[2]}), cli.GetServiceURLs()) cli.updateURLs(members) re.Equal(getURLs([]*pdpb.Member{members[1], members[3], members[2], members[0]}), cli.GetServiceURLs()) + cli.updateURLs(members[1:]) + re.Equal(getURLs([]*pdpb.Member{members[1], members[3], members[2]}), cli.GetServiceURLs()) + cli.updateURLs(members[2:]) + re.Equal(getURLs([]*pdpb.Member{members[3], members[2]}), cli.GetServiceURLs()) + cli.updateURLs(members[3:]) + re.Equal(getURLs([]*pdpb.Member{members[3]}), cli.GetServiceURLs()) } const testClientURL = "tmp://test.url:5255" diff --git a/client/errs/errno.go b/client/errs/errno.go index c095bbe4b4a..0dbcb4fe147 100644 --- a/client/errs/errno.go +++ b/client/errs/errno.go @@ -20,21 +20,20 @@ import ( "github.com/pingcap/errors" ) +// Note: keep the same as the ones defined on the server side to ensure the client can use them correctly. const ( + // NoLeaderErr indicates there is no leader in the cluster currently. + NoLeaderErr = "no leader" // NotLeaderErr indicates the non-leader member received the requests which should be received by leader. - // Note: keep the same as the ones defined on the server side, because the client side checks if an error message - // contains this string to judge whether the leader is changed. - NotLeaderErr = "is not leader" + NotLeaderErr = "not leader" // MismatchLeaderErr indicates the non-leader member received the requests which should be received by leader. - // Note: keep the same as the ones defined on the server side, because the client side checks if an error message - // contains this string to judge whether the leader is changed. MismatchLeaderErr = "mismatch leader id" // NotServedErr indicates an tso node/pod received the requests for the keyspace groups which are not served by it. - // Note: keep the same as the ones defined on the server side, because the client side checks if an error message - // contains this string to judge whether the leader is changed. NotServedErr = "is not served" // RetryTimeoutErr indicates the server is busy. RetryTimeoutErr = "retry timeout" + // NotPrimaryErr indicates the non-primary member received the requests which should be received by primary. 
+ NotPrimaryErr = "not primary" ) // client errors @@ -51,6 +50,7 @@ var ( ErrClientGetClusterInfo = errors.Normalize("get cluster info failed", errors.RFCCodeText("PD:client:ErrClientGetClusterInfo")) ErrClientUpdateMember = errors.Normalize("update member failed, %v", errors.RFCCodeText("PD:client:ErrUpdateMember")) ErrClientNoAvailableMember = errors.Normalize("no available member", errors.RFCCodeText("PD:client:ErrClientNoAvailableMember")) + ErrClientNoTargetMember = errors.Normalize("no target member", errors.RFCCodeText("PD:client:ErrClientNoTargetMember")) ErrClientProtoUnmarshal = errors.Normalize("failed to unmarshal proto", errors.RFCCodeText("PD:proto:ErrClientProtoUnmarshal")) ErrClientGetMultiResponse = errors.Normalize("get invalid value response %v, must only one", errors.RFCCodeText("PD:client:ErrClientGetMultiResponse")) ErrClientGetServingEndpoint = errors.Normalize("get serving endpoint failed", errors.RFCCodeText("PD:client:ErrClientGetServingEndpoint")) @@ -90,7 +90,7 @@ var ( var ( ErrClientListResourceGroup = errors.Normalize("get all resource group failed, %v", errors.RFCCodeText("PD:client:ErrClientListResourceGroup")) ErrClientResourceGroupConfigUnavailable = errors.Normalize("resource group config is unavailable, %v", errors.RFCCodeText("PD:client:ErrClientResourceGroupConfigUnavailable")) - ErrClientResourceGroupThrottled = errors.Normalize("exceeded resource group quota limitation", errors.RFCCodeText("PD:client:ErrClientResourceGroupThrottled")) + ErrClientResourceGroupThrottled = errors.Normalize("exceeded resource group quota limitation, estimated wait time %s, ltb state is %.2f:%.2f", errors.RFCCodeText("PD:client:ErrClientResourceGroupThrottled")) ) // ErrClientGetResourceGroup is the error type for getting resource group. diff --git a/client/errs/errs.go b/client/errs/errs.go index 47f7c29a467..da333efda4c 100644 --- a/client/errs/errs.go +++ b/client/errs/errs.go @@ -15,11 +15,29 @@ package errs import ( + "strings" + "github.com/pingcap/errors" "go.uber.org/zap" "go.uber.org/zap/zapcore" ) +// IsLeaderChange will determine whether there is a leader/primary change. +func IsLeaderChange(err error) bool { + if err == nil { + return false + } + if err == ErrClientTSOStreamClosed { + return true + } + errMsg := err.Error() + return strings.Contains(errMsg, NoLeaderErr) || + strings.Contains(errMsg, NotLeaderErr) || + strings.Contains(errMsg, MismatchLeaderErr) || + strings.Contains(errMsg, NotServedErr) || + strings.Contains(errMsg, NotPrimaryErr) +} + // ZapError is used to make the log output easier. func ZapError(err error, causeError ...error) zap.Field { if err == nil { diff --git a/client/gc_client.go b/client/gc_client.go index fff292405c2..21eb0051499 100644 --- a/client/gc_client.go +++ b/client/gc_client.go @@ -34,8 +34,8 @@ type GCClient interface { // UpdateGCSafePointV2 update gc safe point for the given keyspace. 
func (c *client) UpdateGCSafePointV2(ctx context.Context, keyspaceID uint32, safePoint uint64) (uint64, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.UpdateGCSafePointV2", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.UpdateGCSafePointV2", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -63,8 +63,8 @@ func (c *client) UpdateGCSafePointV2(ctx context.Context, keyspaceID uint32, saf // UpdateServiceSafePointV2 update service safe point for the given keyspace. func (c *client) UpdateServiceSafePointV2(ctx context.Context, keyspaceID uint32, serviceID string, ttl int64, safePoint uint64) (uint64, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.UpdateServiceSafePointV2", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.UpdateServiceSafePointV2", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() diff --git a/client/go.mod b/client/go.mod index 6a9d29a3184..6baa2f112f4 100644 --- a/client/go.mod +++ b/client/go.mod @@ -18,7 +18,7 @@ require ( go.uber.org/goleak v1.1.11 go.uber.org/zap v1.24.0 golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4 - google.golang.org/grpc v1.59.0 + google.golang.org/grpc v1.62.1 google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9 ) @@ -34,11 +34,11 @@ require ( github.com/prometheus/common v0.46.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/net v0.20.0 // indirect - golang.org/x/sys v0.16.0 // indirect + golang.org/x/net v0.23.0 // indirect + golang.org/x/sys v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 // indirect - google.golang.org/protobuf v1.32.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 // indirect + google.golang.org/protobuf v1.33.0 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/client/go.sum b/client/go.sum index a58d351ebcf..54942bb0bb8 100644 --- a/client/go.sum +++ b/client/go.sum @@ -22,8 +22,8 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= @@ -110,8 +110,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net 
v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -122,8 +122,8 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= @@ -144,16 +144,16 @@ golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 h1:Jyp0Hsi0bmHXG6k9eATXoYtjd6e2UzZ1SCn/wIupY14= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17/go.mod h1:oQ5rr10WTTMvP4A36n8JpR1OrO1BEiV4f78CneXZxkA= -google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= -google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 h1:AjyfHzEPEFp/NpvfN5g+KDla3EMojjhRVZc1i7cj+oM= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80/go.mod h1:PAREbraiVEVGVdTZsVWjSbbTtSyGbAgIIvni8a8CD5s= +google.golang.org/grpc v1.62.1 h1:B4n+nfKzOICUXMgyrNd19h/I9oH0L1pizfk1d4zSgTk= +google.golang.org/grpc v1.62.1/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE= google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9 h1:ATnmU8nL2NfIyTSiBvJVDIDIr3qBmeW+c7z7XU21eWs= google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9/go.mod h1:j5uROIAAgi3YmtiETMt1LW0d/lHqQ7wwrIY4uGRXLQ4= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= -google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/client/grpcutil/grpcutil.go b/client/grpcutil/grpcutil.go index b6be2594b4d..0e987825c02 100644 --- a/client/grpcutil/grpcutil.go +++ b/client/grpcutil/grpcutil.go @@ -27,7 +27,9 @@ import ( "github.com/tikv/pd/client/errs" "go.uber.org/zap" "google.golang.org/grpc" + "google.golang.org/grpc/backoff" "google.golang.org/grpc/credentials" + "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/metadata" ) @@ -53,7 +55,7 @@ const ( // ctx will be noop. Users should call ClientConn.Close to terminate all the // pending operations after this function returns. func GetClientConn(ctx context.Context, addr string, tlsCfg *tls.Config, do ...grpc.DialOption) (*grpc.ClientConn, error) { - opt := grpc.WithInsecure() //nolint + opt := grpc.WithTransportCredentials(insecure.NewCredentials()) if tlsCfg != nil { creds := credentials.NewTLS(tlsCfg) opt = grpc.WithTransportCredentials(creds) @@ -62,7 +64,18 @@ func GetClientConn(ctx context.Context, addr string, tlsCfg *tls.Config, do ...g if err != nil { return nil, errs.ErrURLParse.Wrap(err).GenWithStackByCause() } - cc, err := grpc.DialContext(ctx, u.Host, append(do, opt)...) + // Here we use a shorter MaxDelay to make the connection recover faster. + // The default MaxDelay is 120s, which is too long for us. + backoffOpts := grpc.WithConnectParams(grpc.ConnectParams{ + Backoff: backoff.Config{ + BaseDelay: time.Second, + Multiplier: 1.6, + Jitter: 0.2, + MaxDelay: 3 * time.Second, + }, + }) + do = append(do, opt, backoffOpts) + cc, err := grpc.DialContext(ctx, u.Host, do...) if err != nil { return nil, errs.ErrGRPCDial.Wrap(err).GenWithStackByCause() } @@ -71,8 +84,8 @@ func GetClientConn(ctx context.Context, addr string, tlsCfg *tls.Config, do ...g // BuildForwardContext creates a context with receiver metadata information. // It is used in client side. -func BuildForwardContext(ctx context.Context, addr string) context.Context { - md := metadata.Pairs(ForwardMetadataKey, addr) +func BuildForwardContext(ctx context.Context, url string) context.Context { + md := metadata.Pairs(ForwardMetadataKey, url) return metadata.NewOutgoingContext(ctx, md) } @@ -111,17 +124,17 @@ func getValueFromMetadata(ctx context.Context, key string, f func(context.Contex // GetOrCreateGRPCConn returns the corresponding grpc client connection of the given addr. // Returns the old one if's already existed in the clientConns; otherwise creates a new one and returns it. 
-func GetOrCreateGRPCConn(ctx context.Context, clientConns *sync.Map, addr string, tlsCfg *tls.Config, opt ...grpc.DialOption) (*grpc.ClientConn, error) { - conn, ok := clientConns.Load(addr) +func GetOrCreateGRPCConn(ctx context.Context, clientConns *sync.Map, url string, tlsCfg *tls.Config, opt ...grpc.DialOption) (*grpc.ClientConn, error) { + conn, ok := clientConns.Load(url) if ok { // TODO: check the connection state. return conn.(*grpc.ClientConn), nil } dCtx, cancel := context.WithTimeout(ctx, dialTimeout) defer cancel() - cc, err := GetClientConn(dCtx, addr, tlsCfg, opt...) + cc, err := GetClientConn(dCtx, url, tlsCfg, opt...) failpoint.Inject("unreachableNetwork2", func(val failpoint.Value) { - if val, ok := val.(string); ok && val == addr { + if val, ok := val.(string); ok && val == url { cc = nil err = errors.Errorf("unreachable network") } @@ -129,7 +142,7 @@ func GetOrCreateGRPCConn(ctx context.Context, clientConns *sync.Map, addr string if err != nil { return nil, err } - conn, loaded := clientConns.LoadOrStore(addr, cc) + conn, loaded := clientConns.LoadOrStore(url, cc) if !loaded { // Successfully stored the connection. return cc, nil diff --git a/client/http/api.go b/client/http/api.go index bd0bf830a1f..3376a48770d 100644 --- a/client/http/api.go +++ b/client/http/api.go @@ -41,6 +41,7 @@ const ( membersPrefix = "/pd/api/v1/members" leaderPrefix = "/pd/api/v1/leader" transferLeader = "/pd/api/v1/leader/transfer" + health = "/pd/api/v1/health" // Config Config = "/pd/api/v1/config" ClusterVersion = "/pd/api/v1/config/cluster-version" @@ -77,8 +78,12 @@ const ( ClusterStatus = "/pd/api/v1/cluster/status" Status = "/pd/api/v1/status" Version = "/pd/api/v1/version" + operators = "/pd/api/v1/operators" // Micro Service microServicePrefix = "/pd/api/v2/ms" + // Keyspace + KeyspaceConfig = "/pd/api/v2/keyspaces/%s/config" + GetKeyspaceMetaByName = "/pd/api/v2/keyspaces/%s" ) // RegionByID returns the path of PD HTTP API to get region by ID. @@ -94,7 +99,7 @@ func RegionByKey(key []byte) string { // RegionsByKeyRange returns the path of PD HTTP API to scan regions with given start key, end key and limit parameters. func RegionsByKeyRange(keyRange *KeyRange, limit int) string { startKeyStr, endKeyStr := keyRange.EscapeAsUTF8Str() - return fmt.Sprintf("%s?start_key=%s&end_key=%s&limit=%d", + return fmt.Sprintf("%s?key=%s&end_key=%s&limit=%d", regionsByKey, startKeyStr, endKeyStr, limit) } @@ -195,3 +200,18 @@ func PProfGoroutineWithDebugLevel(level int) string { func MicroServiceMembers(service string) string { return fmt.Sprintf("%s/members/%s", microServicePrefix, service) } + +// MicroServicePrimary returns the path of PD HTTP API to get the primary of microservice. +func MicroServicePrimary(service string) string { + return fmt.Sprintf("%s/primary/%s", microServicePrefix, service) +} + +// GetUpdateKeyspaceConfigURL returns the path of PD HTTP API to update keyspace config. +func GetUpdateKeyspaceConfigURL(keyspaceName string) string { + return fmt.Sprintf(KeyspaceConfig, keyspaceName) +} + +// GetKeyspaceMetaByNameURL returns the path of PD HTTP API to get keyspace meta by keyspace name. +func GetKeyspaceMetaByNameURL(keyspaceName string) string { + return fmt.Sprintf(GetKeyspaceMetaByName, keyspaceName) +} diff --git a/client/http/client.go b/client/http/client.go index 1235266a271..123ca616422 100644 --- a/client/http/client.go +++ b/client/http/client.go @@ -47,7 +47,7 @@ const ( ) // respHandleFunc is the function to handle the HTTP response. 
-type respHandleFunc func(resp *http.Response, res interface{}) error +type respHandleFunc func(resp *http.Response, res any) error // clientInner is the inner implementation of the PD HTTP client, which contains some fundamental fields. // It is wrapped by the `client` struct to make sure the inner implementation won't be exposed and could @@ -66,6 +66,8 @@ type clientInner struct { requestCounter *prometheus.CounterVec executionDuration *prometheus.HistogramVec + // defaultSD indicates whether the client is created with the default service discovery. + defaultSD bool } func newClientInner(ctx context.Context, cancel context.CancelFunc, source string) *clientInner { @@ -90,6 +92,10 @@ func (ci *clientInner) close() { if ci.cli != nil { ci.cli.CloseIdleConnections() } + // only close the service discovery if it's created by the client. + if ci.defaultSD && ci.sd != nil { + ci.sd.Close() + } } func (ci *clientInner) reqCounter(name, status string) { @@ -114,23 +120,50 @@ func (ci *clientInner) requestWithRetry( headerOpts ...HeaderOption, ) error { var ( + serverURL string + isLeader bool statusCode int err error + logFields = append(reqInfo.logFields(), zap.String("source", ci.source)) ) execFunc := func() error { + defer func() { + // If the status code is 503, it indicates that there may be PD leader/follower changes. + // If the error message contains the leader/primary change information, it indicates that there may be PD leader/primary change. + if statusCode == http.StatusServiceUnavailable || errs.IsLeaderChange(err) { + ci.sd.ScheduleCheckMemberChanged() + } + log.Debug("[pd] http request finished", append(logFields, + zap.String("server-url", serverURL), + zap.Bool("is-leader", isLeader), + zap.Int("status-code", statusCode), + zap.Error(err))...) + }() // It will try to send the request to the PD leader first and then try to send the request to the other PD followers. clients := ci.sd.GetAllServiceClients() if len(clients) == 0 { return errs.ErrClientNoAvailableMember } + skipNum := 0 for _, cli := range clients { - addr := cli.GetHTTPAddress() - statusCode, err = ci.doRequest(ctx, addr, reqInfo, headerOpts...) + serverURL = cli.GetURL() + isLeader = cli.IsConnectedToLeader() + if len(reqInfo.targetURL) > 0 && reqInfo.targetURL != serverURL { + skipNum++ + continue + } + statusCode, err = ci.doRequest(ctx, serverURL, reqInfo, headerOpts...) if err == nil || noNeedRetry(statusCode) { return err } - log.Debug("[pd] request addr failed", - zap.String("source", ci.source), zap.Bool("is-leader", cli.IsConnectedToLeader()), zap.String("addr", addr), zap.Error(err)) + log.Debug("[pd] http request url failed", append(logFields, + zap.String("server-url", serverURL), + zap.Bool("is-leader", isLeader), + zap.Int("status-code", statusCode), + zap.Error(err))...) + } + if skipNum == len(clients) { + return errs.ErrClientNoTargetMember } return err } @@ -139,10 +172,11 @@ func (ci *clientInner) requestWithRetry( } // Copy a new backoffer for each request. bo := *reqInfo.bo - // Backoffer also needs to check the status code to determine whether to retry. + // Set the retryable checker for the backoffer if it's not set. bo.SetRetryableChecker(func(err error) bool { + // Backoffer also needs to check the status code to determine whether to retry. 
return err != nil && !noNeedRetry(statusCode) - }) + }, false) return bo.Exec(ctx, execFunc) } @@ -154,26 +188,21 @@ func noNeedRetry(statusCode int) bool { func (ci *clientInner) doRequest( ctx context.Context, - addr string, reqInfo *requestInfo, + serverURL string, reqInfo *requestInfo, headerOpts ...HeaderOption, ) (int, error) { var ( - source = ci.source callerID = reqInfo.callerID name = reqInfo.name - url = reqInfo.getURL(addr) method = reqInfo.method body = reqInfo.body res = reqInfo.res respHandler = reqInfo.respHandler + url = reqInfo.getURL(serverURL) + logFields = append(reqInfo.logFields(), + zap.String("source", ci.source), + zap.String("url", url)) ) - logFields := []zap.Field{ - zap.String("source", source), - zap.String("name", name), - zap.String("url", url), - zap.String("method", method), - zap.String("caller-id", callerID), - } log.Debug("[pd] request the http url", logFields...) req, err := http.NewRequestWithContext(ctx, method, url, bytes.NewBuffer(body)) if err != nil { @@ -214,11 +243,14 @@ func (ci *clientInner) doRequest( if readErr != nil { logFields = append(logFields, zap.NamedError("read-body-error", err)) } else { + // API server will return a JSON body containing the detailed error message + // when the status code is not `http.StatusOK` 200. + bs = bytes.TrimSpace(bs) logFields = append(logFields, zap.ByteString("body", bs)) } log.Error("[pd] request failed with a non-200 status", logFields...) - return resp.StatusCode, errors.Errorf("request pd http api failed with status: '%s'", resp.Status) + return resp.StatusCode, errors.Errorf("request pd http api failed with status: '%s', body: '%s'", resp.Status, bs) } if res == nil { @@ -238,6 +270,7 @@ type client struct { callerID string respHandler respHandleFunc bo *retry.Backoffer + targetURL string } // ClientOption configures the HTTP client. @@ -299,10 +332,12 @@ func NewClient( } sd := pd.NewDefaultPDServiceDiscovery(ctx, cancel, pdAddrs, c.inner.tlsConf) if err := sd.Init(); err != nil { - log.Error("[pd] init service discovery failed", zap.String("source", source), zap.Strings("pd-addrs", pdAddrs), zap.Error(err)) + log.Error("[pd] init service discovery failed", + zap.String("source", source), zap.Strings("pd-addrs", pdAddrs), zap.Error(err)) return nil } c.inner.init(sd) + c.inner.defaultSD = true return c } @@ -321,7 +356,7 @@ func (c *client) WithCallerID(callerID string) Client { // WithRespHandler sets and returns a new client with the given HTTP response handler. func (c *client) WithRespHandler( - handler func(resp *http.Response, res interface{}) error, + handler func(resp *http.Response, res any) error, ) Client { newClient := *c newClient.respHandler = handler @@ -335,6 +370,13 @@ func (c *client) WithBackoffer(bo *retry.Backoffer) Client { return &newClient } +// WithTargetURL sets and returns a new client with the given target URL. +func (c *client) WithTargetURL(targetURL string) Client { + newClient := *c + newClient.targetURL = targetURL + return &newClient +} + // Header key definition constants. const ( pdAllowFollowerHandleKey = "PD-Allow-Follower-Handle" @@ -355,7 +397,8 @@ func (c *client) request(ctx context.Context, reqInfo *requestInfo, headerOpts . return c.inner.requestWithRetry(ctx, reqInfo. WithCallerID(c.callerID). WithRespHandler(c.respHandler). - WithBackoffer(c.bo), + WithBackoffer(c.bo). + WithTargetURL(c.targetURL), headerOpts...) 
} @@ -375,9 +418,8 @@ func NewHTTPClientWithRequestChecker(checker requestChecker) *http.Client { } } -// newClientWithoutInitServiceDiscovery creates a PD HTTP client -// with the given PD addresses and TLS config without init service discovery. -func newClientWithoutInitServiceDiscovery( +// newClientWithMockServiceDiscovery creates a new PD HTTP client with a mock PD service discovery. +func newClientWithMockServiceDiscovery( source string, pdAddrs []string, opts ...ClientOption, @@ -388,7 +430,12 @@ func newClientWithoutInitServiceDiscovery( for _, opt := range opts { opt(c) } - sd := pd.NewDefaultPDServiceDiscovery(ctx, cancel, pdAddrs, c.inner.tlsConf) + sd := pd.NewMockPDServiceDiscovery(pdAddrs, c.inner.tlsConf) + if err := sd.Init(); err != nil { + log.Error("[pd] init mock service discovery failed", + zap.String("source", source), zap.Strings("pd-addrs", pdAddrs), zap.Error(err)) + return nil + } c.inner.init(sd) return c } diff --git a/client/http/client_test.go b/client/http/client_test.go index 02fce93838e..8769fa53f9a 100644 --- a/client/http/client_test.go +++ b/client/http/client_test.go @@ -22,12 +22,14 @@ import ( "time" "github.com/stretchr/testify/require" + "github.com/tikv/pd/client/errs" "github.com/tikv/pd/client/retry" "go.uber.org/atomic" ) func TestPDAllowFollowerHandleHeader(t *testing.T) { re := require.New(t) + checked := 0 httpClient := NewHTTPClientWithRequestChecker(func(req *http.Request) error { var expectedVal string if req.URL.Path == HotHistory { @@ -38,16 +40,19 @@ func TestPDAllowFollowerHandleHeader(t *testing.T) { re.Failf("PD allow follower handler header check failed", "should be %s, but got %s", expectedVal, val) } + checked++ return nil }) - c := newClientWithoutInitServiceDiscovery("test-header", []string{"http://127.0.0.1"}, WithHTTPClient(httpClient)) + c := newClientWithMockServiceDiscovery("test-header", []string{"http://127.0.0.1"}, WithHTTPClient(httpClient)) + defer c.Close() c.GetRegions(context.Background()) c.GetHistoryHotRegions(context.Background(), &HistoryHotRegionsRequest{}) - c.Close() + re.Equal(2, checked) } -func TestCallerID(t *testing.T) { +func TestWithCallerID(t *testing.T) { re := require.New(t) + checked := 0 expectedVal := atomic.NewString(defaultCallerID) httpClient := NewHTTPClientWithRequestChecker(func(req *http.Request) error { val := req.Header.Get(xCallerIDKey) @@ -56,20 +61,23 @@ func TestCallerID(t *testing.T) { re.Failf("Caller ID header check failed", "should be %s, but got %s", expectedVal, val) } + checked++ return nil }) - c := newClientWithoutInitServiceDiscovery("test-caller-id", []string{"http://127.0.0.1"}, WithHTTPClient(httpClient)) + c := newClientWithMockServiceDiscovery("test-caller-id", []string{"http://127.0.0.1"}, WithHTTPClient(httpClient)) + defer c.Close() c.GetRegions(context.Background()) expectedVal.Store("test") c.WithCallerID(expectedVal.Load()).GetRegions(context.Background()) - c.Close() + re.Equal(2, checked) } func TestWithBackoffer(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - c := newClientWithoutInitServiceDiscovery("test-with-backoffer", []string{"http://127.0.0.1"}) + c := newClientWithMockServiceDiscovery("test-with-backoffer", []string{"http://127.0.0.1"}) + defer c.Close() base := 100 * time.Millisecond max := 500 * time.Millisecond @@ -88,5 +96,17 @@ func TestWithBackoffer(t *testing.T) { _, err = c.WithBackoffer(bo).GetPDVersion(timeoutCtx) re.InDelta(3*time.Second, time.Since(start), 
float64(250*time.Millisecond)) re.ErrorIs(err, context.DeadlineExceeded) - c.Close() +} + +func TestWithTargetURL(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + c := newClientWithMockServiceDiscovery("test-with-target-url", []string{"http://127.0.0.1", "http://127.0.0.2", "http://127.0.0.3"}) + defer c.Close() + + _, err := c.WithTargetURL("http://127.0.0.4").GetStatus(ctx) + re.ErrorIs(err, errs.ErrClientNoTargetMember) + _, err = c.WithTargetURL("http://127.0.0.2").GetStatus(ctx) + re.ErrorContains(err, "connect: connection refused") } diff --git a/client/http/interface.go b/client/http/interface.go index 7510ea31fcf..3684e19b1f5 100644 --- a/client/http/interface.go +++ b/client/http/interface.go @@ -23,6 +23,7 @@ import ( "strings" "github.com/pingcap/errors" + "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/tikv/pd/client/retry" @@ -48,16 +49,19 @@ type Client interface { GetRegionStatusByKeyRange(context.Context, *KeyRange, bool) (*RegionStats, error) GetStores(context.Context) (*StoresInfo, error) GetStore(context.Context, uint64) (*StoreInfo, error) + DeleteStore(context.Context, uint64) error SetStoreLabels(context.Context, int64, map[string]string) error + GetHealthStatus(context.Context) ([]Health, error) /* Config-related interfaces */ - GetConfig(context.Context) (map[string]interface{}, error) - SetConfig(context.Context, map[string]interface{}, ...float64) error - GetScheduleConfig(context.Context) (map[string]interface{}, error) - SetScheduleConfig(context.Context, map[string]interface{}) error + GetConfig(context.Context) (map[string]any, error) + SetConfig(context.Context, map[string]any, ...float64) error + GetScheduleConfig(context.Context) (map[string]any, error) + SetScheduleConfig(context.Context, map[string]any) error GetClusterVersion(context.Context) (string, error) GetCluster(context.Context) (*metapb.Cluster, error) GetClusterStatus(context.Context) (*ClusterState, error) - GetReplicateConfig(context.Context) (map[string]interface{}, error) + GetStatus(context.Context) (*State, error) + GetReplicateConfig(context.Context) (map[string]any, error) /* Scheduler-related interfaces */ GetSchedulers(context.Context) ([]string, error) CreateScheduler(ctx context.Context, name string, storeID uint64) error @@ -91,7 +95,18 @@ type Client interface { GetMinResolvedTSByStoresIDs(context.Context, []uint64) (uint64, map[uint64]uint64, error) GetPDVersion(context.Context) (string, error) /* Micro Service interfaces */ - GetMicroServiceMembers(context.Context, string) ([]string, error) + GetMicroServiceMembers(context.Context, string) ([]MicroServiceMember, error) + GetMicroServicePrimary(context.Context, string) (string, error) + DeleteOperators(context.Context) error + + /* Keyspace interface */ + + // UpdateKeyspaceGCManagementType update the `gc_management_type` in keyspace meta config. + // If `gc_management_type` is `global_gc`, it means the current keyspace requires a tidb without 'keyspace-name' + // configured to run a global gc worker to calculate a global gc safe point. + // If `gc_management_type` is `keyspace_level_gc` it means the current keyspace can calculate gc safe point by its own. 
+ UpdateKeyspaceGCManagementType(ctx context.Context, keyspaceName string, keyspaceGCManagementType *KeyspaceGCManagementTypeConfig) error + GetKeyspaceMetaByName(ctx context.Context, keyspaceName string) (*keyspacepb.KeyspaceMeta, error) /* Client-related methods */ // WithCallerID sets and returns a new client with the given caller ID. @@ -100,9 +115,11 @@ type Client interface { // This allows the caller to customize how the response is handled, including error handling logic. // Additionally, it is important for the caller to handle the content of the response body properly // in order to ensure that it can be read and marshaled correctly into `res`. - WithRespHandler(func(resp *http.Response, res interface{}) error) Client + WithRespHandler(func(resp *http.Response, res any) error) Client // WithBackoffer sets and returns a new client with the given backoffer. WithBackoffer(*retry.Backoffer) Client + // WithTargetURL sets and returns a new client with the given target URL. + WithTargetURL(string) Client // Close gracefully closes the HTTP client. Close() } @@ -322,9 +339,23 @@ func (c *client) SetStoreLabels(ctx context.Context, storeID int64, storeLabels WithBody(jsonInput)) } +// GetHealthStatus gets the health status of the cluster. +func (c *client) GetHealthStatus(ctx context.Context) ([]Health, error) { + var healths []Health + err := c.request(ctx, newRequestInfo(). + WithName(getHealthStatusName). + WithURI(health). + WithMethod(http.MethodGet). + WithResp(&healths)) + if err != nil { + return nil, err + } + return healths, nil +} + // GetConfig gets the configurations. -func (c *client) GetConfig(ctx context.Context) (map[string]interface{}, error) { - var config map[string]interface{} +func (c *client) GetConfig(ctx context.Context) (map[string]any, error) { + var config map[string]any err := c.request(ctx, newRequestInfo(). WithName(getConfigName). WithURI(Config). @@ -337,7 +368,7 @@ func (c *client) GetConfig(ctx context.Context) (map[string]interface{}, error) } // SetConfig sets the configurations. ttlSecond is optional. -func (c *client) SetConfig(ctx context.Context, config map[string]interface{}, ttlSecond ...float64) error { +func (c *client) SetConfig(ctx context.Context, config map[string]any, ttlSecond ...float64) error { configJSON, err := json.Marshal(config) if err != nil { return errors.Trace(err) @@ -356,8 +387,8 @@ func (c *client) SetConfig(ctx context.Context, config map[string]interface{}, t } // GetScheduleConfig gets the schedule configurations. -func (c *client) GetScheduleConfig(ctx context.Context) (map[string]interface{}, error) { - var config map[string]interface{} +func (c *client) GetScheduleConfig(ctx context.Context) (map[string]any, error) { + var config map[string]any err := c.request(ctx, newRequestInfo(). WithName(getScheduleConfigName). WithURI(ScheduleConfig). @@ -370,7 +401,7 @@ func (c *client) GetScheduleConfig(ctx context.Context) (map[string]interface{}, } // SetScheduleConfig sets the schedule configurations. -func (c *client) SetScheduleConfig(ctx context.Context, config map[string]interface{}) error { +func (c *client) SetScheduleConfig(ctx context.Context, config map[string]any) error { configJSON, err := json.Marshal(config) if err != nil { return errors.Trace(err) @@ -410,6 +441,14 @@ func (c *client) GetStore(ctx context.Context, storeID uint64) (*StoreInfo, erro return &store, nil } +// DeleteStore deletes the store by ID. 
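Illustrative sketch (not part of the patch): how a caller might use the new GetHealthStatus and WithTargetURL methods; the PD address and source name are placeholders.

package example

import (
	"context"
	"fmt"

	pdhttp "github.com/tikv/pd/client/http"
)

func checkCluster(ctx context.Context) error {
	cli := pdhttp.NewClient("example-source", []string{"http://127.0.0.1:2379"})
	defer cli.Close()

	healths, err := cli.GetHealthStatus(ctx)
	if err != nil {
		return err
	}
	for _, h := range healths {
		fmt.Printf("member=%s healthy=%v\n", h.Name, h.Health)
	}

	// Pin a single request to one member instead of the default leader-first routing.
	status, err := cli.WithTargetURL("http://127.0.0.1:2379").GetStatus(ctx)
	if err != nil {
		return err
	}
	fmt.Println("version:", status.Version)
	return nil
}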
+func (c *client) DeleteStore(ctx context.Context, storeID uint64) error { + return c.request(ctx, newRequestInfo(). + WithName(deleteStoreName). + WithURI(StoreByID(storeID)). + WithMethod(http.MethodDelete)) +} + // GetClusterVersion gets the cluster version. func (c *client) GetClusterVersion(ctx context.Context) (string, error) { var version string @@ -452,9 +491,24 @@ func (c *client) GetClusterStatus(ctx context.Context) (*ClusterState, error) { return clusterStatus, nil } +// GetStatus gets the status of PD. +func (c *client) GetStatus(ctx context.Context) (*State, error) { + var status *State + err := c.request(ctx, newRequestInfo(). + WithName(getStatusName). + WithURI(Status). + WithMethod(http.MethodGet). + WithResp(&status), + WithAllowFollowerHandle()) + if err != nil { + return nil, err + } + return status, nil +} + // GetReplicateConfig gets the replication configurations. -func (c *client) GetReplicateConfig(ctx context.Context) (map[string]interface{}, error) { - var config map[string]interface{} +func (c *client) GetReplicateConfig(ctx context.Context) (map[string]any, error) { + var config map[string]any err := c.request(ctx, newRequestInfo(). WithName(getReplicateConfigName). WithURI(ReplicateConfig). @@ -694,7 +748,7 @@ func (c *client) GetSchedulers(ctx context.Context) ([]string, error) { // CreateScheduler creates a scheduler to PD cluster. func (c *client) CreateScheduler(ctx context.Context, name string, storeID uint64) error { - inputJSON, err := json.Marshal(map[string]interface{}{ + inputJSON, err := json.Marshal(map[string]any{ "name": name, "store_id": storeID, }) @@ -854,8 +908,8 @@ func (c *client) GetMinResolvedTSByStoresIDs(ctx context.Context, storeIDs []uin } // GetMicroServiceMembers gets the members of the microservice. -func (c *client) GetMicroServiceMembers(ctx context.Context, service string) ([]string, error) { - var members []string +func (c *client) GetMicroServiceMembers(ctx context.Context, service string) ([]MicroServiceMember, error) { + var members []MicroServiceMember err := c.request(ctx, newRequestInfo(). WithName(getMicroServiceMembersName). WithURI(MicroServiceMembers(service)). @@ -867,6 +921,17 @@ func (c *client) GetMicroServiceMembers(ctx context.Context, service string) ([] return members, nil } +// GetMicroServicePrimary gets the primary of the microservice. +func (c *client) GetMicroServicePrimary(ctx context.Context, service string) (string, error) { + var primary string + err := c.request(ctx, newRequestInfo(). + WithName(getMicroServicePrimaryName). + WithURI(MicroServicePrimary(service)). + WithMethod(http.MethodGet). + WithResp(&primary)) + return primary, err +} + // GetPDVersion gets the release version of the PD binary. func (c *client) GetPDVersion(ctx context.Context) (string, error) { var ver struct { @@ -879,3 +944,56 @@ func (c *client) GetPDVersion(ctx context.Context) (string, error) { WithResp(&ver)) return ver.Version, err } + +// DeleteOperators deletes the running operators. +func (c *client) DeleteOperators(ctx context.Context) error { + return c.request(ctx, newRequestInfo(). + WithName(deleteOperators). + WithURI(operators). + WithMethod(http.MethodDelete)) +} + +// UpdateKeyspaceGCManagementType patches the keyspace config. 
+func (c *client) UpdateKeyspaceGCManagementType(ctx context.Context, keyspaceName string, keyspaceGCmanagementType *KeyspaceGCManagementTypeConfig) error { + keyspaceConfigPatchJSON, err := json.Marshal(keyspaceGCmanagementType) + if err != nil { + return errors.Trace(err) + } + return c.request(ctx, newRequestInfo(). + WithName(UpdateKeyspaceGCManagementTypeName). + WithURI(GetUpdateKeyspaceConfigURL(keyspaceName)). + WithMethod(http.MethodPatch). + WithBody(keyspaceConfigPatchJSON)) +} + +// GetKeyspaceMetaByName get the given keyspace meta. +func (c *client) GetKeyspaceMetaByName(ctx context.Context, keyspaceName string) (*keyspacepb.KeyspaceMeta, error) { + var ( + tempKeyspaceMeta tempKeyspaceMeta + keyspaceMetaPB keyspacepb.KeyspaceMeta + ) + err := c.request(ctx, newRequestInfo(). + WithName(GetKeyspaceMetaByNameName). + WithURI(GetKeyspaceMetaByNameURL(keyspaceName)). + WithMethod(http.MethodGet). + WithResp(&tempKeyspaceMeta)) + + if err != nil { + return nil, err + } + + keyspaceState, err := stringToKeyspaceState(tempKeyspaceMeta.State) + if err != nil { + return nil, err + } + + keyspaceMetaPB = keyspacepb.KeyspaceMeta{ + Name: tempKeyspaceMeta.Name, + Id: tempKeyspaceMeta.ID, + Config: tempKeyspaceMeta.Config, + CreatedAt: tempKeyspaceMeta.CreatedAt, + StateChangedAt: tempKeyspaceMeta.StateChangedAt, + State: keyspaceState, + } + return &keyspaceMetaPB, nil +} diff --git a/client/http/request_info.go b/client/http/request_info.go index 404d1e657b5..40bd0368250 100644 --- a/client/http/request_info.go +++ b/client/http/request_info.go @@ -18,6 +18,7 @@ import ( "fmt" "github.com/tikv/pd/client/retry" + "go.uber.org/zap" ) // The following constants are the names of the requests. @@ -38,7 +39,9 @@ const ( getRegionStatusByKeyRangeName = "GetRegionStatusByKeyRange" getStoresName = "GetStores" getStoreName = "GetStore" + deleteStoreName = "DeleteStore" setStoreLabelsName = "SetStoreLabels" + getHealthStatusName = "GetHealthStatus" getConfigName = "GetConfig" setConfigName = "SetConfig" getScheduleConfigName = "GetScheduleConfig" @@ -46,6 +49,7 @@ const ( getClusterVersionName = "GetClusterVersion" getClusterName = "GetCluster" getClusterStatusName = "GetClusterStatus" + getStatusName = "GetStatus" getReplicateConfigName = "GetReplicateConfig" getSchedulersName = "GetSchedulers" createSchedulerName = "CreateScheduler" @@ -70,11 +74,15 @@ const ( accelerateScheduleInBatchName = "AccelerateScheduleInBatch" getMinResolvedTSByStoresIDsName = "GetMinResolvedTSByStoresIDs" getMicroServiceMembersName = "GetMicroServiceMembers" + getMicroServicePrimaryName = "GetMicroServicePrimary" getPDVersionName = "GetPDVersion" resetTSName = "ResetTS" resetBaseAllocIDName = "ResetBaseAllocID" setSnapshotRecoveringMarkName = "SetSnapshotRecoveringMark" deleteSnapshotRecoveringMarkName = "DeleteSnapshotRecoveringMark" + deleteOperators = "DeleteOperators" + UpdateKeyspaceGCManagementTypeName = "UpdateKeyspaceGCManagementType" + GetKeyspaceMetaByNameName = "GetKeyspaceMetaByName" ) type requestInfo struct { @@ -83,9 +91,10 @@ type requestInfo struct { uri string method string body []byte - res interface{} + res any respHandler respHandleFunc bo *retry.Backoffer + targetURL string } // newRequestInfo creates a new request info. @@ -124,7 +133,7 @@ func (ri *requestInfo) WithBody(body []byte) *requestInfo { } // WithResp sets the response struct of the request. 
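Illustrative sketch (not part of the patch): combining UpdateKeyspaceGCManagementType and GetKeyspaceMetaByName as a caller might; the PD address, source name, and keyspace name are placeholders.

package example

import (
	"context"
	"fmt"

	pdhttp "github.com/tikv/pd/client/http"
)

func enableKeyspaceLevelGC(ctx context.Context, keyspaceName string) error {
	cli := pdhttp.NewClient("example-source", []string{"http://127.0.0.1:2379"})
	defer cli.Close()

	// Patch only the gc_management_type key of the keyspace config.
	cfg := &pdhttp.KeyspaceGCManagementTypeConfig{
		Config: pdhttp.KeyspaceGCManagementType{GCManagementType: "keyspace_level_gc"},
	}
	if err := cli.UpdateKeyspaceGCManagementType(ctx, keyspaceName, cfg); err != nil {
		return err
	}

	meta, err := cli.GetKeyspaceMetaByName(ctx, keyspaceName)
	if err != nil {
		return err
	}
	fmt.Printf("keyspace %s config: %v\n", meta.GetName(), meta.GetConfig())
	return nil
}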
-func (ri *requestInfo) WithResp(res interface{}) *requestInfo { +func (ri *requestInfo) WithResp(res any) *requestInfo { ri.res = res return ri } @@ -141,6 +150,22 @@ func (ri *requestInfo) WithBackoffer(bo *retry.Backoffer) *requestInfo { return ri } +// WithTargetURL sets the target URL of the request. +func (ri *requestInfo) WithTargetURL(targetURL string) *requestInfo { + ri.targetURL = targetURL + return ri +} + func (ri *requestInfo) getURL(addr string) string { return fmt.Sprintf("%s%s", addr, ri.uri) } + +func (ri *requestInfo) logFields() []zap.Field { + return []zap.Field{ + zap.String("caller-id", ri.callerID), + zap.String("name", ri.name), + zap.String("uri", ri.uri), + zap.String("method", ri.method), + zap.String("target-url", ri.targetURL), + } +} diff --git a/client/http/types.go b/client/http/types.go index 59f9077262b..ab624049436 100644 --- a/client/http/types.go +++ b/client/http/types.go @@ -17,10 +17,12 @@ package http import ( "encoding/hex" "encoding/json" + "fmt" "net/url" "time" "github.com/pingcap/kvproto/pkg/encryptionpb" + "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/pingcap/kvproto/pkg/pdpb" ) @@ -32,6 +34,15 @@ type ClusterState struct { ReplicationStatus string `json:"replication_status"` } +// State is the status of PD server. +// NOTE: This type sync with https://github.com/tikv/pd/blob/1d77b25656bc18e1f5aa82337d4ab62a34b10087/pkg/versioninfo/versioninfo.go#L29 +type State struct { + BuildTS string `json:"build_ts"` + Version string `json:"version"` + GitHash string `json:"git_hash"` + StartTimestamp int64 `json:"start_timestamp"` +} + // KeyRange defines a range of keys in bytes. type KeyRange struct { startKey []byte @@ -122,11 +133,22 @@ type RegionsInfo struct { Regions []RegionInfo `json:"regions"` } +func newRegionsInfo(count int64) *RegionsInfo { + return &RegionsInfo{ + Count: count, + Regions: make([]RegionInfo, 0, count), + } +} + // Merge merges two RegionsInfo together and returns a new one. func (ri *RegionsInfo) Merge(other *RegionsInfo) *RegionsInfo { - newRegionsInfo := &RegionsInfo{ - Regions: make([]RegionInfo, 0, ri.Count+other.Count), + if ri == nil { + ri = newRegionsInfo(0) + } + if other == nil { + other = newRegionsInfo(0) } + newRegionsInfo := newRegionsInfo(ri.Count + other.Count) m := make(map[int64]RegionInfo, ri.Count+other.Count) for _, region := range ri.Regions { m[region.ID] = region @@ -344,7 +366,7 @@ func (r *Rule) String() string { // Clone returns a copy of Rule. func (r *Rule) Clone() *Rule { var clone Rule - json.Unmarshal([]byte(r.String()), &clone) + _ = json.Unmarshal([]byte(r.String()), &clone) clone.StartKey = append(r.StartKey[:0:0], r.StartKey...) clone.EndKey = append(r.EndKey[:0:0], r.EndKey...) return &clone @@ -575,7 +597,7 @@ type LabelRule struct { Index int `json:"index"` Labels []RegionLabel `json:"labels"` RuleType string `json:"rule_type"` - Data interface{} `json:"data"` + Data any `json:"data"` } // LabelRulePatch is the patch to update the label rules. @@ -592,3 +614,59 @@ type MembersInfo struct { Leader *pdpb.Member `json:"leader,omitempty"` EtcdLeader *pdpb.Member `json:"etcd_leader,omitempty"` } + +// MicroServiceMember is the member info of a micro service. 
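Illustrative sketch (not part of the patch): Merge is now nil-safe on both sides, so partial region results can be folded without pre-checks.

package example

import (
	"fmt"

	pdhttp "github.com/tikv/pd/client/http"
)

func mergeSketch() {
	var left *pdhttp.RegionsInfo // a nil receiver is tolerated now
	right := &pdhttp.RegionsInfo{
		Count:   1,
		Regions: []pdhttp.RegionInfo{{ID: 1, StartKey: "", EndKey: "a"}},
	}
	merged := left.Merge(right)
	fmt.Println(merged.Count, len(merged.Regions)) // 1 1
}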
+type MicroServiceMember struct { + ServiceAddr string `json:"service-addr"` + Version string `json:"version"` + GitHash string `json:"git-hash"` + DeployPath string `json:"deploy-path"` + StartTimestamp int64 `json:"start-timestamp"` +} + +// KeyspaceGCManagementType represents parameters needed to modify the gc management type. +// If `gc_management_type` is `global_gc`, it means the current keyspace requires a tidb without 'keyspace-name' +// configured to run a global gc worker to calculate a global gc safe point. +// If `gc_management_type` is `keyspace_level_gc` it means the current keyspace can calculate gc safe point by its own. +type KeyspaceGCManagementType struct { + GCManagementType string `json:"gc_management_type,omitempty"` +} + +// KeyspaceGCManagementTypeConfig represents parameters needed to modify target keyspace's configs. +type KeyspaceGCManagementTypeConfig struct { + Config KeyspaceGCManagementType `json:"config"` +} + +// tempKeyspaceMeta is the keyspace meta struct that returned from the http interface. +type tempKeyspaceMeta struct { + ID uint32 `json:"id"` + Name string `json:"name"` + State string `json:"state"` + CreatedAt int64 `json:"created_at"` + StateChangedAt int64 `json:"state_changed_at"` + Config map[string]string `json:"config"` +} + +func stringToKeyspaceState(str string) (keyspacepb.KeyspaceState, error) { + switch str { + case "ENABLED": + return keyspacepb.KeyspaceState_ENABLED, nil + case "DISABLED": + return keyspacepb.KeyspaceState_DISABLED, nil + case "ARCHIVED": + return keyspacepb.KeyspaceState_ARCHIVED, nil + case "TOMBSTONE": + return keyspacepb.KeyspaceState_TOMBSTONE, nil + default: + return keyspacepb.KeyspaceState(0), fmt.Errorf("invalid KeyspaceState string: %s", str) + } +} + +// Health reflects the cluster's health. +// NOTE: This type is moved from `server/api/health.go`, maybe move them to the same place later. +type Health struct { + Name string `json:"name"` + MemberID uint64 `json:"member_id"` + ClientUrls []string `json:"client_urls"` + Health bool `json:"health"` +} diff --git a/client/http/types_test.go b/client/http/types_test.go index 39c53ae525d..904476ceda1 100644 --- a/client/http/types_test.go +++ b/client/http/types_test.go @@ -23,30 +23,140 @@ import ( func TestMergeRegionsInfo(t *testing.T) { re := require.New(t) - regionsInfo1 := &RegionsInfo{ - Count: 1, - Regions: []RegionInfo{ - { - ID: 1, - StartKey: "", - EndKey: "a", + testCases := []struct { + source *RegionsInfo + target *RegionsInfo + }{ + // Different regions. + { + source: &RegionsInfo{ + Count: 1, + Regions: []RegionInfo{ + { + ID: 1, + StartKey: "", + EndKey: "a", + }, + }, + }, + target: &RegionsInfo{ + Count: 1, + Regions: []RegionInfo{ + { + ID: 2, + StartKey: "a", + EndKey: "", + }, + }, }, }, - } - regionsInfo2 := &RegionsInfo{ - Count: 1, - Regions: []RegionInfo{ - { - ID: 2, - StartKey: "a", - EndKey: "", + // Same region. 
+ { + source: &RegionsInfo{ + Count: 1, + Regions: []RegionInfo{ + { + ID: 1, + StartKey: "", + EndKey: "a", + }, + }, + }, + target: &RegionsInfo{ + Count: 1, + Regions: []RegionInfo{ + { + ID: 1, + StartKey: "", + EndKey: "a", + }, + }, + }, + }, + { + source: &RegionsInfo{ + Count: 1, + Regions: []RegionInfo{ + { + ID: 1, + StartKey: "", + EndKey: "a", + }, + }, + }, + target: nil, + }, + { + source: nil, + target: &RegionsInfo{ + Count: 1, + Regions: []RegionInfo{ + { + ID: 2, + StartKey: "a", + EndKey: "", + }, + }, + }, + }, + { + source: nil, + target: nil, + }, + { + source: &RegionsInfo{ + Count: 1, + Regions: []RegionInfo{ + { + ID: 1, + StartKey: "", + EndKey: "a", + }, + }, + }, + target: newRegionsInfo(0), + }, + { + source: newRegionsInfo(0), + target: &RegionsInfo{ + Count: 1, + Regions: []RegionInfo{ + { + ID: 2, + StartKey: "a", + EndKey: "", + }, + }, }, }, + { + source: newRegionsInfo(0), + target: newRegionsInfo(0), + }, + } + for idx, tc := range testCases { + regionsInfo := tc.source.Merge(tc.target) + if tc.source == nil { + tc.source = newRegionsInfo(0) + } + if tc.target == nil { + tc.target = newRegionsInfo(0) + } + m := make(map[int64]RegionInfo, tc.source.Count+tc.target.Count) + for _, region := range tc.source.Regions { + m[region.ID] = region + } + for _, region := range tc.target.Regions { + m[region.ID] = region + } + mergedCount := len(m) + re.Equal(int64(mergedCount), regionsInfo.Count, "case %d", idx) + re.Len(regionsInfo.Regions, mergedCount, "case %d", idx) + // All regions in source and target should be in the merged result. + for _, region := range append(tc.source.Regions, tc.target.Regions...) { + re.Contains(regionsInfo.Regions, region, "case %d", idx) + } } - regionsInfo := regionsInfo1.Merge(regionsInfo2) - re.Equal(int64(2), regionsInfo.Count) - re.Len(regionsInfo.Regions, 2) - re.Subset(regionsInfo.Regions, append(regionsInfo1.Regions, regionsInfo2.Regions...)) } func TestRuleStartEndKey(t *testing.T) { @@ -198,3 +308,30 @@ func mustMarshalAndUnmarshalRuleOp(re *require.Assertions, ruleOp *RuleOp) *Rule re.NoError(err) return newRuleOp } + +// startKey and endKey are json:"-" which means cannot be Unmarshal from json +// We need to take care of `Clone` method. +func TestRuleKeyClone(t *testing.T) { + re := require.New(t) + r := &Rule{ + StartKey: []byte{1, 2, 3}, + EndKey: []byte{4, 5, 6}, + } + + clone := r.Clone() + // Modify the original rule + r.StartKey[0] = 9 + r.EndKey[0] = 9 + + // The clone should not be affected + re.Equal([]byte{1, 2, 3}, clone.StartKey) + re.Equal([]byte{4, 5, 6}, clone.EndKey) + + // Modify the clone + clone.StartKey[0] = 8 + clone.EndKey[0] = 8 + + // The original rule should not be affected + re.Equal([]byte{9, 2, 3}, r.StartKey) + re.Equal([]byte{9, 5, 6}, r.EndKey) +} diff --git a/client/keyspace_client.go b/client/keyspace_client.go index fedb7452412..e52a4f85f05 100644 --- a/client/keyspace_client.go +++ b/client/keyspace_client.go @@ -21,6 +21,7 @@ import ( "github.com/opentracing/opentracing-go" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/keyspacepb" + "github.com/tikv/pd/client/errs" ) // KeyspaceClient manages keyspace metadata. @@ -45,8 +46,8 @@ func (c *client) keyspaceClient() keyspacepb.KeyspaceClient { // LoadKeyspace loads and returns target keyspace's metadata. 
func (c *client) LoadKeyspace(ctx context.Context, name string) (*keyspacepb.KeyspaceMeta, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("keyspaceClient.LoadKeyspace", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("keyspaceClient.LoadKeyspace", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -56,7 +57,12 @@ func (c *client) LoadKeyspace(ctx context.Context, name string) (*keyspacepb.Key Header: c.requestHeader(), Name: name, } - resp, err := c.keyspaceClient().LoadKeyspace(ctx, req) + protoClient := c.keyspaceClient() + if protoClient == nil { + cancel() + return nil, errs.ErrClientGetProtoClient + } + resp, err := protoClient.LoadKeyspace(ctx, req) cancel() if err != nil { @@ -84,8 +90,8 @@ func (c *client) LoadKeyspace(ctx context.Context, name string) (*keyspacepb.Key // // Updated keyspace meta will be returned. func (c *client) UpdateKeyspaceState(ctx context.Context, id uint32, state keyspacepb.KeyspaceState) (*keyspacepb.KeyspaceMeta, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("keyspaceClient.UpdateKeyspaceState", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("keyspaceClient.UpdateKeyspaceState", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -96,7 +102,12 @@ func (c *client) UpdateKeyspaceState(ctx context.Context, id uint32, state keysp Id: id, State: state, } - resp, err := c.keyspaceClient().UpdateKeyspaceState(ctx, req) + protoClient := c.keyspaceClient() + if protoClient == nil { + cancel() + return nil, errs.ErrClientGetProtoClient + } + resp, err := protoClient.UpdateKeyspaceState(ctx, req) cancel() if err != nil { @@ -117,14 +128,14 @@ func (c *client) UpdateKeyspaceState(ctx context.Context, id uint32, state keysp // It returns a stream of slices of keyspace metadata. // The first message in stream contains all current keyspaceMeta, // all subsequent messages contains new put events for all keyspaces. -func (c *client) WatchKeyspaces(ctx context.Context) (chan []*keyspacepb.KeyspaceMeta, error) { +func (*client) WatchKeyspaces(context.Context) (chan []*keyspacepb.KeyspaceMeta, error) { return nil, errors.Errorf("WatchKeyspaces unimplemented") } // GetAllKeyspaces get all keyspaces metadata. 
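Illustrative sketch (not part of the patch): the repeated `span.Tracer() != nil` guard could be factored into a small helper; the helper name is invented for illustration.

package example

import (
	"context"

	"github.com/opentracing/opentracing-go"
)

// startSpanIfTraced derives a child span only when the incoming context
// already carries a span with a usable tracer, mirroring the guard added to
// LoadKeyspace, UpdateKeyspaceState and GetAllKeyspaces.
func startSpanIfTraced(ctx context.Context, op string) opentracing.Span {
	parent := opentracing.SpanFromContext(ctx)
	if parent == nil || parent.Tracer() == nil {
		return nil
	}
	return parent.Tracer().StartSpan(op, opentracing.ChildOf(parent.Context()))
}

func tracedCall(ctx context.Context) {
	if span := startSpanIfTraced(ctx, "keyspaceClient.LoadKeyspace"); span != nil {
		defer span.Finish()
	}
	// ... issue the gRPC request here ...
}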
func (c *client) GetAllKeyspaces(ctx context.Context, startID uint32, limit uint32) ([]*keyspacepb.KeyspaceMeta, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("keyspaceClient.GetAllKeyspaces", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("keyspaceClient.GetAllKeyspaces", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -135,7 +146,12 @@ func (c *client) GetAllKeyspaces(ctx context.Context, startID uint32, limit uint StartId: startID, Limit: limit, } - resp, err := c.keyspaceClient().GetAllKeyspaces(ctx, req) + protoClient := c.keyspaceClient() + if protoClient == nil { + cancel() + return nil, errs.ErrClientGetProtoClient + } + resp, err := protoClient.GetAllKeyspaces(ctx, req) cancel() if err != nil { diff --git a/client/meta_storage_client.go b/client/meta_storage_client.go index b203fb914d3..fe7e8a33e93 100644 --- a/client/meta_storage_client.go +++ b/client/meta_storage_client.go @@ -110,8 +110,8 @@ func (c *client) Put(ctx context.Context, key, value []byte, opts ...OpOption) ( opt(options) } - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.Put", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.Put", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -124,7 +124,7 @@ func (c *client) Put(ctx context.Context, key, value []byte, opts ...OpOption) ( Lease: options.lease, PrevKv: options.prevKv, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) + ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderURL()) cli := c.metaStorageClient() if cli == nil { cancel() @@ -148,8 +148,8 @@ func (c *client) Get(ctx context.Context, key []byte, opts ...OpOption) (*meta_s options.rangeEnd = getPrefix(key) } - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.Get", opentracing.ChildOf(span.Context())) + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span = span.Tracer().StartSpan("pdclient.Get", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() @@ -162,7 +162,7 @@ func (c *client) Get(ctx context.Context, key []byte, opts ...OpOption) (*meta_s Limit: options.limit, Revision: options.revision, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) + ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderURL()) cli := c.metaStorageClient() if cli == nil { cancel() diff --git a/client/mock_pd_service_discovery.go b/client/mock_pd_service_discovery.go new file mode 100644 index 00000000000..f1fabd0a1d2 --- /dev/null +++ b/client/mock_pd_service_discovery.go @@ -0,0 +1,74 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package pd + +import ( + "crypto/tls" + "sync" + + "google.golang.org/grpc" +) + +var _ ServiceDiscovery = (*mockPDServiceDiscovery)(nil) + +type mockPDServiceDiscovery struct { + urls []string + tlsCfg *tls.Config + clients []ServiceClient +} + +// NewMockPDServiceDiscovery creates a mock PD service discovery. +func NewMockPDServiceDiscovery(urls []string, tlsCfg *tls.Config) *mockPDServiceDiscovery { + return &mockPDServiceDiscovery{ + urls: urls, + tlsCfg: tlsCfg, + } +} + +// Init directly creates the service clients with the given URLs. +func (m *mockPDServiceDiscovery) Init() error { + m.clients = make([]ServiceClient, 0, len(m.urls)) + for _, url := range m.urls { + m.clients = append(m.clients, newPDServiceClient(url, m.urls[0], nil, false)) + } + return nil +} + +// Close clears the service clients. +func (m *mockPDServiceDiscovery) Close() { + clear(m.clients) +} + +// GetAllServiceClients returns all service clients init in the mock PD service discovery. +func (m *mockPDServiceDiscovery) GetAllServiceClients() []ServiceClient { + return m.clients +} + +func (*mockPDServiceDiscovery) GetClusterID() uint64 { return 0 } +func (*mockPDServiceDiscovery) GetKeyspaceID() uint32 { return 0 } +func (*mockPDServiceDiscovery) GetKeyspaceGroupID() uint32 { return 0 } +func (*mockPDServiceDiscovery) GetServiceURLs() []string { return nil } +func (*mockPDServiceDiscovery) GetServingEndpointClientConn() *grpc.ClientConn { return nil } +func (*mockPDServiceDiscovery) GetClientConns() *sync.Map { return nil } +func (*mockPDServiceDiscovery) GetServingURL() string { return "" } +func (*mockPDServiceDiscovery) GetBackupURLs() []string { return nil } +func (*mockPDServiceDiscovery) GetServiceClient() ServiceClient { return nil } +func (*mockPDServiceDiscovery) GetOrCreateGRPCConn(string) (*grpc.ClientConn, error) { + return nil, nil +} +func (*mockPDServiceDiscovery) ScheduleCheckMemberChanged() {} +func (*mockPDServiceDiscovery) CheckMemberChanged() error { return nil } +func (*mockPDServiceDiscovery) AddServingURLSwitchedCallback(...func()) {} +func (*mockPDServiceDiscovery) AddServiceURLsSwitchedCallback(...func()) {} diff --git a/client/pd_service_discovery.go b/client/pd_service_discovery.go index 15ea9cadb46..9378ed278e0 100644 --- a/client/pd_service_discovery.go +++ b/client/pd_service_discovery.go @@ -17,7 +17,7 @@ package pd import ( "context" "crypto/tls" - "fmt" + "net/url" "reflect" "sort" "strings" @@ -87,15 +87,15 @@ type ServiceDiscovery interface { // which is the leader in a quorum-based cluster or the primary in a primary/secondary // configured cluster. GetServingEndpointClientConn() *grpc.ClientConn - // GetClientConns returns the mapping {addr -> a gRPC connection} + // GetClientConns returns the mapping {URL -> a gRPC connection} GetClientConns() *sync.Map - // GetServingAddr returns the serving endpoint which is the leader in a quorum-based cluster + // GetServingURL returns the serving endpoint which is the leader in a quorum-based cluster // or the primary in a primary/secondary configured cluster. - GetServingAddr() string - // GetBackupAddrs gets the addresses of the current reachable backup service + GetServingURL() string + // GetBackupURLs gets the URLs of the current reachable backup service // endpoints. Backup service endpoints are followers in a quorum-based cluster or // secondaries in a primary/secondary configured cluster. - GetBackupAddrs() []string + GetBackupURLs() []string // GetServiceClient tries to get the leader/primary ServiceClient. 
// If the leader ServiceClient meets network problem, // it returns a follower/secondary ServiceClient which can forward the request to leader. @@ -103,8 +103,8 @@ type ServiceDiscovery interface { // GetAllServiceClients tries to get all ServiceClient. // If the leader is not nil, it will put the leader service client first in the slice. GetAllServiceClients() []ServiceClient - // GetOrCreateGRPCConn returns the corresponding grpc client connection of the given addr - GetOrCreateGRPCConn(addr string) (*grpc.ClientConn, error) + // GetOrCreateGRPCConn returns the corresponding grpc client connection of the given url. + GetOrCreateGRPCConn(url string) (*grpc.ClientConn, error) // ScheduleCheckMemberChanged is used to trigger a check to see if there is any membership change // among the leader/followers in a quorum-based cluster or among the primary/secondaries in a // primary/secondary configured cluster. @@ -112,23 +112,22 @@ type ServiceDiscovery interface { // CheckMemberChanged immediately check if there is any membership change among the leader/followers // in a quorum-based cluster or among the primary/secondaries in a primary/secondary configured cluster. CheckMemberChanged() error - // AddServingAddrSwitchedCallback adds callbacks which will be called when the leader + // AddServingURLSwitchedCallback adds callbacks which will be called when the leader // in a quorum-based cluster or the primary in a primary/secondary configured cluster // is switched. - AddServingAddrSwitchedCallback(callbacks ...func()) - // AddServiceAddrsSwitchedCallback adds callbacks which will be called when any leader/follower + AddServingURLSwitchedCallback(callbacks ...func()) + // AddServiceURLsSwitchedCallback adds callbacks which will be called when any leader/follower // in a quorum-based cluster or any primary/secondary in a primary/secondary configured cluster // is changed. - AddServiceAddrsSwitchedCallback(callbacks ...func()) + AddServiceURLsSwitchedCallback(callbacks ...func()) } // ServiceClient is an interface that defines a set of operations for a raw PD gRPC client to specific PD server. type ServiceClient interface { - // GetAddress returns the address information of the PD server. - GetAddress() string - // GetHTTPAddress returns the address with HTTP scheme of the PD server. - GetHTTPAddress() string - // GetClientConn returns the gRPC connection of the service client + // GetURL returns the client url of the PD/etcd server. + GetURL() string + // GetClientConn returns the gRPC connection of the service client. + // It returns nil if the connection is not available. GetClientConn() *grpc.ClientConn // BuildGRPCTargetContext builds a context object with a gRPC context. // ctx: the original context object. 
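Illustrative sketch (not part of the patch): wiring the new mock discovery up outside a test; the URLs are placeholders, and Init wires each client with the first URL as the leader URL.

package example

import (
	"fmt"

	pd "github.com/tikv/pd/client"
)

func listMockClients() error {
	sd := pd.NewMockPDServiceDiscovery([]string{"http://127.0.0.1:2379", "http://127.0.0.2:2379"}, nil)
	if err := sd.Init(); err != nil {
		return err
	}
	defer sd.Close()

	for _, cli := range sd.GetAllServiceClients() {
		fmt.Printf("url=%s leader=%v\n", cli.GetURL(), cli.IsConnectedToLeader())
	}
	return nil
}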
@@ -149,43 +148,23 @@ var ( ) type pdServiceClient struct { - addr string - httpAddress string - conn *grpc.ClientConn - isLeader bool - leaderAddr string + url string + conn *grpc.ClientConn + isLeader bool + leaderURL string networkFailure atomic.Bool } -func newPDServiceClient(addr, leaderAddr string, tlsCfg *tls.Config, conn *grpc.ClientConn, isLeader bool) ServiceClient { - var httpAddress string - if tlsCfg == nil { - if strings.HasPrefix(addr, httpsScheme) { - addr = strings.TrimPrefix(addr, httpsScheme) - httpAddress = fmt.Sprintf("%s%s", httpScheme, addr) - } else if strings.HasPrefix(addr, httpScheme) { - httpAddress = addr - } else { - httpAddress = fmt.Sprintf("%s://%s", httpScheme, addr) - } - } else { - if strings.HasPrefix(addr, httpsScheme) { - httpAddress = addr - } else if strings.HasPrefix(addr, httpScheme) { - addr = strings.TrimPrefix(addr, httpScheme) - httpAddress = fmt.Sprintf("%s%s", httpsScheme, addr) - } else { - httpAddress = fmt.Sprintf("%s://%s", httpsScheme, addr) - } - } - +// NOTE: In the current implementation, the URL passed in is bound to have a scheme, +// because it is processed in `newPDServiceDiscovery`, and the URL returned by the etcd member already has the scheme. +// When testing, the URL is also bound to have a scheme. +func newPDServiceClient(url, leaderURL string, conn *grpc.ClientConn, isLeader bool) ServiceClient { cli := &pdServiceClient{ - addr: addr, - httpAddress: httpAddress, - conn: conn, - isLeader: isLeader, - leaderAddr: leaderAddr, + url: url, + conn: conn, + isLeader: isLeader, + leaderURL: leaderURL, } if conn == nil { cli.networkFailure.Store(true) @@ -193,20 +172,12 @@ func newPDServiceClient(addr, leaderAddr string, tlsCfg *tls.Config, conn *grpc. return cli } -// GetAddress implements ServiceClient. -func (c *pdServiceClient) GetAddress() string { - if c == nil { - return "" - } - return c.addr -} - -// GetHTTPAddress implements ServiceClient. -func (c *pdServiceClient) GetHTTPAddress() string { +// GetURL implements ServiceClient. +func (c *pdServiceClient) GetURL() string { if c == nil { return "" } - return c.httpAddress + return c.url } // BuildGRPCTargetContext implements ServiceClient.
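Illustrative sketch (not part of the patch): the leaderURL kept on each service client is what later lands in the gRPC forward metadata; the URL below is a placeholder.

package example

import (
	"context"
	"fmt"

	"github.com/tikv/pd/client/grpcutil"
	"google.golang.org/grpc/metadata"
)

func forwardToLeader(ctx context.Context) context.Context {
	// In the client this URL comes from the ServiceClient's leaderURL field.
	ctx = grpcutil.BuildForwardContext(ctx, "http://127.0.0.1:2379")
	if md, ok := metadata.FromOutgoingContext(ctx); ok {
		fmt.Println(md.Get(grpcutil.ForwardMetadataKey)) // [http://127.0.0.1:2379]
	}
	return ctx
}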
@@ -215,7 +186,7 @@ func (c *pdServiceClient) BuildGRPCTargetContext(ctx context.Context, toLeader b return ctx } if toLeader { - return grpcutil.BuildForwardContext(ctx, c.leaderAddr) + return grpcutil.BuildForwardContext(ctx, c.leaderURL) } return grpcutil.BuildFollowerHandleContext(ctx) } @@ -243,7 +214,7 @@ func (c *pdServiceClient) checkNetworkAvailable(ctx context.Context) { healthCli := healthpb.NewHealthClient(c.conn) resp, err := healthCli.Check(ctx, &healthpb.HealthCheckRequest{Service: ""}) failpoint.Inject("unreachableNetwork1", func(val failpoint.Value) { - if val, ok := val.(string); (ok && val == c.GetAddress()) || !ok { + if val, ok := val.(string); (ok && val == c.GetURL()) || !ok { resp = nil err = status.New(codes.Unavailable, "unavailable").Err() } @@ -276,9 +247,9 @@ func (c *pdServiceClient) NeedRetry(pdErr *pdpb.Error, err error) bool { return !(err == nil && pdErr == nil) } -type errFn func(pdErr *pdpb.Error) bool +type errFn func(*pdpb.Error) bool -func emptyErrorFn(pdErr *pdpb.Error) bool { +func emptyErrorFn(*pdpb.Error) bool { return false } @@ -412,20 +383,22 @@ func (c *pdServiceBalancer) get() (ret ServiceClient) { } type updateKeyspaceIDFunc func() error -type tsoLocalServAddrsUpdatedFunc func(map[string]string) error -type tsoGlobalServAddrUpdatedFunc func(string) error +type tsoLocalServURLsUpdatedFunc func(map[string]string) error +type tsoGlobalServURLUpdatedFunc func(string) error type tsoAllocatorEventSource interface { - // SetTSOLocalServAddrsUpdatedCallback adds a callback which will be called when the local tso + // SetTSOLocalServURLsUpdatedCallback adds a callback which will be called when the local tso // allocator leader list is updated. - SetTSOLocalServAddrsUpdatedCallback(callback tsoLocalServAddrsUpdatedFunc) - // SetTSOGlobalServAddrUpdatedCallback adds a callback which will be called when the global tso + SetTSOLocalServURLsUpdatedCallback(callback tsoLocalServURLsUpdatedFunc) + // SetTSOGlobalServURLUpdatedCallback adds a callback which will be called when the global tso // allocator leader is updated. - SetTSOGlobalServAddrUpdatedCallback(callback tsoGlobalServAddrUpdatedFunc) + SetTSOGlobalServURLUpdatedCallback(callback tsoGlobalServURLUpdatedFunc) } -var _ ServiceDiscovery = (*pdServiceDiscovery)(nil) -var _ tsoAllocatorEventSource = (*pdServiceDiscovery)(nil) +var ( + _ ServiceDiscovery = (*pdServiceDiscovery)(nil) + _ tsoAllocatorEventSource = (*pdServiceDiscovery)(nil) +) // pdServiceDiscovery is the service discovery client of PD/API service which is quorum based type pdServiceDiscovery struct { @@ -440,10 +413,10 @@ type pdServiceDiscovery struct { all atomic.Value // Store as []pdServiceClient apiCandidateNodes [apiKindCount]*pdServiceBalancer // PD follower URLs. Only for tso. - followerAddresses atomic.Value // Store as []string + followerURLs atomic.Value // Store as []string clusterID uint64 - // addr -> a gRPC connection + // url -> a gRPC connection clientConns sync.Map // Store as map[string]*grpc.ClientConn // serviceModeUpdateCb will be called when the service mode gets updated @@ -454,11 +427,11 @@ type pdServiceDiscovery struct { // leader and followers membersChangedCbs []func() // tsoLocalAllocLeadersUpdatedCb will be called when the local tso allocator - // leader list is updated. The input is a map {DC Location -> Leader Addr} - tsoLocalAllocLeadersUpdatedCb tsoLocalServAddrsUpdatedFunc + // leader list is updated. 
The input is a map {DC Location -> Leader URL} + tsoLocalAllocLeadersUpdatedCb tsoLocalServURLsUpdatedFunc // tsoGlobalAllocLeaderUpdatedCb will be called when the global tso allocator // leader is updated. - tsoGlobalAllocLeaderUpdatedCb tsoGlobalServAddrUpdatedFunc + tsoGlobalAllocLeaderUpdatedCb tsoGlobalServURLUpdatedFunc checkMembershipCh chan struct{} @@ -480,18 +453,7 @@ func NewDefaultPDServiceDiscovery( urls []string, tlsCfg *tls.Config, ) *pdServiceDiscovery { var wg sync.WaitGroup - pdsd := &pdServiceDiscovery{ - checkMembershipCh: make(chan struct{}, 1), - ctx: ctx, - cancel: cancel, - wg: &wg, - apiCandidateNodes: [apiKindCount]*pdServiceBalancer{newPDServiceBalancer(emptyErrorFn), newPDServiceBalancer(regionAPIErrorFn)}, - keyspaceID: defaultKeyspaceID, - tlsCfg: tlsCfg, - option: newOption(), - } - pdsd.urls.Store(urls) - return pdsd + return newPDServiceDiscovery(ctx, cancel, &wg, nil, nil, defaultKeyspaceID, urls, tlsCfg, newOption()) } // newPDServiceDiscovery returns a new PD service discovery-based client. @@ -515,6 +477,7 @@ func newPDServiceDiscovery( tlsCfg: tlsCfg, option: option, } + urls = addrsToURLs(urls, tlsCfg) pdsd.urls.Store(urls) return pdsd } @@ -655,7 +618,7 @@ func (c *pdServiceDiscovery) checkLeaderHealth(ctx context.Context) { } func (c *pdServiceDiscovery) checkFollowerHealth(ctx context.Context) { - c.followers.Range(func(key, value any) bool { + c.followers.Range(func(_, value any) bool { // To ensure that the leader's healthy check is not delayed, shorten the duration. ctx, cancel := context.WithTimeout(ctx, MemberHealthCheckInterval/3) defer cancel() @@ -672,7 +635,7 @@ func (c *pdServiceDiscovery) checkFollowerHealth(ctx context.Context) { func (c *pdServiceDiscovery) Close() { c.closeOnce.Do(func() { log.Info("[pd] close pd service discovery client") - c.clientConns.Range(func(key, cc interface{}) bool { + c.clientConns.Range(func(key, cc any) bool { if err := cc.(*grpc.ClientConn).Close(); err != nil { log.Error("[pd] failed to close grpc clientConn", errs.ZapError(errs.ErrCloseGRPCConn, err)) } @@ -698,7 +661,7 @@ func (c *pdServiceDiscovery) SetKeyspaceID(keyspaceID uint32) { } // GetKeyspaceGroupID returns the ID of the keyspace group -func (c *pdServiceDiscovery) GetKeyspaceGroupID() uint32 { +func (*pdServiceDiscovery) GetKeyspaceGroupID() uint32 { // PD/API service only supports the default keyspace group return defaultKeySpaceGroupID } @@ -709,17 +672,17 @@ func (c *pdServiceDiscovery) discoverMicroservice(svcType serviceType) (urls []s case apiService: urls = c.GetServiceURLs() case tsoService: - leaderAddr := c.getLeaderAddr() - if len(leaderAddr) > 0 { - clusterInfo, err := c.getClusterInfo(c.ctx, leaderAddr, c.option.timeout) + leaderURL := c.getLeaderURL() + if len(leaderURL) > 0 { + clusterInfo, err := c.getClusterInfo(c.ctx, leaderURL, c.option.timeout) if err != nil { log.Error("[pd] failed to get cluster info", - zap.String("leader-addr", leaderAddr), errs.ZapError(err)) + zap.String("leader-url", leaderURL), errs.ZapError(err)) return nil, err } urls = clusterInfo.TsoUrls } else { - err = errors.New("failed to get leader addr") + err = errors.New("failed to get leader url") return nil, err } default: @@ -739,26 +702,26 @@ func (c *pdServiceDiscovery) GetServiceURLs() []string { // which is the leader in a quorum-based cluster or the primary in a primary/secondary // configured cluster. 
func (c *pdServiceDiscovery) GetServingEndpointClientConn() *grpc.ClientConn { - if cc, ok := c.clientConns.Load(c.getLeaderAddr()); ok { + if cc, ok := c.clientConns.Load(c.getLeaderURL()); ok { return cc.(*grpc.ClientConn) } return nil } -// GetClientConns returns the mapping {addr -> a gRPC connection} +// GetClientConns returns the mapping {URL -> a gRPC connection} func (c *pdServiceDiscovery) GetClientConns() *sync.Map { return &c.clientConns } -// GetServingAddr returns the leader address -func (c *pdServiceDiscovery) GetServingAddr() string { - return c.getLeaderAddr() +// GetServingURL returns the leader url +func (c *pdServiceDiscovery) GetServingURL() string { + return c.getLeaderURL() } -// GetBackupAddrs gets the addresses of the current reachable followers +// GetBackupURLs gets the URLs of the current reachable followers // in a quorum-based cluster. Used for tso currently. -func (c *pdServiceDiscovery) GetBackupAddrs() []string { - return c.getFollowerAddrs() +func (c *pdServiceDiscovery) GetBackupURLs() []string { + return c.getFollowerURLs() } // getLeaderServiceClient returns the leader ServiceClient. @@ -784,7 +747,7 @@ func (c *pdServiceDiscovery) GetServiceClient() ServiceClient { leaderClient := c.getLeaderServiceClient() if c.option.enableForwarding && !leaderClient.Available() { if followerClient := c.getServiceClientByKind(forwardAPIKind); followerClient != nil { - log.Debug("[pd] use follower client", zap.String("addr", followerClient.GetAddress())) + log.Debug("[pd] use follower client", zap.String("url", followerClient.GetURL())) return followerClient } } @@ -794,7 +757,7 @@ func (c *pdServiceDiscovery) GetServiceClient() ServiceClient { return leaderClient } -// GetAllServiceClients implments ServiceDiscovery +// GetAllServiceClients implements ServiceDiscovery func (c *pdServiceDiscovery) GetAllServiceClients() []ServiceClient { all := c.all.Load() if all == nil { @@ -819,46 +782,48 @@ func (c *pdServiceDiscovery) CheckMemberChanged() error { return c.updateMember() } -// AddServingAddrSwitchedCallback adds callbacks which will be called +// AddServingURLSwitchedCallback adds callbacks which will be called // when the leader is switched. -func (c *pdServiceDiscovery) AddServingAddrSwitchedCallback(callbacks ...func()) { +func (c *pdServiceDiscovery) AddServingURLSwitchedCallback(callbacks ...func()) { c.leaderSwitchedCbs = append(c.leaderSwitchedCbs, callbacks...) } -// AddServiceAddrsSwitchedCallback adds callbacks which will be called when +// AddServiceURLsSwitchedCallback adds callbacks which will be called when // any leader/follower is changed. -func (c *pdServiceDiscovery) AddServiceAddrsSwitchedCallback(callbacks ...func()) { +func (c *pdServiceDiscovery) AddServiceURLsSwitchedCallback(callbacks ...func()) { c.membersChangedCbs = append(c.membersChangedCbs, callbacks...) } -// SetTSOLocalServAddrsUpdatedCallback adds a callback which will be called when the local tso +// SetTSOLocalServURLsUpdatedCallback adds a callback which will be called when the local tso // allocator leader list is updated. 
-func (c *pdServiceDiscovery) SetTSOLocalServAddrsUpdatedCallback(callback tsoLocalServAddrsUpdatedFunc) { +func (c *pdServiceDiscovery) SetTSOLocalServURLsUpdatedCallback(callback tsoLocalServURLsUpdatedFunc) { c.tsoLocalAllocLeadersUpdatedCb = callback } -// SetTSOGlobalServAddrUpdatedCallback adds a callback which will be called when the global tso +// SetTSOGlobalServURLUpdatedCallback adds a callback which will be called when the global tso // allocator leader is updated. -func (c *pdServiceDiscovery) SetTSOGlobalServAddrUpdatedCallback(callback tsoGlobalServAddrUpdatedFunc) { - addr := c.getLeaderAddr() - if len(addr) > 0 { - callback(addr) +func (c *pdServiceDiscovery) SetTSOGlobalServURLUpdatedCallback(callback tsoGlobalServURLUpdatedFunc) { + url := c.getLeaderURL() + if len(url) > 0 { + if err := callback(url); err != nil { + log.Error("[tso] failed to call back when tso global service url update", zap.String("url", url), errs.ZapError(err)) + } } c.tsoGlobalAllocLeaderUpdatedCb = callback } -// getLeaderAddr returns the leader address. -func (c *pdServiceDiscovery) getLeaderAddr() string { - return c.getLeaderServiceClient().GetAddress() +// getLeaderURL returns the leader URL. +func (c *pdServiceDiscovery) getLeaderURL() string { + return c.getLeaderServiceClient().GetURL() } -// getFollowerAddrs returns the follower address. -func (c *pdServiceDiscovery) getFollowerAddrs() []string { - followerAddrs := c.followerAddresses.Load() - if followerAddrs == nil { +// getFollowerURLs returns the follower URLs. +func (c *pdServiceDiscovery) getFollowerURLs() []string { + followerURLs := c.followerURLs.Load() + if followerURLs == nil { return []string{} } - return followerAddrs.([]string) + return followerURLs.([]string) } func (c *pdServiceDiscovery) initClusterID() error { @@ -892,18 +857,20 @@ func (c *pdServiceDiscovery) initClusterID() error { } func (c *pdServiceDiscovery) checkServiceModeChanged() error { - leaderAddr := c.getLeaderAddr() - if len(leaderAddr) == 0 { + leaderURL := c.getLeaderURL() + if len(leaderURL) == 0 { return errors.New("no leader found") } - clusterInfo, err := c.getClusterInfo(c.ctx, leaderAddr, c.option.timeout) + clusterInfo, err := c.getClusterInfo(c.ctx, leaderURL, c.option.timeout) if err != nil { if strings.Contains(err.Error(), "Unimplemented") { // If the method is not supported, we set it to pd mode. // TODO: it's a hack way to solve the compatibility issue. // we need to remove this after all maintained version supports the method. 
- c.serviceModeUpdateCb(pdpb.ServiceMode_PD_SVC_MODE) + if c.serviceModeUpdateCb != nil { + c.serviceModeUpdateCb(pdpb.ServiceMode_PD_SVC_MODE) + } return nil } return err @@ -934,7 +901,7 @@ func (c *pdServiceDiscovery) updateMember() error { var errTSO error if err == nil { if members.GetLeader() == nil || len(members.GetLeader().GetClientUrls()) == 0 { - err = errs.ErrClientGetLeader.FastGenByArgs("leader address doesn't exist") + err = errs.ErrClientGetLeader.FastGenByArgs("leader url doesn't exist") } // Still need to update TsoAllocatorLeaders, even if there is no PD leader errTSO = c.switchTSOAllocatorLeaders(members.GetTsoAllocatorLeaders()) @@ -942,8 +909,8 @@ func (c *pdServiceDiscovery) updateMember() error { // Failed to get members if err != nil { - log.Info("[pd] cannot update member from this address", - zap.String("address", url), + log.Info("[pd] cannot update member from this url", + zap.String("url", url), errs.ZapError(err)) select { case <-c.ctx.Done(): @@ -1026,68 +993,67 @@ func (c *pdServiceDiscovery) updateURLs(members []*pdpb.Member) { log.Info("[pd] update member urls", zap.Strings("old-urls", oldURLs), zap.Strings("new-urls", urls)) } -func (c *pdServiceDiscovery) switchLeader(addrs []string) (bool, error) { - // FIXME: How to safely compare leader urls? For now, only allows one client url. - addr := addrs[0] +func (c *pdServiceDiscovery) switchLeader(url string) (bool, error) { oldLeader := c.getLeaderServiceClient() - if addr == oldLeader.GetAddress() && oldLeader.GetClientConn() != nil { + if url == oldLeader.GetURL() && oldLeader.GetClientConn() != nil { return false, nil } - newConn, err := c.GetOrCreateGRPCConn(addr) + newConn, err := c.GetOrCreateGRPCConn(url) // If gRPC connect is created successfully or leader is new, still saves. - if addr != oldLeader.GetAddress() || newConn != nil { + if url != oldLeader.GetURL() || newConn != nil { // Set PD leader and Global TSO Allocator (which is also the PD leader) - leaderClient := newPDServiceClient(addr, addr, c.tlsCfg, newConn, true) + leaderClient := newPDServiceClient(url, url, newConn, true) c.leader.Store(leaderClient) } // Run callbacks if c.tsoGlobalAllocLeaderUpdatedCb != nil { - if err := c.tsoGlobalAllocLeaderUpdatedCb(addr); err != nil { + if err := c.tsoGlobalAllocLeaderUpdatedCb(url); err != nil { return true, err } } for _, cb := range c.leaderSwitchedCbs { cb() } - log.Info("[pd] switch leader", zap.String("new-leader", addr), zap.String("old-leader", oldLeader.GetAddress())) + log.Info("[pd] switch leader", zap.String("new-leader", url), zap.String("old-leader", oldLeader.GetURL())) return true, err } -func (c *pdServiceDiscovery) updateFollowers(members []*pdpb.Member, leader *pdpb.Member) (changed bool) { +func (c *pdServiceDiscovery) updateFollowers(members []*pdpb.Member, leaderID uint64, leaderURL string) (changed bool) { followers := make(map[string]*pdServiceClient) c.followers.Range(func(key, value any) bool { followers[key.(string)] = value.(*pdServiceClient) return true }) - var followerAddrs []string + var followerURLs []string for _, member := range members { - if member.GetMemberId() != leader.GetMemberId() { + if member.GetMemberId() != leaderID { if len(member.GetClientUrls()) > 0 { - followerAddrs = append(followerAddrs, member.GetClientUrls()...) + // Now we don't apply ServiceClient for TSO Follower Proxy, so just keep the all URLs. + followerURLs = append(followerURLs, member.GetClientUrls()...) // FIXME: How to safely compare urls(also for leader)? 
For now, only allows one client url. - addr := member.GetClientUrls()[0] - if client, ok := c.followers.Load(addr); ok { + url := pickMatchedURL(member.GetClientUrls(), c.tlsCfg) + if client, ok := c.followers.Load(url); ok { if client.(*pdServiceClient).GetClientConn() == nil { - conn, err := c.GetOrCreateGRPCConn(addr) + conn, err := c.GetOrCreateGRPCConn(url) if err != nil || conn == nil { - log.Warn("[pd] failed to connect follower", zap.String("follower", addr), errs.ZapError(err)) + log.Warn("[pd] failed to connect follower", zap.String("follower", url), errs.ZapError(err)) continue } - follower := newPDServiceClient(addr, leader.GetClientUrls()[0], c.tlsCfg, conn, false) - c.followers.Store(addr, follower) + follower := newPDServiceClient(url, leaderURL, conn, false) + c.followers.Store(url, follower) changed = true } - delete(followers, addr) + delete(followers, url) } else { changed = true - conn, err := c.GetOrCreateGRPCConn(addr) - follower := newPDServiceClient(addr, leader.GetClientUrls()[0], c.tlsCfg, conn, false) + conn, err := c.GetOrCreateGRPCConn(url) + follower := newPDServiceClient(url, leaderURL, conn, false) if err != nil || conn == nil { - log.Warn("[pd] failed to connect follower", zap.String("follower", addr), errs.ZapError(err)) + log.Warn("[pd] failed to connect follower", zap.String("follower", url), errs.ZapError(err)) } - c.followers.LoadOrStore(addr, follower) + c.followers.LoadOrStore(url, follower) } } } @@ -1098,13 +1064,15 @@ func (c *pdServiceDiscovery) updateFollowers(members []*pdpb.Member, leader *pdp c.followers.Delete(key) } } - c.followerAddresses.Store(followerAddrs) + c.followerURLs.Store(followerURLs) return } func (c *pdServiceDiscovery) updateServiceClient(members []*pdpb.Member, leader *pdpb.Member) error { - leaderChanged, err := c.switchLeader(leader.GetClientUrls()) - followerChanged := c.updateFollowers(members, leader) + // FIXME: How to safely compare leader urls? For now, only allows one client url. + leaderURL := pickMatchedURL(leader.GetClientUrls(), c.tlsCfg) + leaderChanged, err := c.switchLeader(leaderURL) + followerChanged := c.updateFollowers(members, leader.GetMemberId(), leaderURL) // don't need to recreate balancer if no changess. if !followerChanged && !leaderChanged { return err @@ -1151,7 +1119,55 @@ func (c *pdServiceDiscovery) switchTSOAllocatorLeaders(allocatorMap map[string]* return nil } -// GetOrCreateGRPCConn returns the corresponding grpc client connection of the given addr -func (c *pdServiceDiscovery) GetOrCreateGRPCConn(addr string) (*grpc.ClientConn, error) { - return grpcutil.GetOrCreateGRPCConn(c.ctx, &c.clientConns, addr, c.tlsCfg, c.option.gRPCDialOptions...) +// GetOrCreateGRPCConn returns the corresponding grpc client connection of the given URL. +func (c *pdServiceDiscovery) GetOrCreateGRPCConn(url string) (*grpc.ClientConn, error) { + return grpcutil.GetOrCreateGRPCConn(c.ctx, &c.clientConns, url, c.tlsCfg, c.option.gRPCDialOptions...) +} + +func addrsToURLs(addrs []string, tlsCfg *tls.Config) []string { + // Add default schema "http://" to addrs. 
+ urls := make([]string, 0, len(addrs)) + for _, addr := range addrs { + urls = append(urls, modifyURLScheme(addr, tlsCfg)) + } + return urls +} + +func modifyURLScheme(url string, tlsCfg *tls.Config) string { + if tlsCfg == nil { + if strings.HasPrefix(url, httpsSchemePrefix) { + url = httpSchemePrefix + strings.TrimPrefix(url, httpsSchemePrefix) + } else if !strings.HasPrefix(url, httpSchemePrefix) { + url = httpSchemePrefix + url + } + } else { + if strings.HasPrefix(url, httpSchemePrefix) { + url = httpsSchemePrefix + strings.TrimPrefix(url, httpSchemePrefix) + } else if !strings.HasPrefix(url, httpsSchemePrefix) { + url = httpsSchemePrefix + url + } + } + return url +} + +// pickMatchedURL picks the matched URL based on the TLS config. +// Note: please make sure the URLs are valid. +func pickMatchedURL(urls []string, tlsCfg *tls.Config) string { + for _, uStr := range urls { + u, err := url.Parse(uStr) + if err != nil { + continue + } + if tlsCfg != nil && u.Scheme == httpsScheme { + return uStr + } + if tlsCfg == nil && u.Scheme == httpScheme { + return uStr + } + } + ret := modifyURLScheme(urls[0], tlsCfg) + log.Warn("[pd] no matched url found", zap.Strings("urls", urls), + zap.Bool("tls-enabled", tlsCfg != nil), + zap.String("attempted-url", ret)) + return ret } diff --git a/client/pd_service_discovery_test.go b/client/pd_service_discovery_test.go index 1dc73af1f5f..44171873b1a 100644 --- a/client/pd_service_discovery_test.go +++ b/client/pd_service_discovery_test.go @@ -29,9 +29,11 @@ import ( "github.com/pingcap/kvproto/pkg/pdpb" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/tikv/pd/client/errs" "github.com/tikv/pd/client/grpcutil" "github.com/tikv/pd/client/testutil" "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" pb "google.golang.org/grpc/examples/helloworld/helloworld" "google.golang.org/grpc/health" healthpb "google.golang.org/grpc/health/grpc_health_v1" @@ -136,13 +138,19 @@ func (suite *serviceClientTestSuite) SetupSuite() { go suite.leaderServer.run() go suite.followerServer.run() for i := 0; i < 10; i++ { - leaderConn, err1 := grpc.Dial(suite.leaderServer.addr, grpc.WithInsecure()) //nolint - followerConn, err2 := grpc.Dial(suite.followerServer.addr, grpc.WithInsecure()) //nolint + leaderConn, err1 := grpc.Dial(suite.leaderServer.addr, grpc.WithTransportCredentials(insecure.NewCredentials())) + followerConn, err2 := grpc.Dial(suite.followerServer.addr, grpc.WithTransportCredentials(insecure.NewCredentials())) if err1 == nil && err2 == nil { - suite.followerClient = newPDServiceClient(suite.followerServer.addr, suite.leaderServer.addr, nil, followerConn, false) - suite.leaderClient = newPDServiceClient(suite.leaderServer.addr, suite.leaderServer.addr, nil, leaderConn, true) + suite.followerClient = newPDServiceClient( + modifyURLScheme(suite.followerServer.addr, nil), + modifyURLScheme(suite.leaderServer.addr, nil), + followerConn, false) + suite.leaderClient = newPDServiceClient( + modifyURLScheme(suite.leaderServer.addr, nil), + modifyURLScheme(suite.leaderServer.addr, nil), + leaderConn, true) suite.followerServer.server.leaderConn = suite.leaderClient.GetClientConn() - suite.followerServer.server.leaderAddr = suite.leaderClient.GetAddress() + suite.followerServer.server.leaderAddr = suite.leaderClient.GetURL() return } time.Sleep(50 * time.Millisecond) @@ -158,21 +166,21 @@ func (suite *serviceClientTestSuite) TearDownTest() { func (suite *serviceClientTestSuite) TearDownSuite() { 
suite.leaderServer.grpcServer.GracefulStop() suite.followerServer.grpcServer.GracefulStop() + suite.leaderClient.GetClientConn().Close() + suite.followerClient.GetClientConn().Close() suite.clean() } func (suite *serviceClientTestSuite) TestServiceClient() { re := suite.Require() - leaderAddress := suite.leaderServer.addr - followerAddress := suite.followerServer.addr + leaderAddress := modifyURLScheme(suite.leaderServer.addr, nil) + followerAddress := modifyURLScheme(suite.followerServer.addr, nil) follower := suite.followerClient leader := suite.leaderClient - re.Equal(follower.GetAddress(), followerAddress) - re.Equal(leader.GetAddress(), leaderAddress) - re.Equal(follower.GetHTTPAddress(), "http://"+followerAddress) - re.Equal(leader.GetHTTPAddress(), "http://"+leaderAddress) + re.Equal(follower.GetURL(), followerAddress) + re.Equal(leader.GetURL(), leaderAddress) re.True(follower.Available()) re.True(leader.Available()) @@ -198,7 +206,7 @@ func (suite *serviceClientTestSuite) TestServiceClient() { re.NotNil(leaderConn) _, err := pb.NewGreeterClient(followerConn).SayHello(suite.ctx, &pb.HelloRequest{Name: "pd"}) - re.ErrorContains(err, "not leader") + re.ErrorContains(err, errs.NotLeaderErr) resp, err := pb.NewGreeterClient(leaderConn).SayHello(suite.ctx, &pb.HelloRequest{Name: "pd"}) re.NoError(err) re.Equal("Hello pd", resp.GetMessage()) @@ -298,18 +306,78 @@ func (suite *serviceClientTestSuite) TestServiceClientBalancer() { re.Equal(int32(5), suite.followerServer.server.getForwardCount()) } -func TestHTTPScheme(t *testing.T) { +func TestServiceClientScheme(t *testing.T) { re := require.New(t) - cli := newPDServiceClient("127.0.0.1:2379", "127.0.0.1:2379", nil, nil, false) - re.Equal("http://127.0.0.1:2379", cli.GetHTTPAddress()) - cli = newPDServiceClient("https://127.0.0.1:2379", "127.0.0.1:2379", nil, nil, false) - re.Equal("http://127.0.0.1:2379", cli.GetHTTPAddress()) - cli = newPDServiceClient("http://127.0.0.1:2379", "127.0.0.1:2379", nil, nil, false) - re.Equal("http://127.0.0.1:2379", cli.GetHTTPAddress()) - cli = newPDServiceClient("127.0.0.1:2379", "127.0.0.1:2379", &tls.Config{}, nil, false) - re.Equal("https://127.0.0.1:2379", cli.GetHTTPAddress()) - cli = newPDServiceClient("https://127.0.0.1:2379", "127.0.0.1:2379", &tls.Config{}, nil, false) - re.Equal("https://127.0.0.1:2379", cli.GetHTTPAddress()) - cli = newPDServiceClient("http://127.0.0.1:2379", "127.0.0.1:2379", &tls.Config{}, nil, false) - re.Equal("https://127.0.0.1:2379", cli.GetHTTPAddress()) + cli := newPDServiceClient(modifyURLScheme("127.0.0.1:2379", nil), modifyURLScheme("127.0.0.1:2379", nil), nil, false) + re.Equal("http://127.0.0.1:2379", cli.GetURL()) + cli = newPDServiceClient(modifyURLScheme("https://127.0.0.1:2379", nil), modifyURLScheme("127.0.0.1:2379", nil), nil, false) + re.Equal("http://127.0.0.1:2379", cli.GetURL()) + cli = newPDServiceClient(modifyURLScheme("http://127.0.0.1:2379", nil), modifyURLScheme("127.0.0.1:2379", nil), nil, false) + re.Equal("http://127.0.0.1:2379", cli.GetURL()) + cli = newPDServiceClient(modifyURLScheme("127.0.0.1:2379", &tls.Config{}), modifyURLScheme("127.0.0.1:2379", &tls.Config{}), nil, false) + re.Equal("https://127.0.0.1:2379", cli.GetURL()) + cli = newPDServiceClient(modifyURLScheme("https://127.0.0.1:2379", &tls.Config{}), modifyURLScheme("127.0.0.1:2379", &tls.Config{}), nil, false) + re.Equal("https://127.0.0.1:2379", cli.GetURL()) + cli = newPDServiceClient(modifyURLScheme("http://127.0.0.1:2379", &tls.Config{}), modifyURLScheme("127.0.0.1:2379", 
&tls.Config{}), nil, false) + re.Equal("https://127.0.0.1:2379", cli.GetURL()) +} + +func TestSchemeFunction(t *testing.T) { + re := require.New(t) + tlsCfg := &tls.Config{} + + endpoints1 := []string{ + "http://tc-pd:2379", + "tc-pd:2379", + "https://tc-pd:2379", + } + endpoints2 := []string{ + "127.0.0.1:2379", + "http://127.0.0.1:2379", + "https://127.0.0.1:2379", + } + urls := addrsToURLs(endpoints1, tlsCfg) + for _, u := range urls { + re.Equal("https://tc-pd:2379", u) + } + urls = addrsToURLs(endpoints2, tlsCfg) + for _, u := range urls { + re.Equal("https://127.0.0.1:2379", u) + } + urls = addrsToURLs(endpoints1, nil) + for _, u := range urls { + re.Equal("http://tc-pd:2379", u) + } + urls = addrsToURLs(endpoints2, nil) + for _, u := range urls { + re.Equal("http://127.0.0.1:2379", u) + } + + re.Equal("https://127.0.0.1:2379", modifyURLScheme("https://127.0.0.1:2379", tlsCfg)) + re.Equal("https://127.0.0.1:2379", modifyURLScheme("http://127.0.0.1:2379", tlsCfg)) + re.Equal("https://127.0.0.1:2379", modifyURLScheme("127.0.0.1:2379", tlsCfg)) + re.Equal("https://tc-pd:2379", modifyURLScheme("tc-pd:2379", tlsCfg)) + re.Equal("http://127.0.0.1:2379", modifyURLScheme("https://127.0.0.1:2379", nil)) + re.Equal("http://127.0.0.1:2379", modifyURLScheme("http://127.0.0.1:2379", nil)) + re.Equal("http://127.0.0.1:2379", modifyURLScheme("127.0.0.1:2379", nil)) + re.Equal("http://tc-pd:2379", modifyURLScheme("tc-pd:2379", nil)) + + urls = []string{ + "http://127.0.0.1:2379", + "https://127.0.0.1:2379", + } + re.Equal("https://127.0.0.1:2379", pickMatchedURL(urls, tlsCfg)) + urls = []string{ + "http://127.0.0.1:2379", + } + re.Equal("https://127.0.0.1:2379", pickMatchedURL(urls, tlsCfg)) + urls = []string{ + "http://127.0.0.1:2379", + "https://127.0.0.1:2379", + } + re.Equal("http://127.0.0.1:2379", pickMatchedURL(urls, nil)) + urls = []string{ + "https://127.0.0.1:2379", + } + re.Equal("http://127.0.0.1:2379", pickMatchedURL(urls, nil)) } diff --git a/client/resource_group/controller/OWNERS b/client/resource_group/controller/OWNERS new file mode 100644 index 00000000000..aa02465dbd9 --- /dev/null +++ b/client/resource_group/controller/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs at https://go.k8s.io/owners +options: + no_parent_owners: true +filters: + "(OWNERS|config\\.go)$": + approvers: + - sig-critical-approvers-config diff --git a/client/resource_group/controller/config.go b/client/resource_group/controller/config.go index ffc360c385c..a4176c073cc 100644 --- a/client/resource_group/controller/config.go +++ b/client/resource_group/controller/config.go @@ -52,6 +52,10 @@ const ( defaultTargetPeriod = 5 * time.Second // defaultMaxWaitDuration is the max duration to wait for the token before throwing error. defaultMaxWaitDuration = 30 * time.Second + // defaultWaitRetryTimes is the times to retry when waiting for the token. + defaultWaitRetryTimes = 10 + // defaultWaitRetryInterval is the interval to retry when waiting for the token. + defaultWaitRetryInterval = 50 * time.Millisecond ) const ( @@ -85,6 +89,12 @@ type Config struct { // LTBMaxWaitDuration is the max wait time duration for local token bucket. LTBMaxWaitDuration Duration `toml:"ltb-max-wait-duration" json:"ltb-max-wait-duration"` + // WaitRetryInterval is the interval to retry when waiting for the token. + WaitRetryInterval Duration `toml:"wait-retry-interval" json:"wait-retry-interval"` + + // WaitRetryTimes is the times to retry when waiting for the token. 
+ WaitRetryTimes int `toml:"wait-retry-times" json:"wait-retry-times"` + // RequestUnit is the configuration determines the coefficients of the RRU and WRU cost. // This configuration should be modified carefully. RequestUnit RequestUnitConfig `toml:"request-unit" json:"request-unit"` @@ -98,6 +108,8 @@ func DefaultConfig() *Config { return &Config{ DegradedModeWaitDuration: NewDuration(defaultDegradedModeWaitDuration), LTBMaxWaitDuration: NewDuration(defaultMaxWaitDuration), + WaitRetryInterval: NewDuration(defaultWaitRetryInterval), + WaitRetryTimes: defaultWaitRetryTimes, RequestUnit: DefaultRequestUnitConfig(), EnableControllerTraceLog: false, } @@ -155,6 +167,8 @@ type RUConfig struct { // some config for client LTBMaxWaitDuration time.Duration + WaitRetryInterval time.Duration + WaitRetryTimes int DegradedModeWaitDuration time.Duration } @@ -176,6 +190,8 @@ func GenerateRUConfig(config *Config) *RUConfig { WriteBytesCost: RequestUnit(config.RequestUnit.WriteCostPerByte), CPUMsCost: RequestUnit(config.RequestUnit.CPUMsCost), LTBMaxWaitDuration: config.LTBMaxWaitDuration.Duration, + WaitRetryInterval: config.WaitRetryInterval.Duration, + WaitRetryTimes: config.WaitRetryTimes, DegradedModeWaitDuration: config.DegradedModeWaitDuration.Duration, } } diff --git a/client/resource_group/controller/controller.go b/client/resource_group/controller/controller.go old mode 100755 new mode 100644 index 326f564b1df..1910e37eff8 --- a/client/resource_group/controller/controller.go +++ b/client/resource_group/controller/controller.go @@ -39,8 +39,6 @@ import ( const ( controllerConfigPath = "resource_group/controller" - maxRetry = 10 - retryInterval = 50 * time.Millisecond maxNotificationChanLen = 200 needTokensAmplification = 1.1 trickleReserveDuration = 1250 * time.Millisecond @@ -105,6 +103,27 @@ func WithMaxWaitDuration(d time.Duration) ResourceControlCreateOption { } } +// WithWaitRetryInterval is the option to set the retry interval when waiting for the token. +func WithWaitRetryInterval(d time.Duration) ResourceControlCreateOption { + return func(controller *ResourceGroupsController) { + controller.ruConfig.WaitRetryInterval = d + } +} + +// WithWaitRetryTimes is the option to set the times to retry when waiting for the token. +func WithWaitRetryTimes(times int) ResourceControlCreateOption { + return func(controller *ResourceGroupsController) { + controller.ruConfig.WaitRetryTimes = times + } +} + +// WithDegradedModeWaitDuration is the option to set the wait duration for degraded mode. +func WithDegradedModeWaitDuration(d time.Duration) ResourceControlCreateOption { + return func(controller *ResourceGroupsController) { + controller.ruConfig.DegradedModeWaitDuration = d + } +} + var _ ResourceGroupKVInterceptor = (*ResourceGroupsController)(nil) // ResourceGroupsController implements ResourceGroupKVInterceptor. 
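// Illustrative sketch, not from the patch itself: the next hunk switches
// loadServerConfig to unmarshal the server payload into DefaultConfig()
// instead of a zero &Config{}. The stand-in struct below is hypothetical and
// uses plain time.Duration rather than the client's Duration wrapper; it only
// shows why starting from the defaults preserves the new wait-retry knobs
// (50ms interval, 10 retries) when the server-side config omits them.
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

type retryConfig struct {
	WaitRetryInterval time.Duration `json:"wait-retry-interval"`
	WaitRetryTimes    int           `json:"wait-retry-times"`
}

// defaultRetryConfig mirrors defaultWaitRetryInterval / defaultWaitRetryTimes.
func defaultRetryConfig() *retryConfig {
	return &retryConfig{WaitRetryInterval: 50 * time.Millisecond, WaitRetryTimes: 10}
}

func main() {
	// A server payload that only overrides one field.
	payload := []byte(`{"wait-retry-times": 20}`)

	// Unmarshalling into a zero struct silently loses the defaults.
	zero := &retryConfig{}
	_ = json.Unmarshal(payload, zero)
	fmt.Println(zero.WaitRetryInterval, zero.WaitRetryTimes) // 0s 20

	// Unmarshalling into the defaults keeps them for absent keys.
	cfg := defaultRetryConfig()
	_ = json.Unmarshal(payload, cfg)
	fmt.Println(cfg.WaitRetryInterval, cfg.WaitRetryTimes) // 50ms 20
}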
@@ -186,7 +205,7 @@ func loadServerConfig(ctx context.Context, provider ResourceGroupProvider) (*Con log.Warn("[resource group controller] server does not save config, load config failed") return DefaultConfig(), nil } - config := &Config{} + config := DefaultConfig() err = json.Unmarshal(kvs[0].GetValue(), config) if err != nil { return nil, err @@ -367,7 +386,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) { } for _, item := range resp { cfgRevision = item.Kv.ModRevision - config := &Config{} + config := DefaultConfig() if err := json.Unmarshal(item.Kv.Value, config); err != nil { continue } @@ -386,8 +405,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) { } case gc := <-c.tokenBucketUpdateChan: - now := gc.run.now - go gc.handleTokenBucketUpdateEvent(c.loopCtx, now) + go gc.handleTokenBucketUpdateEvent(c.loopCtx) } } }() @@ -461,7 +479,7 @@ func (c *ResourceGroupsController) cleanUpResourceGroup() { } func (c *ResourceGroupsController) executeOnAllGroups(f func(controller *groupCostController)) { - c.groupsController.Range(func(name, value any) bool { + c.groupsController.Range(func(_, value any) bool { f(value.(*groupCostController)) return true }) @@ -492,12 +510,12 @@ func (c *ResourceGroupsController) handleTokenBucketResponse(resp []*rmpb.TokenB func (c *ResourceGroupsController) collectTokenBucketRequests(ctx context.Context, source string, typ selectType) { c.run.currentRequests = make([]*rmpb.TokenBucketRequest, 0) - c.groupsController.Range(func(name, value any) bool { + c.groupsController.Range(func(_, value any) bool { gc := value.(*groupCostController) request := gc.collectRequestAndConsumption(typ) if request != nil { c.run.currentRequests = append(c.run.currentRequests, request) - gc.tokenRequestCounter.Inc() + gc.metrics.tokenRequestCounter.Inc() } return true }) @@ -564,7 +582,6 @@ func (c *ResourceGroupsController) IsBackgroundRequest(ctx context.Context, resourceGroupName, requestResource string) bool { gc, err := c.tryGetResourceGroup(ctx, resourceGroupName) if err != nil { - failedRequestCounter.WithLabelValues(resourceGroupName).Inc() return false } @@ -577,7 +594,6 @@ func (c *ResourceGroupsController) checkBackgroundSettings(ctx context.Context, resourceGroupName := "default" gc, err := c.tryGetResourceGroup(ctx, resourceGroupName) if err != nil { - failedRequestCounter.WithLabelValues(resourceGroupName).Inc() return false } bg = gc.getMeta().BackgroundSettings @@ -616,13 +632,9 @@ type groupCostController struct { calculators []ResourceCalculator handleRespFunc func(*rmpb.TokenBucketResponse) - successfulRequestDuration prometheus.Observer - failedLimitReserveDuration prometheus.Observer - requestRetryCounter prometheus.Counter - failedRequestCounter prometheus.Counter - tokenRequestCounter prometheus.Counter - - mu struct { + // metrics + metrics *groupMetricsCollection + mu struct { sync.Mutex consumption *rmpb.Consumption storeCounter map[uint64]*rmpb.Consumption @@ -669,6 +681,30 @@ type groupCostController struct { tombstone bool } +type groupMetricsCollection struct { + successfulRequestDuration prometheus.Observer + failedLimitReserveDuration prometheus.Observer + requestRetryCounter prometheus.Counter + failedRequestCounterWithOthers prometheus.Counter + failedRequestCounterWithThrottled prometheus.Counter + tokenRequestCounter prometheus.Counter +} + +func initMetrics(oldName, name string) *groupMetricsCollection { + const ( + otherType = "others" + throttledType = "throttled" + ) + return &groupMetricsCollection{ + 
successfulRequestDuration: successfulRequestDuration.WithLabelValues(oldName, name), + failedLimitReserveDuration: failedLimitReserveDuration.WithLabelValues(oldName, name), + failedRequestCounterWithOthers: failedRequestCounter.WithLabelValues(oldName, name, otherType), + failedRequestCounterWithThrottled: failedRequestCounter.WithLabelValues(oldName, name, throttledType), + requestRetryCounter: requestRetryCounter.WithLabelValues(oldName, name), + tokenRequestCounter: resourceGroupTokenRequestCounter.WithLabelValues(oldName, name), + } +} + type tokenCounter struct { getTokenBucketFunc func() *rmpb.TokenBucket @@ -709,16 +745,13 @@ func newGroupCostController( default: return nil, errs.ErrClientResourceGroupConfigUnavailable.FastGenByArgs("not supports the resource type") } + ms := initMetrics(group.Name, group.Name) gc := &groupCostController{ - meta: group, - name: group.Name, - mainCfg: mainCfg, - mode: group.GetMode(), - successfulRequestDuration: successfulRequestDuration.WithLabelValues(group.Name, group.Name), - failedLimitReserveDuration: failedLimitReserveDuration.WithLabelValues(group.Name, group.Name), - failedRequestCounter: failedRequestCounter.WithLabelValues(group.Name, group.Name), - requestRetryCounter: requestRetryCounter.WithLabelValues(group.Name, group.Name), - tokenRequestCounter: resourceGroupTokenRequestCounter.WithLabelValues(group.Name, group.Name), + meta: group, + name: group.Name, + mainCfg: mainCfg, + mode: group.GetMode(), + metrics: ms, calculators: []ResourceCalculator{ newKVCalculator(mainCfg), newSQLCalculator(mainCfg), @@ -773,7 +806,7 @@ func (gc *groupCostController) initRunState() { case rmpb.GroupMode_RUMode: gc.run.requestUnitTokens = make(map[rmpb.RequestUnitType]*tokenCounter) for typ := range requestUnitLimitTypeList { - limiter := NewLimiterWithCfg(now, cfgFunc(getRUTokenBucketSetting(gc.meta, typ)), gc.lowRUNotifyChan) + limiter := NewLimiterWithCfg(gc.name, now, cfgFunc(getRUTokenBucketSetting(gc.meta, typ)), gc.lowRUNotifyChan) counter := &tokenCounter{ limiter: limiter, avgRUPerSec: 0, @@ -787,7 +820,7 @@ func (gc *groupCostController) initRunState() { case rmpb.GroupMode_RawMode: gc.run.resourceTokens = make(map[rmpb.RawResourceType]*tokenCounter) for typ := range requestResourceLimitTypeList { - limiter := NewLimiterWithCfg(now, cfgFunc(getRawResourceTokenBucketSetting(gc.meta, typ)), gc.lowRUNotifyChan) + limiter := NewLimiterWithCfg(gc.name, now, cfgFunc(getRawResourceTokenBucketSetting(gc.meta, typ)), gc.lowRUNotifyChan) counter := &tokenCounter{ limiter: limiter, avgRUPerSec: 0, @@ -846,7 +879,7 @@ func (gc *groupCostController) resetEmergencyTokenAcquisition() { } } -func (gc *groupCostController) handleTokenBucketUpdateEvent(ctx context.Context, now time.Time) { +func (gc *groupCostController) handleTokenBucketUpdateEvent(ctx context.Context) { switch gc.mode { case rmpb.GroupMode_RawMode: for _, counter := range gc.run.resourceTokens { @@ -863,7 +896,7 @@ func (gc *groupCostController) handleTokenBucketUpdateEvent(ctx context.Context, counter.notify.setupNotificationCh = nil threshold := counter.notify.setupNotificationThreshold counter.notify.mu.Unlock() - counter.limiter.SetupNotificationThreshold(now, threshold) + counter.limiter.SetupNotificationThreshold(threshold) case <-ctx.Done(): return } @@ -884,7 +917,7 @@ func (gc *groupCostController) handleTokenBucketUpdateEvent(ctx context.Context, counter.notify.setupNotificationCh = nil threshold := counter.notify.setupNotificationThreshold counter.notify.mu.Unlock() - 
counter.limiter.SetupNotificationThreshold(now, threshold) + counter.limiter.SetupNotificationThreshold(threshold) case <-ctx.Done(): return } @@ -1208,7 +1241,7 @@ func (gc *groupCostController) onRequestWait( var i int var d time.Duration retryLoop: - for i = 0; i < maxRetry; i++ { + for i = 0; i < gc.mainCfg.WaitRetryTimes; i++ { switch gc.mode { case rmpb.GroupMode_RawMode: res := make([]*Reservation, 0, len(requestResourceLimitTypeList)) @@ -1217,7 +1250,7 @@ func (gc *groupCostController) onRequestWait( res = append(res, counter.limiter.Reserve(ctx, gc.mainCfg.LTBMaxWaitDuration, now, v)) } } - if d, err = WaitReservations(ctx, now, res); err == nil { + if d, err = WaitReservations(ctx, now, res); err == nil || errs.ErrClientResourceGroupThrottled.NotEqual(err) { break retryLoop } case rmpb.GroupMode_RUMode: @@ -1227,18 +1260,20 @@ func (gc *groupCostController) onRequestWait( res = append(res, counter.limiter.Reserve(ctx, gc.mainCfg.LTBMaxWaitDuration, now, v)) } } - if d, err = WaitReservations(ctx, now, res); err == nil { + if d, err = WaitReservations(ctx, now, res); err == nil || errs.ErrClientResourceGroupThrottled.NotEqual(err) { break retryLoop } } - gc.requestRetryCounter.Inc() - time.Sleep(retryInterval) - waitDuration += retryInterval + gc.metrics.requestRetryCounter.Inc() + time.Sleep(gc.mainCfg.WaitRetryInterval) + waitDuration += gc.mainCfg.WaitRetryInterval } if err != nil { - gc.failedRequestCounter.Inc() - if d.Seconds() > 0 { - gc.failedLimitReserveDuration.Observe(d.Seconds()) + if errs.ErrClientResourceGroupThrottled.Equal(err) { + gc.metrics.failedRequestCounterWithThrottled.Inc() + gc.metrics.failedLimitReserveDuration.Observe(d.Seconds()) + } else { + gc.metrics.failedRequestCounterWithOthers.Inc() } gc.mu.Lock() sub(gc.mu.consumption, delta) @@ -1248,7 +1283,7 @@ func (gc *groupCostController) onRequestWait( }) return nil, nil, waitDuration, 0, err } - gc.successfulRequestDuration.Observe(d.Seconds()) + gc.metrics.successfulRequestDuration.Observe(d.Seconds()) waitDuration += d } diff --git a/client/resource_group/controller/controller_test.go b/client/resource_group/controller/controller_test.go index fea4a133ad0..4f4ec592793 100644 --- a/client/resource_group/controller/controller_test.go +++ b/client/resource_group/controller/controller_test.go @@ -26,6 +26,7 @@ import ( rmpb "github.com/pingcap/kvproto/pkg/resource_manager" "github.com/stretchr/testify/require" + "github.com/tikv/pd/client/errs" ) func createTestGroupCostController(re *require.Assertions) *groupCostController { @@ -117,3 +118,17 @@ func TestRequestAndResponseConsumption(t *testing.T) { re.Equal(expectedConsumption.TotalCpuTimeMs, consumption.TotalCpuTimeMs, caseNum) } } + +func TestResourceGroupThrottledError(t *testing.T) { + re := require.New(t) + gc := createTestGroupCostController(re) + gc.initRunState() + req := &TestRequestInfo{ + isWrite: true, + writeBytes: 10000000, + } + // The group is throttled + _, _, _, _, err := gc.onRequestWait(context.TODO(), req) + re.Error(err) + re.True(errs.ErrClientResourceGroupThrottled.Equal(err)) +} diff --git a/client/resource_group/controller/limiter.go b/client/resource_group/controller/limiter.go index 63c94a9782b..2e42f591b8b 100644 --- a/client/resource_group/controller/limiter.go +++ b/client/resource_group/controller/limiter.go @@ -26,6 +26,7 @@ import ( "time" "github.com/pingcap/log" + "github.com/prometheus/client_golang/prometheus" "github.com/tikv/pd/client/errs" "go.uber.org/zap" ) @@ -81,6 +82,15 @@ type Limiter struct { isLowProcess 
bool // remainingNotifyTimes is used to limit notify when the speed limit is already set. remainingNotifyTimes int + name string + + // metrics + metrics *limiterMetricsCollection +} + +// limiterMetricsCollection is a collection of metrics for a limiter. +type limiterMetricsCollection struct { + lowTokenNotifyCounter prometheus.Counter } // Limit returns the maximum overall event rate. @@ -106,8 +116,9 @@ func NewLimiter(now time.Time, r Limit, b int64, tokens float64, lowTokensNotify // NewLimiterWithCfg returns a new Limiter that allows events up to rate r and permits // bursts of at most b tokens. -func NewLimiterWithCfg(now time.Time, cfg tokenBucketReconfigureArgs, lowTokensNotifyChan chan<- struct{}) *Limiter { +func NewLimiterWithCfg(name string, now time.Time, cfg tokenBucketReconfigureArgs, lowTokensNotifyChan chan<- struct{}) *Limiter { lim := &Limiter{ + name: name, limit: Limit(cfg.NewRate), last: now, tokens: cfg.NewTokens, @@ -115,6 +126,9 @@ func NewLimiterWithCfg(now time.Time, cfg tokenBucketReconfigureArgs, lowTokensN notifyThreshold: cfg.NotifyThreshold, lowTokensNotifyChan: lowTokensNotifyChan, } + lim.metrics = &limiterMetricsCollection{ + lowTokenNotifyCounter: lowTokenRequestNotifyCounter.WithLabelValues(lim.name), + } log.Debug("new limiter", zap.String("limiter", fmt.Sprintf("%+v", lim))) return lim } @@ -122,13 +136,14 @@ func NewLimiterWithCfg(now time.Time, cfg tokenBucketReconfigureArgs, lowTokensN // A Reservation holds information about events that are permitted by a Limiter to happen after a delay. // A Reservation may be canceled, which may enable the Limiter to permit additional events. type Reservation struct { - ok bool - lim *Limiter - tokens float64 - timeToAct time.Time - needWaitDurtion time.Duration + ok bool + lim *Limiter + tokens float64 + timeToAct time.Time + needWaitDuration time.Duration // This is the Limit at reservation time, it can change later. - limit Limit + limit Limit + remainingTokens float64 } // OK returns whether the limiter can provide the requested number of tokens @@ -217,12 +232,20 @@ func (lim *Limiter) Reserve(ctx context.Context, waitDuration time.Duration, now } // SetupNotificationThreshold enables the notification at the given threshold. -func (lim *Limiter) SetupNotificationThreshold(now time.Time, threshold float64) { +func (lim *Limiter) SetupNotificationThreshold(threshold float64) { lim.mu.Lock() defer lim.mu.Unlock() lim.notifyThreshold = threshold } +// SetName sets the name of the limiter. +func (lim *Limiter) SetName(name string) *Limiter { + lim.mu.Lock() + defer lim.mu.Unlock() + lim.name = name + return lim +} + // notify tries to send a non-blocking notification on notifyCh and disables // further notifications (until the next Reconfigure or StartNotification). func (lim *Limiter) notify() { @@ -233,6 +256,9 @@ func (lim *Limiter) notify() { lim.isLowProcess = true select { case lim.lowTokensNotifyChan <- struct{}{}: + if lim.metrics != nil { + lim.metrics.lowTokenNotifyCounter.Inc() + } default: } } @@ -329,6 +355,8 @@ func (lim *Limiter) AvailableTokens(now time.Time) float64 { return tokens } +const reserveWarnLogInterval = 10 * time.Millisecond + // reserveN is a helper method for Reserve. // maxFutureReserve specifies the maximum reservation wait duration allowed. // reserveN returns Reservation, not *Reservation. 
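// Illustrative sketch, not from the patch itself: the hunks below extend the
// Reservation with needWaitDuration/remainingTokens and make
// ErrClientResourceGroupThrottled carry them (the limiter test expects
// "estimated wait time 4s, ltb state is 1.00:-4.00"). This deliberately
// simplified bucket is a hypothetical stand-in for the client's Limiter and
// only demonstrates where those numbers come from.
package main

import (
	"fmt"
	"time"
)

type bucket struct {
	rate   float64 // tokens refilled per second
	tokens float64
	last   time.Time
}

type reservation struct {
	ok               bool
	needWaitDuration time.Duration
	remainingTokens  float64
}

func (b *bucket) reserveN(now time.Time, n float64, maxWait time.Duration) reservation {
	// Refill tokens accumulated since the previous call.
	b.tokens += b.rate * now.Sub(b.last).Seconds()
	b.last = now

	remaining := b.tokens - n
	var wait time.Duration
	if remaining < 0 {
		wait = time.Duration(-remaining / b.rate * float64(time.Second))
	}
	res := reservation{ok: wait <= maxWait, needWaitDuration: wait, remainingTokens: remaining}
	if res.ok {
		b.tokens = remaining // commit the reservation
	}
	return res
}

func main() {
	now := time.Now()
	b := &bucket{rate: 1, tokens: 0, last: now}
	// Needing 4 tokens at 1 token/s takes ~4s; with a 1s budget the
	// reservation fails and reports the estimated wait plus the rate/deficit
	// pair, which is the kind of state the enriched throttled error surfaces.
	res := b.reserveN(now, 4, time.Second)
	fmt.Printf("ok=%v, estimated wait time %s, ltb state is %.2f:%.2f\n",
		res.ok, res.needWaitDuration, b.rate, res.remainingTokens)
}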
@@ -359,10 +387,11 @@ func (lim *Limiter) reserveN(now time.Time, n float64, maxFutureReserve time.Dur // Prepare reservation r := Reservation{ - ok: ok, - lim: lim, - limit: lim.limit, - needWaitDurtion: waitDuration, + ok: ok, + lim: lim, + limit: lim.limit, + needWaitDuration: waitDuration, + remainingTokens: tokens, } if ok { r.tokens = n @@ -374,14 +403,19 @@ func (lim *Limiter) reserveN(now time.Time, n float64, maxFutureReserve time.Dur lim.tokens = tokens lim.maybeNotify() } else { - log.Warn("[resource group controller] cannot reserve enough tokens", - zap.Duration("need-wait-duration", waitDuration), - zap.Duration("max-wait-duration", maxFutureReserve), - zap.Float64("current-ltb-tokens", lim.tokens), - zap.Float64("current-ltb-rate", float64(lim.limit)), - zap.Float64("request-tokens", n), - zap.Int64("burst", lim.burst), - zap.Int("remaining-notify-times", lim.remainingNotifyTimes)) + // print log if the limiter cannot reserve for a while. + if time.Since(lim.last) > reserveWarnLogInterval { + log.Warn("[resource group controller] cannot reserve enough tokens", + zap.Duration("need-wait-duration", waitDuration), + zap.Duration("max-wait-duration", maxFutureReserve), + zap.Float64("current-ltb-tokens", lim.tokens), + zap.Float64("current-ltb-rate", float64(lim.limit)), + zap.Float64("request-tokens", n), + zap.Float64("notify-threshold", lim.notifyThreshold), + zap.Bool("is-low-process", lim.isLowProcess), + zap.Int64("burst", lim.burst), + zap.Int("remaining-notify-times", lim.remainingNotifyTimes)) + } lim.last = last if lim.limit == 0 { lim.notify() @@ -461,7 +495,7 @@ func WaitReservations(ctx context.Context, now time.Time, reservations []*Reserv for _, res := range reservations { if !res.ok { cancel() - return res.needWaitDurtion, errs.ErrClientResourceGroupThrottled + return res.needWaitDuration, errs.ErrClientResourceGroupThrottled.FastGenByArgs(res.needWaitDuration, res.limit, res.remainingTokens) } delay := res.DelayFrom(now) if delay > longestDelayDuration { diff --git a/client/resource_group/controller/limiter_test.go b/client/resource_group/controller/limiter_test.go index 786e5c51cdf..c9bed856f1c 100644 --- a/client/resource_group/controller/limiter_test.go +++ b/client/resource_group/controller/limiter_test.go @@ -163,6 +163,7 @@ func TestCancel(t *testing.T) { d, err := WaitReservations(ctx, t2, []*Reservation{r1, r2}) re.Equal(4*time.Second, d) re.Error(err) + re.Contains(err.Error(), "estimated wait time 4s, ltb state is 1.00:-4.00") checkTokens(re, lim1, t3, 13) checkTokens(re, lim2, t3, 3) cancel1() diff --git a/client/resource_group/controller/metrics.go b/client/resource_group/controller/metrics.go index 4261705a6f6..30a0b850c7d 100644 --- a/client/resource_group/controller/metrics.go +++ b/client/resource_group/controller/metrics.go @@ -24,6 +24,8 @@ const ( // TODO: remove old label in 8.x resourceGroupNameLabel = "name" newResourceGroupNameLabel = "resource_group" + + errType = "type" ) var ( @@ -40,7 +42,7 @@ var ( Namespace: namespace, Subsystem: requestSubsystem, Name: "success", - Buckets: []float64{.005, .01, .05, .1, .5, 1, 5, 10, 20, 25, 30}, // 0.005 ~ 30 + Buckets: []float64{0.0005, .005, .01, .05, .1, .5, 1, 5, 10, 20, 25, 30, 60, 600, 1800, 3600}, // 0.0005 ~ 1h Help: "Bucketed histogram of wait duration of successful request.", }, []string{resourceGroupNameLabel, newResourceGroupNameLabel}) @@ -49,7 +51,7 @@ var ( Namespace: namespace, Subsystem: requestSubsystem, Name: "limit_reserve_time_failed", - Buckets: []float64{.005, .01, .05, .1, 
.5, 1, 5, 10, 20, 25, 30}, // 0.005 ~ 30 + Buckets: []float64{0.0005, .01, .05, .1, .5, 1, 5, 10, 20, 25, 30, 60, 600, 1800, 3600, 86400}, // 0.0005 ~ 24h Help: "Bucketed histogram of wait duration of failed request.", }, []string{resourceGroupNameLabel, newResourceGroupNameLabel}) @@ -59,7 +61,7 @@ var ( Subsystem: requestSubsystem, Name: "fail", Help: "Counter of failed request.", - }, []string{resourceGroupNameLabel, newResourceGroupNameLabel}) + }, []string{resourceGroupNameLabel, newResourceGroupNameLabel, errType}) requestRetryCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -73,6 +75,7 @@ var ( prometheus.HistogramOpts{ Namespace: namespace, Subsystem: tokenRequestSubsystem, + Buckets: prometheus.ExponentialBuckets(0.001, 2, 13), // 1ms ~ 8s Name: "duration", Help: "Bucketed histogram of latency(s) of token request.", }, []string{"type"}) @@ -84,6 +87,14 @@ var ( Name: "resource_group", Help: "Counter of token request by every resource group.", }, []string{resourceGroupNameLabel, newResourceGroupNameLabel}) + + lowTokenRequestNotifyCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: tokenRequestSubsystem, + Name: "low_token_notified", + Help: "Counter of low token request.", + }, []string{newResourceGroupNameLabel}) ) var ( @@ -100,4 +111,5 @@ func init() { prometheus.MustRegister(requestRetryCounter) prometheus.MustRegister(tokenRequestDuration) prometheus.MustRegister(resourceGroupTokenRequestCounter) + prometheus.MustRegister(lowTokenRequestNotifyCounter) } diff --git a/client/resource_group/controller/model.go b/client/resource_group/controller/model.go index dedc2ed7359..9e86de69abb 100644 --- a/client/resource_group/controller/model.go +++ b/client/resource_group/controller/model.go @@ -75,8 +75,7 @@ func newKVCalculator(cfg *RUConfig) *KVCalculator { } // Trickle ... -func (kc *KVCalculator) Trickle(*rmpb.Consumption) { -} +func (*KVCalculator) Trickle(*rmpb.Consumption) {} // BeforeKVRequest ... func (kc *KVCalculator) BeforeKVRequest(consumption *rmpb.Consumption, req RequestInfo) { @@ -166,11 +165,11 @@ func (dsc *SQLCalculator) Trickle(consumption *rmpb.Consumption) { } // BeforeKVRequest ... -func (dsc *SQLCalculator) BeforeKVRequest(consumption *rmpb.Consumption, req RequestInfo) { +func (*SQLCalculator) BeforeKVRequest(*rmpb.Consumption, RequestInfo) { } // AfterKVRequest ... -func (dsc *SQLCalculator) AfterKVRequest(consumption *rmpb.Consumption, req RequestInfo, res ResponseInfo) { +func (*SQLCalculator) AfterKVRequest(*rmpb.Consumption, RequestInfo, ResponseInfo) { } func getRUValueFromConsumption(custom *rmpb.Consumption, typ rmpb.RequestUnitType) float64 { diff --git a/client/resource_group/controller/testutil.go b/client/resource_group/controller/testutil.go index 4df8c9bba0d..01a9c3af1fc 100644 --- a/client/resource_group/controller/testutil.go +++ b/client/resource_group/controller/testutil.go @@ -52,7 +52,7 @@ func (tri *TestRequestInfo) StoreID() uint64 { } // ReplicaNumber implements the RequestInfo interface. 
-func (tri *TestRequestInfo) ReplicaNumber() int64 { +func (*TestRequestInfo) ReplicaNumber() int64 { return 1 } diff --git a/client/resource_group/controller/util_test.go b/client/resource_group/controller/util_test.go index a89ea08b955..10fa7c345a5 100644 --- a/client/resource_group/controller/util_test.go +++ b/client/resource_group/controller/util_test.go @@ -27,7 +27,6 @@ type example struct { } func TestDurationJSON(t *testing.T) { - t.Parallel() re := require.New(t) example := &example{} @@ -41,7 +40,6 @@ func TestDurationJSON(t *testing.T) { } func TestDurationTOML(t *testing.T) { - t.Parallel() re := require.New(t) example := &example{} diff --git a/client/resource_manager_client.go b/client/resource_manager_client.go index 433d17ceeee..19adbd199b0 100644 --- a/client/resource_manager_client.go +++ b/client/resource_manager_client.go @@ -16,7 +16,6 @@ package pd import ( "context" - "strings" "time" "github.com/gogo/protobuf/proto" @@ -35,10 +34,6 @@ const ( modify actionType = 1 groupSettingsPathPrefix = "resource_group/settings" controllerConfigPathPrefix = "resource_group/controller" - // errNotPrimary is returned when the requested server is not primary. - errNotPrimary = "not primary" - // errNotLeader is returned when the requested server is not pd leader. - errNotLeader = "not leader" ) // GroupSettingsPathPrefixBytes is used to watch or get resource groups. @@ -74,7 +69,7 @@ func WithRUStats(op *GetResourceGroupOp) { // resourceManagerClient gets the ResourceManager client of current PD leader. func (c *client) resourceManagerClient() (rmpb.ResourceManagerClient, error) { - cc, err := c.pdSvcDiscovery.GetOrCreateGRPCConn(c.GetLeaderAddr()) + cc, err := c.pdSvcDiscovery.GetOrCreateGRPCConn(c.GetLeaderURL()) if err != nil { return nil, err } @@ -83,7 +78,7 @@ func (c *client) resourceManagerClient() (rmpb.ResourceManagerClient, error) { // gRPCErrorHandler is used to handle the gRPC error returned by the resource manager service. func (c *client) gRPCErrorHandler(err error) { - if strings.Contains(err.Error(), errNotPrimary) || strings.Contains(err.Error(), errNotLeader) { + if errs.IsLeaderChange(err) { c.pdSvcDiscovery.ScheduleCheckMemberChanged() } } @@ -324,7 +319,9 @@ func (c *client) handleResourceTokenDispatcher(dispatcherCtx context.Context, tb // If the stream is nil or the leader has changed, try to reconnect. if toReconnect { connection.reset() - c.tryResourceManagerConnect(dispatcherCtx, &connection) + if err := c.tryResourceManagerConnect(dispatcherCtx, &connection); err != nil { + log.Error("[resource_manager] try to connect token leader failed", errs.ZapError(err)) + } log.Info("[resource_manager] token leader may change, try to reconnect the stream") stream, streamCtx = connection.stream, connection.ctx } diff --git a/client/retry/backoff.go b/client/retry/backoff.go index e79d0e3e4eb..9161ad0fea1 100644 --- a/client/retry/backoff.go +++ b/client/retry/backoff.go @@ -16,12 +16,28 @@ package retry import ( "context" + "reflect" + "runtime" + "strings" "time" "github.com/pingcap/errors" "github.com/pingcap/failpoint" + "github.com/pingcap/log" + "go.uber.org/zap" ) +// Option is used to customize the backoffer. +type Option func(*Backoffer) + +// withMinLogInterval sets the minimum log interval for retrying. +// Because the retry interval may be not the factor of log interval, so this is the minimum interval. 
+func withMinLogInterval(interval time.Duration) Option { + return func(bo *Backoffer) { + bo.logInterval = interval + } +} + // Backoffer is a backoff policy for retrying operations. type Backoffer struct { // base defines the initial time interval to wait before each retry. @@ -31,9 +47,14 @@ type Backoffer struct { // total defines the max total time duration cost in retrying. If it's 0, it means infinite retry until success. total time.Duration // retryableChecker is used to check if the error is retryable. - // By default, all errors are retryable. + // If it's not set, it will always retry unconditionally no matter what the error is. retryableChecker func(err error) bool + // logInterval defines the log interval for retrying. + logInterval time.Duration + // nextLogTime is used to record the next log time. + nextLogTime time.Duration + attempt int next time.Duration currentTotal time.Duration } @@ -48,12 +69,20 @@ func (bo *Backoffer) Exec( err error after *time.Timer ) + fnName := getFunctionName(fn) for { err = fn() - if !bo.isRetryable(err) { + bo.attempt++ + if err == nil || !bo.isRetryable(err) { break } currentInterval := bo.nextInterval() + bo.nextLogTime += currentInterval + if bo.logInterval > 0 && bo.nextLogTime >= bo.logInterval { + bo.nextLogTime %= bo.logInterval + log.Warn("[pd.backoffer] exec fn failed and retrying", + zap.String("fn-name", fnName), zap.Int("retry-time", bo.attempt), zap.Error(err)) + } if after == nil { after = time.NewTimer(currentInterval) } else { @@ -84,7 +113,7 @@ func (bo *Backoffer) Exec( // - `base` defines the initial time interval to wait before each retry. // - `max` defines the max time interval to wait before each retry. // - `total` defines the max total time duration cost in retrying. If it's 0, it means infinite retry until success. -func InitialBackoffer(base, max, total time.Duration) *Backoffer { +func InitialBackoffer(base, max, total time.Duration, opts ...Option) *Backoffer { // Make sure the base is less than or equal to the max. if base > max { base = max @@ -93,20 +122,25 @@ func InitialBackoffer(base, max, total time.Duration) *Backoffer { if total > 0 && total < base { total = base } - return &Backoffer{ - base: base, - max: max, - total: total, - retryableChecker: func(err error) bool { - return err != nil - }, + bo := &Backoffer{ + base: base, + max: max, + total: total, next: base, currentTotal: 0, + attempt: 0, } + for _, opt := range opts { + opt(bo) + } + return bo } -// SetRetryableChecker sets the retryable checker. -func (bo *Backoffer) SetRetryableChecker(checker func(err error) bool) { +// SetRetryableChecker sets the retryable checker, `overwrite` flag is used to indicate whether to overwrite the existing checker. +func (bo *Backoffer) SetRetryableChecker(checker func(err error) bool, overwrite bool) { + if !overwrite && bo.retryableChecker != nil { + return + } bo.retryableChecker = checker } @@ -141,6 +175,8 @@ func (bo *Backoffer) exponentialInterval() time.Duration { func (bo *Backoffer) resetBackoff() { bo.next = bo.base bo.currentTotal = 0 + bo.attempt = 0 + bo.nextLogTime = 0 } // Only used for test. 
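// Illustrative usage sketch, not from the patch itself: how the reworked
// Backoffer reads from a caller's side, using only the exported API shown in
// this file (InitialBackoffer with options, SetRetryableChecker with the new
// overwrite flag, Exec). The import path assumes the client module layout
// ("github.com/tikv/pd/client/retry"); errTransient is a made-up error.
package main

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/tikv/pd/client/retry"
)

var errTransient = errors.New("transient")

func main() {
	// 100ms initial interval, capped at 1s per retry, ~3s total budget.
	bo := retry.InitialBackoffer(100*time.Millisecond, time.Second, 3*time.Second)

	// Only retry transient errors; `false` keeps any checker already installed.
	bo.SetRetryableChecker(func(err error) bool {
		return errors.Is(err, errTransient)
	}, false)

	attempts := 0
	err := bo.Exec(context.Background(), func() error {
		attempts++
		if attempts < 3 {
			return errTransient
		}
		return nil
	})
	fmt.Println(attempts, err) // expected: 3 <nil>
}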
@@ -150,3 +186,8 @@ var testBackOffExecuteFlag = false func TestBackOffExecute() bool { return testBackOffExecuteFlag } + +func getFunctionName(f any) string { + strs := strings.Split(runtime.FuncForPC(reflect.ValueOf(f).Pointer()).Name(), ".") + return strings.Split(strs[len(strs)-1], "-")[0] +} diff --git a/client/retry/backoff_test.go b/client/retry/backoff_test.go index 3dd983f2afa..22d487b1885 100644 --- a/client/retry/backoff_test.go +++ b/client/retry/backoff_test.go @@ -15,12 +15,16 @@ package retry import ( + "bytes" "context" "errors" + "fmt" "testing" "time" + "github.com/pingcap/log" "github.com/stretchr/testify/require" + "go.uber.org/zap" ) func TestBackoffer(t *testing.T) { @@ -84,25 +88,157 @@ func TestBackoffer(t *testing.T) { return expectedErr }) re.InDelta(total, time.Since(start), float64(250*time.Millisecond)) + re.ErrorContains(err, "test") re.ErrorIs(err, expectedErr) re.Equal(4, execCount) re.True(isBackofferReset(bo)) - // Test the retryable checker. + // Test the error returned. execCount = 0 - bo = InitialBackoffer(base, max, total) - bo.SetRetryableChecker(func(err error) bool { - return execCount < 2 + err = bo.Exec(ctx, func() error { + execCount++ + return fmt.Errorf("test %d", execCount) }) + re.Error(err) + re.Equal("test 4", err.Error()) + re.Equal(4, execCount) + re.True(isBackofferReset(bo)) + execCount = 0 err = bo.Exec(ctx, func() error { + if execCount == 1 { + return nil + } execCount++ - return nil + return expectedErr }) + re.Equal(1, execCount) re.NoError(err) + re.True(isBackofferReset(bo)) + + // Test the retryable checker. + execCount = 0 + bo = InitialBackoffer(base, max, total) + retryableChecker := func(error) bool { + return execCount < 2 + } + bo.SetRetryableChecker(retryableChecker, false) + execFunc := func() error { + execCount++ + return expectedErr + } + err = bo.Exec(ctx, execFunc) + re.ErrorIs(err, expectedErr) + re.Equal(2, execCount) + re.True(isBackofferReset(bo)) + // Test the retryable checker with overwrite. + execCount = 0 + retryableChecker = func(error) bool { + return execCount < 4 + } + bo.SetRetryableChecker(retryableChecker, false) + err = bo.Exec(ctx, execFunc) + re.ErrorIs(err, expectedErr) re.Equal(2, execCount) re.True(isBackofferReset(bo)) + execCount = 0 + bo.SetRetryableChecker(retryableChecker, true) + err = bo.Exec(ctx, execFunc) + re.ErrorIs(err, expectedErr) + re.Equal(4, execCount) + re.True(isBackofferReset(bo)) } func isBackofferReset(bo *Backoffer) bool { return bo.next == bo.base && bo.currentTotal == 0 } + +func TestBackofferWithLog(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + conf := &log.Config{Level: "debug", File: log.FileLogConfig{}, DisableTimestamp: true} + lg := newZapTestLogger(conf) + log.ReplaceGlobals(lg.Logger, nil) + + bo := InitialBackoffer(time.Millisecond*10, time.Millisecond*100, time.Millisecond*1000, withMinLogInterval(time.Millisecond*100)) + err := bo.Exec(ctx, testFn) + re.ErrorIs(err, errTest) + + ms := lg.Messages() + len1 := len(ms) + // 10 + 20 + 40 + 80(log) + 100(log) * 9 >= 1000, so log ten times. + re.Len(ms, 10) + // 10 + 20 + 40 + 80 + 100 * 9, 13 times retry. + rfc := `["[pd.backoffer] exec fn failed and retrying"] [fn-name=testFn] [retry-time=13] [error=test]` + re.Contains(ms[len(ms)-1], rfc) + // 10 + 20 + 40 + 80(log), 4 times retry. 
+ rfc = `["[pd.backoffer] exec fn failed and retrying"] [fn-name=testFn] [retry-time=4] [error=test]` + re.Contains(ms[0], rfc) + + err = bo.Exec(ctx, testFn) + re.ErrorIs(err, errTest) + + ms = lg.Messages() + re.Len(ms, 20) + rfc = `["[pd.backoffer] exec fn failed and retrying"] [fn-name=testFn] [retry-time=13] [error=test]` + re.Contains(ms[len(ms)-1], rfc) + rfc = `["[pd.backoffer] exec fn failed and retrying"] [fn-name=testFn] [retry-time=4] [error=test]` + re.Contains(ms[len1], rfc) +} + +var errTest = errors.New("test") + +func testFn() error { + return errTest +} + +// testingWriter is a WriteSyncer that writes the the messages. +type testingWriter struct { + messages []string +} + +func newTestingWriter() *testingWriter { + return &testingWriter{} +} + +func (w *testingWriter) Write(p []byte) (n int, err error) { + n = len(p) + p = bytes.TrimRight(p, "\n") + m := string(p) + w.messages = append(w.messages, m) + return n, nil +} +func (*testingWriter) Sync() error { + return nil +} + +type verifyLogger struct { + *zap.Logger + w *testingWriter +} + +func (logger *verifyLogger) Message() string { + if logger.w.messages == nil { + return "" + } + return logger.w.messages[len(logger.w.messages)-1] +} + +func (logger *verifyLogger) Messages() []string { + if logger.w.messages == nil { + return nil + } + return logger.w.messages +} + +func newZapTestLogger(cfg *log.Config, opts ...zap.Option) verifyLogger { + // TestingWriter is used to write to memory. + // Used in the verify logger. + writer := newTestingWriter() + lg, _, _ := log.InitLoggerWithWriteSyncer(cfg, writer, writer, opts...) + return verifyLogger{ + Logger: lg, + w: writer, + } +} diff --git a/client/testutil/check_env_dummy.go b/client/testutil/check_env_dummy.go index 2fbcbd1a9e7..c8f4d268c9d 100644 --- a/client/testutil/check_env_dummy.go +++ b/client/testutil/check_env_dummy.go @@ -16,6 +16,6 @@ package testutil -func environmentCheck(addr string) bool { +func environmentCheck(_ string) bool { return true } diff --git a/client/testutil/leak.go b/client/testutil/leak.go index ec2a6543941..28b5baae60f 100644 --- a/client/testutil/leak.go +++ b/client/testutil/leak.go @@ -23,9 +23,4 @@ var LeakOptions = []goleak.Option{ goleak.IgnoreTopFunction("google.golang.org/grpc.(*addrConn).createTransport"), goleak.IgnoreTopFunction("google.golang.org/grpc.(*addrConn).resetTransport"), goleak.IgnoreTopFunction("google.golang.org/grpc.(*Server).handleRawConn"), - // TODO: remove the below options once we fixed the http connection leak problems - goleak.IgnoreTopFunction("internal/poll.runtime_pollWait"), - goleak.IgnoreTopFunction("google.golang.org/grpc/internal/transport.(*controlBuffer).get"), - goleak.IgnoreTopFunction("google.golang.org/grpc/internal/transport.(*http2Server).keepalive"), - goleak.IgnoreTopFunction("google.golang.org/grpc/internal/grpcsync.(*CallbackSerializer).run"), } diff --git a/client/tlsutil/OWNERS b/client/tlsutil/OWNERS new file mode 100644 index 00000000000..211db06feee --- /dev/null +++ b/client/tlsutil/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs at https://go.k8s.io/owners +options: + no_parent_owners: true +filters: + "(OWNERS|tlsconfig\\.go)$": + approvers: + - sig-critical-approvers-config diff --git a/client/tlsutil/tlsconfig.go b/client/tlsutil/tlsconfig.go index c9cee5987bb..a8bac17f676 100644 --- a/client/tlsutil/tlsconfig.go +++ b/client/tlsutil/tlsconfig.go @@ -131,7 +131,7 @@ func (info TLSInfo) baseConfig() (*tls.Config, error) { } if info.AllowedCN != "" { - cfg.VerifyPeerCertificate = 
func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error { + cfg.VerifyPeerCertificate = func(_ [][]byte, verifiedChains [][]*x509.Certificate) error { for _, chains := range verifiedChains { if len(chains) != 0 { if info.AllowedCN == chains[0].Subject.CommonName { @@ -145,10 +145,10 @@ func (info TLSInfo) baseConfig() (*tls.Config, error) { // this only reloads certs when there's a client request // TODO: support server-side refresh (e.g. inotify, SIGHUP), caching - cfg.GetCertificate = func(clientHello *tls.ClientHelloInfo) (*tls.Certificate, error) { + cfg.GetCertificate = func(*tls.ClientHelloInfo) (*tls.Certificate, error) { return NewCert(info.CertFile, info.KeyFile, info.parseFunc) } - cfg.GetClientCertificate = func(unused *tls.CertificateRequestInfo) (*tls.Certificate, error) { + cfg.GetClientCertificate = func(*tls.CertificateRequestInfo) (*tls.Certificate, error) { return NewCert(info.CertFile, info.KeyFile, info.parseFunc) } return cfg, nil diff --git a/client/tso_batch_controller.go b/client/tso_batch_controller.go index 842c772abd9..a713b7a187d 100644 --- a/client/tso_batch_controller.go +++ b/client/tso_batch_controller.go @@ -16,7 +16,13 @@ package pd import ( "context" + "runtime/trace" "time" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/tikv/pd/client/tsoutil" + "go.uber.org/zap" ) type tsoBatchController struct { @@ -130,9 +136,31 @@ func (tbc *tsoBatchController) adjustBestBatchSize() { } } -func (tbc *tsoBatchController) revokePendingRequest(err error) { +func (tbc *tsoBatchController) finishCollectedRequests(physical, firstLogical int64, suffixBits uint32, err error) { + for i := 0; i < tbc.collectedRequestCount; i++ { + tsoReq := tbc.collectedRequests[i] + // Retrieve the request context before the request is done to trace without race. + requestCtx := tsoReq.requestCtx + tsoReq.physical, tsoReq.logical = physical, tsoutil.AddLogical(firstLogical, int64(i), suffixBits) + tsoReq.tryDone(err) + trace.StartRegion(requestCtx, "pdclient.tsoReqDequeue").End() + } + // Prevent the finished requests from being processed again. + tbc.collectedRequestCount = 0 +} + +func (tbc *tsoBatchController) revokePendingRequests(err error) { for i := 0; i < len(tbc.tsoRequestCh); i++ { req := <-tbc.tsoRequestCh - req.done <- err + req.tryDone(err) } } + +func (tbc *tsoBatchController) clear() { + log.Info("[pd] clear the tso batch controller", + zap.Int("max-batch-size", tbc.maxBatchSize), zap.Int("best-batch-size", tbc.bestBatchSize), + zap.Int("collected-request-count", tbc.collectedRequestCount), zap.Int("pending-request-count", len(tbc.tsoRequestCh))) + tsoErr := errors.WithStack(errClosing) + tbc.finishCollectedRequests(0, 0, 0, tsoErr) + tbc.revokePendingRequests(tsoErr) +} diff --git a/client/tso_client.go b/client/tso_client.go index fc38ee8e5ba..5e221eae478 100644 --- a/client/tso_client.go +++ b/client/tso_client.go @@ -18,15 +18,29 @@ import ( "context" "fmt" "math/rand" + "runtime/trace" "sync" "time" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/log" "github.com/tikv/pd/client/errs" + "github.com/tikv/pd/client/grpcutil" "go.uber.org/zap" "google.golang.org/grpc" + "google.golang.org/grpc/codes" healthpb "google.golang.org/grpc/health/grpc_health_v1" + "google.golang.org/grpc/status" +) + +const ( + tsoDispatcherCheckInterval = time.Minute + // defaultMaxTSOBatchSize is the default max size of the TSO request batch. 
+ defaultMaxTSOBatchSize = 10000 + // retryInterval and maxRetryTimes are used to control the retry interval and max retry times. + retryInterval = 500 * time.Millisecond + maxRetryTimes = 6 ) // TSOClient is the client used to get timestamps. @@ -44,26 +58,6 @@ type TSOClient interface { GetMinTS(ctx context.Context) (int64, int64, error) } -type tsoRequest struct { - start time.Time - clientCtx context.Context - requestCtx context.Context - done chan error - physical int64 - logical int64 - dcLocation string -} - -var tsoReqPool = sync.Pool{ - New: func() interface{} { - return &tsoRequest{ - done: make(chan error, 1), - physical: 0, - logical: 0, - } - }, -} - type tsoClient struct { ctx context.Context cancel context.CancelFunc @@ -74,21 +68,17 @@ type tsoClient struct { tsoStreamBuilderFactory // tsoAllocators defines the mapping {dc-location -> TSO allocator leader URL} tsoAllocators sync.Map // Store as map[string]string - // tsoAllocServingAddrSwitchedCallback will be called when any global/local + // tsoAllocServingURLSwitchedCallback will be called when any global/local // tso allocator leader is switched. - tsoAllocServingAddrSwitchedCallback []func() + tsoAllocServingURLSwitchedCallback []func() + // tsoReqPool is the pool to recycle `*tsoRequest`. + tsoReqPool *sync.Pool // tsoDispatcher is used to dispatch different TSO requests to // the corresponding dc-location TSO channel. - tsoDispatcher sync.Map // Same as map[string]chan *tsoRequest - // dc-location -> deadline - tsDeadline sync.Map // Same as map[string]chan deadline - // dc-location -> *tsoInfo while the tsoInfo is the last TSO info - lastTSOInfoMap sync.Map // Same as map[string]*tsoInfo - - checkTSDeadlineCh chan struct{} - checkTSODispatcherCh chan struct{} - updateTSOConnectionCtxsCh chan struct{} + tsoDispatcher sync.Map // Same as map[string]*tsoDispatcher + + checkTSODispatcherCh chan struct{} } // newTSOClient returns a new TSO client. 
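// Illustrative aside, not part of the patch: the hunk above replaces the package-level
// tsoReqPool with a per-client *sync.Pool field (initialized in newTSOClient below). A
// minimal, self-contained sketch of that Get -> reset fields -> Put lifecycle; the names
// request, newRequestPool and acquire are hypothetical and exist only for illustration.
package main

import (
	"fmt"
	"sync"
	"time"
)

type request struct {
	start time.Time
	done  chan error
}

func newRequestPool() *sync.Pool {
	return &sync.Pool{
		New: func() any {
			// Buffered so that finishing a request never blocks the sender.
			return &request{done: make(chan error, 1)}
		},
	}
}

// acquire resets the reused object's fields before handing it out, since
// sync.Pool may return a previously recycled request.
func acquire(pool *sync.Pool) *request {
	req := pool.Get().(*request)
	req.start = time.Now()
	return req
}

func main() {
	pool := newRequestPool()
	req := acquire(pool)
	req.done <- nil // finish the request with no error
	<-req.done
	pool.Put(req) // recycle it for the next caller
	fmt.Println("request recycled")
}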
@@ -98,56 +88,130 @@ func newTSOClient( ) *tsoClient { ctx, cancel := context.WithCancel(ctx) c := &tsoClient{ - ctx: ctx, - cancel: cancel, - option: option, - svcDiscovery: svcDiscovery, - tsoStreamBuilderFactory: factory, - checkTSDeadlineCh: make(chan struct{}), - checkTSODispatcherCh: make(chan struct{}, 1), - updateTSOConnectionCtxsCh: make(chan struct{}, 1), + ctx: ctx, + cancel: cancel, + option: option, + svcDiscovery: svcDiscovery, + tsoStreamBuilderFactory: factory, + tsoReqPool: &sync.Pool{ + New: func() any { + return &tsoRequest{ + done: make(chan error, 1), + physical: 0, + logical: 0, + } + }, + }, + checkTSODispatcherCh: make(chan struct{}, 1), } eventSrc := svcDiscovery.(tsoAllocatorEventSource) - eventSrc.SetTSOLocalServAddrsUpdatedCallback(c.updateTSOLocalServAddrs) - eventSrc.SetTSOGlobalServAddrUpdatedCallback(c.updateTSOGlobalServAddr) - c.svcDiscovery.AddServiceAddrsSwitchedCallback(c.scheduleUpdateTSOConnectionCtxs) + eventSrc.SetTSOLocalServURLsUpdatedCallback(c.updateTSOLocalServURLs) + eventSrc.SetTSOGlobalServURLUpdatedCallback(c.updateTSOGlobalServURL) + c.svcDiscovery.AddServiceURLsSwitchedCallback(c.scheduleUpdateAllTSOConnectionCtxs) return c } -func (c *tsoClient) Setup() { - c.svcDiscovery.CheckMemberChanged() +func (c *tsoClient) getOption() *option { return c.option } + +func (c *tsoClient) getServiceDiscovery() ServiceDiscovery { return c.svcDiscovery } + +func (c *tsoClient) setup() { + if err := c.svcDiscovery.CheckMemberChanged(); err != nil { + log.Warn("[tso] failed to check member changed", errs.ZapError(err)) + } c.updateTSODispatcher() // Start the daemons. - c.wg.Add(2) + c.wg.Add(1) go c.tsoDispatcherCheckLoop() - go c.tsCancelLoop() } -// Close closes the TSO client -func (c *tsoClient) Close() { +func (c *tsoClient) tsoDispatcherCheckLoop() { + log.Info("[tso] start tso dispatcher check loop") + defer log.Info("[tso] exit tso dispatcher check loop") + defer c.wg.Done() + + loopCtx, loopCancel := context.WithCancel(c.ctx) + defer loopCancel() + + ticker := time.NewTicker(tsoDispatcherCheckInterval) + defer ticker.Stop() + for { + c.updateTSODispatcher() + select { + case <-ticker.C: + case <-c.checkTSODispatcherCh: + case <-loopCtx.Done(): + return + } + } +} + +// close closes the TSO client +func (c *tsoClient) close() { if c == nil { return } - log.Info("closing tso client") + log.Info("[tso] closing tso client") c.cancel() c.wg.Wait() - log.Info("close tso client") - c.tsoDispatcher.Range(func(_, dispatcherInterface interface{}) bool { - if dispatcherInterface != nil { - dispatcher := dispatcherInterface.(*tsoDispatcher) - tsoErr := errors.WithStack(errClosing) - dispatcher.tsoBatchController.revokePendingRequest(tsoErr) - dispatcher.dispatcherCancel() - } + log.Info("[tso] close tso client") + c.closeTSODispatcher() + log.Info("[tso] tso client is closed") +} + +func (c *tsoClient) scheduleCheckTSODispatcher() { + select { + case c.checkTSODispatcherCh <- struct{}{}: + default: + } +} + +// scheduleUpdateAllTSOConnectionCtxs updates the TSO connection contexts for all dc-locations. +func (c *tsoClient) scheduleUpdateAllTSOConnectionCtxs() { + c.tsoDispatcher.Range(func(_, dispatcher any) bool { + dispatcher.(*tsoDispatcher).scheduleUpdateConnectionCtxs() return true }) +} - log.Info("tso client is closed") +// scheduleUpdateTSOConnectionCtxs updates the TSO connection contexts for the given dc-location. 
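// Illustrative aside, not part of the patch: the schedule* helpers around this point
// (scheduleCheckTSODispatcher above, scheduleUpdateConnectionCtxs below) share one idiom:
// a buffered channel of size 1 plus select/default, so a notification never blocks the
// caller and repeated notifications coalesce. A minimal sketch with hypothetical names:
package main

import (
	"fmt"
	"time"
)

func main() {
	notifyCh := make(chan struct{}, 1)
	// schedule never blocks: if a notification is already pending, the new one is dropped.
	schedule := func() {
		select {
		case notifyCh <- struct{}{}:
		default:
		}
	}
	go func() {
		for range notifyCh {
			fmt.Println("worker woken up")
		}
	}()
	schedule()
	schedule() // may be dropped if the first notification is still pending
	time.Sleep(100 * time.Millisecond)
}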
+func (c *tsoClient) scheduleUpdateTSOConnectionCtxs(dcLocation string) { + dispatcher, ok := c.getTSODispatcher(dcLocation) + if !ok { + return + } + dispatcher.scheduleUpdateConnectionCtxs() +} + +// TSO Follower Proxy only supports the Global TSO proxy now. +func (c *tsoClient) allowTSOFollowerProxy(dc string) bool { + return dc == globalDCLocation && c.option.getEnableTSOFollowerProxy() +} + +func (c *tsoClient) getTSORequest(ctx context.Context, dcLocation string) *tsoRequest { + req := c.tsoReqPool.Get().(*tsoRequest) + // Set needed fields in the request before using it. + req.start = time.Now() + req.pool = c.tsoReqPool + req.requestCtx = ctx + req.clientCtx = c.ctx + req.physical = 0 + req.logical = 0 + req.dcLocation = dcLocation + return req +} + +func (c *tsoClient) getTSODispatcher(dcLocation string) (*tsoDispatcher, bool) { + dispatcher, ok := c.tsoDispatcher.Load(dcLocation) + if !ok || dispatcher == nil { + return nil, false + } + return dispatcher.(*tsoDispatcher), true } // GetTSOAllocators returns {dc-location -> TSO allocator leader URL} connection map @@ -155,8 +219,8 @@ func (c *tsoClient) GetTSOAllocators() *sync.Map { return &c.tsoAllocators } -// GetTSOAllocatorServingAddrByDCLocation returns the tso allocator of the given dcLocation -func (c *tsoClient) GetTSOAllocatorServingAddrByDCLocation(dcLocation string) (string, bool) { +// GetTSOAllocatorServingURLByDCLocation returns the tso allocator of the given dcLocation +func (c *tsoClient) GetTSOAllocatorServingURLByDCLocation(dcLocation string) (string, bool) { url, exist := c.tsoAllocators.Load(dcLocation) if !exist { return "", false @@ -164,14 +228,12 @@ func (c *tsoClient) GetTSOAllocatorServingAddrByDCLocation(dcLocation string) (s return url.(string), true } -// GetTSOAllocatorClientConnByDCLocation returns the tso allocator grpc client connection -// of the given dcLocation +// GetTSOAllocatorClientConnByDCLocation returns the TSO allocator gRPC client connection of the given dcLocation. func (c *tsoClient) GetTSOAllocatorClientConnByDCLocation(dcLocation string) (*grpc.ClientConn, string) { url, ok := c.tsoAllocators.Load(dcLocation) if !ok { - panic(fmt.Sprintf("the allocator leader in %s should exist", dcLocation)) + log.Fatal("[tso] the allocator leader should exist", zap.String("dc-location", dcLocation)) } - // todo: if we support local tso forward, we should get or create client conns. cc, ok := c.svcDiscovery.GetClientConns().Load(url) if !ok { return nil, url.(string) @@ -179,13 +241,13 @@ func (c *tsoClient) GetTSOAllocatorClientConnByDCLocation(dcLocation string) (*g return cc.(*grpc.ClientConn), url.(string) } -// AddTSOAllocatorServingAddrSwitchedCallback adds callbacks which will be called +// AddTSOAllocatorServingURLSwitchedCallback adds callbacks which will be called // when any global/local tso allocator service endpoint is switched. -func (c *tsoClient) AddTSOAllocatorServingAddrSwitchedCallback(callbacks ...func()) { - c.tsoAllocServingAddrSwitchedCallback = append(c.tsoAllocServingAddrSwitchedCallback, callbacks...) +func (c *tsoClient) AddTSOAllocatorServingURLSwitchedCallback(callbacks ...func()) { + c.tsoAllocServingURLSwitchedCallback = append(c.tsoAllocServingURLSwitchedCallback, callbacks...) 
} -func (c *tsoClient) updateTSOLocalServAddrs(allocatorMap map[string]string) error { +func (c *tsoClient) updateTSOLocalServURLs(allocatorMap map[string]string) error { if len(allocatorMap) == 0 { return nil } @@ -193,31 +255,33 @@ func (c *tsoClient) updateTSOLocalServAddrs(allocatorMap map[string]string) erro updated := false // Switch to the new one - for dcLocation, addr := range allocatorMap { - if len(addr) == 0 { + for dcLocation, url := range allocatorMap { + if len(url) == 0 { continue } - oldAddr, exist := c.GetTSOAllocatorServingAddrByDCLocation(dcLocation) - if exist && addr == oldAddr { + oldURL, exist := c.GetTSOAllocatorServingURLByDCLocation(dcLocation) + if exist && url == oldURL { continue } updated = true - if _, err := c.svcDiscovery.GetOrCreateGRPCConn(addr); err != nil { - log.Warn("[tso] failed to connect dc tso allocator serving address", + if _, err := c.svcDiscovery.GetOrCreateGRPCConn(url); err != nil { + log.Warn("[tso] failed to connect dc tso allocator serving url", zap.String("dc-location", dcLocation), - zap.String("serving-address", addr), + zap.String("serving-url", url), errs.ZapError(err)) return err } - c.tsoAllocators.Store(dcLocation, addr) - log.Info("[tso] switch dc tso local allocator serving address", + c.tsoAllocators.Store(dcLocation, url) + log.Info("[tso] switch dc tso local allocator serving url", zap.String("dc-location", dcLocation), - zap.String("new-address", addr), - zap.String("old-address", oldAddr)) + zap.String("new-url", url), + zap.String("old-url", oldURL)) + // Should trigger the update of the connection contexts once the allocator leader is switched. + c.scheduleUpdateTSOConnectionCtxs(dcLocation) } // Garbage collection of the old TSO allocator primaries - c.gcAllocatorServingAddr(allocatorMap) + c.gcAllocatorServingURL(allocatorMap) if updated { c.scheduleCheckTSODispatcher() @@ -226,18 +290,19 @@ func (c *tsoClient) updateTSOLocalServAddrs(allocatorMap map[string]string) erro return nil } -func (c *tsoClient) updateTSOGlobalServAddr(addr string) error { - c.tsoAllocators.Store(globalDCLocation, addr) - log.Info("[tso] switch dc tso global allocator serving address", +func (c *tsoClient) updateTSOGlobalServURL(url string) error { + c.tsoAllocators.Store(globalDCLocation, url) + log.Info("[tso] switch dc tso global allocator serving url", zap.String("dc-location", globalDCLocation), - zap.String("new-address", addr)) + zap.String("new-url", url)) + c.scheduleUpdateTSOConnectionCtxs(globalDCLocation) c.scheduleCheckTSODispatcher() return nil } -func (c *tsoClient) gcAllocatorServingAddr(curAllocatorMap map[string]string) { +func (c *tsoClient) gcAllocatorServingURL(curAllocatorMap map[string]string) { // Clean up the old TSO allocators - c.tsoAllocators.Range(func(dcLocationKey, _ interface{}) bool { + c.tsoAllocators.Range(func(dcLocationKey, _ any) bool { dcLocation := dcLocationKey.(string) // Skip the Global TSO Allocator if dcLocation == globalDCLocation { @@ -255,25 +320,376 @@ func (c *tsoClient) gcAllocatorServingAddr(curAllocatorMap map[string]string) { // backup service endpoints randomly. Backup service endpoints are followers in a // quorum-based cluster or secondaries in a primary/secondary configured cluster. 
func (c *tsoClient) backupClientConn() (*grpc.ClientConn, string) { - addrs := c.svcDiscovery.GetBackupAddrs() - if len(addrs) < 1 { + urls := c.svcDiscovery.GetBackupURLs() + if len(urls) < 1 { return nil, "" } var ( cc *grpc.ClientConn err error ) - for i := 0; i < len(addrs); i++ { - addr := addrs[rand.Intn(len(addrs))] - if cc, err = c.svcDiscovery.GetOrCreateGRPCConn(addr); err != nil { + for i := 0; i < len(urls); i++ { + url := urls[rand.Intn(len(urls))] + if cc, err = c.svcDiscovery.GetOrCreateGRPCConn(url); err != nil { continue } healthCtx, healthCancel := context.WithTimeout(c.ctx, c.option.timeout) resp, err := healthpb.NewHealthClient(cc).Check(healthCtx, &healthpb.HealthCheckRequest{Service: ""}) healthCancel() if err == nil && resp.GetStatus() == healthpb.HealthCheckResponse_SERVING { - return cc, addr + return cc, url } } return nil, "" } + +// tsoConnectionContext is used to store the context of a TSO stream connection. +type tsoConnectionContext struct { + ctx context.Context + cancel context.CancelFunc + // Current URL of the stream connection. + streamURL string + // Current stream to send gRPC requests. + // - `pdpb.PD_TsoClient` for a leader/follower in the PD cluster. + // - `tsopb.TSO_TsoClient` for a primary/secondary in the TSO cluster. + stream tsoStream +} + +// updateConnectionCtxs will choose the proper way to update the connections for the given dc-location. +// It will return a bool to indicate whether the update is successful. +func (c *tsoClient) updateConnectionCtxs(ctx context.Context, dc string, connectionCtxs *sync.Map) bool { + // Normal connection creating, it will be affected by the `enableForwarding`. + createTSOConnection := c.tryConnectToTSO + if c.allowTSOFollowerProxy(dc) { + createTSOConnection = c.tryConnectToTSOWithProxy + } + if err := createTSOConnection(ctx, dc, connectionCtxs); err != nil { + log.Error("[tso] update connection contexts failed", zap.String("dc", dc), errs.ZapError(err)) + return false + } + return true +} + +// tryConnectToTSO will try to connect to the TSO allocator leader. If the connection becomes unreachable +// and enableForwarding is true, it will create a new connection to a follower to do the forwarding, +// while a new daemon will be created also to switch back to a normal leader connection ASAP the +// connection comes back to normal. +func (c *tsoClient) tryConnectToTSO( + ctx context.Context, + dc string, + connectionCtxs *sync.Map, +) error { + var ( + networkErrNum uint64 + err error + stream tsoStream + url string + cc *grpc.ClientConn + updateAndClear = func(newURL string, connectionCtx *tsoConnectionContext) { + // Only store the `connectionCtx` if it does not exist before. + connectionCtxs.LoadOrStore(newURL, connectionCtx) + // Remove all other `connectionCtx`s. 
+ connectionCtxs.Range(func(url, cc any) bool { + if url.(string) != newURL { + cc.(*tsoConnectionContext).cancel() + connectionCtxs.Delete(url) + } + return true + }) + } + ) + + ticker := time.NewTicker(retryInterval) + defer ticker.Stop() + // Retry several times before falling back to the follower when the network problem happens + for i := 0; i < maxRetryTimes; i++ { + c.svcDiscovery.ScheduleCheckMemberChanged() + cc, url = c.GetTSOAllocatorClientConnByDCLocation(dc) + if _, ok := connectionCtxs.Load(url); ok { + return nil + } + if cc != nil { + cctx, cancel := context.WithCancel(ctx) + stream, err = c.tsoStreamBuilderFactory.makeBuilder(cc).build(cctx, cancel, c.option.timeout) + failpoint.Inject("unreachableNetwork", func() { + stream = nil + err = status.New(codes.Unavailable, "unavailable").Err() + }) + if stream != nil && err == nil { + updateAndClear(url, &tsoConnectionContext{cctx, cancel, url, stream}) + return nil + } + + if err != nil && c.option.enableForwarding { + // The reason we need to judge if the error code is equal to "Canceled" here is that + // when we create a stream we use a goroutine to manually control the timeout of the connection. + // There is no need to wait for the transport layer timeout which can reduce the time of unavailability. + // But it conflicts with the retry mechanism since we use the error code to decide if it is caused by network error. + // And actually the `Canceled` error can be regarded as a kind of network error in some way. + if rpcErr, ok := status.FromError(err); ok && (isNetworkError(rpcErr.Code()) || rpcErr.Code() == codes.Canceled) { + networkErrNum++ + } + } + cancel() + } else { + networkErrNum++ + } + select { + case <-ctx.Done(): + return err + case <-ticker.C: + } + } + + if networkErrNum == maxRetryTimes { + // encounter the network error + backupClientConn, backupURL := c.backupClientConn() + if backupClientConn != nil { + log.Info("[tso] fall back to use follower to forward tso stream", zap.String("dc", dc), zap.String("follower-url", backupURL)) + forwardedHost, ok := c.GetTSOAllocatorServingURLByDCLocation(dc) + if !ok { + return errors.Errorf("cannot find the allocator leader in %s", dc) + } + + // create the follower stream + cctx, cancel := context.WithCancel(ctx) + cctx = grpcutil.BuildForwardContext(cctx, forwardedHost) + stream, err = c.tsoStreamBuilderFactory.makeBuilder(backupClientConn).build(cctx, cancel, c.option.timeout) + if err == nil { + forwardedHostTrim := trimHTTPPrefix(forwardedHost) + addr := trimHTTPPrefix(backupURL) + // the goroutine is used to check the network and change back to the original stream + go c.checkAllocator(ctx, cancel, dc, forwardedHostTrim, addr, url, updateAndClear) + requestForwarded.WithLabelValues(forwardedHostTrim, addr).Set(1) + updateAndClear(backupURL, &tsoConnectionContext{cctx, cancel, backupURL, stream}) + return nil + } + cancel() + } + } + return err +} + +func (c *tsoClient) checkAllocator( + ctx context.Context, + forwardCancel context.CancelFunc, + dc, forwardedHostTrim, addr, url string, + updateAndClear func(newAddr string, connectionCtx *tsoConnectionContext), +) { + defer func() { + // cancel the forward stream + forwardCancel() + requestForwarded.WithLabelValues(forwardedHostTrim, addr).Set(0) + }() + cc, u := c.GetTSOAllocatorClientConnByDCLocation(dc) + var healthCli healthpb.HealthClient + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + for { + // the pd/allocator leader change, we need to re-establish the stream + if u != url { + 
log.Info("[tso] the leader of the allocator leader is changed", zap.String("dc", dc), zap.String("origin", url), zap.String("new", u)) + return + } + if healthCli == nil && cc != nil { + healthCli = healthpb.NewHealthClient(cc) + } + if healthCli != nil { + healthCtx, healthCancel := context.WithTimeout(ctx, c.option.timeout) + resp, err := healthCli.Check(healthCtx, &healthpb.HealthCheckRequest{Service: ""}) + failpoint.Inject("unreachableNetwork", func() { + resp.Status = healthpb.HealthCheckResponse_UNKNOWN + }) + healthCancel() + if err == nil && resp.GetStatus() == healthpb.HealthCheckResponse_SERVING { + // create a stream of the original allocator + cctx, cancel := context.WithCancel(ctx) + stream, err := c.tsoStreamBuilderFactory.makeBuilder(cc).build(cctx, cancel, c.option.timeout) + if err == nil && stream != nil { + log.Info("[tso] recover the original tso stream since the network has become normal", zap.String("dc", dc), zap.String("url", url)) + updateAndClear(url, &tsoConnectionContext{cctx, cancel, url, stream}) + return + } + } + } + select { + case <-ctx.Done(): + return + case <-ticker.C: + // To ensure we can get the latest allocator leader + // and once the leader is changed, we can exit this function. + cc, u = c.GetTSOAllocatorClientConnByDCLocation(dc) + } + } +} + +// tryConnectToTSOWithProxy will create multiple streams to all the service endpoints to work as +// a TSO proxy to reduce the pressure of the main serving service endpoint. +func (c *tsoClient) tryConnectToTSOWithProxy( + ctx context.Context, + dc string, + connectionCtxs *sync.Map, +) error { + tsoStreamBuilders := c.getAllTSOStreamBuilders() + leaderAddr := c.svcDiscovery.GetServingURL() + forwardedHost, ok := c.GetTSOAllocatorServingURLByDCLocation(dc) + if !ok { + return errors.Errorf("cannot find the allocator leader in %s", dc) + } + // GC the stale one. + connectionCtxs.Range(func(addr, cc any) bool { + addrStr := addr.(string) + if _, ok := tsoStreamBuilders[addrStr]; !ok { + log.Info("[tso] remove the stale tso stream", + zap.String("dc", dc), + zap.String("addr", addrStr)) + cc.(*tsoConnectionContext).cancel() + connectionCtxs.Delete(addr) + } + return true + }) + // Update the missing one. + for addr, tsoStreamBuilder := range tsoStreamBuilders { + if _, ok = connectionCtxs.Load(addr); ok { + continue + } + log.Info("[tso] try to create tso stream", + zap.String("dc", dc), zap.String("addr", addr)) + cctx, cancel := context.WithCancel(ctx) + // Do not proxy the leader client. + if addr != leaderAddr { + log.Info("[tso] use follower to forward tso stream to do the proxy", + zap.String("dc", dc), zap.String("addr", addr)) + cctx = grpcutil.BuildForwardContext(cctx, forwardedHost) + } + // Create the TSO stream. + stream, err := tsoStreamBuilder.build(cctx, cancel, c.option.timeout) + if err == nil { + if addr != leaderAddr { + forwardedHostTrim := trimHTTPPrefix(forwardedHost) + addrTrim := trimHTTPPrefix(addr) + requestForwarded.WithLabelValues(forwardedHostTrim, addrTrim).Set(1) + } + connectionCtxs.Store(addr, &tsoConnectionContext{cctx, cancel, addr, stream}) + continue + } + log.Error("[tso] create the tso stream failed", + zap.String("dc", dc), zap.String("addr", addr), errs.ZapError(err)) + cancel() + } + return nil +} + +// getAllTSOStreamBuilders returns a TSO stream builder for every service endpoint of TSO leader/followers +// or of keyspace group primary/secondaries. 
+func (c *tsoClient) getAllTSOStreamBuilders() map[string]tsoStreamBuilder { + var ( + addrs = c.svcDiscovery.GetServiceURLs() + streamBuilders = make(map[string]tsoStreamBuilder, len(addrs)) + cc *grpc.ClientConn + err error + ) + for _, addr := range addrs { + if len(addrs) == 0 { + continue + } + if cc, err = c.svcDiscovery.GetOrCreateGRPCConn(addr); err != nil { + continue + } + healthCtx, healthCancel := context.WithTimeout(c.ctx, c.option.timeout) + resp, err := healthpb.NewHealthClient(cc).Check(healthCtx, &healthpb.HealthCheckRequest{Service: ""}) + healthCancel() + if err == nil && resp.GetStatus() == healthpb.HealthCheckResponse_SERVING { + streamBuilders[addr] = c.tsoStreamBuilderFactory.makeBuilder(cc) + } + } + return streamBuilders +} + +func (c *tsoClient) createTSODispatcher(dcLocation string) { + dispatcher := newTSODispatcher(c.ctx, dcLocation, defaultMaxTSOBatchSize, c) + if _, ok := c.tsoDispatcher.LoadOrStore(dcLocation, dispatcher); !ok { + // Create a new dispatcher for the dc-location to handle the TSO requests. + c.wg.Add(1) + go dispatcher.handleDispatcher(&c.wg) + } else { + dispatcher.close() + } +} + +func (c *tsoClient) closeTSODispatcher() { + c.tsoDispatcher.Range(func(_, dispatcherInterface any) bool { + if dispatcherInterface != nil { + dispatcherInterface.(*tsoDispatcher).close() + } + return true + }) +} + +func (c *tsoClient) updateTSODispatcher() { + // Set up the new TSO dispatcher and batch controller. + c.GetTSOAllocators().Range(func(dcLocationKey, _ any) bool { + dcLocation := dcLocationKey.(string) + if _, ok := c.getTSODispatcher(dcLocation); !ok { + c.createTSODispatcher(dcLocation) + } + return true + }) + // Clean up the unused TSO dispatcher + c.tsoDispatcher.Range(func(dcLocationKey, dispatcher any) bool { + dcLocation := dcLocationKey.(string) + // Skip the Global TSO Allocator + if dcLocation == globalDCLocation { + return true + } + if _, exist := c.GetTSOAllocators().Load(dcLocation); !exist { + log.Info("[tso] delete unused tso dispatcher", zap.String("dc-location", dcLocation)) + c.tsoDispatcher.Delete(dcLocation) + dispatcher.(*tsoDispatcher).close() + } + return true + }) +} + +// dispatchRequest will send the TSO request to the corresponding TSO dispatcher. +func (c *tsoClient) dispatchRequest(request *tsoRequest) (bool, error) { + dispatcher, ok := c.getTSODispatcher(request.dcLocation) + if !ok { + err := errs.ErrClientGetTSO.FastGenByArgs(fmt.Sprintf("unknown dc-location %s to the client", request.dcLocation)) + log.Error("[tso] dispatch tso request error", zap.String("dc-location", request.dcLocation), errs.ZapError(err)) + c.svcDiscovery.ScheduleCheckMemberChanged() + // New dispatcher could be created in the meantime, which is retryable. + return true, err + } + + defer trace.StartRegion(request.requestCtx, "pdclient.tsoReqEnqueue").End() + select { + case <-request.requestCtx.Done(): + // Caller cancelled the request, no need to retry. + return false, request.requestCtx.Err() + case <-request.clientCtx.Done(): + // Client is closed, no need to retry. + return false, request.clientCtx.Err() + case <-c.ctx.Done(): + // tsoClient is closed due to the PD service mode switch, which is retryable. + return true, c.ctx.Err() + default: + // This failpoint will increase the possibility that the request is sent to a closed dispatcher. 
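// Illustrative aside, not part of the patch: failpoint.Inject above is a marker from
// github.com/pingcap/failpoint; it is a no-op unless the sources have been rewritten by
// failpoint-ctl and the failpoint is enabled at runtime. A hypothetical sketch of toggling
// such a failpoint; the path "example.com/demo/slowPath" and the term "return(true)" are
// illustrative only.
package main

import (
	"fmt"

	"github.com/pingcap/failpoint"
)

func doWork() string {
	result := "normal path"
	failpoint.Inject("slowPath", func() {
		// Runs only when the failpoint is enabled and the code has been rewritten.
		result = "failpoint path"
	})
	return result
}

func main() {
	// The full failpoint path is normally the package's import path plus the failpoint name.
	if err := failpoint.Enable("example.com/demo/slowPath", "return(true)"); err != nil {
		fmt.Println("enable failed:", err)
		return
	}
	defer failpoint.Disable("example.com/demo/slowPath")
	fmt.Println(doWork())
}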
+ failpoint.Inject("delayDispatchTSORequest", func() { + time.Sleep(time.Second) + }) + dispatcher.push(request) + } + // Check the contexts again to make sure the request is not been sent to a closed dispatcher. + // Never retry on these conditions to prevent unexpected data race. + select { + case <-request.requestCtx.Done(): + return false, request.requestCtx.Err() + case <-request.clientCtx.Done(): + return false, request.clientCtx.Err() + case <-c.ctx.Done(): + return false, c.ctx.Err() + default: + } + return false, nil +} diff --git a/client/tso_dispatcher.go b/client/tso_dispatcher.go index 9510f7aadb6..0919fd84744 100644 --- a/client/tso_dispatcher.go +++ b/client/tso_dispatcher.go @@ -27,123 +27,14 @@ import ( "github.com/pingcap/failpoint" "github.com/pingcap/log" "github.com/tikv/pd/client/errs" - "github.com/tikv/pd/client/grpcutil" "github.com/tikv/pd/client/retry" "github.com/tikv/pd/client/timerpool" "github.com/tikv/pd/client/tsoutil" "go.uber.org/zap" - "google.golang.org/grpc" - "google.golang.org/grpc/codes" - healthpb "google.golang.org/grpc/health/grpc_health_v1" - "google.golang.org/grpc/status" ) -type tsoDispatcher struct { - dispatcherCancel context.CancelFunc - tsoBatchController *tsoBatchController -} - -type tsoInfo struct { - tsoServer string - reqKeyspaceGroupID uint32 - respKeyspaceGroupID uint32 - respReceivedAt time.Time - physical int64 - logical int64 -} - -const ( - tsLoopDCCheckInterval = time.Minute - defaultMaxTSOBatchSize = 10000 // should be higher if client is sending requests in burst - retryInterval = 500 * time.Millisecond - maxRetryTimes = 6 -) - -func (c *tsoClient) scheduleCheckTSODispatcher() { - select { - case c.checkTSODispatcherCh <- struct{}{}: - default: - } -} - -func (c *tsoClient) scheduleUpdateTSOConnectionCtxs() { - select { - case c.updateTSOConnectionCtxsCh <- struct{}{}: - default: - } -} - -func (c *tsoClient) dispatchRequest(dcLocation string, request *tsoRequest) error { - dispatcher, ok := c.tsoDispatcher.Load(dcLocation) - if !ok { - err := errs.ErrClientGetTSO.FastGenByArgs(fmt.Sprintf("unknown dc-location %s to the client", dcLocation)) - log.Error("[tso] dispatch tso request error", zap.String("dc-location", dcLocation), errs.ZapError(err)) - c.svcDiscovery.ScheduleCheckMemberChanged() - return err - } - - defer trace.StartRegion(request.requestCtx, "tsoReqEnqueue").End() - dispatcher.(*tsoDispatcher).tsoBatchController.tsoRequestCh <- request - return nil -} - -// TSFuture is a future which promises to return a TSO. -type TSFuture interface { - // Wait gets the physical and logical time, it would block caller if data is not available yet. - Wait() (int64, int64, error) -} - -func (req *tsoRequest) Wait() (physical int64, logical int64, err error) { - // If tso command duration is observed very high, the reason could be it - // takes too long for Wait() be called. 
- start := time.Now() - cmdDurationTSOAsyncWait.Observe(start.Sub(req.start).Seconds()) - select { - case err = <-req.done: - defer trace.StartRegion(req.requestCtx, "tsoReqDone").End() - err = errors.WithStack(err) - defer tsoReqPool.Put(req) - if err != nil { - cmdFailDurationTSO.Observe(time.Since(req.start).Seconds()) - return 0, 0, err - } - physical, logical = req.physical, req.logical - now := time.Now() - cmdDurationWait.Observe(now.Sub(start).Seconds()) - cmdDurationTSO.Observe(now.Sub(req.start).Seconds()) - return - case <-req.requestCtx.Done(): - return 0, 0, errors.WithStack(req.requestCtx.Err()) - case <-req.clientCtx.Done(): - return 0, 0, errors.WithStack(req.clientCtx.Err()) - } -} - -func (c *tsoClient) updateTSODispatcher() { - // Set up the new TSO dispatcher and batch controller. - c.GetTSOAllocators().Range(func(dcLocationKey, _ interface{}) bool { - dcLocation := dcLocationKey.(string) - if !c.checkTSODispatcher(dcLocation) { - c.createTSODispatcher(dcLocation) - } - return true - }) - // Clean up the unused TSO dispatcher - c.tsoDispatcher.Range(func(dcLocationKey, dispatcher interface{}) bool { - dcLocation := dcLocationKey.(string) - // Skip the Global TSO Allocator - if dcLocation == globalDCLocation { - return true - } - if _, exist := c.GetTSOAllocators().Load(dcLocation); !exist { - log.Info("[tso] delete unused tso dispatcher", zap.String("dc-location", dcLocation)) - dispatcher.(*tsoDispatcher).dispatcherCancel() - c.tsoDispatcher.Delete(dcLocation) - } - return true - }) -} - +// deadline is used to control the TS request timeout manually, +// it will be sent to the `tsDeadlineCh` to be handled by the `watchTSDeadline` goroutine. type deadline struct { timer *time.Timer done chan struct{} @@ -163,261 +54,170 @@ func newTSDeadline( } } -func (c *tsoClient) tsCancelLoop() { - defer c.wg.Done() - - tsCancelLoopCtx, tsCancelLoopCancel := context.WithCancel(c.ctx) - defer tsCancelLoopCancel() - - ticker := time.NewTicker(tsLoopDCCheckInterval) - defer ticker.Stop() - for { - // Watch every dc-location's tsDeadlineCh - c.GetTSOAllocators().Range(func(dcLocation, _ interface{}) bool { - c.watchTSDeadline(tsCancelLoopCtx, dcLocation.(string)) - return true - }) - select { - case <-c.checkTSDeadlineCh: - continue - case <-ticker.C: - continue - case <-tsCancelLoopCtx.Done(): - log.Info("exit tso requests cancel loop") - return - } - } +type tsoInfo struct { + tsoServer string + reqKeyspaceGroupID uint32 + respKeyspaceGroupID uint32 + respReceivedAt time.Time + physical int64 + logical int64 } -func (c *tsoClient) watchTSDeadline(ctx context.Context, dcLocation string) { - if _, exist := c.tsDeadline.Load(dcLocation); !exist { - tsDeadlineCh := make(chan *deadline, 1) - c.tsDeadline.Store(dcLocation, tsDeadlineCh) - go func(dc string, tsDeadlineCh <-chan *deadline) { - for { - select { - case d := <-tsDeadlineCh: - select { - case <-d.timer.C: - log.Error("[tso] tso request is canceled due to timeout", zap.String("dc-location", dc), errs.ZapError(errs.ErrClientGetTSOTimeout)) - d.cancel() - timerpool.GlobalTimerPool.Put(d.timer) - case <-d.done: - timerpool.GlobalTimerPool.Put(d.timer) - case <-ctx.Done(): - timerpool.GlobalTimerPool.Put(d.timer) - return - } - case <-ctx.Done(): - return - } - } - }(dcLocation, tsDeadlineCh) - } +type tsoServiceProvider interface { + getOption() *option + getServiceDiscovery() ServiceDiscovery + updateConnectionCtxs(ctx context.Context, dc string, connectionCtxs *sync.Map) bool } -func (c *tsoClient) scheduleCheckTSDeadline() { - 
select { - case c.checkTSDeadlineCh <- struct{}{}: - default: - } -} +type tsoDispatcher struct { + ctx context.Context + cancel context.CancelFunc + dc string -func (c *tsoClient) tsoDispatcherCheckLoop() { - defer c.wg.Done() + provider tsoServiceProvider + // URL -> *connectionContext + connectionCtxs *sync.Map + batchController *tsoBatchController + tsDeadlineCh chan *deadline + lastTSOInfo *tsoInfo - loopCtx, loopCancel := context.WithCancel(c.ctx) - defer loopCancel() + updateConnectionCtxsCh chan struct{} +} - ticker := time.NewTicker(tsLoopDCCheckInterval) - defer ticker.Stop() - for { - c.updateTSODispatcher() - select { - case <-ticker.C: - case <-c.checkTSODispatcherCh: - case <-loopCtx.Done(): - log.Info("exit tso dispatcher loop") - return - } - } +func newTSODispatcher( + ctx context.Context, + dc string, + maxBatchSize int, + provider tsoServiceProvider, +) *tsoDispatcher { + dispatcherCtx, dispatcherCancel := context.WithCancel(ctx) + tsoBatchController := newTSOBatchController( + make(chan *tsoRequest, maxBatchSize*2), + maxBatchSize, + ) + failpoint.Inject("shortDispatcherChannel", func() { + tsoBatchController = newTSOBatchController( + make(chan *tsoRequest, 1), + maxBatchSize, + ) + }) + td := &tsoDispatcher{ + ctx: dispatcherCtx, + cancel: dispatcherCancel, + dc: dc, + provider: provider, + connectionCtxs: &sync.Map{}, + batchController: tsoBatchController, + tsDeadlineCh: make(chan *deadline, 1), + updateConnectionCtxsCh: make(chan struct{}, 1), + } + go td.watchTSDeadline() + return td } -func (c *tsoClient) checkAllocator( - dispatcherCtx context.Context, - forwardCancel context.CancelFunc, - dc, forwardedHostTrim, addrTrim, url string, - updateAndClear func(newAddr string, connectionCtx *tsoConnectionContext)) { - defer func() { - // cancel the forward stream - forwardCancel() - requestForwarded.WithLabelValues(forwardedHostTrim, addrTrim).Set(0) - }() - cc, u := c.GetTSOAllocatorClientConnByDCLocation(dc) - var healthCli healthpb.HealthClient - ticker := time.NewTicker(time.Second) - defer ticker.Stop() +func (td *tsoDispatcher) watchTSDeadline() { + log.Info("[tso] start tso deadline watcher", zap.String("dc-location", td.dc)) + defer log.Info("[tso] exit tso deadline watcher", zap.String("dc-location", td.dc)) for { - // the pd/allocator leader change, we need to re-establish the stream - if u != url { - log.Info("[tso] the leader of the allocator leader is changed", zap.String("dc", dc), zap.String("origin", url), zap.String("new", u)) - return - } - if healthCli == nil && cc != nil { - healthCli = healthpb.NewHealthClient(cc) - } - if healthCli != nil { - healthCtx, healthCancel := context.WithTimeout(dispatcherCtx, c.option.timeout) - resp, err := healthCli.Check(healthCtx, &healthpb.HealthCheckRequest{Service: ""}) - failpoint.Inject("unreachableNetwork", func() { - resp.Status = healthpb.HealthCheckResponse_UNKNOWN - }) - healthCancel() - if err == nil && resp.GetStatus() == healthpb.HealthCheckResponse_SERVING { - // create a stream of the original allocator - cctx, cancel := context.WithCancel(dispatcherCtx) - stream, err := c.tsoStreamBuilderFactory.makeBuilder(cc).build(cctx, cancel, c.option.timeout) - if err == nil && stream != nil { - log.Info("[tso] recover the original tso stream since the network has become normal", zap.String("dc", dc), zap.String("url", url)) - updateAndClear(url, &tsoConnectionContext{url, stream, cctx, cancel}) - return - } - } - } select { - case <-dispatcherCtx.Done(): + case d := <-td.tsDeadlineCh: + select { + case 
<-d.timer.C: + log.Error("[tso] tso request is canceled due to timeout", + zap.String("dc-location", td.dc), errs.ZapError(errs.ErrClientGetTSOTimeout)) + d.cancel() + timerpool.GlobalTimerPool.Put(d.timer) + case <-d.done: + timerpool.GlobalTimerPool.Put(d.timer) + case <-td.ctx.Done(): + timerpool.GlobalTimerPool.Put(d.timer) + return + } + case <-td.ctx.Done(): return - case <-ticker.C: - // To ensure we can get the latest allocator leader - // and once the leader is changed, we can exit this function. - cc, u = c.GetTSOAllocatorClientConnByDCLocation(dc) } } } -func (c *tsoClient) checkTSODispatcher(dcLocation string) bool { - dispatcher, ok := c.tsoDispatcher.Load(dcLocation) - if !ok || dispatcher == nil { - return false +func (td *tsoDispatcher) scheduleUpdateConnectionCtxs() { + select { + case td.updateConnectionCtxsCh <- struct{}{}: + default: } - return true } -func (c *tsoClient) createTSODispatcher(dcLocation string) { - dispatcherCtx, dispatcherCancel := context.WithCancel(c.ctx) - dispatcher := &tsoDispatcher{ - dispatcherCancel: dispatcherCancel, - tsoBatchController: newTSOBatchController( - make(chan *tsoRequest, defaultMaxTSOBatchSize*2), - defaultMaxTSOBatchSize), - } +func (td *tsoDispatcher) close() { + td.cancel() + td.batchController.clear() +} - if _, ok := c.tsoDispatcher.LoadOrStore(dcLocation, dispatcher); !ok { - // Successfully stored the value. Start the following goroutine. - // Each goroutine is responsible for handling the tso stream request for its dc-location. - // The only case that will make the dispatcher goroutine exit - // is that the loopCtx is done, otherwise there is no circumstance - // this goroutine should exit. - c.wg.Add(1) - go c.handleDispatcher(dispatcherCtx, dcLocation, dispatcher.tsoBatchController) - log.Info("[tso] tso dispatcher created", zap.String("dc-location", dcLocation)) - } else { - dispatcherCancel() - } +func (td *tsoDispatcher) push(request *tsoRequest) { + td.batchController.tsoRequestCh <- request } -func (c *tsoClient) handleDispatcher( - dispatcherCtx context.Context, - dc string, - tbc *tsoBatchController) { +func (td *tsoDispatcher) handleDispatcher(wg *sync.WaitGroup) { var ( - err error - streamAddr string - stream tsoStream - streamCtx context.Context - cancel context.CancelFunc - // addr -> connectionContext - connectionCtxs sync.Map - opts []opentracing.StartSpanOption + ctx = td.ctx + dc = td.dc + provider = td.provider + svcDiscovery = provider.getServiceDiscovery() + option = provider.getOption() + connectionCtxs = td.connectionCtxs + batchController = td.batchController ) + log.Info("[tso] tso dispatcher created", zap.String("dc-location", dc)) + // Clean up the connectionCtxs when the dispatcher exits. defer func() { log.Info("[tso] exit tso dispatcher", zap.String("dc-location", dc)) // Cancel all connections. - connectionCtxs.Range(func(_, cc interface{}) bool { + connectionCtxs.Range(func(_, cc any) bool { cc.(*tsoConnectionContext).cancel() return true }) - c.wg.Done() + // Clear the tso batch controller. + batchController.clear() + wg.Done() }() - // Call updateTSOConnectionCtxs once to init the connectionCtxs first. - c.updateTSOConnectionCtxs(dispatcherCtx, dc, &connectionCtxs) - // Only the Global TSO needs to watch the updateTSOConnectionCtxsCh to sense the - // change of the cluster when TSO Follower Proxy is enabled. - // TODO: support TSO Follower Proxy for the Local TSO. 
- if dc == globalDCLocation { - go func() { - var updateTicker = &time.Ticker{} - setNewUpdateTicker := func(ticker *time.Ticker) { - if updateTicker.C != nil { - updateTicker.Stop() - } - updateTicker = ticker - } - // Set to nil before returning to ensure that the existing ticker can be GC. - defer setNewUpdateTicker(nil) - - for { - select { - case <-dispatcherCtx.Done(): - return - case <-c.option.enableTSOFollowerProxyCh: - enableTSOFollowerProxy := c.option.getEnableTSOFollowerProxy() - if enableTSOFollowerProxy && updateTicker.C == nil { - // Because the TSO Follower Proxy is enabled, - // the periodic check needs to be performed. - setNewUpdateTicker(time.NewTicker(memberUpdateInterval)) - } else if !enableTSOFollowerProxy && updateTicker.C != nil { - // Because the TSO Follower Proxy is disabled, - // the periodic check needs to be turned off. - setNewUpdateTicker(&time.Ticker{}) - } else { - // The status of TSO Follower Proxy does not change, and updateTSOConnectionCtxs is not triggered - continue - } - case <-updateTicker.C: - case <-c.updateTSOConnectionCtxsCh: - } - c.updateTSOConnectionCtxs(dispatcherCtx, dc, &connectionCtxs) - } - }() - } + // Daemon goroutine to update the connectionCtxs periodically and handle the `connectionCtxs` update event. + go td.connectionCtxsUpdater() + var ( + err error + streamCtx context.Context + cancel context.CancelFunc + streamURL string + stream tsoStream + ) // Loop through each batch of TSO requests and send them for processing. - streamLoopTimer := time.NewTimer(c.option.timeout) + streamLoopTimer := time.NewTimer(option.timeout) defer streamLoopTimer.Stop() bo := retry.InitialBackoffer(updateMemberBackOffBaseTime, updateMemberTimeout, updateMemberBackOffBaseTime) tsoBatchLoop: for { select { - case <-dispatcherCtx.Done(): + case <-ctx.Done(): return default: } // Start to collect the TSO requests. - maxBatchWaitInterval := c.option.getMaxTSOBatchWaitInterval() - if err = tbc.fetchPendingRequests(dispatcherCtx, maxBatchWaitInterval); err != nil { + maxBatchWaitInterval := option.getMaxTSOBatchWaitInterval() + // Once the TSO requests are collected, must make sure they could be finished or revoked eventually, + // otherwise the upper caller may get blocked on waiting for the results. + if err = batchController.fetchPendingRequests(ctx, maxBatchWaitInterval); err != nil { + // Finish the collected requests if the fetch failed. + batchController.finishCollectedRequests(0, 0, 0, errors.WithStack(err)) if err == context.Canceled { log.Info("[tso] stop fetching the pending tso requests due to context canceled", zap.String("dc-location", dc)) } else { log.Error("[tso] fetch pending tso requests error", zap.String("dc-location", dc), - errs.ZapError(errs.ErrClientGetTSO, err)) + zap.Error(errs.ErrClientGetTSO.FastGenByArgs(err.Error()))) } return } if maxBatchWaitInterval >= 0 { - tbc.adjustBestBatchSize() + batchController.adjustBestBatchSize() } // Stop the timer if it's not stopped. if !streamLoopTimer.Stop() { @@ -428,30 +228,33 @@ tsoBatchLoop: } // We need be careful here, see more details in the comments of Timer.Reset. // https://pkg.go.dev/time@master#Timer.Reset - streamLoopTimer.Reset(c.option.timeout) + streamLoopTimer.Reset(option.timeout) // Choose a stream to send the TSO gRPC request. 
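// Illustrative aside, not part of the patch: the retry flow below relies on Go's labeled
// loops; `continue streamChoosingLoop` and `continue tsoBatchLoop` jump back to a specific
// enclosing loop rather than the innermost one. A minimal sketch (outer/inner structure
// and names are hypothetical):
package main

import "fmt"

func main() {
outer:
	for attempt := 0; attempt < 2; attempt++ {
		for step := 0; step < 3; step++ {
			if step == 1 {
				fmt.Println("retrying attempt", attempt)
				continue outer // skip the rest of the inner loop and start the next attempt
			}
			fmt.Println("attempt", attempt, "step", step)
		}
	}
}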
streamChoosingLoop: for { - connectionCtx := c.chooseStream(&connectionCtxs) + connectionCtx := chooseStream(connectionCtxs) if connectionCtx != nil { - streamAddr, stream, streamCtx, cancel = connectionCtx.streamAddr, connectionCtx.stream, connectionCtx.ctx, connectionCtx.cancel + streamCtx, cancel, streamURL, stream = connectionCtx.ctx, connectionCtx.cancel, connectionCtx.streamURL, connectionCtx.stream } // Check stream and retry if necessary. if stream == nil { log.Info("[tso] tso stream is not ready", zap.String("dc", dc)) - if c.updateTSOConnectionCtxs(dispatcherCtx, dc, &connectionCtxs) { + if provider.updateConnectionCtxs(ctx, dc, connectionCtxs) { continue streamChoosingLoop } timer := time.NewTimer(retryInterval) select { - case <-dispatcherCtx.Done(): + case <-ctx.Done(): + // Finish the collected requests if the context is canceled. + batchController.finishCollectedRequests(0, 0, 0, errors.WithStack(ctx.Err())) timer.Stop() return case <-streamLoopTimer.C: err = errs.ErrClientCreateTSOStream.FastGenByArgs(errs.RetryTimeoutErr) log.Error("[tso] create tso stream error", zap.String("dc-location", dc), errs.ZapError(err)) - c.svcDiscovery.ScheduleCheckMemberChanged() - c.finishRequest(tbc.getCollectedRequests(), 0, 0, 0, errors.WithStack(err)) + svcDiscovery.ScheduleCheckMemberChanged() + // Finish the collected requests if the stream is failed to be created. + batchController.finishCollectedRequests(0, 0, 0, errors.WithStack(err)) timer.Stop() continue tsoBatchLoop case <-timer.C: @@ -461,9 +264,9 @@ tsoBatchLoop: } select { case <-streamCtx.Done(): - log.Info("[tso] tso stream is canceled", zap.String("dc", dc), zap.String("stream-addr", streamAddr)) + log.Info("[tso] tso stream is canceled", zap.String("dc", dc), zap.String("stream-url", streamURL)) // Set `stream` to nil and remove this stream from the `connectionCtxs` due to being canceled. - connectionCtxs.Delete(streamAddr) + connectionCtxs.Delete(streamURL) cancel() stream = nil continue @@ -472,67 +275,113 @@ tsoBatchLoop: } } done := make(chan struct{}) - dl := newTSDeadline(c.option.timeout, done, cancel) - tsDeadlineCh, ok := c.tsDeadline.Load(dc) - for !ok || tsDeadlineCh == nil { - c.scheduleCheckTSDeadline() - time.Sleep(time.Millisecond * 100) - tsDeadlineCh, ok = c.tsDeadline.Load(dc) - } + dl := newTSDeadline(option.timeout, done, cancel) select { - case <-dispatcherCtx.Done(): + case <-ctx.Done(): + // Finish the collected requests if the context is canceled. + batchController.finishCollectedRequests(0, 0, 0, errors.WithStack(ctx.Err())) return - case tsDeadlineCh.(chan *deadline) <- dl: + case td.tsDeadlineCh <- dl: } - opts = extractSpanReference(tbc, opts[:0]) - err = c.processRequests(stream, dc, tbc, opts) + // processRequests guarantees that the collected requests could be finished properly. + err = td.processRequests(stream, dc, td.batchController) close(done) // If error happens during tso stream handling, reset stream and run the next trial. if err != nil { select { - case <-dispatcherCtx.Done(): + case <-ctx.Done(): return default: } - c.svcDiscovery.ScheduleCheckMemberChanged() + svcDiscovery.ScheduleCheckMemberChanged() log.Error("[tso] getTS error after processing requests", zap.String("dc-location", dc), - zap.String("stream-addr", streamAddr), - errs.ZapError(errs.ErrClientGetTSO, err)) + zap.String("stream-url", streamURL), + zap.Error(errs.ErrClientGetTSO.FastGenByArgs(err.Error()))) // Set `stream` to nil and remove this stream from the `connectionCtxs` due to error. 
- connectionCtxs.Delete(streamAddr) + connectionCtxs.Delete(streamURL) cancel() stream = nil // Because ScheduleCheckMemberChanged is asynchronous, if the leader changes, we better call `updateMember` ASAP. - if IsLeaderChange(err) { - if err := bo.Exec(dispatcherCtx, c.svcDiscovery.CheckMemberChanged); err != nil { + if errs.IsLeaderChange(err) { + if err := bo.Exec(ctx, svcDiscovery.CheckMemberChanged); err != nil { select { - case <-dispatcherCtx.Done(): + case <-ctx.Done(): return default: } } // Because the TSO Follower Proxy could be configured online, - // If we change it from on -> off, background updateTSOConnectionCtxs + // If we change it from on -> off, background updateConnectionCtxs // will cancel the current stream, then the EOF error caused by cancel() - // should not trigger the updateTSOConnectionCtxs here. + // should not trigger the updateConnectionCtxs here. // So we should only call it when the leader changes. - c.updateTSOConnectionCtxs(dispatcherCtx, dc, &connectionCtxs) + provider.updateConnectionCtxs(ctx, dc, connectionCtxs) } } } } -// TSO Follower Proxy only supports the Global TSO proxy now. -func (c *tsoClient) allowTSOFollowerProxy(dc string) bool { - return dc == globalDCLocation && c.option.getEnableTSOFollowerProxy() +// updateConnectionCtxs updates the `connectionCtxs` for the specified DC location regularly. +func (td *tsoDispatcher) connectionCtxsUpdater() { + var ( + ctx = td.ctx + dc = td.dc + connectionCtxs = td.connectionCtxs + provider = td.provider + option = td.provider.getOption() + updateTicker = &time.Ticker{} + ) + + log.Info("[tso] start tso connection contexts updater", zap.String("dc-location", dc)) + setNewUpdateTicker := func(ticker *time.Ticker) { + if updateTicker.C != nil { + updateTicker.Stop() + } + updateTicker = ticker + } + // Set to nil before returning to ensure that the existing ticker can be GC. + defer setNewUpdateTicker(nil) + + for { + provider.updateConnectionCtxs(ctx, dc, connectionCtxs) + select { + case <-ctx.Done(): + log.Info("[tso] exit tso connection contexts updater", zap.String("dc-location", dc)) + return + case <-option.enableTSOFollowerProxyCh: + // TODO: implement support of TSO Follower Proxy for the Local TSO. + if dc != globalDCLocation { + continue + } + enableTSOFollowerProxy := option.getEnableTSOFollowerProxy() + log.Info("[tso] tso follower proxy status changed", + zap.String("dc-location", dc), + zap.Bool("enable", enableTSOFollowerProxy)) + if enableTSOFollowerProxy && updateTicker.C == nil { + // Because the TSO Follower Proxy is enabled, + // the periodic check needs to be performed. + setNewUpdateTicker(time.NewTicker(memberUpdateInterval)) + } else if !enableTSOFollowerProxy && updateTicker.C != nil { + // Because the TSO Follower Proxy is disabled, + // the periodic check needs to be turned off. + setNewUpdateTicker(&time.Ticker{}) + } else { + continue + } + case <-updateTicker.C: + // Triggered periodically when the TSO Follower Proxy is enabled. + case <-td.updateConnectionCtxsCh: + // Triggered by the leader/follower change. + } + } } // chooseStream uses the reservoir sampling algorithm to randomly choose a connection. // connectionCtxs will only have only one stream to choose when the TSO Follower Proxy is off. 
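// Illustrative aside, not part of the patch: chooseStream below keeps a reservoir of size
// one: while iterating, the i-th element (0-based) replaces the current pick with
// probability 1/(i+1), which leaves every element equally likely once the iteration ends.
// A standalone sketch of the same sampling over a plain slice:
package main

import (
	"fmt"
	"math/rand"
)

func pickOne(items []string) string {
	var chosen string
	for i, item := range items {
		// Replace the current choice with probability 1/(i+1).
		if rand.Intn(i+1) == 0 {
			chosen = item
		}
	}
	return chosen
}

func main() {
	streams := []string{"leader", "follower-1", "follower-2"}
	fmt.Println("chosen stream:", pickOne(streams))
}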
-func (c *tsoClient) chooseStream(connectionCtxs *sync.Map) (connectionCtx *tsoConnectionContext) { +func chooseStream(connectionCtxs *sync.Map) (connectionCtx *tsoConnectionContext) { idx := 0 - connectionCtxs.Range(func(_, cc interface{}) bool { + connectionCtxs.Range(func(_, cc any) bool { j := rand.Intn(idx + 1) if j < 1 { connectionCtx = cc.(*tsoConnectionContext) @@ -543,296 +392,93 @@ func (c *tsoClient) chooseStream(connectionCtxs *sync.Map) (connectionCtx *tsoCo return connectionCtx } -type tsoConnectionContext struct { - streamAddr string - // Current stream to send gRPC requests, pdpb.PD_TsoClient for a leader/follower in the PD cluster, - // or tsopb.TSO_TsoClient for a primary/secondary in the TSO cluster - stream tsoStream - ctx context.Context - cancel context.CancelFunc -} - -func (c *tsoClient) updateTSOConnectionCtxs(updaterCtx context.Context, dc string, connectionCtxs *sync.Map) bool { - // Normal connection creating, it will be affected by the `enableForwarding`. - createTSOConnection := c.tryConnectToTSO - if c.allowTSOFollowerProxy(dc) { - createTSOConnection = c.tryConnectToTSOWithProxy - } - if err := createTSOConnection(updaterCtx, dc, connectionCtxs); err != nil { - log.Error("[tso] update connection contexts failed", zap.String("dc", dc), errs.ZapError(err)) - return false - } - return true -} - -// tryConnectToTSO will try to connect to the TSO allocator leader. If the connection becomes unreachable -// and enableForwarding is true, it will create a new connection to a follower to do the forwarding, -// while a new daemon will be created also to switch back to a normal leader connection ASAP the -// connection comes back to normal. -func (c *tsoClient) tryConnectToTSO( - dispatcherCtx context.Context, - dc string, - connectionCtxs *sync.Map, +func (td *tsoDispatcher) processRequests( + stream tsoStream, dcLocation string, tbc *tsoBatchController, ) error { var ( - networkErrNum uint64 - err error - stream tsoStream - url string - cc *grpc.ClientConn + requests = tbc.getCollectedRequests() + traceRegions = make([]*trace.Region, 0, len(requests)) + spans = make([]opentracing.Span, 0, len(requests)) ) - updateAndClear := func(newAddr string, connectionCtx *tsoConnectionContext) { - if cc, loaded := connectionCtxs.LoadOrStore(newAddr, connectionCtx); loaded { - // If the previous connection still exists, we should close it first. 
- cc.(*tsoConnectionContext).cancel() - connectionCtxs.Store(newAddr, connectionCtx) + for _, req := range requests { + traceRegions = append(traceRegions, trace.StartRegion(req.requestCtx, "pdclient.tsoReqSend")) + if span := opentracing.SpanFromContext(req.requestCtx); span != nil && span.Tracer() != nil { + spans = append(spans, span.Tracer().StartSpan("pdclient.processRequests", opentracing.ChildOf(span.Context()))) } - connectionCtxs.Range(func(addr, cc interface{}) bool { - if addr.(string) != newAddr { - cc.(*tsoConnectionContext).cancel() - connectionCtxs.Delete(addr) - } - return true - }) } - // retry several times before falling back to the follower when the network problem happens - - ticker := time.NewTicker(retryInterval) - defer ticker.Stop() - for i := 0; i < maxRetryTimes; i++ { - c.svcDiscovery.ScheduleCheckMemberChanged() - cc, url = c.GetTSOAllocatorClientConnByDCLocation(dc) - if cc != nil { - cctx, cancel := context.WithCancel(dispatcherCtx) - stream, err = c.tsoStreamBuilderFactory.makeBuilder(cc).build(cctx, cancel, c.option.timeout) - failpoint.Inject("unreachableNetwork", func() { - stream = nil - err = status.New(codes.Unavailable, "unavailable").Err() - }) - if stream != nil && err == nil { - updateAndClear(url, &tsoConnectionContext{url, stream, cctx, cancel}) - return nil - } - - if err != nil && c.option.enableForwarding { - // The reason we need to judge if the error code is equal to "Canceled" here is that - // when we create a stream we use a goroutine to manually control the timeout of the connection. - // There is no need to wait for the transport layer timeout which can reduce the time of unavailability. - // But it conflicts with the retry mechanism since we use the error code to decide if it is caused by network error. - // And actually the `Canceled` error can be regarded as a kind of network error in some way. 
- if rpcErr, ok := status.FromError(err); ok && (isNetworkError(rpcErr.Code()) || rpcErr.Code() == codes.Canceled) { - networkErrNum++ - } - } - cancel() - } else { - networkErrNum++ - } - select { - case <-dispatcherCtx.Done(): - return err - case <-ticker.C: + defer func() { + for i := range spans { + spans[i].Finish() } - } - - if networkErrNum == maxRetryTimes { - // encounter the network error - backupClientConn, addr := c.backupClientConn() - if backupClientConn != nil { - log.Info("[tso] fall back to use follower to forward tso stream", zap.String("dc", dc), zap.String("addr", addr)) - forwardedHost, ok := c.GetTSOAllocatorServingAddrByDCLocation(dc) - if !ok { - return errors.Errorf("cannot find the allocator leader in %s", dc) - } - - // create the follower stream - cctx, cancel := context.WithCancel(dispatcherCtx) - cctx = grpcutil.BuildForwardContext(cctx, forwardedHost) - stream, err = c.tsoStreamBuilderFactory.makeBuilder(backupClientConn).build(cctx, cancel, c.option.timeout) - if err == nil { - forwardedHostTrim := trimHTTPPrefix(forwardedHost) - addrTrim := trimHTTPPrefix(addr) - // the goroutine is used to check the network and change back to the original stream - go c.checkAllocator(dispatcherCtx, cancel, dc, forwardedHostTrim, addrTrim, url, updateAndClear) - requestForwarded.WithLabelValues(forwardedHostTrim, addrTrim).Set(1) - updateAndClear(addr, &tsoConnectionContext{addr, stream, cctx, cancel}) - return nil - } - cancel() + for i := range traceRegions { + traceRegions[i].End() } - } - return err -} + }() -// getAllTSOStreamBuilders returns a TSO stream builder for every service endpoint of TSO leader/followers -// or of keyspace group primary/secondaries. -func (c *tsoClient) getAllTSOStreamBuilders() map[string]tsoStreamBuilder { var ( - addrs = c.svcDiscovery.GetServiceURLs() - streamBuilders = make(map[string]tsoStreamBuilder, len(addrs)) - cc *grpc.ClientConn - err error + count = int64(len(requests)) + svcDiscovery = td.provider.getServiceDiscovery() + clusterID = svcDiscovery.GetClusterID() + keyspaceID = svcDiscovery.GetKeyspaceID() + reqKeyspaceGroupID = svcDiscovery.GetKeyspaceGroupID() ) - for _, addr := range addrs { - if len(addrs) == 0 { - continue - } - if cc, err = c.svcDiscovery.GetOrCreateGRPCConn(addr); err != nil { - continue - } - healthCtx, healthCancel := context.WithTimeout(c.ctx, c.option.timeout) - resp, err := healthpb.NewHealthClient(cc).Check(healthCtx, &healthpb.HealthCheckRequest{Service: ""}) - healthCancel() - if err == nil && resp.GetStatus() == healthpb.HealthCheckResponse_SERVING { - streamBuilders[addr] = c.tsoStreamBuilderFactory.makeBuilder(cc) - } - } - return streamBuilders -} - -// tryConnectToTSOWithProxy will create multiple streams to all the service endpoints to work as -// a TSO proxy to reduce the pressure of the main serving service endpoint. -func (c *tsoClient) tryConnectToTSOWithProxy(dispatcherCtx context.Context, dc string, connectionCtxs *sync.Map) error { - tsoStreamBuilders := c.getAllTSOStreamBuilders() - leaderAddr := c.svcDiscovery.GetServingAddr() - forwardedHost, ok := c.GetTSOAllocatorServingAddrByDCLocation(dc) - if !ok { - return errors.Errorf("cannot find the allocator leader in %s", dc) - } - // GC the stale one. - connectionCtxs.Range(func(addr, cc interface{}) bool { - if _, ok := tsoStreamBuilders[addr.(string)]; !ok { - cc.(*tsoConnectionContext).cancel() - connectionCtxs.Delete(addr) - } - return true - }) - // Update the missing one. 
- for addr, tsoStreamBuilder := range tsoStreamBuilders { - if _, ok = connectionCtxs.Load(addr); ok { - continue - } - cctx, cancel := context.WithCancel(dispatcherCtx) - // Do not proxy the leader client. - if addr != leaderAddr { - log.Info("[tso] use follower to forward tso stream to do the proxy", - zap.String("dc", dc), zap.String("addr", addr)) - cctx = grpcutil.BuildForwardContext(cctx, forwardedHost) - } - // Create the TSO stream. - stream, err := tsoStreamBuilder.build(cctx, cancel, c.option.timeout) - if err == nil { - if addr != leaderAddr { - forwardedHostTrim := trimHTTPPrefix(forwardedHost) - addrTrim := trimHTTPPrefix(addr) - requestForwarded.WithLabelValues(forwardedHostTrim, addrTrim).Set(1) - } - connectionCtxs.Store(addr, &tsoConnectionContext{addr, stream, cctx, cancel}) - continue - } - log.Error("[tso] create the tso stream failed", - zap.String("dc", dc), zap.String("addr", addr), errs.ZapError(err)) - cancel() - } - return nil -} - -func extractSpanReference(tbc *tsoBatchController, opts []opentracing.StartSpanOption) []opentracing.StartSpanOption { - for _, req := range tbc.getCollectedRequests() { - if span := opentracing.SpanFromContext(req.requestCtx); span != nil { - opts = append(opts, opentracing.ChildOf(span.Context())) - } - } - return opts -} - -func (c *tsoClient) processRequests( - stream tsoStream, dcLocation string, tbc *tsoBatchController, opts []opentracing.StartSpanOption, -) error { - if len(opts) > 0 { - span := opentracing.StartSpan("pdclient.processRequests", opts...) - defer span.Finish() - } - - requests := tbc.getCollectedRequests() - for _, req := range requests { - defer trace.StartRegion(req.requestCtx, "tsoReqSend").End() - } - count := int64(len(requests)) - reqKeyspaceGroupID := c.svcDiscovery.GetKeyspaceGroupID() respKeyspaceGroupID, physical, logical, suffixBits, err := stream.processRequests( - c.svcDiscovery.GetClusterID(), c.svcDiscovery.GetKeyspaceID(), reqKeyspaceGroupID, - dcLocation, requests, tbc.batchStartTime) + clusterID, keyspaceID, reqKeyspaceGroupID, + dcLocation, count, tbc.batchStartTime) if err != nil { - c.finishRequest(requests, 0, 0, 0, err) + tbc.finishCollectedRequests(0, 0, 0, err) return err } - // `logical` is the largest ts's logical part here, we need to do the subtracting before we finish each TSO request. - firstLogical := tsoutil.AddLogical(logical, -count+1, suffixBits) curTSOInfo := &tsoInfo{ - tsoServer: stream.getServerAddr(), + tsoServer: stream.getServerURL(), reqKeyspaceGroupID: reqKeyspaceGroupID, respKeyspaceGroupID: respKeyspaceGroupID, respReceivedAt: time.Now(), physical: physical, - logical: tsoutil.AddLogical(firstLogical, count-1, suffixBits), + logical: logical, } - c.compareAndSwapTS(dcLocation, curTSOInfo, physical, firstLogical) - c.finishRequest(requests, physical, firstLogical, suffixBits, nil) + // `logical` is the largest ts's logical part here, we need to do the subtracting before we finish each TSO request. 
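// Illustrative aside, not part of the patch: a worked example of how the batch splits the
// TSO response handled below. The stream reports the *largest* logical value for `count`
// requests, so the dispatcher derives firstLogical and hands request i the i-th logical in
// the range. The local addLogical helper assumes the same semantics as tsoutil.AddLogical
// (shift the count by suffixBits, then add); treat it as an illustration, not library code.
package main

import "fmt"

func addLogical(logical, count int64, suffixBits uint32) int64 {
	return logical + count<<suffixBits
}

func main() {
	var (
		largestLogical int64  = 100 // logical part reported by the TSO stream
		count          int64  = 5   // number of requests in the batch
		suffixBits     uint32 = 0   // no keyspace-group suffix in this example
	)
	firstLogical := addLogical(largestLogical, -count+1, suffixBits)
	for i := int64(0); i < count; i++ {
		// Requests receive logicals 96, 97, 98, 99 and 100 in this example.
		fmt.Println("request", i, "logical:", addLogical(firstLogical, i, suffixBits))
	}
}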
+ firstLogical := tsoutil.AddLogical(logical, -count+1, suffixBits) + td.compareAndSwapTS(curTSOInfo, firstLogical) + tbc.finishCollectedRequests(physical, firstLogical, suffixBits, nil) return nil } -func (c *tsoClient) compareAndSwapTS( - dcLocation string, - curTSOInfo *tsoInfo, - physical, firstLogical int64, +func (td *tsoDispatcher) compareAndSwapTS( + curTSOInfo *tsoInfo, firstLogical int64, ) { - val, loaded := c.lastTSOInfoMap.LoadOrStore(dcLocation, curTSOInfo) - if !loaded { - return - } - lastTSOInfo := val.(*tsoInfo) - if lastTSOInfo.respKeyspaceGroupID != curTSOInfo.respKeyspaceGroupID { - log.Info("[tso] keyspace group changed", - zap.String("dc-location", dcLocation), - zap.Uint32("old-group-id", lastTSOInfo.respKeyspaceGroupID), - zap.Uint32("new-group-id", curTSOInfo.respKeyspaceGroupID)) - } - - // The TSO we get is a range like [largestLogical-count+1, largestLogical], so we save the last TSO's largest logical - // to compare with the new TSO's first logical. For example, if we have a TSO resp with logical 10, count 5, then - // all TSOs we get will be [6, 7, 8, 9, 10]. lastTSOInfo.logical stores the logical part of the largest ts returned - // last time. - if tsoutil.TSLessEqual(physical, firstLogical, lastTSOInfo.physical, lastTSOInfo.logical) { - log.Panic("[tso] timestamp fallback", - zap.String("dc-location", dcLocation), - zap.Uint32("keyspace", c.svcDiscovery.GetKeyspaceID()), - zap.String("last-ts", fmt.Sprintf("(%d, %d)", lastTSOInfo.physical, lastTSOInfo.logical)), - zap.String("cur-ts", fmt.Sprintf("(%d, %d)", physical, firstLogical)), - zap.String("last-tso-server", lastTSOInfo.tsoServer), - zap.String("cur-tso-server", curTSOInfo.tsoServer), - zap.Uint32("last-keyspace-group-in-request", lastTSOInfo.reqKeyspaceGroupID), - zap.Uint32("cur-keyspace-group-in-request", curTSOInfo.reqKeyspaceGroupID), - zap.Uint32("last-keyspace-group-in-response", lastTSOInfo.respKeyspaceGroupID), - zap.Uint32("cur-keyspace-group-in-response", curTSOInfo.respKeyspaceGroupID), - zap.Time("last-response-received-at", lastTSOInfo.respReceivedAt), - zap.Time("cur-response-received-at", curTSOInfo.respReceivedAt)) - } - lastTSOInfo.tsoServer = curTSOInfo.tsoServer - lastTSOInfo.reqKeyspaceGroupID = curTSOInfo.reqKeyspaceGroupID - lastTSOInfo.respKeyspaceGroupID = curTSOInfo.respKeyspaceGroupID - lastTSOInfo.respReceivedAt = curTSOInfo.respReceivedAt - lastTSOInfo.physical = curTSOInfo.physical - lastTSOInfo.logical = curTSOInfo.logical -} - -func (c *tsoClient) finishRequest(requests []*tsoRequest, physical, firstLogical int64, suffixBits uint32, err error) { - for i := 0; i < len(requests); i++ { - if span := opentracing.SpanFromContext(requests[i].requestCtx); span != nil { - span.Finish() - } - requests[i].physical, requests[i].logical = physical, tsoutil.AddLogical(firstLogical, int64(i), suffixBits) - defer trace.StartRegion(requests[i].requestCtx, "tsoReqDequeue").End() - requests[i].done <- err - } + if td.lastTSOInfo != nil { + var ( + lastTSOInfo = td.lastTSOInfo + dc = td.dc + physical = curTSOInfo.physical + keyspaceID = td.provider.getServiceDiscovery().GetKeyspaceID() + ) + if td.lastTSOInfo.respKeyspaceGroupID != curTSOInfo.respKeyspaceGroupID { + log.Info("[tso] keyspace group changed", + zap.String("dc-location", dc), + zap.Uint32("old-group-id", lastTSOInfo.respKeyspaceGroupID), + zap.Uint32("new-group-id", curTSOInfo.respKeyspaceGroupID)) + } + // The TSO we get is a range like [largestLogical-count+1, largestLogical], so we save the last TSO's largest logical + // 
to compare with the new TSO's first logical. For example, if we have a TSO resp with logical 10, count 5, then + // all TSOs we get will be [6, 7, 8, 9, 10]. lastTSOInfo.logical stores the logical part of the largest ts returned + // last time. + if tsoutil.TSLessEqual(physical, firstLogical, lastTSOInfo.physical, lastTSOInfo.logical) { + log.Panic("[tso] timestamp fallback", + zap.String("dc-location", dc), + zap.Uint32("keyspace", keyspaceID), + zap.String("last-ts", fmt.Sprintf("(%d, %d)", lastTSOInfo.physical, lastTSOInfo.logical)), + zap.String("cur-ts", fmt.Sprintf("(%d, %d)", physical, firstLogical)), + zap.String("last-tso-server", lastTSOInfo.tsoServer), + zap.String("cur-tso-server", curTSOInfo.tsoServer), + zap.Uint32("last-keyspace-group-in-request", lastTSOInfo.reqKeyspaceGroupID), + zap.Uint32("cur-keyspace-group-in-request", curTSOInfo.reqKeyspaceGroupID), + zap.Uint32("last-keyspace-group-in-response", lastTSOInfo.respKeyspaceGroupID), + zap.Uint32("cur-keyspace-group-in-response", curTSOInfo.respKeyspaceGroupID), + zap.Time("last-response-received-at", lastTSOInfo.respReceivedAt), + zap.Time("cur-response-received-at", curTSOInfo.respReceivedAt)) + } + } + td.lastTSOInfo = curTSOInfo } diff --git a/client/tso_request.go b/client/tso_request.go new file mode 100644 index 00000000000..b912fa35497 --- /dev/null +++ b/client/tso_request.go @@ -0,0 +1,96 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pd + +import ( + "context" + "runtime/trace" + "sync" + "time" + + "github.com/pingcap/errors" +) + +// TSFuture is a future which promises to return a TSO. +type TSFuture interface { + // Wait gets the physical and logical time, it would block caller if data is not available yet. + Wait() (int64, int64, error) +} + +var ( + _ TSFuture = (*tsoRequest)(nil) + _ TSFuture = (*tsoRequestFastFail)(nil) +) + +type tsoRequest struct { + requestCtx context.Context + clientCtx context.Context + done chan error + physical int64 + logical int64 + dcLocation string + + // Runtime fields. + start time.Time + pool *sync.Pool +} + +// tryDone tries to send the result to the channel, it will not block. +func (req *tsoRequest) tryDone(err error) { + select { + case req.done <- err: + default: + } +} + +// Wait will block until the TSO result is ready. +func (req *tsoRequest) Wait() (physical int64, logical int64, err error) { + // If tso command duration is observed very high, the reason could be it + // takes too long for Wait() be called. 
+ start := time.Now() + cmdDurationTSOAsyncWait.Observe(start.Sub(req.start).Seconds()) + select { + case err = <-req.done: + defer req.pool.Put(req) + defer trace.StartRegion(req.requestCtx, "pdclient.tsoReqDone").End() + err = errors.WithStack(err) + if err != nil { + cmdFailDurationTSO.Observe(time.Since(req.start).Seconds()) + return 0, 0, err + } + physical, logical = req.physical, req.logical + now := time.Now() + cmdDurationWait.Observe(now.Sub(start).Seconds()) + cmdDurationTSO.Observe(now.Sub(req.start).Seconds()) + return + case <-req.requestCtx.Done(): + return 0, 0, errors.WithStack(req.requestCtx.Err()) + case <-req.clientCtx.Done(): + return 0, 0, errors.WithStack(req.clientCtx.Err()) + } +} + +type tsoRequestFastFail struct { + err error +} + +func newTSORequestFastFail(err error) *tsoRequestFastFail { + return &tsoRequestFastFail{err} +} + +// Wait returns the error directly. +func (req *tsoRequestFastFail) Wait() (physical int64, logical int64, err error) { + return 0, 0, req.err +} diff --git a/client/tso_service_discovery.go b/client/tso_service_discovery.go index 3d7c0745f49..443d455e911 100644 --- a/client/tso_service_discovery.go +++ b/client/tso_service_discovery.go @@ -58,45 +58,45 @@ var _ tsoAllocatorEventSource = (*tsoServiceDiscovery)(nil) type keyspaceGroupSvcDiscovery struct { sync.RWMutex group *tsopb.KeyspaceGroup - // primaryAddr is the primary serving address - primaryAddr string - // secondaryAddrs are TSO secondary serving addresses - secondaryAddrs []string - // addrs are the primary/secondary serving addresses - addrs []string + // primaryURL is the primary serving URL + primaryURL string + // secondaryURLs are TSO secondary serving URL + secondaryURLs []string + // urls are the primary/secondary serving URL + urls []string } func (k *keyspaceGroupSvcDiscovery) update( keyspaceGroup *tsopb.KeyspaceGroup, - newPrimaryAddr string, - secondaryAddrs, addrs []string, -) (oldPrimaryAddr string, primarySwitched, secondaryChanged bool) { + newPrimaryURL string, + secondaryURLs, urls []string, +) (oldPrimaryURL string, primarySwitched, secondaryChanged bool) { k.Lock() defer k.Unlock() - // If the new primary address is empty, we don't switch the primary address. - oldPrimaryAddr = k.primaryAddr - if len(newPrimaryAddr) > 0 { - primarySwitched = !strings.EqualFold(oldPrimaryAddr, newPrimaryAddr) - k.primaryAddr = newPrimaryAddr + // If the new primary URL is empty, we don't switch the primary URL. + oldPrimaryURL = k.primaryURL + if len(newPrimaryURL) > 0 { + primarySwitched = !strings.EqualFold(oldPrimaryURL, newPrimaryURL) + k.primaryURL = newPrimaryURL } - if !reflect.DeepEqual(k.secondaryAddrs, secondaryAddrs) { - k.secondaryAddrs = secondaryAddrs + if !reflect.DeepEqual(k.secondaryURLs, secondaryURLs) { + k.secondaryURLs = secondaryURLs secondaryChanged = true } k.group = keyspaceGroup - k.addrs = addrs + k.urls = urls return } // tsoServerDiscovery is for discovering the serving endpoints of the TSO servers -// TODO: dynamically update the TSO server addresses in the case of TSO server failover +// TODO: dynamically update the TSO server URLs in the case of TSO server failover // and scale-out/in. 
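// A usage sketch for the TSFuture interface introduced in the new client/tso_request.go
// above. It assumes the public pd client exposes NewClientWithContext and GetTSAsync as
// in this repository's client package; the endpoint address is a placeholder.
package main

import (
	"context"
	"fmt"
	"log"

	pd "github.com/tikv/pd/client"
)

func main() {
	cli, err := pd.NewClientWithContext(context.Background(), []string{"http://127.0.0.1:2379"}, pd.SecurityOption{})
	if err != nil {
		log.Fatal(err)
	}
	defer cli.Close()

	// Fire the TSO request asynchronously, overlap other work, then block on Wait.
	future := cli.GetTSAsync(context.Background())
	// ... other work can run here while the batch is in flight ...
	physical, logical, err := future.Wait()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("tso:", physical, logical)
}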
type tsoServerDiscovery struct { sync.RWMutex - addrs []string + urls []string // used for round-robin load balancing selectIdx int // failureCount counts the consecutive failures for communicating with the tso servers @@ -107,7 +107,7 @@ func (t *tsoServerDiscovery) countFailure() bool { t.Lock() defer t.Unlock() t.failureCount++ - return t.failureCount >= len(t.addrs) + return t.failureCount >= len(t.urls) } func (t *tsoServerDiscovery) resetFailure() { @@ -133,14 +133,14 @@ type tsoServiceDiscovery struct { // keyspaceGroupSD is for discovering the serving endpoints of the keyspace group keyspaceGroupSD *keyspaceGroupSvcDiscovery - // addr -> a gRPC connection + // URL -> a gRPC connection clientConns sync.Map // Store as map[string]*grpc.ClientConn // localAllocPrimariesUpdatedCb will be called when the local tso allocator primary list is updated. - // The input is a map {DC Location -> Leader Addr} - localAllocPrimariesUpdatedCb tsoLocalServAddrsUpdatedFunc + // The input is a map {DC Location -> Leader URL} + localAllocPrimariesUpdatedCb tsoLocalServURLsUpdatedFunc // globalAllocPrimariesUpdatedCb will be called when the local tso allocator primary list is updated. - globalAllocPrimariesUpdatedCb tsoGlobalServAddrUpdatedFunc + globalAllocPrimariesUpdatedCb tsoGlobalServURLUpdatedFunc checkMembershipCh chan struct{} @@ -173,11 +173,11 @@ func newTSOServiceDiscovery( } c.keyspaceID.Store(keyspaceID) c.keyspaceGroupSD = &keyspaceGroupSvcDiscovery{ - primaryAddr: "", - secondaryAddrs: make([]string, 0), - addrs: make([]string, 0), + primaryURL: "", + secondaryURLs: make([]string, 0), + urls: make([]string, 0), } - c.tsoServerDiscovery = &tsoServerDiscovery{addrs: make([]string, 0)} + c.tsoServerDiscovery = &tsoServerDiscovery{urls: make([]string, 0)} // Start with the default keyspace group. The actual keyspace group, to which the keyspace belongs, // will be discovered later. c.defaultDiscoveryKey = fmt.Sprintf(tsoSvcDiscoveryFormat, clusterID, defaultKeySpaceGroupID) @@ -231,7 +231,7 @@ func (c *tsoServiceDiscovery) Close() { c.cancel() c.wg.Wait() - c.clientConns.Range(func(key, cc interface{}) bool { + c.clientConns.Range(func(key, cc any) bool { if err := cc.(*grpc.ClientConn).Close(); err != nil { log.Error("[tso] failed to close gRPC clientConn", errs.ZapError(errs.ErrCloseGRPCConn, err)) } @@ -288,44 +288,44 @@ func (c *tsoServiceDiscovery) GetKeyspaceGroupID() uint32 { return c.keyspaceGroupSD.group.Id } -// GetServiceURLs returns the URLs of the tso primary/secondary addresses of this keyspace group. +// GetServiceURLs returns the URLs of the tso primary/secondary URL of this keyspace group. // For testing use. It should only be called when the client is closed. func (c *tsoServiceDiscovery) GetServiceURLs() []string { c.keyspaceGroupSD.RLock() defer c.keyspaceGroupSD.RUnlock() - return c.keyspaceGroupSD.addrs + return c.keyspaceGroupSD.urls } -// GetServingAddr returns the grpc client connection of the serving endpoint +// GetServingURL returns the grpc client connection of the serving endpoint // which is the primary in a primary/secondary configured cluster. 
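// A standalone sketch (placeholder names, not the client's actual type) of the
// failure-counting pattern used by tsoServerDiscovery above: every consecutive
// communication failure bumps a counter, and once every known URL has failed the
// caller is told to re-run service discovery.
package main

import (
	"fmt"
	"sync"
)

type serverList struct {
	sync.RWMutex
	urls         []string
	failureCount int
}

// countFailure records one failure and reports whether all URLs have failed,
// i.e. whether the URL list should be refreshed from service discovery.
func (s *serverList) countFailure() bool {
	s.Lock()
	defer s.Unlock()
	s.failureCount++
	return s.failureCount >= len(s.urls)
}

// resetFailure clears the counter after a successful call.
func (s *serverList) resetFailure() {
	s.Lock()
	defer s.Unlock()
	s.failureCount = 0
}

func main() {
	s := &serverList{urls: []string{"http://tso-0:3379", "http://tso-1:3379"}}
	fmt.Println(s.countFailure()) // false: only one of two URLs has failed
	fmt.Println(s.countFailure()) // true: every URL failed, time to re-discover
	s.resetFailure()
}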
func (c *tsoServiceDiscovery) GetServingEndpointClientConn() *grpc.ClientConn { - if cc, ok := c.clientConns.Load(c.getPrimaryAddr()); ok { + if cc, ok := c.clientConns.Load(c.getPrimaryURL()); ok { return cc.(*grpc.ClientConn) } return nil } -// GetClientConns returns the mapping {addr -> a gRPC connection} +// GetClientConns returns the mapping {URL -> a gRPC connection} func (c *tsoServiceDiscovery) GetClientConns() *sync.Map { return &c.clientConns } -// GetServingAddr returns the serving endpoint which is the primary in a +// GetServingURL returns the serving endpoint which is the primary in a // primary/secondary configured cluster. -func (c *tsoServiceDiscovery) GetServingAddr() string { - return c.getPrimaryAddr() +func (c *tsoServiceDiscovery) GetServingURL() string { + return c.getPrimaryURL() } -// GetBackupAddrs gets the addresses of the current reachable and healthy +// GetBackupURLs gets the URLs of the current reachable and healthy // backup service endpoints. Backup service endpoints are secondaries in // a primary/secondary configured cluster. -func (c *tsoServiceDiscovery) GetBackupAddrs() []string { - return c.getSecondaryAddrs() +func (c *tsoServiceDiscovery) GetBackupURLs() []string { + return c.getSecondaryURLs() } -// GetOrCreateGRPCConn returns the corresponding grpc client connection of the given addr. -func (c *tsoServiceDiscovery) GetOrCreateGRPCConn(addr string) (*grpc.ClientConn, error) { - return grpcutil.GetOrCreateGRPCConn(c.ctx, &c.clientConns, addr, c.tlsCfg, c.option.gRPCDialOptions...) +// GetOrCreateGRPCConn returns the corresponding grpc client connection of the given URL. +func (c *tsoServiceDiscovery) GetOrCreateGRPCConn(url string) (*grpc.ClientConn, error) { + return grpcutil.GetOrCreateGRPCConn(c.ctx, &c.clientConns, url, c.tlsCfg, c.option.gRPCDialOptions...) } // ScheduleCheckMemberChanged is used to trigger a check to see if there is any change in service endpoints. @@ -339,7 +339,9 @@ func (c *tsoServiceDiscovery) ScheduleCheckMemberChanged() { // CheckMemberChanged Immediately check if there is any membership change among the primary/secondaries in // a primary/secondary configured cluster. func (c *tsoServiceDiscovery) CheckMemberChanged() error { - c.apiSvcDiscovery.CheckMemberChanged() + if err := c.apiSvcDiscovery.CheckMemberChanged(); err != nil { + log.Warn("[tso] failed to check member changed", errs.ZapError(err)) + } if err := c.retry(tsoQueryRetryMaxTimes, tsoQueryRetryInterval, c.updateMember); err != nil { log.Error("[tso] failed to update member", errs.ZapError(err)) return err @@ -347,28 +349,28 @@ func (c *tsoServiceDiscovery) CheckMemberChanged() error { return nil } -// AddServingAddrSwitchedCallback adds callbacks which will be called when the primary in +// AddServingURLSwitchedCallback adds callbacks which will be called when the primary in // a primary/secondary configured cluster is switched. -func (c *tsoServiceDiscovery) AddServingAddrSwitchedCallback(callbacks ...func()) { -} +func (*tsoServiceDiscovery) AddServingURLSwitchedCallback(...func()) {} -// AddServiceAddrsSwitchedCallback adds callbacks which will be called when any primary/secondary +// AddServiceURLsSwitchedCallback adds callbacks which will be called when any primary/secondary // in a primary/secondary configured cluster is changed. 
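// CheckMemberChanged above funnels updateMember through an internal retry helper.
// This is a hedged, self-contained sketch of that shape only -- the real c.retry,
// tsoQueryRetryMaxTimes and tsoQueryRetryInterval are internal to the client
// package, and the names and values here are illustrative placeholders.
package main

import (
	"errors"
	"fmt"
	"time"
)

// retry runs fn up to maxTimes, sleeping interval between attempts, and returns
// the last error if every attempt fails.
func retry(maxTimes int, interval time.Duration, fn func() error) error {
	var err error
	for i := 0; i < maxTimes; i++ {
		if err = fn(); err == nil {
			return nil
		}
		time.Sleep(interval)
	}
	return err
}

func main() {
	attempts := 0
	err := retry(3, 10*time.Millisecond, func() error {
		attempts++
		if attempts < 3 {
			return errors.New("transient failure")
		}
		return nil
	})
	fmt.Println(attempts, err) // 3 <nil>
}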
-func (c *tsoServiceDiscovery) AddServiceAddrsSwitchedCallback(callbacks ...func()) { -} +func (*tsoServiceDiscovery) AddServiceURLsSwitchedCallback(...func()) {} -// SetTSOLocalServAddrsUpdatedCallback adds a callback which will be called when the local tso +// SetTSOLocalServURLsUpdatedCallback adds a callback which will be called when the local tso // allocator leader list is updated. -func (c *tsoServiceDiscovery) SetTSOLocalServAddrsUpdatedCallback(callback tsoLocalServAddrsUpdatedFunc) { +func (c *tsoServiceDiscovery) SetTSOLocalServURLsUpdatedCallback(callback tsoLocalServURLsUpdatedFunc) { c.localAllocPrimariesUpdatedCb = callback } -// SetTSOGlobalServAddrUpdatedCallback adds a callback which will be called when the global tso +// SetTSOGlobalServURLUpdatedCallback adds a callback which will be called when the global tso // allocator leader is updated. -func (c *tsoServiceDiscovery) SetTSOGlobalServAddrUpdatedCallback(callback tsoGlobalServAddrUpdatedFunc) { - addr := c.getPrimaryAddr() - if len(addr) > 0 { - callback(addr) +func (c *tsoServiceDiscovery) SetTSOGlobalServURLUpdatedCallback(callback tsoGlobalServURLUpdatedFunc) { + url := c.getPrimaryURL() + if len(url) > 0 { + if err := callback(url); err != nil { + log.Error("[tso] failed to call back when tso global service url update", zap.String("url", url), errs.ZapError(err)) + } } c.globalAllocPrimariesUpdatedCb = callback } @@ -383,18 +385,18 @@ func (c *tsoServiceDiscovery) GetAllServiceClients() []ServiceClient { return c.apiSvcDiscovery.GetAllServiceClients() } -// getPrimaryAddr returns the primary address. -func (c *tsoServiceDiscovery) getPrimaryAddr() string { +// getPrimaryURL returns the primary URL. +func (c *tsoServiceDiscovery) getPrimaryURL() string { c.keyspaceGroupSD.RLock() defer c.keyspaceGroupSD.RUnlock() - return c.keyspaceGroupSD.primaryAddr + return c.keyspaceGroupSD.primaryURL } -// getSecondaryAddrs returns the secondary addresses. -func (c *tsoServiceDiscovery) getSecondaryAddrs() []string { +// getSecondaryURLs returns the secondary URLs. +func (c *tsoServiceDiscovery) getSecondaryURLs() []string { c.keyspaceGroupSD.RLock() defer c.keyspaceGroupSD.RUnlock() - return c.keyspaceGroupSD.secondaryAddrs + return c.keyspaceGroupSD.secondaryURLs } func (c *tsoServiceDiscovery) afterPrimarySwitched(oldPrimary, newPrimary string) error { @@ -411,9 +413,9 @@ func (c *tsoServiceDiscovery) afterPrimarySwitched(oldPrimary, newPrimary string } func (c *tsoServiceDiscovery) updateMember() error { - // The keyspace membership or the primary serving address of the keyspace group, to which this + // The keyspace membership or the primary serving URL of the keyspace group, to which this // keyspace belongs, might have been changed. We need to query tso servers to get the latest info. 
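// A minimal sketch of the callback-registration pattern shown in
// SetTSOGlobalServURLUpdatedCallback above: the callback now returns an error, is
// invoked once with the current primary URL at registration time (errors are only
// logged), and is then stored for later primary switches. All names here are
// placeholders, not the client's API.
package main

import (
	"fmt"
	"log"
	"sync"
)

type urlUpdatedFunc func(url string) error

type discovery struct {
	mu         sync.RWMutex
	primaryURL string
	onUpdated  urlUpdatedFunc
}

// setPrimaryUpdatedCallback registers cb and immediately replays the current
// primary URL to it so the caller does not miss the initial state.
func (d *discovery) setPrimaryUpdatedCallback(cb urlUpdatedFunc) {
	d.mu.Lock()
	defer d.mu.Unlock()
	if d.primaryURL != "" {
		if err := cb(d.primaryURL); err != nil {
			log.Printf("callback failed for current primary %s: %v", d.primaryURL, err)
		}
	}
	d.onUpdated = cb
}

func main() {
	d := &discovery{primaryURL: "http://tso-0:3379"}
	d.setPrimaryUpdatedCallback(func(url string) error {
		fmt.Println("primary is", url)
		return nil
	})
}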
- tsoServerAddr, err := c.getTSOServer(c.apiSvcDiscovery) + tsoServerURL, err := c.getTSOServer(c.apiSvcDiscovery) if err != nil { log.Error("[tso] failed to get tso server", errs.ZapError(err)) return err @@ -421,41 +423,41 @@ func (c *tsoServiceDiscovery) updateMember() error { keyspaceID := c.GetKeyspaceID() var keyspaceGroup *tsopb.KeyspaceGroup - if len(tsoServerAddr) > 0 { - keyspaceGroup, err = c.findGroupByKeyspaceID(keyspaceID, tsoServerAddr, updateMemberTimeout) + if len(tsoServerURL) > 0 { + keyspaceGroup, err = c.findGroupByKeyspaceID(keyspaceID, tsoServerURL, updateMemberTimeout) if err != nil { if c.tsoServerDiscovery.countFailure() { log.Error("[tso] failed to find the keyspace group", zap.Uint32("keyspace-id-in-request", keyspaceID), - zap.String("tso-server-addr", tsoServerAddr), + zap.String("tso-server-url", tsoServerURL), errs.ZapError(err)) } return err } c.tsoServerDiscovery.resetFailure() } else { - // There is no error but no tso server address found, which means + // There is no error but no tso server URL found, which means // the server side hasn't been upgraded to the version that // processes and returns GetClusterInfoResponse.TsoUrls. In this case, - // we fall back to the old way of discovering the tso primary addresses + // we fall back to the old way of discovering the tso primary URL // from etcd directly. c.printFallbackLogOnce.Do(func() { - log.Warn("[tso] no tso server address found,"+ + log.Warn("[tso] no tso server URL found,"+ " fallback to the legacy path to discover from etcd directly", zap.Uint32("keyspace-id-in-request", keyspaceID), - zap.String("tso-server-addr", tsoServerAddr), + zap.String("tso-server-url", tsoServerURL), zap.String("discovery-key", c.defaultDiscoveryKey)) }) - addrs, err := c.discoverWithLegacyPath() + urls, err := c.discoverWithLegacyPath() if err != nil { return err } - if len(addrs) == 0 { - return errors.New("no tso server address found") + if len(urls) == 0 { + return errors.New("no tso server url found") } - members := make([]*tsopb.KeyspaceGroupMember, 0, len(addrs)) - for _, addr := range addrs { - members = append(members, &tsopb.KeyspaceGroupMember{Address: addr}) + members := make([]*tsopb.KeyspaceGroupMember, 0, len(urls)) + for _, url := range urls { + members = append(members, &tsopb.KeyspaceGroupMember{Address: url}) } members[0].IsPrimary = true keyspaceGroup = &tsopb.KeyspaceGroup{ @@ -467,54 +469,54 @@ func (c *tsoServiceDiscovery) updateMember() error { oldGroupID := c.GetKeyspaceGroupID() if oldGroupID != keyspaceGroup.Id { log.Info("[tso] the keyspace group changed", - zap.Uint32("keyspace-id", keyspaceGroup.Id), + zap.Uint32("keyspace-id", keyspaceID), zap.Uint32("new-keyspace-group-id", keyspaceGroup.Id), zap.Uint32("old-keyspace-group-id", oldGroupID)) } - // Initialize the serving addresses from the returned keyspace group info. - primaryAddr := "" - secondaryAddrs := make([]string, 0) - addrs := make([]string, 0, len(keyspaceGroup.Members)) + // Initialize the serving URL from the returned keyspace group info. 
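// A sketch of the legacy fallback path above: when the server returns no TSO URLs,
// the client discovers plain URLs from etcd and wraps them into a tsopb.KeyspaceGroup
// itself, treating the first URL as the primary. Only the Address/IsPrimary/Id/Members
// fields that appear in this diff are used; the group ID value is a placeholder.
package main

import (
	"errors"
	"fmt"

	"github.com/pingcap/kvproto/pkg/tsopb"
)

// keyspaceGroupFromURLs builds a keyspace group descriptor from a raw URL list,
// marking the first entry as the primary, as the legacy discovery path does.
func keyspaceGroupFromURLs(groupID uint32, urls []string) (*tsopb.KeyspaceGroup, error) {
	if len(urls) == 0 {
		return nil, errors.New("no tso server url found")
	}
	members := make([]*tsopb.KeyspaceGroupMember, 0, len(urls))
	for _, url := range urls {
		members = append(members, &tsopb.KeyspaceGroupMember{Address: url})
	}
	members[0].IsPrimary = true
	return &tsopb.KeyspaceGroup{Id: groupID, Members: members}, nil
}

func main() {
	group, err := keyspaceGroupFromURLs(0, []string{"http://tso-0:3379", "http://tso-1:3379"})
	if err != nil {
		panic(err)
	}
	fmt.Println(group.Members[0].Address, group.Members[0].IsPrimary)
}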
+ primaryURL := "" + secondaryURLs := make([]string, 0) + urls := make([]string, 0, len(keyspaceGroup.Members)) for _, m := range keyspaceGroup.Members { - addrs = append(addrs, m.Address) + urls = append(urls, m.Address) if m.IsPrimary { - primaryAddr = m.Address + primaryURL = m.Address } else { - secondaryAddrs = append(secondaryAddrs, m.Address) + secondaryURLs = append(secondaryURLs, m.Address) } } - // If the primary address is not empty, we need to create a grpc connection to it, and do it + // If the primary URL is not empty, we need to create a grpc connection to it, and do it // out of the critical section of the keyspace group service discovery. - if len(primaryAddr) > 0 { - if primarySwitched := !strings.EqualFold(primaryAddr, c.getPrimaryAddr()); primarySwitched { - if _, err := c.GetOrCreateGRPCConn(primaryAddr); err != nil { + if len(primaryURL) > 0 { + if primarySwitched := !strings.EqualFold(primaryURL, c.getPrimaryURL()); primarySwitched { + if _, err := c.GetOrCreateGRPCConn(primaryURL); err != nil { log.Warn("[tso] failed to connect the next primary", zap.Uint32("keyspace-id-in-request", keyspaceID), - zap.String("tso-server-addr", tsoServerAddr), - zap.String("next-primary", primaryAddr), errs.ZapError(err)) + zap.String("tso-server-url", tsoServerURL), + zap.String("next-primary", primaryURL), errs.ZapError(err)) return err } } } oldPrimary, primarySwitched, _ := - c.keyspaceGroupSD.update(keyspaceGroup, primaryAddr, secondaryAddrs, addrs) + c.keyspaceGroupSD.update(keyspaceGroup, primaryURL, secondaryURLs, urls) if primarySwitched { log.Info("[tso] updated keyspace group service discovery info", zap.Uint32("keyspace-id-in-request", keyspaceID), - zap.String("tso-server-addr", tsoServerAddr), + zap.String("tso-server-url", tsoServerURL), zap.String("keyspace-group-service", keyspaceGroup.String())) - if err := c.afterPrimarySwitched(oldPrimary, primaryAddr); err != nil { + if err := c.afterPrimarySwitched(oldPrimary, primaryURL); err != nil { return err } } - // Even if the primary address is empty, we still updated other returned info above, including the - // keyspace group info and the secondary addresses. - if len(primaryAddr) == 0 { - return errors.New("no primary address found") + // Even if the primary URL is empty, we still updated other returned info above, including the + // keyspace group info and the secondary url. + if len(primaryURL) == 0 { + return errors.New("no primary URL found") } return nil @@ -523,7 +525,7 @@ func (c *tsoServiceDiscovery) updateMember() error { // Query the keyspace group info from the tso server by the keyspace ID. The server side will return // the info of the keyspace group to which this keyspace belongs. 
func (c *tsoServiceDiscovery) findGroupByKeyspaceID( - keyspaceID uint32, tsoSrvAddr string, timeout time.Duration, + keyspaceID uint32, tsoSrvURL string, timeout time.Duration, ) (*tsopb.KeyspaceGroup, error) { failpoint.Inject("unexpectedCallOfFindGroupByKeyspaceID", func(val failpoint.Value) { keyspaceToCheck, ok := val.(int) @@ -534,7 +536,7 @@ func (c *tsoServiceDiscovery) findGroupByKeyspaceID( ctx, cancel := context.WithTimeout(c.ctx, timeout) defer cancel() - cc, err := c.GetOrCreateGRPCConn(tsoSrvAddr) + cc, err := c.GetOrCreateGRPCConn(tsoSrvURL) if err != nil { return nil, err } @@ -572,40 +574,40 @@ func (c *tsoServiceDiscovery) getTSOServer(sd ServiceDiscovery) (string, error) defer c.Unlock() var ( - addrs []string - err error + urls []string + err error ) t := c.tsoServerDiscovery - if len(t.addrs) == 0 || t.failureCount == len(t.addrs) { - addrs, err = sd.(*pdServiceDiscovery).discoverMicroservice(tsoService) + if len(t.urls) == 0 || t.failureCount == len(t.urls) { + urls, err = sd.(*pdServiceDiscovery).discoverMicroservice(tsoService) if err != nil { return "", err } failpoint.Inject("serverReturnsNoTSOAddrs", func() { - log.Info("[failpoint] injected error: server returns no tso addrs") - addrs = nil + log.Info("[failpoint] injected error: server returns no tso URLs") + urls = nil }) - if len(addrs) == 0 { - // There is no error but no tso server address found, which means + if len(urls) == 0 { + // There is no error but no tso server url found, which means // the server side hasn't been upgraded to the version that // processes and returns GetClusterInfoResponse.TsoUrls. Return here // and handle the fallback logic outside of this function. return "", nil } - log.Info("update tso server addresses", zap.Strings("addrs", addrs)) + log.Info("update tso server URLs", zap.Strings("urls", urls)) - t.addrs = addrs + t.urls = urls t.selectIdx = 0 t.failureCount = 0 } // Pick a TSO server in a round-robin way. 
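// A tiny standalone sketch of the round-robin pick performed just below: the index
// advances modulo the URL list so successive calls spread requests across the
// discovered TSO servers. Names and URLs are placeholders.
package main

import "fmt"

type picker struct {
	urls      []string
	selectIdx int
}

// next returns the current URL and advances the round-robin cursor.
func (p *picker) next() string {
	url := p.urls[p.selectIdx]
	p.selectIdx = (p.selectIdx + 1) % len(p.urls)
	return url
}

func main() {
	p := &picker{urls: []string{"http://tso-0:3379", "http://tso-1:3379", "http://tso-2:3379"}}
	for i := 0; i < 4; i++ {
		fmt.Println(p.next()) // tso-0, tso-1, tso-2, then back to tso-0
	}
}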
- tsoServerAddr := t.addrs[t.selectIdx] + tsoServerURL := t.urls[t.selectIdx] t.selectIdx++ - t.selectIdx %= len(t.addrs) + t.selectIdx %= len(t.urls) - return tsoServerAddr, nil + return tsoServerURL, nil } func (c *tsoServiceDiscovery) discoverWithLegacyPath() ([]string, error) { diff --git a/client/tso_stream.go b/client/tso_stream.go index e3203818938..14b72bc697b 100644 --- a/client/tso_stream.go +++ b/client/tso_stream.go @@ -34,14 +34,14 @@ type tsoStreamBuilderFactory interface { type pdTSOStreamBuilderFactory struct{} -func (f *pdTSOStreamBuilderFactory) makeBuilder(cc *grpc.ClientConn) tsoStreamBuilder { - return &pdTSOStreamBuilder{client: pdpb.NewPDClient(cc), serverAddr: cc.Target()} +func (*pdTSOStreamBuilderFactory) makeBuilder(cc *grpc.ClientConn) tsoStreamBuilder { + return &pdTSOStreamBuilder{client: pdpb.NewPDClient(cc), serverURL: cc.Target()} } type tsoTSOStreamBuilderFactory struct{} -func (f *tsoTSOStreamBuilderFactory) makeBuilder(cc *grpc.ClientConn) tsoStreamBuilder { - return &tsoTSOStreamBuilder{client: tsopb.NewTSOClient(cc), serverAddr: cc.Target()} +func (*tsoTSOStreamBuilderFactory) makeBuilder(cc *grpc.ClientConn) tsoStreamBuilder { + return &tsoTSOStreamBuilder{client: tsopb.NewTSOClient(cc), serverURL: cc.Target()} } // TSO Stream Builder @@ -51,8 +51,8 @@ type tsoStreamBuilder interface { } type pdTSOStreamBuilder struct { - serverAddr string - client pdpb.PDClient + serverURL string + client pdpb.PDClient } func (b *pdTSOStreamBuilder) build(ctx context.Context, cancel context.CancelFunc, timeout time.Duration) (tsoStream, error) { @@ -62,14 +62,14 @@ func (b *pdTSOStreamBuilder) build(ctx context.Context, cancel context.CancelFun stream, err := b.client.Tso(ctx) done <- struct{}{} if err == nil { - return &pdTSOStream{stream: stream, serverAddr: b.serverAddr}, nil + return &pdTSOStream{stream: stream, serverURL: b.serverURL}, nil } return nil, err } type tsoTSOStreamBuilder struct { - serverAddr string - client tsopb.TSOClient + serverURL string + client tsopb.TSOClient } func (b *tsoTSOStreamBuilder) build( @@ -81,7 +81,7 @@ func (b *tsoTSOStreamBuilder) build( stream, err := b.client.Tso(ctx) done <- struct{}{} if err == nil { - return &tsoTSOStream{stream: stream, serverAddr: b.serverAddr}, nil + return &tsoTSOStream{stream: stream, serverURL: b.serverURL}, nil } return nil, err } @@ -102,28 +102,27 @@ func checkStreamTimeout(ctx context.Context, cancel context.CancelFunc, done cha // TSO Stream type tsoStream interface { - getServerAddr() string + getServerURL() string // processRequests processes TSO requests in streaming mode to get timestamps processRequests( clusterID uint64, keyspaceID, keyspaceGroupID uint32, dcLocation string, - requests []*tsoRequest, batchStartTime time.Time, + count int64, batchStartTime time.Time, ) (respKeyspaceGroupID uint32, physical, logical int64, suffixBits uint32, err error) } type pdTSOStream struct { - serverAddr string - stream pdpb.PD_TsoClient + serverURL string + stream pdpb.PD_TsoClient } -func (s *pdTSOStream) getServerAddr() string { - return s.serverAddr +func (s *pdTSOStream) getServerURL() string { + return s.serverURL } func (s *pdTSOStream) processRequests( - clusterID uint64, _, _ uint32, dcLocation string, requests []*tsoRequest, batchStartTime time.Time, + clusterID uint64, _, _ uint32, dcLocation string, count int64, batchStartTime time.Time, ) (respKeyspaceGroupID uint32, physical, logical int64, suffixBits uint32, err error) { start := time.Now() - count := int64(len(requests)) req := 
&pdpb.TsoRequest{ Header: &pdpb.RequestHeader{ ClusterId: clusterID, @@ -165,20 +164,19 @@ func (s *pdTSOStream) processRequests( } type tsoTSOStream struct { - serverAddr string - stream tsopb.TSO_TsoClient + serverURL string + stream tsopb.TSO_TsoClient } -func (s *tsoTSOStream) getServerAddr() string { - return s.serverAddr +func (s *tsoTSOStream) getServerURL() string { + return s.serverURL } func (s *tsoTSOStream) processRequests( clusterID uint64, keyspaceID, keyspaceGroupID uint32, dcLocation string, - requests []*tsoRequest, batchStartTime time.Time, + count int64, batchStartTime time.Time, ) (respKeyspaceGroupID uint32, physical, logical int64, suffixBits uint32, err error) { start := time.Now() - count := int64(len(requests)) req := &tsopb.TsoRequest{ Header: &tsopb.RequestHeader{ ClusterId: clusterID, diff --git a/cmd/pd-server/main.go b/cmd/pd-server/main.go index d0f4e458412..bd75309ed8a 100644 --- a/cmd/pd-server/main.go +++ b/cmd/pd-server/main.go @@ -102,7 +102,7 @@ func NewTSOServiceCommand() *cobra.Command { cmd.Flags().StringP("cacert", "", "", "path of file that contains list of trusted TLS CAs") cmd.Flags().StringP("cert", "", "", "path of file that contains X509 certificate in PEM format") cmd.Flags().StringP("key", "", "", "path of file that contains X509 key in PEM format") - cmd.Flags().StringP("log-level", "L", "info", "log level: debug, info, warn, error, fatal (default 'info')") + cmd.Flags().StringP("log-level", "L", "", "log level: debug, info, warn, error, fatal (default 'info')") cmd.Flags().StringP("log-file", "", "", "log file path") return cmd } @@ -122,7 +122,7 @@ func NewSchedulingServiceCommand() *cobra.Command { cmd.Flags().StringP("cacert", "", "", "path of file that contains list of trusted TLS CAs") cmd.Flags().StringP("cert", "", "", "path of file that contains X509 certificate in PEM format") cmd.Flags().StringP("key", "", "", "path of file that contains X509 key in PEM format") - cmd.Flags().StringP("log-level", "L", "info", "log level: debug, info, warn, error, fatal (default 'info')") + cmd.Flags().StringP("log-level", "L", "", "log level: debug, info, warn, error, fatal (default 'info')") cmd.Flags().StringP("log-file", "", "", "log file path") return cmd } @@ -142,7 +142,7 @@ func NewResourceManagerServiceCommand() *cobra.Command { cmd.Flags().StringP("cacert", "", "", "path of file that contains list of trusted TLS CAs") cmd.Flags().StringP("cert", "", "", "path of file that contains X509 certificate in PEM format") cmd.Flags().StringP("key", "", "", "path of file that contains X509 key in PEM format") - cmd.Flags().StringP("log-level", "L", "info", "log level: debug, info, warn, error, fatal (default 'info')") + cmd.Flags().StringP("log-level", "L", "", "log level: debug, info, warn, error, fatal (default 'info')") cmd.Flags().StringP("log-file", "", "", "log file path") return cmd } @@ -171,7 +171,7 @@ func addFlags(cmd *cobra.Command) { cmd.Flags().StringP("initial-cluster", "", "", "initial cluster configuration for bootstrapping, e,g. 
pd=http://127.0.0.1:2380") cmd.Flags().StringP("join", "", "", "join to an existing cluster (usage: cluster's '${advertise-client-urls}'") cmd.Flags().StringP("metrics-addr", "", "", "prometheus pushgateway address, leaves it empty will disable prometheus push") - cmd.Flags().StringP("log-level", "L", "info", "log level: debug, info, warn, error, fatal (default 'info')") + cmd.Flags().StringP("log-level", "L", "", "log level: debug, info, warn, error, fatal (default 'info')") cmd.Flags().StringP("log-file", "", "", "log file path") cmd.Flags().StringP("cacert", "", "", "path of file that contains list of trusted TLS CAs") cmd.Flags().StringP("cert", "", "", "path of file that contains X509 certificate in PEM format") @@ -221,6 +221,8 @@ func start(cmd *cobra.Command, args []string, services ...string) { exit(0) } + // Check the PD version first before running. + server.CheckAndGetPDVersion() // New zap logger err = logutil.SetupLogger(cfg.Log, &cfg.Logger, &cfg.LogProps, cfg.Security.RedactInfoLog) if err == nil { diff --git a/codecov.yml b/codecov.yml index bb439917e78..936eb3bbb11 100644 --- a/codecov.yml +++ b/codecov.yml @@ -24,9 +24,3 @@ flag_management: target: 74% # increase it if you want to enforce higher coverage for project, current setting as 74% is for do not let the error be reported and lose the meaning of warning. - type: patch target: 74% # increase it if you want to enforce higher coverage for project, current setting as 74% is for do not let the error be reported and lose the meaning of warning. - -ignore: - # Ignore the tool tests - - tests/dashboard - - tests/pdbackup - - tests/pdctl diff --git a/conf/OWNERS b/conf/OWNERS new file mode 100644 index 00000000000..1a435c49089 --- /dev/null +++ b/conf/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs at https://go.k8s.io/owners +options: + no_parent_owners: true +filters: + "(OWNERS|config\\.toml)$": + approvers: + - sig-critical-approvers-config diff --git a/conf/config.toml b/conf/config.toml index 8b80a5044f1..20f664a4c85 100644 --- a/conf/config.toml +++ b/conf/config.toml @@ -201,7 +201,7 @@ ## When enabled, usage data will be sent to PingCAP for improving user experience. # enable-telemetry = false -[keyspaces] +[keyspace] ## pre-alloc is used to pre-allocate keyspaces during pd bootstrap. ## Its value should be a list of strings, denotting the name of the keyspaces. ## Example: diff --git a/errors.toml b/errors.toml index 64101000478..a61c23a6fbd 100644 --- a/errors.toml +++ b/errors.toml @@ -16,11 +16,21 @@ error = ''' redirect failed ''' +["PD:apiutil:ErrRedirectNoLeader"] +error = ''' +redirect finds no leader +''' + ["PD:apiutil:ErrRedirectToNotLeader"] error = ''' redirect to not leader ''' +["PD:apiutil:ErrRedirectToNotPrimary"] +error = ''' +redirect to not primary +''' + ["PD:autoscaling:ErrEmptyMetricsResponse"] error = ''' metrics response from Prometheus is empty diff --git a/go.mod b/go.mod index e438228a728..90c5639c936 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,11 @@ module github.com/tikv/pd go 1.21 +// When you modify PD cooperatively with kvproto, this will be useful to submit the PR to PD and the PR to +// kvproto at the same time. You can run `go mod tidy` to make it replaced with go-mod style specification. +// After the PR to kvproto is merged, remember to comment this out and run `go mod tidy`. 
+// replace github.com/pingcap/kvproto => github.com/$YourPrivateRepo $YourPrivateBranch + require ( github.com/AlekSi/gocov-xml v1.0.0 github.com/BurntSushi/toml v0.3.1 @@ -12,7 +17,7 @@ require ( github.com/axw/gocov v1.0.0 github.com/brianvoe/gofakeit/v6 v6.26.3 github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 - github.com/coreos/go-semver v0.3.0 + github.com/coreos/go-semver v0.3.1 github.com/docker/go-units v0.4.0 github.com/elliotchance/pie/v2 v2.1.0 github.com/gin-contrib/cors v1.4.0 @@ -21,27 +26,25 @@ require ( github.com/gin-gonic/gin v1.9.1 github.com/gogo/protobuf v1.3.2 github.com/google/btree v1.1.2 - github.com/google/uuid v1.3.0 github.com/gorilla/mux v1.7.4 github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 github.com/joho/godotenv v1.4.0 github.com/mailru/easyjson v0.7.6 - github.com/mgechev/revive v1.0.2 github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d github.com/pingcap/errcode v0.3.0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 - github.com/pingcap/kvproto v0.0.0-20231222062942-c0c73f41d0b2 + github.com/pingcap/kvproto v0.0.0-20240403065636-c699538f7aa1 github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 - github.com/pingcap/tidb-dashboard v0.0.0-20240111062855-41f7c8011953 - github.com/prometheus/client_golang v1.18.0 - github.com/prometheus/common v0.46.0 + github.com/pingcap/tidb-dashboard v0.0.0-20240326110213-9768844ff5d7 + github.com/prometheus/client_golang v1.19.0 + github.com/prometheus/common v0.51.1 github.com/sasha-s/go-deadlock v0.2.0 github.com/shirou/gopsutil/v3 v3.23.3 - github.com/smallnest/chanx v0.0.0-20221229104322-eb4c998d2072 - github.com/soheilhy/cmux v0.1.4 - github.com/spf13/cobra v1.0.0 + github.com/smallnest/chanx v1.2.1-0.20240521153536-01121e21ff99 + github.com/soheilhy/cmux v0.1.5 + github.com/spf13/cobra v1.8.0 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.4 github.com/swaggo/http-swagger v1.2.6 @@ -49,14 +52,14 @@ require ( github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965 github.com/unrolled/render v1.0.1 github.com/urfave/negroni v0.3.0 - go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793 + go.etcd.io/etcd v0.5.0-alpha.5.0.20240320135013-950cd5fbe6ca go.uber.org/atomic v1.10.0 - go.uber.org/goleak v1.2.0 - go.uber.org/zap v1.26.0 + go.uber.org/goleak v1.3.0 + go.uber.org/zap v1.27.0 golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4 - golang.org/x/time v0.3.0 + golang.org/x/time v0.5.0 golang.org/x/tools v0.14.0 - google.golang.org/grpc v1.59.0 + google.golang.org/grpc v1.62.1 gotest.tools/gotestsum v1.7.0 ) @@ -84,14 +87,13 @@ require ( github.com/cenkalti/backoff/v4 v4.0.2 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect - github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect - github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.0 // indirect + github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf // indirect + github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dnephin/pflag v1.0.7 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/fatih/color v1.10.0 // indirect - github.com/fatih/structtag 
v1.2.0 // indirect github.com/fogleman/gg v1.3.0 // indirect github.com/fsnotify/fsnotify v1.4.9 // indirect github.com/gabriel-vasile/mimetype v1.4.2 // indirect @@ -109,39 +111,36 @@ require ( github.com/go-sql-driver/mysql v1.7.0 // indirect github.com/goccy/go-graphviz v0.0.9 // indirect github.com/goccy/go-json v0.10.2 // indirect - github.com/golang-jwt/jwt v3.2.1+incompatible // indirect + github.com/golang-jwt/jwt v3.2.2+incompatible // indirect github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect - github.com/golang/protobuf v1.5.3 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/protobuf v1.5.4 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/pprof v0.0.0-20211122183932-1daafda22083 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect - github.com/gorilla/websocket v1.4.2 // indirect - github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/gorilla/websocket v1.5.1 // indirect + github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 // indirect github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect github.com/gtank/cryptopasta v0.0.0-20170601214702-1f550f6f2f69 // indirect github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d // indirect - github.com/inconshreveable/mousetrap v1.0.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect - github.com/jonboulle/clockwork v0.2.2 // indirect + github.com/jonboulle/clockwork v0.4.0 // indirect github.com/joomcode/errorx v1.0.1 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/cpuid/v2 v2.2.4 // indirect - github.com/konsorten/go-windows-terminal-sequences v1.0.3 // indirect github.com/leodido/go-urn v1.2.4 // indirect github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a // indirect github.com/mattn/go-colorable v0.1.8 // indirect github.com/mattn/go-isatty v0.0.19 // indirect - github.com/mattn/go-runewidth v0.0.8 // indirect github.com/mattn/go-sqlite3 v1.14.15 // indirect - github.com/mgechev/dots v0.0.0-20190921121421-c36f7dcfbb81 // indirect github.com/minio/sio v0.3.0 // indirect - github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/oleiade/reflections v1.0.1 // indirect - github.com/olekukonko/tablewriter v0.0.4 // indirect github.com/onsi/gomega v1.20.1 // indirect github.com/pelletier/go-toml/v2 v2.0.8 // indirect github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36 // indirect @@ -149,49 +148,48 @@ require ( github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b // indirect - github.com/prometheus/client_model v0.5.0 // indirect - github.com/prometheus/procfs v0.12.0 // indirect + github.com/prometheus/client_model v0.6.0 // indirect + github.com/prometheus/procfs v0.13.0 // indirect github.com/rs/cors v1.7.0 // indirect - github.com/russross/blackfriday/v2 v2.0.1 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/samber/lo v1.37.0 // indirect github.com/sergi/go-diff v1.1.0 // indirect github.com/shoenig/go-m1cpu 
v0.1.5 // indirect github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0 // indirect - github.com/shurcooL/sanitized_anchor_name v1.0.0 // indirect - github.com/sirupsen/logrus v1.6.0 // indirect + github.com/sirupsen/logrus v1.9.3 // indirect github.com/stretchr/objx v0.5.0 // indirect github.com/swaggo/files v0.0.0-20210815190702-a29dd2bc99b2 // indirect github.com/tidwall/gjson v1.9.3 // indirect github.com/tklauser/go-sysconf v0.3.11 // indirect github.com/tklauser/numcpus v0.6.0 // indirect - github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 // indirect + github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.11 // indirect github.com/urfave/cli/v2 v2.3.0 // indirect github.com/vmihailenco/msgpack/v5 v5.3.5 // indirect github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect - github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect + github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect // Fix panic in unit test with go >= 1.14, ref: etcd-io/bbolt#201 https://github.com/etcd-io/bbolt/pull/201 - go.etcd.io/bbolt v1.3.6 // indirect + go.etcd.io/bbolt v1.3.9 // indirect go.uber.org/dig v1.9.0 // indirect go.uber.org/fx v1.12.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/arch v0.3.0 // indirect - golang.org/x/crypto v0.18.0 // indirect + golang.org/x/crypto v0.21.0 // indirect golang.org/x/image v0.10.0 // indirect golang.org/x/mod v0.13.0 // indirect - golang.org/x/net v0.20.0 // indirect - golang.org/x/oauth2 v0.16.0 // indirect - golang.org/x/sync v0.4.0 // indirect - golang.org/x/sys v0.16.0 // indirect - golang.org/x/term v0.16.0 // indirect + golang.org/x/net v0.23.0 // indirect + golang.org/x/oauth2 v0.18.0 // indirect + golang.org/x/sync v0.6.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/term v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect - google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20231030173426-d783a09b4405 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 // indirect - google.golang.org/protobuf v1.32.0 // indirect + google.golang.org/appengine v1.6.8 // indirect + google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda // indirect + google.golang.org/protobuf v1.33.0 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect @@ -200,12 +198,5 @@ require ( gorm.io/driver/sqlite v1.4.3 // indirect gorm.io/gorm v1.24.3 // indirect moul.io/zapgorm2 v1.1.0 // indirect - sigs.k8s.io/yaml v1.2.0 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect ) - -replace google.golang.org/grpc v1.59.0 => google.golang.org/grpc v1.26.0 - -// When you modify PD cooperatively with kvproto, this will be useful to submit the PR to PD and the PR to -// kvproto at the same time. You can run `go mod tidy` to make it replaced with go-mod style specification. -// After the PR to kvproto is merged, remember to comment this out and run `go mod tidy`. 
-// replace github.com/pingcap/kvproto => github.com/$YourPrivateRepo $YourPrivateBranch diff --git a/go.sum b/go.sum index 0e308d173a0..6ec1baa72c4 100644 --- a/go.sum +++ b/go.sum @@ -8,7 +8,6 @@ github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= -github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI= github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= @@ -20,13 +19,10 @@ github.com/VividCortex/mysqlerr v1.0.0/go.mod h1:xERx8E4tBhLvpjzdUyQiSfUxeMcATEQ github.com/Xeoncross/go-aesctr-with-hmac v0.0.0-20200623134604-12b17a7ff502 h1:L8IbaI/W6h5Cwgh0n4zGeZpVK78r/jBf9ASurHo9+/o= github.com/Xeoncross/go-aesctr-with-hmac v0.0.0-20200623134604-12b17a7ff502/go.mod h1:pmnBM9bxWSiHvC/gSWunUIyDvGn33EkP2CUjxFKtTTM= github.com/agiledragon/gomonkey/v2 v2.3.1/go.mod h1:ap1AmDzcVOAz1YpeJ3TCzIgstoaWLA6jbbgxfB4w2iY= -github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alvaroloes/enumer v1.1.2/go.mod h1:FxrjvuXoDAx9isTJrv4c+T410zFi0DtXIT0m65DJ+Wo= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/appleboy/gofight/v2 v2.1.2 h1:VOy3jow4vIK8BRQJoC/I9muxyYlJ2yb9ht2hZoS3rf4= github.com/appleboy/gofight/v2 v2.1.2/go.mod h1:frW+U1QZEdDgixycTj4CygQ48yLTUhplt43+Wczp3rw= -github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/aws/aws-sdk-go-v2 v1.17.7 h1:CLSjnhJSTSogvqUGhIC6LqFKATMRexcxLZ0i/Nzk9Eg= github.com/aws/aws-sdk-go-v2 v1.17.7/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= github.com/aws/aws-sdk-go-v2/config v1.18.19 h1:AqFK6zFNtq4i1EYu+eC7lcKHYnZagMn6SW171la0bGw= @@ -56,11 +52,8 @@ github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J github.com/axw/gocov v1.0.0 h1:YsqYR66hUmilVr23tu8USgnJIJvnwh3n7j5zRn7x4LU= github.com/axw/gocov v1.0.0/go.mod h1:LvQpEYiwwIb2nYkXY2fDWhg9/AsYqkhmrCshjlUJECE= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= -github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= -github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bitly/go-simplejson v0.5.0 h1:6IH+V8/tVMab511d5bn4M7EwGXZf9Hj6i2xSwkNEM+Y= github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= @@ -77,7 +70,6 @@ github.com/cakturk/go-netstat 
v0.0.0-20200220111822-e5b49efee7a5/go.mod h1:jtAfV github.com/cenkalti/backoff/v4 v4.0.2 h1:JIufpQLbh4DkbQoii76ItQIUFzevQSqOLZca4eamEDs= github.com/cenkalti/backoff/v4 v4.0.2/go.mod h1:eEew/i+1Q6OrCDZh3WiXYv3+nJwBASZ8Bog/87DQnVg= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= @@ -90,35 +82,25 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa h1:OaNxuTZr7kxeODyLWsRMC+OD03aFUH+mW6r2d+MWa5Y= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= -github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= -github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= -github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM= -github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f h1:JOrtw2xFKzlg+cbHpyrpLDmnN1HqhBfnX7WDiW7eG2c= -github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= -github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg= -github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4= +github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03VsM8rvUec= +github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU= +github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb h1:GIzvVQ9UkUlOhSDlqmrQAAAUd6R3E+caIisNEyWXvNE= +github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/corona10/goimagehash v1.0.2 h1:pUfB0LnsJASMPGEZLj7tGY251vF+qLGqOgEP4rUs6kA= github.com/corona10/goimagehash v1.0.2/go.mod h1:/l9umBhvcHQXVtQO1V6Gp1yD20STawkhRnnX0D1bvVI= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/cpuguy83/go-md2man/v2 v2.0.0 h1:EoUDS0afbrsXAZ9YQ9jdu/mZ2sXgT1/2yyNng4PGlyM= -github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod 
h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0qnXZOBM= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= -github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dnephin/pflag v1.0.7 h1:oxONGlWxhmUct0YzKTgrpQv9AUA1wtPBn7zuSjJqptk= github.com/dnephin/pflag v1.0.7/go.mod h1:uxE91IoWURlOiTUIA8Mq5ZZkAv3dPUfZNaT80Zm7OQE= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385 h1:clC1lXBpe2kTj2VHdaIu9ajZQe4kcEY9j0NsnDDBZ3o= @@ -129,12 +111,8 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= -github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg= github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= -github.com/fatih/structtag v1.2.0 h1:/OdNE99OxoI/PqaW/SuSK9uxxT3f/tcSZgon/ssNSx4= -github.com/fatih/structtag v1.2.0/go.mod h1:mBJUNpUnHmRKrKlQQlmCrh5PuhftFbNv8Ys4/aAZl94= github.com/fogleman/gg v1.3.0 h1:/7zJX8F6AaYQc57WQCyN9cAIz+4bCJGO9B+dyW29am8= github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= @@ -158,10 +136,8 @@ github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwv github.com/gin-gonic/gin v1.8.1/go.mod h1:ji8BvRH1azfM+SYow9zQ6SZMvR8qOMZHmsCuWR9tTTk= github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= -github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= -github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= -github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= 
+github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= +github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= @@ -201,12 +177,11 @@ github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/gogo/protobuf v0.0.0-20171007142547-342cbe0a0415/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v0.0.0-20180717141946-636bf0302bc9/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang-jwt/jwt v3.2.1+incompatible h1:73Z+4BJcrTC+KczS6WvTPvRGOp1WmfEP4Q1lOd9Z/+c= github.com/golang-jwt/jwt v3.2.1+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= +github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY= +github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA= github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A= @@ -214,25 +189,21 @@ github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EO github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo= -github.com/golang/glog v1.1.2/go.mod h1:zR+okUeTbrL6EL3xHUDxZuEtGv04p5shwip1+mL/rLQ= -github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef h1:veQD95Isof8w9/WXiA+pa3tz3fJXkt5B7QaRBrM62gk= -github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v0.0.0-20180814211427-aa810b61a9c7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod 
h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU= github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -240,42 +211,38 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20211122183932-1daafda22083 h1:c8EUapQFi+kjzedr4c6WqbwMdmB95+oDBWZ5XFHFYxY= github.com/google/pprof v0.0.0-20211122183932-1daafda22083/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= -github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= -github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gorilla/mux v1.7.4 h1:VuZ8uybHlWmqV03+zRzdwKL4tUnIp1MAQtp1mIFE1bc= github.com/gorilla/mux v1.7.4/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= -github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= 
-github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 h1:z53tR0945TRRQO/fLEVPI6SMv7ZflF0TEaTAoU7tOzg= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= +github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/QY= +github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= +github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDaL56wXCB/5+wF6uHfaI= +github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/gtank/cryptopasta v0.0.0-20170601214702-1f550f6f2f69 h1:7xsUJsB2NrdcttQPa7JLEaGzvdbk7KvfrjgHZXOQRo0= github.com/gtank/cryptopasta v0.0.0-20170601214702-1f550f6f2f69/go.mod h1:YLEMZOtU+AZ7dhN9T/IpGhXVGly2bvkJQ+zxj3WeVQo= -github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d h1:uGg2frlt3IcT7kbV6LEp5ONv4vmoO2FW4qSO+my/aoM= github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= -github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8= github.com/jackc/chunkreader/v2 v2.0.1/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk= github.com/jackc/pgconn v1.13.0 h1:3L1XMNV2Zvca/8BYhzcRFS70Lr0WlDg16Di6SFGAbys= @@ -305,9 +272,9 @@ github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHW github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/joho/godotenv v1.4.0 h1:3l4+N6zfMWnkbPEXKng2o2/MR5mSwTrBih4ZEkkz1lg= github.com/joho/godotenv v1.4.0/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= -github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= -github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9qUBdQ= github.com/jonboulle/clockwork v0.2.2/go.mod 
h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8= +github.com/jonboulle/clockwork v0.4.0 h1:p4Cf1aMWXnXAUh8lVfewRBx1zaTSYKrKMF2g3ST4RZ4= +github.com/jonboulle/clockwork v0.4.0/go.mod h1:xgRqUGwRcjKCO1vbZUEtSLrqKoPSsUpK7fnezOII0kc= github.com/joomcode/errorx v1.0.1 h1:CalpDWz14ZHd68fIqluJasJosAewpz2TFaJALrUxjrk= github.com/joomcode/errorx v1.0.1/go.mod h1:kgco15ekB6cs+4Xjzo7SPeXzx38PbJzBwbnu9qfVNHQ= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -315,23 +282,16 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.5/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= -github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= -github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= @@ -348,40 +308,23 @@ github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNa github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a h1:N9zuLhTvBSRt0gWSiJswwQ2HqDmtX/ZCDJURnKUt1Ik= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a/go.mod h1:JKx41uQRwqlTZabZc+kILPrO/3jlKnQ2Z8b7YiVw5cE= -github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.7.6 
h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA= github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= -github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= -github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= github.com/mattn/go-colorable v0.1.8 h1:c1ghPdyEDarC70ftn0y+A/Ee++9zz8ljHG1b13eJ0s8= github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= -github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= -github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= -github.com/mattn/go-runewidth v0.0.8 h1:3tS41NlGYSmhhe/8fhGRzc+z3AYCw1Fe1WAyLuujKs0= -github.com/mattn/go-runewidth v0.0.8/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI= github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= -github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= -github.com/mgechev/dots v0.0.0-20190921121421-c36f7dcfbb81 h1:QASJXOGm2RZ5Ardbc86qNFvby9AqkLDibfChMtAg5QM= -github.com/mgechev/dots v0.0.0-20190921121421-c36f7dcfbb81/go.mod h1:KQ7+USdGKfpPjXk4Ga+5XxQM4Lm4e3gAogrreFAYpOg= -github.com/mgechev/revive v1.0.2 h1:v0NxxQ7fSFz/u1NQydPo6EGdq7va0J1BtsZmae6kzUg= -github.com/mgechev/revive v1.0.2/go.mod h1:rb0dQy1LVAxW9SWy5R3LPUjevzUbUS316U5MFySA2lo= github.com/microsoft/go-mssqldb v0.17.0 h1:Fto83dMZPnYv1Zwx5vHHxpNraeEaUlQ/hhHLgZiaenE= github.com/microsoft/go-mssqldb v0.17.0/go.mod h1:OkoNGhGEs8EZqchVTtochlXruEhEOaO4S0d2sB5aeGQ= github.com/minio/sio v0.3.0 h1:syEFBewzOMOYVzSTFpp1MqpSZk8rUNbz8VIIc+PNzus= github.com/minio/sio v0.3.0/go.mod h1:8b0yPp2avGThviy/+OCJBI6OMpvxoUuiLvE6F1lebhw= -github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= -github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -389,31 +332,26 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lN github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 
-github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5 h1:BvoENQQU+fZ9uukda/RzCAL/191HHwJA5b13R6diVlY= github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/oleiade/reflections v1.0.1 h1:D1XO3LVEYroYskEsoSiGItp9RUxG6jWnCVvrqH0HHQM= github.com/oleiade/reflections v1.0.1/go.mod h1:rdFxbxq4QXVZWj0F+e9jqjDkc7dbp97vkRixKo2JR60= -github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= -github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn+Ejf/w8= -github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.20.1 h1:PA/3qinGoukvymdIDV8pii6tiZgC8kbmJO6Z5+b002Q= github.com/onsi/gomega v1.20.1/go.mod h1:DtrZpjmvpn2mPm4YWQa0/ALMDj9v4YxLgojwPeREyVo= +github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/otiai10/copy v1.7.0/go.mod h1:rmRl6QPdJj6EiUqXQ/4Nn2lLXoNQjFCQbbNrxgc/t3U= github.com/otiai10/curr v0.0.0-20150429015615-9b4961190c95/go.mod h1:9qAhocn7zKJG+0mI8eUu6xqkFDYS2kb2saOteoSB3cE= github.com/otiai10/curr v1.0.0/go.mod h1:LskTG5wDwr8Rs+nNQ+1LlxRjAtTZZjtJW4rMXl6j4vs= github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT91xUo= github.com/otiai10/mint v1.3.3/go.mod h1:/yxELlJQ0ufhjUwhshSj+wFjZ78CnZ48/1wtmBH1OTc= github.com/pascaldekloe/name v0.0.0-20180628100202-0fd16699aae1/go.mod h1:eD5JxqMiuNYyFNmyY9rkJ/slN8y59oEu4Ei7F8OoKWQ= -github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= @@ -433,19 +371,18 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ue github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZFh4N3vQ5HEtld3S+Y+StULhWVvumU0= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= -github.com/pingcap/kvproto v0.0.0-20231222062942-c0c73f41d0b2 h1:364A6VCS+l0oHBKZKotX9LzmfEtIO/NTccTIQcPp3Ug= -github.com/pingcap/kvproto v0.0.0-20231222062942-c0c73f41d0b2/go.mod h1:rXxWk2UnwfUhLXha1jxRWPADw9eMZGWEWCg92Tgmb/8= +github.com/pingcap/kvproto 
v0.0.0-20240403065636-c699538f7aa1 h1:vDWWJKU6ztczn24XixahtLwcnJ15DOtSRIRM3jVtZNU= +github.com/pingcap/kvproto v0.0.0-20240403065636-c699538f7aa1/go.mod h1:rXxWk2UnwfUhLXha1jxRWPADw9eMZGWEWCg92Tgmb/8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8IDP+SZrdhV1Kibl9KrHxJ9eciw= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 h1:QV6jqlfOkh8hqvEAgwBZa+4bSgO0EeKC7s5c6Luam2I= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21/go.mod h1:QYnjfA95ZaMefyl1NO8oPtKeb8pYUdnDVhQgf+qdpjM= -github.com/pingcap/tidb-dashboard v0.0.0-20240111062855-41f7c8011953 h1:vY/bY5vkSvvuXB1030AUmy0LFhuEA53ryVdF/bTbFXU= -github.com/pingcap/tidb-dashboard v0.0.0-20240111062855-41f7c8011953/go.mod h1:ucZBRz52icb23T/5Z4CsuUHmarYiin7p2MeiVBe+o8c= +github.com/pingcap/tidb-dashboard v0.0.0-20240326110213-9768844ff5d7 h1:eFu98FbfJB7PKWOtkaV6YNXXJWqDhczQX56j/iucgU4= +github.com/pingcap/tidb-dashboard v0.0.0-20240326110213-9768844ff5d7/go.mod h1:ucZBRz52icb23T/5Z4CsuUHmarYiin7p2MeiVBe+o8c= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e h1:FBaTXU8C3xgt/drM58VHxojHo/QoG1oPsgWTGvaSpO4= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -454,28 +391,15 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b h1:0LFwY6Q3gMACTjAbMZBjXAqTOzOwFaj2Ld6cjeQ7Rig= github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= -github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= -github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk= -github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA= -github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= +github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= 
-github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= -github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= -github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.46.0 h1:doXzt5ybi1HBKpsZOL0sSkaNHJJqkyfEWZGGqqScV0Y= -github.com/prometheus/common v0.46.0/go.mod h1:Tp0qkxpb9Jsg54QMe+EAmqXkSV7Evdy1BTn+g2pa/hQ= -github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= -github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= -github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= -github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= +github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos= +github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8= +github.com/prometheus/common v0.51.1 h1:eIjN50Bwglz6a/c3hAgSMcofL3nD+nFQkV6Dd4DsQCw= +github.com/prometheus/common v0.51.1/go.mod h1:lrWtQx+iDfn2mbH5GUzlH9TSHyfZpHkSiG1W7y3sF2Q= +github.com/prometheus/procfs v0.13.0 h1:GqzLlQyfsPbaEHaQkO7tbDlriv/4o5Hudv6OXHGKX7o= +github.com/prometheus/procfs v0.13.0/go.mod h1:cd4PFCR54QLnGKPaKGA6l+cfuNXtht43ZKY6tow0Y1g= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= @@ -484,8 +408,9 @@ github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjR github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik= github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= -github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/samber/lo v1.37.0 h1:XjVcB8g6tgUp8rsPsJ2CvhClfImrpL04YpQHXeHPhRw= github.com/samber/lo v1.37.0/go.mod h1:9vaz2O4o8oOnK23pd2TrXufcbdbJIa3b6cstBWKpopA= github.com/sasha-s/go-deadlock v0.2.0 h1:lMqc+fUb7RrFS3gQLtoQsJ7/6TV/pAIFvBsqX73DK8Y= @@ -502,30 +427,22 @@ github.com/shoenig/test v0.6.3 h1:GVXWJFk9PiOjN0KoJ7VrJGH6uLPnqxR7/fe3HUPfE0c= github.com/shoenig/test v0.6.3/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0 h1:mj/nMDAwTBiaCqMEs4cYCqF7pO6Np7vhy1D1wcQGz+E= github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0/go.mod h1:919LwcH0M7/W4fcZ0/jy0qGght1GIhqyS/EgWGH2j5Q= 
-github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= -github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I= -github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= -github.com/smallnest/chanx v0.0.0-20221229104322-eb4c998d2072 h1:Txo4SXVJq/OgEjwgkWoxkMoTjGlcrgsQE/XSghjmu0w= -github.com/smallnest/chanx v0.0.0-20221229104322-eb4c998d2072/go.mod h1:+4nWMF0+CqEcU74SnX2NxaGqZ8zX4pcQ8Jcs77DbX5A= +github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/smallnest/chanx v1.2.1-0.20240521153536-01121e21ff99 h1:fmanhZtn5RKRljCjX46H+Q9/PECsHbflXm0RdrnK9e4= +github.com/smallnest/chanx v1.2.1-0.20240521153536-01121e21ff99/go.mod h1:+4nWMF0+CqEcU74SnX2NxaGqZ8zX4pcQ8Jcs77DbX5A= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E= -github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= -github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= -github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= -github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/cobra v1.0.0 h1:6m/oheQuQ13N9ks4hubMG6BnvwOeaJrqSPLahSnczz8= -github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE= -github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= -github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= +github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= @@ -568,12 +485,10 @@ github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7Am github.com/tklauser/numcpus v0.3.0/go.mod h1:yFGUr7TUHQRAhyqBcEg0Ge34zDBAsIvJJcyE6boqnA8= github.com/tklauser/numcpus v0.6.0 
h1:kebhY2Qt+3U6RNK7UqpYNA+tJ23IBEGKkB7JQBfDYms= github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4= -github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 h1:j6JEOq5QWFker+d7mFQYOhjTZonQ7YkLTHm56dbn+yM= -github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 h1:6fotK7otjonDflCTK0BCfls4SPy3NcCVb5dqqmbRknE= +github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75/go.mod h1:KO6IkyS8Y3j8OdNO85qEYBsRPuteD+YciPomcXdrMnk= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= -github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= github.com/ugorji/go/codec v0.0.0-20181022190402-e5e69e061d4f/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= @@ -583,7 +498,6 @@ github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4d github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/unrolled/render v1.0.1 h1:VDDnQQVfBMsOsp3VaCJszSO0nkBIVEYoPWeRThk9spY= github.com/unrolled/render v1.0.1/go.mod h1:gN9T0NhL4Bfbwu8ann7Ry/TGHYfosul+J0obPf6NBdM= -github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M= github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI= github.com/urfave/negroni v0.3.0 h1:PaXOb61mWeZJxc1Ji2xJjpVg9QfPo0rrB+lHyBxGNSU= @@ -592,22 +506,19 @@ github.com/vmihailenco/msgpack/v5 v5.3.5 h1:5gO0H1iULLWGhs2H5tbAHIZTV8/cYafcFOr9 github.com/vmihailenco/msgpack/v5 v5.3.5/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc= github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= -github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= -github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= -github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 h1:S2dVYn90KE98chqDkyE9Z4N61UnQd+KOfgp5Iu53llk= +github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= -go.etcd.io/bbolt v1.3.2/go.mod 
h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= -go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= -go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= -go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793 h1:fqmtdYQlwZ/vKWSz5amW+a4cnjg23ojz5iL7rjf08Wg= -go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793/go.mod h1:eBhtbxXP1qpW0F6+WxoJ64DM1Mrfx46PHtVxEdkLe0I= +go.etcd.io/bbolt v1.3.9 h1:8x7aARPEXiXbHmtUwAIv7eV2fQFHrLLavdiJ3uzJXoI= +go.etcd.io/bbolt v1.3.9/go.mod h1:zaO32+Ti0PK1ivdPtgMESzuzL2VPoIG1PCQNvOdo/dE= +go.etcd.io/etcd v0.5.0-alpha.5.0.20240320135013-950cd5fbe6ca h1:LCc0GAhfJ+qDqnUbE7ybQ0mTz1dNRn2iiM6e183p/5E= +go.etcd.io/etcd v0.5.0-alpha.5.0.20240320135013-950cd5fbe6ca/go.mod h1:1AyK+XVcIwjbjw5EYrhT+IiMYSgRZTohGb2ceZ0/US8= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= @@ -620,8 +531,8 @@ go.uber.org/fx v1.12.0 h1:+1+3Cz9M0dFMPy9SW9XUIUHye8bnPUm7q7DroNGWYG4= go.uber.org/fx v1.12.0/go.mod h1:egT3Kyg1JFYQkvKLZ3EsykxkNrZxgXS+gKoKo7abERY= go.uber.org/goleak v0.10.0/go.mod h1:VCZuO8V8mFPlL0F5J5GK1rtHV3DrFcQ1R8ryq7FK0aI= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= -go.uber.org/goleak v1.2.0 h1:xqgm/S+aQvhWFTtR0XK3Jvg7z8kGV8P4X14IzwN3Eqk= -go.uber.org/goleak v1.2.0/go.mod h1:XJYK+MuIchqpmGmUSAzotztawfKvYLUIgg7guXrwVUo= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.4.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= @@ -632,16 +543,15 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.12.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= +go.uber.org/zap v1.18.1/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= -go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= -go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= -golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190513172903-22d7a77e9e5f/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -650,9 +560,8 @@ golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc= -golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= +golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= +golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4 h1:QLureRX3moex6NVu/Lr4MGakp9FdA7sBHGBmvRW7NaM= golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= @@ -678,32 +587,30 @@ golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181005035420-146acd28ed58/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM= golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211123203042-d83791d6bcd9/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.16.0 h1:aDkGMBSYxElaoP81NpoUoz2oo2R2wHdZpGToUxfyQrQ= -golang.org/x/oauth2 v0.16.0/go.mod h1:hqZ+0LWXsiVoZpeld6jVt06P3adbS2Uu911W1SsJv2o= +golang.org/x/oauth2 v0.18.0 h1:09qnuIAgzdx1XplqJvW6CQqMCtGZykZWcXzPMPUusvI= +golang.org/x/oauth2 v0.18.0/go.mod h1:Wf7knwG0MPoWIMMBgFlEaSUDaKskp0dCfrlJRJXbBi8= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -713,16 +620,12 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ= -golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181228144115-9a3f9b0469bb/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -731,7 +634,6 @@ golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -739,6 +641,7 @@ golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -746,34 +649,33 @@ golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210816074244-15123e1e1f71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod 
h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE= -golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= +golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= +golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= -golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= @@ -791,7 +693,6 @@ golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191114200427-caa0b0f7d508/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200225230052-807dcd883420/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20201125231158-b5590deeca9b/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= @@ -808,41 +709,40 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.1.0/go.mod 
h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= +google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20181004005441-af9cb2a35e7f/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20231030173426-d783a09b4405 h1:I6WNifs6pF9tNdSob2W24JtyxIYjzFB9qDlpUC76q+U= -google.golang.org/genproto v0.0.0-20231030173426-d783a09b4405/go.mod h1:3WDQMjmJk36UQhjQ89emUzb1mdaHcPeeAh4SCBKznB4= -google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b h1:CIC2YMXmIhYw6evmhPxBKJ4fmLbOFtXQN/GV3XOZR8k= -google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b/go.mod h1:IBQ646DjkDkvUIsVq/cc03FUFQ9wbZu7yE396YcL870= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 h1:Jyp0Hsi0bmHXG6k9eATXoYtjd6e2UzZ1SCn/wIupY14= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17/go.mod h1:oQ5rr10WTTMvP4A36n8JpR1OrO1BEiV4f78CneXZxkA= +google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda h1:wu/KJm9KJwpfHWhkkZGohVC6KRrc1oJNr4jwtQMOQXw= +google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda/go.mod h1:g2LLCvCeCSir/JJSWosk19BR4NVxGqHUC6rxIRsd7Aw= +google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda h1:b6F6WIV4xHHD0FA4oIyzU6mHWg2WI2X1RBehwa5QN38= +google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda/go.mod h1:AHcE/gZH76Bk/ROZhQphlRoWo5xKDEtz3eVEO1LfA8c= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda h1:LI5DOvAxUPMv/50agcLLoo+AdWc1irS9Rzz4vPuD1V4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.24.0/go.mod h1:XDChyiUovWa60DnaeDeZmSW86xtLtjtZbwvSiRnRtcA= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.26.0 h1:2dTRdpdFEEhJYQD8EMLB61nnrzSCTbG38PhqdhvOltg= -google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= 
+google.golang.org/grpc v1.62.1 h1:B4n+nfKzOICUXMgyrNd19h/I9oH0L1pizfk1d4zSgTk= +google.golang.org/grpc v1.62.1/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= -google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= -gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= @@ -851,10 +751,8 @@ gopkg.in/go-playground/validator.v8 v8.18.2/go.mod h1:RX2a/7Ha8BgOhfk7j780h4/u/R gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= -gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= @@ -889,10 +787,8 @@ gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= -k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= moul.io/zapgorm2 v1.1.0 h1:qwAlMBYf+qJkJ7PAzJl4oCe6eS6QGiKAXUPeis0+RBE= moul.io/zapgorm2 v1.1.0/go.mod h1:emRfKjNqSzVj5lcgasBdovIXY1jSOwFz2GQZn1Rddks= rsc.io/pdf v0.1.1/go.mod 
h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= -sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= -sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q= -sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/metrics/alertmanager/pd.rules.yml b/metrics/alertmanager/pd.rules.yml index cf8cea9c3b4..5d51dc4a1c5 100644 --- a/metrics/alertmanager/pd.rules.yml +++ b/metrics/alertmanager/pd.rules.yml @@ -195,3 +195,27 @@ groups: description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ $value }}' value: '{{ $value }}' summary: PD_cluster_slow_tikv_nums + + - alert: PD_cpu_quota + expr: irate(process_cpu_seconds_total{job="pd"}[30s]) / pd_service_maxprocs > 0.8 + for: 45s + labels: + env: ENV_LABELS_ENV + level: warning + expr: irate(process_cpu_seconds_total{job="pd"}[30s]) / pd_service_maxprocs > 0.8 + annotations: + description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ $value }}' + value: '{{ $value }}' + summary: PD CPU usage is over 80% of CPU quota + + - alert: PD_memory_quota + expr: process_resident_memory_bytes{job="pd"} / pd_service_memory_quota_bytes > 0.8 + for: 15s + labels: + env: ENV_LABELS_ENV + level: warning + expr: process_resident_memory_bytes{job="pd"} / pd_service_memory_quota_bytes > 0.8 + annotations: + description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ $value }}' + value: '{{ $value }}' + summary: PD memory usage is over 80% of memory quota diff --git a/metrics/grafana/pd.json b/metrics/grafana/pd.json index 3a95191747c..abfe049b905 100644 --- a/metrics/grafana/pd.json +++ b/metrics/grafana/pd.json @@ -1151,7 +1151,7 @@ "fillGradient": 0, "gridPos": { "h": 6, - "w": 4, + "w": 8, "x": 16, "y": 13 }, @@ -1218,7 +1218,6 @@ }, "yaxes": [ { - "$$hashKey": "object:192", "format": "short", "label": null, "logBase": 1, @@ -1227,7 +1226,6 @@ "show": true }, { - "$$hashKey": "object:193", "format": "short", "label": null, "logBase": 1, @@ -1738,7 +1736,7 @@ "tableColumn": "idalloc", "targets": [ { - "expr": "max(pd_cluster_id{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"idalloc\"})", + "expr": "max(pd_cluster_id{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"idalloc\"})by(type)", "format": "time_series", "hide": false, "instant": true, @@ -1809,7 +1807,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(process_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*pd.*\"}[30s])", + "expr": "irate(process_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*(pd|tso|scheduling).*\"}[30s])", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -1819,10 +1817,10 @@ }, { "exemplar": true, - "expr": "pd_service_maxprocs{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*pd.*\"}", + "expr": "pd_service_maxprocs{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*(pd|tso|scheduling).*\"}", "hide": false, "interval": "", - "legendFormat": "{{job}}-{{instance}}-limit", + "legendFormat": "quota-{{job}}-{{instance}}", "refId": "B" } ], @@ -1917,7 +1915,7 @@ "steppedLine": false, "targets": [ { - "expr": "process_resident_memory_bytes{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\",job=~\".*pd.*\"}", + "expr": "process_resident_memory_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*(pd|tso|scheduling).*\"}", "format": "time_series", "hide": false, "interval": "", @@ -1927,46 +1925,53 @@ "step": 4 }, { - "expr": "go_memstats_heap_sys_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*pd.*\"}", + "expr": "go_memstats_heap_sys_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*(pd|tso|scheduling).*\"}", "hide": true, "interval": "", "legendFormat": "HeapSys-{{job}}-{{instance}}", "refId": "B" }, { - "expr": "go_memstats_heap_inuse_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*pd.*\"}", + "expr": "go_memstats_heap_inuse_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*(pd|tso|scheduling).*\"}", "hide": false, "interval": "", "legendFormat": "HeapInuse-{{job}}-{{instance}}", "refId": "C" }, { - "expr": "go_memstats_heap_alloc_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*pd.*\"}", + "expr": "go_memstats_heap_alloc_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*(pd|tso|scheduling).*\"}", "hide": true, "interval": "", "legendFormat": "HeapAlloc-{{job}}-{{instance}}", "refId": "D" }, { - "expr": "go_memstats_heap_idle_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*pd.*\"}", + "expr": "go_memstats_heap_idle_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*(pd|tso|scheduling).*\"}", "hide": true, "interval": "", "legendFormat": "HeapIdle-{{job}}-{{instance}}", "refId": "E" }, { - "expr": "go_memstats_heap_released_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*pd.*\"}", + "expr": "go_memstats_heap_released_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*(pd|tso|scheduling).*\"}", "hide": true, "interval": "", "legendFormat": "HeapReleased-{{job}}-{{instance}}", "refId": "F" }, { - "expr": "go_memstats_next_gc_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*pd.*\"}", + "expr": "go_memstats_next_gc_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*(pd|tso|scheduling).*\"}", "hide": true, "interval": "", "legendFormat": "GCTrigger-{{job}}-{{instance}}", "refId": "G" + }, + { + "expr": "pd_service_memory_quota_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*pd.*\"}", + "hide": false, + "interval": "", + "legendFormat": "quota-{{job}}-{{instance}}", + "refId": "H" } ], "thresholds": [], @@ -2059,7 +2064,7 @@ "steppedLine": false, "targets": [ { - "expr": "(time() - process_start_time_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*pd.*\"})", + "expr": "(time() - process_start_time_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*(pd|tso|scheduling).*\"})", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -2091,7 +2096,7 @@ { "format": "dtdurations", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": "0", "show": true @@ -2158,7 +2163,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_goroutines{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*pd.*\"}", + "expr": "go_goroutines{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",job=~\".*(pd|tso|scheduling).*\"}", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -2277,7 +2282,7 @@ 
"tableColumn": "tso", "targets": [ { - "expr": "max(pd_cluster_tso{type=\"tso\", dc=\"global\"})", + "expr": "max(pd_cluster_tso{type=\"tso\", dc=\"global\"})by(type)", "format": "time_series", "instant": true, "interval": "", @@ -2581,7 +2586,7 @@ "tableColumn": "tso", "targets": [ { - "expr": "max(pd_cluster_tso{type=\"tso\", dc=\"global\"})", + "expr": "max(pd_cluster_tso{type=\"tso\", dc=\"global\"})by(type)", "format": "time_series", "instant": true, "interval": "", @@ -2664,7 +2669,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_schedule_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", event=\"create\"}[1m])) by (type)", + "expr": "sum(rate(pd_schedule_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", event=\"create\"}[1m])*60) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}", @@ -2757,7 +2762,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_schedule_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", event=\"check\"}[1m])) by (type)", + "expr": "sum(rate(pd_schedule_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", event=\"check\"}[1m])*60) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}", @@ -2850,7 +2855,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_schedule_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", event=\"finish\"}[1m])) by (type)", + "expr": "sum(rate(pd_schedule_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", event=\"finish\"}[1m])*60) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}", @@ -2942,7 +2947,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_schedule_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", event=\"timeout\"}[1m])) by (type)", + "expr": "sum(rate(pd_schedule_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", event=\"timeout\"}[1m])*60) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}", @@ -3035,7 +3040,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_schedule_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", event=\"cancel\"}[1m])) by (type)", + "expr": "sum(rate(pd_schedule_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", event=\"cancel\"}[1m])*60) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}", @@ -3043,7 +3048,7 @@ "step": 4 }, { - "expr": "sum(delta(pd_schedule_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", event=\"replace\"}[1m])) by (type)", + "expr": "sum(rate(pd_schedule_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", event=\"replace\"}[1m])*60) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}", @@ -3136,7 +3141,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_schedule_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (event)", + "expr": "sum(rate(pd_schedule_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])*60) by (event)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{event}}", @@ -3428,7 +3433,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(delta(pd_schedule_operator_limit{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (type,name)", + "expr": "sum(rate(pd_schedule_operator_limit{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])*60) by (type,name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}-{{name}}", @@ -3436,7 +3441,7 @@ }, { "exemplar": true, - "expr": "sum(delta(pd_schedule_operator_exceeded_store_limit{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (desc)", + "expr": "sum(rate(pd_schedule_operator_exceeded_store_limit{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])*60) by (desc)", "hide": true, "interval": "", "legendFormat": "{{desc}}-exceed-store-limit", @@ -5874,6 +5879,21 @@ "intervalFactor": 1, "legendFormat": "store-{{store}}-in", "refId": "B" + }, + { + "expr": "- sum(delta(pd_scheduler_hot_region_direction{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\",type=\"move-leader\",direction=\"out\",rw=\"write\"}[1m]))by (store)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "store-{{store}}-out", + "refId": "C", + "step": 4 + }, + { + "expr": "sum(delta(pd_scheduler_hot_region_direction{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\",type=\"move-leader\",direction=\"in\",rw=\"write\"}[1m]))by (store)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "store-{{store}}-in", + "refId": "D" } ], "thresholds": [], @@ -7775,7 +7795,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_scheduler_balance_direction{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (type, source, target)", + "expr": "sum(rate(pd_scheduler_balance_direction{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])*60) by (type, source, target)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{source}}-{{target}}-{{type}}", @@ -7804,7 +7824,7 @@ }, "yaxes": [ { - "format": "ops", + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -7887,7 +7907,8 @@ "tableColumn": "", "targets": [ { - "expr": "pd_checker_patrol_regions_time{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"} != 0", + "expr": "max(max(pd_checker_patrol_regions_time{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"})by(instance))", + "legendFormat": "{{instance}}", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -8233,7 +8254,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_schedule_filter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", action=\"filter-source\", type!=\"store-state-tombstone-filter\"}[1m])) by (source, type, scope)", + "expr": "sum(rate(pd_schedule_filter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", action=\"filter-source\", type!=\"store-state-tombstone-filter\"}[1m])*60) by (source, type, scope)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{scope}}-store-{{source}}-{{type}}", @@ -8262,7 +8283,7 @@ }, "yaxes": [ { - "format": "ops", + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -8329,7 +8350,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_schedule_filter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", action=\"filter-target\", type!=\"store-state-tombstone-filter\"}[1m])) by (target, type, scope)", + "expr": "sum(rate(pd_schedule_filter{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", store=~\"$store\", action=\"filter-target\", type!=\"store-state-tombstone-filter\"}[1m])*60) by (target, type, scope)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{scope}}-store-{{target}}-{{type}}", @@ -8338,7 +8359,7 @@ "step": 4 }, { - "expr": "sum(delta(pd_schedule_filter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", action=\"filter-target\",type=\"distinct-filter\"}[1m])) by (source, target, type, scope)", + "expr": "sum(rate(pd_schedule_filter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", action=\"filter-target\",type=\"distinct-filter\"}[1m])*60) by (source, target, type, scope)", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -8346,7 +8367,7 @@ "refId": "B" }, { - "expr": "sum(delta(pd_schedule_filter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", action=\"filter-target\",type=\"rule-fit-filter\"}[1m])) by (source, target, type, scope)", + "expr": "sum(rate(pd_schedule_filter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", action=\"filter-target\",type=\"rule-fit-filter\"}[1m])*60) by (source, target, type, scope)", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -8354,7 +8375,7 @@ "refId": "C" }, { - "expr": "sum(delta(pd_schedule_filter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", action=\"filter-target\",type=\"rule-fit-leader-filter\"}[1m])) by (source, target, type, scope)", + "expr": "sum(rate(pd_schedule_filter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", action=\"filter-target\",type=\"rule-fit-leader-filter\"}[1m])*60) by (source, target, type, scope)", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -8382,7 +8403,7 @@ }, "yaxes": [ { - "format": "ops", + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -8460,21 +8481,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_schedule_scatter_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"skip\"}[1m])) by (event)", + "expr": "sum(rate(pd_schedule_scatter_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"skip\"}[1m])*60) by (event)", "format": "time_series", "intervalFactor": 2, "legendFormat": "skip-{{event}}", "refId": "A" }, { - "expr": "delta(pd_schedule_scatter_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"fail\"}[1m])", + "expr": "rate(pd_schedule_scatter_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"fail\"}[1m])*60", "format": "time_series", "intervalFactor": 2, "legendFormat": "fail", "refId": "B" }, { - "expr": "delta(pd_schedule_scatter_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"success\"}[1m])", + "expr": "rate(pd_schedule_scatter_operators_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"success\"}[1m])*60", "format": "time_series", "intervalFactor": 2, "legendFormat": "success", @@ -8563,14 +8584,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_schedule_scatter_distribution{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", engine=\"tikv\", is_leader=\"false\"}[1m])) by (store)", + "expr": "sum(rate(pd_schedule_scatter_distribution{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", engine=\"tikv\", is_leader=\"false\"}[1m])*60) by (store)", "format": "time_series", "intervalFactor": 1, "legendFormat": "peer-{{store}}", "refId": "A" }, { - 
"expr": "sum(delta(pd_schedule_scatter_distribution{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", engine=\"tikv\", is_leader=\"true\"}[1m])) by (store)", + "expr": "sum(rate(pd_schedule_scatter_distribution{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", engine=\"tikv\", is_leader=\"true\"}[1m])*60) by (store)", "format": "time_series", "intervalFactor": 1, "legendFormat": "leader-{{store}}", @@ -9289,7 +9310,7 @@ "steppedLine": false, "targets": [ { - "expr": "etcd_mvcc_db_total_size_in_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=\"pd\"}", + "expr": "etcd_mvcc_db_total_size_in_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\".*pd.*\"}", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -9297,7 +9318,7 @@ "refId": "A" }, { - "expr": "etcd_mvcc_db_total_size_in_use_in_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=\"pd\"}", + "expr": "etcd_mvcc_db_total_size_in_use_in_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\".*pd.*\"}", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -9747,7 +9768,7 @@ "query": { "datasourceId": 1, "model": { - "expr": "delta(etcd_disk_wal_fsync_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=\"pd\"}[1m])", + "expr": "rate(etcd_disk_wal_fsync_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=\"pd\"}[1m])*60", "intervalFactor": 2, "legendFormat": "{{instance}} etch disk wal fsync rate", "refId": "A", @@ -9818,7 +9839,7 @@ "steppedLine": false, "targets": [ { - "expr": "delta(etcd_disk_wal_fsync_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=\"pd\"}[1m])", + "expr": "rate(etcd_disk_wal_fsync_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=\"pd\"}[1m])*60", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -10017,7 +10038,7 @@ "steppedLine": false, "targets": [ { - "expr": "delta(etcd_disk_backend_commit_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])", + "expr": "rate(etcd_disk_backend_commit_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])*60", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -10066,6 +10087,188 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed of etcd endpoint health check in .99", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 53 + }, + "hiddenSeries": false, + "id": 1607, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "8.5.27", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "editorMode": "code", + "expr": "histogram_quantile(0.99, 
sum(rate(pd_server_etcd_endpoint_latency_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\", source=\"server-etcd-client\"}[30s])) by (instance, endpoint, le))", + "intervalFactor": 2, + "legendFormat": "{{instance}} -> {{endpoint}}", + "range": true, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "99% Endpoint health check latency", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The state of the endpoint health.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 61 + }, + "hiddenSeries": false, + "id": 1110, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "8.5.27", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "editorMode": "code", + "expr": "pd_server_etcd_client{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\", source=\"server-etcd-client\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} - {{type}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Endpoint health state", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, { "aliasColors": {}, "bars": false, @@ -10078,7 +10281,7 @@ "h": 8, "w": 8, "x": 0, - "y": 53 + "y": 69 }, "id": 1109, "legend": { @@ -10169,7 +10372,7 @@ "h": 8, "w": 8, "x": 8, - "y": 53 + "y": 69 }, "id": 1110, "legend": { @@ -10261,7 +10464,7 @@ "h": 8, "w": 8, "x": 16, - "y": 53 + "y": 69 }, "id": 1111, "legend": { @@ -10431,18 +10634,38 @@ "step": 2 }, { - "expr": "histogram_quantile(0.99999, sum(rate(pd_server_handle_tso_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", + "expr": "histogram_quantile(0.90, sum(rate(tso_server_handle_tso_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "99.999% tso", - "refId": "D" + "hide": false, + "intervalFactor": 2, + "legendFormat": "90% tso", + "refId": "D", + "step": 2 + }, + { + "expr": "histogram_quantile(0.99, sum(rate(tso_server_handle_tso_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + 
"legendFormat": "99% tso", + "refId": "E", + "step": 2 + }, + { + "expr": "histogram_quantile(0.999, sum(rate(tso_server_handle_tso_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "99.9% tso", + "refId": "F", + "step": 2 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PD server TSO handle time", + "title": "PD server TSO handle duration", "tooltip": { "msResolution": false, "shared": true, @@ -10529,26 +10752,42 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.98, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", - "hide": false, + "expr": "avg(rate(pd_client_request_handle_requests_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type) / avg(rate(pd_client_request_handle_requests_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type)", "intervalFactor": 2, - "legendFormat": "{{type}} 98th percentile", + "legendFormat": "avg {{type}}", "refId": "A", "step": 2 }, { - "expr": "avg(rate(pd_client_request_handle_requests_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type) / avg(rate(pd_client_request_handle_requests_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type)", + "expr": "histogram_quantile(0.90, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", + "hide": false, "intervalFactor": 2, - "legendFormat": "{{type}} average", + "legendFormat": "90% {{type}}", "refId": "B", "step": 2 + }, + { + "expr": "histogram_quantile(0.99, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", + "hide": false, + "intervalFactor": 2, + "legendFormat": "99% {{type}}", + "refId": "C", + "step": 2 + }, + { + "expr": "histogram_quantile(0.999, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", + "hide": false, + "intervalFactor": 2, + "legendFormat": "99.9% {{type}}", + "refId": "D", + "step": 2 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Handle requests duration", + "title": "PD client requests handle duration", "tooltip": { "msResolution": false, "shared": true, @@ -10648,6 +10887,15 @@ "hide": true, "refId": "B", "step": 2 + }, + { + "expr": "sum(rate(tso_server_handle_tso_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (type)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "tso request/secs", + "refId": "C", + "step": 2 } ], "thresholds": [], @@ -10756,7 +11004,7 @@ }, { "exemplar": true, - "expr": "sum(delta(pd_client_request_handle_tso_batch_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) / sum(delta(pd_client_request_handle_tso_batch_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m]))", + "expr": "sum(rate(pd_client_request_handle_tso_batch_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) / 
sum(rate(pd_client_request_handle_tso_batch_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m]))", "hide": false, "interval": "", "intervalFactor": 1, @@ -10924,10 +11172,15 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The region heartbeat handle duration in .99", + "description": "The region heartbeat handle duration by levels", "editable": true, "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "fill": 0, + "fillGradient": 0, "grid": {}, "gridPos": { "h": 8, @@ -10935,7 +11188,8 @@ "x": 12, "y": 23 }, - "id": 1302, + "hiddenSeries": false, + "id": 1610, "legend": { "alignAsTable": true, "avg": false, @@ -10953,8 +11207,12 @@ "linewidth": 1, "links": [], "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, "paceLength": 10, "percentage": false, + "pluginVersion": "7.5.17", "pointradius": 5, "points": false, "renderer": "flot", @@ -10964,20 +11222,46 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(pd_scheduler_handle_region_heartbeat_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\"}[1m])) by (address, store, le))", + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(pd_scheduler_handle_region_heartbeat_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\"}[1m])) by (le))", "format": "time_series", "hide": false, + "interval": "", "intervalFactor": 2, - "legendFormat": "{{address}}-store-{{store}}", + "legendFormat": "0.99", "refId": "A", "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.9, sum(rate(pd_scheduler_handle_region_heartbeat_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\"}[1m])) by (le))", + "hide": false, + "interval": "", + "legendFormat": "0.9", + "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.8, sum(rate(pd_scheduler_handle_region_heartbeat_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\"}[1m])) by (le))", + "hide": false, + "interval": "", + "legendFormat": "0.8", + "refId": "C" + }, + { + "exemplar": true, + "expr": "sum(rate(pd_scheduler_handle_region_heartbeat_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\"}[1m])) / sum(rate(pd_scheduler_handle_region_heartbeat_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "avg", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "99% Region heartbeat handle latency", + "title": "Region heartbeat handle latency overview", "tooltip": { "msResolution": false, "shared": true, @@ -11139,7 +11423,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of the heartbeats which each TiKV instance reports to PD", + "description": "The region heartbeat handle duration in .99 by store", "editable": true, "error": false, "fill": 0, @@ -11150,6 +11434,103 @@ "x": 12, "y": 31 }, + "id": 1302, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + 
"values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(pd_scheduler_handle_region_heartbeat_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\"}[1m])) by (address, store, le))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{address}}-store-{{store}}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "99% Region heartbeat handle latency by store", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The count of the heartbeats which each TiKV instance reports to PD", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 39 + }, "id": 1304, "legend": { "alignAsTable": true, @@ -11179,7 +11560,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_scheduler_region_heartbeat{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", type=\"report\", status=\"ok\"}[1m])) by (address, store)", + "expr": "sum(rate(pd_scheduler_region_heartbeat{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", type=\"report\", status=\"ok\"}[1m])*60) by (address, store)", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -11230,6 +11611,359 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The breakdown metric about heartbeat", + "editable": true, + "error": false, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 39 + }, + "hiddenSeries": false, + "id": 1335, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "8.5.27", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "WaitRegionsLock", + "bars": false, + "lines": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "WaitSubRegionsLock", + "bars": false, + "lines": true, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": 
"sum(rate(pd_core_region_heartbeat_breakdown_handle_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\",tidb_cluster=~\"$tidb_cluster.*\"}[1m])) by (name)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "range": true, + "refId": "A", + "step": 4 + }, + { + "expr": "sum(rate(pd_core_acquire_regions_lock_wait_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\",tidb_cluster=~\"$tidb_cluster.*\"}[1m])) by (type)", + "hide": false, + "legendFormat": "{{type}}", + "range": true, + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Heartbeat Performance Duration BreakDown (Accumulation)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "s", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The count of the heartbeats which pending in the task queue.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 47 + }, + "hiddenSeries": false, + "id": 1609, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/max-wait-duration.*/", + "bars": true, + "lines": false, + "transform": "negative-Y", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "pd_ratelimit_runner_pending_tasks{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{task_type}}_{{runner_name}}", + "refId": "A", + "step": 4 + }, + { + "exemplar": true, + "expr": "pd_ratelimit_runner_task_max_waiting_duration_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "hide": false, + "interval": "", + "legendFormat": "max-wait-duration-{{runner_name}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Concurrent Runner Pending Task", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + 
"description": "The count of the heartbeats which faileds in the task queue.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 47 + }, + "hiddenSeries": false, + "id": 1608, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "rate(pd_ratelimit_runner_failed_tasks_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])*60", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "failed-tasks-{{runner_name}}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Concurrent Runner Failed Task", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "opm", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, @@ -11245,7 +11979,7 @@ "h": 8, "w": 12, "x": 0, - "y": 39 + "y": 55 }, "id": 1305, "legend": { @@ -11339,7 +12073,7 @@ "h": 8, "w": 12, "x": 12, - "y": 39 + "y": 55 }, "id": 1306, "legend": { @@ -11369,7 +12103,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_scheduler_region_heartbeat{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", type=\"report\", status=\"err\"}[1m])) by (address, store)", + "expr": "sum(rate(pd_scheduler_region_heartbeat{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", type=\"report\", status=\"err\"}[1m])*60) by (address, store)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{address}}-store-{{store}}", @@ -11429,7 +12163,7 @@ "h": 8, "w": 12, "x": 0, - "y": 47 + "y": 63 }, "id": 1307, "legend": { @@ -11459,7 +12193,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_scheduler_region_heartbeat{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", type=\"report\", status=\"bind\"}[1m])) by (address, store)", + "expr": "sum(rate(pd_scheduler_region_heartbeat{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", type=\"report\", status=\"bind\"}[1m])*60) by (address, store)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{address}}-store-{{store}}", @@ -11522,7 +12256,7 @@ "h": 8, "w": 12, "x": 12, - "y": 47 + "y": 63 }, "id": 1308, "legend": { @@ -11619,7 +12353,7 @@ "h": 8, "w": 12, "x": 0, - "y": 55 + "y": 71 }, "id": 1309, "legend": { @@ -11650,7 +12384,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(delta(pd_hbstream_region_message{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", type=\"push\", status=\"ok\"}[1m])) by (address, store)", + "expr": "sum(rate(pd_hbstream_region_message{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", type=\"push\", status=\"ok\"}[1m])*60) by (address, store)", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -11716,7 +12450,7 @@ "h": 8, "w": 12, "x": 12, - "y": 55 + "y": 71 }, "id": 1310, "legend": { @@ -11747,7 +12481,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_hbstream_region_message{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", type=\"push\", status=\"err\"}[1m])) by (address, store)", + "expr": "sum(rate(pd_hbstream_region_message{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", type=\"push\", status=\"err\"}[1m])*60) by (address, store)", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -11813,7 +12547,7 @@ "h": 8, "w": 12, "x": 0, - "y": 63 + "y": 79 }, "id": 1311, "legend": { @@ -11910,7 +12644,7 @@ "h": 8, "w": 12, "x": 12, - "y": 63 + "y": 79 }, "id": 1312, "legend": { @@ -12051,7 +12785,7 @@ "pluginVersion": "7.1.5", "targets": [ { - "expr": "sum(delta(pd_scheduler_read_byte_hist_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le)", + "expr": "sum(rate(pd_scheduler_read_byte_hist_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le)", "format": "heatmap", "hide": false, "interval": "", @@ -12109,7 +12843,7 @@ "pluginVersion": "7.1.5", "targets": [ { - "expr": "sum(delta(pd_scheduler_write_byte_hist_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le)", + "expr": "sum(rate(pd_scheduler_write_byte_hist_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le)", "format": "heatmap", "hide": false, "interval": "", @@ -12169,7 +12903,7 @@ "pluginVersion": "7.1.5", "targets": [ { - "expr": "sum(delta(pd_scheduler_read_key_hist_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le)", + "expr": "sum(rate(pd_scheduler_read_key_hist_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le)", "format": "heatmap", "hide": false, "interval": "", @@ -12229,7 +12963,7 @@ "pluginVersion": "7.1.5", "targets": [ { - "expr": "sum(delta(pd_scheduler_write_key_hist_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le)", + "expr": "sum(rate(pd_scheduler_write_key_hist_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le)", "format": "heatmap", "hide": false, "interval": "", @@ -12291,7 +13025,7 @@ "repeatDirection": "h", "targets": [ { - "expr": "sum(delta(pd_scheduler_store_heartbeat_interval_hist_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le)", + "expr": "sum(rate(pd_scheduler_store_heartbeat_interval_hist_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le)", "format": "heatmap", "hide": false, "interval": "", @@ -12353,7 +13087,7 @@ "repeatDirection": "h", "targets": [ { - "expr": "sum(delta(pd_scheduler_region_heartbeat_interval_hist_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le)", + "expr": "sum(rate(pd_scheduler_region_heartbeat_interval_hist_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le)", "format": "heatmap", 
"hide": false, "interval": "", @@ -12402,7 +13136,7 @@ "repeatDirection": "h", "targets": [ { - "expr": "sum(delta(pd_server_bucket_report_interval_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\"}[1m])) by (le)", + "expr": "sum(rate(pd_server_bucket_report_interval_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\"}[1m])) by (le)", "format": "heatmap", "hide": false, "interval": "", @@ -12451,7 +13185,7 @@ "repeatDirection": "h", "targets": [ { - "expr": "sum(delta(pd_scheduler_buckets_hot_degree_hist_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\"}[1m])) by (le)", + "expr": "sum(rate(pd_scheduler_buckets_hot_degree_hist_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\"}[1m])) by (le)", "format": "heatmap", "hide": false, "interval": "", @@ -12726,7 +13460,7 @@ "id": 1601, "options": { "colorMode": "value", - "graphMode": "area", + "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { @@ -12743,7 +13477,7 @@ "targets": [ { "exemplar": true, - "expr": "pd_replication_dr_state{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\"}", + "expr": "max(pd_replication_dr_state{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"})", "instant": false, "interval": "", "legendFormat": "{{instance}}", @@ -12911,7 +13645,7 @@ "targets": [ { "exemplar": true, - "expr": "rate(pd_replication_dr_tick_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\"}[5m])", + "expr": "rate(pd_replication_dr_tick_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[5m])", "instant": false, "interval": "", "legendFormat": "{{instance}}", diff --git a/pkg/audit/audit.go b/pkg/audit/audit.go index b971b09ed7e..f84d035f8c9 100644 --- a/pkg/audit/audit.go +++ b/pkg/audit/audit.go @@ -118,7 +118,7 @@ func NewLocalLogBackend(before bool) Backend { } // ProcessHTTPRequest is used to implement audit.Backend -func (l *LocalLogBackend) ProcessHTTPRequest(r *http.Request) bool { +func (*LocalLogBackend) ProcessHTTPRequest(r *http.Request) bool { requestInfo, ok := requestutil.RequestInfoFrom(r.Context()) if !ok { return false diff --git a/pkg/audit/audit_test.go b/pkg/audit/audit_test.go index 8098b36975e..9066d81ebe3 100644 --- a/pkg/audit/audit_test.go +++ b/pkg/audit/audit_test.go @@ -32,7 +32,6 @@ import ( ) func TestLabelMatcher(t *testing.T) { - t.Parallel() re := require.New(t) matcher := &LabelMatcher{"testSuccess"} labels1 := &BackendLabels{Labels: []string{"testFail", "testSuccess"}} @@ -42,7 +41,6 @@ func TestLabelMatcher(t *testing.T) { } func TestPrometheusHistogramBackend(t *testing.T) { - t.Parallel() re := require.New(t) serviceAuditHistogramTest := prometheus.NewHistogramVec( prometheus.HistogramOpts{ @@ -90,7 +88,6 @@ func TestPrometheusHistogramBackend(t *testing.T) { } func TestLocalLogBackendUsingFile(t *testing.T) { - t.Parallel() re := require.New(t) backend := NewLocalLogBackend(true) fname := testutil.InitTempFileLogger("info") diff --git a/pkg/autoscaling/calculation.go b/pkg/autoscaling/calculation.go index d85af498e47..8c8783dd618 100644 --- a/pkg/autoscaling/calculation.go +++ b/pkg/autoscaling/calculation.go @@ -409,7 +409,7 @@ func buildPlans(planMap map[string]map[string]struct{}, resourceTypeMap map[stri } // TODO: implement heterogeneous logic and take cluster information into consideration. 
-func findBestGroupToScaleIn(strategy *Strategy, scaleInQuota float64, groups []*Plan) Plan { +func findBestGroupToScaleIn(_ *Strategy, _ float64, groups []*Plan) Plan { return *groups[0] } diff --git a/pkg/autoscaling/calculation_test.go b/pkg/autoscaling/calculation_test.go index 85f723b562c..9eb4ad648df 100644 --- a/pkg/autoscaling/calculation_test.go +++ b/pkg/autoscaling/calculation_test.go @@ -29,7 +29,6 @@ import ( ) func TestGetScaledTiKVGroups(t *testing.T) { - t.Parallel() re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -70,7 +69,7 @@ func TestGetScaledTiKVGroups(t *testing.T) { informer core.StoreSetInformer healthyInstances []instance expectedPlan []*Plan - errorChecker func(err error, msgAndArgs ...interface{}) + errorChecker func(err error, msgAndArgs ...any) }{ { name: "no scaled tikv group", @@ -204,7 +203,7 @@ func TestGetScaledTiKVGroups(t *testing.T) { type mockQuerier struct{} -func (q *mockQuerier) Query(options *QueryOptions) (QueryResult, error) { +func (*mockQuerier) Query(options *QueryOptions) (QueryResult, error) { result := make(QueryResult) for _, addr := range options.addresses { result[addr] = mockResultValue @@ -214,7 +213,6 @@ func (q *mockQuerier) Query(options *QueryOptions) (QueryResult, error) { } func TestGetTotalCPUUseTime(t *testing.T) { - t.Parallel() re := require.New(t) querier := &mockQuerier{} instances := []instance{ @@ -237,7 +235,6 @@ func TestGetTotalCPUUseTime(t *testing.T) { } func TestGetTotalCPUQuota(t *testing.T) { - t.Parallel() re := require.New(t) querier := &mockQuerier{} instances := []instance{ @@ -260,7 +257,6 @@ func TestGetTotalCPUQuota(t *testing.T) { } func TestScaleOutGroupLabel(t *testing.T) { - t.Parallel() re := require.New(t) var jsonStr = []byte(` { @@ -303,7 +299,6 @@ func TestScaleOutGroupLabel(t *testing.T) { } func TestStrategyChangeCount(t *testing.T) { - t.Parallel() re := require.New(t) var count uint64 = 2 strategy := &Strategy{ diff --git a/pkg/autoscaling/handler.go b/pkg/autoscaling/handler.go index ea248fdcc55..7bffa8ec156 100644 --- a/pkg/autoscaling/handler.go +++ b/pkg/autoscaling/handler.go @@ -41,22 +41,22 @@ func NewHTTPHandler(svr *server.Server, rd *render.Render) *HTTPHandler { func (h *HTTPHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { rc := h.svr.GetRaftCluster() if rc == nil { - h.rd.JSON(w, http.StatusInternalServerError, errs.ErrNotBootstrapped.FastGenByArgs().Error()) + _ = h.rd.JSON(w, http.StatusInternalServerError, errs.ErrNotBootstrapped.FastGenByArgs().Error()) return } data, err := io.ReadAll(r.Body) r.Body.Close() if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + _ = h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } strategy := Strategy{} if err := json.Unmarshal(data, &strategy); err != nil { - h.rd.JSON(w, http.StatusBadRequest, err.Error()) + _ = h.rd.JSON(w, http.StatusBadRequest, err.Error()) return } plan := calculate(rc, h.svr.GetPDServerConfig(), &strategy) - h.rd.JSON(w, http.StatusOK, plan) + _ = h.rd.JSON(w, http.StatusOK, plan) } diff --git a/pkg/autoscaling/prometheus_test.go b/pkg/autoscaling/prometheus_test.go index 2efdc348ead..9fe69e810d1 100644 --- a/pkg/autoscaling/prometheus_test.go +++ b/pkg/autoscaling/prometheus_test.go @@ -86,8 +86,8 @@ type data struct { } type result struct { - Metric metric `json:"metric"` - Value []interface{} `json:"value"` + Metric metric `json:"metric"` + Value []any `json:"value"` } type metric struct { @@ -121,7 +121,7 @@ func (c 
*normalClient) buildCPUMockData(component ComponentType) { var results []result for i := 0; i < instanceCount; i++ { results = append(results, result{ - Value: []interface{}{time.Now().Unix(), fmt.Sprintf("%f", mockResultValue)}, + Value: []any{time.Now().Unix(), fmt.Sprintf("%f", mockResultValue)}, Metric: metric{ Instance: pods[i], Cluster: mockClusterName, @@ -168,7 +168,7 @@ func makeJSONResponse(promResp *response) (*http.Response, []byte, error) { return response, body, nil } -func (c *normalClient) URL(ep string, args map[string]string) *url.URL { +func (*normalClient) URL(ep string, args map[string]string) *url.URL { return doURL(ep, args) } @@ -180,7 +180,6 @@ func (c *normalClient) Do(_ context.Context, req *http.Request) (response *http. } func TestRetrieveCPUMetrics(t *testing.T) { - t.Parallel() re := require.New(t) client := &normalClient{ mockData: make(map[string]*response), @@ -207,11 +206,11 @@ func TestRetrieveCPUMetrics(t *testing.T) { type emptyResponseClient struct{} -func (c *emptyResponseClient) URL(ep string, args map[string]string) *url.URL { +func (*emptyResponseClient) URL(ep string, args map[string]string) *url.URL { return doURL(ep, args) } -func (c *emptyResponseClient) Do(_ context.Context, req *http.Request) (r *http.Response, body []byte, err error) { +func (*emptyResponseClient) Do(context.Context, *http.Request) (r *http.Response, body []byte, err error) { promResp := &response{ Status: "success", Data: data{ @@ -225,7 +224,6 @@ func (c *emptyResponseClient) Do(_ context.Context, req *http.Request) (r *http. } func TestEmptyResponse(t *testing.T) { - t.Parallel() re := require.New(t) client := &emptyResponseClient{} querier := NewPrometheusQuerier(client) @@ -237,11 +235,11 @@ func TestEmptyResponse(t *testing.T) { type errorHTTPStatusClient struct{} -func (c *errorHTTPStatusClient) URL(ep string, args map[string]string) *url.URL { +func (*errorHTTPStatusClient) URL(ep string, args map[string]string) *url.URL { return doURL(ep, args) } -func (c *errorHTTPStatusClient) Do(_ context.Context, req *http.Request) (r *http.Response, body []byte, err error) { +func (*errorHTTPStatusClient) Do(context.Context, *http.Request) (r *http.Response, body []byte, err error) { promResp := &response{} r, body, err = makeJSONResponse(promResp) @@ -253,7 +251,6 @@ func (c *errorHTTPStatusClient) Do(_ context.Context, req *http.Request) (r *htt } func TestErrorHTTPStatus(t *testing.T) { - t.Parallel() re := require.New(t) client := &errorHTTPStatusClient{} querier := NewPrometheusQuerier(client) @@ -265,11 +262,11 @@ func TestErrorHTTPStatus(t *testing.T) { type errorPrometheusStatusClient struct{} -func (c *errorPrometheusStatusClient) URL(ep string, args map[string]string) *url.URL { +func (*errorPrometheusStatusClient) URL(ep string, args map[string]string) *url.URL { return doURL(ep, args) } -func (c *errorPrometheusStatusClient) Do(_ context.Context, req *http.Request) (r *http.Response, body []byte, err error) { +func (*errorPrometheusStatusClient) Do(_ context.Context, _ *http.Request) (r *http.Response, body []byte, err error) { promResp := &response{ Status: "error", } @@ -279,7 +276,6 @@ func (c *errorPrometheusStatusClient) Do(_ context.Context, req *http.Request) ( } func TestErrorPrometheusStatus(t *testing.T) { - t.Parallel() re := require.New(t) client := &errorPrometheusStatusClient{} querier := NewPrometheusQuerier(client) @@ -290,7 +286,6 @@ func TestErrorPrometheusStatus(t *testing.T) { } func TestGetInstanceNameFromAddress(t *testing.T) { - t.Parallel() 
re := require.New(t) testCases := []struct { address string @@ -328,7 +323,6 @@ func TestGetInstanceNameFromAddress(t *testing.T) { } func TestGetDurationExpression(t *testing.T) { - t.Parallel() re := require.New(t) testCases := []struct { duration time.Duration diff --git a/pkg/balancer/balancer_test.go b/pkg/balancer/balancer_test.go index 996b4f1da35..2c760c6220c 100644 --- a/pkg/balancer/balancer_test.go +++ b/pkg/balancer/balancer_test.go @@ -22,7 +22,6 @@ import ( ) func TestBalancerPutAndDelete(t *testing.T) { - t.Parallel() re := require.New(t) balancers := []Balancer[uint32]{ NewRoundRobin[uint32](), @@ -56,7 +55,6 @@ func TestBalancerPutAndDelete(t *testing.T) { } func TestBalancerDuplicate(t *testing.T) { - t.Parallel() re := require.New(t) balancers := []Balancer[uint32]{ NewRoundRobin[uint32](), @@ -77,7 +75,6 @@ func TestBalancerDuplicate(t *testing.T) { } func TestRoundRobin(t *testing.T) { - t.Parallel() re := require.New(t) balancer := NewRoundRobin[uint32]() for i := 0; i < 100; i++ { diff --git a/pkg/basicserver/metrics.go b/pkg/basicserver/metrics.go index 8f26216d696..4e4ab214ed5 100644 --- a/pkg/basicserver/metrics.go +++ b/pkg/basicserver/metrics.go @@ -17,7 +17,7 @@ package server import "github.com/prometheus/client_golang/prometheus" var ( - // ServerMaxProcsGauge record the maxprocs. + // ServerMaxProcsGauge records the maxprocs. ServerMaxProcsGauge = prometheus.NewGauge( prometheus.GaugeOpts{ Namespace: "pd", @@ -26,6 +26,15 @@ var ( Help: "The value of GOMAXPROCS.", }) + // ServerMemoryLimit records the cgroup memory limit. + ServerMemoryLimit = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: "pd", + Subsystem: "service", + Name: "memory_quota_bytes", + Help: "The value of memory quota bytes.", + }) + // ServerInfoGauge indicates the pd server info including version and git hash. ServerInfoGauge = prometheus.NewGaugeVec( prometheus.GaugeOpts{ @@ -38,5 +47,6 @@ var ( func init() { prometheus.MustRegister(ServerMaxProcsGauge) + prometheus.MustRegister(ServerMemoryLimit) prometheus.MustRegister(ServerInfoGauge) } diff --git a/pkg/btree/btree_generic.go b/pkg/btree/btree_generic.go index 630cb25abcd..599614678eb 100644 --- a/pkg/btree/btree_generic.go +++ b/pkg/btree/btree_generic.go @@ -73,7 +73,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//revive:disable +// nolint package btree import ( @@ -821,7 +821,7 @@ type copyOnWriteContext[T Item[T]] struct { // The internal tree structure of b is marked read-only and shared between t and // t2. Writes to both t and t2 use copy-on-write logic, creating new nodes // whenever one of b's original nodes would have been modified. Read operations -// should have no performance degredation. Write operations for both t and t2 +// should have no performance degradation. Write operations for both t and t2 // will initially experience minor slow-downs caused by additional allocs and // copies due to the aforementioned copy-on-write logic, but should converge to // the original performance characteristics of the original tree. 
diff --git a/pkg/btree/btree_generic_test.go b/pkg/btree/btree_generic_test.go index 751fb2744e9..fd0df3e5aaf 100644 --- a/pkg/btree/btree_generic_test.go +++ b/pkg/btree/btree_generic_test.go @@ -82,7 +82,7 @@ func allrev[T Item[T]](t *BTreeG[T]) (out []T) { return } -func assertEq(t *testing.T, desc string, got, need interface{}) { +func assertEq(t *testing.T, desc string, got, need any) { if !reflect.DeepEqual(need, got) { t.Fatalf("%s failed: need %T %v, but got %T %v", desc, need, need, got, got) } @@ -475,7 +475,7 @@ func BenchmarkSeek(b *testing.B) { b.StartTimer() for i := 0; i < b.N; i++ { - tr.AscendGreaterOrEqual(Int(i%size), func(i Int) bool { return false }) + tr.AscendGreaterOrEqual(Int(i%size), func(_ Int) bool { return false }) } } diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go index eb1d314dd60..3aa7297201a 100644 --- a/pkg/cache/cache.go +++ b/pkg/cache/cache.go @@ -19,11 +19,11 @@ import "github.com/tikv/pd/pkg/utils/syncutil" // Cache is an interface for cache system type Cache interface { // Put puts an item into cache. - Put(key uint64, value interface{}) + Put(key uint64, value any) // Get retrieves an item from cache. - Get(key uint64) (interface{}, bool) + Get(key uint64) (any, bool) // Peek reads an item from cache. The action is no considered 'Use'. - Peek(key uint64) (interface{}, bool) + Peek(key uint64) (any, bool) // Remove eliminates an item from cache. Remove(key uint64) // Elems return all items in cache. @@ -59,7 +59,7 @@ func newThreadSafeCache(cache Cache) Cache { } // Put puts an item into cache. -func (c *threadSafeCache) Put(key uint64, value interface{}) { +func (c *threadSafeCache) Put(key uint64, value any) { c.lock.Lock() defer c.lock.Unlock() c.cache.Put(key, value) @@ -68,14 +68,14 @@ func (c *threadSafeCache) Put(key uint64, value interface{}) { // Get retrieves an item from cache. // When Get method called, LRU and TwoQueue cache will rearrange entries // so we must use write lock. -func (c *threadSafeCache) Get(key uint64) (interface{}, bool) { +func (c *threadSafeCache) Get(key uint64) (any, bool) { c.lock.Lock() defer c.lock.Unlock() return c.cache.Get(key) } // Peek reads an item from cache. The action is no considered 'Use'. 
-func (c *threadSafeCache) Peek(key uint64) (interface{}, bool) { +func (c *threadSafeCache) Peek(key uint64) (any, bool) { c.lock.RLock() defer c.lock.RUnlock() return c.cache.Peek(key) diff --git a/pkg/cache/cache_test.go b/pkg/cache/cache_test.go index 904da1afb62..43e97dfa2b0 100644 --- a/pkg/cache/cache_test.go +++ b/pkg/cache/cache_test.go @@ -25,7 +25,6 @@ import ( ) func TestExpireRegionCache(t *testing.T) { - t.Parallel() re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -121,7 +120,6 @@ func sortIDs(ids []uint64) []uint64 { } func TestLRUCache(t *testing.T) { - t.Parallel() re := require.New(t) cache := newLRU(3) @@ -199,7 +197,6 @@ func TestLRUCache(t *testing.T) { } func TestFifoCache(t *testing.T) { - t.Parallel() re := require.New(t) cache := NewFIFO(3) cache.Put(1, "1") @@ -227,7 +224,6 @@ func TestFifoCache(t *testing.T) { } func TestFifoFromLastSameElems(t *testing.T) { - t.Parallel() re := require.New(t) type testStruct struct { value string @@ -238,7 +234,7 @@ func TestFifoFromLastSameElems(t *testing.T) { cache.Put(1, &testStruct{value: "3"}) fun := func() []*Item { return cache.FromLastSameElems( - func(i interface{}) (bool, string) { + func(i any) (bool, string) { result, ok := i.(*testStruct) if result == nil { return ok, "" @@ -260,7 +256,6 @@ func TestFifoFromLastSameElems(t *testing.T) { } func TestTwoQueueCache(t *testing.T) { - t.Parallel() re := require.New(t) cache := newTwoQueue(3) cache.Put(1, "1") @@ -345,7 +340,6 @@ func (pq PriorityQueueItemTest) ID() uint64 { } func TestPriorityQueue(t *testing.T) { - t.Parallel() re := require.New(t) testData := []PriorityQueueItemTest{0, 1, 2, 3, 4, 5} pq := NewPriorityQueue(0) diff --git a/pkg/cache/fifo.go b/pkg/cache/fifo.go index fc1b02ac0c9..d544cdaec1c 100644 --- a/pkg/cache/fifo.go +++ b/pkg/cache/fifo.go @@ -40,7 +40,7 @@ func NewFIFO(maxCount int) *FIFO { } // Put puts an item into cache. -func (c *FIFO) Put(key uint64, value interface{}) { +func (c *FIFO) Put(key uint64, value any) { c.Lock() defer c.Unlock() @@ -89,13 +89,13 @@ func (c *FIFO) FromElems(key uint64) []*Item { return elems } -// FromLastSameElems returns continuous items that have the same comparable attribute with the the lastest one. -func (c *FIFO) FromLastSameElems(checkFunc func(interface{}) (bool, string)) []*Item { +// FromLastSameElems returns continuous items that have the same comparable attribute with the last one. +func (c *FIFO) FromLastSameElems(checkFunc func(any) (bool, string)) []*Item { c.RLock() defer c.RUnlock() elems := make([]*Item, 0, c.ll.Len()) - var lastItem interface{} + var lastItem any for ele := c.ll.Front(); ele != nil; ele = ele.Next() { kv := ele.Value.(*Item) if lastItem == nil { diff --git a/pkg/cache/lru.go b/pkg/cache/lru.go index db750bbb9bd..66751b01eee 100644 --- a/pkg/cache/lru.go +++ b/pkg/cache/lru.go @@ -21,7 +21,7 @@ import ( // Item is the cache entry. type Item struct { Key uint64 - Value interface{} + Value any } // LRU is 'Least-Recently-Used' cache. @@ -45,7 +45,7 @@ func newLRU(maxCount int) *LRU { } // Put puts an item into cache. -func (c *LRU) Put(key uint64, value interface{}) { +func (c *LRU) Put(key uint64, value any) { if ele, ok := c.cache[key]; ok { c.ll.MoveToFront(ele) ele.Value.(*Item).Value = value @@ -61,7 +61,7 @@ func (c *LRU) Put(key uint64, value interface{}) { } // Get retrieves an item from cache. 
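The comment on threadSafeCache.Get above makes a subtle point: the LRU (and TwoQueue) implementations rearrange their recency bookkeeping on every hit, so Get is effectively a write and cannot run under the shared lock, while Peek can. A standalone sketch of that split, with illustrative types rather than the pkg/cache ones:

```go
package main

import (
	"fmt"
	"sync"
)

// lru stands in for the real LRU: even a lookup mutates its recency bookkeeping.
type lru struct {
	items map[uint64]string
	hits  map[uint64]int // placeholder for the move-to-front list the real LRU keeps
}

func (l *lru) get(key uint64) (string, bool) {
	v, ok := l.items[key]
	if ok {
		l.hits[key]++ // writes shared state, hence the caller's exclusive lock
	}
	return v, ok
}

func (l *lru) peek(key uint64) (string, bool) {
	v, ok := l.items[key] // no bookkeeping: genuinely read-only
	return v, ok
}

// threadSafe mirrors the wrapper above: write lock for Get, read lock for Peek.
type threadSafe struct {
	mu sync.RWMutex
	c  *lru
}

func (t *threadSafe) Get(key uint64) (string, bool) {
	t.mu.Lock()
	defer t.mu.Unlock()
	return t.c.get(key)
}

func (t *threadSafe) Peek(key uint64) (string, bool) {
	t.mu.RLock()
	defer t.mu.RUnlock()
	return t.c.peek(key)
}

func main() {
	ts := &threadSafe{c: &lru{items: map[uint64]string{1: "a"}, hits: map[uint64]int{}}}
	v, _ := ts.Get(1)
	fmt.Println(v) // "a", with the hit recorded under the write lock
}
```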
-func (c *LRU) Get(key uint64) (interface{}, bool) { +func (c *LRU) Get(key uint64) (any, bool) { if ele, ok := c.cache[key]; ok { c.ll.MoveToFront(ele) return ele.Value.(*Item).Value, true @@ -71,7 +71,7 @@ func (c *LRU) Get(key uint64) (interface{}, bool) { } // Peek reads an item from cache. The action is no considered 'Use'. -func (c *LRU) Peek(key uint64) (interface{}, bool) { +func (c *LRU) Peek(key uint64) (any, bool) { if ele, ok := c.cache[key]; ok { return ele.Value.(*Item).Value, true } @@ -104,7 +104,7 @@ func (c *LRU) removeOldest() { } } -func (c *LRU) getAndRemoveOldest() (uint64, interface{}, bool) { +func (c *LRU) getAndRemoveOldest() (uint64, any, bool) { ele := c.ll.Back() if ele != nil { c.removeElement(ele) diff --git a/pkg/cache/ttl.go b/pkg/cache/ttl.go index 14adf072dad..2aa39f6c6fd 100644 --- a/pkg/cache/ttl.go +++ b/pkg/cache/ttl.go @@ -25,7 +25,7 @@ import ( ) type ttlCacheItem struct { - value interface{} + value any expire time.Time } @@ -34,7 +34,7 @@ type ttlCache struct { syncutil.RWMutex ctx context.Context - items map[interface{}]ttlCacheItem + items map[any]ttlCacheItem ttl time.Duration gcInterval time.Duration } @@ -43,7 +43,7 @@ type ttlCache struct { func newTTL(ctx context.Context, gcInterval time.Duration, duration time.Duration) *ttlCache { c := &ttlCache{ ctx: ctx, - items: make(map[interface{}]ttlCacheItem), + items: make(map[any]ttlCacheItem), ttl: duration, gcInterval: gcInterval, } @@ -53,12 +53,12 @@ func newTTL(ctx context.Context, gcInterval time.Duration, duration time.Duratio } // Put puts an item into cache. -func (c *ttlCache) put(key interface{}, value interface{}) { +func (c *ttlCache) put(key any, value any) { c.putWithTTL(key, value, c.ttl) } // PutWithTTL puts an item into cache with specified TTL. -func (c *ttlCache) putWithTTL(key interface{}, value interface{}, ttl time.Duration) { +func (c *ttlCache) putWithTTL(key any, value any, ttl time.Duration) { c.Lock() defer c.Unlock() @@ -69,7 +69,7 @@ func (c *ttlCache) putWithTTL(key interface{}, value interface{}, ttl time.Durat } // Get retrieves an item from cache. -func (c *ttlCache) get(key interface{}) (interface{}, bool) { +func (c *ttlCache) get(key any) (any, bool) { c.RLock() defer c.RUnlock() @@ -86,11 +86,11 @@ func (c *ttlCache) get(key interface{}) (interface{}, bool) { } // GetKeys returns all keys that are not expired. -func (c *ttlCache) getKeys() []interface{} { +func (c *ttlCache) getKeys() []any { c.RLock() defer c.RUnlock() - var keys []interface{} + var keys []any now := time.Now() for key, item := range c.items { @@ -102,7 +102,7 @@ func (c *ttlCache) getKeys() []interface{} { } // Remove eliminates an item from cache. -func (c *ttlCache) remove(key interface{}) { +func (c *ttlCache) remove(key any) { c.Lock() defer c.Unlock() @@ -110,7 +110,7 @@ func (c *ttlCache) remove(key interface{}) { } // pop one key/value that is not expired. If boolean is false, it means that it didn't find the valid one. -func (c *ttlCache) pop() (interface{}, interface{}, bool) { +func (c *ttlCache) pop() (key, value any, exist bool) { c.Lock() defer c.Unlock() now := time.Now() @@ -199,12 +199,12 @@ func NewIDTTL(ctx context.Context, gcInterval, ttl time.Duration) *TTLUint64 { } // Get return the value by key id -func (c *TTLUint64) Get(id uint64) (interface{}, bool) { +func (c *TTLUint64) Get(id uint64) (any, bool) { return c.ttlCache.get(id) } // Put saves an ID in cache. 
-func (c *TTLUint64) Put(id uint64, value interface{}) { +func (c *TTLUint64) Put(id uint64, value any) { c.ttlCache.put(id, value) } @@ -233,7 +233,7 @@ func (c *TTLUint64) Remove(key uint64) { } // PutWithTTL puts an item into cache with specified TTL. -func (c *TTLUint64) PutWithTTL(key uint64, value interface{}, ttl time.Duration) { +func (c *TTLUint64) PutWithTTL(key uint64, value any, ttl time.Duration) { c.ttlCache.putWithTTL(key, value, ttl) } @@ -250,17 +250,17 @@ func NewStringTTL(ctx context.Context, gcInterval, ttl time.Duration) *TTLString } // Put put the string key with the value -func (c *TTLString) Put(key string, value interface{}) { +func (c *TTLString) Put(key string, value any) { c.ttlCache.put(key, value) } // PutWithTTL puts an item into cache with specified TTL. -func (c *TTLString) PutWithTTL(key string, value interface{}, ttl time.Duration) { +func (c *TTLString) PutWithTTL(key string, value any, ttl time.Duration) { c.ttlCache.putWithTTL(key, value, ttl) } // Pop one key/value that is not expired -func (c *TTLString) Pop() (string, interface{}, bool) { +func (c *TTLString) Pop() (string, any, bool) { k, v, success := c.ttlCache.pop() if !success { return "", nil, false @@ -273,7 +273,7 @@ func (c *TTLString) Pop() (string, interface{}, bool) { } // Get return the value by key id -func (c *TTLString) Get(id string) (interface{}, bool) { +func (c *TTLString) Get(id string) (any, bool) { return c.ttlCache.get(id) } diff --git a/pkg/cache/two_queue.go b/pkg/cache/two_queue.go index 0e41d855387..c2e0c4b32f7 100644 --- a/pkg/cache/two_queue.go +++ b/pkg/cache/two_queue.go @@ -62,7 +62,7 @@ func newTwoQueueParams(size int, recentRatio, ghostRatio float64) *TwoQueue { } // Put puts an item into cache. -func (c *TwoQueue) Put(key uint64, value interface{}) { +func (c *TwoQueue) Put(key uint64, value any) { // Check if value is in frequent list, // then just update it if c.frequent.contains(key) { @@ -111,7 +111,7 @@ func (c *TwoQueue) ensureSpace(ghost bool) { } // Get retrieves an item from cache. -func (c *TwoQueue) Get(key uint64) (interface{}, bool) { +func (c *TwoQueue) Get(key uint64) (any, bool) { // Check in frequent list if val, ok := c.frequent.Get(key); ok { return val, ok @@ -128,7 +128,7 @@ func (c *TwoQueue) Get(key uint64) (interface{}, bool) { } // Peek reads an item from cache. The action is no considered 'Use'. -func (c *TwoQueue) Peek(key uint64) (interface{}, bool) { +func (c *TwoQueue) Peek(key uint64) (any, bool) { if val, ok := c.frequent.Peek(key); ok { return val, ok } diff --git a/pkg/cgroup/cgmon.go b/pkg/cgroup/cgmon.go new file mode 100644 index 00000000000..407e50f50c7 --- /dev/null +++ b/pkg/cgroup/cgmon.go @@ -0,0 +1,162 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cgroup + +import ( + "context" + "math" + "runtime" + "sync" + "time" + + "github.com/pingcap/log" + "github.com/shirou/gopsutil/v3/mem" + bs "github.com/tikv/pd/pkg/basicserver" + "go.uber.org/zap" +) + +const ( + refreshInterval = 10 * time.Second +) + +// Monitor is used to monitor the cgroup. +type Monitor struct { + started bool + ctx context.Context + cancel context.CancelFunc + wg sync.WaitGroup + cfgMaxProcs int + lastMaxProcs int + lastMemoryLimit uint64 +} + +// StartMonitor uses to start the cgroup monitoring. +// WARN: this function is not thread-safe. +func (m *Monitor) StartMonitor(ctx context.Context) { + if m.started { + return + } + m.started = true + if runtime.GOOS != "linux" { + return + } + m.ctx, m.cancel = context.WithCancel(ctx) + m.wg.Add(1) + go m.refreshCgroupLoop() + log.Info("cgroup monitor started") +} + +// StopMonitor uses to stop the cgroup monitoring. +// WARN: this function is not thread-safe. +func (m *Monitor) StopMonitor() { + if !m.started { + return + } + if runtime.GOOS != "linux" { + return + } + m.started = false + if m.cancel != nil { + m.cancel() + } + m.wg.Wait() + log.Info("cgroup monitor stopped") +} + +func (m *Monitor) refreshCgroupLoop() { + ticker := time.NewTicker(refreshInterval) + defer func() { + if r := recover(); r != nil { + log.Error("[pd] panic in the recoverable goroutine", + zap.String("func-info", "refreshCgroupLoop"), + zap.Reflect("r", r), + zap.Stack("stack")) + } + m.wg.Done() + ticker.Stop() + }() + + err := m.refreshCgroupCPU() + if err != nil { + log.Warn("failed to get cgroup memory limit", zap.Error(err)) + } + err = m.refreshCgroupMemory() + if err != nil { + log.Warn("failed to get cgroup memory limit", zap.Error(err)) + } + for { + select { + case <-m.ctx.Done(): + return + case <-ticker.C: + err = m.refreshCgroupCPU() + if err != nil { + log.Debug("failed to get cgroup cpu quota", zap.Error(err)) + } + err = m.refreshCgroupMemory() + if err != nil { + log.Debug("failed to get cgroup memory limit", zap.Error(err)) + } + } + } +} + +func (m *Monitor) refreshCgroupCPU() error { + // Get the number of CPUs. + quota := runtime.NumCPU() + + // Get CPU quota from cgroup. 
+ cpuPeriod, cpuQuota, err := GetCPUPeriodAndQuota() + if err != nil { + return err + } + if cpuPeriod > 0 && cpuQuota > 0 { + ratio := float64(cpuQuota) / float64(cpuPeriod) + if ratio < float64(quota) { + quota = int(math.Ceil(ratio)) + } + } + + if quota != m.lastMaxProcs { + log.Info("set the maxprocs", zap.Int("quota", quota)) + bs.ServerMaxProcsGauge.Set(float64(quota)) + m.lastMaxProcs = quota + } else if m.lastMaxProcs == 0 { + log.Info("set the maxprocs", zap.Int("maxprocs", m.cfgMaxProcs)) + bs.ServerMaxProcsGauge.Set(float64(m.cfgMaxProcs)) + m.lastMaxProcs = m.cfgMaxProcs + } + return nil +} + +func (m *Monitor) refreshCgroupMemory() error { + memLimit, err := GetMemoryLimit() + if err != nil { + return err + } + vmem, err := mem.VirtualMemory() + if err != nil { + return err + } + if memLimit > vmem.Total { + memLimit = vmem.Total + } + if memLimit != m.lastMemoryLimit { + log.Info("set the memory limit", zap.Uint64("mem-limit", memLimit)) + bs.ServerMemoryLimit.Set(float64(memLimit)) + m.lastMemoryLimit = memLimit + } + return nil +} diff --git a/pkg/cgroup/cgroup.go b/pkg/cgroup/cgroup.go index 2a99d2fcd3d..133bd3158c8 100644 --- a/pkg/cgroup/cgroup.go +++ b/pkg/cgroup/cgroup.go @@ -143,7 +143,6 @@ func combineErrors(err1, err2 error) error { func readFile(filepath string) (res []byte, err error) { var f *os.File - //nolint:gosec f, err = os.Open(filepath) if err != nil { return nil, err @@ -155,10 +154,36 @@ func readFile(filepath string) (res []byte, err error) { return res, err } +// The field in /proc/self/cgroup and /proc/self/mountinfo may appear as "cpuacct,cpu" or "rw,cpuacct,cpu" +// while the input controller is "cpu,cpuacct" +func controllerMatch(field string, controller string) bool { + if field == controller { + return true + } + + fs := strings.Split(field, ",") + if len(fs) < 2 { + return false + } + cs := strings.Split(controller, ",") + if len(fs) < len(cs) { + return false + } + fmap := make(map[string]struct{}, len(fs)) + for _, f := range fs { + fmap[f] = struct{}{} + } + for _, c := range cs { + if _, ok := fmap[c]; !ok { + return false + } + } + return true +} + // The controller is defined via either type `memory` for cgroup v1 or via empty type for cgroup v2, // where the type is the second field in /proc/[pid]/cgroup file func detectControlPath(cgroupFilePath string, controller string) (string, error) { - //nolint:gosec cgroup, err := os.Open(cgroupFilePath) if err != nil { return "", errors.Wrapf(err, "failed to read %s cgroup from cgroups file: %s", controller, cgroupFilePath) @@ -185,7 +210,7 @@ func detectControlPath(cgroupFilePath string, controller string) (string, error) // but no known container solutions support it. if f0 == "0" && f1 == "" { unifiedPathIfFound = string(fields[2]) - } else if f1 == controller { + } else if controllerMatch(f1, controller) { var result []byte // In some case, the cgroup path contains `:`. We need to join them back. if len(fields) > 3 { @@ -202,7 +227,6 @@ func detectControlPath(cgroupFilePath string, controller string) (string, error) // See http://man7.org/linux/man-pages/man5/proc.5.html for `mountinfo` format. 
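For context, this is roughly how a server is expected to drive the new cgroup monitor: the zero-value Monitor is usable, and StartMonitor/StopMonitor are documented above as not thread-safe. A sketch of the wiring, not code taken from this patch:

```go
package main

import (
	"context"

	"github.com/tikv/pd/pkg/cgroup"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	var monitor cgroup.Monitor // the zero value is ready to use
	monitor.StartMonitor(ctx)  // no-op on non-Linux systems
	defer monitor.StopMonitor()

	// ... run the server; every refresh interval the monitor updates the
	// maxprocs and memory-limit gauges until StopMonitor or ctx cancellation.
}
```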
func getCgroupDetails(mountInfoPath string, cRoot string, controller string) (mount []string, version []int, err error) { - //nolint:gosec info, err := os.Open(mountInfoPath) if err != nil { return nil, nil, errors.Wrapf(err, "failed to read mounts info from file: %s", mountInfoPath) @@ -314,7 +338,7 @@ func detectCgroupVersion(fields [][]byte, controller string) (_ int, found bool) // Check for controller specifically in cgroup v1 (it is listed in super // options field), as the value can't be found if it is not enforced. - if bytes.Equal(fields[pos], []byte("cgroup")) && bytes.Contains(fields[pos+2], []byte(controller)) { + if bytes.Equal(fields[pos], []byte("cgroup")) && controllerMatch(string(fields[pos+2]), controller) { return 1, true } else if bytes.Equal(fields[pos], []byte("cgroup2")) { return 2, true @@ -384,7 +408,6 @@ func detectCPUQuotaInV2(cRoot string) (period, quota int64, err error) { func detectCPUUsageInV2(cRoot string) (stime, utime uint64, err error) { statFilePath := filepath.Join(cRoot, cgroupV2CPUStat) var stat *os.File - //nolint:gosec stat, err = os.Open(statFilePath) if err != nil { return 0, 0, errors.Wrapf(err, "can't read cpu usage from cgroup v2 at %s", statFilePath) @@ -417,7 +440,6 @@ func detectCPUUsageInV2(cRoot string) (stime, utime uint64, err error) { func readInt64Value(root, filename string, cgVersion int) (value uint64, err error) { filePath := filepath.Join(root, filename) - //nolint:gosec file, err := os.Open(filePath) if err != nil { return 0, errors.Wrapf(err, "can't read %s from cgroup v%d", filename, cgVersion) diff --git a/pkg/cgroup/cgroup_cpu.go b/pkg/cgroup/cgroup_cpu.go index 7063aa89bf9..67eace5363c 100644 --- a/pkg/cgroup/cgroup_cpu.go +++ b/pkg/cgroup/cgroup_cpu.go @@ -88,6 +88,49 @@ func getCgroupCPUHelper(root string) (CPUUsage, error) { return res, nil } +// Helper function for getCgroupCPUPeriodAndQuota. Root is always "/", except in tests. +func getCgroupCPUPeriodAndQuota(root string) (period int64, quota int64, err error) { + path, err := detectControlPath(filepath.Join(root, procPathCGroup), "cpu") + if err != nil { + return + } + + // No CPU controller detected + if path == "" { + err = errors.New("no cpu controller detected") + return + } + + mount, ver, err := getCgroupDetails(filepath.Join(root, procPathMountInfo), path, "cpu") + if err != nil { + return + } + + if len(mount) == 2 { + cgroupRootV1 := filepath.Join(root, mount[0]) + cgroupRootV2 := filepath.Join(root, mount[1], path) + period, quota, err = detectCPUQuotaInV2(cgroupRootV2) + if err != nil { + period, quota, err = detectCPUQuotaInV1(cgroupRootV1) + } + if err != nil { + return + } + } else { + switch ver[0] { + case 1: + cgroupRoot := filepath.Join(root, mount[0]) + period, quota, err = detectCPUQuotaInV1(cgroupRoot) + case 2: + cgroupRoot := filepath.Join(root, mount[0], path) + period, quota, err = detectCPUQuotaInV2(cgroupRoot) + default: + err = fmt.Errorf("detected unknown cgroup version index: %d", ver[0]) + } + } + return +} + // CPUShares returns the number of CPUs this cgroup can be expected to // max out. If there's no limit, NumCPU is returned. func (c CPUUsage) CPUShares() float64 { diff --git a/pkg/cgroup/cgroup_cpu_linux.go b/pkg/cgroup/cgroup_cpu_linux.go index 34bce632daa..5dc9ea4c285 100644 --- a/pkg/cgroup/cgroup_cpu_linux.go +++ b/pkg/cgroup/cgroup_cpu_linux.go @@ -25,9 +25,9 @@ import ( // GetCgroupCPU returns the CPU usage and quota for the current cgroup. 
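The order-insensitive matching that controllerMatch introduces is easiest to see with concrete inputs. A sketch of a table-driven test (it would have to live in package cgroup, since controllerMatch is unexported):

```go
package cgroup

import "testing"

func TestControllerMatchSketch(t *testing.T) {
	cases := []struct {
		field, controller string
		want              bool
	}{
		{"cpu,cpuacct", "cpu,cpuacct", true},    // exact match
		{"cpuacct,cpu", "cpu,cpuacct", true},    // same controllers, different order
		{"rw,cpuacct,cpu", "cpu,cpuacct", true}, // extra mount options are tolerated
		{"cpuset", "cpu,cpuacct", false},        // unrelated single controller
		{"cpu", "cpu,cpuacct", false},           // one requested controller missing
	}
	for _, c := range cases {
		if got := controllerMatch(c.field, c.controller); got != c.want {
			t.Errorf("controllerMatch(%q, %q) = %v, want %v", c.field, c.controller, c.want, got)
		}
	}
}
```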
func GetCgroupCPU() (CPUUsage, error) { - cpuusage, err := getCgroupCPUHelper("/") - cpuusage.NumCPU = runtime.NumCPU() - return cpuusage, err + cpuUsage, err := getCgroupCPUHelper("/") + cpuUsage.NumCPU = runtime.NumCPU() + return cpuUsage, err } // CPUQuotaToGOMAXPROCS converts the CPU quota applied to the calling process @@ -44,6 +44,11 @@ func CPUQuotaToGOMAXPROCS(minValue int) (int, CPUQuotaStatus, error) { return maxProcs, CPUQuotaUsed, nil } +// GetCPUPeriodAndQuota returns CPU period and quota time of cgroup. +func GetCPUPeriodAndQuota() (period int64, quota int64, err error) { + return getCgroupCPUPeriodAndQuota("/") +} + // InContainer returns true if the process is running in a container. func InContainer() bool { v, err := os.ReadFile(procPathCGroup) diff --git a/pkg/cgroup/cgroup_cpu_test.go b/pkg/cgroup/cgroup_cpu_test.go index f0b9239ecab..c373f803210 100644 --- a/pkg/cgroup/cgroup_cpu_test.go +++ b/pkg/cgroup/cgroup_cpu_test.go @@ -46,7 +46,7 @@ func checkKernelVersionNewerThan(re *require.Assertions, t *testing.T, major, mi re.Len(kernelVersion, 1, fmt.Sprintf("release str is %s", releaseStr)) kernelVersionPartRE := regexp.MustCompile(`[0-9]+`) kernelVersionParts := kernelVersionPartRE.FindAllString(kernelVersion[0], -1) - re.Len(kernelVersionParts, 3, fmt.Sprintf("kernel verion str is %s", kernelVersion[0])) + re.Len(kernelVersionParts, 3, fmt.Sprintf("kernel version str is %s", kernelVersion[0])) t.Logf("parsed kernel version parts: major %s, minor %s, patch %s", kernelVersionParts[0], kernelVersionParts[1], kernelVersionParts[2]) mustConvInt := func(s string) int { diff --git a/pkg/cgroup/cgroup_cpu_unsupport.go b/pkg/cgroup/cgroup_cpu_unsupport.go index 9576ff52542..72c37aad396 100644 --- a/pkg/cgroup/cgroup_cpu_unsupport.go +++ b/pkg/cgroup/cgroup_cpu_unsupport.go @@ -27,6 +27,12 @@ func GetCgroupCPU() (CPUUsage, error) { return cpuUsage, nil } +// GetCPUPeriodAndQuota returns CPU period and quota time of cgroup. +// This is Linux-specific and not supported in the current OS. +func GetCPUPeriodAndQuota() (period int64, quota int64, err error) { + return -1, -1, nil +} + // CPUQuotaToGOMAXPROCS converts the CPU quota applied to the calling process // to a valid GOMAXPROCS value. This is Linux-specific and not supported in the // current OS. 
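GetCPUPeriodAndQuota hands callers the raw period/quota pair; turning it into an effective CPU count is the caller's job (the monitor earlier in this patch caps it at NumCPU and rounds up). A sketch of a standalone consumer, using only the exported API added here:

```go
package main

import (
	"fmt"
	"math"
	"runtime"

	"github.com/tikv/pd/pkg/cgroup"
)

func main() {
	period, quota, err := cgroup.GetCPUPeriodAndQuota()
	if err != nil {
		fmt.Println("could not read the cgroup CPU limit:", err)
		return
	}
	cpus := runtime.NumCPU()
	// quota <= 0 means "no limit": cpu.max reports "max", and the non-Linux stub
	// returns -1/-1, so NumCPU is kept as-is in those cases.
	if period > 0 && quota > 0 {
		if ratio := float64(quota) / float64(period); ratio < float64(cpus) {
			cpus = int(math.Ceil(ratio)) // e.g. 250000/100000 = 2.5 cores -> 3
		}
	}
	fmt.Println("effective CPUs:", cpus)
}
```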
diff --git a/pkg/cgroup/cgroup_memory.go b/pkg/cgroup/cgroup_memory.go index fb8e8f212dc..2a6d581023e 100644 --- a/pkg/cgroup/cgroup_memory.go +++ b/pkg/cgroup/cgroup_memory.go @@ -177,7 +177,6 @@ func detectMemInactiveFileUsageInV2(root string) (uint64, error) { func detectMemStatValue(cRoot, filename, key string, cgVersion int) (value uint64, err error) { statFilePath := filepath.Join(cRoot, filename) - //nolint:gosec stat, err := os.Open(statFilePath) if err != nil { return 0, errors.Wrapf(err, "can't read file %s from cgroup v%d", filename, cgVersion) diff --git a/pkg/cgroup/cgroup_mock_test.go b/pkg/cgroup/cgroup_mock_test.go index 949e93bb125..5a7ca9a73dc 100644 --- a/pkg/cgroup/cgroup_mock_test.go +++ b/pkg/cgroup/cgroup_mock_test.go @@ -18,6 +18,7 @@ import ( "os" "path/filepath" "regexp" + "strings" "testing" "github.com/stretchr/testify/require" @@ -370,6 +371,28 @@ const ( ) func TestCgroupsGetCPU(t *testing.T) { + for i := 0; i < 2; i++ { + if i == 1 { + // The field in /proc/self/cgroup and /proc/self/mountinfo may appear as "cpuacct,cpu" or "rw,cpuacct,cpu" + // while the input controller is "cpu,cpuacct" + v1CgroupWithCPUController = strings.ReplaceAll(v1CgroupWithCPUController, "cpu,cpuacct", "cpuacct,cpu") + v1CgroupWithCPUControllerNS = strings.ReplaceAll(v1CgroupWithCPUControllerNS, "cpu,cpuacct", "cpuacct,cpu") + v1CgroupWithCPUControllerNSMountRel = strings.ReplaceAll(v1CgroupWithCPUControllerNSMountRel, "cpu,cpuacct", "cpuacct,cpu") + v1CgroupWithCPUControllerNSMountRelRemount = strings.ReplaceAll(v1CgroupWithCPUControllerNSMountRelRemount, "cpu,cpuacct", "cpuacct,cpu") + v1CgroupWithCPUControllerNS2 = strings.ReplaceAll(v1CgroupWithCPUControllerNS2, "cpu,cpuacct", "cpuacct,cpu") + + v1MountsWithCPUController = strings.ReplaceAll(v1MountsWithCPUController, "rw,cpu,cpuacct", "rw,cpuacct,cpu") + v1MountsWithCPUControllerNS = strings.ReplaceAll(v1MountsWithCPUControllerNS, "rw,cpu,cpuacct", "rw,cpuacct,cpu") + v1MountsWithCPUControllerNSMountRel = strings.ReplaceAll(v1MountsWithCPUControllerNSMountRel, "rw,cpu,cpuacct", "rw,cpuacct,cpu") + v1MountsWithCPUControllerNSMountRelRemount = strings.ReplaceAll(v1MountsWithCPUControllerNSMountRelRemount, "rw,cpu,cpuacct", "rw,cpuacct,cpu") + v1MountsWithCPUControllerNS2 = strings.ReplaceAll(v1MountsWithCPUControllerNS2, "rw,cpu,cpuacct", "rw,cpuacct,cpu") + } + testCgroupGetCPUHelper(t) + testCgroupsGetCPUPeriodAndQuota(t) + } +} + +func testCgroupGetCPUHelper(t *testing.T) { for _, tc := range []struct { name string paths map[string]string @@ -552,6 +575,147 @@ func TestCgroupsGetCPU(t *testing.T) { } } +func testCgroupsGetCPUPeriodAndQuota(t *testing.T) { + for _, tc := range []struct { + name string + paths map[string]string + errMsg string + period int64 + quota int64 + }{ + { + errMsg: "failed to read cpu cgroup from cgroups file:", + }, + { + paths: map[string]string{ + "/proc/self/cgroup": v1CgroupWithoutCPUController, + "/proc/self/mountinfo": v1MountsWithoutCPUController, + }, + errMsg: "no cpu controller detected", + }, + { + paths: map[string]string{ + "/proc/self/cgroup": v1CgroupWithCPUController, + }, + errMsg: "failed to read mounts info from file:", + }, + { + paths: map[string]string{ + "/proc/self/cgroup": v1CgroupWithCPUController, + "/proc/self/mountinfo": v1MountsWithoutCPUController, + }, + errMsg: "failed to detect cgroup root mount and version", + }, + { + paths: map[string]string{ + "/proc/self/cgroup": v1CgroupWithCPUController, + "/proc/self/mountinfo": v1MountsWithCPUController, + 
"/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us": "12345", + "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us": "67890", + }, + quota: int64(12345), + period: int64(67890), + }, + { + paths: map[string]string{ + "/proc/self/cgroup": v1CgroupWithCPUControllerNS, + "/proc/self/mountinfo": v1MountsWithCPUControllerNS, + "/sys/fs/cgroup/cpu,cpuacct/crdb_test/cpu.cfs_quota_us": "12345", + "/sys/fs/cgroup/cpu,cpuacct/crdb_test/cpu.cfs_period_us": "67890", + }, + quota: int64(12345), + period: int64(67890), + }, + { + paths: map[string]string{ + "/proc/self/cgroup": v1CgroupWithCPUControllerNSMountRel, + "/proc/self/mountinfo": v1MountsWithCPUControllerNSMountRel, + }, + errMsg: "failed to detect cgroup root mount and version", + }, + { + paths: map[string]string{ + "/proc/self/cgroup": v1CgroupWithCPUControllerNSMountRelRemount, + "/proc/self/mountinfo": v1MountsWithCPUControllerNSMountRelRemount, + "/sys/fs/cgroup/cpu,cpuacct/crdb_test/cpu.cfs_quota_us": "12345", + "/sys/fs/cgroup/cpu,cpuacct/crdb_test/cpu.cfs_period_us": "67890", + }, + quota: int64(12345), + period: int64(67890), + }, + { + paths: map[string]string{ + "/proc/self/cgroup": v1CgroupWithCPUControllerNS2, + "/proc/self/mountinfo": v1MountsWithCPUControllerNS2, + "/sys/fs/cgroup/cpu,cpuacct/crdb_test/cpu.cfs_quota_us": "12345", + "/sys/fs/cgroup/cpu,cpuacct/crdb_test/cpu.cfs_period_us": "67890", + }, + quota: int64(12345), + period: int64(67890), + }, + { + paths: map[string]string{ + "/proc/self/cgroup": v1CgroupWithCPUController, + "/proc/self/mountinfo": v1MountsWithCPUController, + "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us": "-1", + "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us": "67890", + }, + quota: int64(-1), + period: int64(67890), + }, + { + paths: map[string]string{ + "/proc/self/cgroup": v2CgroupWithMemoryController, + "/proc/self/mountinfo": v2Mounts, + }, + errMsg: "error when read cpu quota from cgroup v2", + }, + { + paths: map[string]string{ + "/proc/self/cgroup": v2CgroupWithMemoryController, + "/proc/self/mountinfo": v2Mounts, + "/sys/fs/cgroup/machine.slice/libpod-f1c6b44c0d61f273952b8daecf154cee1be2d503b7e9184ebf7fcaf48e139810.scope/cpu.max": "foo bar\n", + }, + errMsg: "error when reading cpu quota from cgroup v2 at", + }, + { + paths: map[string]string{ + "/proc/self/cgroup": v2CgroupWithMemoryController, + "/proc/self/mountinfo": v2Mounts, + "/sys/fs/cgroup/machine.slice/libpod-f1c6b44c0d61f273952b8daecf154cee1be2d503b7e9184ebf7fcaf48e139810.scope/cpu.max": "100 1000\n", + }, + quota: int64(100), + period: int64(1000), + }, + { + paths: map[string]string{ + "/proc/self/cgroup": v2CgroupWithMemoryController, + "/proc/self/mountinfo": v2Mounts, + "/sys/fs/cgroup/machine.slice/libpod-f1c6b44c0d61f273952b8daecf154cee1be2d503b7e9184ebf7fcaf48e139810.scope/cpu.max": "max 1000\n", + }, + quota: int64(-1), + period: int64(1000), + }, + { + paths: map[string]string{ + "/proc/self/cgroup": v2CgroupWithMemoryController, + "/proc/self/mountinfo": v2Mounts, + "/sys/fs/cgroup/machine.slice/libpod-f1c6b44c0d61f273952b8daecf154cee1be2d503b7e9184ebf7fcaf48e139810.scope/cpu.max": "100 1000\n", + }, + quota: int64(100), + period: int64(1000), + }, + } { + dir := createFiles(t, tc.paths) + + period, quota, err := getCgroupCPUPeriodAndQuota(dir) + require.True(t, isError(err, tc.errMsg), + "%v %v", err, tc.errMsg) + require.Equal(t, tc.quota, quota) + require.Equal(t, tc.period, period) + } +} + func createFiles(t *testing.T, paths map[string]string) (dir string) { dir = t.TempDir() @@ -564,7 +728,7 @@ func createFiles(t 
*testing.T, paths map[string]string) (dir string) { return dir } -const ( +var ( //#nosec G101 v1CgroupWithMemoryController = `11:blkio:/kubepods/besteffort/pod1bf924dd-3f6f-11ea-983d-0abc95f90166/c17eb535a47774285717e40bbda777ee72e81471272a5b8ebffd51fdf7f624e3 10:devices:/kubepods/besteffort/podcbfx2j5d-3f6f-11ea-983d-0abc95f90166/c17eb535a47774285717e40bbda777ee72e81471272a5b8ebffd51fdf7f624e3 diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 8809a706936..2cf5787646a 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -28,18 +28,30 @@ type Cluster interface { GetLabelStats() *statistics.LabelStatistics GetCoordinator() *schedule.Coordinator GetRuleManager() *placement.RuleManager + GetBasicCluster() *core.BasicCluster } // HandleStatsAsync handles the flow asynchronously. func HandleStatsAsync(c Cluster, region *core.RegionInfo) { - c.GetHotStat().CheckWriteAsync(statistics.NewCheckExpiredItemTask(region)) - c.GetHotStat().CheckReadAsync(statistics.NewCheckExpiredItemTask(region)) - reportInterval := region.GetInterval() - interval := reportInterval.GetEndTimestamp() - reportInterval.GetStartTimestamp() - for _, peer := range region.GetPeers() { - peerInfo := core.NewPeerInfo(peer, region.GetWriteLoads(), interval) - c.GetHotStat().CheckWriteAsync(statistics.NewCheckPeerTask(peerInfo, region)) + checkWritePeerTask := func(cache *statistics.HotPeerCache) { + reportInterval := region.GetInterval() + interval := reportInterval.GetEndTimestamp() - reportInterval.GetStartTimestamp() + stats := cache.CheckPeerFlow(region, region.GetPeers(), region.GetWriteLoads(), interval) + for _, stat := range stats { + cache.UpdateStat(stat) + } + } + + checkExpiredTask := func(cache *statistics.HotPeerCache) { + expiredStats := cache.CollectExpiredItems(region) + for _, stat := range expiredStats { + cache.UpdateStat(stat) + } } + + c.GetHotStat().CheckWriteAsync(checkExpiredTask) + c.GetHotStat().CheckReadAsync(checkExpiredTask) + c.GetHotStat().CheckWriteAsync(checkWritePeerTask) c.GetCoordinator().GetSchedulersController().CheckTransferWitnessLeader(region) } @@ -55,8 +67,17 @@ func HandleOverlaps(c Cluster, overlaps []*core.RegionInfo) { } // Collect collects the cluster information. -func Collect(c Cluster, region *core.RegionInfo, stores []*core.StoreInfo, hasRegionStats, isNew, isPrepared bool) { +func Collect(c Cluster, region *core.RegionInfo, hasRegionStats bool) { if hasRegionStats { - c.GetRegionStats().Observe(region, stores) + // get region again from root tree. make sure the observed region is the latest. 
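HandleStatsAsync now hands the hot-stat checkers plain closures over *statistics.HotPeerCache instead of dedicated task types. The shape of that pattern, reduced to a standalone sketch (statsCache and asyncChecker are illustrative names, not the statistics package API):

```go
package main

import (
	"fmt"
	"sync"
)

// statsCache and asyncChecker are illustrative stand-ins, not the PD types.
type statsCache struct{ flows map[uint64]uint64 }

type asyncChecker struct {
	tasks chan func(*statsCache)
	wg    sync.WaitGroup
}

func newAsyncChecker(cache *statsCache) *asyncChecker {
	c := &asyncChecker{tasks: make(chan func(*statsCache), 16)}
	c.wg.Add(1)
	go func() {
		defer c.wg.Done()
		for task := range c.tasks {
			task(cache) // each task runs against the same cache, in submission order
		}
	}()
	return c
}

func (c *asyncChecker) checkAsync(task func(*statsCache)) { c.tasks <- task }

func (c *asyncChecker) close() { close(c.tasks); c.wg.Wait() }

func main() {
	cache := &statsCache{flows: map[uint64]uint64{}}
	checker := newAsyncChecker(cache)

	// The caller captures what it needs (here a region ID and its written bytes)
	// and the checker goroutine only ever sees the closure.
	regionID, writtenBytes := uint64(1), uint64(4096)
	checker.checkAsync(func(c *statsCache) { c.flows[regionID] += writtenBytes })

	checker.close()
	fmt.Println(cache.flows[regionID]) // 4096
}
```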
+ bc := c.GetBasicCluster() + if bc == nil { + return + } + region = bc.GetRegion(region.GetID()) + if region == nil { + return + } + c.GetRegionStats().Observe(region, c.GetBasicCluster().GetRegionStores(region)) } } diff --git a/pkg/codec/codec_test.go b/pkg/codec/codec_test.go index f734d2e528e..50bf552a60d 100644 --- a/pkg/codec/codec_test.go +++ b/pkg/codec/codec_test.go @@ -21,7 +21,6 @@ import ( ) func TestDecodeBytes(t *testing.T) { - t.Parallel() re := require.New(t) key := "abcdefghijklmnopqrstuvwxyz" for i := 0; i < len(key); i++ { @@ -32,7 +31,6 @@ func TestDecodeBytes(t *testing.T) { } func TestTableID(t *testing.T) { - t.Parallel() re := require.New(t) key := EncodeBytes([]byte("t\x80\x00\x00\x00\x00\x00\x00\xff")) re.Equal(int64(0xff), key.TableID()) diff --git a/pkg/core/basic_cluster.go b/pkg/core/basic_cluster.go index d70b620db3b..2392b7ddac6 100644 --- a/pkg/core/basic_cluster.go +++ b/pkg/core/basic_cluster.go @@ -14,218 +14,43 @@ package core -import ( - "github.com/pingcap/kvproto/pkg/metapb" - "github.com/tikv/pd/pkg/core/storelimit" - "github.com/tikv/pd/pkg/utils/syncutil" -) - // BasicCluster provides basic data member and interface for a tikv cluster. type BasicCluster struct { - Stores struct { - mu syncutil.RWMutex - *StoresInfo - } - + *StoresInfo *RegionsInfo } // NewBasicCluster creates a BasicCluster. func NewBasicCluster() *BasicCluster { return &BasicCluster{ - Stores: struct { - mu syncutil.RWMutex - *StoresInfo - }{StoresInfo: NewStoresInfo()}, - + StoresInfo: NewStoresInfo(), RegionsInfo: NewRegionsInfo(), } } -/* Stores read operations */ - -// GetStores returns all Stores in the cluster. -func (bc *BasicCluster) GetStores() []*StoreInfo { - bc.Stores.mu.RLock() - defer bc.Stores.mu.RUnlock() - return bc.Stores.GetStores() -} - -// GetMetaStores gets a complete set of metapb.Store. -func (bc *BasicCluster) GetMetaStores() []*metapb.Store { - bc.Stores.mu.RLock() - defer bc.Stores.mu.RUnlock() - return bc.Stores.GetMetaStores() -} - -// GetStore searches for a store by ID. -func (bc *BasicCluster) GetStore(storeID uint64) *StoreInfo { - bc.Stores.mu.RLock() - defer bc.Stores.mu.RUnlock() - return bc.Stores.GetStore(storeID) -} - -// GetRegionStores returns all Stores that contains the region's peer. -func (bc *BasicCluster) GetRegionStores(region *RegionInfo) []*StoreInfo { - bc.Stores.mu.RLock() - defer bc.Stores.mu.RUnlock() - var Stores []*StoreInfo - for id := range region.GetStoreIDs() { - if store := bc.Stores.GetStore(id); store != nil { - Stores = append(Stores, store) - } - } - return Stores -} - -// GetNonWitnessVoterStores returns all Stores that contains the non-witness's voter peer. -func (bc *BasicCluster) GetNonWitnessVoterStores(region *RegionInfo) []*StoreInfo { - bc.Stores.mu.RLock() - defer bc.Stores.mu.RUnlock() - var Stores []*StoreInfo - for id := range region.GetNonWitnessVoters() { - if store := bc.Stores.GetStore(id); store != nil { - Stores = append(Stores, store) - } - } - return Stores -} - -// GetFollowerStores returns all Stores that contains the region's follower peer. -func (bc *BasicCluster) GetFollowerStores(region *RegionInfo) []*StoreInfo { - bc.Stores.mu.RLock() - defer bc.Stores.mu.RUnlock() - var Stores []*StoreInfo - for id := range region.GetFollowers() { - if store := bc.Stores.GetStore(id); store != nil { - Stores = append(Stores, store) - } - } - return Stores -} - -// GetLeaderStore returns all Stores that contains the region's leader peer. 
-func (bc *BasicCluster) GetLeaderStore(region *RegionInfo) *StoreInfo { - bc.Stores.mu.RLock() - defer bc.Stores.mu.RUnlock() - return bc.Stores.GetStore(region.GetLeader().GetStoreId()) -} - -// GetStoreCount returns the total count of storeInfo. -func (bc *BasicCluster) GetStoreCount() int { - bc.Stores.mu.RLock() - defer bc.Stores.mu.RUnlock() - return bc.Stores.GetStoreCount() -} - -/* Stores Write operations */ - -// PauseLeaderTransfer prevents the store from been selected as source or -// target store of TransferLeader. -func (bc *BasicCluster) PauseLeaderTransfer(storeID uint64) error { - bc.Stores.mu.Lock() - defer bc.Stores.mu.Unlock() - return bc.Stores.PauseLeaderTransfer(storeID) -} - -// ResumeLeaderTransfer cleans a store's pause state. The store can be selected -// as source or target of TransferLeader again. -func (bc *BasicCluster) ResumeLeaderTransfer(storeID uint64) { - bc.Stores.mu.Lock() - defer bc.Stores.mu.Unlock() - bc.Stores.ResumeLeaderTransfer(storeID) -} - -// SlowStoreEvicted marks a store as a slow store and prevents transferring -// leader to the store -func (bc *BasicCluster) SlowStoreEvicted(storeID uint64) error { - bc.Stores.mu.Lock() - defer bc.Stores.mu.Unlock() - return bc.Stores.SlowStoreEvicted(storeID) -} - -// SlowTrendEvicted marks a store as a slow store by trend and prevents transferring -// leader to the store -func (bc *BasicCluster) SlowTrendEvicted(storeID uint64) error { - bc.Stores.mu.Lock() - defer bc.Stores.mu.Unlock() - return bc.Stores.SlowTrendEvicted(storeID) -} - -// SlowTrendRecovered cleans the evicted by slow trend state of a store. -func (bc *BasicCluster) SlowTrendRecovered(storeID uint64) { - bc.Stores.mu.Lock() - defer bc.Stores.mu.Unlock() - bc.Stores.SlowTrendRecovered(storeID) -} - -// SlowStoreRecovered cleans the evicted state of a store. -func (bc *BasicCluster) SlowStoreRecovered(storeID uint64) { - bc.Stores.mu.Lock() - defer bc.Stores.mu.Unlock() - bc.Stores.SlowStoreRecovered(storeID) -} - -// ResetStoreLimit resets the limit for a specific store. -func (bc *BasicCluster) ResetStoreLimit(storeID uint64, limitType storelimit.Type, ratePerSec ...float64) { - bc.Stores.mu.Lock() - defer bc.Stores.mu.Unlock() - bc.Stores.ResetStoreLimit(storeID, limitType, ratePerSec...) -} - // UpdateStoreStatus updates the information of the store. func (bc *BasicCluster) UpdateStoreStatus(storeID uint64) { - leaderCount, regionCount, witnessCount, learnerCount, pendingPeerCount, leaderRegionSize, regionSize := bc.RegionsInfo.GetStoreStats(storeID) - bc.Stores.mu.Lock() - defer bc.Stores.mu.Unlock() - bc.Stores.UpdateStoreStatus(storeID, leaderCount, regionCount, witnessCount, learnerCount, pendingPeerCount, leaderRegionSize, regionSize) -} - -// PutStore put a store. -func (bc *BasicCluster) PutStore(store *StoreInfo) { - bc.Stores.mu.Lock() - defer bc.Stores.mu.Unlock() - bc.Stores.SetStore(store) -} - -// ResetStores resets the store cache. -func (bc *BasicCluster) ResetStores() { - bc.Stores.mu.Lock() - defer bc.Stores.mu.Unlock() - bc.Stores.StoresInfo = NewStoresInfo() -} - -// DeleteStore deletes a store. 
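The deletions in BasicCluster all follow from one change: StoresInfo is now embedded directly and is assumed to do its own locking, so its methods are promoted onto BasicCluster and the per-call lock wrappers become unnecessary. A reduced sketch of that embedding pattern, with hypothetical types rather than the core package ones:

```go
package main

import (
	"fmt"
	"sync"
)

// storesInfo locks itself, like the refactored StoresInfo is assumed to.
type storesInfo struct {
	mu     sync.RWMutex
	stores map[uint64]string
}

func (s *storesInfo) GetStore(id uint64) string {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.stores[id]
}

// basicCluster embeds the pointer, so GetStore is promoted and no wrapper
// (or second layer of locking) is needed on the cluster type.
type basicCluster struct {
	*storesInfo
}

func main() {
	bc := &basicCluster{storesInfo: &storesInfo{stores: map[uint64]string{1: "store-1"}}}
	fmt.Println(bc.GetStore(1)) // calls the embedded, internally locked method
}
```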
-func (bc *BasicCluster) DeleteStore(store *StoreInfo) { - bc.Stores.mu.Lock() - defer bc.Stores.mu.Unlock() - bc.Stores.DeleteStore(store) + leaderCount, regionCount, witnessCount, learnerCount, pendingPeerCount, leaderRegionSize, regionSize := bc.GetStoreStats(storeID) + bc.StoresInfo.UpdateStoreStatus(storeID, leaderCount, regionCount, witnessCount, learnerCount, pendingPeerCount, leaderRegionSize, regionSize) } /* Regions read operations */ // GetLeaderStoreByRegionID returns the leader store of the given region. func (bc *BasicCluster) GetLeaderStoreByRegionID(regionID uint64) *StoreInfo { - region := bc.RegionsInfo.GetRegion(regionID) + region := bc.GetRegion(regionID) if region == nil || region.GetLeader() == nil { return nil } - bc.Stores.mu.RLock() - defer bc.Stores.mu.RUnlock() - return bc.Stores.GetStore(region.GetLeader().GetStoreId()) + return bc.GetStore(region.GetLeader().GetStoreId()) } func (bc *BasicCluster) getWriteRate( f func(storeID uint64) (bytesRate, keysRate float64), ) (storeIDs []uint64, bytesRates, keysRates []float64) { - bc.Stores.mu.RLock() - count := len(bc.Stores.stores) - storeIDs = make([]uint64, 0, count) - for _, store := range bc.Stores.stores { - storeIDs = append(storeIDs, store.GetID()) - } - bc.Stores.mu.RUnlock() + storeIDs = bc.GetStoreIDs() + count := len(storeIDs) bytesRates = make([]float64, 0, count) keysRates = make([]float64, 0, count) for _, id := range storeIDs { @@ -238,12 +63,12 @@ func (bc *BasicCluster) getWriteRate( // GetStoresLeaderWriteRate get total write rate of each store's leaders. func (bc *BasicCluster) GetStoresLeaderWriteRate() (storeIDs []uint64, bytesRates, keysRates []float64) { - return bc.getWriteRate(bc.RegionsInfo.GetStoreLeaderWriteRate) + return bc.getWriteRate(bc.GetStoreLeaderWriteRate) } // GetStoresWriteRate get total write rate of each store's regions. func (bc *BasicCluster) GetStoresWriteRate() (storeIDs []uint64, bytesRates, keysRates []float64) { - return bc.getWriteRate(bc.RegionsInfo.GetStoreWriteRate) + return bc.getWriteRate(bc.GetStoreWriteRate) } // UpdateAllStoreStatus updates the information of all stores. diff --git a/pkg/core/context.go b/pkg/core/context.go new file mode 100644 index 00000000000..7410f8394c2 --- /dev/null +++ b/pkg/core/context.go @@ -0,0 +1,43 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package core + +import ( + "context" + + "github.com/tikv/pd/pkg/ratelimit" +) + +// MetaProcessContext is a context for meta process. +type MetaProcessContext struct { + context.Context + Tracer RegionHeartbeatProcessTracer + TaskRunner ratelimit.Runner + MiscRunner ratelimit.Runner + LogRunner ratelimit.Runner +} + +// NewMetaProcessContext creates a new MetaProcessContext. +// used in tests, can be changed if no need to test concurrency. 
+func ContextTODO() *MetaProcessContext { + return &MetaProcessContext{ + Context: context.TODO(), + Tracer: NewNoopHeartbeatProcessTracer(), + TaskRunner: ratelimit.NewSyncRunner(), + MiscRunner: ratelimit.NewSyncRunner(), + LogRunner: ratelimit.NewSyncRunner(), + // Limit default is nil + } +} diff --git a/pkg/core/metrics.go b/pkg/core/metrics.go new file mode 100644 index 00000000000..7d2c904f319 --- /dev/null +++ b/pkg/core/metrics.go @@ -0,0 +1,272 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package core + +import ( + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" + "go.uber.org/zap" +) + +var ( + // HeartbeatBreakdownHandleDurationSum is the summary of the processing time of handle the heartbeat stage. + HeartbeatBreakdownHandleDurationSum = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "pd", + Subsystem: "core", + Name: "region_heartbeat_breakdown_handle_duration_seconds_sum", + Help: "Bucketed histogram of processing time (s) of handle the heartbeat stage.", + }, []string{"name"}) + + // HeartbeatBreakdownHandleCount is the summary of the processing count of handle the heartbeat stage. + HeartbeatBreakdownHandleCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "pd", + Subsystem: "core", + Name: "region_heartbeat_breakdown_handle_duration_seconds_count", + Help: "Bucketed histogram of processing count of handle the heartbeat stage.", + }, []string{"name"}) + // AcquireRegionsLockWaitDurationSum is the summary of the processing time of waiting for acquiring regions lock. + AcquireRegionsLockWaitDurationSum = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "pd", + Subsystem: "core", + Name: "acquire_regions_lock_wait_duration_seconds_sum", + Help: "Bucketed histogram of processing time (s) of waiting for acquiring regions lock.", + }, []string{"type"}) + // AcquireRegionsLockWaitCount is the summary of the processing count of waiting for acquiring regions lock. 
+ AcquireRegionsLockWaitCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "pd", + Subsystem: "core", + Name: "acquire_regions_lock_wait_duration_seconds_count", + Help: "Bucketed histogram of processing count of waiting for acquiring regions lock.", + }, []string{"name"}) + + // lock statistics + waitRegionsLockDurationSum = AcquireRegionsLockWaitDurationSum.WithLabelValues("WaitRegionsLock") + waitRegionsLockCount = AcquireRegionsLockWaitCount.WithLabelValues("WaitRegionsLock") + waitSubRegionsLockDurationSum = AcquireRegionsLockWaitDurationSum.WithLabelValues("WaitSubRegionsLock") + waitSubRegionsLockCount = AcquireRegionsLockWaitCount.WithLabelValues("WaitSubRegionsLock") + + // heartbeat breakdown statistics + preCheckDurationSum = HeartbeatBreakdownHandleDurationSum.WithLabelValues("PreCheck") + preCheckCount = HeartbeatBreakdownHandleCount.WithLabelValues("PreCheck") + asyncHotStatsDurationSum = HeartbeatBreakdownHandleDurationSum.WithLabelValues("AsyncHotStatsDuration") + asyncHotStatsCount = HeartbeatBreakdownHandleCount.WithLabelValues("AsyncHotStatsDuration") + regionGuideDurationSum = HeartbeatBreakdownHandleDurationSum.WithLabelValues("RegionGuide") + regionGuideCount = HeartbeatBreakdownHandleCount.WithLabelValues("RegionGuide") + checkOverlapsDurationSum = HeartbeatBreakdownHandleDurationSum.WithLabelValues("SaveCache_CheckOverlaps") + checkOverlapsCount = HeartbeatBreakdownHandleCount.WithLabelValues("SaveCache_CheckOverlaps") + validateRegionDurationSum = HeartbeatBreakdownHandleDurationSum.WithLabelValues("SaveCache_InvalidRegion") + validateRegionCount = HeartbeatBreakdownHandleCount.WithLabelValues("SaveCache_InvalidRegion") + setRegionDurationSum = HeartbeatBreakdownHandleDurationSum.WithLabelValues("SaveCache_SetRegion") + setRegionCount = HeartbeatBreakdownHandleCount.WithLabelValues("SaveCache_SetRegion") + updateSubTreeDurationSum = HeartbeatBreakdownHandleDurationSum.WithLabelValues("SaveCache_UpdateSubTree") + updateSubTreeCount = HeartbeatBreakdownHandleCount.WithLabelValues("SaveCache_UpdateSubTree") + regionCollectDurationSum = HeartbeatBreakdownHandleDurationSum.WithLabelValues("CollectRegionStats") + regionCollectCount = HeartbeatBreakdownHandleCount.WithLabelValues("CollectRegionStats") + otherDurationSum = HeartbeatBreakdownHandleDurationSum.WithLabelValues("Other") + otherCount = HeartbeatBreakdownHandleCount.WithLabelValues("Other") +) + +func init() { + prometheus.MustRegister(HeartbeatBreakdownHandleDurationSum) + prometheus.MustRegister(HeartbeatBreakdownHandleCount) + prometheus.MustRegister(AcquireRegionsLockWaitDurationSum) + prometheus.MustRegister(AcquireRegionsLockWaitCount) +} + +var tracerPool = &sync.Pool{ + New: func() any { + return ®ionHeartbeatProcessTracer{} + }, +} + +type saveCacheStats struct { + startTime time.Time + lastCheckTime time.Time + checkOverlapsDuration time.Duration + validateRegionDuration time.Duration + setRegionDuration time.Duration + updateSubTreeDuration time.Duration +} + +// RegionHeartbeatProcessTracer is used to trace the process of handling region heartbeat. 
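tracerPool follows the usual sync.Pool discipline: Get a tracer per heartbeat, and have Release (defined further down in this file) zero the struct before putting it back so a reused tracer never carries stale timings. A self-contained sketch of that acquire/reset/release cycle, with a hypothetical payload type:

```go
package main

import (
	"fmt"
	"sync"
)

// tracer is a hypothetical payload; the real pool stores *regionHeartbeatProcessTracer.
type tracer struct{ spans int }

var pool = sync.Pool{New: func() any { return &tracer{} }}

func acquire() *tracer { return pool.Get().(*tracer) }

func release(t *tracer) {
	*t = tracer{} // zero the fields so a reused tracer never carries stale timings
	pool.Put(t)
}

func main() {
	t := acquire()
	t.spans = 7
	release(t)
	// Prints 0 whether the pool hands back the recycled object or a fresh one.
	fmt.Println(acquire().spans)
}
```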
+type RegionHeartbeatProcessTracer interface { + Begin() + OnPreCheckFinished() + OnAsyncHotStatsFinished() + OnRegionGuideFinished() + OnSaveCacheBegin() + OnSaveCacheFinished() + OnCheckOverlapsFinished() + OnValidateRegionFinished() + OnSetRegionFinished() + OnUpdateSubTreeFinished() + OnCollectRegionStatsFinished() + OnAllStageFinished() + LogFields() []zap.Field + Release() +} + +type noopHeartbeatProcessTracer struct{} + +// NewNoopHeartbeatProcessTracer returns a noop heartbeat process tracer. +func NewNoopHeartbeatProcessTracer() RegionHeartbeatProcessTracer { + return &noopHeartbeatProcessTracer{} +} + +func (*noopHeartbeatProcessTracer) Begin() {} +func (*noopHeartbeatProcessTracer) OnPreCheckFinished() {} +func (*noopHeartbeatProcessTracer) OnAsyncHotStatsFinished() {} +func (*noopHeartbeatProcessTracer) OnRegionGuideFinished() {} +func (*noopHeartbeatProcessTracer) OnSaveCacheBegin() {} +func (*noopHeartbeatProcessTracer) OnSaveCacheFinished() {} +func (*noopHeartbeatProcessTracer) OnCheckOverlapsFinished() {} +func (*noopHeartbeatProcessTracer) OnValidateRegionFinished() {} +func (*noopHeartbeatProcessTracer) OnSetRegionFinished() {} +func (*noopHeartbeatProcessTracer) OnUpdateSubTreeFinished() {} +func (*noopHeartbeatProcessTracer) OnCollectRegionStatsFinished() {} +func (*noopHeartbeatProcessTracer) OnAllStageFinished() {} +func (*noopHeartbeatProcessTracer) LogFields() []zap.Field { + return nil +} +func (*noopHeartbeatProcessTracer) Release() {} + +type regionHeartbeatProcessTracer struct { + startTime time.Time + lastCheckTime time.Time + preCheckDuration time.Duration + asyncHotStatsDuration time.Duration + regionGuideDuration time.Duration + saveCacheStats saveCacheStats + OtherDuration time.Duration +} + +// NewHeartbeatProcessTracer returns a heartbeat process tracer. 
+func NewHeartbeatProcessTracer() RegionHeartbeatProcessTracer { + return tracerPool.Get().(*regionHeartbeatProcessTracer) +} + +func (h *regionHeartbeatProcessTracer) Begin() { + now := time.Now() + h.startTime = now + h.lastCheckTime = now +} + +func (h *regionHeartbeatProcessTracer) OnPreCheckFinished() { + now := time.Now() + h.preCheckDuration = now.Sub(h.lastCheckTime) + h.lastCheckTime = now + preCheckDurationSum.Add(h.preCheckDuration.Seconds()) + preCheckCount.Inc() +} + +func (h *regionHeartbeatProcessTracer) OnAsyncHotStatsFinished() { + now := time.Now() + h.asyncHotStatsDuration = now.Sub(h.lastCheckTime) + h.lastCheckTime = now + asyncHotStatsDurationSum.Add(h.preCheckDuration.Seconds()) + asyncHotStatsCount.Inc() +} + +func (h *regionHeartbeatProcessTracer) OnRegionGuideFinished() { + now := time.Now() + h.regionGuideDuration = now.Sub(h.lastCheckTime) + h.lastCheckTime = now + regionGuideDurationSum.Add(h.regionGuideDuration.Seconds()) + regionGuideCount.Inc() +} + +func (h *regionHeartbeatProcessTracer) OnSaveCacheBegin() { + now := time.Now() + h.saveCacheStats.startTime = now + h.saveCacheStats.lastCheckTime = now + h.lastCheckTime = now +} + +func (h *regionHeartbeatProcessTracer) OnSaveCacheFinished() { + // update the outer checkpoint time + h.lastCheckTime = time.Now() +} + +func (h *regionHeartbeatProcessTracer) OnCollectRegionStatsFinished() { + now := time.Now() + regionCollectDurationSum.Add(now.Sub(h.lastCheckTime).Seconds()) + regionCollectCount.Inc() + h.lastCheckTime = now +} + +func (h *regionHeartbeatProcessTracer) OnCheckOverlapsFinished() { + now := time.Now() + h.saveCacheStats.checkOverlapsDuration = now.Sub(h.lastCheckTime) + h.saveCacheStats.lastCheckTime = now + checkOverlapsDurationSum.Add(h.saveCacheStats.checkOverlapsDuration.Seconds()) + checkOverlapsCount.Inc() +} + +func (h *regionHeartbeatProcessTracer) OnValidateRegionFinished() { + now := time.Now() + h.saveCacheStats.validateRegionDuration = now.Sub(h.saveCacheStats.lastCheckTime) + h.saveCacheStats.lastCheckTime = now + validateRegionDurationSum.Add(h.saveCacheStats.validateRegionDuration.Seconds()) + validateRegionCount.Inc() +} + +func (h *regionHeartbeatProcessTracer) OnSetRegionFinished() { + now := time.Now() + h.saveCacheStats.setRegionDuration = now.Sub(h.saveCacheStats.lastCheckTime) + h.saveCacheStats.lastCheckTime = now + setRegionDurationSum.Add(h.saveCacheStats.setRegionDuration.Seconds()) + setRegionCount.Inc() +} + +func (h *regionHeartbeatProcessTracer) OnUpdateSubTreeFinished() { + now := time.Now() + h.saveCacheStats.updateSubTreeDuration = now.Sub(h.saveCacheStats.lastCheckTime) + h.saveCacheStats.lastCheckTime = now + updateSubTreeDurationSum.Add(h.saveCacheStats.updateSubTreeDuration.Seconds()) + updateSubTreeCount.Inc() +} + +func (h *regionHeartbeatProcessTracer) OnAllStageFinished() { + now := time.Now() + h.OtherDuration = now.Sub(h.lastCheckTime) + otherDurationSum.Add(h.OtherDuration.Seconds()) + otherCount.Inc() +} + +func (h *regionHeartbeatProcessTracer) LogFields() []zap.Field { + return []zap.Field{ + zap.Duration("pre-check-duration", h.preCheckDuration), + zap.Duration("async-hot-stats-duration", h.asyncHotStatsDuration), + zap.Duration("region-guide-duration", h.regionGuideDuration), + zap.Duration("check-overlaps-duration", h.saveCacheStats.checkOverlapsDuration), + zap.Duration("validate-region-duration", h.saveCacheStats.validateRegionDuration), + zap.Duration("set-region-duration", h.saveCacheStats.setRegionDuration), + 
zap.Duration("update-sub-tree-duration", h.saveCacheStats.updateSubTreeDuration), + zap.Duration("other-duration", h.OtherDuration), + } +} + +// Release puts the tracer back into the pool. +func (h *regionHeartbeatProcessTracer) Release() { + // Reset the fields of h to their zero values. + *h = regionHeartbeatProcessTracer{} + tracerPool.Put(h) +} diff --git a/pkg/core/peer.go b/pkg/core/peer.go index 659886e6d39..1f888ba58eb 100644 --- a/pkg/core/peer.go +++ b/pkg/core/peer.go @@ -77,34 +77,3 @@ func CountInJointState(peers ...*metapb.Peer) int { } return count } - -// PeerInfo provides peer information -type PeerInfo struct { - *metapb.Peer - loads []float64 - interval uint64 -} - -// NewPeerInfo creates PeerInfo -func NewPeerInfo(meta *metapb.Peer, loads []float64, interval uint64) *PeerInfo { - return &PeerInfo{ - Peer: meta, - loads: loads, - interval: interval, - } -} - -// GetLoads provides loads -func (p *PeerInfo) GetLoads() []float64 { - return p.loads -} - -// GetPeerID provides peer id -func (p *PeerInfo) GetPeerID() uint64 { - return p.GetId() -} - -// GetInterval returns reporting interval -func (p *PeerInfo) GetInterval() uint64 { - return p.interval -} diff --git a/pkg/core/rangetree/range_tree_test.go b/pkg/core/rangetree/range_tree_test.go index 29845cf0bca..6955947cb1b 100644 --- a/pkg/core/rangetree/range_tree_test.go +++ b/pkg/core/rangetree/range_tree_test.go @@ -73,11 +73,11 @@ func bucketDebrisFactory(startKey, endKey []byte, item RangeItem) []RangeItem { if bytes.Compare(left, right) >= 0 { return nil } - // the left has oen intersection like |010 - 100| and |020 - 100|. + // the left has one intersection like |010 - 100| and |020 - 100|. if !bytes.Equal(item.GetStartKey(), left) { res = append(res, newSimpleBucketItem(item.GetStartKey(), left)) } - // the right has oen intersection like |010 - 100| and |010 - 099|. + // the right has one intersection like |010 - 100| and |010 - 099|. if !bytes.Equal(right, item.GetEndKey()) { res = append(res, newSimpleBucketItem(right, item.GetEndKey())) } @@ -85,7 +85,6 @@ func bucketDebrisFactory(startKey, endKey []byte, item RangeItem) []RangeItem { } func TestRingPutItem(t *testing.T) { - t.Parallel() re := require.New(t) bucketTree := NewRangeTree(2, bucketDebrisFactory) bucketTree.Update(newSimpleBucketItem([]byte("002"), []byte("100"))) @@ -120,7 +119,6 @@ func TestRingPutItem(t *testing.T) { } func TestDebris(t *testing.T) { - t.Parallel() re := require.New(t) ringItem := newSimpleBucketItem([]byte("010"), []byte("090")) var overlaps []RangeItem diff --git a/pkg/core/region.go b/pkg/core/region.go index b141e8478da..df4cfc17be2 100644 --- a/pkg/core/region.go +++ b/pkg/core/region.go @@ -55,7 +55,6 @@ func errRegionIsStale(region *metapb.Region, origin *metapb.Region) error { // the properties are Read-Only once created except buckets. // the `buckets` could be modified by the request `report buckets` with greater version. type RegionInfo struct { - term uint64 meta *metapb.Region learners []*metapb.Peer witnesses []*metapb.Peer @@ -63,6 +62,7 @@ type RegionInfo struct { leader *metapb.Peer downPeers []*pdpb.PeerStats pendingPeers []*metapb.Peer + term uint64 cpuUsage uint64 writtenBytes uint64 writtenKeys uint64 @@ -79,6 +79,8 @@ type RegionInfo struct { buckets unsafe.Pointer // source is used to indicate region's source, such as Storage/Sync/Heartbeat. source RegionSource + // ref is used to indicate the reference count of the region in root-tree and sub-tree. 
+ ref atomic.Int32 } // RegionSource is the source of region. @@ -98,6 +100,27 @@ func (r *RegionInfo) LoadedFromStorage() bool { return r.source == Storage } +// LoadedFromSync means this region's meta info loaded from region syncer. +// Only used for test. +func (r *RegionInfo) LoadedFromSync() bool { + return r.source == Sync +} + +// IncRef increases the reference count. +func (r *RegionInfo) IncRef() { + r.ref.Add(1) +} + +// DecRef decreases the reference count. +func (r *RegionInfo) DecRef() { + r.ref.Add(-1) +} + +// GetRef returns the reference count. +func (r *RegionInfo) GetRef() int32 { + return r.ref.Load() +} + // NewRegionInfo creates RegionInfo with region's meta and leader peer. func NewRegionInfo(region *metapb.Region, leader *metapb.Peer, opts ...RegionCreateOption) *RegionInfo { regionInfo := &RegionInfo{ @@ -113,26 +136,22 @@ func NewRegionInfo(region *metapb.Region, leader *metapb.Peer, opts ...RegionCre // classifyVoterAndLearner sorts out voter and learner from peers into different slice. func classifyVoterAndLearner(region *RegionInfo) { - learners := make([]*metapb.Peer, 0, 1) - voters := make([]*metapb.Peer, 0, len(region.meta.Peers)) - witnesses := make([]*metapb.Peer, 0, 1) + region.learners = make([]*metapb.Peer, 0, 1) + region.voters = make([]*metapb.Peer, 0, len(region.meta.Peers)) + region.witnesses = make([]*metapb.Peer, 0, 1) for _, p := range region.meta.Peers { if IsLearner(p) { - learners = append(learners, p) + region.learners = append(region.learners, p) } else { - voters = append(voters, p) + region.voters = append(region.voters, p) } - // Whichever peer role can be a witness if IsWitness(p) { - witnesses = append(witnesses, p) + region.witnesses = append(region.witnesses, p) } } - sort.Sort(peerSlice(learners)) - sort.Sort(peerSlice(voters)) - sort.Sort(peerSlice(witnesses)) - region.learners = learners - region.voters = voters - region.witnesses = witnesses + sort.Sort(peerSlice(region.learners)) + sort.Sort(peerSlice(region.voters)) + sort.Sort(peerSlice(region.witnesses)) } // peersEqualTo returns true when the peers are not changed, which may caused by: the region leader not changed, @@ -190,7 +209,7 @@ type RegionHeartbeatRequest interface { } // RegionFromHeartbeat constructs a Region from region heartbeat. -func RegionFromHeartbeat(heartbeat RegionHeartbeatRequest, opts ...RegionCreateOption) *RegionInfo { +func RegionFromHeartbeat(heartbeat RegionHeartbeatRequest, flowRoundDivisor int) *RegionInfo { // Convert unit to MB. // If region isn't empty and less than 1MB, use 1MB instead. // The size of empty region will be correct by the previous RegionInfo. 
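A minimal sketch of the new call shape, assuming a *pdpb.RegionHeartbeatRequest named heartbeatReq (the divisor value of 3 is only illustrative; the tests further below happen to use the same value):

	region := RegionFromHeartbeat(heartbeatReq, 3 /* flowRoundDivisor */)
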
@@ -200,20 +219,21 @@ func RegionFromHeartbeat(heartbeat RegionHeartbeatRequest, opts ...RegionCreateO } region := &RegionInfo{ - term: heartbeat.GetTerm(), - meta: heartbeat.GetRegion(), - leader: heartbeat.GetLeader(), - downPeers: heartbeat.GetDownPeers(), - pendingPeers: heartbeat.GetPendingPeers(), - writtenBytes: heartbeat.GetBytesWritten(), - writtenKeys: heartbeat.GetKeysWritten(), - readBytes: heartbeat.GetBytesRead(), - readKeys: heartbeat.GetKeysRead(), - approximateSize: int64(regionSize), - approximateKeys: int64(heartbeat.GetApproximateKeys()), - interval: heartbeat.GetInterval(), - queryStats: heartbeat.GetQueryStats(), - source: Heartbeat, + term: heartbeat.GetTerm(), + meta: heartbeat.GetRegion(), + leader: heartbeat.GetLeader(), + downPeers: heartbeat.GetDownPeers(), + pendingPeers: heartbeat.GetPendingPeers(), + writtenBytes: heartbeat.GetBytesWritten(), + writtenKeys: heartbeat.GetKeysWritten(), + readBytes: heartbeat.GetBytesRead(), + readKeys: heartbeat.GetKeysRead(), + approximateSize: int64(regionSize), + approximateKeys: int64(heartbeat.GetApproximateKeys()), + interval: heartbeat.GetInterval(), + queryStats: heartbeat.GetQueryStats(), + source: Heartbeat, + flowRoundDivisor: uint64(flowRoundDivisor), } // scheduling service doesn't need the following fields. @@ -223,10 +243,6 @@ func RegionFromHeartbeat(heartbeat RegionHeartbeatRequest, opts ...RegionCreateO region.cpuUsage = h.GetCpuUsage() } - for _, opt := range opts { - opt(region) - } - if region.writtenKeys >= ImpossibleFlowSize || region.writtenBytes >= ImpossibleFlowSize { region.writtenKeys = 0 region.writtenBytes = 0 @@ -703,34 +719,59 @@ func (r *RegionInfo) isRegionRecreated() bool { return r.GetRegionEpoch().GetVersion() == 1 && r.GetRegionEpoch().GetConfVer() == 1 && (len(r.GetStartKey()) != 0 || len(r.GetEndKey()) != 0) } +func (r *RegionInfo) Contains(key []byte) bool { + start, end := r.GetStartKey(), r.GetEndKey() + return bytes.Compare(key, start) >= 0 && (len(end) == 0 || bytes.Compare(key, end) < 0) +} + // RegionGuideFunc is a function that determines which follow-up operations need to be performed based on the origin // and new region information. -type RegionGuideFunc func(region, origin *RegionInfo) (isNew, saveKV, saveCache, needSync bool) +type RegionGuideFunc func(ctx *MetaProcessContext, region, origin *RegionInfo) (saveKV, saveCache, needSync, retained bool) // GenerateRegionGuideFunc is used to generate a RegionGuideFunc. Control the log output by specifying the log function. // nil means do not print the log. func GenerateRegionGuideFunc(enableLog bool) RegionGuideFunc { - noLog := func(msg string, fields ...zap.Field) {} - debug, info := noLog, noLog + noLog := func(string, ...zap.Field) {} + d, i := noLog, noLog if enableLog { - debug = log.Debug - info = log.Info + d = log.Debug + i = log.Info } // Save to storage if meta is updated. // Save to cache if meta or leader is updated, or contains any down/pending peer. - // Mark isNew if the region in cache does not have leader. - return func(region, origin *RegionInfo) (isNew, saveKV, saveCache, needSync bool) { + return func(ctx *MetaProcessContext, region, origin *RegionInfo) (saveKV, saveCache, needSync, retained bool) { + logRunner := ctx.LogRunner + // print log asynchronously + debug, info := d, i + regionID := region.GetID() + if logRunner != nil { + debug = func(msg string, fields ...zap.Field) { + _ = logRunner.RunTask( + regionID, + "DebugLog", + func() { + d(msg, fields...) 
+ }, + ) + } + info = func(msg string, fields ...zap.Field) { + _ = logRunner.RunTask( + regionID, + "InfoLog", + func() { + i(msg, fields...) + }, + ) + } + } if origin == nil { if log.GetLevel() <= zap.DebugLevel { debug("insert new region", zap.Uint64("region-id", region.GetID()), logutil.ZapRedactStringer("meta-region", RegionToHexMeta(region.GetMeta()))) } - saveKV, saveCache, isNew = true, true, true + saveKV, saveCache, retained = true, true, true } else { - if origin.LoadedFromStorage() { - isNew = true - } r := region.GetRegionEpoch() o := origin.GetRegionEpoch() if r.GetVersion() > o.GetVersion() { @@ -742,7 +783,7 @@ func GenerateRegionGuideFunc(enableLog bool) RegionGuideFunc { zap.Uint64("new-version", r.GetVersion()), ) } - saveKV, saveCache = true, true + saveKV, saveCache, retained = true, true, true } if r.GetConfVer() > o.GetConfVer() { if log.GetLevel() <= zap.InfoLevel { @@ -753,12 +794,10 @@ func GenerateRegionGuideFunc(enableLog bool) RegionGuideFunc { zap.Uint64("new-confver", r.GetConfVer()), ) } - saveKV, saveCache = true, true + saveKV, saveCache, retained = true, true, true } if region.GetLeader().GetId() != origin.GetLeader().GetId() { - if origin.GetLeader().GetId() == 0 { - isNew = true - } else if log.GetLevel() <= zap.InfoLevel { + if origin.GetLeader().GetId() != 0 && log.GetLevel() <= zap.InfoLevel { info("leader changed", zap.Uint64("region-id", region.GetID()), zap.Uint64("from", origin.GetLeader().GetStoreId()), @@ -789,7 +828,7 @@ func GenerateRegionGuideFunc(enableLog bool) RegionGuideFunc { } if !SortedPeersStatsEqual(region.GetDownPeers(), origin.GetDownPeers()) { if log.GetLevel() <= zap.DebugLevel { - debug("down-peers changed", zap.Uint64("region-id", region.GetID())) + debug("down-peers changed", zap.Uint64("region-id", region.GetID()), zap.Reflect("before", origin.GetDownPeers()), zap.Reflect("after", region.GetDownPeers())) } saveCache, needSync = true, true return @@ -824,24 +863,63 @@ func GenerateRegionGuideFunc(enableLog bool) RegionGuideFunc { } } +// RWLockStats is a read-write lock with statistics. +type RWLockStats struct { + syncutil.RWMutex + totalWaitTime int64 + lockCount int64 + lastLockCount int64 + lastTotalWaitTime int64 +} + +// Lock locks the lock and records the waiting time. +func (l *RWLockStats) Lock() { + startTime := time.Now() + l.RWMutex.Lock() + elapsed := time.Since(startTime).Nanoseconds() + atomic.AddInt64(&l.totalWaitTime, elapsed) + atomic.AddInt64(&l.lockCount, 1) +} + +// Unlock unlocks the lock. +func (l *RWLockStats) Unlock() { + l.RWMutex.Unlock() +} + +// RLock locks the lock for reading and records the waiting time. +func (l *RWLockStats) RLock() { + startTime := time.Now() + l.RWMutex.RLock() + elapsed := time.Since(startTime).Nanoseconds() + atomic.AddInt64(&l.totalWaitTime, elapsed) + atomic.AddInt64(&l.lockCount, 1) +} + +// RUnlock unlocks the lock for reading. 
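+// Unlike Lock and RLock, the unlock paths record nothing; only lock acquisitions feed the wait-time statistics.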
+func (l *RWLockStats) RUnlock() {
+	l.RWMutex.RUnlock()
+}
+
 // RegionsInfo for export
 type RegionsInfo struct {
-	t            syncutil.RWMutex
+	t            RWLockStats
 	tree         *regionTree
 	regions      map[uint64]*regionItem // regionID -> regionInfo
-	st           syncutil.RWMutex
+	st           RWLockStats
 	subRegions   map[uint64]*regionItem // regionID -> regionInfo
 	leaders      map[uint64]*regionTree // storeID -> sub regionTree
 	followers    map[uint64]*regionTree // storeID -> sub regionTree
 	learners     map[uint64]*regionTree // storeID -> sub regionTree
 	witnesses    map[uint64]*regionTree // storeID -> sub regionTree
 	pendingPeers map[uint64]*regionTree // storeID -> sub regionTree
+	// This tree is used to check the overlaps among all the subtrees.
+	overlapTree *regionTree
 }
 
 // NewRegionsInfo creates RegionsInfo with tree, regions, leaders and followers
 func NewRegionsInfo() *RegionsInfo {
 	return &RegionsInfo{
-		tree:         newRegionTree(),
+		tree:         newRegionTreeWithCountRef(),
 		regions:      make(map[uint64]*regionItem),
 		subRegions:   make(map[uint64]*regionItem),
 		leaders:      make(map[uint64]*regionTree),
@@ -849,6 +927,7 @@ func NewRegionsInfo() *RegionsInfo {
 		learners:     make(map[uint64]*regionTree),
 		witnesses:    make(map[uint64]*regionTree),
 		pendingPeers: make(map[uint64]*regionTree),
+		overlapTree:  newRegionTreeWithCountRef(),
 	}
 }
 
@@ -871,7 +950,7 @@ func (r *RegionsInfo) getRegionLocked(regionID uint64) *RegionInfo {
 func (r *RegionsInfo) CheckAndPutRegion(region *RegionInfo) []*RegionInfo {
 	r.t.Lock()
 	origin := r.getRegionLocked(region.GetID())
-	var ols []*regionItem
+	var ols []*RegionInfo
 	if origin == nil || !bytes.Equal(origin.GetStartKey(), region.GetStartKey()) || !bytes.Equal(origin.GetEndKey(), region.GetEndKey()) {
 		ols = r.tree.overlaps(&regionItem{RegionInfo: region})
 	}
@@ -896,43 +975,196 @@ func (r *RegionsInfo) PutRegion(region *RegionInfo) []*RegionInfo {
 }
 
 // PreCheckPutRegion checks if the region is valid to put.
-func (r *RegionsInfo) PreCheckPutRegion(region *RegionInfo) (*RegionInfo, []*regionItem, error) {
+func (r *RegionsInfo) PreCheckPutRegion(region *RegionInfo) (*RegionInfo, []*RegionInfo, error) {
 	origin, overlaps := r.GetRelevantRegions(region)
 	err := check(region, origin, overlaps)
 	return origin, overlaps, err
 }
 
 // AtomicCheckAndPutRegion checks if the region is valid to put, if valid then put.
-func (r *RegionsInfo) AtomicCheckAndPutRegion(region *RegionInfo) ([]*RegionInfo, error) {
+func (r *RegionsInfo) AtomicCheckAndPutRegion(ctx *MetaProcessContext, region *RegionInfo) ([]*RegionInfo, error) {
+	tracer := ctx.Tracer
 	r.t.Lock()
-	var ols []*regionItem
+	var ols []*RegionInfo
 	origin := r.getRegionLocked(region.GetID())
 	if origin == nil || !bytes.Equal(origin.GetStartKey(), region.GetStartKey()) || !bytes.Equal(origin.GetEndKey(), region.GetEndKey()) {
 		ols = r.tree.overlaps(&regionItem{RegionInfo: region})
 	}
+	tracer.OnCheckOverlapsFinished()
 	err := check(region, origin, ols)
 	if err != nil {
 		r.t.Unlock()
+		tracer.OnValidateRegionFinished()
 		return nil, err
 	}
+	tracer.OnValidateRegionFinished()
 	origin, overlaps, rangeChanged := r.setRegionLocked(region, true, ols...)
 	r.t.Unlock()
+	tracer.OnSetRegionFinished()
 	r.UpdateSubTree(region, origin, overlaps, rangeChanged)
+	tracer.OnUpdateSubTreeFinished()
+	return overlaps, nil
+}
+
+// CheckAndPutRootTree checks if the region is valid to put into the root tree; if it is, the region is put
+// and the overlapped regions are returned, otherwise an error is returned.
+// Usually used with CheckAndPutSubTree together.
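+// The intended calling order is CheckAndPutRootTree first and CheckAndPutSubTree afterwards,
+// since the latter re-reads the region from the root tree to keep the reference counts consistent.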
+func (r *RegionsInfo) CheckAndPutRootTree(ctx *MetaProcessContext, region *RegionInfo) ([]*RegionInfo, error) {
+	tracer := ctx.Tracer
+	r.t.Lock()
+	var ols []*RegionInfo
+	origin := r.getRegionLocked(region.GetID())
+	if origin == nil || !bytes.Equal(origin.GetStartKey(), region.GetStartKey()) || !bytes.Equal(origin.GetEndKey(), region.GetEndKey()) {
+		ols = r.tree.overlaps(&regionItem{RegionInfo: region})
+	}
+	tracer.OnCheckOverlapsFinished()
+	err := check(region, origin, ols)
+	if err != nil {
+		r.t.Unlock()
+		tracer.OnValidateRegionFinished()
+		return nil, err
+	}
+	tracer.OnValidateRegionFinished()
+	_, overlaps, _ := r.setRegionLocked(region, true, ols...)
+	r.t.Unlock()
+	tracer.OnSetRegionFinished()
 	return overlaps, nil
 }
 
+// CheckAndPutSubTree checks if the region is valid to put into the sub trees and, if it is, updates them
+// with the latest copy of the region taken from the root tree.
+// Usually used with CheckAndPutRootTree together.
+func (r *RegionsInfo) CheckAndPutSubTree(region *RegionInfo) {
+	// Get the region from the root tree again.
+	newRegion := r.GetRegion(region.GetID())
+	if newRegion == nil {
+		// Make sure the region exists in the root tree, so as to keep the reference count correct.
+		return
+	}
+	r.UpdateSubTreeOrderInsensitive(newRegion)
+}
+
+// UpdateSubTreeOrderInsensitive updates the subtree.
+// It can be called concurrently, so the region version is checked to keep the updates ordered:
+// 1. if the version is stale, drop this update;
+// 2. if the version is the same, only some statistics need to be updated and, in that case,
+//    the order of the updates does not matter.
+//
+// On the other hand, the overlapped regions need to be re-checked, because the region tree
+// and the subtree are not updated atomically.
+func (r *RegionsInfo) UpdateSubTreeOrderInsensitive(region *RegionInfo) {
+	var origin *RegionInfo
+	r.st.Lock()
+	defer r.st.Unlock()
+	originItem, ok := r.subRegions[region.GetID()]
+	if ok {
+		origin = originItem.RegionInfo
+	}
+	rangeChanged := true
+	if origin != nil {
+		rangeChanged = !origin.rangeEqualsTo(region)
+		if r.preUpdateSubTreeLocked(rangeChanged, !origin.peersEqualTo(region), true, origin, region) {
+			return
+		}
+	}
+	r.updateSubTreeLocked(rangeChanged, nil, region)
+}
+
+func (r *RegionsInfo) preUpdateSubTreeLocked(
+	rangeChanged, peerChanged, orderInsensitive bool,
+	origin, region *RegionInfo,
+) (done bool) {
+	if orderInsensitive {
+		re := region.GetRegionEpoch()
+		oe := origin.GetRegionEpoch()
+		isTermBehind := region.GetTerm() > 0 && region.GetTerm() < origin.GetTerm()
+		if (isTermBehind || re.GetVersion() < oe.GetVersion() || re.GetConfVer() < oe.GetConfVer()) && !region.isRegionRecreated() {
+			// Region meta is stale, skip.
+			return true
+		}
+	}
+	if rangeChanged || peerChanged {
+		// If the range or peers have changed, clean up the subtrees before updating them.
+		// TODO: improve performance by deleting only the different peers.
+		r.removeRegionFromSubTreeLocked(origin)
+	} else {
+		// The region tree and the subtree update is not atomic and the region tree is updated first.
+		// If two threads need to update the region tree,
+		// t1: thread-A updates the region tree
+		// t2: thread-B updates the region tree again
+		// t3: thread-B updates the subtree
+		// t4: thread-A updates the subtree
+		// to keep the region tree consistent with the subtree, we need to drop this update.
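+		// Since the range and the peers are unchanged here, it is enough to refresh the statistics and
+		// swap the stored pointer below; the trees themselves are left untouched.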
+ if tree, ok := r.subRegions[region.GetID()]; ok { + r.updateSubTreeStat(origin, region) + tree.RegionInfo = region + } + return true + } + return false +} + +func (r *RegionsInfo) updateSubTreeLocked(rangeChanged bool, overlaps []*RegionInfo, region *RegionInfo) { + if rangeChanged { + // TODO: only perform the remove operation on the overlapped peer. + if len(overlaps) == 0 { + // If the range has changed but the overlapped regions are not provided, collect them by `[]*regionItem`. + for _, item := range r.getOverlapRegionFromOverlapTreeLocked(region) { + r.removeRegionFromSubTreeLocked(item) + } + } else { + // Remove all provided overlapped regions from the subtrees. + for _, overlap := range overlaps { + r.removeRegionFromSubTreeLocked(overlap) + } + } + } + // Reinsert the region into all subtrees. + item := ®ionItem{region} + r.subRegions[region.GetID()] = item + r.overlapTree.update(item, false) + // Add leaders and followers. + setPeer := func(peersMap map[uint64]*regionTree, storeID uint64) { + store, ok := peersMap[storeID] + if !ok { + store = newRegionTree() + peersMap[storeID] = store + } + store.update(item, false) + } + for _, peer := range region.GetVoters() { + storeID := peer.GetStoreId() + if peer.GetId() == region.leader.GetId() { + setPeer(r.leaders, storeID) + } else { + setPeer(r.followers, storeID) + } + } + // Add other peers. + setPeers := func(peersMap map[uint64]*regionTree, peers []*metapb.Peer) { + for _, peer := range peers { + setPeer(peersMap, peer.GetStoreId()) + } + } + setPeers(r.learners, region.GetLearners()) + setPeers(r.witnesses, region.GetWitnesses()) + setPeers(r.pendingPeers, region.GetPendingPeers()) +} + +func (r *RegionsInfo) getOverlapRegionFromOverlapTreeLocked(region *RegionInfo) []*RegionInfo { + return r.overlapTree.overlaps(®ionItem{RegionInfo: region}) +} + // GetRelevantRegions returns the relevant regions for a given region. -func (r *RegionsInfo) GetRelevantRegions(region *RegionInfo) (origin *RegionInfo, overlaps []*regionItem) { +func (r *RegionsInfo) GetRelevantRegions(region *RegionInfo) (origin *RegionInfo, overlaps []*RegionInfo) { r.t.RLock() defer r.t.RUnlock() origin = r.getRegionLocked(region.GetID()) if origin == nil || !bytes.Equal(origin.GetStartKey(), region.GetStartKey()) || !bytes.Equal(origin.GetEndKey(), region.GetEndKey()) { - overlaps = r.tree.overlaps(®ionItem{RegionInfo: region}) + return origin, r.tree.overlaps(®ionItem{RegionInfo: region}) } return } -func check(region, origin *RegionInfo, overlaps []*regionItem) error { +func check(region, origin *RegionInfo, overlaps []*RegionInfo) error { for _, item := range overlaps { // PD ignores stale regions' heartbeats, unless it is recreated recently by unsafe recover operation. if region.GetRegionEpoch().GetVersion() < item.GetRegionEpoch().GetVersion() && !region.isRegionRecreated() { @@ -962,7 +1194,7 @@ func (r *RegionsInfo) SetRegion(region *RegionInfo) (*RegionInfo, []*RegionInfo, return r.setRegionLocked(region, false) } -func (r *RegionsInfo) setRegionLocked(region *RegionInfo, withOverlaps bool, ol ...*regionItem) (*RegionInfo, []*RegionInfo, bool) { +func (r *RegionsInfo) setRegionLocked(region *RegionInfo, withOverlaps bool, ol ...*RegionInfo) (*RegionInfo, []*RegionInfo, bool) { var ( item *regionItem // Pointer to the *RegionInfo of this ID. 
origin *RegionInfo @@ -1001,7 +1233,6 @@ func (r *RegionsInfo) setRegionLocked(region *RegionInfo, withOverlaps bool, ol item = ®ionItem{RegionInfo: region} r.regions[region.GetID()] = item } - var overlaps []*RegionInfo if rangeChanged { overlaps = r.tree.update(item, withOverlaps, ol...) @@ -1023,68 +1254,11 @@ func (r *RegionsInfo) UpdateSubTree(region, origin *RegionInfo, overlaps []*Regi r.st.Lock() defer r.st.Unlock() if origin != nil { - if rangeChanged || !origin.peersEqualTo(region) { - // If the range or peers have changed, the sub regionTree needs to be cleaned up. - // TODO: Improve performance by deleting only the different peers. - r.removeRegionFromSubTreeLocked(origin) - } else { - // The region tree and the subtree update is not atomic and the region tree is updated first. - // If there are two thread needs to update region tree, - // t1: thread-A update region tree - // t2: thread-B: update region tree again - // t3: thread-B: update subtree - // t4: thread-A: update region subtree - // to keep region tree consistent with subtree, we need to drop this update. - if tree, ok := r.subRegions[region.GetID()]; ok { - r.updateSubTreeStat(origin, region) - tree.RegionInfo = region - } + if r.preUpdateSubTreeLocked(rangeChanged, !origin.peersEqualTo(region), false, origin, region) { return } } - if rangeChanged { - for _, re := range overlaps { - r.removeRegionFromSubTreeLocked(re) - } - } - - item := ®ionItem{region} - r.subRegions[region.GetID()] = item - // It has been removed and all information needs to be updated again. - // Set peers then. - setPeer := func(peersMap map[uint64]*regionTree, storeID uint64, item *regionItem) { - store, ok := peersMap[storeID] - if !ok { - store = newRegionTree() - peersMap[storeID] = store - } - store.update(item, false) - } - - // Add to leaders and followers. - for _, peer := range region.GetVoters() { - storeID := peer.GetStoreId() - if peer.GetId() == region.leader.GetId() { - // Add leader peer to leaders. - setPeer(r.leaders, storeID, item) - } else { - // Add follower peer to followers. - setPeer(r.followers, storeID, item) - } - } - - setPeers := func(peersMap map[uint64]*regionTree, peers []*metapb.Peer) { - for _, peer := range peers { - storeID := peer.GetStoreId() - setPeer(peersMap, storeID, item) - } - } - // Add to learners. - setPeers(r.learners, region.GetLearners()) - // Add to witnesses. - setPeers(r.witnesses, region.GetWitnesses()) - // Add to PendingPeers - setPeers(r.pendingPeers, region.GetPendingPeers()) + r.updateSubTreeLocked(rangeChanged, overlaps, region) } func (r *RegionsInfo) updateSubTreeStat(origin *RegionInfo, region *RegionInfo) { @@ -1120,7 +1294,7 @@ func (r *RegionsInfo) TreeLen() int { } // GetOverlaps returns the regions which are overlapped with the specified region range. -func (r *RegionsInfo) GetOverlaps(region *RegionInfo) []*regionItem { +func (r *RegionsInfo) GetOverlaps(region *RegionInfo) []*RegionInfo { r.t.RLock() defer r.t.RUnlock() return r.tree.overlaps(®ionItem{RegionInfo: region}) @@ -1138,7 +1312,7 @@ func (r *RegionsInfo) RemoveRegion(region *RegionInfo) { // ResetRegionCache resets the regions info. 
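+// It also recreates the overlap tree; regions cached before the reset are discarded without their
+// reference counts being decremented.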
 func (r *RegionsInfo) ResetRegionCache() {
 	r.t.Lock()
-	r.tree = newRegionTree()
+	r.tree = newRegionTreeWithCountRef()
 	r.regions = make(map[uint64]*regionItem)
 	r.t.Unlock()
 	r.st.Lock()
@@ -1148,6 +1322,7 @@
 	r.learners = make(map[uint64]*regionTree)
 	r.witnesses = make(map[uint64]*regionTree)
 	r.pendingPeers = make(map[uint64]*regionTree)
+	r.overlapTree = newRegionTreeWithCountRef()
 }
 
 // RemoveRegionFromSubTree removes RegionInfo from regionSubTrees
@@ -1160,7 +1335,6 @@ func (r *RegionsInfo) RemoveRegionFromSubTree(region *RegionInfo) {
 // removeRegionFromSubTreeLocked removes RegionInfo from regionSubTrees
 func (r *RegionsInfo) removeRegionFromSubTreeLocked(region *RegionInfo) {
-	// Remove from leaders and followers.
 	for _, peer := range region.GetMeta().GetPeers() {
 		storeID := peer.GetStoreId()
 		r.leaders[storeID].remove(region)
@@ -1169,6 +1343,7 @@ func (r *RegionsInfo) removeRegionFromSubTreeLocked(region *RegionInfo) {
 		r.witnesses[storeID].remove(region)
 		r.pendingPeers[storeID].remove(region)
 	}
+	r.overlapTree.remove(region)
 	delete(r.subRegions, region.GetMeta().GetId())
 }
 
@@ -1284,6 +1459,60 @@ func (r *RegionsInfo) GetStoreRegions(storeID uint64) []*RegionInfo {
 	return regions
 }
 
+// SubTreeRegionType is the type of sub tree region.
+type SubTreeRegionType string
+
+const (
+	// AllInSubTree is all sub trees.
+	AllInSubTree SubTreeRegionType = "all"
+	// LeaderInSubTree is the leader sub tree.
+	LeaderInSubTree SubTreeRegionType = "leader"
+	// FollowerInSubTree is the follower sub tree.
+	FollowerInSubTree SubTreeRegionType = "follower"
+	// LearnerInSubTree is the learner sub tree.
+	LearnerInSubTree SubTreeRegionType = "learner"
+	// WitnessInSubTree is the witness sub tree.
+	WitnessInSubTree SubTreeRegionType = "witness"
+	// PendingPeerInSubTree is the pending peer sub tree.
+	PendingPeerInSubTree SubTreeRegionType = "pending"
+)
+
+// GetStoreRegionsByTypeInSubTree gets all RegionInfo of the given sub tree type with a given storeID.
+func (r *RegionsInfo) GetStoreRegionsByTypeInSubTree(storeID uint64, typ SubTreeRegionType) ([]*RegionInfo, error) {
+	r.st.RLock()
+	var regions []*RegionInfo
+	switch typ {
+	case LeaderInSubTree:
+		if leaders, ok := r.leaders[storeID]; ok {
+			regions = leaders.scanRanges()
+		}
+	case FollowerInSubTree:
+		if followers, ok := r.followers[storeID]; ok {
+			regions = followers.scanRanges()
+		}
+	case LearnerInSubTree:
+		if learners, ok := r.learners[storeID]; ok {
+			regions = learners.scanRanges()
+		}
+	case WitnessInSubTree:
+		if witnesses, ok := r.witnesses[storeID]; ok {
+			regions = witnesses.scanRanges()
+		}
+	case PendingPeerInSubTree:
+		if pendingPeers, ok := r.pendingPeers[storeID]; ok {
+			regions = pendingPeers.scanRanges()
+		}
+	case AllInSubTree:
+		r.st.RUnlock()
+		return r.GetStoreRegions(storeID), nil
+	default:
+		// Release the read lock on every path before returning.
+		r.st.RUnlock()
+		return nil, errors.Errorf("unknown sub tree region type %v", typ)
+	}
+
+	r.st.RUnlock()
+	return regions, nil
+}
+
 // GetStoreLeaderRegionSize get total size of store's leader regions
 func (r *RegionsInfo) GetStoreLeaderRegionSize(storeID uint64) int64 {
 	r.st.RLock()
@@ -1432,13 +1661,6 @@ func (r *RegionsInfo) GetStoreWitnessCount(storeID uint64) int {
 	return r.witnesses[storeID].length()
 }
 
-// RandPendingRegion randomly gets a store's region with a pending peer.
-func (r *RegionsInfo) RandPendingRegion(storeID uint64, ranges []KeyRange) *RegionInfo {
-	r.st.RLock()
-	defer r.st.RUnlock()
-	return r.pendingPeers[storeID].RandomRegion(ranges)
-}
-
 // RandPendingRegions randomly gets a store's n regions with a pending peer.
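+// The result may contain duplicate regions, and it is empty when no candidate falls into the given ranges.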
func (r *RegionsInfo) RandPendingRegions(storeID uint64, ranges []KeyRange) []*RegionInfo { r.st.RLock() @@ -1446,11 +1668,11 @@ func (r *RegionsInfo) RandPendingRegions(storeID uint64, ranges []KeyRange) []*R return r.pendingPeers[storeID].RandomRegions(randomRegionMaxRetry, ranges) } -// RandLeaderRegion randomly gets a store's leader region. -func (r *RegionsInfo) RandLeaderRegion(storeID uint64, ranges []KeyRange) *RegionInfo { +// This function is used for test only. +func (r *RegionsInfo) randLeaderRegion(storeID uint64, ranges []KeyRange) { r.st.RLock() defer r.st.RUnlock() - return r.leaders[storeID].RandomRegion(ranges) + _ = r.leaders[storeID].randomRegion(ranges) } // RandLeaderRegions randomly gets a store's n leader regions. @@ -1460,13 +1682,6 @@ func (r *RegionsInfo) RandLeaderRegions(storeID uint64, ranges []KeyRange) []*Re return r.leaders[storeID].RandomRegions(randomRegionMaxRetry, ranges) } -// RandFollowerRegion randomly gets a store's follower region. -func (r *RegionsInfo) RandFollowerRegion(storeID uint64, ranges []KeyRange) *RegionInfo { - r.st.RLock() - defer r.st.RUnlock() - return r.followers[storeID].RandomRegion(ranges) -} - // RandFollowerRegions randomly gets a store's n follower regions. func (r *RegionsInfo) RandFollowerRegions(storeID uint64, ranges []KeyRange) []*RegionInfo { r.st.RLock() @@ -1474,13 +1689,6 @@ func (r *RegionsInfo) RandFollowerRegions(storeID uint64, ranges []KeyRange) []* return r.followers[storeID].RandomRegions(randomRegionMaxRetry, ranges) } -// RandLearnerRegion randomly gets a store's learner region. -func (r *RegionsInfo) RandLearnerRegion(storeID uint64, ranges []KeyRange) *RegionInfo { - r.st.RLock() - defer r.st.RUnlock() - return r.learners[storeID].RandomRegion(ranges) -} - // RandLearnerRegions randomly gets a store's n learner regions. func (r *RegionsInfo) RandLearnerRegions(storeID uint64, ranges []KeyRange) []*RegionInfo { r.st.RLock() @@ -1488,13 +1696,6 @@ func (r *RegionsInfo) RandLearnerRegions(storeID uint64, ranges []KeyRange) []*R return r.learners[storeID].RandomRegions(randomRegionMaxRetry, ranges) } -// RandWitnessRegion randomly gets a store's witness region. -func (r *RegionsInfo) RandWitnessRegion(storeID uint64, ranges []KeyRange) *RegionInfo { - r.st.RLock() - defer r.st.RUnlock() - return r.witnesses[storeID].RandomRegion(ranges) -} - // RandWitnessRegions randomly gets a store's n witness regions. 
func (r *RegionsInfo) RandWitnessRegions(storeID uint64, ranges []KeyRange) []*RegionInfo { r.st.RLock() @@ -1653,6 +1854,42 @@ func (r *RegionsInfo) GetRegionSizeByRange(startKey, endKey []byte) int64 { return size } +// metrics default poll interval +const defaultPollInterval = 15 * time.Second + +// CollectWaitLockMetrics collects the metrics of waiting time for lock +func (r *RegionsInfo) CollectWaitLockMetrics() { + regionsLockTotalWaitTime := atomic.LoadInt64(&r.t.totalWaitTime) + regionsLockCount := atomic.LoadInt64(&r.t.lockCount) + + lastRegionsLockTotalWaitTime := atomic.LoadInt64(&r.t.lastTotalWaitTime) + lastsRegionsLockCount := atomic.LoadInt64(&r.t.lastLockCount) + + subRegionsLockTotalWaitTime := atomic.LoadInt64(&r.st.totalWaitTime) + subRegionsLockCount := atomic.LoadInt64(&r.st.lockCount) + + lastSubRegionsLockTotalWaitTime := atomic.LoadInt64(&r.st.lastTotalWaitTime) + lastSubRegionsLockCount := atomic.LoadInt64(&r.st.lastLockCount) + + // update last metrics + atomic.StoreInt64(&r.t.lastTotalWaitTime, regionsLockTotalWaitTime) + atomic.StoreInt64(&r.t.lastLockCount, regionsLockCount) + atomic.StoreInt64(&r.st.lastTotalWaitTime, subRegionsLockTotalWaitTime) + atomic.StoreInt64(&r.st.lastLockCount, subRegionsLockCount) + + // skip invalid situation like initial status + if lastRegionsLockTotalWaitTime == 0 || lastsRegionsLockCount == 0 || lastSubRegionsLockTotalWaitTime == 0 || lastSubRegionsLockCount == 0 || + regionsLockTotalWaitTime-lastRegionsLockTotalWaitTime < 0 || regionsLockTotalWaitTime-lastRegionsLockTotalWaitTime > int64(defaultPollInterval) || + subRegionsLockTotalWaitTime-lastSubRegionsLockTotalWaitTime < 0 || subRegionsLockTotalWaitTime-lastSubRegionsLockTotalWaitTime > int64(defaultPollInterval) { + return + } + + waitRegionsLockDurationSum.Add(time.Duration(regionsLockTotalWaitTime - lastRegionsLockTotalWaitTime).Seconds()) + waitRegionsLockCount.Add(float64(regionsLockCount - lastsRegionsLockCount)) + waitSubRegionsLockDurationSum.Add(time.Duration(subRegionsLockTotalWaitTime - lastSubRegionsLockTotalWaitTime).Seconds()) + waitSubRegionsLockCount.Add(float64(subRegionsLockCount - lastSubRegionsLockCount)) +} + // GetAdjacentRegions returns region's info that is adjacent with specific region func (r *RegionsInfo) GetAdjacentRegions(region *RegionInfo) (*RegionInfo, *RegionInfo) { r.t.RLock() @@ -1708,16 +1945,16 @@ func (r *RegionsInfo) GetAverageRegionSize() int64 { // ValidRegion is used to decide if the region is valid. func (r *RegionsInfo) ValidRegion(region *metapb.Region) error { startKey := region.GetStartKey() - currnetRegion := r.GetRegionByKey(startKey) - if currnetRegion == nil { + currentRegion := r.GetRegionByKey(startKey) + if currentRegion == nil { return errors.Errorf("region not found, request region: %v", logutil.RedactStringer(RegionToHexMeta(region))) } // If the request epoch is less than current region epoch, then returns an error. 
regionEpoch := region.GetRegionEpoch() - currnetEpoch := currnetRegion.GetMeta().GetRegionEpoch() - if regionEpoch.GetVersion() < currnetEpoch.GetVersion() || - regionEpoch.GetConfVer() < currnetEpoch.GetConfVer() { - return errors.Errorf("invalid region epoch, request: %v, current: %v", regionEpoch, currnetEpoch) + currentEpoch := currentRegion.GetMeta().GetRegionEpoch() + if regionEpoch.GetVersion() < currentEpoch.GetVersion() || + regionEpoch.GetConfVer() < currentEpoch.GetConfVer() { + return errors.Errorf("invalid region epoch, request: %v, current: %v", regionEpoch, currentEpoch) } return nil } @@ -1806,19 +2043,19 @@ func EncodeToString(src []byte) []byte { return dst } -// HexRegionKey converts region key to hex format. Used for formating region in +// HexRegionKey converts region key to hex format. Used for formatting region in // logs. func HexRegionKey(key []byte) []byte { return ToUpperASCIIInplace(EncodeToString(key)) } -// HexRegionKeyStr converts region key to hex format. Used for formating region in +// HexRegionKeyStr converts region key to hex format. Used for formatting region in // logs. func HexRegionKeyStr(key []byte) string { return String(HexRegionKey(key)) } -// RegionToHexMeta converts a region meta's keys to hex format. Used for formating +// RegionToHexMeta converts a region meta's keys to hex format. Used for formatting // region in logs. func RegionToHexMeta(meta *metapb.Region) HexRegionMeta { if meta == nil { @@ -1827,7 +2064,7 @@ func RegionToHexMeta(meta *metapb.Region) HexRegionMeta { return HexRegionMeta{meta} } -// HexRegionMeta is a region meta in the hex format. Used for formating region in logs. +// HexRegionMeta is a region meta in the hex format. Used for formatting region in logs. type HexRegionMeta struct { *metapb.Region } @@ -1839,7 +2076,7 @@ func (h HexRegionMeta) String() string { return strings.TrimSpace(proto.CompactTextString(meta)) } -// RegionsToHexMeta converts regions' meta keys to hex format. Used for formating +// RegionsToHexMeta converts regions' meta keys to hex format. Used for formatting // region in logs. func RegionsToHexMeta(regions []*metapb.Region) HexRegionsMeta { hexRegionMetas := make([]*metapb.Region, len(regions)) @@ -1847,7 +2084,7 @@ func RegionsToHexMeta(regions []*metapb.Region) HexRegionsMeta { return hexRegionMetas } -// HexRegionsMeta is a slice of regions' meta in the hex format. Used for formating +// HexRegionsMeta is a slice of regions' meta in the hex format. Used for formatting // region in logs. type HexRegionsMeta []*metapb.Region diff --git a/pkg/core/region_option.go b/pkg/core/region_option.go index 36db7cf3460..e973a1e7c1f 100644 --- a/pkg/core/region_option.go +++ b/pkg/core/region_option.go @@ -248,18 +248,23 @@ func SetReadKeys(v uint64) RegionCreateOption { // SetReadQuery sets the read query for the region, only used for unit test. func SetReadQuery(v uint64) RegionCreateOption { - q := RandomKindReadQuery(v) - return SetQueryStats(q) + return func(region *RegionInfo) { + resetReadQuery(region.queryStats) + region.queryStats = mergeQueryStat(region.queryStats, RandomKindReadQuery(v)) + } } // SetWrittenQuery sets the write query for the region, only used for unit test. 
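+// Unlike SetQueryStats, only the write-related counters are reset before the new random write stats
+// are merged in, so any existing read stats are preserved.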
func SetWrittenQuery(v uint64) RegionCreateOption { - q := RandomKindWriteQuery(v) - return SetQueryStats(q) + return func(region *RegionInfo) { + resetWriteQuery(region.queryStats) + region.queryStats = mergeQueryStat(region.queryStats, RandomKindWriteQuery(v)) + } } // SetQueryStats sets the query stats for the region, it will cover previous statistic. // This func is only used for unit test. +// It will cover previous statistic. func SetQueryStats(v *pdpb.QueryStats) RegionCreateOption { return func(region *RegionInfo) { region.queryStats = v @@ -268,6 +273,7 @@ func SetQueryStats(v *pdpb.QueryStats) RegionCreateOption { // AddQueryStats sets the query stats for the region, it will preserve previous statistic. // This func is only used for test and simulator. +// It will preserve previous statistic. func AddQueryStats(v *pdpb.QueryStats) RegionCreateOption { return func(region *RegionInfo) { q := mergeQueryStat(region.queryStats, v) @@ -469,3 +475,25 @@ func mergeQueryStat(q1, q2 *pdpb.QueryStats) *pdpb.QueryStats { q2.Rollback += q1.Rollback return q2 } + +func resetReadQuery(q *pdpb.QueryStats) { + if q == nil { + return + } + q.Get = 0 + q.Scan = 0 + q.Coprocessor = 0 +} + +func resetWriteQuery(q *pdpb.QueryStats) { + if q == nil { + return + } + q.Put = 0 + q.Delete = 0 + q.DeleteRange = 0 + q.AcquirePessimisticLock = 0 + q.Rollback = 0 + q.Prewrite = 0 + q.Commit = 0 +} diff --git a/pkg/core/region_test.go b/pkg/core/region_test.go index 508e7aa59aa..ce59c0075d0 100644 --- a/pkg/core/region_test.go +++ b/pkg/core/region_test.go @@ -156,18 +156,19 @@ func TestSortedEqual(t *testing.T) { re.Equal(testCase.isEqual, SortedPeersEqual(regionA.GetVoters(), regionB.GetVoters())) } + flowRoundDivisor := 3 // test RegionFromHeartbeat for _, testCase := range testCases { regionA := RegionFromHeartbeat(&pdpb.RegionHeartbeatRequest{ Region: &metapb.Region{Id: 100, Peers: pickPeers(testCase.idsA)}, DownPeers: pickPeerStats(testCase.idsA), PendingPeers: pickPeers(testCase.idsA), - }) + }, flowRoundDivisor) regionB := RegionFromHeartbeat(&pdpb.RegionHeartbeatRequest{ Region: &metapb.Region{Id: 100, Peers: pickPeers(testCase.idsB)}, DownPeers: pickPeerStats(testCase.idsB), PendingPeers: pickPeers(testCase.idsB), - }) + }, flowRoundDivisor) re.Equal(testCase.isEqual, SortedPeersEqual(regionA.GetVoters(), regionB.GetVoters())) re.Equal(testCase.isEqual, SortedPeersEqual(regionA.GetVoters(), regionB.GetVoters())) re.Equal(testCase.isEqual, SortedPeersEqual(regionA.GetPendingPeers(), regionB.GetPendingPeers())) @@ -363,7 +364,7 @@ func TestNeedSync(t *testing.T) { for _, testCase := range testCases { regionA := region.Clone(testCase.optionsA...) regionB := region.Clone(testCase.optionsB...) 
- _, _, _, needSync := RegionGuide(regionA, regionB) + _, _, needSync, _ := RegionGuide(ContextTODO(), regionA, regionB) re.Equal(testCase.needSync, needSync) } } @@ -459,9 +460,9 @@ func TestSetRegionConcurrence(t *testing.T) { regions := NewRegionsInfo() region := NewTestRegionInfo(1, 1, []byte("a"), []byte("b")) go func() { - regions.AtomicCheckAndPutRegion(region) + regions.AtomicCheckAndPutRegion(ContextTODO(), region) }() - regions.AtomicCheckAndPutRegion(region) + regions.AtomicCheckAndPutRegion(ContextTODO(), region) re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/core/UpdateSubTree")) } @@ -642,21 +643,64 @@ func BenchmarkUpdateBuckets(b *testing.B) { } func BenchmarkRandomRegion(b *testing.B) { - regions := NewRegionsInfo() - for i := 0; i < 5000000; i++ { - peer := &metapb.Peer{StoreId: 1, Id: uint64(i + 1)} - region := NewRegionInfo(&metapb.Region{ - Id: uint64(i + 1), - Peers: []*metapb.Peer{peer}, - StartKey: []byte(fmt.Sprintf("%20d", i)), - EndKey: []byte(fmt.Sprintf("%20d", i+1)), - }, peer) - origin, overlaps, rangeChanged := regions.SetRegion(region) - regions.UpdateSubTree(region, origin, overlaps, rangeChanged) - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - regions.RandLeaderRegion(1, nil) + for _, size := range []int{10, 100, 1000, 10000, 100000, 1000000, 10000000} { + regions := NewRegionsInfo() + for i := 0; i < size; i++ { + peer := &metapb.Peer{StoreId: 1, Id: uint64(i + 1)} + region := NewRegionInfo(&metapb.Region{ + Id: uint64(i + 1), + Peers: []*metapb.Peer{peer}, + StartKey: []byte(fmt.Sprintf("%20d", i)), + EndKey: []byte(fmt.Sprintf("%20d", i+1)), + }, peer) + origin, overlaps, rangeChanged := regions.SetRegion(region) + regions.UpdateSubTree(region, origin, overlaps, rangeChanged) + } + b.Run(fmt.Sprintf("random region whole range with size %d", size), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + regions.randLeaderRegion(1, nil) + } + }) + b.Run(fmt.Sprintf("random regions whole range with size %d", size), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + regions.RandLeaderRegions(1, nil) + } + }) + ranges := []KeyRange{ + NewKeyRange(fmt.Sprintf("%20d", size/4), fmt.Sprintf("%20d", size*3/4)), + } + b.Run(fmt.Sprintf("random region single range with size %d", size), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + regions.randLeaderRegion(1, ranges) + } + }) + b.Run(fmt.Sprintf("random regions single range with size %d", size), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + regions.RandLeaderRegions(1, ranges) + } + }) + ranges = []KeyRange{ + NewKeyRange(fmt.Sprintf("%20d", 0), fmt.Sprintf("%20d", size/4)), + NewKeyRange(fmt.Sprintf("%20d", size/4), fmt.Sprintf("%20d", size/2)), + NewKeyRange(fmt.Sprintf("%20d", size/2), fmt.Sprintf("%20d", size*3/4)), + NewKeyRange(fmt.Sprintf("%20d", size*3/4), fmt.Sprintf("%20d", size)), + } + b.Run(fmt.Sprintf("random region multiple ranges with size %d", size), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + regions.randLeaderRegion(1, ranges) + } + }) + b.Run(fmt.Sprintf("random regions multiple ranges with size %d", size), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + regions.RandLeaderRegions(1, ranges) + } + }) } } @@ -778,26 +822,24 @@ func BenchmarkRandomSetRegionWithGetRegionSizeByRangeParallel(b *testing.B) { ) } -const keyLength = 100 - -func randomBytes(n int) []byte { - bytes := make([]byte, n) - _, err := rand.Read(bytes) - if err != nil { - panic(err) - } - return 
bytes -} +const ( + peerNum = 3 + storeNum = 10 + keyLength = 100 +) -func newRegionInfoID(idAllocator id.Allocator) *RegionInfo { +func newRegionInfoIDRandom(idAllocator id.Allocator) *RegionInfo { var ( peers []*metapb.Peer leader *metapb.Peer ) - for i := 0; i < 3; i++ { + // Randomly select a peer as the leader. + leaderIdx := mrand.Intn(peerNum) + for i := 0; i < peerNum; i++ { id, _ := idAllocator.Alloc() - p := &metapb.Peer{Id: id, StoreId: id} - if i == 0 { + // Randomly distribute the peers to different stores. + p := &metapb.Peer{Id: id, StoreId: uint64(mrand.Intn(storeNum) + 1)} + if i == leaderIdx { leader = p } peers = append(peers, p) @@ -811,16 +853,24 @@ func newRegionInfoID(idAllocator id.Allocator) *RegionInfo { Peers: peers, }, leader, + SetApproximateSize(10), + SetApproximateKeys(10), ) } +func randomBytes(n int) []byte { + bytes := make([]byte, n) + _, err := rand.Read(bytes) + if err != nil { + panic(err) + } + return bytes +} + func BenchmarkAddRegion(b *testing.B) { regions := NewRegionsInfo() idAllocator := mockid.NewIDAllocator() - var items []*RegionInfo - for i := 0; i < 10000000; i++ { - items = append(items, newRegionInfoID(idAllocator)) - } + items := generateRegionItems(idAllocator, 10000000) b.ResetTimer() for i := 0; i < b.N; i++ { origin, overlaps, rangeChanged := regions.SetRegion(items[i]) @@ -828,6 +878,54 @@ func BenchmarkAddRegion(b *testing.B) { } } +func BenchmarkUpdateSubTreeOrderInsensitive(b *testing.B) { + idAllocator := mockid.NewIDAllocator() + for _, size := range []int{10, 100, 1000, 10000, 100000, 1000000, 10000000} { + regions := NewRegionsInfo() + items := generateRegionItems(idAllocator, size) + // Update the subtrees from an empty `*RegionsInfo`. + b.Run(fmt.Sprintf("from empty with size %d", size), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + for idx := range items { + regions.UpdateSubTreeOrderInsensitive(items[idx]) + } + } + }) + + // Update the subtrees from a non-empty `*RegionsInfo` with the same regions, + // which means the regions are completely non-overlapped. + b.Run(fmt.Sprintf("from non-overlapped regions with size %d", size), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + for idx := range items { + regions.UpdateSubTreeOrderInsensitive(items[idx]) + } + } + }) + + // Update the subtrees from a non-empty `*RegionsInfo` with different regions, + // which means the regions are most likely overlapped. 
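+		// The items are regenerated inside the benchmark below with fresh random keys,
+		// which makes overlaps with the existing subtrees very likely.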
+ b.Run(fmt.Sprintf("from overlapped regions with size %d", size), func(b *testing.B) { + items = generateRegionItems(idAllocator, size) + b.ResetTimer() + for i := 0; i < b.N; i++ { + for idx := range items { + regions.UpdateSubTreeOrderInsensitive(items[idx]) + } + } + }) + } +} + +func generateRegionItems(idAllocator *mockid.IDAllocator, size int) []*RegionInfo { + items := make([]*RegionInfo, size) + for i := 0; i < size; i++ { + items[i] = newRegionInfoIDRandom(idAllocator) + } + return items +} + func BenchmarkRegionFromHeartbeat(b *testing.B) { peers := make([]*metapb.Peer, 0, 3) for i := uint64(1); i <= 3; i++ { @@ -853,8 +951,193 @@ func BenchmarkRegionFromHeartbeat(b *testing.B) { PendingPeers: []*metapb.Peer{peers[1]}, DownPeers: []*pdpb.PeerStats{{Peer: peers[2], DownSeconds: 100}}, } + flowRoundDivisor := 3 b.ResetTimer() for i := 0; i < b.N; i++ { - RegionFromHeartbeat(regionReq) + RegionFromHeartbeat(regionReq, flowRoundDivisor) } } + +func TestUpdateRegionEquivalence(t *testing.T) { + re := require.New(t) + regionsOld := NewRegionsInfo() + regionsNew := NewRegionsInfo() + storeNums := 5 + items := generateTestRegions(1000, storeNums) + + updateRegion := func(item *RegionInfo) { + // old way + ctx := ContextTODO() + regionsOld.AtomicCheckAndPutRegion(ctx, item) + // new way + newItem := item.Clone() + ctx = ContextTODO() + regionsNew.CheckAndPutRootTree(ctx, newItem) + regionsNew.CheckAndPutSubTree(newItem) + } + checksEquivalence := func() { + re.Equal(regionsOld.GetRegionCount([]byte(""), []byte("")), regionsNew.GetRegionCount([]byte(""), []byte(""))) + re.Equal(regionsOld.GetRegionSizeByRange([]byte(""), []byte("")), regionsNew.GetRegionSizeByRange([]byte(""), []byte(""))) + checkRegions(re, regionsOld) + checkRegions(re, regionsNew) + + for _, r := range regionsOld.GetRegions() { + re.Equal(int32(2), r.GetRef(), fmt.Sprintf("inconsistent region %d", r.GetID())) + } + for _, r := range regionsNew.GetRegions() { + re.Equal(int32(2), r.GetRef(), fmt.Sprintf("inconsistent region %d", r.GetID())) + } + + for i := 1; i <= storeNums; i++ { + re.Equal(regionsOld.GetStoreRegionCount(uint64(i)), regionsNew.GetStoreRegionCount(uint64(i))) + re.Equal(regionsOld.GetStoreLeaderCount(uint64(i)), regionsNew.GetStoreLeaderCount(uint64(i))) + re.Equal(regionsOld.GetStorePendingPeerCount(uint64(i)), regionsNew.GetStorePendingPeerCount(uint64(i))) + re.Equal(regionsOld.GetStoreLearnerRegionSize(uint64(i)), regionsNew.GetStoreLearnerRegionSize(uint64(i))) + re.Equal(regionsOld.GetStoreRegionSize(uint64(i)), regionsNew.GetStoreRegionSize(uint64(i))) + re.Equal(regionsOld.GetStoreLeaderRegionSize(uint64(i)), regionsNew.GetStoreLeaderRegionSize(uint64(i))) + re.Equal(regionsOld.GetStoreFollowerRegionSize(uint64(i)), regionsNew.GetStoreFollowerRegionSize(uint64(i))) + } + } + + // Add a region. + for _, item := range items { + updateRegion(item) + } + checksEquivalence() + + // Merge regions. 
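+	// The merge is emulated by extending itemA's range to itemB's end key and bumping the region version.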
+ itemA, itemB := items[10], items[11] + itemMergedAB := itemA.Clone(WithEndKey(itemB.GetEndKey()), WithIncVersion()) + updateRegion(itemMergedAB) + checksEquivalence() + + // Split + itemA = itemA.Clone(WithIncVersion(), WithIncVersion()) + itemB = itemB.Clone(WithIncVersion(), WithIncVersion()) + updateRegion(itemA) + updateRegion(itemB) + checksEquivalence() +} + +func generateTestRegions(count int, storeNum int) []*RegionInfo { + var items []*RegionInfo + for i := 0; i < count; i++ { + peer1 := &metapb.Peer{StoreId: uint64(i%storeNum + 1), Id: uint64(i*storeNum + 1)} + peer2 := &metapb.Peer{StoreId: uint64((i+1)%storeNum + 1), Id: uint64(i*storeNum + 2)} + peer3 := &metapb.Peer{StoreId: uint64((i+2)%storeNum + 1), Id: uint64(i*storeNum + 3)} + if i%3 == 0 { + peer2.IsWitness = true + } + region := NewRegionInfo(&metapb.Region{ + Id: uint64(i + 1), + Peers: []*metapb.Peer{peer1, peer2, peer3}, + StartKey: []byte(fmt.Sprintf("%20d", i*10)), + EndKey: []byte(fmt.Sprintf("%20d", (i+1)*10)), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 100, Version: 100}, + }, + peer1, + SetApproximateKeys(10), + SetApproximateSize(10)) + items = append(items, region) + } + return items +} + +func TestUpdateRegionEventualConsistency(t *testing.T) { + re := require.New(t) + regionsOld := NewRegionsInfo() + regionsNew := NewRegionsInfo() + i := 1 + storeNum := 5 + peer1 := &metapb.Peer{StoreId: uint64(i%storeNum + 1), Id: uint64(i*storeNum + 1)} + peer2 := &metapb.Peer{StoreId: uint64((i+1)%storeNum + 1), Id: uint64(i*storeNum + 2)} + peer3 := &metapb.Peer{StoreId: uint64((i+2)%storeNum + 1), Id: uint64(i*storeNum + 3)} + item := NewRegionInfo(&metapb.Region{ + Id: uint64(i + 1), + Peers: []*metapb.Peer{peer1, peer2, peer3}, + StartKey: []byte(fmt.Sprintf("%20d", i*10)), + EndKey: []byte(fmt.Sprintf("%20d", (i+1)*10)), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 100, Version: 100}, + }, + peer1, + SetApproximateKeys(10), + SetApproximateSize(10), + ) + regionItemA := item + regionPendingItemA := regionItemA.Clone(WithPendingPeers([]*metapb.Peer{peer3})) + + regionItemB := regionItemA.Clone() + regionPendingItemB := regionItemB.Clone(WithPendingPeers([]*metapb.Peer{peer3})) + regionGuide := GenerateRegionGuideFunc(true) + + // Old way + { + ctx := ContextTODO() + regionsOld.AtomicCheckAndPutRegion(ctx, regionPendingItemA) + re.Equal(int32(2), regionPendingItemA.GetRef()) + // check new item + saveKV, saveCache, needSync, _ := regionGuide(ctx, regionItemA, regionPendingItemA) + re.True(needSync) + re.True(saveCache) + re.False(saveKV) + // update cache + regionsOld.AtomicCheckAndPutRegion(ctx, regionItemA) + re.Equal(int32(2), regionItemA.GetRef()) + } + + // New way + { + // root tree part in order, and updated in order, updated regionPendingItemB first, then regionItemB + ctx := ContextTODO() + regionsNew.CheckAndPutRootTree(ctx, regionPendingItemB) + re.Equal(int32(1), regionPendingItemB.GetRef()) + ctx = ContextTODO() + regionsNew.CheckAndPutRootTree(ctx, regionItemB) + re.Equal(int32(1), regionItemB.GetRef()) + re.Equal(int32(0), regionPendingItemB.GetRef()) + + // subtree part missing order, updated regionItemB first, then regionPendingItemB + regionsNew.CheckAndPutSubTree(regionItemB) + re.Equal(int32(2), regionItemB.GetRef()) + re.Equal(int32(0), regionPendingItemB.GetRef()) + regionsNew.UpdateSubTreeOrderInsensitive(regionPendingItemB) + re.Equal(int32(1), regionItemB.GetRef()) + re.Equal(int32(1), regionPendingItemB.GetRef()) + + // heartbeat again, no need updates root tree + saveKV, saveCache, 
needSync, _ := regionGuide(ctx, regionItemB, regionItemB) + re.False(needSync) + re.False(saveCache) + re.False(saveKV) + + // but need update sub tree again + item := regionsNew.GetRegion(regionItemB.GetID()) + re.Equal(int32(1), item.GetRef()) + regionsNew.CheckAndPutSubTree(item) + re.Equal(int32(2), item.GetRef()) + } +} + +func TestCheckAndPutSubTree(t *testing.T) { + re := require.New(t) + regions := NewRegionsInfo() + region := NewTestRegionInfo(1, 1, []byte("a"), []byte("b")) + regions.CheckAndPutSubTree(region) + // should failed to put because the root tree is missing + re.Equal(0, regions.tree.length()) +} + +func TestCntRefAfterResetRegionCache(t *testing.T) { + re := require.New(t) + regions := NewRegionsInfo() + // Put the region first. + region := NewTestRegionInfo(1, 1, []byte("a"), []byte("b")) + regions.CheckAndPutRegion(region) + re.Equal(int32(2), region.GetRef()) + regions.ResetRegionCache() + // Put the region after reset. + region = NewTestRegionInfo(1, 1, []byte("a"), []byte("b")) + re.Zero(region.GetRef()) + regions.CheckAndPutRegion(region) + re.Equal(int32(2), region.GetRef()) +} diff --git a/pkg/core/region_tree.go b/pkg/core/region_tree.go index 333e1730ec8..9a148eeed18 100644 --- a/pkg/core/region_tree.go +++ b/pkg/core/region_tree.go @@ -35,6 +35,11 @@ func (r *regionItem) GetStartKey() []byte { return r.meta.StartKey } +// GetID returns the ID of the region. +func (r *regionItem) GetID() uint64 { + return r.meta.GetId() +} + // GetEndKey returns the end key of the region. func (r *regionItem) GetEndKey() []byte { return r.meta.EndKey @@ -47,11 +52,6 @@ func (r *regionItem) Less(other *regionItem) bool { return bytes.Compare(left, right) < 0 } -func (r *regionItem) Contains(key []byte) bool { - start, end := r.GetStartKey(), r.GetEndKey() - return bytes.Compare(key, start) >= 0 && (len(end) == 0 || bytes.Compare(key, end) < 0) -} - const ( defaultBTreeDegree = 64 ) @@ -64,6 +64,8 @@ type regionTree struct { totalWriteKeysRate float64 // count the number of regions that not loaded from storage. notFromStorageRegionsCnt int + // count reference of RegionInfo + countRef bool } func newRegionTree() *regionTree { @@ -76,6 +78,17 @@ func newRegionTree() *regionTree { } } +func newRegionTreeWithCountRef() *regionTree { + return ®ionTree{ + tree: btree.NewG[*regionItem](defaultBTreeDegree), + totalSize: 0, + totalWriteBytesRate: 0, + totalWriteKeysRate: 0, + notFromStorageRegionsCnt: 0, + countRef: true, + } +} + func (t *regionTree) length() int { if t == nil { return 0 @@ -91,7 +104,7 @@ func (t *regionTree) notFromStorageRegionsCount() int { } // GetOverlaps returns the range items that has some intersections with the given items. -func (t *regionTree) overlaps(item *regionItem) []*regionItem { +func (t *regionTree) overlaps(item *regionItem) []*RegionInfo { // note that Find() gets the last item that is less or equal than the item. 
// in the case: |_______a_______|_____b_____|___c___| // new item is |______d______| @@ -103,12 +116,12 @@ func (t *regionTree) overlaps(item *regionItem) []*regionItem { result = item } endKey := item.GetEndKey() - var overlaps []*regionItem + var overlaps []*RegionInfo t.tree.AscendGreaterOrEqual(result, func(i *regionItem) bool { if len(endKey) > 0 && bytes.Compare(endKey, i.GetStartKey()) <= 0 { return false } - overlaps = append(overlaps, i) + overlaps = append(overlaps, i.RegionInfo) return true }) return overlaps @@ -117,7 +130,7 @@ func (t *regionTree) overlaps(item *regionItem) []*regionItem { // update updates the tree with the region. // It finds and deletes all the overlapped regions first, and then // insert the region. -func (t *regionTree) update(item *regionItem, withOverlaps bool, overlaps ...*regionItem) []*RegionInfo { +func (t *regionTree) update(item *regionItem, withOverlaps bool, overlaps ...*RegionInfo) []*RegionInfo { region := item.RegionInfo t.totalSize += region.approximateSize regionWriteBytesRate, regionWriteKeysRate := region.GetWriteRate() @@ -132,12 +145,15 @@ func (t *regionTree) update(item *regionItem, withOverlaps bool, overlaps ...*re } for _, old := range overlaps { - t.tree.Delete(old) + t.tree.Delete(®ionItem{RegionInfo: old}) } t.tree.ReplaceOrInsert(item) + if t.countRef { + item.RegionInfo.IncRef() + } result := make([]*RegionInfo, len(overlaps)) for i, overlap := range overlaps { - old := overlap.RegionInfo + old := overlap result[i] = old log.Debug("overlapping region", zap.Uint64("region-id", old.GetID()), @@ -150,12 +166,15 @@ func (t *regionTree) update(item *regionItem, withOverlaps bool, overlaps ...*re if !old.LoadedFromStorage() { t.notFromStorageRegionsCnt-- } + if t.countRef { + old.DecRef() + } } return result } -// updateStat is used to update statistics when regionItem.RegionInfo is directly replaced. +// updateStat is used to update statistics when RegionInfo is directly replaced. func (t *regionTree) updateStat(origin *RegionInfo, region *RegionInfo) { t.totalSize += region.approximateSize regionWriteBytesRate, regionWriteKeysRate := region.GetWriteRate() @@ -175,6 +194,10 @@ func (t *regionTree) updateStat(origin *RegionInfo, region *RegionInfo) { if !origin.LoadedFromStorage() && region.LoadedFromStorage() { t.notFromStorageRegionsCnt-- } + if t.countRef { + origin.DecRef() + region.IncRef() + } } // remove removes a region if the region is in the tree. @@ -194,6 +217,9 @@ func (t *regionTree) remove(region *RegionInfo) { regionWriteBytesRate, regionWriteKeysRate := result.GetWriteRate() t.totalWriteBytesRate -= regionWriteBytesRate t.totalWriteKeysRate -= regionWriteKeysRate + if t.countRef { + result.RegionInfo.DecRef() + } if !region.LoadedFromStorage() { t.notFromStorageRegionsCnt-- } @@ -246,7 +272,7 @@ func (t *regionTree) find(item *regionItem) *regionItem { // until f return false func (t *regionTree) scanRange(startKey []byte, f func(*RegionInfo) bool) { region := &RegionInfo{meta: &metapb.Region{StartKey: startKey}} - // find if there is a region with key range [s, d), s < startKey < d + // find if there is a region with key range [s, d), s <= startKey < d fn := func(item *regionItem) bool { r := item return f(r.RegionInfo) @@ -297,62 +323,115 @@ func (t *regionTree) getAdjacentItem(item *regionItem) (prev *regionItem, next * return prev, next } -// RandomRegion is used to get a random region within ranges. 
-func (t *regionTree) RandomRegion(ranges []KeyRange) *RegionInfo { - if t.length() == 0 { +func (t *regionTree) randomRegion(ranges []KeyRange) *RegionInfo { + regions := t.RandomRegions(1, ranges) + if len(regions) == 0 { return nil } + return regions[0] +} - if len(ranges) == 0 { - ranges = []KeyRange{NewKeyRange("", "")} +// RandomRegions get n random regions within the given ranges. +func (t *regionTree) RandomRegions(n int, ranges []KeyRange) []*RegionInfo { + treeLen := t.length() + if treeLen == 0 || n < 1 { + return nil } - - for _, i := range rand.Perm(len(ranges)) { - var endIndex int - startKey, endKey := ranges[i].StartKey, ranges[i].EndKey - startRegion, startIndex := t.tree.GetWithIndex(®ionItem{RegionInfo: &RegionInfo{meta: &metapb.Region{StartKey: startKey}}}) - - if len(endKey) != 0 { - _, endIndex = t.tree.GetWithIndex(®ionItem{RegionInfo: &RegionInfo{meta: &metapb.Region{StartKey: endKey}}}) - } else { - endIndex = t.tree.Len() - } - - // Consider that the item in the tree may not be continuous, - // we need to check if the previous item contains the key. - if startIndex != 0 && startRegion == nil && t.tree.GetAt(startIndex-1).Contains(startKey) { - startIndex-- + // Pre-allocate the variables to reduce the temporary memory allocations. + var ( + startKey, endKey []byte + // By default, we set the `startIndex` and `endIndex` to the whole tree range. + startIndex, endIndex = 0, treeLen + randIndex int + startItem *regionItem + pivotItem = ®ionItem{&RegionInfo{meta: &metapb.Region{}}} + region *RegionInfo + regions = make([]*RegionInfo, 0, n) + rangeLen, curLen = len(ranges), len(regions) + // setStartEndIndices is a helper function to set `startIndex` and `endIndex` + // according to the `startKey` and `endKey` and check if the range is invalid + // to skip the iteration. + // TODO: maybe we could cache the `startIndex` and `endIndex` for each range. + setAndCheckStartEndIndices = func() (skip bool) { + startKeyLen, endKeyLen := len(startKey), len(endKey) + if startKeyLen == 0 && endKeyLen == 0 { + startIndex, endIndex = 0, treeLen + return false + } + pivotItem.meta.StartKey = startKey + startItem, startIndex = t.tree.GetWithIndex(pivotItem) + if endKeyLen > 0 { + pivotItem.meta.StartKey = endKey + _, endIndex = t.tree.GetWithIndex(pivotItem) + } else { + endIndex = treeLen + } + // Consider that the item in the tree may not be continuous, + // we need to check if the previous item contains the key. + if startIndex != 0 && startItem == nil { + region = t.tree.GetAt(startIndex - 1).RegionInfo + if region.Contains(startKey) { + startIndex-- + } + } + // Check whether the `startIndex` and `endIndex` are valid. + if endIndex <= startIndex { + if endKeyLen > 0 && bytes.Compare(startKey, endKey) > 0 { + log.Error("wrong range keys", + logutil.ZapRedactString("start-key", string(HexRegionKey(startKey))), + logutil.ZapRedactString("end-key", string(HexRegionKey(endKey))), + errs.ZapError(errs.ErrWrongRangeKeys)) + } + return true + } + return false } - - if endIndex <= startIndex { - if len(endKey) > 0 && bytes.Compare(startKey, endKey) > 0 { - log.Error("wrong range keys", - logutil.ZapRedactString("start-key", string(HexRegionKey(startKey))), - logutil.ZapRedactString("end-key", string(HexRegionKey(endKey))), - errs.ZapError(errs.ErrWrongRangeKeys)) + ) + // This is a fast path to reduce the unnecessary iterations when we only have one range. 
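+	// With at most one range, the start and end indices are computed once and regions are then
+	// sampled directly, without re-shuffling the ranges on every retry.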
+ if rangeLen <= 1 { + if rangeLen == 1 { + startKey, endKey = ranges[0].StartKey, ranges[0].EndKey + if setAndCheckStartEndIndices() { + return regions } - continue } - index := rand.Intn(endIndex-startIndex) + startIndex - region := t.tree.GetAt(index).RegionInfo - if region.isInvolved(startKey, endKey) { - return region + for curLen < n { + randIndex = rand.Intn(endIndex-startIndex) + startIndex + region = t.tree.GetAt(randIndex).RegionInfo + if region.isInvolved(startKey, endKey) { + regions = append(regions, region) + curLen++ + } + // No region found, directly break to avoid infinite loop. + if curLen == 0 { + break + } } + return regions } + // When there are multiple ranges provided, + // keep retrying until we get enough regions. + for curLen < n { + // Shuffle the ranges to increase the randomness. + for _, i := range rand.Perm(rangeLen) { + startKey, endKey = ranges[i].StartKey, ranges[i].EndKey + if setAndCheckStartEndIndices() { + continue + } - return nil -} - -func (t *regionTree) RandomRegions(n int, ranges []KeyRange) []*RegionInfo { - if t.length() == 0 { - return nil - } - - regions := make([]*RegionInfo, 0, n) - - for i := 0; i < n; i++ { - if region := t.RandomRegion(ranges); region != nil { - regions = append(regions, region) + randIndex = rand.Intn(endIndex-startIndex) + startIndex + region = t.tree.GetAt(randIndex).RegionInfo + if region.isInvolved(startKey, endKey) { + regions = append(regions, region) + curLen++ + if curLen == n { + return regions + } + } + } + // No region found, directly break to avoid infinite loop. + if curLen == 0 { + break } } return regions diff --git a/pkg/core/region_tree_test.go b/pkg/core/region_tree_test.go index 4e002fb8157..2726b4fdab5 100644 --- a/pkg/core/region_tree_test.go +++ b/pkg/core/region_tree_test.go @@ -158,6 +158,9 @@ func TestRegionTree(t *testing.T) { updateNewItem(tree, regionA) updateNewItem(tree, regionC) + re.Nil(tree.overlaps(newRegionItem([]byte("b"), []byte("c")))) + re.Equal(regionC, tree.overlaps(newRegionItem([]byte("c"), []byte("d")))[0]) + re.Equal(regionC, tree.overlaps(newRegionItem([]byte("a"), []byte("cc")))[1]) re.Nil(tree.search([]byte{})) re.Equal(regionA, tree.search([]byte("a"))) re.Nil(tree.search([]byte("b"))) @@ -271,13 +274,19 @@ func TestRegionTreeSplitAndMerge(t *testing.T) { func TestRandomRegion(t *testing.T) { re := require.New(t) tree := newRegionTree() - r := tree.RandomRegion(nil) + r := tree.randomRegion(nil) re.Nil(r) regionA := NewTestRegionInfo(1, 1, []byte(""), []byte("g")) updateNewItem(tree, regionA) - ra := tree.RandomRegion([]KeyRange{NewKeyRange("", "")}) + ra := tree.randomRegion([]KeyRange{NewKeyRange("", "")}) re.Equal(regionA, ra) + ra = tree.randomRegion(nil) + re.Equal(regionA, ra) + ra2 := tree.RandomRegions(2, []KeyRange{NewKeyRange("", "")}) + re.Equal([]*RegionInfo{regionA, regionA}, ra2) + ra2 = tree.RandomRegions(2, nil) + re.Equal([]*RegionInfo{regionA, regionA}, ra2) regionB := NewTestRegionInfo(2, 2, []byte("g"), []byte("n")) regionC := NewTestRegionInfo(3, 3, []byte("n"), []byte("t")) @@ -286,22 +295,23 @@ func TestRandomRegion(t *testing.T) { updateNewItem(tree, regionC) updateNewItem(tree, regionD) - rb := tree.RandomRegion([]KeyRange{NewKeyRange("g", "n")}) + rb := tree.randomRegion([]KeyRange{NewKeyRange("g", "n")}) re.Equal(regionB, rb) - rc := tree.RandomRegion([]KeyRange{NewKeyRange("n", "t")}) + rc := tree.randomRegion([]KeyRange{NewKeyRange("n", "t")}) re.Equal(regionC, rc) - rd := tree.RandomRegion([]KeyRange{NewKeyRange("t", "")}) + rd := 
tree.randomRegion([]KeyRange{NewKeyRange("t", "")}) re.Equal(regionD, rd) - rf := tree.RandomRegion([]KeyRange{NewKeyRange("", "a")}) + rf := tree.randomRegion([]KeyRange{NewKeyRange("", "a")}) re.Nil(rf) - rf = tree.RandomRegion([]KeyRange{NewKeyRange("o", "s")}) + rf = tree.randomRegion([]KeyRange{NewKeyRange("o", "s")}) re.Nil(rf) - rf = tree.RandomRegion([]KeyRange{NewKeyRange("", "a")}) + rf = tree.randomRegion([]KeyRange{NewKeyRange("", "a")}) re.Nil(rf) - rf = tree.RandomRegion([]KeyRange{NewKeyRange("z", "")}) + rf = tree.randomRegion([]KeyRange{NewKeyRange("z", "")}) re.Nil(rf) + checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, nil) checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, []KeyRange{NewKeyRange("", "")}) checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB}, []KeyRange{NewKeyRange("", "n")}) checkRandomRegion(re, tree, []*RegionInfo{regionC, regionD}, []KeyRange{NewKeyRange("n", "")}) @@ -312,45 +322,46 @@ func TestRandomRegion(t *testing.T) { func TestRandomRegionDiscontinuous(t *testing.T) { re := require.New(t) tree := newRegionTree() - r := tree.RandomRegion([]KeyRange{NewKeyRange("c", "f")}) + r := tree.randomRegion([]KeyRange{NewKeyRange("c", "f")}) re.Nil(r) // test for single region regionA := NewTestRegionInfo(1, 1, []byte("c"), []byte("f")) updateNewItem(tree, regionA) - ra := tree.RandomRegion([]KeyRange{NewKeyRange("c", "e")}) + ra := tree.randomRegion([]KeyRange{NewKeyRange("c", "e")}) re.Nil(ra) - ra = tree.RandomRegion([]KeyRange{NewKeyRange("c", "f")}) + ra = tree.randomRegion([]KeyRange{NewKeyRange("c", "f")}) re.Equal(regionA, ra) - ra = tree.RandomRegion([]KeyRange{NewKeyRange("c", "g")}) + ra = tree.randomRegion([]KeyRange{NewKeyRange("c", "g")}) re.Equal(regionA, ra) - ra = tree.RandomRegion([]KeyRange{NewKeyRange("a", "e")}) + ra = tree.randomRegion([]KeyRange{NewKeyRange("a", "e")}) re.Nil(ra) - ra = tree.RandomRegion([]KeyRange{NewKeyRange("a", "f")}) + ra = tree.randomRegion([]KeyRange{NewKeyRange("a", "f")}) re.Equal(regionA, ra) - ra = tree.RandomRegion([]KeyRange{NewKeyRange("a", "g")}) + ra = tree.randomRegion([]KeyRange{NewKeyRange("a", "g")}) re.Equal(regionA, ra) regionB := NewTestRegionInfo(2, 2, []byte("n"), []byte("x")) updateNewItem(tree, regionB) - rb := tree.RandomRegion([]KeyRange{NewKeyRange("g", "x")}) + rb := tree.randomRegion([]KeyRange{NewKeyRange("g", "x")}) re.Equal(regionB, rb) - rb = tree.RandomRegion([]KeyRange{NewKeyRange("g", "y")}) + rb = tree.randomRegion([]KeyRange{NewKeyRange("g", "y")}) re.Equal(regionB, rb) - rb = tree.RandomRegion([]KeyRange{NewKeyRange("n", "y")}) + rb = tree.randomRegion([]KeyRange{NewKeyRange("n", "y")}) re.Equal(regionB, rb) - rb = tree.RandomRegion([]KeyRange{NewKeyRange("o", "y")}) + rb = tree.randomRegion([]KeyRange{NewKeyRange("o", "y")}) re.Nil(rb) regionC := NewTestRegionInfo(3, 3, []byte("z"), []byte("")) updateNewItem(tree, regionC) - rc := tree.RandomRegion([]KeyRange{NewKeyRange("y", "")}) + rc := tree.randomRegion([]KeyRange{NewKeyRange("y", "")}) re.Equal(regionC, rc) regionD := NewTestRegionInfo(4, 4, []byte(""), []byte("a")) updateNewItem(tree, regionD) - rd := tree.RandomRegion([]KeyRange{NewKeyRange("", "b")}) + rd := tree.randomRegion([]KeyRange{NewKeyRange("", "b")}) re.Equal(regionD, rd) + checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, nil) checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, []KeyRange{NewKeyRange("", "")}) } @@ -362,7 +373,7 @@ func 
updateNewItem(tree *regionTree, region *RegionInfo) { func checkRandomRegion(re *require.Assertions, tree *regionTree, regions []*RegionInfo, ranges []KeyRange) { keys := make(map[string]struct{}) for i := 0; i < 10000 && len(keys) < len(regions); i++ { - re := tree.RandomRegion(ranges) + re := tree.randomRegion(ranges) if re == nil { continue } diff --git a/pkg/core/store.go b/pkg/core/store.go index 1d3362cac0e..5baedafdb05 100644 --- a/pkg/core/store.go +++ b/pkg/core/store.go @@ -26,6 +26,7 @@ import ( "github.com/tikv/pd/pkg/core/constant" "github.com/tikv/pd/pkg/core/storelimit" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/utils/syncutil" "github.com/tikv/pd/pkg/utils/typeutil" "go.uber.org/zap" ) @@ -600,22 +601,36 @@ func DistinctScore(labels []string, stores []*StoreInfo, other *StoreInfo) float return score } -// MergeLabels merges the passed in labels with origins, overriding duplicated -// ones. +// MergeLabels merges the passed in labels with origins, overriding duplicated ones. +// Note: To prevent potential data races, it is advisable to refrain from directly modifying the 'origin' variable. func MergeLabels(origin []*metapb.StoreLabel, labels []*metapb.StoreLabel) []*metapb.StoreLabel { - storeLabels := origin -L: + results := make([]*metapb.StoreLabel, 0, len(origin)) + for _, label := range origin { + results = append(results, &metapb.StoreLabel{ + Key: label.Key, + Value: label.Value, + }) + } + for _, newLabel := range labels { - for _, label := range storeLabels { + found := false + for _, label := range results { if strings.EqualFold(label.Key, newLabel.Key) { + // Update the value for an existing key. label.Value = newLabel.Value - continue L + found = true + break } } - storeLabels = append(storeLabels, newLabel) + // Add a new label if the key doesn't exist in the original slice. + if !found { + results = append(results, newLabel) + } } - res := storeLabels[:0] - for _, l := range storeLabels { + + // Filter out labels with an empty value. + res := results[:0] + for _, l := range results { if l.Value != "" { res = append(res, l) } @@ -625,6 +640,7 @@ L: // StoresInfo contains information about all stores. type StoresInfo struct { + syncutil.RWMutex stores map[uint64]*StoreInfo } @@ -635,8 +651,12 @@ func NewStoresInfo() *StoresInfo { } } +/* Stores read operations */ + // GetStore returns a copy of the StoreInfo with the specified storeID. func (s *StoresInfo) GetStore(storeID uint64) *StoreInfo { + s.RLock() + defer s.RUnlock() store, ok := s.stores[storeID] if !ok { return nil @@ -644,13 +664,121 @@ func (s *StoresInfo) GetStore(storeID uint64) *StoreInfo { return store } -// SetStore sets a StoreInfo with storeID. -func (s *StoresInfo) SetStore(store *StoreInfo) { +// GetStores gets a complete set of StoreInfo. +func (s *StoresInfo) GetStores() []*StoreInfo { + s.RLock() + defer s.RUnlock() + stores := make([]*StoreInfo, 0, len(s.stores)) + for _, store := range s.stores { + stores = append(stores, store) + } + return stores +} + +// GetMetaStores gets a complete set of metapb.Store. +func (s *StoresInfo) GetMetaStores() []*metapb.Store { + s.RLock() + defer s.RUnlock() + stores := make([]*metapb.Store, 0, len(s.stores)) + for _, store := range s.stores { + stores = append(stores, store.GetMeta()) + } + return stores +} + +// GetStoreIDs returns a list of store ids. 
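// Illustrative use of the reworked MergeLabels above, a sketch assuming the
// exported core.MergeLabels and kvproto's metapb package: the origin slice is
// copied before merging, so callers holding a reference to it do not observe
// the merge, and a label merged in with an empty value drops that key.
package main

import (
	"fmt"

	"github.com/pingcap/kvproto/pkg/metapb"
	"github.com/tikv/pd/pkg/core"
)

func main() {
	origin := []*metapb.StoreLabel{
		{Key: "zone", Value: "z1"},
		{Key: "host", Value: "h1"},
	}
	merged := core.MergeLabels(origin, []*metapb.StoreLabel{
		{Key: "zone", Value: "z2"}, // overrides the existing key (case-insensitive match)
		{Key: "host", Value: ""},   // empty value filters the label out
		{Key: "disk", Value: "ssd"},
	})
	for _, l := range merged {
		fmt.Println(l.Key, "=", l.Value) // zone=z2, disk=ssd
	}
	fmt.Println(origin[0].Value) // still "z1": the input slice is left untouched
}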
+func (s *StoresInfo) GetStoreIDs() []uint64 { + s.RLock() + defer s.RUnlock() + count := len(s.stores) + storeIDs := make([]uint64, 0, count) + for _, store := range s.stores { + storeIDs = append(storeIDs, store.GetID()) + } + return storeIDs +} + +// GetFollowerStores returns all Stores that contains the region's follower peer. +func (s *StoresInfo) GetFollowerStores(region *RegionInfo) []*StoreInfo { + s.RLock() + defer s.RUnlock() + var stores []*StoreInfo + for id := range region.GetFollowers() { + if store, ok := s.stores[id]; ok && store != nil { + stores = append(stores, store) + } + } + return stores +} + +// GetRegionStores returns all Stores that contains the region's peer. +func (s *StoresInfo) GetRegionStores(region *RegionInfo) []*StoreInfo { + s.RLock() + defer s.RUnlock() + var stores []*StoreInfo + for id := range region.GetStoreIDs() { + if store, ok := s.stores[id]; ok && store != nil { + stores = append(stores, store) + } + } + return stores +} + +// GetLeaderStore returns all Stores that contains the region's leader peer. +func (s *StoresInfo) GetLeaderStore(region *RegionInfo) *StoreInfo { + s.RLock() + defer s.RUnlock() + if store, ok := s.stores[region.GetLeader().GetStoreId()]; ok && store != nil { + return store + } + return nil +} + +// GetStoreCount returns the total count of storeInfo. +func (s *StoresInfo) GetStoreCount() int { + s.RLock() + defer s.RUnlock() + return len(s.stores) +} + +// GetNonWitnessVoterStores returns all Stores that contains the non-witness's voter peer. +func (s *StoresInfo) GetNonWitnessVoterStores(region *RegionInfo) []*StoreInfo { + s.RLock() + defer s.RUnlock() + var stores []*StoreInfo + for id := range region.GetNonWitnessVoters() { + if store, ok := s.stores[id]; ok && store != nil { + stores = append(stores, store) + } + } + return stores +} + +/* Stores write operations */ + +// PutStore sets a StoreInfo with storeID. +func (s *StoresInfo) PutStore(store *StoreInfo) { + s.Lock() + defer s.Unlock() + s.putStoreLocked(store) +} + +// putStoreLocked sets a StoreInfo with storeID. +func (s *StoresInfo) putStoreLocked(store *StoreInfo) { s.stores[store.GetID()] = store } +// ResetStores resets the store cache. +func (s *StoresInfo) ResetStores() { + s.Lock() + defer s.Unlock() + s.stores = make(map[uint64]*StoreInfo) +} + // PauseLeaderTransfer pauses a StoreInfo with storeID. func (s *StoresInfo) PauseLeaderTransfer(storeID uint64) error { + s.Lock() + defer s.Unlock() store, ok := s.stores[storeID] if !ok { return errs.ErrStoreNotFound.FastGenByArgs(storeID) @@ -665,6 +793,8 @@ func (s *StoresInfo) PauseLeaderTransfer(storeID uint64) error { // ResumeLeaderTransfer cleans a store's pause state. The store can be selected // as source or target of TransferLeader again. func (s *StoresInfo) ResumeLeaderTransfer(storeID uint64) { + s.Lock() + defer s.Unlock() store, ok := s.stores[storeID] if !ok { log.Warn("try to clean a store's pause state, but it is not found. It may be cleanup", @@ -677,6 +807,8 @@ func (s *StoresInfo) ResumeLeaderTransfer(storeID uint64) { // SlowStoreEvicted marks a store as a slow store and prevents transferring // leader to the store func (s *StoresInfo) SlowStoreEvicted(storeID uint64) error { + s.Lock() + defer s.Unlock() store, ok := s.stores[storeID] if !ok { return errs.ErrStoreNotFound.FastGenByArgs(storeID) @@ -690,6 +822,8 @@ func (s *StoresInfo) SlowStoreEvicted(storeID uint64) error { // SlowStoreRecovered cleans the evicted state of a store. 
func (s *StoresInfo) SlowStoreRecovered(storeID uint64) { + s.Lock() + defer s.Unlock() store, ok := s.stores[storeID] if !ok { log.Warn("try to clean a store's evicted as a slow store state, but it is not found. It may be cleanup", @@ -702,6 +836,8 @@ func (s *StoresInfo) SlowStoreRecovered(storeID uint64) { // SlowTrendEvicted marks a store as a slow trend and prevents transferring // leader to the store func (s *StoresInfo) SlowTrendEvicted(storeID uint64) error { + s.Lock() + defer s.Unlock() store, ok := s.stores[storeID] if !ok { return errs.ErrStoreNotFound.FastGenByArgs(storeID) @@ -715,6 +851,8 @@ func (s *StoresInfo) SlowTrendEvicted(storeID uint64) error { // SlowTrendRecovered cleans the evicted by trend state of a store. func (s *StoresInfo) SlowTrendRecovered(storeID uint64) { + s.Lock() + defer s.Unlock() store, ok := s.stores[storeID] if !ok { log.Warn("try to clean a store's evicted by trend as a slow store state, but it is not found. It may be cleanup", @@ -726,76 +864,24 @@ func (s *StoresInfo) SlowTrendRecovered(storeID uint64) { // ResetStoreLimit resets the limit for a specific store. func (s *StoresInfo) ResetStoreLimit(storeID uint64, limitType storelimit.Type, ratePerSec ...float64) { + s.Lock() + defer s.Unlock() if store, ok := s.stores[storeID]; ok { s.stores[storeID] = store.Clone(ResetStoreLimit(limitType, ratePerSec...)) } } -// GetStores gets a complete set of StoreInfo. -func (s *StoresInfo) GetStores() []*StoreInfo { - stores := make([]*StoreInfo, 0, len(s.stores)) - for _, store := range s.stores { - stores = append(stores, store) - } - return stores -} - -// GetMetaStores gets a complete set of metapb.Store. -func (s *StoresInfo) GetMetaStores() []*metapb.Store { - stores := make([]*metapb.Store, 0, len(s.stores)) - for _, store := range s.stores { - stores = append(stores, store.GetMeta()) - } - return stores -} - // DeleteStore deletes tombstone record form store func (s *StoresInfo) DeleteStore(store *StoreInfo) { + s.Lock() + defer s.Unlock() delete(s.stores, store.GetID()) } -// GetStoreCount returns the total count of storeInfo. -func (s *StoresInfo) GetStoreCount() int { - return len(s.stores) -} - -// SetLeaderCount sets the leader count to a storeInfo. -func (s *StoresInfo) SetLeaderCount(storeID uint64, leaderCount int) { - if store, ok := s.stores[storeID]; ok { - s.stores[storeID] = store.Clone(SetLeaderCount(leaderCount)) - } -} - -// SetRegionCount sets the region count to a storeInfo. -func (s *StoresInfo) SetRegionCount(storeID uint64, regionCount int) { - if store, ok := s.stores[storeID]; ok { - s.stores[storeID] = store.Clone(SetRegionCount(regionCount)) - } -} - -// SetPendingPeerCount sets the pending count to a storeInfo. -func (s *StoresInfo) SetPendingPeerCount(storeID uint64, pendingPeerCount int) { - if store, ok := s.stores[storeID]; ok { - s.stores[storeID] = store.Clone(SetPendingPeerCount(pendingPeerCount)) - } -} - -// SetLeaderSize sets the leader size to a storeInfo. -func (s *StoresInfo) SetLeaderSize(storeID uint64, leaderSize int64) { - if store, ok := s.stores[storeID]; ok { - s.stores[storeID] = store.Clone(SetLeaderSize(leaderSize)) - } -} - -// SetRegionSize sets the region size to a storeInfo. -func (s *StoresInfo) SetRegionSize(storeID uint64, regionSize int64) { - if store, ok := s.stores[storeID]; ok { - s.stores[storeID] = store.Clone(SetRegionSize(regionSize)) - } -} - // UpdateStoreStatus updates the information of the store. 
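// The StoresInfo changes above follow a common Go pattern: every exported
// method acquires the embedded RWMutex itself, while an unexported ...Locked
// helper assumes the write lock is already held, so methods such as
// UpdateStoreStatus can compose without double-locking. A generic sketch of
// that pattern with simplified types, for illustration only:
package main

import "sync"

type item struct{ id uint64 }

type cache struct {
	sync.RWMutex
	items map[uint64]*item
}

// Get is a read operation: it only needs the read lock.
func (c *cache) Get(id uint64) *item {
	c.RLock()
	defer c.RUnlock()
	return c.items[id]
}

// Put takes the write lock and delegates to the lock-free helper.
func (c *cache) Put(it *item) {
	c.Lock()
	defer c.Unlock()
	c.putLocked(it)
}

// putLocked assumes the caller already holds the write lock.
func (c *cache) putLocked(it *item) {
	c.items[it.id] = it
}

// Update composes a read-modify-write under a single write lock,
// mirroring how UpdateStoreStatus calls putStoreLocked.
func (c *cache) Update(id uint64, f func(*item) *item) {
	c.Lock()
	defer c.Unlock()
	if old, ok := c.items[id]; ok {
		c.putLocked(f(old))
	}
}

func main() {
	c := &cache{items: make(map[uint64]*item)}
	c.Put(&item{id: 1})
	_ = c.Get(1)
	c.Update(1, func(old *item) *item { return old })
}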
func (s *StoresInfo) UpdateStoreStatus(storeID uint64, leaderCount, regionCount, witnessCount, learnerCount, pendingPeerCount int, leaderSize int64, regionSize int64) { + s.Lock() + defer s.Unlock() if store, ok := s.stores[storeID]; ok { newStore := store.ShallowClone(SetLeaderCount(leaderCount), SetRegionCount(regionCount), @@ -804,7 +890,7 @@ func (s *StoresInfo) UpdateStoreStatus(storeID uint64, leaderCount, regionCount, SetPendingPeerCount(pendingPeerCount), SetLeaderSize(leaderSize), SetRegionSize(regionSize)) - s.SetStore(newStore) + s.putStoreLocked(newStore) } } diff --git a/pkg/core/store_stats.go b/pkg/core/store_stats.go index bcc90a58a2b..d68f8b8e43c 100644 --- a/pkg/core/store_stats.go +++ b/pkg/core/store_stats.go @@ -18,6 +18,7 @@ import ( "github.com/pingcap/kvproto/pkg/pdpb" "github.com/tikv/pd/pkg/movingaverage" "github.com/tikv/pd/pkg/utils/syncutil" + "github.com/tikv/pd/pkg/utils/typeutil" ) type storeStats struct { @@ -56,10 +57,8 @@ func (ss *storeStats) GetStoreStats() *pdpb.StoreStats { // CloneStoreStats returns the statistics information cloned from the store. func (ss *storeStats) CloneStoreStats() *pdpb.StoreStats { ss.mu.RLock() - b, _ := ss.rawStats.Marshal() + stats := typeutil.DeepClone(ss.rawStats, StoreStatsFactory) ss.mu.RUnlock() - stats := &pdpb.StoreStats{} - stats.Unmarshal(b) return stats } diff --git a/pkg/core/store_test.go b/pkg/core/store_test.go index 67618a63ea9..5cb324e5635 100644 --- a/pkg/core/store_test.go +++ b/pkg/core/store_test.go @@ -62,7 +62,7 @@ func TestDistinctScore(t *testing.T) { re.Equal(float64(0), DistinctScore(labels, stores, store)) } -func TestCloneStore(t *testing.T) { +func TestCloneStore(_ *testing.T) { meta := &metapb.Store{Id: 1, Address: "mock://tikv-1", Labels: []*metapb.StoreLabel{{Key: "zone", Value: "z1"}, {Key: "host", Value: "h1"}}} store := NewStoreInfo(meta) start := time.Now() diff --git a/pkg/core/storelimit/limit_test.go b/pkg/core/storelimit/limit_test.go index 946729f8ce2..e11618767a1 100644 --- a/pkg/core/storelimit/limit_test.go +++ b/pkg/core/storelimit/limit_test.go @@ -45,7 +45,6 @@ func TestStoreLimit(t *testing.T) { } func TestSlidingWindow(t *testing.T) { - t.Parallel() re := require.New(t) capacity := int64(defaultWindowSize) s := NewSlidingWindows() @@ -92,7 +91,6 @@ func TestSlidingWindow(t *testing.T) { } func TestWindow(t *testing.T) { - t.Parallel() re := require.New(t) capacity := int64(100 * 10) s := newWindow(capacity) @@ -101,18 +99,18 @@ func TestWindow(t *testing.T) { token := capacity + 10 re.True(s.take(token)) re.False(s.take(token)) - re.EqualValues(s.ack(token), 0) + re.EqualValues(0, s.ack(token)) re.True(s.take(token)) - re.EqualValues(s.ack(token), 0) + re.EqualValues(0, s.ack(token)) re.Equal(s.ack(token), token) - re.EqualValues(s.getUsed(), 0) + re.EqualValues(0, s.getUsed()) // case2: the capacity of the window must greater than the minSnapSize. 
s.reset(minSnapSize - 1) - re.EqualValues(s.capacity, minSnapSize) + re.EqualValues(minSnapSize, s.capacity) re.True(s.take(minSnapSize)) - re.EqualValues(s.ack(minSnapSize*2), minSnapSize) - re.EqualValues(s.getUsed(), 0) + re.EqualValues(minSnapSize, s.ack(minSnapSize*2)) + re.EqualValues(0, s.getUsed()) } func TestFeedback(t *testing.T) { diff --git a/pkg/core/storelimit/sliding_window.go b/pkg/core/storelimit/sliding_window.go index 0a70eb548d0..8feb0a2094d 100644 --- a/pkg/core/storelimit/sliding_window.go +++ b/pkg/core/storelimit/sliding_window.go @@ -50,7 +50,7 @@ func NewSlidingWindows() *SlidingWindows { } // Version returns v2 -func (s *SlidingWindows) Version() string { +func (*SlidingWindows) Version() string { return VersionV2 } @@ -75,8 +75,7 @@ func (s *SlidingWindows) Feedback(e float64) { } // Reset does nothing because the capacity depends on the feedback. -func (s *SlidingWindows) Reset(_ float64, _ Type) { -} +func (*SlidingWindows) Reset(_ float64, _ Type) {} func (s *SlidingWindows) set(cap float64, typ Type) { if typ != SendSnapshot { diff --git a/pkg/core/storelimit/store_limit.go b/pkg/core/storelimit/store_limit.go index dc1de88e09f..e35ec773d80 100644 --- a/pkg/core/storelimit/store_limit.go +++ b/pkg/core/storelimit/store_limit.go @@ -17,6 +17,7 @@ package storelimit import ( "github.com/tikv/pd/pkg/core/constant" "github.com/tikv/pd/pkg/ratelimit" + "github.com/tikv/pd/pkg/utils/syncutil" ) const ( @@ -82,15 +83,15 @@ func NewStoreRateLimit(ratePerSec float64) StoreLimit { } // Ack does nothing. -func (l *StoreRateLimit) Ack(_ int64, _ Type) {} +func (*StoreRateLimit) Ack(_ int64, _ Type) {} // Version returns v1 -func (l *StoreRateLimit) Version() string { +func (*StoreRateLimit) Version() string { return VersionV1 } // Feedback does nothing. -func (l *StoreRateLimit) Feedback(_ float64) {} +func (*StoreRateLimit) Feedback(_ float64) {} // Available returns the number of available tokens. // notice that the priority level is not used. @@ -106,7 +107,7 @@ func (l *StoreRateLimit) Rate(typ Type) float64 { if l.limits[typ] == nil { return 0.0 } - return l.limits[typ].ratePerSec + return l.limits[typ].GetRatePerSec() } // Take takes count tokens from the bucket without blocking. @@ -128,12 +129,15 @@ func (l *StoreRateLimit) Reset(rate float64, typ Type) { // limit the operators of a store type limit struct { - limiter *ratelimit.RateLimiter - ratePerSec float64 + limiter *ratelimit.RateLimiter + ratePerSecMutex syncutil.RWMutex + ratePerSec float64 } // Reset resets the rate limit. func (l *limit) Reset(ratePerSec float64) { + l.ratePerSecMutex.Lock() + defer l.ratePerSecMutex.Unlock() if l.ratePerSec == ratePerSec { return } @@ -155,6 +159,8 @@ func (l *limit) Reset(ratePerSec float64) { // Available returns the number of available tokens // It returns true if the rate per second is zero. func (l *limit) Available(n int64) bool { + l.ratePerSecMutex.RLock() + defer l.ratePerSecMutex.RUnlock() if l.ratePerSec == 0 { return true } @@ -164,8 +170,16 @@ func (l *limit) Available(n int64) bool { // Take takes count tokens from the bucket without blocking. 
func (l *limit) Take(count int64) bool { + l.ratePerSecMutex.RLock() + defer l.ratePerSecMutex.RUnlock() if l.ratePerSec == 0 { return true } return l.limiter.AllowN(int(count)) } + +func (l *limit) GetRatePerSec() float64 { + l.ratePerSecMutex.RLock() + defer l.ratePerSecMutex.RUnlock() + return l.ratePerSec +} diff --git a/pkg/dashboard/adapter/config.go b/pkg/dashboard/adapter/config.go index a1661b84f2b..348b146c854 100644 --- a/pkg/dashboard/adapter/config.go +++ b/pkg/dashboard/adapter/config.go @@ -31,7 +31,7 @@ func GenDashboardConfig(srv *server.Server) (*config.Config, error) { dashboardCfg := config.Default() dashboardCfg.DataDir = cfg.DataDir - dashboardCfg.PDEndPoint = etcdCfg.ACUrls[0].String() + dashboardCfg.PDEndPoint = etcdCfg.AdvertiseClientUrls[0].String() dashboardCfg.PublicPathPrefix = cfg.Dashboard.PublicPathPrefix dashboardCfg.EnableTelemetry = cfg.Dashboard.EnableTelemetry dashboardCfg.EnableExperimental = cfg.Dashboard.EnableExperimental diff --git a/pkg/dashboard/adapter/redirector_test.go b/pkg/dashboard/adapter/redirector_test.go index fff052f1d50..7767a6fda34 100644 --- a/pkg/dashboard/adapter/redirector_test.go +++ b/pkg/dashboard/adapter/redirector_test.go @@ -42,14 +42,14 @@ func TestRedirectorTestSuite(t *testing.T) { func (suite *redirectorTestSuite) SetupSuite() { suite.tempText = "temp1" - suite.tempServer = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + suite.tempServer = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { _, _ = io.WriteString(w, suite.tempText) })) suite.testName = "test1" suite.redirector = NewRedirector(suite.testName, nil) suite.noRedirectHTTPClient = &http.Client{ - CheckRedirect: func(req *http.Request, via []*http.Request) error { + CheckRedirect: func(*http.Request, []*http.Request) error { // ErrUseLastResponse can be returned by Client.CheckRedirect hooks to // control how redirects are processed. If returned, the next request // is not sent and the most recent response is returned with its body diff --git a/pkg/dashboard/dashboard.go b/pkg/dashboard/dashboard.go index 9cd61a6f332..998127d0f1b 100644 --- a/pkg/dashboard/dashboard.go +++ b/pkg/dashboard/dashboard.go @@ -69,7 +69,7 @@ func GetServiceBuilders() []server.HandlerBuilder { // The order of execution must be sequential. return []server.HandlerBuilder{ // Dashboard API Service - func(ctx context.Context, srv *server.Server) (http.Handler, apiutil.APIServiceGroup, error) { + func(_ context.Context, srv *server.Server) (http.Handler, apiutil.APIServiceGroup, error) { distroutil.MustLoadAndReplaceStrings() if cfg, err = adapter.GenDashboardConfig(srv); err != nil { diff --git a/pkg/dashboard/uiserver/embedded_assets_rewriter.go b/pkg/dashboard/uiserver/embedded_assets_rewriter.go index 2a5b4a5b3b6..d19db01936f 100644 --- a/pkg/dashboard/uiserver/embedded_assets_rewriter.go +++ b/pkg/dashboard/uiserver/embedded_assets_rewriter.go @@ -28,6 +28,7 @@ import ( var once sync.Once // Assets returns the Assets FileSystem of the dashboard UI +// NOTE: if you see "undefined: assets" error, please run `make dashboard-ui` in the root directory of the repository. 
func Assets(cfg *config.Config) http.FileSystem { once.Do(func() { resPath := distroutil.MustGetResPath() diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index 02f519dbc75..f252eabe072 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -34,11 +34,12 @@ import ( ) const ( - defaultCampaignTimesSlot = 10 - watchLoopUnhealthyTimeout = 60 * time.Second - campaignTimesRecordTimeout = 5 * time.Minute + defaultCampaignTimesSlot = 10 + watchLoopUnhealthyTimeout = 60 * time.Second ) +var campaignTimesRecordTimeout = 5 * time.Minute + // GetLeader gets the corresponding leader from etcd by given leaderPath (as the key). func GetLeader(c *clientv3.Client, leaderPath string) (*pdpb.Member, int64, error) { leader := &pdpb.Member{} @@ -114,6 +115,7 @@ func (ls *Leadership) GetLeaderKey() string { } // GetCampaignTimesNum is used to get the campaign times of the leader within `campaignTimesRecordTimeout`. +// Need to make sure `AddCampaignTimes` is called before this function. func (ls *Leadership) GetCampaignTimesNum() int { if ls == nil { return 0 @@ -129,8 +131,8 @@ func (ls *Leadership) ResetCampaignTimes() { ls.campaignTimes = make([]time.Time, 0, defaultCampaignTimesSlot) } -// addCampaignTimes is used to add the campaign times of the leader. -func (ls *Leadership) addCampaignTimes() { +// AddCampaignTimes is used to add the campaign times of the leader. +func (ls *Leadership) AddCampaignTimes() { if ls == nil { return } @@ -138,7 +140,7 @@ func (ls *Leadership) addCampaignTimes() { if time.Since(ls.campaignTimes[i]) > campaignTimesRecordTimeout { // remove the time which is more than `campaignTimesRecordTimeout` // array is sorted by time - ls.campaignTimes = ls.campaignTimes[i:] + ls.campaignTimes = ls.campaignTimes[i+1:] break } } @@ -148,7 +150,6 @@ func (ls *Leadership) addCampaignTimes() { // Campaign is used to campaign the leader with given lease and returns a leadership func (ls *Leadership) Campaign(leaseTimeout int64, leaderData string, cmps ...clientv3.Cmp) error { - ls.addCampaignTimes() ls.leaderValue = leaderData // Create a new lease to campaign newLease := &lease{ @@ -160,7 +161,7 @@ func (ls *Leadership) Campaign(leaseTimeout int64, leaderData string, cmps ...cl failpoint.Inject("skipGrantLeader", func(val failpoint.Value) { var member pdpb.Member - member.Unmarshal([]byte(leaderData)) + _ = member.Unmarshal([]byte(leaderData)) name, ok := val.(string) if ok && member.Name == name { failpoint.Return(errors.Errorf("failed to grant lease")) diff --git a/pkg/election/leadership_test.go b/pkg/election/leadership_test.go index be1922fe381..40f0bcbee23 100644 --- a/pkg/election/leadership_test.go +++ b/pkg/election/leadership_test.go @@ -117,35 +117,35 @@ func TestExitWatch(t *testing.T) { re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/election/fastTick", "return(true)")) re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/utils/etcdutil/fastTick", "return(true)")) // Case1: close the client before the watch loop starts - checkExitWatch(t, leaderKey, func(server *embed.Etcd, client *clientv3.Client) func() { + checkExitWatch(t, leaderKey, func(_ *embed.Etcd, client *clientv3.Client) func() { re.NoError(failpoint.Enable("github.com/tikv/pd/server/delayWatcher", `pause`)) client.Close() re.NoError(failpoint.Disable("github.com/tikv/pd/server/delayWatcher")) return func() {} }) // Case2: close the client when the watch loop is running - checkExitWatch(t, leaderKey, func(server *embed.Etcd, client *clientv3.Client) func() { + 
checkExitWatch(t, leaderKey, func(_ *embed.Etcd, client *clientv3.Client) func() { // Wait for the watch loop to start time.Sleep(500 * time.Millisecond) client.Close() return func() {} }) // Case3: delete the leader key - checkExitWatch(t, leaderKey, func(server *embed.Etcd, client *clientv3.Client) func() { + checkExitWatch(t, leaderKey, func(_ *embed.Etcd, client *clientv3.Client) func() { leaderKey := leaderKey _, err := client.Delete(context.Background(), leaderKey) re.NoError(err) return func() {} }) // Case4: close the server before the watch loop starts - checkExitWatch(t, leaderKey, func(server *embed.Etcd, client *clientv3.Client) func() { + checkExitWatch(t, leaderKey, func(server *embed.Etcd, _ *clientv3.Client) func() { re.NoError(failpoint.Enable("github.com/tikv/pd/server/delayWatcher", `pause`)) server.Close() re.NoError(failpoint.Disable("github.com/tikv/pd/server/delayWatcher")) return func() {} }) // Case5: close the server when the watch loop is running - checkExitWatch(t, leaderKey, func(server *embed.Etcd, client *clientv3.Client) func() { + checkExitWatch(t, leaderKey, func(server *embed.Etcd, _ *clientv3.Client) func() { // Wait for the watch loop to start time.Sleep(500 * time.Millisecond) server.Close() @@ -155,7 +155,7 @@ func TestExitWatch(t *testing.T) { checkExitWatch(t, leaderKey, func(server *embed.Etcd, client *clientv3.Client) func() { cfg1 := server.Config() etcd2 := etcdutil.MustAddEtcdMember(t, &cfg1, client) - client2, err := etcdutil.CreateEtcdClient(nil, etcd2.Config().LCUrls) + client2, err := etcdutil.CreateEtcdClient(nil, etcd2.Config().ListenClientUrls) re.NoError(err) // close the original leader server.Server.HardStop() @@ -189,7 +189,7 @@ func checkExitWatch(t *testing.T, leaderKey string, injectFunc func(server *embe re := require.New(t) servers, client1, clean := etcdutil.NewTestEtcdCluster(t, 1) defer clean() - client2, err := etcdutil.CreateEtcdClient(nil, servers[0].Config().LCUrls) + client2, err := etcdutil.CreateEtcdClient(nil, servers[0].Config().ListenClientUrls) re.NoError(err) defer client2.Close() @@ -225,7 +225,7 @@ func TestRequestProgress(t *testing.T) { defer os.RemoveAll(fname) servers, client1, clean := etcdutil.NewTestEtcdCluster(t, 1) defer clean() - client2, err := etcdutil.CreateEtcdClient(nil, servers[0].Config().LCUrls) + client2, err := etcdutil.CreateEtcdClient(nil, servers[0].Config().ListenClientUrls) re.NoError(err) defer client2.Close() @@ -262,3 +262,36 @@ func TestRequestProgress(t *testing.T) { checkWatcherRequestProgress(false) checkWatcherRequestProgress(true) } + +func TestCampaignTimes(t *testing.T) { + re := require.New(t) + _, client, clean := etcdutil.NewTestEtcdCluster(t, 1) + defer clean() + leadership := NewLeadership(client, "test_leader", "test_leader") + + // all the campaign times are within the timeout. + campaignTimesRecordTimeout = 10 * time.Second + defer func() { + campaignTimesRecordTimeout = 5 * time.Minute + }() + for i := 0; i < 3; i++ { + leadership.AddCampaignTimes() + time.Sleep(100 * time.Millisecond) + } + re.Equal(3, leadership.GetCampaignTimesNum()) + + // only the last 2 records are valid. + campaignTimesRecordTimeout = 200 * time.Millisecond + for i := 0; i < 3; i++ { + leadership.AddCampaignTimes() + time.Sleep(100 * time.Millisecond) + } + re.Equal(2, leadership.GetCampaignTimesNum()) + + time.Sleep(200 * time.Millisecond) + // need to wait for the next addCampaignTimes to update the campaign time. 
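// The AddCampaignTimes fix above changes the prune expression from
// campaignTimes[i:] to campaignTimes[i+1:]. A standalone sketch of that
// pruning step; the loop header is outside the hunk, so this assumes what the
// surrounding comment states: the slice is sorted by time and is scanned from
// the newest entry backwards, meaning the first stale index i implies entries
// 0..i are all stale and only times[i+1:] should survive (times[i:] would keep
// one stale record).
package main

import (
	"fmt"
	"time"
)

// pruneStale drops every timestamp older than timeout from an ascending slice.
func pruneStale(times []time.Time, timeout time.Duration) []time.Time {
	for i := len(times) - 1; i >= 0; i-- {
		if time.Since(times[i]) > timeout {
			return times[i+1:] // index i itself is stale, keep only the newer suffix
		}
	}
	return times
}

func main() {
	now := time.Now()
	times := []time.Time{
		now.Add(-10 * time.Minute), // stale
		now.Add(-1 * time.Minute),  // fresh
		now,                        // fresh
	}
	fmt.Println(len(pruneStale(times, 5*time.Minute))) // 2
}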
+ re.Equal(2, leadership.GetCampaignTimesNum()) + // check campaign leader frequency. + leadership.AddCampaignTimes() + re.Equal(1, leadership.GetCampaignTimesNum()) +} diff --git a/pkg/election/lease.go b/pkg/election/lease.go index eada4f8786d..45d702def5e 100644 --- a/pkg/election/lease.go +++ b/pkg/election/lease.go @@ -84,7 +84,9 @@ func (l *lease) Close() error { if l.ID.Load() != nil { leaseID = l.ID.Load().(clientv3.LeaseID) } - l.lease.Revoke(ctx, leaseID) + if _, err := l.lease.Revoke(ctx, leaseID); err != nil { + log.Error("revoke lease failed", zap.String("purpose", l.Purpose), errs.ZapError(err)) + } return l.lease.Close() } @@ -135,7 +137,7 @@ func (l *lease) KeepAlive(ctx context.Context) { // https://pkg.go.dev/time@master#Timer.Reset timer.Reset(l.leaseTimeout) case <-timer.C: - log.Info("lease timeout", zap.Time("expire", l.expireTime.Load().(time.Time)), zap.String("purpose", l.Purpose)) + log.Info("keep alive lease too slow", zap.Duration("timeout-duration", l.leaseTimeout), zap.Time("actual-expire", l.expireTime.Load().(time.Time)), zap.String("purpose", l.Purpose)) return case <-ctx.Done(): return @@ -154,11 +156,14 @@ func (l *lease) keepAliveWorker(ctx context.Context, interval time.Duration) <-c log.Info("start lease keep alive worker", zap.Duration("interval", interval), zap.String("purpose", l.Purpose)) defer log.Info("stop lease keep alive worker", zap.String("purpose", l.Purpose)) - + lastTime := time.Now() for { - go func() { + start := time.Now() + if start.Sub(lastTime) > interval*2 { + log.Warn("the interval between keeping alive lease is too long", zap.Time("last-time", lastTime)) + } + go func(start time.Time) { defer logutil.LogPanic() - start := time.Now() ctx1, cancel := context.WithTimeout(ctx, l.leaseTimeout) defer cancel() var leaseID clientv3.LeaseID @@ -180,12 +185,13 @@ func (l *lease) keepAliveWorker(ctx context.Context, interval time.Duration) <-c } else { log.Error("keep alive response ttl is zero", zap.String("purpose", l.Purpose)) } - }() + }(start) select { case <-ctx.Done(): return case <-ticker.C: + lastTime = start } } }() diff --git a/pkg/encryption/OWNERS b/pkg/encryption/OWNERS new file mode 100644 index 00000000000..aa02465dbd9 --- /dev/null +++ b/pkg/encryption/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs at https://go.k8s.io/owners +options: + no_parent_owners: true +filters: + "(OWNERS|config\\.go)$": + approvers: + - sig-critical-approvers-config diff --git a/pkg/encryption/config_test.go b/pkg/encryption/config_test.go index 6f7e4a41b03..4134d46c2f3 100644 --- a/pkg/encryption/config_test.go +++ b/pkg/encryption/config_test.go @@ -23,7 +23,6 @@ import ( ) func TestAdjustDefaultValue(t *testing.T) { - t.Parallel() re := require.New(t) config := &Config{} err := config.Adjust() @@ -35,21 +34,18 @@ func TestAdjustDefaultValue(t *testing.T) { } func TestAdjustInvalidDataEncryptionMethod(t *testing.T) { - t.Parallel() re := require.New(t) config := &Config{DataEncryptionMethod: "unknown"} re.Error(config.Adjust()) } func TestAdjustNegativeRotationDuration(t *testing.T) { - t.Parallel() re := require.New(t) config := &Config{DataKeyRotationPeriod: typeutil.NewDuration(time.Duration(int64(-1)))} re.Error(config.Adjust()) } func TestAdjustInvalidMasterKeyType(t *testing.T) { - t.Parallel() re := require.New(t) config := &Config{MasterKey: MasterKeyConfig{Type: "unknown"}} re.Error(config.Adjust()) diff --git a/pkg/encryption/crypter.go b/pkg/encryption/crypter.go index 16de0500f92..b1f8631ae26 100644 --- a/pkg/encryption/crypter.go +++ 
b/pkg/encryption/crypter.go @@ -82,7 +82,7 @@ func newIV(ivLength int) ([]byte, error) { } if n != ivLength { return nil, errs.ErrEncryptionGenerateIV.GenWithStack( - "iv length exepcted %d vs actual %d", ivLength, n) + "iv length expected %d vs actual %d", ivLength, n) } return iv, nil } diff --git a/pkg/encryption/crypter_test.go b/pkg/encryption/crypter_test.go index 12a851d1563..9ac72bd7813 100644 --- a/pkg/encryption/crypter_test.go +++ b/pkg/encryption/crypter_test.go @@ -24,7 +24,6 @@ import ( ) func TestEncryptionMethodSupported(t *testing.T) { - t.Parallel() re := require.New(t) re.Error(CheckEncryptionMethodSupported(encryptionpb.EncryptionMethod_PLAINTEXT)) re.Error(CheckEncryptionMethodSupported(encryptionpb.EncryptionMethod_UNKNOWN)) @@ -34,7 +33,6 @@ func TestEncryptionMethodSupported(t *testing.T) { } func TestKeyLength(t *testing.T) { - t.Parallel() re := require.New(t) _, err := KeyLength(encryptionpb.EncryptionMethod_PLAINTEXT) re.Error(err) @@ -52,7 +50,6 @@ func TestKeyLength(t *testing.T) { } func TestNewIv(t *testing.T) { - t.Parallel() re := require.New(t) ivCtr, err := NewIvCTR() re.NoError(err) @@ -63,7 +60,6 @@ func TestNewIv(t *testing.T) { } func TestNewDataKey(t *testing.T) { - t.Parallel() re := require.New(t) for _, method := range []encryptionpb.EncryptionMethod{ encryptionpb.EncryptionMethod_AES128_CTR, @@ -82,7 +78,6 @@ func TestNewDataKey(t *testing.T) { } func TestAesGcmCrypter(t *testing.T) { - t.Parallel() re := require.New(t) key, err := hex.DecodeString("ed568fbd8c8018ed2d042a4e5d38d6341486922d401d2022fb81e47c900d3f07") re.NoError(err) diff --git a/pkg/encryption/key_manager_test.go b/pkg/encryption/key_manager_test.go index 96bdb3c0eb5..26453eeb5b3 100644 --- a/pkg/encryption/key_manager_test.go +++ b/pkg/encryption/key_manager_test.go @@ -509,7 +509,7 @@ func TestSetLeadershipWithEncryptionMethodChanged(t *testing.T) { } err := saveKeys(leadership, masterKeyMeta, keys, defaultKeyManagerHelper()) re.NoError(err) - // Config with different encrption method. + // Config with different encryption method. config := &Config{ DataEncryptionMethod: "aes256-ctr", MasterKey: MasterKeyConfig{ @@ -579,7 +579,7 @@ func TestSetLeadershipWithCurrentKeyExposed(t *testing.T) { } err := saveKeys(leadership, masterKeyMeta, keys, defaultKeyManagerHelper()) re.NoError(err) - // Config with different encrption method. + // Config with different encryption method. 
config := &Config{ DataEncryptionMethod: "aes128-ctr", MasterKey: MasterKeyConfig{ @@ -774,7 +774,7 @@ func TestSetLeadershipMasterKeyWithCiphertextKey(t *testing.T) { outputMasterKey, _ := hex.DecodeString(testMasterKey) outputCiphertextKey, _ := hex.DecodeString(testCiphertextKey) helper.newMasterKey = func( - meta *encryptionpb.MasterKey, + _ *encryptionpb.MasterKey, ciphertext []byte, ) (*MasterKey, error) { if newMasterKeyCalled < 2 { @@ -905,7 +905,7 @@ func TestKeyRotation(t *testing.T) { mockNow := int64(1601679533) helper.now = func() time.Time { return time.Unix(atomic.LoadInt64(&mockNow), 0) } mockTick := make(chan time.Time) - helper.tick = func(ticker *time.Ticker) <-chan time.Time { return mockTick } + helper.tick = func(_ *time.Ticker) <-chan time.Time { return mockTick } // Listen on watcher event reloadEvent := make(chan struct{}, 10) helper.eventAfterReloadByWatcher = func() { @@ -1001,7 +1001,7 @@ func TestKeyRotationConflict(t *testing.T) { mockNow := int64(1601679533) helper.now = func() time.Time { return time.Unix(atomic.LoadInt64(&mockNow), 0) } mockTick := make(chan time.Time, 10) - helper.tick = func(ticker *time.Ticker) <-chan time.Time { return mockTick } + helper.tick = func(_ *time.Ticker) <-chan time.Time { return mockTick } // Listen on ticker event tickerEvent := make(chan struct{}, 10) helper.eventAfterTicker = func() { diff --git a/pkg/encryption/kms.go b/pkg/encryption/kms.go index 3e70b2deeb5..99dcf9619a3 100644 --- a/pkg/encryption/kms.go +++ b/pkg/encryption/kms.go @@ -60,7 +60,7 @@ func newMasterKeyFromKMS( roleArn := os.Getenv(envAwsRoleArn) tokenFile := os.Getenv(envAwsWebIdentityTokenFile) sessionName := os.Getenv(envAwsRoleSessionName) - optFn := func(options *kms.Options) {} + optFn := func(*kms.Options) {} // Session name is optional. 
if roleArn != "" && tokenFile != "" { client := sts.NewFromConfig(cfg) @@ -90,7 +90,7 @@ func newMasterKeyFromKMS( } if len(output.Plaintext) != masterKeyLength { return nil, errs.ErrEncryptionKMS.GenWithStack( - "unexpected data key length generated from AWS KMS, expectd %d vs actual %d", + "unexpected data key length generated from AWS KMS, expected %d vs actual %d", masterKeyLength, len(output.Plaintext)) } masterKey = &MasterKey{ diff --git a/pkg/encryption/master_key_test.go b/pkg/encryption/master_key_test.go index 4bc08dab7a5..31962e9e99d 100644 --- a/pkg/encryption/master_key_test.go +++ b/pkg/encryption/master_key_test.go @@ -24,7 +24,6 @@ import ( ) func TestPlaintextMasterKey(t *testing.T) { - t.Parallel() re := require.New(t) config := &encryptionpb.MasterKey{ Backend: &encryptionpb.MasterKey_Plaintext{ @@ -50,7 +49,6 @@ func TestPlaintextMasterKey(t *testing.T) { } func TestEncrypt(t *testing.T) { - t.Parallel() re := require.New(t) keyHex := "2f07ec61e5a50284f47f2b402a962ec672e500b26cb3aa568bb1531300c74806" // #nosec G101 key, err := hex.DecodeString(keyHex) @@ -66,7 +64,6 @@ func TestEncrypt(t *testing.T) { } func TestDecrypt(t *testing.T) { - t.Parallel() re := require.New(t) keyHex := "2f07ec61e5a50284f47f2b402a962ec672e500b26cb3aa568bb1531300c74806" // #nosec G101 key, err := hex.DecodeString(keyHex) @@ -83,7 +80,6 @@ func TestDecrypt(t *testing.T) { } func TestNewFileMasterKeyMissingPath(t *testing.T) { - t.Parallel() re := require.New(t) config := &encryptionpb.MasterKey{ Backend: &encryptionpb.MasterKey_File{ @@ -97,7 +93,6 @@ func TestNewFileMasterKeyMissingPath(t *testing.T) { } func TestNewFileMasterKeyMissingFile(t *testing.T) { - t.Parallel() re := require.New(t) dir := t.TempDir() path := dir + "/key" @@ -113,7 +108,6 @@ func TestNewFileMasterKeyMissingFile(t *testing.T) { } func TestNewFileMasterKeyNotHexString(t *testing.T) { - t.Parallel() re := require.New(t) dir := t.TempDir() path := dir + "/key" @@ -130,7 +124,6 @@ func TestNewFileMasterKeyNotHexString(t *testing.T) { } func TestNewFileMasterKeyLengthMismatch(t *testing.T) { - t.Parallel() re := require.New(t) dir := t.TempDir() path := dir + "/key" @@ -147,7 +140,6 @@ func TestNewFileMasterKeyLengthMismatch(t *testing.T) { } func TestNewFileMasterKey(t *testing.T) { - t.Parallel() re := require.New(t) key := "2f07ec61e5a50284f47f2b402a962ec672e500b26cb3aa568bb1531300c74806" // #nosec G101 dir := t.TempDir() diff --git a/pkg/encryption/region_crypter.go b/pkg/encryption/region_crypter.go index 346e8a08da0..458c5b67d7b 100644 --- a/pkg/encryption/region_crypter.go +++ b/pkg/encryption/region_crypter.go @@ -41,7 +41,7 @@ func processRegionKeys(region *metapb.Region, key *encryptionpb.DataKey, iv []by } // EncryptRegion encrypt the region start key and end key, using the current key return from the -// key manager. The return is an encypted copy of the region, with Encryption meta updated. +// key manager. The return is an encrypted copy of the region, with Encryption meta updated. 
func EncryptRegion(region *metapb.Region, keyManager KeyManager) (*metapb.Region, error) { if region == nil { return nil, errs.ErrEncryptionEncryptRegion.GenWithStack("trying to encrypt nil region") diff --git a/pkg/encryption/region_crypter_test.go b/pkg/encryption/region_crypter_test.go index 5fd9778a8c0..b1ca558063c 100644 --- a/pkg/encryption/region_crypter_test.go +++ b/pkg/encryption/region_crypter_test.go @@ -70,7 +70,6 @@ func (m *testKeyManager) GetKey(keyID uint64) (*encryptionpb.DataKey, error) { } func TestNilRegion(t *testing.T) { - t.Parallel() re := require.New(t) m := newTestKeyManager() region, err := EncryptRegion(nil, m) @@ -81,7 +80,6 @@ func TestNilRegion(t *testing.T) { } func TestEncryptRegionWithoutKeyManager(t *testing.T) { - t.Parallel() re := require.New(t) region := &metapb.Region{ Id: 10, @@ -98,7 +96,6 @@ func TestEncryptRegionWithoutKeyManager(t *testing.T) { } func TestEncryptRegionWhileEncryptionDisabled(t *testing.T) { - t.Parallel() re := require.New(t) region := &metapb.Region{ Id: 10, @@ -117,7 +114,6 @@ func TestEncryptRegionWhileEncryptionDisabled(t *testing.T) { } func TestEncryptRegion(t *testing.T) { - t.Parallel() re := require.New(t) startKey := []byte("abc") endKey := []byte("xyz") @@ -152,7 +148,6 @@ func TestEncryptRegion(t *testing.T) { } func TestDecryptRegionNotEncrypted(t *testing.T) { - t.Parallel() re := require.New(t) region := &metapb.Region{ Id: 10, @@ -170,7 +165,6 @@ func TestDecryptRegionNotEncrypted(t *testing.T) { } func TestDecryptRegionWithoutKeyManager(t *testing.T) { - t.Parallel() re := require.New(t) region := &metapb.Region{ Id: 10, @@ -186,7 +180,6 @@ func TestDecryptRegionWithoutKeyManager(t *testing.T) { } func TestDecryptRegionWhileKeyMissing(t *testing.T) { - t.Parallel() re := require.New(t) keyID := uint64(3) m := newTestKeyManager() @@ -207,7 +200,6 @@ func TestDecryptRegionWhileKeyMissing(t *testing.T) { } func TestDecryptRegion(t *testing.T) { - t.Parallel() re := require.New(t) keyID := uint64(1) startKey := []byte("abc") diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index 8c3e914531b..1f56a821032 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -195,10 +195,11 @@ var ( // apiutil errors var ( - ErrRedirect = errors.Normalize("redirect failed", errors.RFCCodeText("PD:apiutil:ErrRedirect")) - ErrOptionNotExist = errors.Normalize("the option %s does not exist", errors.RFCCodeText("PD:apiutil:ErrOptionNotExist")) - // ErrRedirectToNotLeader is the error message for redirect to not leader. 
- ErrRedirectToNotLeader = errors.Normalize("redirect to not leader", errors.RFCCodeText("PD:apiutil:ErrRedirectToNotLeader")) + ErrRedirect = errors.Normalize("redirect failed", errors.RFCCodeText("PD:apiutil:ErrRedirect")) + ErrOptionNotExist = errors.Normalize("the option %s does not exist", errors.RFCCodeText("PD:apiutil:ErrOptionNotExist")) + ErrRedirectNoLeader = errors.Normalize("redirect finds no leader", errors.RFCCodeText("PD:apiutil:ErrRedirectNoLeader")) + ErrRedirectToNotLeader = errors.Normalize("redirect to not leader", errors.RFCCodeText("PD:apiutil:ErrRedirectToNotLeader")) + ErrRedirectToNotPrimary = errors.Normalize("redirect to not primary", errors.RFCCodeText("PD:apiutil:ErrRedirectToNotPrimary")) ) // grpcutil errors diff --git a/pkg/errs/errs_test.go b/pkg/errs/errs_test.go index d76c02dc110..01b7de461b8 100644 --- a/pkg/errs/errs_test.go +++ b/pkg/errs/errs_test.go @@ -43,7 +43,7 @@ func (w *testingWriter) Write(p []byte) (n int, err error) { return n, nil } -func (w *testingWriter) Sync() error { +func (*testingWriter) Sync() error { return nil } @@ -97,7 +97,6 @@ func TestError(t *testing.T) { } func TestErrorEqual(t *testing.T) { - t.Parallel() re := require.New(t) err1 := ErrSchedulerNotFound.FastGenByArgs() err2 := ErrSchedulerNotFound.FastGenByArgs() @@ -125,7 +124,7 @@ func TestErrorEqual(t *testing.T) { re.False(errors.ErrorEqual(err1, err2)) } -func TestZapError(t *testing.T) { +func TestZapError(_ *testing.T) { err := errors.New("test") log.Info("test", ZapError(err)) err1 := ErrSchedulerNotFound @@ -134,7 +133,6 @@ func TestZapError(t *testing.T) { } func TestErrorWithStack(t *testing.T) { - t.Parallel() re := require.New(t) conf := &log.Config{Level: "debug", File: log.FileLogConfig{}, DisableTimestamp: true} lg := newZapTestLogger(conf) diff --git a/pkg/keyspace/keyspace.go b/pkg/keyspace/keyspace.go index d84b3698f69..26fd4db10f0 100644 --- a/pkg/keyspace/keyspace.go +++ b/pkg/keyspace/keyspace.go @@ -321,7 +321,12 @@ func (manager *Manager) splitKeyspaceRegion(id uint32, waitRegionSplit bool) (er } defer func() { if err != nil { - cl.GetRegionLabeler().DeleteLabelRule(keyspaceRule.ID) + if err := cl.GetRegionLabeler().DeleteLabelRule(keyspaceRule.ID); err != nil { + log.Warn("[keyspace] failed to delete region label for keyspace", + zap.Uint32("keyspace-id", id), + zap.Error(err), + ) + } } }() @@ -343,20 +348,20 @@ func (manager *Manager) splitKeyspaceRegion(id uint32, waitRegionSplit bool) (er for { select { case <-ticker.C: - regionsInfo := manager.cluster.GetBasicCluster().RegionsInfo - region := regionsInfo.GetRegionByKey(rawLeftBound) + c := manager.cluster.GetBasicCluster() + region := c.GetRegionByKey(rawLeftBound) if region == nil || !bytes.Equal(region.GetStartKey(), rawLeftBound) { continue } - region = regionsInfo.GetRegionByKey(rawRightBound) + region = c.GetRegionByKey(rawRightBound) if region == nil || !bytes.Equal(region.GetStartKey(), rawRightBound) { continue } - region = regionsInfo.GetRegionByKey(txnLeftBound) + region = c.GetRegionByKey(txnLeftBound) if region == nil || !bytes.Equal(region.GetStartKey(), txnLeftBound) { continue } - region = regionsInfo.GetRegionByKey(txnRightBound) + region = c.GetRegionByKey(txnRightBound) if region == nil || !bytes.Equal(region.GetStartKey(), txnRightBound) { continue } diff --git a/pkg/keyspace/tso_keyspace_group.go b/pkg/keyspace/tso_keyspace_group.go index 5ed9747e923..29b8add740c 100644 --- a/pkg/keyspace/tso_keyspace_group.go +++ b/pkg/keyspace/tso_keyspace_group.go @@ -36,6 +36,7 @@ 
import ( "github.com/tikv/pd/pkg/utils/etcdutil" "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/pkg/utils/syncutil" + "github.com/tikv/pd/pkg/utils/typeutil" "go.etcd.io/etcd/clientv3" "go.etcd.io/etcd/mvcc/mvccpb" "go.uber.org/zap" @@ -181,10 +182,6 @@ func (m *GroupManager) allocNodesToAllKeyspaceGroups(ctx context.Context) { return case <-ticker.C: } - countOfNodes := m.GetNodesCount() - if countOfNodes < utils.DefaultKeyspaceGroupReplicaCount { - continue - } groups, err := m.store.LoadKeyspaceGroups(utils.DefaultKeyspaceGroupID, 0) if err != nil { log.Error("failed to load all keyspace groups", zap.Error(err)) @@ -194,23 +191,26 @@ func (m *GroupManager) allocNodesToAllKeyspaceGroups(ctx context.Context) { if len(groups) == 0 { continue } - withError := false for _, group := range groups { - if len(group.Members) < utils.DefaultKeyspaceGroupReplicaCount { - nodes, err := m.AllocNodesForKeyspaceGroup(group.ID, utils.DefaultKeyspaceGroupReplicaCount) + existMembers := make(map[string]struct{}) + for _, member := range group.Members { + if exist, addr := m.IsExistNode(member.Address); exist { + existMembers[addr] = struct{}{} + } + } + numExistMembers := len(existMembers) + if numExistMembers != 0 && numExistMembers == len(group.Members) && numExistMembers == m.GetNodesCount() { + continue + } + if numExistMembers < utils.DefaultKeyspaceGroupReplicaCount { + nodes, err := m.AllocNodesForKeyspaceGroup(group.ID, existMembers, utils.DefaultKeyspaceGroupReplicaCount) if err != nil { - withError = true log.Error("failed to alloc nodes for keyspace group", zap.Uint32("keyspace-group-id", group.ID), zap.Error(err)) continue } group.Members = nodes } } - if !withError { - // all keyspace groups have equal or more than default replica count - log.Info("all keyspace groups have equal or more than default replica count, stop to alloc node") - return - } } } @@ -426,7 +426,7 @@ func (m *GroupManager) UpdateKeyspaceForGroup(userKind endpoint.UserKind, groupI failpoint.Inject("externalAllocNode", func(val failpoint.Value) { failpointOnce.Do(func() { addrs := val.(string) - m.SetNodesForKeyspaceGroup(utils.DefaultKeyspaceGroupID, strings.Split(addrs, ",")) + _ = m.SetNodesForKeyspaceGroup(utils.DefaultKeyspaceGroupID, strings.Split(addrs, ",")) }) }) m.Lock() @@ -745,7 +745,7 @@ func (m *GroupManager) GetNodesCount() int { } // AllocNodesForKeyspaceGroup allocates nodes for the keyspace group. 
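// A sketch of the allocation flow introduced above, heavily simplified: the
// real code compares addresses via typeutil.EqualBaseURLs and pulls new
// candidates from the nodes balancer, but the shape is the same — keep the
// group's members that still correspond to live nodes, skip the group when it
// already holds every live node, and otherwise top the member set up to the
// desired replica count from the live set.
package main

import "fmt"

func allocMembers(members []string, live map[string]struct{}, replicas int) []string {
	exist := make(map[string]struct{})
	for _, m := range members {
		if _, ok := live[m]; ok {
			exist[m] = struct{}{}
		}
	}
	// Already complete: every member is live and every live node is a member.
	if len(exist) != 0 && len(exist) == len(members) && len(exist) == len(live) {
		return members
	}
	result := make([]string, 0, replicas)
	for m := range exist {
		result = append(result, m)
	}
	for addr := range live {
		if len(result) >= replicas || len(result) == len(live) {
			break
		}
		if _, ok := exist[addr]; !ok {
			exist[addr] = struct{}{}
			result = append(result, addr)
		}
	}
	return result
}

func main() {
	live := map[string]struct{}{"tso-1:3379": {}, "tso-2:3379": {}}
	// One member is gone, so the group is topped back up from the live nodes.
	fmt.Println(allocMembers([]string{"tso-1:3379", "tso-3:3379"}, live, 2))
}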
-func (m *GroupManager) AllocNodesForKeyspaceGroup(id uint32, desiredReplicaCount int) ([]endpoint.KeyspaceGroupMember, error) { +func (m *GroupManager) AllocNodesForKeyspaceGroup(id uint32, existMembers map[string]struct{}, desiredReplicaCount int) ([]endpoint.KeyspaceGroupMember, error) { m.Lock() defer m.Unlock() ctx, cancel := context.WithTimeout(m.ctx, allocNodesTimeout) @@ -770,32 +770,34 @@ func (m *GroupManager) AllocNodesForKeyspaceGroup(id uint32, desiredReplicaCount if kg.IsMerging() { return ErrKeyspaceGroupInMerging(id) } - exists := make(map[string]struct{}) - for _, member := range kg.Members { - exists[member.Address] = struct{}{} - nodes = append(nodes, member) - } - if len(exists) >= desiredReplicaCount { - return nil + + for addr := range existMembers { + nodes = append(nodes, endpoint.KeyspaceGroupMember{ + Address: addr, + Priority: utils.DefaultKeyspaceGroupReplicaPriority, + }) } - for len(exists) < desiredReplicaCount { + + for len(existMembers) < desiredReplicaCount { select { case <-ctx.Done(): return nil case <-ticker.C: } - countOfNodes := m.GetNodesCount() - if countOfNodes < desiredReplicaCount || countOfNodes == 0 { // double check + if m.GetNodesCount() == 0 { // double check return ErrNoAvailableNode } + if len(existMembers) == m.GetNodesCount() { + break + } addr := m.nodesBalancer.Next() if addr == "" { return ErrNoAvailableNode } - if _, ok := exists[addr]; ok { + if _, ok := existMembers[addr]; ok { continue } - exists[addr] = struct{}{} + existMembers[addr] = struct{}{} nodes = append(nodes, endpoint.KeyspaceGroupMember{ Address: addr, Priority: utils.DefaultKeyspaceGroupReplicaPriority, @@ -874,7 +876,7 @@ func (m *GroupManager) SetPriorityForKeyspaceGroup(id uint32, node string, prior inKeyspaceGroup := false members := make([]endpoint.KeyspaceGroupMember, 0, len(kg.Members)) for _, member := range kg.Members { - if member.Address == node { + if member.CompareAddress(node) { inKeyspaceGroup = true member.Priority = priority } @@ -894,14 +896,14 @@ func (m *GroupManager) SetPriorityForKeyspaceGroup(id uint32, node string, prior } // IsExistNode checks if the node exists. -func (m *GroupManager) IsExistNode(addr string) bool { +func (m *GroupManager) IsExistNode(addr string) (bool, string) { nodes := m.nodesBalancer.GetAll() for _, node := range nodes { - if node == addr { - return true + if typeutil.EqualBaseURLs(node, addr) { + return true, node } } - return false + return false, "" } // MergeKeyspaceGroups merges the keyspace group in the list into the target keyspace group. diff --git a/pkg/keyspace/util.go b/pkg/keyspace/util.go index e3586ee35d4..a3d9f6345e3 100644 --- a/pkg/keyspace/util.go +++ b/pkg/keyspace/util.go @@ -176,14 +176,14 @@ func MakeRegionBound(id uint32) *RegionBound { } // MakeKeyRanges encodes keyspace ID to correct LabelRule data. -func MakeKeyRanges(id uint32) []interface{} { +func MakeKeyRanges(id uint32) []any { regionBound := MakeRegionBound(id) - return []interface{}{ - map[string]interface{}{ + return []any{ + map[string]any{ "start_key": hex.EncodeToString(regionBound.RawLeftBound), "end_key": hex.EncodeToString(regionBound.RawRightBound), }, - map[string]interface{}{ + map[string]any{ "start_key": hex.EncodeToString(regionBound.TxnLeftBound), "end_key": hex.EncodeToString(regionBound.TxnRightBound), }, @@ -246,14 +246,14 @@ func (hp *indexedHeap) Swap(i, j int) { } // Implementing heap.Interface. 
-func (hp *indexedHeap) Push(x interface{}) { +func (hp *indexedHeap) Push(x any) { item := x.(*endpoint.KeyspaceGroup) hp.index[item.ID] = hp.Len() hp.items = append(hp.items, item) } // Implementing heap.Interface. -func (hp *indexedHeap) Pop() interface{} { +func (hp *indexedHeap) Pop() any { l := hp.Len() item := hp.items[l-1] hp.items = hp.items[:l-1] diff --git a/pkg/keyspace/util_test.go b/pkg/keyspace/util_test.go index 3f9396d6989..48500fcd535 100644 --- a/pkg/keyspace/util_test.go +++ b/pkg/keyspace/util_test.go @@ -83,12 +83,12 @@ func TestMakeLabelRule(t *testing.T) { }, }, RuleType: "key-range", - Data: []interface{}{ - map[string]interface{}{ + Data: []any{ + map[string]any{ "start_key": hex.EncodeToString(codec.EncodeBytes([]byte{'r', 0, 0, 0})), "end_key": hex.EncodeToString(codec.EncodeBytes([]byte{'r', 0, 0, 1})), }, - map[string]interface{}{ + map[string]any{ "start_key": hex.EncodeToString(codec.EncodeBytes([]byte{'x', 0, 0, 0})), "end_key": hex.EncodeToString(codec.EncodeBytes([]byte{'x', 0, 0, 1})), }, @@ -107,12 +107,12 @@ func TestMakeLabelRule(t *testing.T) { }, }, RuleType: "key-range", - Data: []interface{}{ - map[string]interface{}{ + Data: []any{ + map[string]any{ "start_key": hex.EncodeToString(codec.EncodeBytes([]byte{'r', 0, 0x10, 0x92})), "end_key": hex.EncodeToString(codec.EncodeBytes([]byte{'r', 0, 0x10, 0x93})), }, - map[string]interface{}{ + map[string]any{ "start_key": hex.EncodeToString(codec.EncodeBytes([]byte{'x', 0, 0x10, 0x92})), "end_key": hex.EncodeToString(codec.EncodeBytes([]byte{'x', 0, 0x10, 0x93})), }, diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index 89c45497a87..1ce5ecda51d 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -45,7 +45,7 @@ func Discover(cli *clientv3.Client, clusterID, serviceName string) ([]string, er } // GetMSMembers returns all the members of the specified service name. 
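// GetMSMembers now returns full registry entries rather than bare addresses.
// A small sketch of what deserializing one such value from etcd looks like,
// using a local copy of the ServiceRegistryEntry fields added later in this
// patch (service-addr plus the new version/git-hash/deploy-path/start-timestamp
// JSON tags); the real code goes through the entry's own Deserialize helper,
// and the sample payload below is purely illustrative.
package main

import (
	"encoding/json"
	"fmt"
)

type serviceRegistryEntry struct {
	ServiceAddr    string `json:"service-addr"`
	Version        string `json:"version"`
	GitHash        string `json:"git-hash"`
	DeployPath     string `json:"deploy-path"`
	StartTimestamp int64  `json:"start-timestamp"`
}

func main() {
	raw := []byte(`{"service-addr":"http://127.0.0.1:3379","version":"v8.0.0","git-hash":"abc123","start-timestamp":1700000000}`)
	var entry serviceRegistryEntry
	if err := json.Unmarshal(raw, &entry); err != nil {
		panic(err)
	}
	// Callers that previously consumed []string can still read entry.ServiceAddr.
	fmt.Println(entry.ServiceAddr, entry.Version)
}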
-func GetMSMembers(name string, client *clientv3.Client) ([]string, error) { +func GetMSMembers(name string, client *clientv3.Client) ([]ServiceRegistryEntry, error) { switch name { case utils.TSOServiceName, utils.SchedulingServiceName, utils.ResourceManagerServiceName: clusterID, err := etcdutil.GetClusterID(client, utils.ClusterIDPath) @@ -61,7 +61,7 @@ func GetMSMembers(name string, client *clientv3.Client) ([]string, error) { return nil, errs.ErrEtcdTxnConflict.FastGenByArgs() } - var addrs []string + var entries []ServiceRegistryEntry for _, resp := range resps.Responses { for _, keyValue := range resp.GetResponseRange().GetKvs() { var entry ServiceRegistryEntry @@ -69,10 +69,10 @@ func GetMSMembers(name string, client *clientv3.Client) ([]string, error) { log.Error("try to deserialize service registry entry failed", zap.String("key", string(keyValue.Key)), zap.Error(err)) continue } - addrs = append(addrs, entry.ServiceAddr) + entries = append(entries, entry) } } - return addrs, nil + return entries, nil } return nil, errors.Errorf("unknown service name %s", name) diff --git a/pkg/mcs/discovery/register_test.go b/pkg/mcs/discovery/register_test.go index 032b0558a79..707c251e5fb 100644 --- a/pkg/mcs/discovery/register_test.go +++ b/pkg/mcs/discovery/register_test.go @@ -16,6 +16,8 @@ package discovery import ( "context" + "os" + "regexp" "testing" "time" @@ -59,10 +61,21 @@ func TestRegister(t *testing.T) { sr = NewServiceRegister(context.Background(), client, "12345", "test_service", "127.0.0.1:2", "127.0.0.1:2", 1) err = sr.Register() re.NoError(err) + fname := testutil.InitTempFileLogger("info") + defer os.Remove(fname) for i := 0; i < 3; i++ { re.Equal("127.0.0.1:2", getKeyAfterLeaseExpired(re, client, sr.key)) - etcd.Server.HardStop() // close the etcd to make the keepalive failed - time.Sleep(etcdutil.DefaultDialTimeout) // ensure that the request is timeout + etcd.Server.HardStop() // close the etcd to make the keepalive failed + // ensure that the request is timeout + testutil.Eventually(re, func() bool { + content, _ := os.ReadFile(fname) + // check log in function `ServiceRegister.Register` + // ref https://github.com/tikv/pd/blob/6377b26e4e879e7623fbc1d0b7f1be863dea88ad/pkg/mcs/discovery/register.go#L77 + // need to both contain `register.go` and `keep alive failed` + pattern := regexp.MustCompile(`register.go.*keep alive failed`) + matches := pattern.FindAll(content, -1) + return len(matches) >= i+1 + }) etcd.Close() etcd, err = embed.StartEtcd(&cfg) re.NoError(err) diff --git a/pkg/mcs/discovery/registry_entry.go b/pkg/mcs/discovery/registry_entry.go index 52751b430c4..bf11ae5c8a4 100644 --- a/pkg/mcs/discovery/registry_entry.go +++ b/pkg/mcs/discovery/registry_entry.go @@ -23,7 +23,11 @@ import ( // ServiceRegistryEntry is the registry entry of a service type ServiceRegistryEntry struct { - ServiceAddr string `json:"service-addr"` + ServiceAddr string `json:"service-addr"` + Version string `json:"version"` + GitHash string `json:"git-hash"` + DeployPath string `json:"deploy-path"` + StartTimestamp int64 `json:"start-timestamp"` } // Serialize this service registry entry diff --git a/pkg/mcs/metastorage/server/grpc_service.go b/pkg/mcs/metastorage/server/grpc_service.go index e9d35fbf14b..f018dc72f9f 100644 --- a/pkg/mcs/metastorage/server/grpc_service.go +++ b/pkg/mcs/metastorage/server/grpc_service.go @@ -20,10 +20,12 @@ import ( "net/http" "github.com/pingcap/kvproto/pkg/meta_storagepb" + "github.com/pingcap/log" bs "github.com/tikv/pd/pkg/basicserver" 
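The register_test change above replaces a fixed sleep with polling the temp log file until the "keep alive failed" message has appeared enough times. A minimal sketch of that polling idea using only the standard library (testutil.Eventually wraps a similar retry loop around testify assertions; the helper below is an assumption-free stand-in):

package main

import (
	"fmt"
	"os"
	"regexp"
	"time"
)

// waitForLogMatches polls a log file until the pattern has appeared at least
// `want` times or the deadline passes.
func waitForLogMatches(path string, pattern *regexp.Regexp, want int, timeout time.Duration) bool {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		content, err := os.ReadFile(path)
		if err == nil && len(pattern.FindAll(content, -1)) >= want {
			return true
		}
		time.Sleep(100 * time.Millisecond)
	}
	return false
}

func main() {
	f, _ := os.CreateTemp("", "register-*.log")
	defer os.Remove(f.Name())
	_, _ = f.WriteString("register.go:77 keep alive failed\n")
	ok := waitForLogMatches(f.Name(), regexp.MustCompile(`register.go.*keep alive failed`), 1, time.Second)
	fmt.Println(ok) // true
}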
"github.com/tikv/pd/pkg/mcs/registry" "github.com/tikv/pd/pkg/utils/apiutil" "go.etcd.io/etcd/clientv3" + "go.uber.org/zap" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -37,13 +39,13 @@ var ( var _ meta_storagepb.MetaStorageServer = (*Service)(nil) // SetUpRestHandler is a hook to sets up the REST service. -var SetUpRestHandler = func(srv *Service) (http.Handler, apiutil.APIServiceGroup) { +var SetUpRestHandler = func(*Service) (http.Handler, apiutil.APIServiceGroup) { return dummyRestService{}, apiutil.APIServiceGroup{} } type dummyRestService struct{} -func (d dummyRestService) ServeHTTP(w http.ResponseWriter, r *http.Request) { +func (dummyRestService) ServeHTTP(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusNotImplemented) w.Write([]byte("not implemented")) } @@ -86,14 +88,15 @@ func (s *Service) Watch(req *meta_storagepb.WatchRequest, server meta_storagepb. if err := s.checkServing(); err != nil { return err } - ctx, cancel := context.WithCancel(s.ctx) + ctx, cancel := context.WithCancel(server.Context()) defer cancel() - options := []clientv3.OpOption{} + var options []clientv3.OpOption key := string(req.GetKey()) var startRevision int64 if endKey := req.GetRangeEnd(); endKey != nil { options = append(options, clientv3.WithRange(string(endKey))) } + log.Info("watch request", zap.String("key", key), zap.String("range-end", string(req.GetRangeEnd())), zap.Int64("start-revision", req.GetStartRevision())) if startRevision = req.GetStartRevision(); startRevision != 0 { options = append(options, clientv3.WithRev(startRevision)) } @@ -106,6 +109,8 @@ func (s *Service) Watch(req *meta_storagepb.WatchRequest, server meta_storagepb. select { case <-ctx.Done(): return nil + case <-s.ctx.Done(): + return nil case res := <-watchChan: if res.Err() != nil { var resp meta_storagepb.WatchResponse @@ -124,8 +129,8 @@ func (s *Service) Watch(req *meta_storagepb.WatchRequest, server meta_storagepb. return res.Err() } - events := make([]*meta_storagepb.Event, 0, len(res.Events)) - for _, e := range res.Events { + events := make([]*meta_storagepb.Event, len(res.Events)) + for i, e := range res.Events { event := &meta_storagepb.Event{Kv: &meta_storagepb.KeyValue{ Key: e.Kv.Key, Value: e.Kv.Value, @@ -137,7 +142,7 @@ func (s *Service) Watch(req *meta_storagepb.WatchRequest, server meta_storagepb. 
if e.PrevKv != nil { event.PrevKv = &meta_storagepb.KeyValue{Key: e.PrevKv.Key, Value: e.PrevKv.Value} } - events = append(events, event) + events[i] = event } if len(events) > 0 { if err := server.Send(&meta_storagepb.WatchResponse{ @@ -157,7 +162,7 @@ func (s *Service) Get(ctx context.Context, req *meta_storagepb.GetRequest) (*met } ctx, cancel := context.WithCancel(ctx) defer cancel() - options := []clientv3.OpOption{} + var options []clientv3.OpOption key := string(req.GetKey()) if endKey := req.GetRangeEnd(); endKey != nil { options = append(options, clientv3.WithRange(string(endKey))) @@ -182,8 +187,9 @@ func (s *Service) Get(ctx context.Context, req *meta_storagepb.GetRequest) (*met Count: res.Count, More: res.More, } - for _, kv := range res.Kvs { - resp.Kvs = append(resp.Kvs, &meta_storagepb.KeyValue{Key: kv.Key, Value: kv.Value}) + resp.Kvs = make([]*meta_storagepb.KeyValue, len(res.Kvs)) + for i, kv := range res.Kvs { + resp.Kvs[i] = &meta_storagepb.KeyValue{Key: kv.Key, Value: kv.Value} } return resp, nil @@ -196,7 +202,7 @@ func (s *Service) Put(ctx context.Context, req *meta_storagepb.PutRequest) (*met } ctx, cancel := context.WithCancel(ctx) defer cancel() - options := []clientv3.OpOption{} + var options []clientv3.OpOption key := string(req.GetKey()) value := string(req.GetValue()) if lease := clientv3.LeaseID(req.GetLease()); lease != 0 { @@ -225,6 +231,39 @@ func (s *Service) Put(ctx context.Context, req *meta_storagepb.PutRequest) (*met return resp, nil } +// Delete deletes the key-value pair from meta storage. +func (s *Service) Delete(ctx context.Context, req *meta_storagepb.DeleteRequest) (*meta_storagepb.DeleteResponse, error) { + if err := s.checkServing(); err != nil { + return nil, err + } + ctx, cancel := context.WithCancel(ctx) + defer cancel() + var options []clientv3.OpOption + key := string(req.GetKey()) + if prevKv := req.GetPrevKv(); prevKv { + options = append(options, clientv3.WithPrevKV()) + } + + cli := s.manager.GetClient() + res, err := cli.Delete(ctx, key, options...) 
+ var revision int64 + if res != nil { + revision = res.Header.GetRevision() + } + if err != nil { + return &meta_storagepb.DeleteResponse{Header: s.wrapErrorAndRevision(revision, meta_storagepb.ErrorType_UNKNOWN, err.Error())}, nil + } + + resp := &meta_storagepb.DeleteResponse{ + Header: &meta_storagepb.ResponseHeader{ClusterId: s.manager.ClusterID(), Revision: revision}, + } + resp.PrevKvs = make([]*meta_storagepb.KeyValue, len(res.PrevKvs)) + for i, kv := range res.PrevKvs { + resp.PrevKvs[i] = &meta_storagepb.KeyValue{Key: kv.Key, Value: kv.Value} + } + return resp, nil +} + func (s *Service) wrapErrorAndRevision(revision int64, errorType meta_storagepb.ErrorType, message string) *meta_storagepb.ResponseHeader { return s.errorHeader(revision, &meta_storagepb.Error{ Type: errorType, diff --git a/pkg/mcs/resourcemanager/server/OWNERS b/pkg/mcs/resourcemanager/server/OWNERS new file mode 100644 index 00000000000..aa02465dbd9 --- /dev/null +++ b/pkg/mcs/resourcemanager/server/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs at https://go.k8s.io/owners +options: + no_parent_owners: true +filters: + "(OWNERS|config\\.go)$": + approvers: + - sig-critical-approvers-config diff --git a/pkg/mcs/resourcemanager/server/apis/v1/api.go b/pkg/mcs/resourcemanager/server/apis/v1/api.go index fee648a5595..3fd94e637d0 100644 --- a/pkg/mcs/resourcemanager/server/apis/v1/api.go +++ b/pkg/mcs/resourcemanager/server/apis/v1/api.go @@ -20,13 +20,11 @@ import ( "net/http" "reflect" "strings" - "sync" "github.com/gin-contrib/cors" "github.com/gin-contrib/gzip" "github.com/gin-contrib/pprof" "github.com/gin-gonic/gin" - "github.com/joho/godotenv" rmpb "github.com/pingcap/kvproto/pkg/resource_manager" "github.com/pingcap/log" rmserver "github.com/tikv/pd/pkg/mcs/resourcemanager/server" @@ -41,7 +39,6 @@ import ( const APIPathPrefix = "/resource-manager/api/v1/" var ( - once sync.Once apiServiceGroup = apiutil.APIServiceGroup{ Name: "resource-manager", Version: "v1", @@ -67,11 +64,6 @@ type Service struct { // NewService returns a new Service. func NewService(srv *rmserver.Service) *Service { - once.Do(func() { - // These global modification will be effective only for the first invoke. - _ = godotenv.Load() - gin.SetMode(gin.ReleaseMode) - }) apiHandlerEngine := gin.New() apiHandlerEngine.Use(gin.Recovery()) apiHandlerEngine.Use(cors.Default()) @@ -145,7 +137,7 @@ func changeLogLevel(c *gin.Context) { // @Success 200 {string} string "Success" // @Failure 400 {string} error // @Failure 500 {string} error -// @Router /config/group [POST] +// @Router /config/group [post] func (s *Service) postResourceGroup(c *gin.Context) { var group rmpb.ResourceGroup if err := c.ShouldBindJSON(&group); err != nil { @@ -189,7 +181,7 @@ func (s *Service) putResourceGroup(c *gin.Context) { // @Failure 404 {string} error // @Param name path string true "groupName" // @Param with_stats query bool false "whether to return statistics data." -// @Router /config/group/{name} [GET] +// @Router /config/group/{name} [get] func (s *Service) getResourceGroup(c *gin.Context) { withStats := strings.EqualFold(c.Query("with_stats"), "true") group := s.manager.GetResourceGroup(c.Param("name"), withStats) @@ -206,7 +198,7 @@ func (s *Service) getResourceGroup(c *gin.Context) { // @Success 200 {string} json format of []rmserver.ResourceGroup // @Failure 404 {string} error // @Param with_stats query bool false "whether to return statistics data." 
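The new Delete RPC above forwards the request to etcd and optionally returns the previous key-value pairs. A short client-side sketch of the same clientv3 calls, assuming the etcd v3.4 client import path used elsewhere in this repo and a reachable etcd at 127.0.0.1:2379 (the endpoint and key are illustrative):

package main

import (
	"context"
	"fmt"
	"time"

	"go.etcd.io/etcd/clientv3"
)

// deleteWithPrevKV deletes a key and prints the previous key-value pairs,
// the same WithPrevKV option the Delete RPC forwards when req.GetPrevKv() is set.
func deleteWithPrevKV(cli *clientv3.Client, key string) error {
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	resp, err := cli.Delete(ctx, key, clientv3.WithPrevKV())
	if err != nil {
		return err
	}
	fmt.Println("revision:", resp.Header.Revision)
	for _, kv := range resp.PrevKvs {
		fmt.Printf("deleted %s=%s\n", kv.Key, kv.Value)
	}
	return nil
}

func main() {
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"127.0.0.1:2379"}, DialTimeout: time.Second})
	if err != nil {
		panic(err)
	}
	defer cli.Close()
	_ = deleteWithPrevKV(cli, "example-key")
}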
-// @Router /config/groups [GET] +// @Router /config/groups [get] func (s *Service) getResourceGroupList(c *gin.Context) { withStats := strings.EqualFold(c.Query("with_stats"), "true") groups := s.manager.GetResourceGroupList(withStats) @@ -220,7 +212,7 @@ func (s *Service) getResourceGroupList(c *gin.Context) { // @Param name path string true "Name of the resource group to be deleted" // @Success 200 {string} string "Success!" // @Failure 404 {string} error -// @Router /config/group/{name} [DELETE] +// @Router /config/group/{name} [delete] func (s *Service) deleteResourceGroup(c *gin.Context) { if err := s.manager.DeleteResourceGroup(c.Param("name")); err != nil { c.String(http.StatusNotFound, err.Error()) @@ -234,7 +226,7 @@ func (s *Service) deleteResourceGroup(c *gin.Context) { // @Summary Get the resource controller config. // @Success 200 {string} json format of rmserver.ControllerConfig // @Failure 400 {string} error -// @Router /config/controller [GET] +// @Router /config/controller [get] func (s *Service) getControllerConfig(c *gin.Context) { config := s.manager.GetControllerConfig() c.IndentedJSON(http.StatusOK, config) @@ -247,9 +239,9 @@ func (s *Service) getControllerConfig(c *gin.Context) { // @Param config body object true "json params, rmserver.ControllerConfig" // @Success 200 {string} string "Success!" // @Failure 400 {string} error -// @Router /config/controller [POST] +// @Router /config/controller [post] func (s *Service) setControllerConfig(c *gin.Context) { - conf := make(map[string]interface{}) + conf := make(map[string]any) if err := c.ShouldBindJSON(&conf); err != nil { c.String(http.StatusBadRequest, err.Error()) return diff --git a/pkg/mcs/resourcemanager/server/config.go b/pkg/mcs/resourcemanager/server/config.go index bcd5a853dfc..70862ffb89c 100644 --- a/pkg/mcs/resourcemanager/server/config.go +++ b/pkg/mcs/resourcemanager/server/config.go @@ -112,10 +112,13 @@ func (rmc *ControllerConfig) Adjust(meta *configutil.ConfigMetaData) { if rmc == nil { return } - rmc.RequestUnit.Adjust() - - configutil.AdjustDuration(&rmc.DegradedModeWaitDuration, defaultDegradedModeWaitDuration) - configutil.AdjustDuration(&rmc.LTBMaxWaitDuration, defaultMaxWaitDuration) + rmc.RequestUnit.Adjust(meta.Child("request-unit")) + if !meta.IsDefined("degraded-mode-wait-duration") { + configutil.AdjustDuration(&rmc.DegradedModeWaitDuration, defaultDegradedModeWaitDuration) + } + if !meta.IsDefined("ltb-max-wait-duration") { + configutil.AdjustDuration(&rmc.LTBMaxWaitDuration, defaultMaxWaitDuration) + } failpoint.Inject("enableDegradedMode", func() { configutil.AdjustDuration(&rmc.DegradedModeWaitDuration, time.Second) }) @@ -123,7 +126,6 @@ func (rmc *ControllerConfig) Adjust(meta *configutil.ConfigMetaData) { // RequestUnitConfig is the configuration of the request units, which determines the coefficients of // the RRU and WRU cost. This configuration should be modified carefully. -// TODO: use common config with client size. type RequestUnitConfig struct { // ReadBaseCost is the base cost for a read request. No matter how many bytes read/written or // the CPU times taken for a request, this cost is inevitable. @@ -145,30 +147,30 @@ type RequestUnitConfig struct { } // Adjust adjusts the configuration and initializes it with the default value if necessary. 
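The ControllerConfig.Adjust change above switches from zero-value checks to TOML metadata checks: a default is applied only when the key was never written in the config file, so an operator who explicitly sets a zero value keeps it. A simplified sketch with BurntSushi toml (the struct, key names, and string-typed duration here are stand-ins for the real typeutil.Duration fields):

package main

import (
	"fmt"
	"time"

	"github.com/BurntSushi/toml"
)

type controllerConfig struct {
	DegradedModeWaitDuration string `toml:"degraded-mode-wait-duration"`
}

// adjust applies the default only when the key was absent from the TOML input,
// so an explicit "0s" set by the operator is preserved; a plain zero-value
// check would silently overwrite it.
func adjust(cfg *controllerConfig, md toml.MetaData, def time.Duration) {
	if !md.IsDefined("controller", "degraded-mode-wait-duration") {
		cfg.DegradedModeWaitDuration = def.String()
	}
}

func main() {
	var cfg struct {
		Controller controllerConfig `toml:"controller"`
	}
	md, err := toml.Decode("[controller]\ndegraded-mode-wait-duration = \"0s\"\n", &cfg)
	if err != nil {
		panic(err)
	}
	adjust(&cfg.Controller, md, time.Hour)
	fmt.Println(cfg.Controller.DegradedModeWaitDuration) // "0s", not "1h0m0s"
}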
-func (ruc *RequestUnitConfig) Adjust() { +func (ruc *RequestUnitConfig) Adjust(meta *configutil.ConfigMetaData) { if ruc == nil { return } - if ruc.ReadBaseCost == 0 { - ruc.ReadBaseCost = defaultReadBaseCost + if !meta.IsDefined("read-base-cost") { + configutil.AdjustFloat64(&ruc.ReadBaseCost, defaultReadBaseCost) } - if ruc.ReadPerBatchBaseCost == 0 { - ruc.ReadPerBatchBaseCost = defaultReadPerBatchBaseCost + if !meta.IsDefined("read-per-batch-base-cost") { + configutil.AdjustFloat64(&ruc.ReadPerBatchBaseCost, defaultReadPerBatchBaseCost) } - if ruc.ReadCostPerByte == 0 { - ruc.ReadCostPerByte = defaultReadCostPerByte + if !meta.IsDefined("read-cost-per-byte") { + configutil.AdjustFloat64(&ruc.ReadCostPerByte, defaultReadCostPerByte) } - if ruc.WriteBaseCost == 0 { - ruc.WriteBaseCost = defaultWriteBaseCost + if !meta.IsDefined("write-base-cost") { + configutil.AdjustFloat64(&ruc.WriteBaseCost, defaultWriteBaseCost) } - if ruc.WritePerBatchBaseCost == 0 { - ruc.WritePerBatchBaseCost = defaultWritePerBatchBaseCost + if !meta.IsDefined("write-per-batch-base-cost") { + configutil.AdjustFloat64(&ruc.WritePerBatchBaseCost, defaultWritePerBatchBaseCost) } - if ruc.WriteCostPerByte == 0 { - ruc.WriteCostPerByte = defaultWriteCostPerByte + if !meta.IsDefined("write-cost-per-byte") { + configutil.AdjustFloat64(&ruc.WriteCostPerByte, defaultWriteCostPerByte) } - if ruc.CPUMsCost == 0 { - ruc.CPUMsCost = defaultCPUMsCost + if !meta.IsDefined("read-cpu-ms-cost") { + configutil.AdjustFloat64(&ruc.CPUMsCost, defaultCPUMsCost) } } @@ -202,11 +204,11 @@ func (c *Config) Parse(flagSet *pflag.FlagSet) error { configutil.AdjustCommandLineString(flagSet, &c.ListenAddr, "listen-addr") configutil.AdjustCommandLineString(flagSet, &c.AdvertiseListenAddr, "advertise-listen-addr") - return c.Adjust(meta, false) + return c.Adjust(meta) } // Adjust is used to adjust the resource manager configurations. 
-func (c *Config) Adjust(meta *toml.MetaData, reloading bool) error { +func (c *Config) Adjust(meta *toml.MetaData) error { configMetaData := configutil.NewConfigMetadata(meta) if err := configMetaData.CheckUndecoded(); err != nil { c.WarningMsgs = append(c.WarningMsgs, err.Error()) @@ -237,10 +239,6 @@ func (c *Config) Adjust(meta *toml.MetaData, reloading bool) error { c.adjustLog(configMetaData.Child("log")) c.Security.Encryption.Adjust() - if len(c.Log.Format) == 0 { - c.Log.Format = utils.DefaultLogFormat - } - c.Controller.Adjust(configMetaData.Child("controller")) configutil.AdjustInt64(&c.LeaderLease, utils.DefaultLeaderLease) @@ -251,6 +249,8 @@ func (c *Config) adjustLog(meta *configutil.ConfigMetaData) { if !meta.IsDefined("disable-error-verbose") { c.Log.DisableErrorVerbose = utils.DefaultDisableErrorVerbose } + configutil.AdjustString(&c.Log.Format, utils.DefaultLogFormat) + configutil.AdjustString(&c.Log.Level, utils.DefaultLogLevel) } // GetName returns the Name diff --git a/pkg/mcs/resourcemanager/server/config_test.go b/pkg/mcs/resourcemanager/server/config_test.go index 64fd133ea73..2d57100468e 100644 --- a/pkg/mcs/resourcemanager/server/config_test.go +++ b/pkg/mcs/resourcemanager/server/config_test.go @@ -39,7 +39,7 @@ read-cpu-ms-cost = 5.0 cfg := NewConfig() meta, err := toml.Decode(cfgData, &cfg) re.NoError(err) - err = cfg.Adjust(&meta, false) + err = cfg.Adjust(&meta) re.NoError(err) re.Equal(time.Second*2, cfg.Controller.DegradedModeWaitDuration.Duration) diff --git a/pkg/mcs/resourcemanager/server/grpc_service.go b/pkg/mcs/resourcemanager/server/grpc_service.go index cf985a14764..2f35042c48f 100644 --- a/pkg/mcs/resourcemanager/server/grpc_service.go +++ b/pkg/mcs/resourcemanager/server/grpc_service.go @@ -41,13 +41,13 @@ var ( var _ rmpb.ResourceManagerServer = (*Service)(nil) // SetUpRestHandler is a hook to sets up the REST service. -var SetUpRestHandler = func(srv *Service) (http.Handler, apiutil.APIServiceGroup) { +var SetUpRestHandler = func(*Service) (http.Handler, apiutil.APIServiceGroup) { return dummyRestService{}, apiutil.APIServiceGroup{} } type dummyRestService struct{} -func (d dummyRestService) ServeHTTP(w http.ResponseWriter, r *http.Request) { +func (dummyRestService) ServeHTTP(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusNotImplemented) w.Write([]byte("not implemented")) } @@ -94,7 +94,7 @@ func (s *Service) checkServing() error { } // GetResourceGroup implements ResourceManagerServer.GetResourceGroup. -func (s *Service) GetResourceGroup(ctx context.Context, req *rmpb.GetResourceGroupRequest) (*rmpb.GetResourceGroupResponse, error) { +func (s *Service) GetResourceGroup(_ context.Context, req *rmpb.GetResourceGroupRequest) (*rmpb.GetResourceGroupResponse, error) { if err := s.checkServing(); err != nil { return nil, err } @@ -108,7 +108,7 @@ func (s *Service) GetResourceGroup(ctx context.Context, req *rmpb.GetResourceGro } // ListResourceGroups implements ResourceManagerServer.ListResourceGroups. -func (s *Service) ListResourceGroups(ctx context.Context, req *rmpb.ListResourceGroupsRequest) (*rmpb.ListResourceGroupsResponse, error) { +func (s *Service) ListResourceGroups(_ context.Context, req *rmpb.ListResourceGroupsRequest) (*rmpb.ListResourceGroupsResponse, error) { if err := s.checkServing(); err != nil { return nil, err } @@ -123,7 +123,7 @@ func (s *Service) ListResourceGroups(ctx context.Context, req *rmpb.ListResource } // AddResourceGroup implements ResourceManagerServer.AddResourceGroup. 
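The adjustLog change above routes the log format and level defaults through configutil.AdjustString instead of an ad-hoc length check. A minimal sketch of what such a helper is assumed to do (set the default only when the current value is empty); the helper name below is illustrative, not the configutil implementation itself:

package main

import "fmt"

// adjustString fills in a default only when the current value is empty.
func adjustString(v *string, def string) {
	if len(*v) == 0 {
		*v = def
	}
}

func main() {
	logFormat, logLevel := "", "debug"
	adjustString(&logFormat, "text")
	adjustString(&logLevel, "info")
	fmt.Println(logFormat, logLevel) // text debug: only the empty value was defaulted
}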
-func (s *Service) AddResourceGroup(ctx context.Context, req *rmpb.PutResourceGroupRequest) (*rmpb.PutResourceGroupResponse, error) { +func (s *Service) AddResourceGroup(_ context.Context, req *rmpb.PutResourceGroupRequest) (*rmpb.PutResourceGroupResponse, error) { if err := s.checkServing(); err != nil { return nil, err } @@ -135,7 +135,7 @@ func (s *Service) AddResourceGroup(ctx context.Context, req *rmpb.PutResourceGro } // DeleteResourceGroup implements ResourceManagerServer.DeleteResourceGroup. -func (s *Service) DeleteResourceGroup(ctx context.Context, req *rmpb.DeleteResourceGroupRequest) (*rmpb.DeleteResourceGroupResponse, error) { +func (s *Service) DeleteResourceGroup(_ context.Context, req *rmpb.DeleteResourceGroupRequest) (*rmpb.DeleteResourceGroupResponse, error) { if err := s.checkServing(); err != nil { return nil, err } @@ -147,7 +147,7 @@ func (s *Service) DeleteResourceGroup(ctx context.Context, req *rmpb.DeleteResou } // ModifyResourceGroup implements ResourceManagerServer.ModifyResourceGroup. -func (s *Service) ModifyResourceGroup(ctx context.Context, req *rmpb.PutResourceGroupRequest) (*rmpb.PutResourceGroupResponse, error) { +func (s *Service) ModifyResourceGroup(_ context.Context, req *rmpb.PutResourceGroupRequest) (*rmpb.PutResourceGroupResponse, error) { if err := s.checkServing(); err != nil { return nil, err } diff --git a/pkg/mcs/resourcemanager/server/manager.go b/pkg/mcs/resourcemanager/server/manager.go index 61c1463d3c0..418d188823f 100644 --- a/pkg/mcs/resourcemanager/server/manager.go +++ b/pkg/mcs/resourcemanager/server/manager.go @@ -27,6 +27,7 @@ import ( "github.com/pingcap/failpoint" rmpb "github.com/pingcap/kvproto/pkg/resource_manager" "github.com/pingcap/log" + "github.com/prometheus/client_golang/prometheus" bs "github.com/tikv/pd/pkg/basicserver" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/storage/endpoint" @@ -41,7 +42,9 @@ const ( defaultConsumptionChanSize = 1024 metricsCleanupInterval = time.Minute metricsCleanupTimeout = 20 * time.Minute - metricsAvailableRUInterval = 30 * time.Second + metricsAvailableRUInterval = 1 * time.Second + defaultCollectIntervalSec = 20 + tickPerSecond = time.Second reservedDefaultGroupName = "default" middlePriority = 8 @@ -126,7 +129,9 @@ func (m *Manager) Init(ctx context.Context) error { return err } // Load resource group meta info from storage. + m.Lock() m.groups = make(map[string]*ResourceGroup) + m.Unlock() handler := func(k, v string) { group := &rmpb.ResourceGroup{} if err := proto.Unmarshal([]byte(v), group); err != nil { @@ -184,13 +189,13 @@ func (m *Manager) Init(ctx context.Context) error { } // UpdateControllerConfigItem updates the controller config item. 
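The Manager.Init change above wraps the reassignment of m.groups in the write lock. The point is that replacing a map while other goroutines read it through the same mutex would otherwise be a data race; a compact sketch of the pattern (types are illustrative):

package main

import (
	"fmt"
	"sync"
)

type manager struct {
	sync.RWMutex
	groups map[string]int
}

// init (re)creates the shared map under the write lock so concurrent readers
// never observe the swap mid-flight.
func (m *manager) init() {
	m.Lock()
	m.groups = make(map[string]int)
	m.Unlock()
}

func (m *manager) get(name string) (int, bool) {
	m.RLock()
	defer m.RUnlock()
	v, ok := m.groups[name]
	return v, ok
}

func main() {
	m := &manager{}
	m.init()
	_, ok := m.get("default")
	fmt.Println(ok) // false: map exists but is empty
}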
-func (m *Manager) UpdateControllerConfigItem(key string, value interface{}) error { +func (m *Manager) UpdateControllerConfigItem(key string, value any) error { kp := strings.Split(key, ".") if len(kp) == 0 { return errors.Errorf("invalid key %s", key) } m.Lock() - var config interface{} + var config any switch kp[0] { case "request-unit": config = &m.controllerConfig.RequestUnit @@ -357,6 +362,9 @@ func (m *Manager) backgroundMetricsFlush(ctx context.Context) { defer cleanUpTicker.Stop() availableRUTicker := time.NewTicker(metricsAvailableRUInterval) defer availableRUTicker.Stop() + recordMaxTicker := time.NewTicker(tickPerSecond) + defer recordMaxTicker.Stop() + maxPerSecTrackers := make(map[string]*maxPerSecCostTracker) for { select { case <-ctx.Done(): @@ -386,6 +394,13 @@ func (m *Manager) backgroundMetricsFlush(ctx context.Context) { readRequestCountMetrics = requestCount.WithLabelValues(name, name, readTypeLabel) writeRequestCountMetrics = requestCount.WithLabelValues(name, name, writeTypeLabel) ) + t, ok := maxPerSecTrackers[name] + if !ok { + t = newMaxPerSecCostTracker(name, defaultCollectIntervalSec) + maxPerSecTrackers[name] = t + } + t.CollectConsumption(consumption) + // RU info. if consumption.RRU > 0 { rruMetrics.Add(consumption.RRU) @@ -437,21 +452,101 @@ func (m *Manager) backgroundMetricsFlush(ctx context.Context) { requestCount.DeleteLabelValues(r.name, r.name, writeTypeLabel) availableRUCounter.DeleteLabelValues(r.name, r.name, r.ruType) delete(m.consumptionRecord, r) + delete(maxPerSecTrackers, r.name) + readRequestUnitMaxPerSecCost.DeleteLabelValues(r.name) + writeRequestUnitMaxPerSecCost.DeleteLabelValues(r.name) } } case <-availableRUTicker.C: m.RLock() + groups := make([]*ResourceGroup, 0, len(m.groups)) for name, group := range m.groups { if name == reservedDefaultGroupName { continue } + groups = append(groups, group) + } + m.RUnlock() + // prevent many groups and hold the lock long time. + for _, group := range groups { ru := group.getRUToken() if ru < 0 { ru = 0 } - availableRUCounter.WithLabelValues(name, name).Set(ru) + availableRUCounter.WithLabelValues(group.Name, group.Name).Set(ru) + } + + case <-recordMaxTicker.C: + // Record the sum of RRU and WRU every second. + m.RLock() + names := make([]string, 0, len(m.groups)) + for name := range m.groups { + names = append(names, name) } m.RUnlock() + for _, name := range names { + if t, ok := maxPerSecTrackers[name]; !ok { + maxPerSecTrackers[name] = newMaxPerSecCostTracker(name, defaultCollectIntervalSec) + } else { + t.FlushMetrics() + } + } } } } + +type maxPerSecCostTracker struct { + name string + maxPerSecRRU float64 + maxPerSecWRU float64 + rruSum float64 + wruSum float64 + lastRRUSum float64 + lastWRUSum float64 + flushPeriod int + cnt int + rruMaxMetrics prometheus.Gauge + wruMaxMetrics prometheus.Gauge +} + +func newMaxPerSecCostTracker(name string, flushPeriod int) *maxPerSecCostTracker { + return &maxPerSecCostTracker{ + name: name, + flushPeriod: flushPeriod, + rruMaxMetrics: readRequestUnitMaxPerSecCost.WithLabelValues(name), + wruMaxMetrics: writeRequestUnitMaxPerSecCost.WithLabelValues(name), + } +} + +// CollectConsumption collects the consumption info. +func (t *maxPerSecCostTracker) CollectConsumption(consume *rmpb.Consumption) { + t.rruSum += consume.RRU + t.wruSum += consume.WRU +} + +// FlushMetrics and set the maxPerSecRRU and maxPerSecWRU to the metrics. 
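The availableRUTicker branch above now copies the group list under the read lock and releases the lock before exporting metrics, so a large number of groups does not keep the manager locked. A short sketch of that snapshot-then-process pattern (types and fields are illustrative):

package main

import (
	"fmt"
	"sync"
)

type group struct {
	name string
	ru   float64
}

type registry struct {
	sync.RWMutex
	groups map[string]*group
}

// snapshot copies the group pointers under the read lock and returns; the
// caller then does the slow per-group work without holding the lock.
func (r *registry) snapshot() []*group {
	r.RLock()
	defer r.RUnlock()
	out := make([]*group, 0, len(r.groups))
	for _, g := range r.groups {
		out = append(out, g)
	}
	return out
}

func main() {
	r := &registry{groups: map[string]*group{"a": {"a", 10}, "b": {"b", 20}}}
	for _, g := range r.snapshot() {
		// slow work (metrics export) happens outside the lock
		fmt.Println(g.name, g.ru)
	}
}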
+func (t *maxPerSecCostTracker) FlushMetrics() { + if t.lastRRUSum == 0 && t.lastWRUSum == 0 { + t.lastRRUSum = t.rruSum + t.lastWRUSum = t.wruSum + return + } + deltaRRU := t.rruSum - t.lastRRUSum + deltaWRU := t.wruSum - t.lastWRUSum + t.lastRRUSum = t.rruSum + t.lastWRUSum = t.wruSum + if deltaRRU > t.maxPerSecRRU { + t.maxPerSecRRU = deltaRRU + } + if deltaWRU > t.maxPerSecWRU { + t.maxPerSecWRU = deltaWRU + } + t.cnt++ + // flush to metrics in every flushPeriod. + if t.cnt%t.flushPeriod == 0 { + t.rruMaxMetrics.Set(t.maxPerSecRRU) + t.wruMaxMetrics.Set(t.maxPerSecWRU) + t.maxPerSecRRU = 0 + t.maxPerSecWRU = 0 + } +} diff --git a/pkg/mcs/resourcemanager/server/metrics.go b/pkg/mcs/resourcemanager/server/metrics.go index 6bb90c45d12..45c94e5c735 100644 --- a/pkg/mcs/resourcemanager/server/metrics.go +++ b/pkg/mcs/resourcemanager/server/metrics.go @@ -48,6 +48,22 @@ var ( Name: "write_request_unit_sum", Help: "Counter of the write request unit cost for all resource groups.", }, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel}) + + readRequestUnitMaxPerSecCost = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: ruSubsystem, + Name: "read_request_unit_max_per_sec", + Help: "Gauge of the max read request unit per second for all resource groups.", + }, []string{newResourceGroupNameLabel}) + writeRequestUnitMaxPerSecCost = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: ruSubsystem, + Name: "write_request_unit_max_per_sec", + Help: "Gauge of the max write request unit per second for all resource groups.", + }, []string{newResourceGroupNameLabel}) + sqlLayerRequestUnitCost = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, @@ -112,4 +128,6 @@ func init() { prometheus.MustRegister(sqlCPUCost) prometheus.MustRegister(requestCount) prometheus.MustRegister(availableRUCounter) + prometheus.MustRegister(readRequestUnitMaxPerSecCost) + prometheus.MustRegister(writeRequestUnitMaxPerSecCost) } diff --git a/pkg/mcs/resourcemanager/server/metrics_test.go b/pkg/mcs/resourcemanager/server/metrics_test.go new file mode 100644 index 00000000000..62d07286eaf --- /dev/null +++ b/pkg/mcs/resourcemanager/server/metrics_test.go @@ -0,0 +1,51 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
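The metrics.go hunk above adds two GaugeVec series and cleans them up per group. A compact client_golang example of the register / set / delete lifecycle used there (namespace, subsystem, and label values are illustrative):

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

var maxRUPerSec = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Namespace: "resource_manager",
		Subsystem: "resource_unit",
		Name:      "read_request_unit_max_per_sec",
		Help:      "Max read RU per second per resource group.",
	}, []string{"name"})

func main() {
	// Register once at startup, then set per-label-value gauges as samples arrive.
	prometheus.MustRegister(maxRUPerSec)
	maxRUPerSec.WithLabelValues("default").Set(42)
	// DeleteLabelValues drops the series when a group is cleaned up,
	// as the cleanup branch does for stale groups.
	fmt.Println(maxRUPerSec.DeleteLabelValues("default")) // true
}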
+ +package server + +import ( + "fmt" + "testing" + + rmpb "github.com/pingcap/kvproto/pkg/resource_manager" + "github.com/stretchr/testify/require" +) + +func TestMaxPerSecCostTracker(t *testing.T) { + tracker := newMaxPerSecCostTracker("test", defaultCollectIntervalSec) + re := require.New(t) + + // Define the expected max values for each flushPeriod + expectedMaxRU := []float64{19, 39, 59} + expectedSum := []float64{190, 780, 1770} + + for i := 0; i < 60; i++ { + // Record data + consumption := &rmpb.Consumption{ + RRU: float64(i), + WRU: float64(i), + } + tracker.CollectConsumption(consumption) + tracker.FlushMetrics() + + // Check the max values at the end of each flushPeriod + if (i+1)%20 == 0 { + period := i / 20 + re.Equal(tracker.maxPerSecRRU, expectedMaxRU[period], fmt.Sprintf("maxPerSecRRU in period %d is incorrect", period+1)) + re.Equal(tracker.maxPerSecWRU, expectedMaxRU[period], fmt.Sprintf("maxPerSecWRU in period %d is incorrect", period+1)) + re.Equal(tracker.rruSum, expectedSum[period]) + re.Equal(tracker.rruSum, expectedSum[period]) + } + } +} diff --git a/pkg/mcs/resourcemanager/server/resource_group_test.go b/pkg/mcs/resourcemanager/server/resource_group_test.go index da5f5c4f0e4..87ff6da2632 100644 --- a/pkg/mcs/resourcemanager/server/resource_group_test.go +++ b/pkg/mcs/resourcemanager/server/resource_group_test.go @@ -37,7 +37,7 @@ func TestPatchResourceGroup(t *testing.T) { } } -func resetSizeCache(obj interface{}) { +func resetSizeCache(obj any) { resetSizeCacheRecursive(reflect.ValueOf(obj)) } diff --git a/pkg/mcs/resourcemanager/server/server.go b/pkg/mcs/resourcemanager/server/server.go index 2d02fd00434..1fac592f791 100644 --- a/pkg/mcs/resourcemanager/server/server.go +++ b/pkg/mcs/resourcemanager/server/server.go @@ -217,6 +217,7 @@ func (s *Server) Close() { utils.StopHTTPServer(s) utils.StopGRPCServer(s) s.GetListener().Close() + s.CloseClientConns() s.serverLoopCancel() s.serverLoopWg.Wait() diff --git a/pkg/mcs/scheduling/server/apis/v1/api.go b/pkg/mcs/scheduling/server/apis/v1/api.go index 8b48fde611e..39aa11927ca 100644 --- a/pkg/mcs/scheduling/server/apis/v1/api.go +++ b/pkg/mcs/scheduling/server/apis/v1/api.go @@ -22,17 +22,17 @@ import ( "net/url" "strconv" "strings" - "sync" "github.com/gin-contrib/cors" "github.com/gin-contrib/gzip" "github.com/gin-contrib/pprof" "github.com/gin-gonic/gin" - "github.com/joho/godotenv" + "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" scheserver "github.com/tikv/pd/pkg/mcs/scheduling/server" mcsutils "github.com/tikv/pd/pkg/mcs/utils" + "github.com/tikv/pd/pkg/response" sche "github.com/tikv/pd/pkg/schedule/core" "github.com/tikv/pd/pkg/schedule/handler" "github.com/tikv/pd/pkg/schedule/operator" @@ -51,7 +51,6 @@ const APIPathPrefix = "/scheduling/api/v1" const handlerKey = "handler" var ( - once sync.Once apiServiceGroup = apiutil.APIServiceGroup{ Name: "scheduling", Version: "v1", @@ -92,11 +91,6 @@ func createIndentRender() *render.Render { // NewService returns a new Service. func NewService(srv *scheserver.Service) *Service { - once.Do(func() { - // These global modification will be effective only for the first invoke. 
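A quick check of the constants in TestMaxPerSecCostTracker above: the tracker receives i RU at tick i, so the per-second delta at tick i is i, making the maximum in each 20-tick window its last tick (19, 39, 59), and the running sum at tick i is the triangular number i*(i+1)/2 (190, 780, 1770). The loop below reproduces those numbers:

package main

import "fmt"

func main() {
	sum := 0.0
	for i := 0; i < 60; i++ {
		sum += float64(i) // consumption at tick i is i RU
		if (i+1)%20 == 0 {
			fmt.Printf("window %d: max delta=%d, sum=%.0f\n", i/20+1, i, sum)
		}
	}
	// window 1: max delta=19, sum=190
	// window 2: max delta=39, sum=780
	// window 3: max delta=59, sum=1770
}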
- _ = godotenv.Load() - gin.SetMode(gin.ReleaseMode) - }) apiHandlerEngine := gin.New() apiHandlerEngine.Use(gin.Recovery()) apiHandlerEngine.Use(cors.Default()) @@ -124,6 +118,7 @@ func NewService(srv *scheserver.Service) *Service { s.RegisterCheckersRouter() s.RegisterHotspotRouter() s.RegisterRegionsRouter() + s.RegisterStoresRouter() return s } @@ -169,14 +164,25 @@ func (s *Service) RegisterOperatorsRouter() { router := s.root.Group("operators") router.GET("", getOperators) router.POST("", createOperator) + router.DELETE("", deleteOperators) router.GET("/:id", getOperatorByRegion) router.DELETE("/:id", deleteOperatorByRegion) router.GET("/records", getOperatorRecords) } +// RegisterStoresRouter registers the router of the stores handler. +func (s *Service) RegisterStoresRouter() { + router := s.root.Group("stores") + router.GET("", getAllStores) + router.GET("/:id", getStoreByID) +} + // RegisterRegionsRouter registers the router of the regions handler. func (s *Service) RegisterRegionsRouter() { router := s.root.Group("regions") + router.GET("", getAllRegions) + router.GET("/:id", getRegionByID) + router.GET("/count", getRegionCount) router.POST("/accelerate-schedule", accelerateRegionsScheduleInRange) router.POST("/accelerate-schedule/batch", accelerateRegionsScheduleInRanges) router.POST("/scatter", scatterRegions) @@ -266,7 +272,7 @@ func deleteAllRegionCache(c *gin.Context) { c.String(http.StatusInternalServerError, errs.ErrNotBootstrapped.GenWithStackByArgs().Error()) return } - cluster.DropCacheAllRegion() + cluster.ResetRegionCache() c.String(http.StatusOK, "All regions are removed from server cache.") } @@ -291,7 +297,7 @@ func deleteRegionCacheByID(c *gin.Context) { c.String(http.StatusBadRequest, err.Error()) return } - cluster.DropCacheRegion(regionID) + cluster.RemoveRegionIfExist(regionID) c.String(http.StatusOK, "The region is removed from server cache.") } @@ -302,7 +308,7 @@ func deleteRegionCacheByID(c *gin.Context) { // @Success 200 {object} operator.OpWithStatus // @Failure 400 {string} string "The input is invalid." // @Failure 500 {string} string "PD server failed to proceed the request." -// @Router /operators/{id} [GET] +// @Router /operators/{id} [get] func getOperatorByRegion(c *gin.Context) { handler := c.MustGet(handlerKey).(*handler.Handler) id := c.Param("id") @@ -329,7 +335,7 @@ func getOperatorByRegion(c *gin.Context) { // @Produce json // @Success 200 {array} operator.Operator // @Failure 500 {string} string "PD server failed to proceed the request." -// @Router /operators [GET] +// @Router /operators [get] func getOperators(c *gin.Context) { handler := c.MustGet(handlerKey).(*handler.Handler) var ( @@ -360,6 +366,22 @@ func getOperators(c *gin.Context) { } } +// @Tags operators +// @Summary Delete operators. +// @Produce json +// @Success 200 {string} string "All pending operator are canceled." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /operators [delete] +func deleteOperators(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + if err := handler.RemoveOperators(); err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + + c.String(http.StatusOK, "All pending operator are canceled.") +} + // @Tags operator // @Summary Cancel a Region's pending operator. 
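The api.go hunk above registers new store and region routes on the scheduling service's gin engine. A stripped-down sketch of the same group/route layout with stub handlers (paths mirror the ones added; handler bodies are placeholders, not the real implementations):

package main

import (
	"net/http"

	"github.com/gin-gonic/gin"
)

func newRouter() *gin.Engine {
	engine := gin.New()
	engine.Use(gin.Recovery())
	root := engine.Group("/scheduling/api/v1")

	operators := root.Group("operators")
	operators.GET("", func(c *gin.Context) { c.JSON(http.StatusOK, []string{}) })
	operators.DELETE("", func(c *gin.Context) { c.String(http.StatusOK, "All pending operators are canceled.") })

	stores := root.Group("stores")
	stores.GET("/:id", func(c *gin.Context) { c.String(http.StatusOK, "store %s", c.Param("id")) })
	return engine
}

func main() {
	_ = newRouter().Run(":8080") // serves until interrupted
}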
// @Param region_id path int true "A Region's Id" @@ -421,7 +443,7 @@ func getOperatorRecords(c *gin.Context) { // @Router /operators [post] func createOperator(c *gin.Context) { handler := c.MustGet(handlerKey).(*handler.Handler) - var input map[string]interface{} + var input map[string]any if err := c.BindJSON(&input); err != nil { c.String(http.StatusBadRequest, err.Error()) return @@ -517,7 +539,7 @@ func getSchedulers(c *gin.Context) { // @Tags schedulers // @Summary List all scheduler configs. // @Produce json -// @Success 200 {object} map[string]interface{} +// @Success 200 {object} map[string]any // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /schedulers/config/ [get] func getSchedulerConfig(c *gin.Context) { @@ -538,7 +560,7 @@ func getSchedulerConfig(c *gin.Context) { // @Tags schedulers // @Summary List scheduler config by name. // @Produce json -// @Success 200 {object} map[string]interface{} +// @Success 200 {object} map[string]any // @Failure 404 {string} string scheduler not found // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /schedulers/config/{name}/list [get] @@ -1158,7 +1180,7 @@ func getRegionLabelRuleByID(c *gin.Context) { func accelerateRegionsScheduleInRange(c *gin.Context) { handler := c.MustGet(handlerKey).(*handler.Handler) - var input map[string]interface{} + var input map[string]any if err := c.BindJSON(&input); err != nil { c.String(http.StatusBadRequest, err.Error()) return @@ -1198,7 +1220,7 @@ func accelerateRegionsScheduleInRange(c *gin.Context) { func accelerateRegionsScheduleInRanges(c *gin.Context) { handler := c.MustGet(handlerKey).(*handler.Handler) - var input []map[string]interface{} + var input []map[string]any if err := c.BindJSON(&input); err != nil { c.String(http.StatusBadRequest, err.Error()) return @@ -1249,7 +1271,7 @@ func accelerateRegionsScheduleInRanges(c *gin.Context) { func scatterRegions(c *gin.Context) { handler := c.MustGet(handlerKey).(*handler.Handler) - var input map[string]interface{} + var input map[string]any if err := c.BindJSON(&input); err != nil { c.String(http.StatusBadRequest, err.Error()) return @@ -1270,7 +1292,7 @@ func scatterRegions(c *gin.Context) { if !ok { return 0, nil, errors.New("regions_id is invalid") } - return handler.ScatterRegionsByID(ids, group, retryLimit, false) + return handler.ScatterRegionsByID(ids, group, retryLimit) }() if err != nil { c.String(http.StatusInternalServerError, err.Error()) @@ -1292,7 +1314,7 @@ func scatterRegions(c *gin.Context) { func splitRegions(c *gin.Context) { handler := c.MustGet(handlerKey).(*handler.Handler) - var input map[string]interface{} + var input map[string]any if err := c.BindJSON(&input); err != nil { c.String(http.StatusBadRequest, err.Error()) return @@ -1302,7 +1324,7 @@ func splitRegions(c *gin.Context) { c.String(http.StatusBadRequest, "split_keys should be provided.") return } - rawSplitKeys := s.([]interface{}) + rawSplitKeys := s.([]any) if len(rawSplitKeys) < 1 { c.String(http.StatusBadRequest, "empty split keys.") return @@ -1343,3 +1365,115 @@ func checkRegionsReplicated(c *gin.Context) { } c.IndentedJSON(http.StatusOK, state) } + +// @Tags store +// @Summary Get a store's information. +// @Param id path integer true "Store Id" +// @Produce json +// @Success 200 {object} response.StoreInfo +// @Failure 400 {string} string "The input is invalid." +// @Failure 404 {string} string "The store does not exist." 
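Handlers like splitRegions above decode an untyped JSON body into map[string]any and then type-assert individual fields; JSON arrays come back as []any whose elements must be asserted one by one. A self-contained gin sketch of that decode-then-assert pattern (field names follow the handler above, the rest is illustrative):

package main

import (
	"net/http"

	"github.com/gin-gonic/gin"
)

func splitKeys(c *gin.Context) {
	var input map[string]any
	if err := c.BindJSON(&input); err != nil {
		c.String(http.StatusBadRequest, err.Error())
		return
	}
	raw, ok := input["split_keys"].([]any)
	if !ok || len(raw) == 0 {
		c.String(http.StatusBadRequest, "split_keys should be provided.")
		return
	}
	keys := make([]string, 0, len(raw))
	for _, k := range raw {
		s, ok := k.(string)
		if !ok {
			c.String(http.StatusBadRequest, "split key must be a string.")
			return
		}
		keys = append(keys, s)
	}
	c.JSON(http.StatusOK, gin.H{"accepted": keys})
}

func main() {
	engine := gin.New()
	engine.POST("/regions/split", splitKeys)
	_ = engine.Run(":8080")
}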
+// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /stores/{id} [get] +func getStoreByID(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + idStr := c.Param("id") + storeID, err := strconv.ParseUint(idStr, 10, 64) + if err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + store := svr.GetBasicCluster().GetStore(storeID) + if store == nil { + c.String(http.StatusNotFound, errs.ErrStoreNotFound.FastGenByArgs(storeID).Error()) + return + } + + storeInfo := response.BuildStoreInfo(&svr.GetConfig().Schedule, store) + c.IndentedJSON(http.StatusOK, storeInfo) +} + +// @Tags store +// @Summary Get all stores in the cluster. +// @Produce json +// @Success 200 {object} response.StoresInfo +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /stores [get] +func getAllStores(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + stores := svr.GetBasicCluster().GetMetaStores() + StoresInfo := &response.StoresInfo{ + Stores: make([]*response.StoreInfo, 0, len(stores)), + } + + for _, s := range stores { + storeID := s.GetId() + store := svr.GetBasicCluster().GetStore(storeID) + if store == nil { + c.String(http.StatusInternalServerError, errs.ErrStoreNotFound.FastGenByArgs(storeID).Error()) + return + } + if store.GetMeta().State == metapb.StoreState_Tombstone { + continue + } + storeInfo := response.BuildStoreInfo(&svr.GetConfig().Schedule, store) + StoresInfo.Stores = append(StoresInfo.Stores, storeInfo) + } + StoresInfo.Count = len(StoresInfo.Stores) + c.IndentedJSON(http.StatusOK, StoresInfo) +} + +// @Tags region +// @Summary List all regions in the cluster. +// @Produce json +// @Success 200 {object} response.RegionsInfo +// @Router /regions [get] +func getAllRegions(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + regions := svr.GetBasicCluster().GetRegions() + b, err := response.MarshalRegionsInfoJSON(c.Request.Context(), regions) + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + c.Data(http.StatusOK, "application/json", b) +} + +// @Tags region +// @Summary Get count of regions. +// @Produce json +// @Success 200 {object} response.RegionsInfo +// @Router /regions/count [get] +func getRegionCount(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + count := svr.GetBasicCluster().GetTotalRegionCount() + c.IndentedJSON(http.StatusOK, &response.RegionsInfo{Count: count}) +} + +// @Tags region +// @Summary Search for a region by region ID. +// @Param id path integer true "Region Id" +// @Produce json +// @Success 200 {object} response.RegionInfo +// @Failure 400 {string} string "The input is invalid." 
+// @Router /regions/{id} [get] +func getRegionByID(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + idStr := c.Param("id") + regionID, err := strconv.ParseUint(idStr, 10, 64) + if err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + regionInfo := svr.GetBasicCluster().GetRegion(regionID) + if regionInfo == nil { + c.String(http.StatusNotFound, errs.ErrRegionNotFound.FastGenByArgs(regionID).Error()) + return + } + b, err := response.MarshalRegionInfoJSON(c.Request.Context(), regionInfo) + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + c.Data(http.StatusOK, "application/json", b) +} diff --git a/pkg/mcs/scheduling/server/cluster.go b/pkg/mcs/scheduling/server/cluster.go index bbecad51f7c..4062ed38fd6 100644 --- a/pkg/mcs/scheduling/server/cluster.go +++ b/pkg/mcs/scheduling/server/cluster.go @@ -2,11 +2,14 @@ package server import ( "context" + "runtime" "sync" "sync/atomic" "time" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/kvproto/pkg/schedulingpb" "github.com/pingcap/log" @@ -14,6 +17,7 @@ import ( "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/mcs/scheduling/server/config" + "github.com/tikv/pd/pkg/ratelimit" "github.com/tikv/pd/pkg/schedule" sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/schedule/hbstream" @@ -50,14 +54,28 @@ type Cluster struct { apiServerLeader atomic.Value clusterID uint64 running atomic.Bool + + // heartbeatRunner is used to process the subtree update task asynchronously. + heartbeatRunner ratelimit.Runner + // miscRunner is used to process the statistics and persistent tasks asynchronously. + miscRunner ratelimit.Runner + // logRunner is used to process the log asynchronously. + logRunner ratelimit.Runner } const ( regionLabelGCInterval = time.Hour requestTimeout = 3 * time.Second collectWaitTime = time.Minute + + // heartbeat relative const + heartbeatTaskRunner = "heartbeat-task-runner" + miscTaskRunner = "misc-task-runner" + logTaskRunner = "log-task-runner" ) +var syncRunner = ratelimit.NewSyncRunner() + // NewCluster creates a new cluster. 
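The cluster.go hunk above introduces three concurrency-limited runners sized at twice the CPU count. The sketch below is a deliberately simplified stand-in for ratelimit.NewConcurrentRunner, using a buffered channel as a semaphore; it is not PD's implementation, only the bounding idea:

package main

import (
	"fmt"
	"runtime"
	"sync"
)

type runner struct {
	sem chan struct{}
	wg  sync.WaitGroup
}

func newRunner() *runner {
	return &runner{sem: make(chan struct{}, runtime.NumCPU()*2)}
}

// RunTask schedules f asynchronously; at most cap(sem) tasks run at once.
func (r *runner) RunTask(name string, f func()) {
	r.wg.Add(1)
	go func() {
		defer r.wg.Done()
		r.sem <- struct{}{}        // acquire a slot
		defer func() { <-r.sem }() // release it
		f()
		_ = name // a real runner would also record per-task metrics by name
	}()
}

func (r *runner) Stop() { r.wg.Wait() }

func main() {
	r := newRunner()
	for i := 0; i < 10; i++ {
		i := i
		r.RunTask("update-subtree", func() { fmt.Println("task", i) })
	}
	r.Stop()
}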
func NewCluster(parentCtx context.Context, persistConfig *config.PersistConfig, storage storage.Storage, basicCluster *core.BasicCluster, hbStreams *hbstream.HeartbeatStreams, clusterID uint64, checkMembershipCh chan struct{}) (*Cluster, error) { ctx, cancel := context.WithCancel(parentCtx) @@ -80,6 +98,10 @@ func NewCluster(parentCtx context.Context, persistConfig *config.PersistConfig, storage: storage, clusterID: clusterID, checkMembershipCh: checkMembershipCh, + + heartbeatRunner: ratelimit.NewConcurrentRunner(heartbeatTaskRunner, ratelimit.NewConcurrencyLimiter(uint64(runtime.NumCPU()*2)), time.Minute), + miscRunner: ratelimit.NewConcurrentRunner(miscTaskRunner, ratelimit.NewConcurrencyLimiter(uint64(runtime.NumCPU()*2)), time.Minute), + logRunner: ratelimit.NewConcurrentRunner(logTaskRunner, ratelimit.NewConcurrencyLimiter(uint64(runtime.NumCPU()*2)), time.Minute), } c.coordinator = schedule.NewCoordinator(ctx, c, hbStreams) err = c.ruleManager.Initialize(persistConfig.GetMaxReplicas(), persistConfig.GetLocationLabels(), persistConfig.GetIsolationLevel()) @@ -421,12 +443,23 @@ func (c *Cluster) HandleStoreHeartbeat(heartbeat *schedulingpb.StoreHeartbeatReq utils.RegionWriteKeys: 0, utils.RegionWriteQueryNum: 0, } - peerInfo := core.NewPeerInfo(peer, loads, interval) - c.hotStat.CheckReadAsync(statistics.NewCheckPeerTask(peerInfo, region)) + checkReadPeerTask := func(cache *statistics.HotPeerCache) { + stats := cache.CheckPeerFlow(region, []*metapb.Peer{peer}, loads, interval) + for _, stat := range stats { + cache.UpdateStat(stat) + } + } + c.hotStat.CheckReadAsync(checkReadPeerTask) } // Here we will compare the reported regions with the previous hot peers to decide if it is still hot. - c.hotStat.CheckReadAsync(statistics.NewCollectUnReportedPeerTask(storeID, regions, interval)) + collectUnReportedPeerTask := func(cache *statistics.HotPeerCache) { + stats := cache.CheckColdPeer(storeID, regions, interval) + for _, stat := range stats { + cache.UpdateStat(stat) + } + } + c.hotStat.CheckReadAsync(collectUnReportedPeerTask) return nil } @@ -454,7 +487,11 @@ func (c *Cluster) runCoordinator() { defer logutil.LogPanic() defer c.wg.Done() // force wait for 1 minute to make prepare checker won't be directly skipped - c.coordinator.RunUntilStop(collectWaitTime) + runCollectWaitTime := collectWaitTime + failpoint.Inject("changeRunCollectWaitTime", func() { + runCollectWaitTime = 1 * time.Second + }) + c.coordinator.RunUntilStop(runCollectWaitTime) } func (c *Cluster) runMetricsCollectionJob() { @@ -468,7 +505,7 @@ func (c *Cluster) runMetricsCollectionJob() { select { case <-c.ctx.Done(): log.Info("metrics are reset") - c.resetMetrics() + resetMetrics() log.Info("metrics collection job has been stopped") return case <-ticker.C: @@ -482,7 +519,7 @@ func (c *Cluster) collectMetrics() { stores := c.GetStores() for _, s := range stores { statsMap.Observe(s) - statsMap.ObserveHotStat(s, c.hotStat.StoresStats) + statistics.ObserveHotStat(s, c.hotStat.StoresStats) } statsMap.Collect() @@ -495,9 +532,11 @@ func (c *Cluster) collectMetrics() { c.labelStats.Collect() // collect hot cache metrics c.hotStat.CollectMetrics() + // collect the lock metrics + c.CollectWaitLockMetrics() } -func (c *Cluster) resetMetrics() { +func resetMetrics() { statistics.Reset() schedulers.ResetSchedulerMetrics() schedule.ResetHotSpotMetrics() @@ -510,6 +549,9 @@ func (c *Cluster) StartBackgroundJobs() { go c.runUpdateStoreStats() go c.runCoordinator() go c.runMetricsCollectionJob() + c.heartbeatRunner.Start() + 
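HandleStoreHeartbeat above now hands CheckReadAsync plain closures that capture the region, peers, and interval, instead of typed task structs. A minimal sketch of that closure-as-task pattern over a channel (all types here are stand-ins for the hot-stat cache and checker):

package main

import "fmt"

type cache struct{ stats []string }

type checker struct{ tasks chan func(*cache) }

// CheckReadAsync queues a closure that will later run against the cache.
func (c *checker) CheckReadAsync(task func(*cache)) { c.tasks <- task }

func (c *checker) run(done chan struct{}) {
	cch := &cache{}
	for task := range c.tasks {
		task(cch)
	}
	fmt.Println(cch.stats)
	close(done)
}

func main() {
	ck := &checker{tasks: make(chan func(*cache), 8)}
	done := make(chan struct{})
	go ck.run(done)
	region := "region-1"
	ck.CheckReadAsync(func(cc *cache) { cc.stats = append(cc.stats, "checked "+region) })
	close(ck.tasks)
	<-done
}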
c.miscRunner.Start() + c.logRunner.Start() c.running.Store(true) } @@ -520,6 +562,9 @@ func (c *Cluster) StopBackgroundJobs() { } c.running.Store(false) c.coordinator.Stop() + c.heartbeatRunner.Stop() + c.miscRunner.Stop() + c.logRunner.Stop() c.cancel() c.wg.Wait() } @@ -531,52 +576,117 @@ func (c *Cluster) IsBackgroundJobsRunning() bool { // HandleRegionHeartbeat processes RegionInfo reports from client. func (c *Cluster) HandleRegionHeartbeat(region *core.RegionInfo) error { - if err := c.processRegionHeartbeat(region); err != nil { + tracer := core.NewNoopHeartbeatProcessTracer() + if c.persistConfig.GetScheduleConfig().EnableHeartbeatBreakdownMetrics { + tracer = core.NewHeartbeatProcessTracer() + } + var taskRunner, miscRunner, logRunner ratelimit.Runner + taskRunner, miscRunner, logRunner = syncRunner, syncRunner, syncRunner + if c.persistConfig.GetScheduleConfig().EnableHeartbeatConcurrentRunner { + taskRunner = c.heartbeatRunner + miscRunner = c.miscRunner + logRunner = c.logRunner + } + ctx := &core.MetaProcessContext{ + Context: c.ctx, + Tracer: tracer, + TaskRunner: taskRunner, + MiscRunner: miscRunner, + LogRunner: logRunner, + } + tracer.Begin() + if err := c.processRegionHeartbeat(ctx, region); err != nil { + tracer.OnAllStageFinished() return err } - + tracer.OnAllStageFinished() c.coordinator.GetOperatorController().Dispatch(region, operator.DispatchFromHeartBeat, c.coordinator.RecordOpStepWithTTL) return nil } // processRegionHeartbeat updates the region information. -func (c *Cluster) processRegionHeartbeat(region *core.RegionInfo) error { +func (c *Cluster) processRegionHeartbeat(ctx *core.MetaProcessContext, region *core.RegionInfo) error { + tracer := ctx.Tracer origin, _, err := c.PreCheckPutRegion(region) + tracer.OnPreCheckFinished() if err != nil { return err } region.Inherit(origin, c.GetStoreConfig().IsEnableRegionBucket()) - cluster.HandleStatsAsync(c, region) - + tracer.OnAsyncHotStatsFinished() hasRegionStats := c.regionStats != nil // Save to storage if meta is updated, except for flashback. // Save to cache if meta or leader is updated, or contains any down/pending peer. - // Mark isNew if the region in cache does not have leader. - isNew, _, saveCache, _ := core.GenerateRegionGuideFunc(true)(region, origin) - if !saveCache && !isNew { + _, saveCache, _, retained := core.GenerateRegionGuideFunc(true)(ctx, region, origin) + regionID := region.GetID() + if !saveCache { // Due to some config changes need to update the region stats as well, // so we do some extra checks here. if hasRegionStats && c.regionStats.RegionStatsNeedUpdate(region) { - c.regionStats.Observe(region, c.GetRegionStores(region)) + _ = ctx.TaskRunner.RunTask( + regionID, + ratelimit.ObserveRegionStatsAsync, + func() { + if c.regionStats.RegionStatsNeedUpdate(region) { + cluster.Collect(c, region, hasRegionStats) + } + }, + ) + } + // region is not updated to the subtree. + if origin.GetRef() < 2 { + _ = ctx.TaskRunner.RunTask( + regionID, + ratelimit.UpdateSubTree, + func() { + c.CheckAndPutSubTree(region) + }, + ratelimit.WithRetained(true), + ) } return nil } - + tracer.OnSaveCacheBegin() var overlaps []*core.RegionInfo if saveCache { // To prevent a concurrent heartbeat of another region from overriding the up-to-date region info by a stale one, // check its validation again here. // // However, it can't solve the race condition of concurrent heartbeats from the same region. - if overlaps, err = c.AtomicCheckAndPutRegion(region); err != nil { + + // Async task in next PR. 
+ if overlaps, err = c.CheckAndPutRootTree(ctx, region); err != nil { + tracer.OnSaveCacheFinished() return err } - - cluster.HandleOverlaps(c, overlaps) + _ = ctx.TaskRunner.RunTask( + regionID, + ratelimit.UpdateSubTree, + func() { + c.CheckAndPutSubTree(region) + }, + ratelimit.WithRetained(retained), + ) + tracer.OnUpdateSubTreeFinished() + _ = ctx.TaskRunner.RunTask( + regionID, + ratelimit.HandleOverlaps, + func() { + cluster.HandleOverlaps(c, overlaps) + }, + ) } - - cluster.Collect(c, region, c.GetRegionStores(region), hasRegionStats, isNew, c.IsPrepared()) + tracer.OnSaveCacheFinished() + // handle region stats + _ = ctx.TaskRunner.RunTask( + regionID, + ratelimit.CollectRegionStatsAsync, + func() { + cluster.Collect(c, region, hasRegionStats) + }, + ) + tracer.OnCollectRegionStatsFinished() return nil } @@ -590,12 +700,9 @@ func (c *Cluster) SetPrepared() { c.coordinator.GetPrepareChecker().SetPrepared() } -// DropCacheAllRegion removes all cached regions. -func (c *Cluster) DropCacheAllRegion() { - c.ResetRegionCache() -} - -// DropCacheRegion removes a region from the cache. -func (c *Cluster) DropCacheRegion(id uint64) { - c.RemoveRegionIfExist(id) +// IsSchedulingHalted returns whether the scheduling is halted. +// Currently, the microservice scheduling is halted when: +// - The `HaltScheduling` persist option is set to true. +func (c *Cluster) IsSchedulingHalted() bool { + return c.persistConfig.IsSchedulingHalted() } diff --git a/pkg/mcs/scheduling/server/config/OWNERS b/pkg/mcs/scheduling/server/config/OWNERS new file mode 100644 index 00000000000..aa02465dbd9 --- /dev/null +++ b/pkg/mcs/scheduling/server/config/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs at https://go.k8s.io/owners +options: + no_parent_owners: true +filters: + "(OWNERS|config\\.go)$": + approvers: + - sig-critical-approvers-config diff --git a/pkg/mcs/scheduling/server/config/config.go b/pkg/mcs/scheduling/server/config/config.go index 3e347afc12e..9dc6590a0b4 100644 --- a/pkg/mcs/scheduling/server/config/config.go +++ b/pkg/mcs/scheduling/server/config/config.go @@ -148,10 +148,6 @@ func (c *Config) adjust(meta *toml.MetaData) error { c.adjustLog(configMetaData.Child("log")) c.Security.Encryption.Adjust() - if len(c.Log.Format) == 0 { - c.Log.Format = utils.DefaultLogFormat - } - configutil.AdjustInt64(&c.LeaderLease, utils.DefaultLeaderLease) if err := c.Schedule.Adjust(configMetaData.Child("schedule"), false); err != nil { @@ -164,6 +160,8 @@ func (c *Config) adjustLog(meta *configutil.ConfigMetaData) { if !meta.IsDefined("disable-error-verbose") { c.Log.DisableErrorVerbose = utils.DefaultDisableErrorVerbose } + configutil.AdjustString(&c.Log.Format, utils.DefaultLogFormat) + configutil.AdjustString(&c.Log.Level, utils.DefaultLogLevel) } // GetName returns the Name @@ -294,7 +292,7 @@ func (o *PersistConfig) SetScheduleConfig(cfg *sc.ScheduleConfig) { } // AdjustScheduleCfg adjusts the schedule config during the initialization. -func (o *PersistConfig) AdjustScheduleCfg(scheduleCfg *sc.ScheduleConfig) { +func AdjustScheduleCfg(scheduleCfg *sc.ScheduleConfig) { // In case we add new default schedulers. for _, ps := range sc.DefaultSchedulers { if slice.NoneOf(scheduleCfg.Schedulers, func(i int) bool { @@ -374,7 +372,7 @@ func (o *PersistConfig) IsUseJointConsensus() bool { } // GetKeyType returns the key type. 
-func (o *PersistConfig) GetKeyType() constant.KeyType { +func (*PersistConfig) GetKeyType() constant.KeyType { return constant.StringToKeyType("table") } @@ -684,8 +682,12 @@ func (o *PersistConfig) SetSplitMergeInterval(splitMergeInterval time.Duration) o.SetScheduleConfig(v) } +// SetSchedulingAllowanceStatus sets the scheduling allowance status to help distinguish the source of the halt. +// TODO: support this metrics for the scheduling service in the future. +func (*PersistConfig) SetSchedulingAllowanceStatus(bool, string) {} + // SetHaltScheduling set HaltScheduling. -func (o *PersistConfig) SetHaltScheduling(halt bool, source string) { +func (o *PersistConfig) SetHaltScheduling(halt bool, _ string) { v := o.GetScheduleConfig().Clone() v.HaltScheduling = halt o.SetScheduleConfig(v) @@ -735,25 +737,25 @@ func (o *PersistConfig) IsRaftKV2() bool { // AddSchedulerCfg adds the scheduler configurations. // This method is a no-op since we only use configurations derived from one-way synchronization from API server now. -func (o *PersistConfig) AddSchedulerCfg(string, []string) {} +func (*PersistConfig) AddSchedulerCfg(string, []string) {} // RemoveSchedulerCfg removes the scheduler configurations. // This method is a no-op since we only use configurations derived from one-way synchronization from API server now. -func (o *PersistConfig) RemoveSchedulerCfg(tp string) {} +func (*PersistConfig) RemoveSchedulerCfg(string) {} // CheckLabelProperty checks if the label property is satisfied. -func (o *PersistConfig) CheckLabelProperty(typ string, labels []*metapb.StoreLabel) bool { +func (*PersistConfig) CheckLabelProperty(string, []*metapb.StoreLabel) bool { return false } // IsTraceRegionFlow returns if the region flow is tracing. // If the accuracy cannot reach 0.1 MB, it is considered not. -func (o *PersistConfig) IsTraceRegionFlow() bool { +func (*PersistConfig) IsTraceRegionFlow() bool { return false } // Persist saves the configuration to the storage. -func (o *PersistConfig) Persist(storage endpoint.ConfigStorage) error { +func (*PersistConfig) Persist(endpoint.ConfigStorage) error { return nil } diff --git a/pkg/mcs/scheduling/server/config/watcher.go b/pkg/mcs/scheduling/server/config/watcher.go index 8db5e656279..d1ca99bd36d 100644 --- a/pkg/mcs/scheduling/server/config/watcher.go +++ b/pkg/mcs/scheduling/server/config/watcher.go @@ -129,14 +129,14 @@ func (cw *Watcher) initializeConfigWatcher() error { return err } log.Info("update scheduling config", zap.Reflect("new", cfg)) - cw.AdjustScheduleCfg(&cfg.Schedule) + AdjustScheduleCfg(&cfg.Schedule) cw.SetClusterVersion(&cfg.ClusterVersion) cw.SetScheduleConfig(&cfg.Schedule) cw.SetReplicationConfig(&cfg.Replication) cw.SetStoreConfig(&cfg.Store) return nil } - deleteFn := func(kv *mvccpb.KeyValue) error { + deleteFn := func(*mvccpb.KeyValue) error { return nil } cw.configWatcher = etcdutil.NewLoopWatcher( diff --git a/pkg/mcs/scheduling/server/grpc_service.go b/pkg/mcs/scheduling/server/grpc_service.go index b865e917d75..842e876885c 100644 --- a/pkg/mcs/scheduling/server/grpc_service.go +++ b/pkg/mcs/scheduling/server/grpc_service.go @@ -45,20 +45,20 @@ var ( ) // SetUpRestHandler is a hook to sets up the REST service. 
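Many of the PersistConfig changes above simply drop names from receivers and parameters that are never used, which is what the newly enabled revive linter expects for no-op interface methods. A tiny illustration of the convention (the type and methods are illustrative):

package main

import "fmt"

type config struct{ halt bool }

// Persist is a no-op here; leaving the receiver and parameter unnamed signals
// they are intentionally unused.
func (*config) Persist(string) error { return nil }

// SetHaltScheduling uses the receiver but not the source argument, so only the
// unused parameter is blanked with `_`.
func (c *config) SetHaltScheduling(halt bool, _ string) { c.halt = halt }

func main() {
	c := &config{}
	c.SetHaltScheduling(true, "manual")
	fmt.Println(c.halt, c.Persist("unused")) // true <nil>
}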
-var SetUpRestHandler = func(srv *Service) (http.Handler, apiutil.APIServiceGroup) { +var SetUpRestHandler = func(*Service) (http.Handler, apiutil.APIServiceGroup) { return dummyRestService{}, apiutil.APIServiceGroup{} } type dummyRestService struct{} -func (d dummyRestService) ServeHTTP(w http.ResponseWriter, r *http.Request) { +func (dummyRestService) ServeHTTP(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusNotImplemented) w.Write([]byte("not implemented")) } // ConfigProvider is used to get scheduling config from the given // `bs.server` without modifying its interface. -type ConfigProvider interface{} +type ConfigProvider any // Service is the scheduling grpc service. type Service struct { @@ -158,7 +158,8 @@ func (s *Service) RegionHeartbeat(stream schedulingpb.Scheduling_RegionHeartbeat s.hbStreams.BindStream(storeID, server) lastBind = time.Now() } - region := core.RegionFromHeartbeat(request, core.SetSource(core.Heartbeat)) + // scheduling service doesn't sync the pd server config, so we use 0 here + region := core.RegionFromHeartbeat(request, 0) err = c.HandleRegionHeartbeat(region) if err != nil { // TODO: if we need to send the error back to API server. @@ -169,7 +170,7 @@ func (s *Service) RegionHeartbeat(stream schedulingpb.Scheduling_RegionHeartbeat } // StoreHeartbeat implements gRPC SchedulingServer. -func (s *Service) StoreHeartbeat(ctx context.Context, request *schedulingpb.StoreHeartbeatRequest) (*schedulingpb.StoreHeartbeatResponse, error) { +func (s *Service) StoreHeartbeat(_ context.Context, request *schedulingpb.StoreHeartbeatRequest) (*schedulingpb.StoreHeartbeatResponse, error) { c := s.GetCluster() if c == nil { // TODO: add metrics @@ -203,7 +204,7 @@ func (s *Service) SplitRegions(ctx context.Context, request *schedulingpb.SplitR } // ScatterRegions implements gRPC SchedulingServer. -func (s *Service) ScatterRegions(ctx context.Context, request *schedulingpb.ScatterRegionsRequest) (*schedulingpb.ScatterRegionsResponse, error) { +func (s *Service) ScatterRegions(_ context.Context, request *schedulingpb.ScatterRegionsRequest) (*schedulingpb.ScatterRegionsResponse, error) { c := s.GetCluster() if c == nil { return &schedulingpb.ScatterRegionsResponse{Header: s.notBootstrappedHeader()}, nil @@ -235,7 +236,7 @@ func (s *Service) ScatterRegions(ctx context.Context, request *schedulingpb.Scat } // GetOperator gets information about the operator belonging to the specify region. -func (s *Service) GetOperator(ctx context.Context, request *schedulingpb.GetOperatorRequest) (*schedulingpb.GetOperatorResponse, error) { +func (s *Service) GetOperator(_ context.Context, request *schedulingpb.GetOperatorRequest) (*schedulingpb.GetOperatorResponse, error) { c := s.GetCluster() if c == nil { return &schedulingpb.GetOperatorResponse{Header: s.notBootstrappedHeader()}, nil @@ -262,7 +263,7 @@ func (s *Service) GetOperator(ctx context.Context, request *schedulingpb.GetOper } // AskBatchSplit implements gRPC SchedulingServer. 
-func (s *Service) AskBatchSplit(ctx context.Context, request *schedulingpb.AskBatchSplitRequest) (*schedulingpb.AskBatchSplitResponse, error) { +func (s *Service) AskBatchSplit(_ context.Context, request *schedulingpb.AskBatchSplitRequest) (*schedulingpb.AskBatchSplitResponse, error) { c := s.GetCluster() if c == nil { return &schedulingpb.AskBatchSplitResponse{Header: s.notBootstrappedHeader()}, nil @@ -275,7 +276,7 @@ func (s *Service) AskBatchSplit(ctx context.Context, request *schedulingpb.AskBa }, nil } - if c.persistConfig.IsSchedulingHalted() { + if c.IsSchedulingHalted() { return nil, errs.ErrSchedulingIsHalted.FastGenByArgs() } if !c.persistConfig.IsTikvRegionSplitEnabled() { diff --git a/pkg/mcs/scheduling/server/meta/watcher.go b/pkg/mcs/scheduling/server/meta/watcher.go index 925b28763b5..2daa6766d75 100644 --- a/pkg/mcs/scheduling/server/meta/watcher.go +++ b/pkg/mcs/scheduling/server/meta/watcher.go @@ -78,6 +78,7 @@ func (w *Watcher) initializeStoreWatcher() error { zap.String("event-kv-key", string(kv.Key)), zap.Error(err)) return err } + log.Debug("update store meta", zap.Stringer("store", store)) origin := w.basicCluster.GetStore(store.GetId()) if origin == nil { w.basicCluster.PutStore(core.NewStoreInfo(store)) @@ -101,6 +102,7 @@ func (w *Watcher) initializeStoreWatcher() error { origin := w.basicCluster.GetStore(storeID) if origin != nil { w.basicCluster.DeleteStore(origin) + log.Info("delete store meta", zap.Uint64("store-id", storeID)) } return nil } diff --git a/pkg/mcs/scheduling/server/rule/watcher.go b/pkg/mcs/scheduling/server/rule/watcher.go index d8a8dd3e609..ea90b9d4e49 100644 --- a/pkg/mcs/scheduling/server/rule/watcher.go +++ b/pkg/mcs/scheduling/server/rule/watcher.go @@ -109,7 +109,7 @@ func NewWatcher( func (rw *Watcher) initializeRuleWatcher() error { var suspectKeyRanges *core.KeyRanges - preEventsFn := func(events []*clientv3.Event) error { + preEventsFn := func([]*clientv3.Event) error { // It will be locked until the postEventsFn is finished. rw.ruleManager.Lock() rw.patch = rw.ruleManager.BeginPatch() @@ -149,10 +149,9 @@ func (rw *Watcher) initializeRuleWatcher() error { suspectKeyRanges.Append(rule.StartKey, rule.EndKey) } return nil - } else { - log.Warn("unknown key when updating placement rule", zap.String("key", key)) - return nil } + log.Warn("unknown key when updating placement rule", zap.String("key", key)) + return nil } deleteFn := func(kv *mvccpb.KeyValue) error { key := string(kv.Key) @@ -181,12 +180,11 @@ func (rw *Watcher) initializeRuleWatcher() error { suspectKeyRanges.Append(rule.StartKey, rule.EndKey) } return nil - } else { - log.Warn("unknown key when deleting placement rule", zap.String("key", key)) - return nil } + log.Warn("unknown key when deleting placement rule", zap.String("key", key)) + return nil } - postEventsFn := func(events []*clientv3.Event) error { + postEventsFn := func([]*clientv3.Event) error { defer rw.ruleManager.Unlock() if err := rw.ruleManager.TryCommitPatchLocked(rw.patch); err != nil { log.Error("failed to commit patch", zap.Error(err)) @@ -213,7 +211,7 @@ func (rw *Watcher) initializeRuleWatcher() error { func (rw *Watcher) initializeRegionLabelWatcher() error { prefixToTrim := rw.regionLabelPathPrefix + "/" // TODO: use txn in region labeler. - preEventsFn := func(events []*clientv3.Event) error { + preEventsFn := func([]*clientv3.Event) error { // It will be locked until the postEventsFn is finished. 
rw.regionLabeler.Lock() return nil @@ -231,7 +229,7 @@ func (rw *Watcher) initializeRegionLabelWatcher() error { log.Info("delete region label rule", zap.String("key", key)) return rw.regionLabeler.DeleteLabelRuleLocked(strings.TrimPrefix(key, prefixToTrim)) } - postEventsFn := func(events []*clientv3.Event) error { + postEventsFn := func([]*clientv3.Event) error { defer rw.regionLabeler.Unlock() rw.regionLabeler.BuildRangeListLocked() return nil diff --git a/pkg/mcs/scheduling/server/rule/watcher_test.go b/pkg/mcs/scheduling/server/rule/watcher_test.go index dafc7dcac2f..37fce0a0ded 100644 --- a/pkg/mcs/scheduling/server/rule/watcher_test.go +++ b/pkg/mcs/scheduling/server/rule/watcher_test.go @@ -86,7 +86,7 @@ func prepare(t require.TestingT) (context.Context, *clientv3.Client, func()) { os.RemoveAll(cfg.Dir) etcd, err := embed.StartEtcd(cfg) re.NoError(err) - client, err := etcdutil.CreateEtcdClient(nil, cfg.LCUrls) + client, err := etcdutil.CreateEtcdClient(nil, cfg.ListenClientUrls) re.NoError(err) <-etcd.Server.ReadyNotify() diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 8013f1d0e7b..47a7cf9962b 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -20,6 +20,7 @@ import ( "net/http" "os" "os/signal" + "path/filepath" "runtime" "strconv" "sync" @@ -319,6 +320,7 @@ func (s *Server) Close() { utils.StopHTTPServer(s) utils.StopGRPCServer(s) s.GetListener().Close() + s.CloseClientConns() s.serverLoopCancel() s.serverLoopWg.Wait() @@ -412,7 +414,18 @@ func (s *Server) startServer() (err error) { // different service modes provided by the same pd-server binary bs.ServerInfoGauge.WithLabelValues(versioninfo.PDReleaseVersion, versioninfo.PDGitHash).Set(float64(time.Now().Unix())) bs.ServerMaxProcsGauge.Set(float64(runtime.GOMAXPROCS(0))) - s.serviceID = &discovery.ServiceRegistryEntry{ServiceAddr: s.cfg.AdvertiseListenAddr} + execPath, err := os.Executable() + deployPath := filepath.Dir(execPath) + if err != nil { + deployPath = "" + } + s.serviceID = &discovery.ServiceRegistryEntry{ + ServiceAddr: s.cfg.AdvertiseListenAddr, + Version: versioninfo.PDReleaseVersion, + GitHash: versioninfo.PDGitHash, + DeployPath: deployPath, + StartTimestamp: s.StartTimestamp(), + } uniqueName := s.cfg.GetAdvertiseListenAddr() uniqueID := memberutil.GenerateUniqueID(uniqueName) log.Info("joining primary election", zap.String("participant-name", uniqueName), zap.Uint64("participant-id", uniqueID)) diff --git a/pkg/mcs/server/server.go b/pkg/mcs/server/server.go index a8dedd8ad91..6aec799278c 100644 --- a/pkg/mcs/server/server.go +++ b/pkg/mcs/server/server.go @@ -23,6 +23,7 @@ import ( "sync" "time" + "github.com/pingcap/log" "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/utils/grpcutil" "go.etcd.io/etcd/clientv3" @@ -167,3 +168,14 @@ func (bs *BaseServer) IsSecure() bool { func (bs *BaseServer) StartTimestamp() int64 { return bs.startTimestamp } + +// CloseClientConns closes all client connections. 
+func (bs *BaseServer) CloseClientConns() { + bs.clientConns.Range(func(_, value any) bool { + conn := value.(*grpc.ClientConn) + if err := conn.Close(); err != nil { + log.Error("close client connection meet error") + } + return true + }) +} diff --git a/pkg/mcs/tso/server/OWNERS b/pkg/mcs/tso/server/OWNERS new file mode 100644 index 00000000000..aa02465dbd9 --- /dev/null +++ b/pkg/mcs/tso/server/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs at https://go.k8s.io/owners +options: + no_parent_owners: true +filters: + "(OWNERS|config\\.go)$": + approvers: + - sig-critical-approvers-config diff --git a/pkg/mcs/tso/server/apis/v1/api.go b/pkg/mcs/tso/server/apis/v1/api.go index 94282592151..44f4b353d58 100644 --- a/pkg/mcs/tso/server/apis/v1/api.go +++ b/pkg/mcs/tso/server/apis/v1/api.go @@ -18,13 +18,11 @@ import ( "fmt" "net/http" "strconv" - "sync" "github.com/gin-contrib/cors" "github.com/gin-contrib/gzip" "github.com/gin-contrib/pprof" "github.com/gin-gonic/gin" - "github.com/joho/godotenv" "github.com/pingcap/kvproto/pkg/tsopb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" @@ -44,7 +42,6 @@ const ( ) var ( - once sync.Once apiServiceGroup = apiutil.APIServiceGroup{ Name: "tso", Version: "v1", @@ -77,11 +74,6 @@ func createIndentRender() *render.Render { // NewService returns a new Service. func NewService(srv *tsoserver.Service) *Service { - once.Do(func() { - // These global modification will be effective only for the first invoke. - _ = godotenv.Load() - gin.SetMode(gin.ReleaseMode) - }) apiHandlerEngine := gin.New() apiHandlerEngine.Use(gin.Recovery()) apiHandlerEngine.Use(cors.Default()) @@ -103,7 +95,8 @@ func NewService(srv *tsoserver.Service) *Service { } s.RegisterAdminRouter() s.RegisterKeyspaceGroupRouter() - s.RegisterHealth() + s.RegisterHealthRouter() + s.RegisterConfigRouter() return s } @@ -120,12 +113,18 @@ func (s *Service) RegisterKeyspaceGroupRouter() { router.GET("/members", GetKeyspaceGroupMembers) } -// RegisterHealth registers the router of the health handler. -func (s *Service) RegisterHealth() { +// RegisterHealthRouter registers the router of the health handler. +func (s *Service) RegisterHealthRouter() { router := s.root.Group("health") router.GET("", GetHealth) } +// RegisterConfigRouter registers the router of the config handler. +func (s *Service) RegisterConfigRouter() { + router := s.root.Group("config") + router.GET("", getConfig) +} + func changeLogLevel(c *gin.Context) { svr := c.MustGet(multiservicesapi.ServiceContextKey).(*tsoserver.Service) var level string @@ -256,3 +255,13 @@ func GetKeyspaceGroupMembers(c *gin.Context) { } c.IndentedJSON(http.StatusOK, members) } + +// @Tags config +// @Summary Get full config. +// @Produce json +// @Success 200 {object} config.Config +// @Router /config [get] +func getConfig(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*tsoserver.Service) + c.IndentedJSON(http.StatusOK, svr.GetConfig()) +} diff --git a/pkg/mcs/tso/server/config.go b/pkg/mcs/tso/server/config.go index eedf3a2f1b1..06e9054e117 100644 --- a/pkg/mcs/tso/server/config.go +++ b/pkg/mcs/tso/server/config.go @@ -177,11 +177,11 @@ func (c *Config) Parse(flagSet *pflag.FlagSet) error { configutil.AdjustCommandLineString(flagSet, &c.ListenAddr, "listen-addr") configutil.AdjustCommandLineString(flagSet, &c.AdvertiseListenAddr, "advertise-listen-addr") - return c.Adjust(meta, false) + return c.Adjust(meta) } // Adjust is used to adjust the TSO configurations. 
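For illustration, a minimal gin sketch that mirrors the new RegisterConfigRouter/getConfig wiring for the TSO service above. The config fields, the /tso/api/v1 prefix, and the listen address are assumptions for the example; the real handler serves svr.GetConfig().

// Illustrative sketch only: a "config" route group that returns a struct as
// indented JSON, in the same shape as RegisterConfigRouter/getConfig above.
package main

import (
	"net/http"

	"github.com/gin-gonic/gin"
)

type config struct {
	Name       string `json:"name"`
	ListenAddr string `json:"listen-addr"`
}

func main() {
	cfg := config{Name: "tso", ListenAddr: "127.0.0.1:3379"}
	engine := gin.New()
	engine.Use(gin.Recovery())
	// Assumed API prefix for the example; the real prefix is built from the
	// service group elsewhere.
	root := engine.Group("/tso/api/v1")
	cfgGroup := root.Group("config")
	cfgGroup.GET("", func(c *gin.Context) {
		c.IndentedJSON(http.StatusOK, cfg)
	})
	_ = engine.Run("127.0.0.1:3379")
}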
-func (c *Config) Adjust(meta *toml.MetaData, reloading bool) error { +func (c *Config) Adjust(meta *toml.MetaData) error { configMetaData := configutil.NewConfigMetadata(meta) if err := configMetaData.CheckUndecoded(); err != nil { c.WarningMsgs = append(c.WarningMsgs, err.Error()) @@ -226,10 +226,6 @@ func (c *Config) Adjust(meta *toml.MetaData, reloading bool) error { c.adjustLog(configMetaData.Child("log")) c.Security.Encryption.Adjust() - if len(c.Log.Format) == 0 { - c.Log.Format = utils.DefaultLogFormat - } - return nil } @@ -237,6 +233,8 @@ func (c *Config) adjustLog(meta *configutil.ConfigMetaData) { if !meta.IsDefined("disable-error-verbose") { c.Log.DisableErrorVerbose = utils.DefaultDisableErrorVerbose } + configutil.AdjustString(&c.Log.Format, utils.DefaultLogFormat) + configutil.AdjustString(&c.Log.Level, utils.DefaultLogLevel) } // Validate is used to validate if some configurations are right. diff --git a/pkg/mcs/tso/server/config_test.go b/pkg/mcs/tso/server/config_test.go index 9f5bc298964..2cb9c8e019a 100644 --- a/pkg/mcs/tso/server/config_test.go +++ b/pkg/mcs/tso/server/config_test.go @@ -83,7 +83,7 @@ max-gap-reset-ts = "1h" cfg := NewConfig() meta, err := toml.Decode(cfgData, &cfg) re.NoError(err) - err = cfg.Adjust(&meta, false) + err = cfg.Adjust(&meta) re.NoError(err) re.Equal("tso-test-name", cfg.GetName()) diff --git a/pkg/mcs/tso/server/grpc_service.go b/pkg/mcs/tso/server/grpc_service.go index 9006faf49da..03250d9ed37 100644 --- a/pkg/mcs/tso/server/grpc_service.go +++ b/pkg/mcs/tso/server/grpc_service.go @@ -42,20 +42,20 @@ var ( var _ tsopb.TSOServer = (*Service)(nil) // SetUpRestHandler is a hook to sets up the REST service. -var SetUpRestHandler = func(srv *Service) (http.Handler, apiutil.APIServiceGroup) { +var SetUpRestHandler = func(*Service) (http.Handler, apiutil.APIServiceGroup) { return dummyRestService{}, apiutil.APIServiceGroup{} } type dummyRestService struct{} -func (d dummyRestService) ServeHTTP(w http.ResponseWriter, r *http.Request) { +func (dummyRestService) ServeHTTP(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusNotImplemented) w.Write([]byte("not implemented")) } // ConfigProvider is used to get tso config from the given // `bs.server` without modifying its interface. -type ConfigProvider interface{} +type ConfigProvider any // Service is the TSO grpc service. type Service struct { @@ -135,7 +135,7 @@ func (s *Service) Tso(stream tsopb.TSO_TsoServer) error { // FindGroupByKeyspaceID returns the keyspace group that the keyspace belongs to. func (s *Service) FindGroupByKeyspaceID( - ctx context.Context, request *tsopb.FindGroupByKeyspaceIDRequest, + _ context.Context, request *tsopb.FindGroupByKeyspaceIDRequest, ) (*tsopb.FindGroupByKeyspaceIDResponse, error) { respKeyspaceGroup := request.GetHeader().GetKeyspaceGroupId() if errorType, err := s.validRequest(request.GetHeader()); err != nil { @@ -189,7 +189,7 @@ func (s *Service) FindGroupByKeyspaceID( // GetMinTS gets the minimum timestamp across all keyspace groups served by the TSO server // who receives and handles the request. 
func (s *Service) GetMinTS( - ctx context.Context, request *tsopb.GetMinTSRequest, + _ context.Context, request *tsopb.GetMinTSRequest, ) (*tsopb.GetMinTSResponse, error) { respKeyspaceGroup := request.GetHeader().GetKeyspaceGroupId() if errorType, err := s.validRequest(request.GetHeader()); err != nil { diff --git a/pkg/mcs/tso/server/server.go b/pkg/mcs/tso/server/server.go index 55473efc8bb..c38c7142730 100644 --- a/pkg/mcs/tso/server/server.go +++ b/pkg/mcs/tso/server/server.go @@ -20,6 +20,7 @@ import ( "net/http" "os" "os/signal" + "path/filepath" "runtime" "strconv" "sync" @@ -177,6 +178,7 @@ func (s *Server) Close() { utils.StopHTTPServer(s) utils.StopGRPCServer(s) s.GetListener().Close() + s.CloseClientConns() s.serverLoopCancel() s.serverLoopWg.Wait() @@ -248,7 +250,7 @@ func (s *Server) ResignPrimary(keyspaceID, keyspaceGroupID uint32) error { // AddServiceReadyCallback implements basicserver. // It adds callbacks when it's ready for providing tso service. -func (s *Server) AddServiceReadyCallback(callbacks ...func(context.Context) error) { +func (*Server) AddServiceReadyCallback(...func(context.Context) error) { // Do nothing here. The primary of each keyspace group assigned to this host // will respond to the requests accordingly. } @@ -276,7 +278,7 @@ func (s *Server) GetTSOAllocatorManager(keyspaceGroupID uint32) (*tso.AllocatorM } // IsLocalRequest checks if the forwarded host is the current host -func (s *Server) IsLocalRequest(forwardedHost string) bool { +func (*Server) IsLocalRequest(forwardedHost string) bool { // TODO: Check if the forwarded host is the current host. // The logic is depending on etcd service mode -- if the TSO service // uses the embedded etcd, check against ClientUrls; otherwise check @@ -308,13 +310,13 @@ func (s *Server) ValidateRequest(header *tsopb.RequestHeader) error { // GetExternalTS returns external timestamp from the cache or the persistent storage. // TODO: Implement GetExternalTS -func (s *Server) GetExternalTS() uint64 { +func (*Server) GetExternalTS() uint64 { return 0 } // SetExternalTS saves external timestamp to cache and the persistent storage. 
// TODO: Implement SetExternalTS -func (s *Server) SetExternalTS(externalTS uint64) error { +func (*Server) SetExternalTS(uint64) error { return nil } @@ -367,10 +369,21 @@ func (s *Server) startServer() (err error) { s.serverLoopCtx, s.serverLoopCancel = context.WithCancel(s.Context()) legacySvcRootPath := endpoint.LegacyRootPath(s.clusterID) tsoSvcRootPath := endpoint.TSOSvcRootPath(s.clusterID) - s.serviceID = &discovery.ServiceRegistryEntry{ServiceAddr: s.cfg.AdvertiseListenAddr} + execPath, err := os.Executable() + deployPath := filepath.Dir(execPath) + if err != nil { + deployPath = "" + } + s.serviceID = &discovery.ServiceRegistryEntry{ + ServiceAddr: s.cfg.AdvertiseListenAddr, + Version: versioninfo.PDReleaseVersion, + GitHash: versioninfo.PDGitHash, + DeployPath: deployPath, + StartTimestamp: s.StartTimestamp(), + } s.keyspaceGroupManager = tso.NewKeyspaceGroupManager( s.serverLoopCtx, s.serviceID, s.GetClient(), s.GetHTTPClient(), s.cfg.AdvertiseListenAddr, - discovery.TSOPath(s.clusterID), legacySvcRootPath, tsoSvcRootPath, s.cfg) + s.clusterID, legacySvcRootPath, tsoSvcRootPath, s.cfg) if err := s.keyspaceGroupManager.Initialize(); err != nil { return err } diff --git a/pkg/mcs/tso/server/testutil.go b/pkg/mcs/tso/server/testutil.go index 626d1474673..cf5d45e7754 100644 --- a/pkg/mcs/tso/server/testutil.go +++ b/pkg/mcs/tso/server/testutil.go @@ -21,11 +21,12 @@ import ( "github.com/spf13/pflag" "github.com/stretchr/testify/require" "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" ) // MustNewGrpcClient must create a new TSO grpc client. func MustNewGrpcClient(re *require.Assertions, addr string) (*grpc.ClientConn, tsopb.TSOClient) { - conn, err := grpc.Dial(strings.TrimPrefix(addr, "http://"), grpc.WithInsecure()) + conn, err := grpc.Dial(strings.TrimPrefix(addr, "http://"), grpc.WithTransportCredentials(insecure.NewCredentials())) re.NoError(err) return conn, tsopb.NewTSOClient(conn) } diff --git a/pkg/mcs/utils/constant.go b/pkg/mcs/utils/constant.go index 6174852d89f..c6c882f5179 100644 --- a/pkg/mcs/utils/constant.go +++ b/pkg/mcs/utils/constant.go @@ -17,8 +17,6 @@ package utils import "time" const ( - // MaxRetryTimesWaitAPIService is the max retry times for initializing the cluster ID. - MaxRetryTimesWaitAPIService = 360 // RetryIntervalWaitAPIService is the interval to retry. // Note: the interval must be less than the timeout of tidb and tikv, which is 2s by default in tikv. 
RetryIntervalWaitAPIService = 500 * time.Millisecond @@ -34,6 +32,8 @@ const ( DefaultHTTPGracefulShutdownTimeout = 5 * time.Second // DefaultLogFormat is the default log format DefaultLogFormat = "text" + // DefaultLogLevel is the default log level + DefaultLogLevel = "info" // DefaultDisableErrorVerbose is the default value of DisableErrorVerbose DefaultDisableErrorVerbose = true // DefaultLeaderLease is the default value of LeaderLease diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index 0e587688fce..b70b050617e 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -123,22 +123,23 @@ func WaitAPIServiceReady(s server) error { ) ticker := time.NewTicker(RetryIntervalWaitAPIService) defer ticker.Stop() - for i := 0; i < MaxRetryTimesWaitAPIService; i++ { + retryTimes := 0 + for { ready, err = isAPIServiceReady(s) if err == nil && ready { return nil } - log.Debug("api server is not ready, retrying", errs.ZapError(err), zap.Bool("ready", ready)) select { case <-s.Context().Done(): return errors.New("context canceled while waiting api server ready") case <-ticker.C: + retryTimes++ + if retryTimes/500 > 0 { + log.Warn("api server is not ready, retrying", errs.ZapError(err)) + retryTimes /= 500 + } } } - if err != nil { - log.Warn("failed to check api server ready", errs.ZapError(err)) - } - return errors.Errorf("failed to wait api server ready after retrying %d times", MaxRetryTimesWaitAPIService) } func isAPIServiceReady(s server) (bool, error) { @@ -177,7 +178,7 @@ func InitClient(s server) error { if err != nil { return err } - etcdClient, err := etcdutil.CreateEtcdClient(tlsConfig, backendUrls) + etcdClient, err := etcdutil.CreateEtcdClient(tlsConfig, backendUrls, "mcs-etcd-client") if err != nil { return err } @@ -265,7 +266,9 @@ func StopHTTPServer(s server) { ch := make(chan struct{}) go func() { defer close(ch) - s.GetHTTPServer().Shutdown(ctx) + if err := s.GetHTTPServer().Shutdown(ctx); err != nil { + log.Error("http server graceful shutdown failed", errs.ZapError(err)) + } }() select { @@ -273,7 +276,9 @@ func StopHTTPServer(s server) { case <-ctx.Done(): // Took too long, manually close open transports log.Warn("http server graceful shutdown timeout, forcing close") - s.GetHTTPServer().Close() + if err := s.GetHTTPServer().Close(); err != nil { + log.Warn("http server close failed", errs.ZapError(err)) + } // concurrent Graceful Shutdown should be interrupted <-ch } @@ -319,6 +324,6 @@ func StopGRPCServer(s server) { // Exit exits the program with the given code. func Exit(code int) { - log.Sync() + _ = log.Sync() os.Exit(code) } diff --git a/pkg/member/member.go b/pkg/member/member.go index 8d0eb978c50..bbf46d8f167 100644 --- a/pkg/member/member.go +++ b/pkg/member/member.go @@ -85,7 +85,7 @@ func (m *EmbeddedEtcdMember) Name() string { } // GetMember returns the member. -func (m *EmbeddedEtcdMember) GetMember() interface{} { +func (m *EmbeddedEtcdMember) GetMember() any { return m.member } @@ -182,15 +182,16 @@ func (m *EmbeddedEtcdMember) GetLastLeaderUpdatedTime() time.Time { // and make it become a PD leader. // leader should be changed when campaign leader frequently. 
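For illustration, a sketch of the context-aware, ticker-driven wait loop that WaitAPIServiceReady now uses above, with warnings throttled to roughly one per 500 attempts. The check function, the interval, and the modulo-based throttle are stand-ins, not the exact PD code.

// Illustrative sketch only: retry until ready, stop on context cancellation,
// and avoid flooding the log while waiting.
package main

import (
	"context"
	"errors"
	"log"
	"time"
)

func waitReady(ctx context.Context, check func() (bool, error), interval time.Duration) error {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	retries := 0
	for {
		ready, err := check()
		if err == nil && ready {
			return nil
		}
		select {
		case <-ctx.Done():
			return errors.New("context canceled while waiting for readiness")
		case <-ticker.C:
			retries++
			// Log at most once per 500 attempts to keep the log quiet.
			if retries%500 == 0 {
				log.Printf("still not ready after %d attempts: %v", retries, err)
			}
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()
	start := time.Now()
	check := func() (bool, error) { return time.Since(start) > time.Second, nil }
	if err := waitReady(ctx, check, 100*time.Millisecond); err != nil {
		log.Fatal(err)
	}
	log.Println("ready")
}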
func (m *EmbeddedEtcdMember) CampaignLeader(ctx context.Context, leaseTimeout int64) error { + m.leadership.AddCampaignTimes() failpoint.Inject("skipCampaignLeaderCheck", func() { failpoint.Return(m.leadership.Campaign(leaseTimeout, m.MemberValue())) }) - if m.leadership.GetCampaignTimesNum() >= campaignLeaderFrequencyTimes { - m.leadership.ResetCampaignTimes() + if m.leadership.GetCampaignTimesNum() > campaignLeaderFrequencyTimes { if err := m.ResignEtcdLeader(ctx, m.Name(), ""); err != nil { return err } + m.leadership.ResetCampaignTimes() return errs.ErrLeaderFrequentlyChange.FastGenByArgs(m.Name(), m.GetLeaderPath()) } diff --git a/pkg/member/participant.go b/pkg/member/participant.go index 189da7b96c9..8a0ffadd31e 100644 --- a/pkg/member/participant.go +++ b/pkg/member/participant.go @@ -104,7 +104,7 @@ func (m *Participant) Name() string { } // GetMember returns the member. -func (m *Participant) GetMember() interface{} { +func (m *Participant) GetMember() any { return m.member } @@ -200,7 +200,7 @@ func (m *Participant) KeepLeader(ctx context.Context) { // PreCheckLeader does some pre-check before checking whether or not it's the leader. // It returns true if it passes the pre-check, false otherwise. -func (m *Participant) PreCheckLeader() error { +func (*Participant) PreCheckLeader() error { // No specific thing to check. Returns no error. return nil } @@ -280,7 +280,7 @@ func (m *Participant) IsSameLeader(leader participant) bool { } // CheckPriority checks whether there is another participant has higher priority and resign it as the leader if so. -func (m *Participant) CheckPriority(ctx context.Context) { +func (*Participant) CheckPriority(_ context.Context) { // TODO: implement weighted-election when it's in need } diff --git a/pkg/mock/mockcluster/mockcluster.go b/pkg/mock/mockcluster/mockcluster.go index 6cf7ae143df..5d3aba2d2e8 100644 --- a/pkg/mock/mockcluster/mockcluster.go +++ b/pkg/mock/mockcluster/mockcluster.go @@ -123,7 +123,7 @@ func (mc *Cluster) AllocID() (uint64, error) { } // UpdateRegionsLabelLevelStats updates the label level stats for the regions. -func (mc *Cluster) UpdateRegionsLabelLevelStats(regions []*core.RegionInfo) {} +func (*Cluster) UpdateRegionsLabelLevelStats(_ []*core.RegionInfo) {} // LoadRegion puts region info without leader func (mc *Cluster) LoadRegion(regionID uint64, peerStoreIDs ...uint64) { @@ -138,11 +138,6 @@ func (mc *Cluster) GetStoresLoads() map[uint64][]float64 { return mc.HotStat.GetStoresLoads() } -// GetStore gets a store with a given store ID. -func (mc *Cluster) GetStore(storeID uint64) *core.StoreInfo { - return mc.Stores.GetStore(storeID) -} - // IsRegionHot checks if the region is hot. func (mc *Cluster) IsRegionHot(region *core.RegionInfo) bool { return mc.HotCache.IsRegionHot(region, mc.GetHotRegionCacheHitsThreshold()) @@ -561,11 +556,6 @@ func (mc *Cluster) AddLeaderRegionWithWriteInfo( return items } -// DropCacheAllRegion removes all regions from the cache. -func (mc *Cluster) DropCacheAllRegion() { - mc.ResetRegionCache() -} - // UpdateStoreLeaderWeight updates store leader weight. 
func (mc *Cluster) UpdateStoreLeaderWeight(storeID uint64, weight float64) { store := mc.GetStore(storeID) @@ -752,7 +742,7 @@ func (mc *Cluster) UpdateStoreStatus(id uint64) { pendingPeerCount := mc.GetStorePendingPeerCount(id) leaderSize := mc.GetStoreLeaderRegionSize(id) regionSize := mc.GetStoreRegionSize(id) - store := mc.Stores.GetStore(id) + store := mc.GetStore(id) stats := &pdpb.StoreStats{} stats.Capacity = defaultStoreCapacity stats.Available = stats.Capacity - uint64(store.GetRegionSize()*units.MiB) @@ -896,14 +886,7 @@ func (mc *Cluster) CheckRegionRead(region *core.RegionInfo) []*statistics.HotPee items = append(items, expiredItems...) reportInterval := region.GetInterval() interval := reportInterval.GetEndTimestamp() - reportInterval.GetStartTimestamp() - for _, peer := range region.GetPeers() { - peerInfo := core.NewPeerInfo(peer, region.GetLoads(), interval) - item := mc.HotCache.CheckReadPeerSync(peerInfo, region) - if item != nil { - items = append(items, item) - } - } - return items + return append(items, mc.HotCache.CheckReadPeerSync(region, region.GetPeers(), region.GetLoads(), interval)...) } // CheckRegionWrite checks region write info with all peers @@ -913,14 +896,7 @@ func (mc *Cluster) CheckRegionWrite(region *core.RegionInfo) []*statistics.HotPe items = append(items, expiredItems...) reportInterval := region.GetInterval() interval := reportInterval.GetEndTimestamp() - reportInterval.GetStartTimestamp() - for _, peer := range region.GetPeers() { - peerInfo := core.NewPeerInfo(peer, region.GetLoads(), interval) - item := mc.HotCache.CheckWritePeerSync(peerInfo, region) - if item != nil { - items = append(items, item) - } - } - return items + return append(items, mc.HotCache.CheckWritePeerSync(region, region.GetPeers(), region.GetLoads(), interval)...) } // CheckRegionLeaderRead checks region read info with leader peer @@ -930,13 +906,7 @@ func (mc *Cluster) CheckRegionLeaderRead(region *core.RegionInfo) []*statistics. items = append(items, expiredItems...) reportInterval := region.GetInterval() interval := reportInterval.GetEndTimestamp() - reportInterval.GetStartTimestamp() - peer := region.GetLeader() - peerInfo := core.NewPeerInfo(peer, region.GetLoads(), interval) - item := mc.HotCache.CheckReadPeerSync(peerInfo, region) - if item != nil { - items = append(items, item) - } - return items + return append(items, mc.HotCache.CheckReadPeerSync(region, []*metapb.Peer{region.GetLeader()}, region.GetLoads(), interval)...) } // ObserveRegionsStats records the current stores stats from region stats. diff --git a/pkg/mock/mockhbstream/mockhbstream.go b/pkg/mock/mockhbstream/mockhbstream.go index 289f31d63dd..ac8f246f86a 100644 --- a/pkg/mock/mockhbstream/mockhbstream.go +++ b/pkg/mock/mockhbstream/mockhbstream.go @@ -46,10 +46,10 @@ func (s HeartbeatStream) Send(m core.RegionHeartbeatResponse) error { } // SendMsg is used to send the message. -func (s HeartbeatStream) SendMsg(region *core.RegionInfo, msg *pdpb.RegionHeartbeatResponse) {} +func (HeartbeatStream) SendMsg(*core.RegionInfo, *pdpb.RegionHeartbeatResponse) {} // BindStream mock method. -func (s HeartbeatStream) BindStream(storeID uint64, stream hbstream.HeartbeatStream) {} +func (HeartbeatStream) BindStream(uint64, hbstream.HeartbeatStream) {} // Recv mocks method. 
func (s HeartbeatStream) Recv() core.RegionHeartbeatResponse { diff --git a/pkg/mock/mockhbstream/mockhbstream_test.go b/pkg/mock/mockhbstream/mockhbstream_test.go index a8e88f61aee..aa1ca85279b 100644 --- a/pkg/mock/mockhbstream/mockhbstream_test.go +++ b/pkg/mock/mockhbstream/mockhbstream_test.go @@ -29,7 +29,6 @@ import ( ) func TestActivity(t *testing.T) { - t.Parallel() re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() diff --git a/pkg/mock/mockid/mockid.go b/pkg/mock/mockid/mockid.go index 4c0e7540653..7b4902a6a04 100644 --- a/pkg/mock/mockid/mockid.go +++ b/pkg/mock/mockid/mockid.go @@ -38,6 +38,6 @@ func (alloc *IDAllocator) SetBase(newBase uint64) error { } // Rebase implements the IDAllocator interface. -func (alloc *IDAllocator) Rebase() error { +func (*IDAllocator) Rebase() error { return nil } diff --git a/pkg/movingaverage/avg_over_time_test.go b/pkg/movingaverage/avg_over_time_test.go index 43553d9d608..4a54e33d449 100644 --- a/pkg/movingaverage/avg_over_time_test.go +++ b/pkg/movingaverage/avg_over_time_test.go @@ -23,7 +23,6 @@ import ( ) func TestPulse(t *testing.T) { - t.Parallel() re := require.New(t) aot := NewAvgOverTime(5 * time.Second) // warm up @@ -43,7 +42,6 @@ func TestPulse(t *testing.T) { } func TestPulse2(t *testing.T) { - t.Parallel() re := require.New(t) dur := 5 * time.Second aot := NewAvgOverTime(dur) @@ -57,7 +55,6 @@ func TestPulse2(t *testing.T) { } func TestChange(t *testing.T) { - t.Parallel() re := require.New(t) aot := NewAvgOverTime(5 * time.Second) @@ -91,7 +88,6 @@ func TestChange(t *testing.T) { } func TestMinFilled(t *testing.T) { - t.Parallel() re := require.New(t) interval := 10 * time.Second rate := 1.0 @@ -108,7 +104,6 @@ func TestMinFilled(t *testing.T) { } func TestUnstableInterval(t *testing.T) { - t.Parallel() re := require.New(t) aot := NewAvgOverTime(5 * time.Second) re.Equal(0., aot.Get()) diff --git a/pkg/movingaverage/max_filter_test.go b/pkg/movingaverage/max_filter_test.go index bba770cecc2..7d3906ec93c 100644 --- a/pkg/movingaverage/max_filter_test.go +++ b/pkg/movingaverage/max_filter_test.go @@ -21,7 +21,6 @@ import ( ) func TestMaxFilter(t *testing.T) { - t.Parallel() re := require.New(t) var empty float64 = 0 data := []float64{2, 1, 3, 4, 1, 1, 3, 3, 2, 0, 5} diff --git a/pkg/movingaverage/moving_average_test.go b/pkg/movingaverage/moving_average_test.go index 49c20637c20..fd0a1a9fcf3 100644 --- a/pkg/movingaverage/moving_average_test.go +++ b/pkg/movingaverage/moving_average_test.go @@ -72,7 +72,6 @@ func checkInstantaneous(re *require.Assertions, ma MovingAvg) { } func TestMedianFilter(t *testing.T) { - t.Parallel() re := require.New(t) var empty float64 = 0 data := []float64{2, 4, 2, 800, 600, 6, 3} @@ -92,7 +91,6 @@ type testCase struct { } func TestMovingAvg(t *testing.T) { - t.Parallel() re := require.New(t) var empty float64 = 0 data := []float64{1, 1, 1, 1, 5, 1, 1, 1} diff --git a/pkg/movingaverage/weight_allocator.go b/pkg/movingaverage/weight_allocator.go index 06be4616a85..f63ce377e08 100644 --- a/pkg/movingaverage/weight_allocator.go +++ b/pkg/movingaverage/weight_allocator.go @@ -37,7 +37,7 @@ func NewWeightAllocator(length, segNum int) *WeightAllocator { segLength := length / segNum // segMod is used for split seg when is length not divisible by segNum. 
segMod := length % segNum - segIndexs := make([]int, 0, segNum) + segIndexes := make([]int, 0, segNum) weights := make([]float64, 0, length) unitCount := 0 for i := 0; i < segNum; i++ { @@ -46,11 +46,11 @@ func NewWeightAllocator(length, segNum int) *WeightAllocator { next++ } unitCount += (segNum - i) * next - segIndexs = append(segIndexs, next) + segIndexes = append(segIndexes, next) } unitWeight := 1.0 / float64(unitCount) for i := 0; i < segNum; i++ { - for j := 0; j < segIndexs[i]; j++ { + for j := 0; j < segIndexes[i]; j++ { weights = append(weights, unitWeight*float64(segNum-i)) } } diff --git a/pkg/movingaverage/weight_allocator_test.go b/pkg/movingaverage/weight_allocator_test.go index 631a71f10c9..405d8f72876 100644 --- a/pkg/movingaverage/weight_allocator_test.go +++ b/pkg/movingaverage/weight_allocator_test.go @@ -21,7 +21,6 @@ import ( ) func TestWeightAllocator(t *testing.T) { - t.Parallel() re := require.New(t) checkSumFunc := func(wa *WeightAllocator, length int) { diff --git a/pkg/progress/progress.go b/pkg/progress/progress.go index 345e4928c41..8319a395ac8 100644 --- a/pkg/progress/progress.go +++ b/pkg/progress/progress.go @@ -24,19 +24,25 @@ import ( "github.com/tikv/pd/pkg/utils/syncutil" ) -// speedStatisticalWindow is the speed calculation window -const speedStatisticalWindow = 10 * time.Minute +const ( + // maxSpeedCalculationWindow is the maximum size of the time window used to calculate the speed, + // but it does not mean that all data in it will be used to calculate the speed, + // which data is used depends on the patrol region duration + maxSpeedCalculationWindow = 2 * time.Hour + // minSpeedCalculationWindow is the minimum speed calculation window + minSpeedCalculationWindow = 10 * time.Minute +) // Manager is used to maintain the progresses we care about. type Manager struct { syncutil.RWMutex - progesses map[string]*progressIndicator + progresses map[string]*progressIndicator } // NewManager creates a new Manager. func NewManager() *Manager { return &Manager{ - progesses: make(map[string]*progressIndicator), + progresses: make(map[string]*progressIndicator), } } @@ -46,12 +52,28 @@ type progressIndicator struct { remaining float64 // We use a fixed interval's history to calculate the latest average speed. history *list.List - // We use speedStatisticalWindow / updateInterval to get the windowLengthLimit. - // Assume that the windowLengthLimit is 3, the init value is 1. after update 3 times with 2, 3, 4 separately. The window will become [1, 2, 3, 4]. + // We use (maxSpeedCalculationWindow / updateInterval + 1) to get the windowCapacity. + // Assume that the windowCapacity is 4, the init value is 1. After update 3 times with 2, 3, 4 separately. The window will become [1, 2, 3, 4]. // Then we update it again with 5, the window will become [2, 3, 4, 5]. - windowLengthLimit int - updateInterval time.Duration - lastSpeed float64 + windowCapacity int + // windowLength is used to determine what data will be computed. + // Assume that the windowLength is 2, the init value is 1. The value that will be calculated are [1]. + // After update 3 times with 2, 3, 4 separately. The value that will be calculated are [3,4] and the values in queue are [(1,2),3,4]. + // It helps us avoid calculation results jumping change when patrol-region-interval changes. + windowLength int + // front is the first element which should be used. + // currentWindowLength indicates where the front is currently in the queue. + // Assume that the windowLength is 2, the init value is 1. 
The front is [1] and currentWindowLength is 1. + // After update 3 times with 2, 3, 4 separately. + // The front is [3], the currentWindowLength is 2, and values in queue are [(1,2),3,4] + // ^ front + // - - currentWindowLength = len([3,4]) = 2 + // We will always keep the currentWindowLength equal to windowLength if the actual size is enough. + front *list.Element + currentWindowLength int + + updateInterval time.Duration + lastSpeed float64 } // Reset resets the progress manager. @@ -59,59 +81,99 @@ func (m *Manager) Reset() { m.Lock() defer m.Unlock() - m.progesses = make(map[string]*progressIndicator) + m.progresses = make(map[string]*progressIndicator) +} + +// Option is used to do some action for progressIndicator. +type Option func(*progressIndicator) + +// WindowDurationOption changes the time window size. +func WindowDurationOption(dur time.Duration) func(*progressIndicator) { + return func(pi *progressIndicator) { + if dur < minSpeedCalculationWindow { + dur = minSpeedCalculationWindow + } else if dur > maxSpeedCalculationWindow { + dur = maxSpeedCalculationWindow + } + pi.windowLength = int(dur/pi.updateInterval) + 1 + } } // AddProgress adds a progress into manager if it doesn't exist. -func (m *Manager) AddProgress(progress string, current, total float64, updateInterval time.Duration) (exist bool) { +func (m *Manager) AddProgress(progress string, current, total float64, updateInterval time.Duration, opts ...Option) (exist bool) { m.Lock() defer m.Unlock() history := list.New() history.PushBack(current) - if _, exist = m.progesses[progress]; !exist { - m.progesses[progress] = &progressIndicator{ - total: total, - remaining: total, - history: history, - windowLengthLimit: int(speedStatisticalWindow / updateInterval), - updateInterval: updateInterval, + if _, exist = m.progresses[progress]; !exist { + pi := &progressIndicator{ + total: total, + remaining: total, + history: history, + windowCapacity: int(maxSpeedCalculationWindow/updateInterval) + 1, + windowLength: int(minSpeedCalculationWindow / updateInterval), + updateInterval: updateInterval, } + for _, op := range opts { + op(pi) + } + m.progresses[progress] = pi + pi.front = history.Front() + pi.currentWindowLength = 1 } return } // UpdateProgress updates the progress if it exists. 
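For illustration, a usage sketch of the option-based speed window added above. The API shapes (AddProgress, UpdateProgress, Status, WindowDurationOption) come from this patch; the progress name, counts, and intervals are invented for the example.

// Illustrative sketch only: tracking a shrinking workload and resizing the
// speed-calculation window at update time.
package main

import (
	"fmt"
	"time"

	"github.com/tikv/pd/pkg/progress"
)

func main() {
	m := progress.NewManager()
	name := "removing-store-1"
	// 100 regions remain to be moved, sampled every 10 seconds.
	m.AddProgress(name, 100, 100, 10*time.Second)
	// A later update can resize the window, e.g. to follow the patrol-region
	// duration; the option clamps the window to [10min, 2h].
	m.UpdateProgress(name, 70, 70, false, progress.WindowDurationOption(20*time.Minute))
	p, left, speed, err := m.Status(name)
	if err != nil {
		panic(err)
	}
	fmt.Printf("progress=%.2f remaining=%.0fs speed=%.2f/s\n", p, left, speed)
}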
-func (m *Manager) UpdateProgress(progress string, current, remaining float64, isInc bool) { +func (m *Manager) UpdateProgress(progress string, current, remaining float64, isInc bool, opts ...Option) { m.Lock() defer m.Unlock() - if p, exist := m.progesses[progress]; exist { - p.remaining = remaining - if p.total < remaining { - p.total = remaining - } + p, exist := m.progresses[progress] + if !exist { + return + } - if p.history.Len() > p.windowLengthLimit { - p.history.Remove(p.history.Front()) - } - p.history.PushBack(current) - - // It means it just init and we haven't update the progress - if p.history.Len() <= 1 { - p.lastSpeed = 0 - } else if isInc { - // the value increases, e.g., [1, 2, 3] - p.lastSpeed = (p.history.Back().Value.(float64) - p.history.Front().Value.(float64)) / - (float64(p.history.Len()-1) * p.updateInterval.Seconds()) - } else { - // the value decreases, e.g., [3, 2, 1] - p.lastSpeed = (p.history.Front().Value.(float64) - p.history.Back().Value.(float64)) / - (float64(p.history.Len()-1) * p.updateInterval.Seconds()) - } - if p.lastSpeed < 0 { - p.lastSpeed = 0 - } + for _, op := range opts { + op(p) + } + p.remaining = remaining + if p.total < remaining { + p.total = remaining + } + + p.history.PushBack(current) + p.currentWindowLength++ + + // try to move `front` into correct place. + for p.currentWindowLength > p.windowLength { + p.front = p.front.Next() + p.currentWindowLength-- + } + for p.currentWindowLength < p.windowLength && p.front.Prev() != nil { + p.front = p.front.Prev() + p.currentWindowLength++ + } + + for p.history.Len() > p.windowCapacity { + p.history.Remove(p.history.Front()) + } + + // It means it just init and we haven't update the progress + if p.history.Len() <= 1 { + p.lastSpeed = 0 + } else if isInc { + // the value increases, e.g., [1, 2, 3] + p.lastSpeed = (current - p.front.Value.(float64)) / + (float64(p.currentWindowLength-1) * p.updateInterval.Seconds()) + } else { + // the value decreases, e.g., [3, 2, 1] + p.lastSpeed = (p.front.Value.(float64) - current) / + (float64(p.currentWindowLength-1) * p.updateInterval.Seconds()) + } + if p.lastSpeed < 0 { + p.lastSpeed = 0 } } @@ -120,7 +182,7 @@ func (m *Manager) UpdateProgressTotal(progress string, total float64) { m.Lock() defer m.Unlock() - if p, exist := m.progesses[progress]; exist { + if p, exist := m.progresses[progress]; exist { p.total = total } } @@ -130,8 +192,8 @@ func (m *Manager) RemoveProgress(progress string) (exist bool) { m.Lock() defer m.Unlock() - if _, exist = m.progesses[progress]; exist { - delete(m.progesses, progress) + if _, exist = m.progresses[progress]; exist { + delete(m.progresses, progress) return } return @@ -142,39 +204,40 @@ func (m *Manager) GetProgresses(filter func(p string) bool) []string { m.RLock() defer m.RUnlock() - processes := []string{} - for p := range m.progesses { + progresses := make([]string, 0, len(m.progresses)) + for p := range m.progresses { if filter(p) { - processes = append(processes, p) + progresses = append(progresses, p) } } - return processes + return progresses } // Status returns the current progress status of a give name. 
-func (m *Manager) Status(progress string) (process, leftSeconds, currentSpeed float64, err error) { +func (m *Manager) Status(progressName string) (progress, leftSeconds, currentSpeed float64, err error) { m.RLock() defer m.RUnlock() - if p, exist := m.progesses[progress]; exist { - process = 1 - p.remaining/p.total - if process < 0 { - process = 0 - err = errs.ErrProgressWrongStatus.FastGenByArgs(fmt.Sprintf("the remaining: %v is larger than the total: %v", p.remaining, p.total)) - return - } - currentSpeed = p.lastSpeed - // When the progress is newly added, there is no last speed. - if p.lastSpeed == 0 && p.history.Len() <= 1 { - currentSpeed = 0 - } - - leftSeconds = p.remaining / currentSpeed - if math.IsNaN(leftSeconds) || math.IsInf(leftSeconds, 0) { - leftSeconds = math.MaxFloat64 - } + p, exist := m.progresses[progressName] + if !exist { + err = errs.ErrProgressNotFound.FastGenByArgs(fmt.Sprintf("the progress: %s", progressName)) + return + } + progress = 1 - p.remaining/p.total + if progress < 0 { + progress = 0 + err = errs.ErrProgressWrongStatus.FastGenByArgs(fmt.Sprintf("the remaining: %v is larger than the total: %v", p.remaining, p.total)) return } - err = errs.ErrProgressNotFound.FastGenByArgs(fmt.Sprintf("the progress: %s", progress)) + currentSpeed = p.lastSpeed + // When the progress is newly added, there is no last speed. + if p.lastSpeed == 0 && p.history.Len() <= 1 { + currentSpeed = 0 + } + + leftSeconds = p.remaining / currentSpeed + if math.IsNaN(leftSeconds) || math.IsInf(leftSeconds, 0) { + leftSeconds = math.MaxFloat64 + } return } diff --git a/pkg/progress/progress_test.go b/pkg/progress/progress_test.go index e6799fb0ff8..a7b159bc907 100644 --- a/pkg/progress/progress_test.go +++ b/pkg/progress/progress_test.go @@ -24,7 +24,6 @@ import ( ) func TestProgress(t *testing.T) { - t.Parallel() re := require.New(t) n := "test" m := NewManager() @@ -41,15 +40,13 @@ func TestProgress(t *testing.T) { p, ls, cs, err = m.Status(n) re.NoError(err) re.Equal(0.7, p) - // 30/(70/1s+) > 30/70 - re.Greater(ls, 30.0/70.0) - // 70/1s+ > 70 - re.Less(cs, 70.0) + re.Less(math.Abs(ls-30.0/7.0), 1e-6) + re.Less(math.Abs(cs-7), 1e-6) // there is no scheduling - for i := 0; i < 100; i++ { + for i := 0; i < 1000; i++ { m.UpdateProgress(n, 30, 30, false) } - re.Equal(61, m.progesses[n].history.Len()) + re.Equal(721, m.progresses[n].history.Len()) p, ls, cs, err = m.Status(n) re.NoError(err) re.Equal(0.7, p) @@ -70,7 +67,6 @@ func TestProgress(t *testing.T) { } func TestAbnormal(t *testing.T) { - t.Parallel() re := require.New(t) n := "test" m := NewManager() @@ -95,3 +91,127 @@ func TestAbnormal(t *testing.T) { re.Equal(0.0, ls) re.Equal(0.0, cs) } + +func TestProgressWithDynamicWindow(t *testing.T) { + // The full capacity of queue is 721. 
+ re := require.New(t) + n := "test" + m := NewManager() + re.False(m.AddProgress(n, 100, 100, 10*time.Second)) + p, ls, cs, err := m.Status(n) + re.NoError(err) + re.Equal(0.0, p) + re.Equal(math.MaxFloat64, ls) + re.Equal(0.0, cs) + time.Sleep(time.Second) + re.True(m.AddProgress(n, 100, 100, 10*time.Second)) + + m.UpdateProgress(n, 31, 31, false) + p, ls, cs, err = m.Status(n) + re.NoError(err) + re.Equal(0.69, p) + re.Less(math.Abs(ls-31.0/6.9), 1e-6) + re.Less(math.Abs(cs-6.9), 1e-6) + re.Equal(2, m.progresses[n].currentWindowLength) + re.Equal(100.0, m.progresses[n].front.Value.(float64)) + + m.UpdateProgress(n, 30, 30, false, WindowDurationOption(time.Minute*20)) + re.Equal(3, m.progresses[n].currentWindowLength) + re.Equal(100.0, m.progresses[n].front.Value.(float64)) + p, ls, cs, err = m.Status(n) + re.NoError(err) + re.Equal(0.7, p) + re.Less(math.Abs(ls-30.0/(7.0/2)), 1e-6) + re.Less(math.Abs(cs-3.5), 1e-6) + + for i := 0; i < 1000; i++ { + m.UpdateProgress(n, 30, 30, false) + } + re.Equal(721, m.progresses[n].history.Len()) + p, ls, cs, err = m.Status(n) + re.NoError(err) + re.Equal(0.7, p) + re.Equal(math.MaxFloat64, ls) + re.Equal(0.0, cs) + m.UpdateProgress(n, 29, 29, false, WindowDurationOption(time.Minute*20)) + re.Equal(121, m.progresses[n].currentWindowLength) + re.Equal(30.0, m.progresses[n].front.Value.(float64)) + re.Equal(721, m.progresses[n].history.Len()) + + for i := 0; i < 60; i++ { + m.UpdateProgress(n, 28, 28, false) + } + re.Equal(721, m.progresses[n].history.Len()) + p, ls, cs, err = m.Status(n) + re.NoError(err) + re.Equal(0.72, p) + re.Equal(float64(28/(2./120)*10.), ls) + re.Equal(float64(2./120/10.), cs) + + m.UpdateProgress(n, 28, 28, false, WindowDurationOption(time.Minute*10)) + re.Equal(721, m.progresses[n].history.Len()) + re.Equal(61, m.progresses[n].currentWindowLength) + re.Equal(28.0, m.progresses[n].front.Value.(float64)) + p, ls, cs, err = m.Status(n) + re.NoError(err) + re.Equal(0.72, p) + re.Equal(math.MaxFloat64, ls) + re.Equal(0.0, cs) + + m.UpdateProgress(n, 28, 28, false, WindowDurationOption(time.Minute*20)) + re.Equal(121, m.progresses[n].currentWindowLength) + re.Equal(30.0, m.progresses[n].front.Value.(float64)) + p, ls, cs, err = m.Status(n) + re.NoError(err) + re.Equal(0.72, p) + re.Equal(float64(28/(2./120)*10.), ls) + re.Equal(float64(2./120/10.), cs) + + m.UpdateProgress(n, 1, 1, false, WindowDurationOption(time.Minute*12)) + re.Equal(73, m.progresses[n].currentWindowLength) + re.Equal(30.0, m.progresses[n].front.Value.(float64)) + p, ls, cs, err = m.Status(n) + re.NoError(err) + re.Equal(0.99, p) + re.Equal(float64(1/(29./72)*10.), ls) + re.Equal(float64(29./72/10.), cs) + + m.UpdateProgress(n, 1, 1, false, WindowDurationOption(time.Minute*5)) + re.Equal(61, m.progresses[n].currentWindowLength) + re.Equal(28.0, m.progresses[n].front.Value.(float64)) + p, ls, cs, err = m.Status(n) + re.NoError(err) + re.Equal(0.99, p) + re.Equal(float64(1/(27./60)*10.), ls) + re.Equal(float64(27./60/10.), cs) + + m.UpdateProgress(n, 1, 1, false, WindowDurationOption(time.Minute*180)) + p, ls, cs, err = m.Status(n) + re.Equal(721, m.progresses[n].currentWindowLength) + re.Equal(30.0, m.progresses[n].front.Value.(float64)) + re.NoError(err) + re.Equal(0.99, p) + re.Equal(float64(1/(29./720)*10.), ls) + re.Equal(float64(29./720/10.), cs) + for i := 0; i < 2000; i++ { + m.UpdateProgress(n, 1, 1, false) + } + re.Equal(721, m.progresses[n].history.Len()) + p, ls, cs, err = m.Status(n) + re.NoError(err) + re.Equal(0.99, p) + re.Equal(math.MaxFloat64, 
ls) + re.Equal(0.0, cs) + + ps := m.GetProgresses(func(p string) bool { + return strings.Contains(p, n) + }) + re.Len(ps, 1) + re.Equal(n, ps[0]) + ps = m.GetProgresses(func(p string) bool { + return strings.Contains(p, "a") + }) + re.Empty(ps) + re.True(m.RemoveProgress(n)) + re.False(m.RemoveProgress(n)) +} diff --git a/pkg/ratelimit/concurrency_limiter.go b/pkg/ratelimit/concurrency_limiter.go index b1eef3c8101..e5379bc48cc 100644 --- a/pkg/ratelimit/concurrency_limiter.go +++ b/pkg/ratelimit/concurrency_limiter.go @@ -14,24 +14,33 @@ package ratelimit -import "github.com/tikv/pd/pkg/utils/syncutil" +import ( + "context" -type concurrencyLimiter struct { - mu syncutil.RWMutex + "github.com/tikv/pd/pkg/utils/syncutil" +) + +// ConcurrencyLimiter is a limiter that limits the number of concurrent tasks. +type ConcurrencyLimiter struct { + mu syncutil.Mutex current uint64 + waiting uint64 limit uint64 // statistic maxLimit uint64 + queue chan *TaskToken } -func newConcurrencyLimiter(limit uint64) *concurrencyLimiter { - return &concurrencyLimiter{limit: limit} +// NewConcurrencyLimiter creates a new ConcurrencyLimiter. +func NewConcurrencyLimiter(limit uint64) *ConcurrencyLimiter { + return &ConcurrencyLimiter{limit: limit, queue: make(chan *TaskToken, limit)} } const unlimit = uint64(0) -func (l *concurrencyLimiter) allow() bool { +// old interface. only used in the ratelimiter package. +func (l *ConcurrencyLimiter) allow() bool { l.mu.Lock() defer l.mu.Unlock() @@ -45,7 +54,8 @@ func (l *concurrencyLimiter) allow() bool { return false } -func (l *concurrencyLimiter) release() { +// old interface. only used in the ratelimiter package. +func (l *ConcurrencyLimiter) release() { l.mu.Lock() defer l.mu.Unlock() @@ -54,28 +64,32 @@ func (l *concurrencyLimiter) release() { } } -func (l *concurrencyLimiter) getLimit() uint64 { - l.mu.RLock() - defer l.mu.RUnlock() +// old interface. only used in the ratelimiter package. +func (l *ConcurrencyLimiter) getLimit() uint64 { + l.mu.Lock() + defer l.mu.Unlock() return l.limit } -func (l *concurrencyLimiter) setLimit(limit uint64) { +// old interface. only used in the ratelimiter package. +func (l *ConcurrencyLimiter) setLimit(limit uint64) { l.mu.Lock() defer l.mu.Unlock() l.limit = limit } -func (l *concurrencyLimiter) getCurrent() uint64 { - l.mu.RLock() - defer l.mu.RUnlock() +// GetRunningTasksNum returns the number of running tasks. +func (l *ConcurrencyLimiter) GetRunningTasksNum() uint64 { + l.mu.Lock() + defer l.mu.Unlock() return l.current } -func (l *concurrencyLimiter) getMaxConcurrency() uint64 { +// old interface. only used in the ratelimiter package. +func (l *ConcurrencyLimiter) getMaxConcurrency() uint64 { l.mu.Lock() defer func() { l.maxLimit = l.current @@ -84,3 +98,57 @@ func (l *concurrencyLimiter) getMaxConcurrency() uint64 { return l.maxLimit } + +// GetWaitingTasksNum returns the number of waiting tasks. +func (l *ConcurrencyLimiter) GetWaitingTasksNum() uint64 { + l.mu.Lock() + defer l.mu.Unlock() + return l.waiting +} + +// AcquireToken acquires a token from the limiter. which will block until a token is available or ctx is done, like Timeout. 
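For illustration, a usage sketch of the token API defined just below: AcquireToken blocks until a slot frees or the context is done, and ReleaseToken hands the slot back. The concurrency limit and the simulated workload are invented for the example.

// Illustrative sketch only: bound the number of in-flight tasks with the new
// ConcurrencyLimiter token API.
package main

import (
	"context"
	"log"
	"sync"
	"time"

	"github.com/tikv/pd/pkg/ratelimit"
)

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	limiter := ratelimit.NewConcurrencyLimiter(8) // at most 8 tasks run at once
	var wg sync.WaitGroup
	for i := 0; i < 100; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			token, err := limiter.AcquireToken(ctx)
			if err != nil {
				log.Printf("task %d gave up: %v", i, err)
				return
			}
			defer limiter.ReleaseToken(token)
			time.Sleep(10 * time.Millisecond) // simulated work
		}(i)
	}
	wg.Wait()
}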
+func (l *ConcurrencyLimiter) AcquireToken(ctx context.Context) (*TaskToken, error) { + l.mu.Lock() + if l.current >= l.limit { + l.waiting++ + l.mu.Unlock() + // block the waiting task on the caller goroutine + select { + case <-ctx.Done(): + l.mu.Lock() + l.waiting-- + l.mu.Unlock() + return nil, ctx.Err() + case token := <-l.queue: + l.mu.Lock() + token.released = false + l.current++ + l.waiting-- + l.mu.Unlock() + return token, nil + } + } + l.current++ + token := &TaskToken{} + l.mu.Unlock() + return token, nil +} + +// ReleaseToken releases the token. +func (l *ConcurrencyLimiter) ReleaseToken(token *TaskToken) { + l.mu.Lock() + defer l.mu.Unlock() + if token.released { + return + } + token.released = true + l.current-- + if len(l.queue) < int(l.limit) { + l.queue <- token + } +} + +// TaskToken is a token that must be released after the task is done. +type TaskToken struct { + released bool +} diff --git a/pkg/ratelimit/concurrency_limiter_test.go b/pkg/ratelimit/concurrency_limiter_test.go index 5fe03740394..f0af1125d21 100644 --- a/pkg/ratelimit/concurrency_limiter_test.go +++ b/pkg/ratelimit/concurrency_limiter_test.go @@ -15,15 +15,19 @@ package ratelimit import ( + "context" + "fmt" + "sync" + "sync/atomic" "testing" + "time" "github.com/stretchr/testify/require" ) func TestConcurrencyLimiter(t *testing.T) { - t.Parallel() re := require.New(t) - cl := newConcurrencyLimiter(10) + cl := NewConcurrencyLimiter(10) for i := 0; i < 10; i++ { re.True(cl.allow()) } @@ -35,9 +39,9 @@ func TestConcurrencyLimiter(t *testing.T) { re.Equal(uint64(10), cl.getMaxConcurrency()) cl.setLimit(5) re.Equal(uint64(5), cl.getLimit()) - re.Equal(uint64(10), cl.getCurrent()) + re.Equal(uint64(10), cl.GetRunningTasksNum()) cl.release() - re.Equal(uint64(9), cl.getCurrent()) + re.Equal(uint64(9), cl.GetRunningTasksNum()) for i := 0; i < 9; i++ { cl.release() } @@ -45,10 +49,79 @@ func TestConcurrencyLimiter(t *testing.T) { for i := 0; i < 5; i++ { re.True(cl.allow()) } - re.Equal(uint64(5), cl.getCurrent()) + re.Equal(uint64(5), cl.GetRunningTasksNum()) for i := 0; i < 5; i++ { cl.release() } re.Equal(uint64(5), cl.getMaxConcurrency()) re.Equal(uint64(0), cl.getMaxConcurrency()) } + +func TestConcurrencyLimiter2(t *testing.T) { + limit := uint64(2) + limiter := NewConcurrencyLimiter(limit) + + require.Equal(t, uint64(0), limiter.GetRunningTasksNum(), "Expected running tasks to be 0") + require.Equal(t, uint64(0), limiter.GetWaitingTasksNum(), "Expected waiting tasks to be 0") + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Acquire two tokens + token1, err := limiter.AcquireToken(ctx) + require.NoError(t, err, "Failed to acquire token") + + token2, err := limiter.AcquireToken(ctx) + require.NoError(t, err, "Failed to acquire token") + + require.Equal(t, limit, limiter.GetRunningTasksNum(), "Expected running tasks to be 2") + + // Try to acquire third token, it should not be able to acquire immediately due to limit + go func() { + _, err := limiter.AcquireToken(ctx) + require.NoError(t, err, "Failed to acquire token") + }() + + time.Sleep(100 * time.Millisecond) // Give some time for the goroutine to run + require.Equal(t, uint64(1), limiter.GetWaitingTasksNum(), "Expected waiting tasks to be 1") + + // Release a token + limiter.ReleaseToken(token1) + time.Sleep(100 * time.Millisecond) // Give some time for the goroutine to run + require.Equal(t, uint64(2), limiter.GetRunningTasksNum(), "Expected running tasks to be 2") + require.Equal(t, uint64(0), 
limiter.GetWaitingTasksNum(), "Expected waiting tasks to be 0") + + // Release the second token + limiter.ReleaseToken(token2) + time.Sleep(100 * time.Millisecond) // Give some time for the goroutine to run + require.Equal(t, uint64(1), limiter.GetRunningTasksNum(), "Expected running tasks to be 1") +} + +func TestConcurrencyLimiterAcquire(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + limiter := NewConcurrencyLimiter(20) + sum := int64(0) + start := time.Now() + wg := &sync.WaitGroup{} + wg.Add(100) + for i := 0; i < 100; i++ { + go func(i int) { + defer wg.Done() + token, err := limiter.AcquireToken(ctx) + if err != nil { + fmt.Printf("Task %d failed to acquire: %v\n", i, err) + return + } + defer limiter.ReleaseToken(token) + // simulate takes some time + time.Sleep(10 * time.Millisecond) + atomic.AddInt64(&sum, 1) + }(i) + } + wg.Wait() + // We should have 20 tasks running concurrently, so it should take at least 50ms to complete + require.GreaterOrEqual(t, time.Since(start).Milliseconds(), int64(50)) + require.Equal(t, int64(100), sum) +} diff --git a/pkg/ratelimit/controller_test.go b/pkg/ratelimit/controller_test.go index 48a5ee2054b..d4093555ba7 100644 --- a/pkg/ratelimit/controller_test.go +++ b/pkg/ratelimit/controller_test.go @@ -78,7 +78,6 @@ func runMulitLabelLimiter(t *testing.T, limiter *Controller, testCase []labelCas } func TestControllerWithConcurrencyLimiter(t *testing.T) { - t.Parallel() re := require.New(t) limiter := NewController(context.Background(), "grpc", nil) defer limiter.Close() @@ -109,7 +108,7 @@ func TestControllerWithConcurrencyLimiter(t *testing.T) { status := limiter.Update(label, o) re.NotZero(status & ConcurrencyNoChange) }, - checkStatusFunc: func(label string) {}, + checkStatusFunc: func(_ string) {}, }, { opt: UpdateConcurrencyLimiter(5), @@ -191,7 +190,6 @@ func TestControllerWithConcurrencyLimiter(t *testing.T) { } func TestBlockList(t *testing.T) { - t.Parallel() re := require.New(t) opts := []Option{AddLabelAllowList()} limiter := NewController(context.Background(), "grpc", nil) @@ -213,7 +211,6 @@ func TestBlockList(t *testing.T) { } func TestControllerWithQPSLimiter(t *testing.T) { - t.Parallel() re := require.New(t) limiter := NewController(context.Background(), "grpc", nil) defer limiter.Close() @@ -243,7 +240,7 @@ func TestControllerWithQPSLimiter(t *testing.T) { status := limiter.Update(label, o) re.NotZero(status & QPSNoChange) }, - checkStatusFunc: func(label string) {}, + checkStatusFunc: func(_ string) {}, }, { opt: UpdateQPSLimiter(5, 5), @@ -323,7 +320,6 @@ func TestControllerWithQPSLimiter(t *testing.T) { } func TestControllerWithTwoLimiters(t *testing.T) { - t.Parallel() re := require.New(t) limiter := NewController(context.Background(), "grpc", nil) defer limiter.Close() diff --git a/pkg/ratelimit/limiter.go b/pkg/ratelimit/limiter.go index dc744d9ac1b..eaf6acf7c17 100644 --- a/pkg/ratelimit/limiter.go +++ b/pkg/ratelimit/limiter.go @@ -36,18 +36,18 @@ type DimensionConfig struct { type limiter struct { mu syncutil.RWMutex - concurrency *concurrencyLimiter + concurrency *ConcurrencyLimiter rate *RateLimiter } func newLimiter() *limiter { lim := &limiter{ - concurrency: newConcurrencyLimiter(0), + concurrency: NewConcurrencyLimiter(0), } return lim } -func (l *limiter) getConcurrencyLimiter() *concurrencyLimiter { +func (l *limiter) getConcurrencyLimiter() *ConcurrencyLimiter { l.mu.RLock() defer l.mu.RUnlock() return l.concurrency @@ -81,7 +81,7 @@ func (l *limiter) 
getQPSLimiterStatus() (limit rate.Limit, burst int) { func (l *limiter) getConcurrencyLimiterStatus() (limit uint64, current uint64) { baseLimiter := l.getConcurrencyLimiter() if baseLimiter != nil { - return baseLimiter.getLimit(), baseLimiter.getCurrent() + return baseLimiter.getLimit(), baseLimiter.GetRunningTasksNum() } return 0, 0 } @@ -101,7 +101,7 @@ func (l *limiter) updateConcurrencyConfig(limit uint64) UpdateStatus { } l.concurrency.setLimit(limit) } else { - l.concurrency = newConcurrencyLimiter(limit) + l.concurrency = NewConcurrencyLimiter(limit) } return ConcurrencyChanged } diff --git a/pkg/ratelimit/limiter_test.go b/pkg/ratelimit/limiter_test.go index fabb9d98917..36f339b47ac 100644 --- a/pkg/ratelimit/limiter_test.go +++ b/pkg/ratelimit/limiter_test.go @@ -40,7 +40,6 @@ func (r *releaseUtil) append(d DoneFunc) { } func TestWithConcurrencyLimiter(t *testing.T) { - t.Parallel() re := require.New(t) limiter := newLimiter() @@ -103,7 +102,6 @@ func TestWithConcurrencyLimiter(t *testing.T) { } func TestWithQPSLimiter(t *testing.T) { - t.Parallel() re := require.New(t) limiter := newLimiter() status := limiter.updateQPSConfig(float64(rate.Every(time.Second)), 1) @@ -177,7 +175,6 @@ func TestWithQPSLimiter(t *testing.T) { } func TestWithTwoLimiters(t *testing.T) { - t.Parallel() re := require.New(t) cfg := &DimensionConfig{ QPS: 100, diff --git a/pkg/ratelimit/metrics.go b/pkg/ratelimit/metrics.go new file mode 100644 index 00000000000..c5510e66b26 --- /dev/null +++ b/pkg/ratelimit/metrics.go @@ -0,0 +1,71 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
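A minimal usage sketch for the token-based API introduced above, outside the patch itself: AcquireToken blocks the calling goroutine until a slot frees up or the context is done, and the token must be handed back through ReleaseToken. The module path and the concrete limit and timeout values are assumptions of this sketch.

package main

import (
    "context"
    "fmt"
    "time"

    "github.com/tikv/pd/pkg/ratelimit"
)

func main() {
    limiter := ratelimit.NewConcurrencyLimiter(4)

    ctx, cancel := context.WithTimeout(context.Background(), time.Second)
    defer cancel()

    // Blocks on the caller goroutine until a slot frees up or ctx expires.
    token, err := limiter.AcquireToken(ctx)
    if err != nil {
        fmt.Println("gave up waiting for a slot:", err)
        return
    }
    // ReleaseToken ignores a double release, so deferring it is always safe.
    defer limiter.ReleaseToken(token)

    // ... perform the concurrency-limited work here ...
    fmt.Println("running tasks:", limiter.GetRunningTasksNum())
}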
+ +package ratelimit + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +const ( + nameStr = "runner_name" + taskStr = "task_type" +) + +var ( + RunnerTaskMaxWaitingDuration = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "pd", + Subsystem: "ratelimit", + Name: "runner_task_max_waiting_duration_seconds", + Help: "The duration of tasks waiting in the runner.", + }, []string{nameStr}) + RunnerPendingTasks = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "pd", + Subsystem: "ratelimit", + Name: "runner_pending_tasks", + Help: "The number of pending tasks in the runner.", + }, []string{nameStr, taskStr}) + RunnerFailedTasks = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "pd", + Subsystem: "ratelimit", + Name: "runner_failed_tasks_total", + Help: "The number of failed tasks in the runner.", + }, []string{nameStr, taskStr}) + RunnerSucceededTasks = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "pd", + Subsystem: "ratelimit", + Name: "runner_success_tasks_total", + Help: "The number of tasks in the runner.", + }, []string{nameStr, taskStr}) + RunnerTaskExecutionDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "pd", + Subsystem: "ratelimit", + Name: "runner_task_execution_duration_seconds", + Help: "Bucketed histogram of processing time (s) of finished tasks.", + Buckets: prometheus.ExponentialBuckets(0.0005, 2, 13), + }, []string{nameStr, taskStr}) +) + +func init() { + prometheus.MustRegister(RunnerTaskMaxWaitingDuration) + prometheus.MustRegister(RunnerPendingTasks) + prometheus.MustRegister(RunnerFailedTasks) + prometheus.MustRegister(RunnerTaskExecutionDuration) + prometheus.MustRegister(RunnerSucceededTasks) +} diff --git a/pkg/ratelimit/ratelimiter_test.go b/pkg/ratelimit/ratelimiter_test.go index 35b355e7b21..f16bb6a83d2 100644 --- a/pkg/ratelimit/ratelimiter_test.go +++ b/pkg/ratelimit/ratelimiter_test.go @@ -22,7 +22,6 @@ import ( ) func TestRateLimiter(t *testing.T) { - t.Parallel() re := require.New(t) limiter := NewRateLimiter(100, 100) diff --git a/pkg/ratelimit/runner.go b/pkg/ratelimit/runner.go new file mode 100644 index 00000000000..2d88e36106e --- /dev/null +++ b/pkg/ratelimit/runner.go @@ -0,0 +1,244 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ratelimit + +import ( + "context" + "errors" + "sync" + "time" + + "github.com/pingcap/log" + "github.com/prometheus/client_golang/prometheus" + "go.uber.org/zap" +) + +// RegionHeartbeatStageName is the name of the stage of the region heartbeat. +const ( + HandleStatsAsync = "HandleStatsAsync" + ObserveRegionStatsAsync = "ObserveRegionStatsAsync" + UpdateSubTree = "UpdateSubTree" + HandleOverlaps = "HandleOverlaps" + CollectRegionStatsAsync = "CollectRegionStatsAsync" + SaveRegionToKV = "SaveRegionToKV" +) + +const ( + initialCapacity = 10000 + maxPendingTaskNum = 20000000 +) + +// Runner is the interface for running tasks. 
+type Runner interface { + RunTask(id uint64, name string, f func(), opts ...TaskOption) error + Start() + Stop() +} + +// Task is a task to be run. +type Task struct { + id uint64 + submittedAt time.Time + f func() + name string + // retained indicates whether the task should be dropped if the task queue exceeds maxPendingDuration. + retained bool +} + +// ErrMaxWaitingTasksExceeded is returned when the number of waiting tasks exceeds the maximum. +var ErrMaxWaitingTasksExceeded = errors.New("max waiting tasks exceeded") + +type taskID struct { + id uint64 + name string +} + +type ConcurrentRunner struct { + name string + limiter *ConcurrencyLimiter + maxPendingDuration time.Duration + taskChan chan *Task + pendingMu sync.Mutex + stopChan chan struct{} + wg sync.WaitGroup + pendingTaskCount map[string]int + pendingTasks []*Task + existTasks map[taskID]*Task + maxWaitingDuration prometheus.Gauge +} + +// NewConcurrentRunner creates a new ConcurrentRunner. +func NewConcurrentRunner(name string, limiter *ConcurrencyLimiter, maxPendingDuration time.Duration) *ConcurrentRunner { + s := &ConcurrentRunner{ + name: name, + limiter: limiter, + maxPendingDuration: maxPendingDuration, + taskChan: make(chan *Task), + pendingTasks: make([]*Task, 0, initialCapacity), + pendingTaskCount: make(map[string]int), + existTasks: make(map[taskID]*Task), + maxWaitingDuration: RunnerTaskMaxWaitingDuration.WithLabelValues(name), + } + return s +} + +// TaskOption configures TaskOp +type TaskOption func(opts *Task) + +// WithRetained sets whether the task should be retained. +func WithRetained(retained bool) TaskOption { + return func(opts *Task) { opts.retained = retained } +} + +// Start starts the runner. +func (cr *ConcurrentRunner) Start() { + cr.stopChan = make(chan struct{}) + cr.wg.Add(1) + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + go func() { + defer cr.wg.Done() + for { + select { + case task := <-cr.taskChan: + if cr.limiter != nil { + token, err := cr.limiter.AcquireToken(context.Background()) + if err != nil { + continue + } + go cr.run(task, token) + } else { + go cr.run(task, nil) + } + case <-cr.stopChan: + cr.pendingMu.Lock() + cr.pendingTasks = make([]*Task, 0, initialCapacity) + cr.pendingMu.Unlock() + log.Info("stopping async task runner", zap.String("name", cr.name)) + return + case <-ticker.C: + maxDuration := time.Duration(0) + cr.pendingMu.Lock() + if len(cr.pendingTasks) > 0 { + maxDuration = time.Since(cr.pendingTasks[0].submittedAt) + } + for taskName, cnt := range cr.pendingTaskCount { + RunnerPendingTasks.WithLabelValues(cr.name, taskName).Set(float64(cnt)) + } + cr.pendingMu.Unlock() + cr.maxWaitingDuration.Set(maxDuration.Seconds()) + } + } + }() +} + +func (cr *ConcurrentRunner) run(task *Task, token *TaskToken) { + start := time.Now() + task.f() + if token != nil { + cr.limiter.ReleaseToken(token) + cr.processPendingTasks() + } + RunnerTaskExecutionDuration.WithLabelValues(cr.name, task.name).Observe(time.Since(start).Seconds()) + RunnerSucceededTasks.WithLabelValues(cr.name, task.name).Inc() +} + +func (cr *ConcurrentRunner) processPendingTasks() { + cr.pendingMu.Lock() + defer cr.pendingMu.Unlock() + if len(cr.pendingTasks) > 0 { + task := cr.pendingTasks[0] + select { + case cr.taskChan <- task: + cr.pendingTasks = cr.pendingTasks[1:] + cr.pendingTaskCount[task.name]-- + delete(cr.existTasks, taskID{id: task.id, name: task.name}) + default: + } + return + } +} + +// Stop stops the runner. 
+func (cr *ConcurrentRunner) Stop() { + close(cr.stopChan) + cr.wg.Wait() +} + +// RunTask runs the task asynchronously. +func (cr *ConcurrentRunner) RunTask(id uint64, name string, f func(), opts ...TaskOption) error { + task := &Task{ + id: id, + name: name, + f: f, + submittedAt: time.Now(), + } + for _, opt := range opts { + opt(task) + } + cr.processPendingTasks() + cr.pendingMu.Lock() + defer func() { + cr.pendingMu.Unlock() + cr.processPendingTasks() + }() + + pendingTaskNum := len(cr.pendingTasks) + tid := taskID{task.id, task.name} + if pendingTaskNum > 0 { + // Here we use a map to find the task with the same ID. + // Then replace the old task with the new one. + if t, ok := cr.existTasks[tid]; ok { + t.f = f + t.submittedAt = time.Now() + return nil + } + if !task.retained { + maxWait := time.Since(cr.pendingTasks[0].submittedAt) + if maxWait > cr.maxPendingDuration { + RunnerFailedTasks.WithLabelValues(cr.name, task.name).Inc() + return ErrMaxWaitingTasksExceeded + } + } + if pendingTaskNum > maxPendingTaskNum { + RunnerFailedTasks.WithLabelValues(cr.name, task.name).Inc() + return ErrMaxWaitingTasksExceeded + } + } + cr.pendingTasks = append(cr.pendingTasks, task) + cr.existTasks[tid] = task + cr.pendingTaskCount[task.name]++ + return nil +} + +// SyncRunner is a simple task runner that limits the number of concurrent tasks. +type SyncRunner struct{} + +// NewSyncRunner creates a new SyncRunner. +func NewSyncRunner() *SyncRunner { + return &SyncRunner{} +} + +// RunTask runs the task synchronously. +func (*SyncRunner) RunTask(_ uint64, _ string, f func(), _ ...TaskOption) error { + f() + return nil +} + +// Start starts the runner. +func (*SyncRunner) Start() {} + +// Stop stops the runner. +func (*SyncRunner) Stop() {} diff --git a/pkg/ratelimit/runner_test.go b/pkg/ratelimit/runner_test.go new file mode 100644 index 00000000000..0335a78bcbe --- /dev/null +++ b/pkg/ratelimit/runner_test.go @@ -0,0 +1,101 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
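An illustrative sketch, not part of the patch, of how the ConcurrentRunner defined above is meant to be driven; the runner name, limits, and task bodies are arbitrary. Submitting a task whose (id, name) pair is already pending only refreshes the queued closure instead of enqueueing a duplicate, which is what the DuplicatedTask case in the tests below exercises.

package main

import (
    "log"
    "time"

    "github.com/tikv/pd/pkg/ratelimit"
)

func main() {
    // At most 8 tasks run at once; a queued task older than one second is
    // rejected unless it was submitted with WithRetained(true).
    runner := ratelimit.NewConcurrentRunner("example", ratelimit.NewConcurrencyLimiter(8), time.Second)
    runner.Start()
    defer runner.Stop()

    for i := 0; i < 100; i++ {
        id := uint64(i)
        err := runner.RunTask(id, "demo-task", func() {
            time.Sleep(10 * time.Millisecond) // simulated work
        }, ratelimit.WithRetained(id%10 == 0))
        if err != nil {
            // Typically ratelimit.ErrMaxWaitingTasksExceeded once the queue saturates.
            log.Printf("task %d rejected: %v", id, err)
        }
    }
    time.Sleep(time.Second) // arbitrary grace period so queued tasks can drain before Stop
}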
+ +package ratelimit + +import ( + "sync" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestConcurrentRunner(t *testing.T) { + t.Run("RunTask", func(t *testing.T) { + runner := NewConcurrentRunner("test", NewConcurrencyLimiter(1), time.Second) + runner.Start() + defer runner.Stop() + + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + time.Sleep(50 * time.Millisecond) + wg.Add(1) + err := runner.RunTask( + uint64(i), + "test1", + func() { + defer wg.Done() + time.Sleep(100 * time.Millisecond) + }, + ) + require.NoError(t, err) + } + wg.Wait() + }) + + t.Run("MaxPendingDuration", func(t *testing.T) { + runner := NewConcurrentRunner("test", NewConcurrencyLimiter(1), 2*time.Millisecond) + runner.Start() + defer runner.Stop() + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + err := runner.RunTask( + uint64(i), + "test2", + func() { + defer wg.Done() + time.Sleep(100 * time.Millisecond) + }, + ) + if err != nil { + wg.Done() + // task 0 running + // task 1 after recv by runner, blocked by task 1, wait on Acquire. + // task 2 enqueue pendingTasks + // task 3 enqueue pendingTasks + // task 4 enqueue pendingTasks, check pendingTasks[0] timeout, report error + require.GreaterOrEqual(t, i, 4) + } + time.Sleep(1 * time.Millisecond) + } + wg.Wait() + }) + + t.Run("DuplicatedTask", func(t *testing.T) { + runner := NewConcurrentRunner("test", NewConcurrencyLimiter(1), time.Minute) + runner.Start() + defer runner.Stop() + for i := 1; i < 11; i++ { + regionID := uint64(i) + if i == 10 { + regionID = 4 + } + err := runner.RunTask( + regionID, + "test3", + func() { + time.Sleep(time.Second) + }, + ) + require.NoError(t, err) + time.Sleep(1 * time.Millisecond) + } + + updatedSubmitted := runner.pendingTasks[1].submittedAt + lastSubmitted := runner.pendingTasks[len(runner.pendingTasks)-1].submittedAt + require.Greater(t, updatedSubmitted, lastSubmitted) + }) +} diff --git a/pkg/replication/replication_mode.go b/pkg/replication/replication_mode.go index 9776a36a8f3..5f6b212529b 100644 --- a/pkg/replication/replication_mode.go +++ b/pkg/replication/replication_mode.go @@ -366,7 +366,10 @@ func (m *ModeManager) Run(ctx context.Context) { }() go func() { - defer wg.Done() + defer func() { + wg.Done() + drStateGauge.Set(0) + }() ticker := time.NewTicker(replicateStateInterval) defer ticker.Stop() for { diff --git a/pkg/replication/replication_mode_test.go b/pkg/replication/replication_mode_test.go index 5cf9f1a1450..d19a4f70d66 100644 --- a/pkg/replication/replication_mode_test.go +++ b/pkg/replication/replication_mode_test.go @@ -144,7 +144,7 @@ func (rep *mockFileReplicator) GetMembers() ([]*pdpb.Member, error) { return members, nil } -func (rep *mockFileReplicator) ReplicateFileToMember(ctx context.Context, member *pdpb.Member, name string, data []byte) error { +func (rep *mockFileReplicator) ReplicateFileToMember(_ context.Context, member *pdpb.Member, _ string, data []byte) error { if err := rep.errors[member.GetMemberId()]; err != nil { return err } @@ -260,7 +260,7 @@ func TestStateSwitch(t *testing.T) { rep.tickUpdateState() re.Equal(drStateSync, rep.drGetState()) - // once zone2 down, swith to async state. + // once zone2 down, switch to async state. 
setStoreState(cluster, "up", "up", "up", "up", "down", "down") rep.tickUpdateState() re.Equal(drStateAsyncWait, rep.drGetState()) diff --git a/pkg/response/region.go b/pkg/response/region.go new file mode 100644 index 00000000000..153294c2861 --- /dev/null +++ b/pkg/response/region.go @@ -0,0 +1,275 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package response + +import ( + "context" + + "github.com/mailru/easyjson/jwriter" + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/pingcap/kvproto/pkg/replication_modepb" + "github.com/tikv/pd/pkg/core" +) + +// MetaPeer is api compatible with *metapb.Peer. +// NOTE: This type is exported by HTTP API. Please pay more attention when modifying it. +type MetaPeer struct { + *metapb.Peer + // RoleName is `Role.String()`. + // Since Role is serialized as int by json by default, + // introducing it will make the output of pd-ctl easier to identify Role. + RoleName string `json:"role_name"` + // IsLearner is `Role == "Learner"`. + // Since IsLearner was changed to Role in kvproto in 5.0, this field was introduced to ensure api compatibility. + IsLearner bool `json:"is_learner,omitempty"` +} + +func (m *MetaPeer) setDefaultIfNil() { + if m.Peer == nil { + m.Peer = &metapb.Peer{ + Id: m.GetId(), + StoreId: m.GetStoreId(), + Role: m.GetRole(), + IsWitness: m.GetIsWitness(), + } + } +} + +// PDPeerStats is api compatible with *pdpb.PeerStats. +// NOTE: This type is exported by HTTP API. Please pay more attention when modifying it. +type PDPeerStats struct { + *pdpb.PeerStats + Peer MetaPeer `json:"peer"` +} + +func (s *PDPeerStats) setDefaultIfNil() { + if s.PeerStats == nil { + s.PeerStats = &pdpb.PeerStats{ + Peer: s.GetPeer(), + DownSeconds: s.GetDownSeconds(), + } + } + s.Peer.setDefaultIfNil() +} + +func fromPeer(peer *metapb.Peer) MetaPeer { + if peer == nil { + return MetaPeer{} + } + return MetaPeer{ + Peer: peer, + RoleName: peer.GetRole().String(), + IsLearner: core.IsLearner(peer), + } +} + +func fromPeerSlice(peers []*metapb.Peer) []MetaPeer { + if peers == nil { + return nil + } + slice := make([]MetaPeer, len(peers)) + for i, peer := range peers { + slice[i] = fromPeer(peer) + } + return slice +} + +func fromPeerStats(peer *pdpb.PeerStats) PDPeerStats { + return PDPeerStats{ + PeerStats: peer, + Peer: fromPeer(peer.Peer), + } +} + +func fromPeerStatsSlice(peers []*pdpb.PeerStats) []PDPeerStats { + if peers == nil { + return nil + } + slice := make([]PDPeerStats, len(peers)) + for i, peer := range peers { + slice[i] = fromPeerStats(peer) + } + return slice +} + +// RegionInfo records detail region info for api usage. +// NOTE: This type is exported by HTTP API. Please pay more attention when modifying it. 
+// easyjson:json +type RegionInfo struct { + ID uint64 `json:"id"` + StartKey string `json:"start_key"` + EndKey string `json:"end_key"` + RegionEpoch *metapb.RegionEpoch `json:"epoch,omitempty"` + Peers []MetaPeer `json:"peers,omitempty"` + + Leader MetaPeer `json:"leader,omitempty"` + DownPeers []PDPeerStats `json:"down_peers,omitempty"` + PendingPeers []MetaPeer `json:"pending_peers,omitempty"` + CPUUsage uint64 `json:"cpu_usage"` + WrittenBytes uint64 `json:"written_bytes"` + ReadBytes uint64 `json:"read_bytes"` + WrittenKeys uint64 `json:"written_keys"` + ReadKeys uint64 `json:"read_keys"` + ApproximateSize int64 `json:"approximate_size"` + ApproximateKeys int64 `json:"approximate_keys"` + Buckets []string `json:"buckets,omitempty"` + + ReplicationStatus *ReplicationStatus `json:"replication_status,omitempty"` +} + +// ReplicationStatus represents the replication mode status of the region. +// NOTE: This type is exported by HTTP API. Please pay more attention when modifying it. +type ReplicationStatus struct { + State string `json:"state"` + StateID uint64 `json:"state_id"` +} + +func fromPBReplicationStatus(s *replication_modepb.RegionReplicationStatus) *ReplicationStatus { + if s == nil { + return nil + } + return &ReplicationStatus{ + State: s.GetState().String(), + StateID: s.GetStateId(), + } +} + +// NewAPIRegionInfo create a new API RegionInfo. +func NewAPIRegionInfo(r *core.RegionInfo) *RegionInfo { + return InitRegion(r, &RegionInfo{}) +} + +// InitRegion init a new API RegionInfo from the core.RegionInfo. +func InitRegion(r *core.RegionInfo, s *RegionInfo) *RegionInfo { + if r == nil { + return nil + } + + s.ID = r.GetID() + s.StartKey = core.HexRegionKeyStr(r.GetStartKey()) + s.EndKey = core.HexRegionKeyStr(r.GetEndKey()) + s.RegionEpoch = r.GetRegionEpoch() + s.Peers = fromPeerSlice(r.GetPeers()) + s.Leader = fromPeer(r.GetLeader()) + s.DownPeers = fromPeerStatsSlice(r.GetDownPeers()) + s.PendingPeers = fromPeerSlice(r.GetPendingPeers()) + s.CPUUsage = r.GetCPUUsage() + s.WrittenBytes = r.GetBytesWritten() + s.WrittenKeys = r.GetKeysWritten() + s.ReadBytes = r.GetBytesRead() + s.ReadKeys = r.GetKeysRead() + s.ApproximateSize = r.GetApproximateSize() + s.ApproximateKeys = r.GetApproximateKeys() + s.ReplicationStatus = fromPBReplicationStatus(r.GetReplicationStatus()) + s.Buckets = nil + + keys := r.GetBuckets().GetKeys() + if len(keys) > 0 { + s.Buckets = make([]string, len(keys)) + for i, key := range keys { + s.Buckets[i] = core.HexRegionKeyStr(key) + } + } + return s +} + +// Adjust is only used in testing, in order to compare the data from json deserialization. +func (r *RegionInfo) Adjust() { + for _, peer := range r.DownPeers { + // Since api.PDPeerStats uses the api.MetaPeer type variable Peer to overwrite PeerStats.Peer, + // it needs to be restored after deserialization to be completely consistent with the original. + peer.PeerStats.Peer = peer.Peer.Peer + } +} + +// RegionsInfo contains some regions with the detailed region info. +type RegionsInfo struct { + Count int `json:"count"` + Regions []RegionInfo `json:"regions"` +} + +// Adjust is only used in testing, in order to compare the data from json deserialization. +func (s *RegionsInfo) Adjust() { + for _, r := range s.Regions { + r.Adjust() + } +} + +// MarshalRegionInfoJSON marshals region to bytes in `RegionInfo`'s JSON format. +// It is used to reduce the cost of JSON serialization. 
+func MarshalRegionInfoJSON(ctx context.Context, r *core.RegionInfo) ([]byte, error) { + out := &jwriter.Writer{} + + region := &RegionInfo{} + select { + case <-ctx.Done(): + // Return early, avoid the unnecessary computation. + // See more details in https://github.com/tikv/pd/issues/6835 + return nil, ctx.Err() + default: + } + + covertAPIRegionInfo(r, region, out) + return out.Buffer.BuildBytes(), out.Error +} + +// MarshalRegionsInfoJSON marshals regions to bytes in `RegionsInfo`'s JSON format. +// It is used to reduce the cost of JSON serialization. +func MarshalRegionsInfoJSON(ctx context.Context, regions []*core.RegionInfo) ([]byte, error) { + out := &jwriter.Writer{} + out.RawByte('{') + + out.RawString("\"count\":") + out.Int(len(regions)) + + out.RawString(",\"regions\":") + out.RawByte('[') + region := &RegionInfo{} + for i, r := range regions { + select { + case <-ctx.Done(): + // Return early, avoid the unnecessary computation. + // See more details in https://github.com/tikv/pd/issues/6835 + return nil, ctx.Err() + default: + } + if i > 0 { + out.RawByte(',') + } + covertAPIRegionInfo(r, region, out) + } + out.RawByte(']') + + out.RawByte('}') + return out.Buffer.BuildBytes(), out.Error +} + +func covertAPIRegionInfo(r *core.RegionInfo, region *RegionInfo, out *jwriter.Writer) { + InitRegion(r, region) + // EasyJSON will not check anonymous struct pointer field and will panic if the field is nil. + // So we need to set the field to default value explicitly when the anonymous struct pointer is nil. + region.Leader.setDefaultIfNil() + for i := range region.Peers { + region.Peers[i].setDefaultIfNil() + } + for i := range region.PendingPeers { + region.PendingPeers[i].setDefaultIfNil() + } + for i := range region.DownPeers { + region.DownPeers[i].setDefaultIfNil() + } + region.MarshalEasyJSON(out) +} diff --git a/server/api/region_easyjson.go b/pkg/response/region_easyjson.go similarity index 99% rename from server/api/region_easyjson.go rename to pkg/response/region_easyjson.go index 4bd9fe69e42..33598360235 100644 --- a/server/api/region_easyjson.go +++ b/pkg/response/region_easyjson.go @@ -1,6 +1,6 @@ // Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT. -package api +package response import ( json "encoding/json" diff --git a/pkg/response/region_test.go b/pkg/response/region_test.go new file mode 100644 index 00000000000..de6daa2c2fe --- /dev/null +++ b/pkg/response/region_test.go @@ -0,0 +1,70 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
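A hedged sketch, separate from the patch, of how an HTTP handler might use the context-aware marshaller above; the handler wiring and the region source are assumptions, and in PD proper the regions presumably come from the cluster's region cache.

package main

import (
    "net/http"

    "github.com/tikv/pd/pkg/core"
    "github.com/tikv/pd/pkg/response"
)

// regionSource stands in for whatever supplies the region list; it is assumed here.
type regionSource func() []*core.RegionInfo

func regionsHandler(src regionSource) http.HandlerFunc {
    return func(w http.ResponseWriter, r *http.Request) {
        b, err := response.MarshalRegionsInfoJSON(r.Context(), src())
        if err != nil {
            // A canceled request context makes the marshaller return early (see tikv/pd#6835).
            http.Error(w, err.Error(), http.StatusInternalServerError)
            return
        }
        w.Header().Set("Content-Type", "application/json")
        _, _ = w.Write(b)
    }
}

func main() {
    http.Handle("/regions", regionsHandler(func() []*core.RegionInfo { return nil }))
    _ = http.ListenAndServe("127.0.0.1:8080", nil)
}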
+ +package response + +import ( + "encoding/json" + "testing" + + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/stretchr/testify/require" +) + +func TestPeer(t *testing.T) { + re := require.New(t) + peers := []*metapb.Peer{ + {Id: 1, StoreId: 10, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 20, Role: metapb.PeerRole_Learner}, + {Id: 3, StoreId: 30, Role: metapb.PeerRole_IncomingVoter}, + {Id: 4, StoreId: 40, Role: metapb.PeerRole_DemotingVoter}, + } + // float64 is the default numeric type for JSON + expected := []map[string]any{ + {"id": float64(1), "store_id": float64(10), "role_name": "Voter"}, + {"id": float64(2), "store_id": float64(20), "role": float64(1), "role_name": "Learner", "is_learner": true}, + {"id": float64(3), "store_id": float64(30), "role": float64(2), "role_name": "IncomingVoter"}, + {"id": float64(4), "store_id": float64(40), "role": float64(3), "role_name": "DemotingVoter"}, + } + + data, err := json.Marshal(fromPeerSlice(peers)) + re.NoError(err) + var ret []map[string]any + re.NoError(json.Unmarshal(data, &ret)) + re.Equal(expected, ret) +} + +func TestPeerStats(t *testing.T) { + re := require.New(t) + peers := []*pdpb.PeerStats{ + {Peer: &metapb.Peer{Id: 1, StoreId: 10, Role: metapb.PeerRole_Voter}, DownSeconds: 0}, + {Peer: &metapb.Peer{Id: 2, StoreId: 20, Role: metapb.PeerRole_Learner}, DownSeconds: 1}, + {Peer: &metapb.Peer{Id: 3, StoreId: 30, Role: metapb.PeerRole_IncomingVoter}, DownSeconds: 2}, + {Peer: &metapb.Peer{Id: 4, StoreId: 40, Role: metapb.PeerRole_DemotingVoter}, DownSeconds: 3}, + } + // float64 is the default numeric type for JSON + expected := []map[string]any{ + {"peer": map[string]any{"id": float64(1), "store_id": float64(10), "role_name": "Voter"}}, + {"peer": map[string]any{"id": float64(2), "store_id": float64(20), "role": float64(1), "role_name": "Learner", "is_learner": true}, "down_seconds": float64(1)}, + {"peer": map[string]any{"id": float64(3), "store_id": float64(30), "role": float64(2), "role_name": "IncomingVoter"}, "down_seconds": float64(2)}, + {"peer": map[string]any{"id": float64(4), "store_id": float64(40), "role": float64(3), "role_name": "DemotingVoter"}, "down_seconds": float64(3)}, + } + + data, err := json.Marshal(fromPeerStatsSlice(peers)) + re.NoError(err) + var ret []map[string]any + re.NoError(json.Unmarshal(data, &ret)) + re.Equal(expected, ret) +} diff --git a/pkg/response/store.go b/pkg/response/store.go new file mode 100644 index 00000000000..8bff1e75e42 --- /dev/null +++ b/pkg/response/store.go @@ -0,0 +1,152 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package response + +import ( + "time" + + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/core/constant" + sc "github.com/tikv/pd/pkg/schedule/config" + "github.com/tikv/pd/pkg/utils/typeutil" +) + +// MetaStore contains meta information about a store. 
+type MetaStore struct { + *metapb.Store + StateName string `json:"state_name"` +} + +// SlowTrend contains slow trend information about a store. +type SlowTrend struct { + // CauseValue is the slow trend detecting raw input, it changes by the performance and pressure along time of the store. + // The value itself is not important, what matter is: + // - The comparison result from store to store. + // - The change magnitude along time (represented by CauseRate). + // Currently, it's one of store's internal latency (duration of waiting in the task queue of raftstore.store). + CauseValue float64 `json:"cause_value"` + // CauseRate is for measuring the change magnitude of CauseValue of the store, + // - CauseRate > 0 means the store is become slower currently + // - CauseRate < 0 means the store is become faster currently + // - CauseRate == 0 means the store's performance and pressure does not have significant changes + CauseRate float64 `json:"cause_rate"` + // ResultValue is the current gRPC QPS of the store. + ResultValue float64 `json:"result_value"` + // ResultRate is for measuring the change magnitude of ResultValue of the store. + ResultRate float64 `json:"result_rate"` +} + +// StoreStatus contains status about a store. +type StoreStatus struct { + Capacity typeutil.ByteSize `json:"capacity"` + Available typeutil.ByteSize `json:"available"` + UsedSize typeutil.ByteSize `json:"used_size"` + LeaderCount int `json:"leader_count"` + LeaderWeight float64 `json:"leader_weight"` + LeaderScore float64 `json:"leader_score"` + LeaderSize int64 `json:"leader_size"` + RegionCount int `json:"region_count"` + RegionWeight float64 `json:"region_weight"` + RegionScore float64 `json:"region_score"` + RegionSize int64 `json:"region_size"` + LearnerCount int `json:"learner_count,omitempty"` + WitnessCount int `json:"witness_count,omitempty"` + PendingPeerCount int `json:"pending_peer_count,omitempty"` + SlowScore uint64 `json:"slow_score,omitempty"` + SlowTrend *SlowTrend `json:"slow_trend,omitempty"` + SendingSnapCount uint32 `json:"sending_snap_count,omitempty"` + ReceivingSnapCount uint32 `json:"receiving_snap_count,omitempty"` + IsBusy bool `json:"is_busy,omitempty"` + StartTS *time.Time `json:"start_ts,omitempty"` + LastHeartbeatTS *time.Time `json:"last_heartbeat_ts,omitempty"` + Uptime *typeutil.Duration `json:"uptime,omitempty"` +} + +// StoreInfo contains information about a store. +type StoreInfo struct { + Store *MetaStore `json:"store"` + Status *StoreStatus `json:"status"` +} + +const ( + // DisconnectedName is the name when store is disconnected. + DisconnectedName = "Disconnected" + // DownStateName is the name when store is down. + DownStateName = "Down" +) + +// BuildStoreInfo builds a storeInfo response. 
+func BuildStoreInfo(opt *sc.ScheduleConfig, store *core.StoreInfo) *StoreInfo { + var slowTrend *SlowTrend + coreSlowTrend := store.GetSlowTrend() + if coreSlowTrend != nil { + slowTrend = &SlowTrend{coreSlowTrend.CauseValue, coreSlowTrend.CauseRate, coreSlowTrend.ResultValue, coreSlowTrend.ResultRate} + } + s := &StoreInfo{ + Store: &MetaStore{ + Store: store.GetMeta(), + StateName: store.GetState().String(), + }, + Status: &StoreStatus{ + Capacity: typeutil.ByteSize(store.GetCapacity()), + Available: typeutil.ByteSize(store.GetAvailable()), + UsedSize: typeutil.ByteSize(store.GetUsedSize()), + LeaderCount: store.GetLeaderCount(), + LeaderWeight: store.GetLeaderWeight(), + LeaderScore: store.LeaderScore(constant.StringToSchedulePolicy(opt.LeaderSchedulePolicy), 0), + LeaderSize: store.GetLeaderSize(), + RegionCount: store.GetRegionCount(), + RegionWeight: store.GetRegionWeight(), + RegionScore: store.RegionScore(opt.RegionScoreFormulaVersion, opt.HighSpaceRatio, opt.LowSpaceRatio, 0), + RegionSize: store.GetRegionSize(), + LearnerCount: store.GetLearnerCount(), + WitnessCount: store.GetWitnessCount(), + SlowScore: store.GetSlowScore(), + SlowTrend: slowTrend, + SendingSnapCount: store.GetSendingSnapCount(), + ReceivingSnapCount: store.GetReceivingSnapCount(), + PendingPeerCount: store.GetPendingPeerCount(), + IsBusy: store.IsBusy(), + }, + } + + if store.GetStoreStats() != nil { + startTS := store.GetStartTime() + s.Status.StartTS = &startTS + } + if lastHeartbeat := store.GetLastHeartbeatTS(); !lastHeartbeat.IsZero() { + s.Status.LastHeartbeatTS = &lastHeartbeat + } + if upTime := store.GetUptime(); upTime > 0 { + duration := typeutil.NewDuration(upTime) + s.Status.Uptime = &duration + } + + if store.GetState() == metapb.StoreState_Up { + if store.DownTime() > opt.MaxStoreDownTime.Duration { + s.Store.StateName = DownStateName + } else if store.IsDisconnected() { + s.Store.StateName = DisconnectedName + } + } + return s +} + +// StoresInfo records stores' info. +type StoresInfo struct { + Count int `json:"count"` + Stores []*StoreInfo `json:"stores"` +} diff --git a/pkg/schedule/checker/checker_controller.go b/pkg/schedule/checker/checker_controller.go index 355226cd2d8..cdc826a1dda 100644 --- a/pkg/schedule/checker/checker_controller.go +++ b/pkg/schedule/checker/checker_controller.go @@ -31,7 +31,7 @@ import ( ) // DefaultCacheSize is the default length of waiting list. 
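Before the checker changes below, a hedged sketch of how the BuildStoreInfo helper and the StoresInfo envelope above compose; it is not part of the patch, and where the ScheduleConfig and the store list come from is assumed (normally the server's persisted options and the cluster's store cache).

package main

import (
    "encoding/json"
    "os"

    "github.com/tikv/pd/pkg/core"
    "github.com/tikv/pd/pkg/response"
    sc "github.com/tikv/pd/pkg/schedule/config"
)

func buildStoresResponse(cfg *sc.ScheduleConfig, stores []*core.StoreInfo) *response.StoresInfo {
    infos := make([]*response.StoreInfo, 0, len(stores))
    for _, s := range stores {
        infos = append(infos, response.BuildStoreInfo(cfg, s))
    }
    return &response.StoresInfo{Count: len(infos), Stores: infos}
}

func main() {
    // With an empty store list this simply prints {"count":0,"stores":[]}.
    _ = json.NewEncoder(os.Stdout).Encode(buildStoresResponse(&sc.ScheduleConfig{}, nil))
}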
-const DefaultCacheSize = 1000 +const DefaultCacheSize = 100000 var denyCheckersByLabelerCounter = labeler.LabelerEventCounter.WithLabelValues("checkers", "deny") diff --git a/pkg/schedule/checker/merge_checker.go b/pkg/schedule/checker/merge_checker.go index 1ce7bddd1dc..821c21cc119 100644 --- a/pkg/schedule/checker/merge_checker.go +++ b/pkg/schedule/checker/merge_checker.go @@ -94,7 +94,7 @@ func NewMergeChecker(ctx context.Context, cluster sche.CheckerCluster, conf conf } // GetType return MergeChecker's type -func (m *MergeChecker) GetType() string { +func (*MergeChecker) GetType() string { return "merge-checker" } diff --git a/pkg/schedule/checker/merge_checker_test.go b/pkg/schedule/checker/merge_checker_test.go index 40466d33947..06e8d468de3 100644 --- a/pkg/schedule/checker/merge_checker_test.go +++ b/pkg/schedule/checker/merge_checker_test.go @@ -544,10 +544,10 @@ func (suite *mergeCheckerTestSuite) TestCache() { re.NotNil(ops) } -func makeKeyRanges(keys ...string) []interface{} { - var res []interface{} +func makeKeyRanges(keys ...string) []any { + var res []any for i := 0; i < len(keys); i += 2 { - res = append(res, map[string]interface{}{"start_key": keys[i], "end_key": keys[i+1]}) + res = append(res, map[string]any{"start_key": keys[i], "end_key": keys[i+1]}) } return res } diff --git a/pkg/schedule/checker/replica_checker.go b/pkg/schedule/checker/replica_checker.go index 3e23f3bdcac..6324fd2ca10 100644 --- a/pkg/schedule/checker/replica_checker.go +++ b/pkg/schedule/checker/replica_checker.go @@ -76,7 +76,7 @@ func NewReplicaChecker(cluster sche.CheckerCluster, conf config.CheckerConfigPro } // GetType return ReplicaChecker's type -func (r *ReplicaChecker) GetType() string { +func (*ReplicaChecker) GetType() string { return replicaCheckerName } diff --git a/pkg/schedule/checker/replica_strategy.go b/pkg/schedule/checker/replica_strategy.go index fdf05a0c479..e234189fe96 100644 --- a/pkg/schedule/checker/replica_strategy.go +++ b/pkg/schedule/checker/replica_strategy.go @@ -97,8 +97,13 @@ func (s *ReplicaStrategy) SelectStoreToFix(coLocationStores []*core.StoreInfo, o return 0, false } // trick to avoid creating a slice with `old` removed. - s.swapStoreToFirst(coLocationStores, old) - return s.SelectStoreToAdd(coLocationStores[1:]) + swapStoreToFirst(coLocationStores, old) + // If the coLocationStores only has one store, no need to remove. + // Otherwise, the other stores will be filtered. + if len(coLocationStores) > 1 { + coLocationStores = coLocationStores[1:] + } + return s.SelectStoreToAdd(coLocationStores) } // SelectStoreToImprove returns a store to replace oldStore. The location @@ -108,7 +113,7 @@ func (s *ReplicaStrategy) SelectStoreToImprove(coLocationStores []*core.StoreInf return 0, false } // trick to avoid creating a slice with `old` removed. - s.swapStoreToFirst(coLocationStores, old) + swapStoreToFirst(coLocationStores, old) oldStore := s.cluster.GetStore(old) if oldStore == nil { return 0, false @@ -122,7 +127,7 @@ func (s *ReplicaStrategy) SelectStoreToImprove(coLocationStores []*core.StoreInf return s.SelectStoreToAdd(coLocationStores[1:], filters...) 
} -func (s *ReplicaStrategy) swapStoreToFirst(stores []*core.StoreInfo, id uint64) { +func swapStoreToFirst(stores []*core.StoreInfo, id uint64) { for i, s := range stores { if s.GetID() == id { stores[0], stores[i] = stores[i], stores[0] diff --git a/pkg/schedule/checker/rule_checker.go b/pkg/schedule/checker/rule_checker.go index 464f5e97be8..66b958911b1 100644 --- a/pkg/schedule/checker/rule_checker.go +++ b/pkg/schedule/checker/rule_checker.go @@ -107,7 +107,7 @@ func NewRuleChecker(ctx context.Context, cluster sche.CheckerCluster, ruleManage } // GetType returns RuleChecker's Type -func (c *RuleChecker) GetType() string { +func (*RuleChecker) GetType() string { return ruleCheckerName } @@ -347,7 +347,7 @@ func (c *RuleChecker) fixLooseMatchPeer(region *core.RegionInfo, fit *placement. if region.GetLeader().GetId() != peer.GetId() && rf.Rule.Role == placement.Leader { ruleCheckerFixLeaderRoleCounter.Inc() if c.allowLeader(fit, peer) { - return operator.CreateTransferLeaderOperator("fix-leader-role", c.cluster, region, region.GetLeader().GetStoreId(), peer.GetStoreId(), []uint64{}, 0) + return operator.CreateTransferLeaderOperator("fix-leader-role", c.cluster, region, peer.GetStoreId(), []uint64{}, 0) } ruleCheckerNotAllowLeaderCounter.Inc() return nil, errPeerCannotBeLeader @@ -356,7 +356,7 @@ func (c *RuleChecker) fixLooseMatchPeer(region *core.RegionInfo, fit *placement. ruleCheckerFixFollowerRoleCounter.Inc() for _, p := range region.GetPeers() { if c.allowLeader(fit, p) { - return operator.CreateTransferLeaderOperator("fix-follower-role", c.cluster, region, peer.GetStoreId(), p.GetStoreId(), []uint64{}, 0) + return operator.CreateTransferLeaderOperator("fix-follower-role", c.cluster, region, p.GetStoreId(), []uint64{}, 0) } } ruleCheckerNoNewLeaderCounter.Inc() diff --git a/pkg/schedule/checker/rule_checker_test.go b/pkg/schedule/checker/rule_checker_test.go index 2668ac8cc43..f99208a988b 100644 --- a/pkg/schedule/checker/rule_checker_test.go +++ b/pkg/schedule/checker/rule_checker_test.go @@ -1018,20 +1018,20 @@ func (suite *ruleCheckerTestSuite) TestFixOrphanPeerWithDisconnectedStoreAndRule op = suite.rc.Check(suite.cluster.GetRegion(1)) re.NotNil(op) re.Contains(op.Desc(), "orphan") - var removedPeerStroeID uint64 + var removedPeerStoreID uint64 newLeaderStoreID := r1.GetLeader().GetStoreId() for i := 0; i < op.Len(); i++ { if s, ok := op.Step(i).(operator.RemovePeer); ok { - removedPeerStroeID = s.FromStore + removedPeerStoreID = s.FromStore } if s, ok := op.Step(i).(operator.TransferLeader); ok { newLeaderStoreID = s.ToStore } } - re.NotZero(removedPeerStroeID) + re.NotZero(removedPeerStoreID) r1 = r1.Clone( core.WithLeader(r1.GetStorePeer(newLeaderStoreID)), - core.WithRemoveStorePeer(removedPeerStroeID)) + core.WithRemoveStorePeer(removedPeerStoreID)) suite.cluster.PutRegion(r1) r1 = suite.cluster.GetRegion(1) re.Len(r1.GetPeers(), 6-j) @@ -1571,7 +1571,7 @@ func (suite *ruleCheckerTestSuite) TestFixOfflinePeer() { re.Nil(suite.rc.Check(region)) } -func (suite *ruleCheckerTestSuite) TestFixOfflinePeerWithAvaliableWitness() { +func (suite *ruleCheckerTestSuite) TestFixOfflinePeerWithAvailableWitness() { re := suite.Require() suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1"}) suite.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z1"}) @@ -1980,7 +1980,7 @@ func makeStores() placement.StoreSet { if zone == 1 && host == 1 { labels["type"] = "read" } - stores.SetStore(core.NewStoreInfoWithLabel(id, labels).Clone(core.SetLastHeartbeatTS(now), 
core.SetStoreState(metapb.StoreState_Up))) + stores.PutStore(core.NewStoreInfoWithLabel(id, labels).Clone(core.SetLastHeartbeatTS(now), core.SetStoreState(metapb.StoreState_Up))) } } } @@ -2053,7 +2053,7 @@ func (suite *ruleCheckerTestAdvancedSuite) TestReplaceAnExistingPeerCases() { {"111_learner,211_learner,311_learner,151_leader,252,351", []string{"3/voter//", "3/learner/type=read/"}, ""}, } groupName := "a_test" - for i, cas := range testCases { + for _, cas := range testCases { bundle := placement.GroupBundle{ ID: groupName, Index: 1000, @@ -2071,7 +2071,7 @@ func (suite *ruleCheckerTestAdvancedSuite) TestReplaceAnExistingPeerCases() { suite.cluster.PutRegion(region) op := suite.rc.Check(region) if len(cas.opStr) > 0 { - re.Contains(op.String(), cas.opStr, i, cas.opStr) + re.Contains(op.String(), cas.opStr, cas.opStr) } suite.ruleManager.DeleteGroupBundle(groupName, false) } @@ -2112,3 +2112,63 @@ func (suite *ruleCheckerTestSuite) TestRemoveOrphanPeer() { suite.NotNil(op) suite.Equal("remove-orphan-peer", op.Desc()) } + +func (suite *ruleCheckerTestSuite) TestIssue7808() { + re := suite.Require() + suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1", "disk_type": "mix"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2", "disk_type": "mix"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3", "disk_type": "ssd"}) + suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4", "disk_type": "ssd"}) + suite.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5", "disk_type": "ssd"}) + suite.cluster.AddLeaderRegionWithRange(1, "", "", 3, 4, 1) + err := suite.ruleManager.SetRules([]*placement.Rule{ + { + GroupID: "pd", + ID: "1", + Role: placement.Voter, + Count: 2, + LabelConstraints: []placement.LabelConstraint{ + { + Key: "disk_type", + Values: []string{ + "ssd", + }, + Op: placement.In, + }, + }, + LocationLabels: []string{"host"}, + IsolationLevel: "host", + }, + { + GroupID: "pd", + ID: "2", + Role: placement.Follower, + Count: 1, + LabelConstraints: []placement.LabelConstraint{ + { + Key: "disk_type", + Values: []string{ + "mix", + }, + Op: placement.In, + }, + }, + LocationLabels: []string{"host"}, + IsolationLevel: "host", + }, + }) + re.NoError(err) + err = suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) + re.NoError(err) + suite.cluster.SetStoreDown(1) + region := suite.cluster.GetRegion(1) + downPeer := []*pdpb.PeerStats{ + {Peer: region.GetStorePeer(1), DownSeconds: 6000}, + } + region = region.Clone(core.WithDownPeers(downPeer)) + suite.cluster.PutRegion(region) + op := suite.rc.Check(suite.cluster.GetRegion(1)) + re.NotNil(op) + re.Equal("fast-replace-rule-down-peer", op.Desc()) + re.Contains(op.Brief(), "mv peer: store [1] to [2]") +} diff --git a/pkg/schedule/checker/split_checker.go b/pkg/schedule/checker/split_checker.go index 072bdcf7a2e..3a34eee8c90 100644 --- a/pkg/schedule/checker/split_checker.go +++ b/pkg/schedule/checker/split_checker.go @@ -51,7 +51,7 @@ func NewSplitChecker(cluster sche.CheckerCluster, ruleManager *placement.RuleMan } // GetType returns the checker type. 
-func (c *SplitChecker) GetType() string { +func (*SplitChecker) GetType() string { return "split-checker" } diff --git a/pkg/schedule/config/OWNERS b/pkg/schedule/config/OWNERS new file mode 100644 index 00000000000..ce5d15ddc19 --- /dev/null +++ b/pkg/schedule/config/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs at https://go.k8s.io/owners +options: + no_parent_owners: true +filters: + "(OWNERS|(config|store_config)\\.go)$": + approvers: + - sig-critical-approvers-config diff --git a/pkg/schedule/config/config.go b/pkg/schedule/config/config.go index 90a37c93d91..5a67a547483 100644 --- a/pkg/schedule/config/config.go +++ b/pkg/schedule/config/config.go @@ -49,14 +49,16 @@ const ( defaultSlowStoreEvictingAffectedStoreRatioThreshold = 0.3 defaultMaxMovableHotPeerSize = int64(512) - defaultEnableJointConsensus = true - defaultEnableTiKVSplitRegion = true - defaultEnableCrossTableMerge = true - defaultEnableDiagnostic = true - defaultStrictlyMatchLabel = false - defaultEnablePlacementRules = true - defaultEnableWitness = false - defaultHaltScheduling = false + defaultEnableJointConsensus = true + defaultEnableTiKVSplitRegion = true + defaultEnableHeartbeatBreakdownMetrics = true + defaultEnableHeartbeatConcurrentRunner = true + defaultEnableCrossTableMerge = true + defaultEnableDiagnostic = true + defaultStrictlyMatchLabel = false + defaultEnablePlacementRules = true + defaultEnableWitness = false + defaultHaltScheduling = false defaultRegionScoreFormulaVersion = "v2" defaultLeaderSchedulePolicy = "count" @@ -263,11 +265,17 @@ type ScheduleConfig struct { // on ebs-based BR we need to disable it with TTL EnableTiKVSplitRegion bool `toml:"enable-tikv-split-region" json:"enable-tikv-split-region,string"` + // EnableHeartbeatBreakdownMetrics is the option to enable heartbeat stats metrics. + EnableHeartbeatBreakdownMetrics bool `toml:"enable-heartbeat-breakdown-metrics" json:"enable-heartbeat-breakdown-metrics,string"` + + // EnableHeartbeatConcurrentRunner is the option to enable heartbeat concurrent runner. + EnableHeartbeatConcurrentRunner bool `toml:"enable-heartbeat-concurrent-runner" json:"enable-heartbeat-concurrent-runner,string"` + // Schedulers support for loading customized schedulers Schedulers SchedulerConfigs `toml:"schedulers" json:"schedulers-v2"` // json v2 is for the sake of compatible upgrade // Only used to display - SchedulersPayload map[string]interface{} `toml:"schedulers-payload" json:"schedulers-payload"` + SchedulersPayload map[string]any `toml:"schedulers-payload" json:"schedulers-payload"` // Controls the time interval between write hot regions info into leveldb. 
HotRegionsWriteInterval typeutil.Duration `toml:"hot-regions-write-interval" json:"hot-regions-write-interval"` @@ -373,6 +381,15 @@ func (c *ScheduleConfig) Adjust(meta *configutil.ConfigMetaData, reloading bool) if !meta.IsDefined("enable-tikv-split-region") { c.EnableTiKVSplitRegion = defaultEnableTiKVSplitRegion } + + if !meta.IsDefined("enable-heartbeat-breakdown-metrics") { + c.EnableHeartbeatBreakdownMetrics = defaultEnableHeartbeatBreakdownMetrics + } + + if !meta.IsDefined("enable-heartbeat-concurrent-runner") { + c.EnableHeartbeatConcurrentRunner = defaultEnableHeartbeatConcurrentRunner + } + if !meta.IsDefined("enable-cross-table-merge") { c.EnableCrossTableMerge = defaultEnableCrossTableMerge } @@ -398,7 +415,7 @@ func (c *ScheduleConfig) Adjust(meta *configutil.ConfigMetaData, reloading bool) adjustSchedulers(&c.Schedulers, DefaultSchedulers) for k, b := range c.migrateConfigurationMap() { - v, err := c.parseDeprecatedFlag(meta, k, *b[0], *b[1]) + v, err := parseDeprecatedFlag(meta, k, *b[0], *b[1]) if err != nil { return err } @@ -447,7 +464,7 @@ func (c *ScheduleConfig) GetMaxMergeRegionKeys() uint64 { return c.MaxMergeRegionSize * 10000 } -func (c *ScheduleConfig) parseDeprecatedFlag(meta *configutil.ConfigMetaData, name string, old, new bool) (bool, error) { +func parseDeprecatedFlag(meta *configutil.ConfigMetaData, name string, old, new bool) (bool, error) { oldName, newName := "disable-"+name, "enable-"+name defineOld, defineNew := meta.IsDefined(oldName), meta.IsDefined(newName) switch { @@ -553,13 +570,11 @@ type SchedulerConfig struct { var DefaultSchedulers = SchedulerConfigs{ {Type: "balance-region"}, {Type: "balance-leader"}, - {Type: "balance-witness"}, {Type: "hot-region"}, - {Type: "transfer-witness-leader"}, {Type: "evict-slow-store"}, } -// IsDefaultScheduler checks whether the scheduler is enable by default. +// IsDefaultScheduler checks whether the scheduler is enabled by default. func IsDefaultScheduler(typ string) bool { for _, c := range DefaultSchedulers { if typ == c.Type { diff --git a/pkg/schedule/config/config_provider.go b/pkg/schedule/config/config_provider.go index 20c7f0dc2cf..90e489f86f3 100644 --- a/pkg/schedule/config/config_provider.go +++ b/pkg/schedule/config/config_provider.go @@ -46,7 +46,7 @@ func IsSchedulerRegistered(name string) bool { type SchedulerConfigProvider interface { SharedConfigProvider - IsSchedulingHalted() bool + SetSchedulingAllowanceStatus(bool, string) GetStoresLimit() map[uint64]StoreLimitConfig IsSchedulerDisabled(string) bool diff --git a/pkg/schedule/coordinator.go b/pkg/schedule/coordinator.go index c7c77cabf3f..fb22303f0b7 100644 --- a/pkg/schedule/coordinator.go +++ b/pkg/schedule/coordinator.go @@ -52,7 +52,8 @@ const ( // pushOperatorTickInterval is the interval try to push the operator. pushOperatorTickInterval = 500 * time.Millisecond - patrolScanRegionLimit = 128 // It takes about 14 minutes to iterate 1 million regions. + // It takes about 1.3 minutes(1000000/128*10/60/1000) to iterate 1 million regions(with DefaultPatrolRegionInterval=10ms). 
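The two heartbeat toggles added above default to true when they are absent from the configuration file (see Adjust). As a sketch outside the patch, the snippet below shows how they round-trip through the TOML tags; the use of the BurntSushi decoder and the literal values are assumptions here.

package main

import (
    "fmt"

    "github.com/BurntSushi/toml"

    sc "github.com/tikv/pd/pkg/schedule/config"
)

func main() {
    var cfg sc.ScheduleConfig
    // Only the two new knobs are set; everything else keeps its zero value in this
    // sketch, whereas PD's Adjust would fill in the documented defaults.
    _, err := toml.Decode(`
enable-heartbeat-breakdown-metrics = true
enable-heartbeat-concurrent-runner = false
`, &cfg)
    if err != nil {
        panic(err)
    }
    fmt.Println(cfg.EnableHeartbeatBreakdownMetrics, cfg.EnableHeartbeatConcurrentRunner) // true false
}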
+ patrolScanRegionLimit = 128 // PluginLoad means action for load plugin PluginLoad = "PluginLoad" // PluginUnload means action for unload plugin @@ -74,6 +75,7 @@ type Coordinator struct { cancel context.CancelFunc schedulersInitialized bool + patrolRegionsDuration time.Duration cluster sche.ClusterInformer prepareChecker *prepareChecker @@ -110,6 +112,22 @@ func NewCoordinator(parentCtx context.Context, cluster sche.ClusterInformer, hbS } } +// GetPatrolRegionsDuration returns the duration of the last patrol region round. +func (c *Coordinator) GetPatrolRegionsDuration() time.Duration { + if c == nil { + return 0 + } + c.RLock() + defer c.RUnlock() + return c.patrolRegionsDuration +} + +func (c *Coordinator) setPatrolRegionsDuration(dur time.Duration) { + c.Lock() + defer c.Unlock() + c.patrolRegionsDuration = dur +} + // markSchedulersInitialized marks the scheduler initialization is finished. func (c *Coordinator) markSchedulersInitialized() { c.Lock() @@ -157,10 +175,11 @@ func (c *Coordinator) PatrolRegions() { ticker.Reset(c.cluster.GetCheckerConfig().GetPatrolRegionInterval()) case <-c.ctx.Done(): patrolCheckRegionsGauge.Set(0) + c.setPatrolRegionsDuration(0) log.Info("patrol regions has been stopped") return } - if c.isSchedulingHalted() { + if c.cluster.IsSchedulingHalted() { continue } @@ -178,7 +197,9 @@ func (c *Coordinator) PatrolRegions() { // Updates the label level isolation statistics. c.cluster.UpdateRegionsLabelLevelStats(regions) if len(key) == 0 { - patrolCheckRegionsGauge.Set(time.Since(start).Seconds()) + dur := time.Since(start) + patrolCheckRegionsGauge.Set(dur.Seconds()) + c.setPatrolRegionsDuration(dur) start = time.Now() } failpoint.Inject("break-patrol", func() { @@ -187,10 +208,6 @@ func (c *Coordinator) PatrolRegions() { } } -func (c *Coordinator) isSchedulingHalted() bool { - return c.cluster.GetSchedulerConfig().IsSchedulingHalted() -} - func (c *Coordinator) checkRegions(startKey []byte) (key []byte, regions []*core.RegionInfo) { regions = c.cluster.ScanRegions(startKey, nil, patrolScanRegionLimit) if len(regions) == 0 { diff --git a/pkg/schedule/core/cluster_informer.go b/pkg/schedule/core/cluster_informer.go index 63dacd0c30d..b97459d26ea 100644 --- a/pkg/schedule/core/cluster_informer.go +++ b/pkg/schedule/core/cluster_informer.go @@ -43,6 +43,7 @@ type SchedulerCluster interface { GetSchedulerConfig() sc.SchedulerConfigProvider GetRegionLabeler() *labeler.RegionLabeler GetStoreConfig() sc.StoreConfigProvider + IsSchedulingHalted() bool } // CheckerCluster is an aggregate interface that wraps multiple interfaces diff --git a/pkg/schedule/filter/candidates_test.go b/pkg/schedule/filter/candidates_test.go index 13e8ed661cc..0d805312ba7 100644 --- a/pkg/schedule/filter/candidates_test.go +++ b/pkg/schedule/filter/candidates_test.go @@ -48,9 +48,9 @@ func idComparer2(a, b *core.StoreInfo) int { type idFilter func(uint64) bool -func (f idFilter) Scope() string { return "idFilter" } -func (f idFilter) Type() filterType { return filterType(0) } -func (f idFilter) Source(conf config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { +func (idFilter) Scope() string { return "idFilter" } +func (idFilter) Type() filterType { return filterType(0) } +func (f idFilter) Source(_ config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { if f(store.GetID()) { return statusOK } @@ -58,7 +58,7 @@ func (f idFilter) Source(conf config.SharedConfigProvider, store *core.StoreInfo return statusStoreScoreDisallowed } -func (f idFilter) Target(conf 
config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { +func (f idFilter) Target(_ config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { if f(store.GetID()) { return statusOK } diff --git a/pkg/schedule/filter/counter.go b/pkg/schedule/filter/counter.go index 0120ef5b666..29c75bbe41d 100644 --- a/pkg/schedule/filter/counter.go +++ b/pkg/schedule/filter/counter.go @@ -188,6 +188,10 @@ func NewCounter(scope string) *Counter { return &Counter{counter: counter, scope: scope} } +func (c *Counter) SetScope(scope string) { + c.scope = scope +} + // Add adds the filter counter. func (c *Counter) inc(action action, filterType filterType, sourceID uint64, targetID uint64) { if _, ok := c.counter[action][filterType][sourceID]; !ok { diff --git a/pkg/schedule/filter/filters.go b/pkg/schedule/filter/filters.go index 0d188e69180..1838f0104f4 100644 --- a/pkg/schedule/filter/filters.go +++ b/pkg/schedule/filter/filters.go @@ -185,18 +185,18 @@ func (f *excludedFilter) Scope() string { return f.scope } -func (f *excludedFilter) Type() filterType { +func (*excludedFilter) Type() filterType { return excluded } -func (f *excludedFilter) Source(conf config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { +func (f *excludedFilter) Source(_ config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { if _, ok := f.sources[store.GetID()]; ok { return statusStoreAlreadyHasPeer } return statusOK } -func (f *excludedFilter) Target(conf config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { +func (f *excludedFilter) Target(_ config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { if _, ok := f.targets[store.GetID()]; ok { return statusStoreAlreadyHasPeer } @@ -215,15 +215,15 @@ func (f *storageThresholdFilter) Scope() string { return f.scope } -func (f *storageThresholdFilter) Type() filterType { +func (*storageThresholdFilter) Type() filterType { return storageThreshold } -func (f *storageThresholdFilter) Source(conf config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { +func (*storageThresholdFilter) Source(config.SharedConfigProvider, *core.StoreInfo) *plan.Status { return statusOK } -func (f *storageThresholdFilter) Target(conf config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { +func (*storageThresholdFilter) Target(conf config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { if !store.IsLowSpace(conf.GetLowSpaceRatio()) { return statusOK } @@ -283,11 +283,11 @@ func (f *distinctScoreFilter) Scope() string { return f.scope } -func (f *distinctScoreFilter) Type() filterType { +func (*distinctScoreFilter) Type() filterType { return distinctScore } -func (f *distinctScoreFilter) Source(_ config.SharedConfigProvider, _ *core.StoreInfo) *plan.Status { +func (*distinctScoreFilter) Source(config.SharedConfigProvider, *core.StoreInfo) *plan.Status { return statusOK } @@ -387,7 +387,7 @@ func (f *StoreStateFilter) pauseLeaderTransfer(_ config.SharedConfigProvider, st return statusOK } -func (f *StoreStateFilter) slowStoreEvicted(conf config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { +func (f *StoreStateFilter) slowStoreEvicted(_ config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { if store.EvictedAsSlowStore() { f.Reason = storeStateSlow return statusStoreRejectLeader @@ -583,12 +583,12 @@ func (f labelConstraintFilter) Scope() string { } // Type returns the name of the filter. 
-func (f labelConstraintFilter) Type() filterType { +func (labelConstraintFilter) Type() filterType { return labelConstraint } // Source filters stores when select them as schedule source. -func (f labelConstraintFilter) Source(conf config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { +func (f labelConstraintFilter) Source(_ config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { if placement.MatchLabelConstraints(store, f.constraints) { return statusOK } @@ -634,11 +634,11 @@ func (f *ruleFitFilter) Scope() string { return f.scope } -func (f *ruleFitFilter) Type() filterType { +func (*ruleFitFilter) Type() filterType { return ruleFit } -func (f *ruleFitFilter) Source(_ config.SharedConfigProvider, _ *core.StoreInfo) *plan.Status { +func (*ruleFitFilter) Source(config.SharedConfigProvider, *core.StoreInfo) *plan.Status { return statusOK } @@ -687,11 +687,11 @@ func (f *ruleLeaderFitFilter) Scope() string { return f.scope } -func (f *ruleLeaderFitFilter) Type() filterType { +func (*ruleLeaderFitFilter) Type() filterType { return ruleLeader } -func (f *ruleLeaderFitFilter) Source(_ config.SharedConfigProvider, _ *core.StoreInfo) *plan.Status { +func (*ruleLeaderFitFilter) Source(config.SharedConfigProvider, *core.StoreInfo) *plan.Status { return statusOK } @@ -743,11 +743,11 @@ func (f *ruleWitnessFitFilter) Scope() string { return f.scope } -func (f *ruleWitnessFitFilter) Type() filterType { +func (*ruleWitnessFitFilter) Type() filterType { return ruleFit } -func (f *ruleWitnessFitFilter) Source(_ config.SharedConfigProvider, _ *core.StoreInfo) *plan.Status { +func (*ruleWitnessFitFilter) Source(config.SharedConfigProvider, *core.StoreInfo) *plan.Status { return statusOK } @@ -815,7 +815,7 @@ func (f *engineFilter) Scope() string { return f.scope } -func (f *engineFilter) Type() filterType { +func (*engineFilter) Type() filterType { return engine } @@ -858,7 +858,7 @@ func (f *specialUseFilter) Scope() string { return f.scope } -func (f *specialUseFilter) Type() filterType { +func (*specialUseFilter) Type() filterType { return specialUse } @@ -869,7 +869,7 @@ func (f *specialUseFilter) Source(conf config.SharedConfigProvider, store *core. 
return statusStoreNotMatchRule } -func (f *specialUseFilter) Target(conf config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { +func (f *specialUseFilter) Target(_ config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { if !f.constraint.MatchStore(store) { return statusOK } @@ -932,11 +932,11 @@ func (f *isolationFilter) Scope() string { return f.scope } -func (f *isolationFilter) Type() filterType { +func (*isolationFilter) Type() filterType { return isolation } -func (f *isolationFilter) Source(conf config.SharedConfigProvider, store *core.StoreInfo) *plan.Status { +func (*isolationFilter) Source(config.SharedConfigProvider, *core.StoreInfo) *plan.Status { return statusOK } diff --git a/pkg/schedule/filter/filters_test.go b/pkg/schedule/filter/filters_test.go index f030dff81a4..f061a472d65 100644 --- a/pkg/schedule/filter/filters_test.go +++ b/pkg/schedule/filter/filters_test.go @@ -313,7 +313,7 @@ func TestStoreStateFilterReason(t *testing.T) { } } - // No reason catched + // No reason caught store = store.Clone(core.SetLastHeartbeatTS(time.Now())) testCases := []testCase{ {2, "store-state-ok-filter", "store-state-ok-filter"}, diff --git a/pkg/schedule/filter/region_filters.go b/pkg/schedule/filter/region_filters.go index 799cee7d90c..7cd015412c2 100644 --- a/pkg/schedule/filter/region_filters.go +++ b/pkg/schedule/filter/region_filters.go @@ -76,7 +76,7 @@ func NewRegionPendingFilter() RegionFilter { return ®ionPendingFilter{} } -func (f *regionPendingFilter) Select(region *core.RegionInfo) *plan.Status { +func (*regionPendingFilter) Select(region *core.RegionInfo) *plan.Status { if hasPendingPeers(region) { return statusRegionPendingPeer } @@ -91,7 +91,7 @@ func NewRegionDownFilter() RegionFilter { return ®ionDownFilter{} } -func (f *regionDownFilter) Select(region *core.RegionInfo) *plan.Status { +func (*regionDownFilter) Select(region *core.RegionInfo) *plan.Status { if hasDownPeers(region) { return statusRegionDownPeer } diff --git a/pkg/schedule/handler/handler.go b/pkg/schedule/handler/handler.go index 346a7254284..0541a2d6567 100644 --- a/pkg/schedule/handler/handler.go +++ b/pkg/schedule/handler/handler.go @@ -133,6 +133,17 @@ func (h *Handler) RemoveOperator(regionID uint64) error { return nil } +// RemoveOperators removes the all operators. +func (h *Handler) RemoveOperators() error { + c, err := h.GetOperatorController() + if err != nil { + return err + } + + c.RemoveOperators(operator.AdminStop) + return nil +} + // GetOperators returns the running operators. func (h *Handler) GetOperators() ([]*operator.Operator, error) { c, err := h.GetOperatorController() @@ -229,7 +240,7 @@ func (h *Handler) GetRecords(from time.Time) ([]*operator.OpRecord, error) { // HandleOperatorCreation processes the request and creates an operator based on the provided input. // It supports various types of operators such as transfer-leader, transfer-region, add-peer, remove-peer, merge-region, split-region, scatter-region, and scatter-regions. // The function validates the input, performs the corresponding operation, and returns the HTTP status code, response body, and any error encountered during the process. 
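
The receiver and parameter renames in the filter hunks above all apply one Go idiom: an identifier the method body never reads can be dropped or blanked, which is what the newly enabled lint checks look for. A minimal, self-contained sketch of that idiom (the noopFilter type below is illustrative only, not part of the patch):

package main

import "fmt"

type noopFilter struct{}

// Unused receiver: drop its name entirely.
func (noopFilter) Type() string { return "noop" }

// Unused parameters: drop the names and keep only the types.
func (noopFilter) Source(string, int) bool { return true }

// Mixed: keep the names you use, blank the rest with "_".
func (f noopFilter) Target(_ string, id int) bool { return id > 0 && f.Type() == "noop" }

func main() {
	var f noopFilter
	fmt.Println(f.Type(), f.Source("conf", 1), f.Target("conf", 2))
}
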
-func (h *Handler) HandleOperatorCreation(input map[string]interface{}) (int, interface{}, error) { +func (h *Handler) HandleOperatorCreation(input map[string]any) (int, any, error) { name, ok := input["name"].(string) if !ok { return http.StatusBadRequest, nil, errors.Errorf("missing operator name") @@ -337,7 +348,7 @@ func (h *Handler) HandleOperatorCreation(input map[string]interface{}) (int, int } var keys []string if ks, ok := input["keys"]; ok { - for _, k := range ks.([]interface{}) { + for _, k := range ks.([]any) { key, ok := k.(string) if !ok { return http.StatusBadRequest, nil, errors.Errorf("bad format keys") @@ -406,7 +417,7 @@ func (h *Handler) AddTransferLeaderOperator(regionID uint64, storeID uint64) err return errors.Errorf("region has no voter in store %v", storeID) } - op, err := operator.CreateTransferLeaderOperator("admin-transfer-leader", c, region, region.GetLeader().GetStoreId(), newLeader.GetStoreId(), []uint64{}, operator.OpAdmin) + op, err := operator.CreateTransferLeaderOperator("admin-transfer-leader", c, region, newLeader.GetStoreId(), []uint64{}, operator.OpAdmin) if err != nil { log.Debug("fail to create transfer leader operator", errs.ZapError(err)) return err @@ -729,8 +740,8 @@ func checkStoreState(c sche.SharedCluster, storeID uint64) error { return nil } -func parseStoreIDsAndPeerRole(ids interface{}, roles interface{}) (map[uint64]placement.PeerRoleType, bool) { - items, ok := ids.([]interface{}) +func parseStoreIDsAndPeerRole(ids any, roles any) (map[uint64]placement.PeerRoleType, bool) { + items, ok := ids.([]any) if !ok { return nil, false } @@ -745,7 +756,7 @@ func parseStoreIDsAndPeerRole(ids interface{}, roles interface{}) (map[uint64]pl storeIDToPeerRole[uint64(id)] = "" } - peerRoles, ok := roles.([]interface{}) + peerRoles, ok := roles.([]any) // only consider roles having the same length with ids as the valid case if ok && len(peerRoles) == len(storeIDs) { for i, v := range storeIDs { @@ -799,7 +810,7 @@ type schedulerPausedPeriod struct { } // GetSchedulerByStatus returns all names of schedulers by status. -func (h *Handler) GetSchedulerByStatus(status string, needTS bool) (interface{}, error) { +func (h *Handler) GetSchedulerByStatus(status string, needTS bool) (any, error) { sc, err := h.GetSchedulersController() if err != nil { return nil, err @@ -1146,7 +1157,7 @@ func (h *Handler) AccelerateRegionsScheduleInRanges(startKeys [][]byte, endKeys } // AdjustLimit adjusts the limit of regions to schedule. -func (h *Handler) AdjustLimit(limitStr string, defaultLimits ...int) (int, error) { +func (*Handler) AdjustLimit(limitStr string, defaultLimits ...int) (int, error) { limit := defaultRegionLimit if len(defaultLimits) > 0 { limit = defaultLimits[0] @@ -1170,7 +1181,7 @@ type ScatterRegionsResponse struct { } // BuildScatterRegionsResp builds ScatterRegionsResponse. -func (h *Handler) BuildScatterRegionsResp(opsCount int, failures map[uint64]error) *ScatterRegionsResponse { +func (*Handler) BuildScatterRegionsResp(opsCount int, failures map[uint64]error) *ScatterRegionsResponse { // If there existed any operator failed to be added into Operator Controller, add its regions into unProcessedRegions percentage := 100 if len(failures) > 0 { @@ -1206,7 +1217,7 @@ func (h *Handler) ScatterRegionsByRange(rawStartKey, rawEndKey string, group str } // ScatterRegionsByID scatters regions by id. 
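
The handler signature changes here, like the labeler and rules changes below, only respell interface{} as any: since Go 1.18, any is a built-in alias (type any = interface{}), so the two spellings denote the same type and the rewrite has no behavioral effect. A small, self-contained illustration (not taken from the patch):

package main

import "fmt"

// describe accepts any value; `any` here is exactly interface{}.
func describe(v any) string { return fmt.Sprintf("%T", v) }

func main() {
	// A map typed with the old spelling can hold a literal typed with the new
	// one, because map[string]interface{} and map[string]any are the same type.
	var input map[string]interface{} = map[string]any{"name": "transfer-leader", "region_id": 1}
	fmt.Println(describe(input["name"]), describe(input["region_id"])) // string int
}
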
-func (h *Handler) ScatterRegionsByID(ids []uint64, group string, retryLimit int, skipStoreLimit bool) (int, map[uint64]error, error) { +func (h *Handler) ScatterRegionsByID(ids []uint64, group string, retryLimit int) (int, map[uint64]error, error) { co := h.GetCoordinator() if co == nil { return 0, nil, errs.ErrNotBootstrapped.GenWithStackByArgs() @@ -1221,7 +1232,7 @@ type SplitRegionsResponse struct { } // SplitRegions splits regions by split keys. -func (h *Handler) SplitRegions(ctx context.Context, rawSplitKeys []interface{}, retryLimit int) (*SplitRegionsResponse, error) { +func (h *Handler) SplitRegions(ctx context.Context, rawSplitKeys []any, retryLimit int) (*SplitRegionsResponse, error) { co := h.GetCoordinator() if co == nil { return nil, errs.ErrNotBootstrapped.GenWithStackByArgs() diff --git a/pkg/schedule/labeler/labeler.go b/pkg/schedule/labeler/labeler.go index aeb4ff7b2f9..7670ccdedd7 100644 --- a/pkg/schedule/labeler/labeler.go +++ b/pkg/schedule/labeler/labeler.go @@ -201,10 +201,12 @@ func (l *RegionLabeler) getAndCheckRule(id string, now time.Time) *LabelRule { return rule } if len(rule.Labels) == 0 { - l.DeleteLabelRuleLocked(id) + if err := l.DeleteLabelRuleLocked(id); err != nil { + log.Error("failed to delete label rule", zap.String("rule-key", id), zap.Error(err)) + } return nil } - l.SaveLabelRuleLocked(rule) + _ = l.SaveLabelRuleLocked(rule) return rule } @@ -382,10 +384,10 @@ func (l *RegionLabeler) GetRegionLabels(region *core.RegionInfo) []*RegionLabel } // MakeKeyRanges is a helper function to make key ranges. -func MakeKeyRanges(keys ...string) []interface{} { - var res []interface{} +func MakeKeyRanges(keys ...string) []any { + var res []any for i := 0; i < len(keys); i += 2 { - res = append(res, map[string]interface{}{"start_key": keys[i], "end_key": keys[i+1]}) + res = append(res, map[string]any{"start_key": keys[i], "end_key": keys[i+1]}) } return res } diff --git a/pkg/schedule/labeler/labeler_test.go b/pkg/schedule/labeler/labeler_test.go index 87773ce892d..bd51bab7d83 100644 --- a/pkg/schedule/labeler/labeler_test.go +++ b/pkg/schedule/labeler/labeler_test.go @@ -31,6 +31,7 @@ import ( "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/storage/kv" "github.com/tikv/pd/pkg/utils/etcdutil" + "github.com/tikv/pd/pkg/utils/testutil" ) func TestAdjustRule(t *testing.T) { @@ -369,7 +370,7 @@ func TestLabelerRuleTTL(t *testing.T) { start, _ := hex.DecodeString("1234") end, _ := hex.DecodeString("5678") region := core.NewTestRegionInfo(1, 1, start, end) - // the region has no lable rule at the beginning. + // the region has no label rule at the beginning. re.Empty(labeler.GetRegionLabels(region)) // set rules for the region. @@ -382,15 +383,17 @@ func TestLabelerRuleTTL(t *testing.T) { re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/labeler/regionLabelExpireSub1Minute", "return(true)")) // rule2 should expire and only 2 labels left. - labels := labeler.GetRegionLabels(region) - re.Len(labels, 2) + testutil.Eventually(re, func() bool { + labels := labeler.GetRegionLabels(region) + return len(labels) == 2 + }) re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/labeler/regionLabelExpireSub1Minute")) - // rule2 should be exist since `GetRegionLabels` won't clear it physically. - checkRuleInMemoryAndStoage(re, labeler, "rule2", true) + // rule2 should be existed since `GetRegionLabels` won't clear it physically. 
+ checkRuleInMemoryAndStorage(re, labeler, "rule2", true) re.Nil(labeler.GetLabelRule("rule2")) // rule2 should be physically clear. - checkRuleInMemoryAndStoage(re, labeler, "rule2", false) + checkRuleInMemoryAndStorage(re, labeler, "rule2", false) re.Equal("", labeler.GetRegionLabel(region, "k2")) @@ -398,10 +401,10 @@ func TestLabelerRuleTTL(t *testing.T) { re.NotNil(labeler.GetLabelRule("rule1")) } -func checkRuleInMemoryAndStoage(re *require.Assertions, labeler *RegionLabeler, ruleID string, exist bool) { +func checkRuleInMemoryAndStorage(re *require.Assertions, labeler *RegionLabeler, ruleID string, exist bool) { re.Equal(exist, labeler.labelRules[ruleID] != nil) existInStorage := false - labeler.storage.LoadRegionRules(func(k, v string) { + labeler.storage.LoadRegionRules(func(k, _ string) { if k == ruleID { existInStorage = true } @@ -419,10 +422,10 @@ func TestGC(t *testing.T) { start, _ := hex.DecodeString("1234") end, _ := hex.DecodeString("5678") region := core.NewTestRegionInfo(1, 1, start, end) - // the region has no lable rule at the beginning. + // the region has no label rule at the beginning. re.Empty(labeler.GetRegionLabels(region)) - labels := []RegionLabel{} + labels := make([]RegionLabel, 0, len(ttls)) for id, ttl := range ttls { labels = append(labels, RegionLabel{Key: fmt.Sprintf("k%d", id), Value: fmt.Sprintf("v%d", id), TTL: ttl}) rule := &LabelRule{ @@ -436,7 +439,7 @@ func TestGC(t *testing.T) { re.Len(labeler.labelRules, len(ttls)) - // check all rules unitl some rule expired. + // check all rules until some rule expired. for { time.Sleep(time.Millisecond * 5) labels := labeler.GetRegionLabels(region) diff --git a/pkg/schedule/labeler/rules.go b/pkg/schedule/labeler/rules.go index 5726a9f904e..3462cb7c459 100644 --- a/pkg/schedule/labeler/rules.go +++ b/pkg/schedule/labeler/rules.go @@ -45,7 +45,7 @@ type LabelRule struct { Index int `json:"index"` Labels []RegionLabel `json:"labels"` RuleType string `json:"rule_type"` - Data interface{} `json:"data"` + Data any `json:"data"` minExpire *time.Time } @@ -183,8 +183,8 @@ func (rule *LabelRule) expireBefore(t time.Time) bool { } // initKeyRangeRulesFromLabelRuleData init and adjust []KeyRangeRule from `LabelRule.Data` -func initKeyRangeRulesFromLabelRuleData(data interface{}) ([]*KeyRangeRule, error) { - rules, ok := data.([]interface{}) +func initKeyRangeRulesFromLabelRuleData(data any) ([]*KeyRangeRule, error) { + rules, ok := data.([]any) if !ok { return nil, errs.ErrRegionRuleContent.FastGenByArgs(fmt.Sprintf("invalid rule type: %T", data)) } @@ -203,8 +203,8 @@ func initKeyRangeRulesFromLabelRuleData(data interface{}) ([]*KeyRangeRule, erro } // initAndAdjustKeyRangeRule inits and adjusts the KeyRangeRule from one item in `LabelRule.Data` -func initAndAdjustKeyRangeRule(rule interface{}) (*KeyRangeRule, error) { - data, ok := rule.(map[string]interface{}) +func initAndAdjustKeyRangeRule(rule any) (*KeyRangeRule, error) { + data, ok := rule.(map[string]any) if !ok { return nil, errs.ErrRegionRuleContent.FastGenByArgs(fmt.Sprintf("invalid rule type: %T", reflect.TypeOf(rule))) } diff --git a/pkg/schedule/operator/create_operator.go b/pkg/schedule/operator/create_operator.go index 1c96128ab32..64680520933 100644 --- a/pkg/schedule/operator/create_operator.go +++ b/pkg/schedule/operator/create_operator.go @@ -78,7 +78,7 @@ func CreateRemovePeerOperator(desc string, ci sche.SharedCluster, kind OpKind, r } // CreateTransferLeaderOperator creates an operator that transfers the leader from a source store to a target 
store. -func CreateTransferLeaderOperator(desc string, ci sche.SharedCluster, region *core.RegionInfo, sourceStoreID uint64, targetStoreID uint64, targetStoreIDs []uint64, kind OpKind) (*Operator, error) { +func CreateTransferLeaderOperator(desc string, ci sche.SharedCluster, region *core.RegionInfo, targetStoreID uint64, targetStoreIDs []uint64, kind OpKind) (*Operator, error) { return NewBuilder(desc, ci, region, SkipOriginJointStateCheck). SetLeader(targetStoreID). SetLeaders(targetStoreIDs). @@ -86,7 +86,7 @@ func CreateTransferLeaderOperator(desc string, ci sche.SharedCluster, region *co } // CreateForceTransferLeaderOperator creates an operator that transfers the leader from a source store to a target store forcible. -func CreateForceTransferLeaderOperator(desc string, ci sche.SharedCluster, region *core.RegionInfo, sourceStoreID uint64, targetStoreID uint64, kind OpKind) (*Operator, error) { +func CreateForceTransferLeaderOperator(desc string, ci sche.SharedCluster, region *core.RegionInfo, targetStoreID uint64, kind OpKind) (*Operator, error) { return NewBuilder(desc, ci, region, SkipOriginJointStateCheck, SkipPlacementRulesCheck). SetLeader(targetStoreID). EnableForceTargetLeader(). @@ -170,8 +170,8 @@ func CreateSplitRegionOperator(desc string, region *core.RegionInfo, kind OpKind brief += fmt.Sprintf(" and keys %v", hexKeys) } op := NewOperator(desc, brief, region.GetID(), region.GetRegionEpoch(), kind|OpSplit, region.GetApproximateSize(), step) - op.AdditionalInfos["region-start-key"] = core.HexRegionKeyStr(logutil.RedactBytes(region.GetStartKey())) - op.AdditionalInfos["region-end-key"] = core.HexRegionKeyStr(logutil.RedactBytes(region.GetEndKey())) + op.SetAdditionalInfo("region-start-key", core.HexRegionKeyStr(logutil.RedactBytes(region.GetStartKey()))) + op.SetAdditionalInfo("region-end-key", core.HexRegionKeyStr(logutil.RedactBytes(region.GetEndKey()))) return op, nil } diff --git a/pkg/schedule/operator/create_operator_test.go b/pkg/schedule/operator/create_operator_test.go index 80c6cac4a04..d481334bbcb 100644 --- a/pkg/schedule/operator/create_operator_test.go +++ b/pkg/schedule/operator/create_operator_test.go @@ -423,7 +423,7 @@ func (suite *createOperatorTestSuite) TestCreateTransferLeaderOperator() { } for _, testCase := range testCases { region := core.NewRegionInfo(&metapb.Region{Id: 1, Peers: testCase.originPeers}, testCase.originPeers[0]) - op, err := CreateTransferLeaderOperator("test", suite.cluster, region, testCase.originPeers[0].StoreId, testCase.targetLeaderStoreID, []uint64{}, 0) + op, err := CreateTransferLeaderOperator("test", suite.cluster, region, testCase.targetLeaderStoreID, []uint64{}, 0) if testCase.isErr { re.Error(err) diff --git a/pkg/schedule/operator/operator.go b/pkg/schedule/operator/operator.go index b87a050969f..4d57d4fc6c7 100644 --- a/pkg/schedule/operator/operator.go +++ b/pkg/schedule/operator/operator.go @@ -15,7 +15,6 @@ package operator import ( - "encoding/json" "fmt" "reflect" "strconv" @@ -83,7 +82,7 @@ type Operator struct { level constant.PriorityLevel Counters []prometheus.Counter FinishedCounters []prometheus.Counter - AdditionalInfos map[string]string + additionalInfos opAdditionalInfo ApproximateSize int64 timeout time.Duration influence *OpInfluence @@ -100,16 +99,18 @@ func NewOperator(desc, brief string, regionID uint64, regionEpoch *metapb.Region maxDuration += v.Timeout(approximateSize).Seconds() } return &Operator{ - desc: desc, - brief: brief, - regionID: regionID, - regionEpoch: regionEpoch, - kind: kind, - steps: 
steps, - stepsTime: make([]int64, len(steps)), - status: NewOpStatusTracker(), - level: level, - AdditionalInfos: make(map[string]string), + desc: desc, + brief: brief, + regionID: regionID, + regionEpoch: regionEpoch, + kind: kind, + steps: steps, + stepsTime: make([]int64, len(steps)), + status: NewOpStatusTracker(), + level: level, + additionalInfos: opAdditionalInfo{ + value: make(map[string]string), + }, ApproximateSize: approximateSize, timeout: time.Duration(maxDuration) * time.Second, } @@ -118,8 +119,8 @@ func NewOperator(desc, brief string, regionID uint64, regionEpoch *metapb.Region // Sync some attribute with the given timeout. func (o *Operator) Sync(other *Operator) { o.timeout = other.timeout - o.AdditionalInfos[string(RelatedMergeRegion)] = strconv.FormatUint(other.RegionID(), 10) - other.AdditionalInfos[string(RelatedMergeRegion)] = strconv.FormatUint(o.RegionID(), 10) + o.SetAdditionalInfo(string(RelatedMergeRegion), strconv.FormatUint(other.RegionID(), 10)) + other.SetAdditionalInfo(string(RelatedMergeRegion), strconv.FormatUint(o.RegionID(), 10)) } func (o *Operator) String() string { @@ -297,8 +298,10 @@ func (o *Operator) CheckSuccess() bool { // Cancel marks the operator canceled. func (o *Operator) Cancel(reason ...CancelReasonType) bool { - if _, ok := o.AdditionalInfos[cancelReason]; !ok && len(reason) != 0 { - o.AdditionalInfos[cancelReason] = string(reason[0]) + o.additionalInfos.Lock() + defer o.additionalInfos.Unlock() + if _, ok := o.additionalInfos.value[cancelReason]; !ok && len(reason) != 0 { + o.additionalInfos.value[cancelReason] = string(reason[0]) } return o.status.To(CANCELED) } @@ -373,10 +376,11 @@ func (o *Operator) Check(region *core.RegionInfo) OpStep { defer func() { _ = o.CheckTimeout() }() for step := atomic.LoadInt32(&o.currentStep); int(step) < len(o.steps); step++ { if o.steps[int(step)].IsFinish(region) { - if atomic.CompareAndSwapInt64(&(o.stepsTime[step]), 0, time.Now().UnixNano()) { + current := time.Now() + if atomic.CompareAndSwapInt64(&(o.stepsTime[step]), 0, current.UnixNano()) { startTime, _ := o.getCurrentTimeAndStep() operatorStepDuration.WithLabelValues(reflect.TypeOf(o.steps[int(step)]).Name()). - Observe(time.Unix(0, o.stepsTime[step]).Sub(startTime).Seconds()) + Observe(current.Sub(startTime).Seconds()) } atomic.StoreInt32(&o.currentStep, step+1) } else { @@ -507,17 +511,6 @@ func (o *Operator) Record(finishTime time.Time) *OpRecord { return record } -// GetAdditionalInfo returns additional info with string -func (o *Operator) GetAdditionalInfo() string { - if len(o.AdditionalInfos) != 0 { - additionalInfo, err := json.Marshal(o.AdditionalInfos) - if err == nil { - return string(additionalInfo) - } - } - return "" -} - // IsLeaveJointStateOperator returns true if the desc is OpDescLeaveJointState. 
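
operator.go now funnels every additional-info access through SetAdditionalInfo, GetAdditionalInfo(key), and LogAdditionalInfo, whose definitions sit outside this excerpt. A minimal stand-alone sketch of what that accessor pattern presumably looks like, inferred from the call sites above; the operatorSketch type, the use of sync.RWMutex, and the field layout are assumptions, not the patch's actual code:

package main

import (
	"fmt"
	"sync"
)

// opAdditionalInfo guards the map that used to be the exported AdditionalInfos field.
type opAdditionalInfo struct {
	sync.RWMutex
	value map[string]string
}

// operatorSketch stands in for Operator, which now embeds the guarded map.
type operatorSketch struct {
	additionalInfos opAdditionalInfo
}

// SetAdditionalInfo stores one key/value pair under the write lock, mirroring
// the SetAdditionalInfo calls in create_operator.go and Sync above.
func (o *operatorSketch) SetAdditionalInfo(key, value string) {
	o.additionalInfos.Lock()
	defer o.additionalInfos.Unlock()
	if o.additionalInfos.value == nil {
		o.additionalInfos.value = map[string]string{}
	}
	o.additionalInfos.value[key] = value
}

// GetAdditionalInfo returns the stored value for one key ("" if absent), the
// form removeRelatedMergeOperator uses to look up RelatedMergeRegion.
func (o *operatorSketch) GetAdditionalInfo(key string) string {
	o.additionalInfos.RLock()
	defer o.additionalInfos.RUnlock()
	return o.additionalInfos.value[key]
}

func main() {
	op := &operatorSketch{}
	op.SetAdditionalInfo("region-start-key", "7480000000000000ff0a")
	fmt.Println(op.GetAdditionalInfo("region-start-key"))
}
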
func (o *Operator) IsLeaveJointStateOperator() bool { return strings.EqualFold(o.desc, OpDescLeaveJointState) diff --git a/pkg/schedule/operator/operator_controller.go b/pkg/schedule/operator/operator_controller.go index e3bead3ffca..fe93bd98756 100644 --- a/pkg/schedule/operator/operator_controller.go +++ b/pkg/schedule/operator/operator_controller.go @@ -15,10 +15,10 @@ package operator import ( - "container/heap" "context" "fmt" "strconv" + "sync" "time" "github.com/pingcap/failpoint" @@ -52,20 +52,51 @@ var ( FastOperatorFinishTime = 10 * time.Second ) +type opCounter struct { + syncutil.RWMutex + count map[OpKind]uint64 +} + +func (c *opCounter) inc(kind OpKind) { + c.Lock() + defer c.Unlock() + c.count[kind]++ +} + +func (c *opCounter) dec(kind OpKind) { + c.Lock() + defer c.Unlock() + if c.count[kind] > 0 { + c.count[kind]-- + } +} + +func (c *opCounter) getCountByKind(kind OpKind) uint64 { + c.RLock() + defer c.RUnlock() + return c.count[kind] +} + // Controller is used to limit the speed of scheduling. type Controller struct { - syncutil.RWMutex - ctx context.Context - config config.SharedConfigProvider - cluster *core.BasicCluster - operators map[uint64]*Operator - hbStreams *hbstream.HeartbeatStreams - fastOperators *cache.TTLUint64 - counts map[OpKind]uint64 - records *records - wop WaitingOperator - wopStatus *waitingOperatorStatus - opNotifierQueue operatorQueue + operators sync.Map + ctx context.Context + config config.SharedConfigProvider + cluster *core.BasicCluster + hbStreams *hbstream.HeartbeatStreams + + // fast path, TTLUint64 is safe for concurrent. + fastOperators *cache.TTLUint64 + + // opNotifierQueue is a priority queue to notify the operator to be checked. + // safe for concurrent. + opNotifierQueue *concurrentHeapOpQueue + + // states + records *records // safe for concurrent + wop WaitingOperator + wopStatus *waitingOperatorStatus + counts *opCounter } // NewController creates a Controller. @@ -74,14 +105,14 @@ func NewController(ctx context.Context, cluster *core.BasicCluster, config confi ctx: ctx, cluster: cluster, config: config, - operators: make(map[uint64]*Operator), hbStreams: hbStreams, fastOperators: cache.NewIDTTL(ctx, time.Minute, FastOperatorFinishTime), - counts: make(map[OpKind]uint64), - records: newRecords(ctx), - wop: newRandBuckets(), - wopStatus: newWaitingOperatorStatus(), - opNotifierQueue: make(operatorQueue, 0), + opNotifierQueue: newConcurrentHeapOpQueue(), + // states + records: newRecords(ctx), + wop: newRandBuckets(), + wopStatus: newWaitingOperatorStatus(), + counts: &opCounter{count: make(map[OpKind]uint64)}, } } @@ -93,8 +124,6 @@ func (oc *Controller) Ctx() context.Context { // GetCluster exports basic cluster to evict-scheduler for check store status. func (oc *Controller) GetCluster() *core.BasicCluster { - oc.RLock() - defer oc.RUnlock() return oc.cluster } @@ -193,7 +222,7 @@ func (oc *Controller) checkStaleOperator(op *Operator, step OpStep, region *core return false } -func (oc *Controller) getNextPushOperatorTime(step OpStep, now time.Time) time.Time { +func getNextPushOperatorTime(step OpStep, now time.Time) time.Time { nextTime := slowNotifyInterval switch step.(type) { case TransferLeader, PromoteLearner, ChangePeerV2Enter, ChangePeerV2Leave: @@ -206,21 +235,22 @@ func (oc *Controller) getNextPushOperatorTime(step OpStep, now time.Time) time.T // "next" is true to indicate that it may exist in next attempt, // and false is the end for the poll. 
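
The hunks above switch Controller.operators from an RWMutex-guarded map[uint64]*Operator to a sync.Map; the methods that follow then use Store, Load, and Range keyed by region ID, with a type assertion on every read. A small self-contained sketch of that access pattern (operatorStub below is illustrative, not PD code):

package main

import (
	"fmt"
	"sync"
)

// operatorStub stands in for *Operator; only the field needed here.
type operatorStub struct{ regionID uint64 }

func main() {
	var operators sync.Map // effectively map[uint64]*operatorStub

	// Store replaces "oc.operators[regionID] = op" under the old write lock.
	operators.Store(uint64(1), &operatorStub{regionID: 1})

	// Load replaces the locked map read; values come back as `any`, so each
	// read needs a type assertion, just like opi.(*Operator) in the hunks.
	if v, ok := operators.Load(uint64(1)); ok {
		fmt.Println("found operator for region", v.(*operatorStub).regionID)
	}

	// Range replaces "for _, op := range oc.operators"; returning false stops
	// the walk early, and writers never block readers.
	operators.Range(func(_, value any) bool {
		fmt.Println("iterating region", value.(*operatorStub).regionID)
		return true
	})

	operators.Delete(uint64(1))
}
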
func (oc *Controller) pollNeedDispatchRegion() (r *core.RegionInfo, next bool) { - oc.Lock() - defer oc.Unlock() if oc.opNotifierQueue.Len() == 0 { return nil, false } - item := heap.Pop(&oc.opNotifierQueue).(*operatorWithTime) + item, _ := oc.opNotifierQueue.Pop() regionID := item.op.RegionID() - op, ok := oc.operators[regionID] - if !ok || op == nil { + opi, ok := oc.operators.Load(regionID) + if !ok || opi.(*Operator) == nil { return nil, true } - r = oc.cluster.GetRegion(regionID) - if r == nil { - _ = oc.removeOperatorLocked(op) - if op.Cancel(RegionNotFound) { + op := opi.(*Operator) + // Check the operator lightly. It cant't dispatch the op for some scenario. + var reason CancelReasonType + r, reason = oc.checkOperatorLightly(op) + if len(reason) != 0 { + _ = oc.removeOperatorInner(op) + if op.Cancel(reason) { log.Warn("remove operator because region disappeared", zap.Uint64("region-id", op.RegionID()), zap.Stringer("operator", op)) @@ -235,13 +265,13 @@ func (oc *Controller) pollNeedDispatchRegion() (r *core.RegionInfo, next bool) { } now := time.Now() if now.Before(item.time) { - heap.Push(&oc.opNotifierQueue, item) + oc.opNotifierQueue.Push(item) return nil, false } // pushes with new notify time. - item.time = oc.getNextPushOperatorTime(step, now) - heap.Push(&oc.opNotifierQueue, item) + item.time = getNextPushOperatorTime(step, now) + oc.opNotifierQueue.Push(item) return r, true } @@ -262,7 +292,6 @@ func (oc *Controller) PushOperators(recordOpStepWithTTL func(regionID uint64)) { // AddWaitingOperator adds operators to waiting operators. func (oc *Controller) AddWaitingOperator(ops ...*Operator) int { - oc.Lock() added := 0 needPromoted := 0 @@ -274,13 +303,11 @@ func (oc *Controller) AddWaitingOperator(ops ...*Operator) int { if i+1 >= len(ops) { // should not be here forever log.Error("orphan merge operators found", zap.String("desc", desc), errs.ZapError(errs.ErrMergeOperator.FastGenByArgs("orphan operator found"))) - oc.Unlock() return added } if ops[i+1].Kind()&OpMerge == 0 { log.Error("merge operator should be paired", zap.String("desc", ops[i+1].Desc()), errs.ZapError(errs.ErrMergeOperator.FastGenByArgs("operator should be paired"))) - oc.Unlock() return added } isMerge = true @@ -297,21 +324,22 @@ func (oc *Controller) AddWaitingOperator(ops ...*Operator) int { } continue } - oc.wop.PutOperator(op) + if isMerge { // count two merge operators as one, so wopStatus.ops[desc] should // not be updated here + // TODO: call checkAddOperator ... + oc.wop.PutMergeOperators([]*Operator{op, ops[i+1]}) i++ added++ - oc.wop.PutOperator(ops[i]) + } else { + oc.wop.PutOperator(op) } operatorCounter.WithLabelValues(desc, "put").Inc() - oc.wopStatus.ops[desc]++ + oc.wopStatus.incCount(desc) added++ needPromoted++ } - - oc.Unlock() operatorCounter.WithLabelValues(ops[0].Desc(), "promote-add").Add(float64(needPromoted)) for i := 0; i < needPromoted; i++ { oc.PromoteWaitingOperator() @@ -321,13 +349,10 @@ func (oc *Controller) AddWaitingOperator(ops ...*Operator) int { // AddOperator adds operators to the running operators. func (oc *Controller) AddOperator(ops ...*Operator) bool { - oc.Lock() - defer oc.Unlock() - // note: checkAddOperator uses false param for `isPromoting`. // This is used to keep check logic before fixing issue #4946, // but maybe user want to add operator when waiting queue is busy - if oc.exceedStoreLimitLocked(ops...) { + if oc.ExceedStoreLimit(ops...) 
{ for _, op := range ops { operatorCounter.WithLabelValues(op.Desc(), "exceed-limit").Inc() _ = op.Cancel(ExceedStoreLimit) @@ -343,7 +368,7 @@ func (oc *Controller) AddOperator(ops ...*Operator) bool { return false } for _, op := range ops { - if !oc.addOperatorLocked(op) { + if !oc.addOperatorInner(op) { return false } } @@ -352,23 +377,22 @@ func (oc *Controller) AddOperator(ops ...*Operator) bool { // PromoteWaitingOperator promotes operators from waiting operators. func (oc *Controller) PromoteWaitingOperator() { - oc.Lock() - defer oc.Unlock() var ops []*Operator for { // GetOperator returns one operator or two merge operators + // need write lock ops = oc.wop.GetOperator() if ops == nil { return } operatorCounter.WithLabelValues(ops[0].Desc(), "get").Inc() - if oc.exceedStoreLimitLocked(ops...) { + if oc.ExceedStoreLimit(ops...) { for _, op := range ops { operatorCounter.WithLabelValues(op.Desc(), "exceed-limit").Inc() _ = op.Cancel(ExceedStoreLimit) oc.buryOperator(op) } - oc.wopStatus.ops[ops[0].Desc()]-- + oc.wopStatus.decCount(ops[0].Desc()) continue } @@ -378,15 +402,15 @@ func (oc *Controller) PromoteWaitingOperator() { _ = op.Cancel(reason) oc.buryOperator(op) } - oc.wopStatus.ops[ops[0].Desc()]-- + oc.wopStatus.decCount(ops[0].Desc()) continue } - oc.wopStatus.ops[ops[0].Desc()]-- + oc.wopStatus.decCount(ops[0].Desc()) break } for _, op := range ops { - if !oc.addOperatorLocked(op) { + if !oc.addOperatorInner(op) { break } } @@ -417,7 +441,8 @@ func (oc *Controller) checkAddOperator(isPromoting bool, ops ...*Operator) (bool operatorCounter.WithLabelValues(op.Desc(), "epoch-not-match").Inc() return false, EpochNotMatch } - if old := oc.operators[op.RegionID()]; old != nil && !isHigherPriorityOperator(op, old) { + if oldi, ok := oc.operators.Load(op.RegionID()); ok && oldi.(*Operator) != nil && !isHigherPriorityOperator(op, oldi.(*Operator)) { + old := oldi.(*Operator) log.Debug("already have operator, cancel add operator", zap.Uint64("region-id", op.RegionID()), zap.Reflect("old", old)) @@ -435,8 +460,8 @@ func (oc *Controller) checkAddOperator(isPromoting bool, ops ...*Operator) (bool operatorCounter.WithLabelValues(op.Desc(), "unexpected-status").Inc() return false, NotInCreateStatus } - if !isPromoting && oc.wopStatus.ops[op.Desc()] >= oc.config.GetSchedulerMaxWaitingOperator() { - log.Debug("exceed max return false", zap.Uint64("waiting", oc.wopStatus.ops[op.Desc()]), zap.String("desc", op.Desc()), zap.Uint64("max", oc.config.GetSchedulerMaxWaitingOperator())) + if !isPromoting && oc.wopStatus.getCount(op.Desc()) >= oc.config.GetSchedulerMaxWaitingOperator() { + log.Debug("exceed max return false", zap.Uint64("waiting", oc.wopStatus.getCount(op.Desc())), zap.String("desc", op.Desc()), zap.Uint64("max", oc.config.GetSchedulerMaxWaitingOperator())) operatorCounter.WithLabelValues(op.Desc(), "exceed-max-waiting").Inc() return false, ExceedWaitLimit } @@ -455,21 +480,43 @@ func (oc *Controller) checkAddOperator(isPromoting bool, ops ...*Operator) (bool return reason != Expired, reason } +// checkOperatorLightly checks whether the ops can be dispatched in Controller::pollNeedDispatchRegion. +// The operators can't be dispatched for some scenarios, such as region disappeared, region changed ... +// `region` is the target region of `op`. 
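
checkOperatorLightly, defined just below, leans on the region-epoch convention that Version advances when the key range changes (split or merge) while ConfVer advances on membership changes; only a Version bump, not a ConfVer bump, makes the lightweight epoch check fail. A tiny illustration of that comparison (the types and helper below are illustrative, not PD code):

package main

import "fmt"

// regionEpoch mirrors the two counters carried in metapb.RegionEpoch.
type regionEpoch struct{ ConfVer, Version uint64 }

// staleForMerge reports whether the epoch check would cancel a merge operator
// created at opEpoch, given the region's current epoch.
func staleForMerge(current, opEpoch regionEpoch) bool {
	return current.Version > opEpoch.Version
}

func main() {
	opEpoch := regionEpoch{ConfVer: 1, Version: 5}
	// Only ConfVer moved (a peer was added or removed): the key range is
	// intact, so the operator is still dispatchable.
	fmt.Println(staleForMerge(regionEpoch{ConfVer: 2, Version: 5}, opEpoch)) // false
	// Version moved (the region split or merged): the key range changed, so
	// the merge operator is cancelled with EpochNotMatch.
	fmt.Println(staleForMerge(regionEpoch{ConfVer: 1, Version: 6}, opEpoch)) // true
}
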
+func (oc *Controller) checkOperatorLightly(op *Operator) (*core.RegionInfo, CancelReasonType) { + region := oc.cluster.GetRegion(op.RegionID()) + if region == nil { + operatorCounter.WithLabelValues(op.Desc(), "not-found").Inc() + return nil, RegionNotFound + } + + // It may be suitable for all kinds of operator but not merge-region. + // But to be cautions, it only takes effect on merge-region currently. + // If the version of epoch is changed, the region has been splitted or merged, and the key range has been changed. + // The changing for conf_version of epoch doesn't modify the region key range, skip it. + if (op.Kind()&OpMerge != 0) && region.GetRegionEpoch().GetVersion() > op.RegionEpoch().GetVersion() { + operatorCounter.WithLabelValues(op.Desc(), "epoch-not-match").Inc() + return nil, EpochNotMatch + } + return region, "" +} + func isHigherPriorityOperator(new, old *Operator) bool { return new.GetPriorityLevel() > old.GetPriorityLevel() } -func (oc *Controller) addOperatorLocked(op *Operator) bool { +func (oc *Controller) addOperatorInner(op *Operator) bool { regionID := op.RegionID() log.Info("add operator", zap.Uint64("region-id", regionID), zap.Reflect("operator", op), - zap.String("additional-info", op.GetAdditionalInfo())) + zap.String("additional-info", op.LogAdditionalInfo())) // If there is an old operator, replace it. The priority should be checked // already. - if old, ok := oc.operators[regionID]; ok { - _ = oc.removeOperatorLocked(old) + if oldi, ok := oc.operators.Load(regionID); ok { + old := oldi.(*Operator) + _ = oc.removeOperatorInner(old) _ = old.Replace() oc.buryOperator(old) } @@ -485,7 +532,8 @@ func (oc *Controller) addOperatorLocked(op *Operator) bool { operatorCounter.WithLabelValues(op.Desc(), "unexpected").Inc() return false } - oc.operators[regionID] = op + oc.operators.Store(regionID, op) + oc.counts.inc(op.SchedulerKind()) operatorCounter.WithLabelValues(op.Desc(), "start").Inc() operatorSizeHist.WithLabelValues(op.Desc()).Observe(float64(op.ApproximateSize)) opInfluence := NewTotalOpInfluence([]*Operator{op}, oc.cluster) @@ -505,7 +553,6 @@ func (oc *Controller) addOperatorLocked(op *Operator) bool { storeLimitCostCounter.WithLabelValues(strconv.FormatUint(storeID, 10), n).Add(float64(stepCost) / float64(storelimit.RegionInfluence[v])) } } - oc.updateCounts(oc.operators) var step OpStep if region := oc.cluster.GetRegion(op.RegionID()); region != nil { @@ -514,7 +561,7 @@ func (oc *Controller) addOperatorLocked(op *Operator) bool { } } - heap.Push(&oc.opNotifierQueue, &operatorWithTime{op: op, time: oc.getNextPushOperatorTime(step, time.Now())}) + oc.opNotifierQueue.Push(&operatorWithTime{op: op, time: getNextPushOperatorTime(step, time.Now())}) operatorCounter.WithLabelValues(op.Desc(), "create").Inc() for _, counter := range op.Counters { counter.Inc() @@ -536,11 +583,44 @@ func (oc *Controller) ack(op *Operator) { } } +// RemoveOperators removes all operators from the running operators. 
+func (oc *Controller) RemoveOperators(reasons ...CancelReasonType) { + removed := oc.removeOperatorsInner() + var cancelReason CancelReasonType + if len(reasons) > 0 { + cancelReason = reasons[0] + } + for _, op := range removed { + if op.Cancel(cancelReason) { + log.Info("operator removed", + zap.Uint64("region-id", op.RegionID()), + zap.Duration("takes", op.RunningTime()), + zap.Reflect("operator", op)) + } + oc.buryOperator(op) + } +} + +func (oc *Controller) removeOperatorsInner() []*Operator { + var removed []*Operator + oc.operators.Range(func(regionID, value any) bool { + op := value.(*Operator) + oc.operators.Delete(regionID) + oc.counts.dec(op.SchedulerKind()) + operatorCounter.WithLabelValues(op.Desc(), "remove").Inc() + oc.ack(op) + if op.Kind()&OpMerge != 0 { + oc.removeRelatedMergeOperator(op) + } + removed = append(removed, op) + return true + }) + return removed +} + // RemoveOperator removes an operator from the running operators. func (oc *Controller) RemoveOperator(op *Operator, reasons ...CancelReasonType) bool { - oc.Lock() - removed := oc.removeOperatorLocked(op) - oc.Unlock() + removed := oc.removeOperatorInner(op) var cancelReason CancelReasonType if len(reasons) > 0 { cancelReason = reasons[0] @@ -558,16 +638,14 @@ func (oc *Controller) RemoveOperator(op *Operator, reasons ...CancelReasonType) } func (oc *Controller) removeOperatorWithoutBury(op *Operator) bool { - oc.Lock() - defer oc.Unlock() - return oc.removeOperatorLocked(op) + return oc.removeOperatorInner(op) } -func (oc *Controller) removeOperatorLocked(op *Operator) bool { +func (oc *Controller) removeOperatorInner(op *Operator) bool { regionID := op.RegionID() - if cur := oc.operators[regionID]; cur == op { - delete(oc.operators, regionID) - oc.updateCounts(oc.operators) + if cur, ok := oc.operators.Load(regionID); ok && cur.(*Operator) == op { + oc.operators.Delete(regionID) + oc.counts.dec(op.SchedulerKind()) operatorCounter.WithLabelValues(op.Desc(), "remove").Inc() oc.ack(op) if op.Kind()&OpMerge != 0 { @@ -579,13 +657,18 @@ func (oc *Controller) removeOperatorLocked(op *Operator) bool { } func (oc *Controller) removeRelatedMergeOperator(op *Operator) { - relatedID, _ := strconv.ParseUint(op.AdditionalInfos[string(RelatedMergeRegion)], 10, 64) - if relatedOp := oc.operators[relatedID]; relatedOp != nil && relatedOp.Status() != CANCELED { + relatedID, _ := strconv.ParseUint(op.GetAdditionalInfo(string(RelatedMergeRegion)), 10, 64) + relatedOpi, ok := oc.operators.Load(relatedID) + if !ok { + return + } + relatedOp := relatedOpi.(*Operator) + if relatedOp != nil && relatedOp.Status() != CANCELED { log.Info("operator canceled related merge region", zap.Uint64("region-id", relatedOp.RegionID()), - zap.String("additional-info", relatedOp.GetAdditionalInfo()), + zap.String("additional-info", relatedOp.LogAdditionalInfo()), zap.Duration("takes", relatedOp.RunningTime())) - oc.removeOperatorLocked(relatedOp) + oc.removeOperatorInner(relatedOp) relatedOp.Cancel(RelatedMergeRegion) oc.buryOperator(relatedOp) } @@ -612,7 +695,7 @@ func (oc *Controller) buryOperator(op *Operator) { zap.Uint64("region-id", op.RegionID()), zap.Duration("takes", op.RunningTime()), zap.Reflect("operator", op), - zap.String("additional-info", op.GetAdditionalInfo())) + zap.String("additional-info", op.LogAdditionalInfo())) operatorCounter.WithLabelValues(op.Desc(), "finish").Inc() operatorDuration.WithLabelValues(op.Desc()).Observe(op.RunningTime().Seconds()) for _, counter := range op.FinishedCounters { @@ -623,7 +706,7 @@ func (oc 
*Controller) buryOperator(op *Operator) { zap.Uint64("region-id", op.RegionID()), zap.Duration("takes", op.RunningTime()), zap.Reflect("operator", op), - zap.String("additional-info", op.GetAdditionalInfo())) + zap.String("additional-info", op.LogAdditionalInfo())) operatorCounter.WithLabelValues(op.Desc(), "replace").Inc() case EXPIRED: log.Info("operator expired", @@ -636,14 +719,14 @@ func (oc *Controller) buryOperator(op *Operator) { zap.Uint64("region-id", op.RegionID()), zap.Duration("takes", op.RunningTime()), zap.Reflect("operator", op), - zap.String("additional-info", op.GetAdditionalInfo())) + zap.String("additional-info", op.LogAdditionalInfo())) operatorCounter.WithLabelValues(op.Desc(), "timeout").Inc() case CANCELED: log.Info("operator canceled", zap.Uint64("region-id", op.RegionID()), zap.Duration("takes", op.RunningTime()), zap.Reflect("operator", op), - zap.String("additional-info", op.GetAdditionalInfo()), + zap.String("additional-info", op.LogAdditionalInfo()), ) operatorCounter.WithLabelValues(op.Desc(), "cancel").Inc() } @@ -653,9 +736,8 @@ func (oc *Controller) buryOperator(op *Operator) { // GetOperatorStatus gets the operator and its status with the specify id. func (oc *Controller) GetOperatorStatus(id uint64) *OpWithStatus { - oc.Lock() - defer oc.Unlock() - if op, ok := oc.operators[id]; ok { + if opi, ok := oc.operators.Load(id); ok && opi.(*Operator) != nil { + op := opi.(*Operator) return NewOpWithStatus(op) } return oc.records.Get(id) @@ -663,43 +745,39 @@ func (oc *Controller) GetOperatorStatus(id uint64) *OpWithStatus { // GetOperator gets an operator from the given region. func (oc *Controller) GetOperator(regionID uint64) *Operator { - oc.RLock() - defer oc.RUnlock() - return oc.operators[regionID] + if v, ok := oc.operators.Load(regionID); ok { + return v.(*Operator) + } + return nil } // GetOperators gets operators from the running operators. func (oc *Controller) GetOperators() []*Operator { - oc.RLock() - defer oc.RUnlock() - - operators := make([]*Operator, 0, len(oc.operators)) - for _, op := range oc.operators { - operators = append(operators, op) - } - + operators := make([]*Operator, 0, oc.opNotifierQueue.Len()) + oc.operators.Range( + func(_, value any) bool { + operators = append(operators, value.(*Operator)) + return true + }) return operators } // GetWaitingOperators gets operators from the waiting operators. func (oc *Controller) GetWaitingOperators() []*Operator { - oc.RLock() - defer oc.RUnlock() return oc.wop.ListOperator() } // GetOperatorsOfKind returns the running operators of the kind. func (oc *Controller) GetOperatorsOfKind(mask OpKind) []*Operator { - oc.RLock() - defer oc.RUnlock() - - operators := make([]*Operator, 0, len(oc.operators)) - for _, op := range oc.operators { - if op.Kind()&mask != 0 { - operators = append(operators, op) - } - } - + operators := make([]*Operator, 0, oc.opNotifierQueue.Len()) + oc.operators.Range( + func(_, value any) bool { + op := value.(*Operator) + if op.Kind()&mask != 0 { + operators = append(operators, value.(*Operator)) + } + return true + }) return operators } @@ -748,22 +826,10 @@ func (oc *Controller) GetHistory(start time.Time) []OpHistory { return history } -// updateCounts updates resource counts using current pending operators. -func (oc *Controller) updateCounts(operators map[uint64]*Operator) { - for k := range oc.counts { - delete(oc.counts, k) - } - for _, op := range operators { - oc.counts[op.SchedulerKind()]++ - } -} - // OperatorCount gets the count of operators filtered by kind. 
// kind only has one OpKind. func (oc *Controller) OperatorCount(kind OpKind) uint64 { - oc.RLock() - defer oc.RUnlock() - return oc.counts[kind] + return oc.counts.getCountByKind(kind) } // GetOpInfluence gets OpInfluence. @@ -771,16 +837,17 @@ func (oc *Controller) GetOpInfluence(cluster *core.BasicCluster) OpInfluence { influence := OpInfluence{ StoresInfluence: make(map[uint64]*StoreInfluence), } - oc.RLock() - defer oc.RUnlock() - for _, op := range oc.operators { - if !op.CheckTimeout() && !op.CheckSuccess() { - region := cluster.GetRegion(op.RegionID()) - if region != nil { - op.UnfinishedInfluence(influence, region) + oc.operators.Range( + func(_, value any) bool { + op := value.(*Operator) + if !op.CheckTimeout() && !op.CheckSuccess() { + region := cluster.GetRegion(op.RegionID()) + if region != nil { + op.UnfinishedInfluence(influence, region) + } } - } - } + return true + }) return influence } @@ -824,10 +891,8 @@ func NewTotalOpInfluence(operators []*Operator, cluster *core.BasicCluster) OpIn // SetOperator is only used for test. func (oc *Controller) SetOperator(op *Operator) { - oc.Lock() - defer oc.Unlock() - oc.operators[op.RegionID()] = op - oc.updateCounts(oc.operators) + oc.operators.Store(op.RegionID(), op) + oc.counts.inc(op.SchedulerKind()) } // OpWithStatus records the operator and its status. @@ -883,13 +948,6 @@ func (o *records) Put(op *Operator) { // ExceedStoreLimit returns true if the store exceeds the cost limit after adding the Otherwise, returns false. func (oc *Controller) ExceedStoreLimit(ops ...*Operator) bool { - oc.Lock() - defer oc.Unlock() - return oc.exceedStoreLimitLocked(ops...) -} - -// exceedStoreLimitLocked returns true if the store exceeds the cost limit after adding the Otherwise, returns false. -func (oc *Controller) exceedStoreLimitLocked(ops ...*Operator) bool { // The operator with Urgent priority, like admin operators, should ignore the store limit check. 
var desc string if len(ops) != 0 { diff --git a/pkg/schedule/operator/operator_controller_test.go b/pkg/schedule/operator/operator_controller_test.go index e47281a2c68..2b16516c4c7 100644 --- a/pkg/schedule/operator/operator_controller_test.go +++ b/pkg/schedule/operator/operator_controller_test.go @@ -15,7 +15,6 @@ package operator import ( - "container/heap" "context" "encoding/hex" "fmt" @@ -109,7 +108,7 @@ func (suite *operatorControllerTestSuite) TestGetOpInfluence() { re.True(op2.Start()) oc.SetOperator(op2) go func(ctx context.Context) { - suite.checkRemoveOperatorSuccess(re, oc, op1) + checkRemoveOperatorSuccess(re, oc, op1) for { select { case <-ctx.Done(): @@ -365,10 +364,10 @@ func (suite *operatorControllerTestSuite) TestPollDispatchRegion() { oc.SetOperator(op4) re.True(op2.Start()) oc.SetOperator(op2) - heap.Push(&oc.opNotifierQueue, &operatorWithTime{op: op1, time: time.Now().Add(100 * time.Millisecond)}) - heap.Push(&oc.opNotifierQueue, &operatorWithTime{op: op3, time: time.Now().Add(300 * time.Millisecond)}) - heap.Push(&oc.opNotifierQueue, &operatorWithTime{op: op4, time: time.Now().Add(499 * time.Millisecond)}) - heap.Push(&oc.opNotifierQueue, &operatorWithTime{op: op2, time: time.Now().Add(500 * time.Millisecond)}) + oc.opNotifierQueue.Push(&operatorWithTime{op: op1, time: time.Now().Add(100 * time.Millisecond)}) + oc.opNotifierQueue.Push(&operatorWithTime{op: op3, time: time.Now().Add(300 * time.Millisecond)}) + oc.opNotifierQueue.Push(&operatorWithTime{op: op4, time: time.Now().Add(499 * time.Millisecond)}) + oc.opNotifierQueue.Push(&operatorWithTime{op: op2, time: time.Now().Add(500 * time.Millisecond)}) } // first poll got nil r, next := oc.pollNeedDispatchRegion() @@ -407,6 +406,131 @@ func (suite *operatorControllerTestSuite) TestPollDispatchRegion() { re.False(next) } +// issue #7992 +func (suite *operatorControllerTestSuite) TestPollDispatchRegionForMergeRegion() { + re := suite.Require() + opts := mockconfig.NewTestOptions() + cluster := mockcluster.NewCluster(suite.ctx, opts) + stream := hbstream.NewTestHeartbeatStreams(suite.ctx, cluster.ID, cluster, false /* no need to run */) + controller := NewController(suite.ctx, cluster.GetBasicCluster(), cluster.GetSharedConfig(), stream) + cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) + cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2"}) + cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3"}) + + source := newRegionInfo(101, "1a", "1b", 10, 10, []uint64{101, 1}, []uint64{101, 1}) + source.GetMeta().RegionEpoch = &metapb.RegionEpoch{} + cluster.PutRegion(source) + target := newRegionInfo(102, "1b", "1c", 10, 10, []uint64{101, 1}, []uint64{101, 1}) + target.GetMeta().RegionEpoch = &metapb.RegionEpoch{} + cluster.PutRegion(target) + + ops, err := CreateMergeRegionOperator("merge-region", cluster, source, target, OpMerge) + re.NoError(err) + re.Len(ops, 2) + re.Equal(2, controller.AddWaitingOperator(ops...)) + // Change next push time to now, it's used to make test case faster. + controller.opNotifierQueue.heap[0].time = time.Now() + + // first poll gets source region op. + r, next := controller.pollNeedDispatchRegion() + re.True(next) + re.Equal(r, source) + + // second poll gets target region op. + controller.opNotifierQueue.heap[0].time = time.Now() + r, next = controller.pollNeedDispatchRegion() + re.True(next) + re.Equal(r, target) + + // third poll removes the two merge-region ops. 
+ source.GetMeta().RegionEpoch = &metapb.RegionEpoch{ConfVer: 0, Version: 1} + r, next = controller.pollNeedDispatchRegion() + re.True(next) + re.Nil(r) + re.Equal(1, controller.opNotifierQueue.Len()) + re.Empty(controller.GetOperators()) + re.Empty(controller.wop.ListOperator()) + re.NotNil(controller.records.Get(101)) + re.NotNil(controller.records.Get(102)) + + // fourth poll removes target region op from opNotifierQueue + controller.opNotifierQueue.heap[0].time = time.Now() + r, next = controller.pollNeedDispatchRegion() + re.True(next) + re.Nil(r) + re.Equal(0, controller.opNotifierQueue.Len()) + + // Add the two ops to waiting operators again. + source.GetMeta().RegionEpoch = &metapb.RegionEpoch{ConfVer: 0, Version: 0} + controller.records.ttl.Remove(101) + controller.records.ttl.Remove(102) + ops, err = CreateMergeRegionOperator("merge-region", cluster, source, target, OpMerge) + re.NoError(err) + re.Equal(2, controller.AddWaitingOperator(ops...)) + // change the target RegionEpoch + // first poll gets source region from opNotifierQueue + target.GetMeta().RegionEpoch = &metapb.RegionEpoch{ConfVer: 0, Version: 1} + controller.opNotifierQueue.heap[0].time = time.Now() + r, next = controller.pollNeedDispatchRegion() + re.True(next) + re.Equal(r, source) + + r, next = controller.pollNeedDispatchRegion() + re.True(next) + re.Nil(r) + re.Equal(1, controller.opNotifierQueue.Len()) + re.Empty(controller.GetOperators()) + re.Empty(controller.wop.ListOperator()) + re.NotNil(controller.records.Get(101)) + re.NotNil(controller.records.Get(102)) + + controller.opNotifierQueue.heap[0].time = time.Now() + r, next = controller.pollNeedDispatchRegion() + re.True(next) + re.Nil(r) + re.Equal(0, controller.opNotifierQueue.Len()) +} + +func (suite *operatorControllerTestSuite) TestCheckOperatorLightly() { + re := suite.Require() + opts := mockconfig.NewTestOptions() + cluster := mockcluster.NewCluster(suite.ctx, opts) + stream := hbstream.NewTestHeartbeatStreams(suite.ctx, cluster.ID, cluster, false /* no need to run */) + controller := NewController(suite.ctx, cluster.GetBasicCluster(), cluster.GetSharedConfig(), stream) + cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) + cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2"}) + cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3"}) + + source := newRegionInfo(101, "1a", "1b", 10, 10, []uint64{101, 1}, []uint64{101, 1}) + source.GetMeta().RegionEpoch = &metapb.RegionEpoch{} + cluster.PutRegion(source) + target := newRegionInfo(102, "1b", "1c", 10, 10, []uint64{101, 1}, []uint64{101, 1}) + target.GetMeta().RegionEpoch = &metapb.RegionEpoch{} + cluster.PutRegion(target) + + ops, err := CreateMergeRegionOperator("merge-region", cluster, source, target, OpMerge) + re.NoError(err) + re.Len(ops, 2) + + // check successfully + r, reason := controller.checkOperatorLightly(ops[0]) + re.Empty(reason) + re.Equal(r, source) + + // check failed because of region disappeared + cluster.RemoveRegion(target) + r, reason = controller.checkOperatorLightly(ops[1]) + re.Nil(r) + re.Equal(reason, RegionNotFound) + + // check failed because of verions of region epoch changed + cluster.PutRegion(target) + source.GetMeta().RegionEpoch = &metapb.RegionEpoch{ConfVer: 0, Version: 1} + r, reason = controller.checkOperatorLightly(ops[0]) + re.Nil(r) + re.Equal(reason, EpochNotMatch) +} + func (suite *operatorControllerTestSuite) TestStoreLimit() { re := suite.Require() opt := mockconfig.NewTestOptions() @@ -426,7 +550,7 @@ func (suite 
*operatorControllerTestSuite) TestStoreLimit() { for i := uint64(1); i <= 5; i++ { op := NewTestOperator(1, &metapb.RegionEpoch{}, OpRegion, AddPeer{ToStore: 2, PeerID: i}) re.True(oc.AddOperator(op)) - suite.checkRemoveOperatorSuccess(re, oc, op) + checkRemoveOperatorSuccess(re, oc, op) } op := NewTestOperator(1, &metapb.RegionEpoch{}, OpRegion, AddPeer{ToStore: 2, PeerID: 1}) re.False(oc.AddOperator(op)) @@ -436,13 +560,13 @@ func (suite *operatorControllerTestSuite) TestStoreLimit() { for i := uint64(1); i <= 10; i++ { op = NewTestOperator(i, &metapb.RegionEpoch{}, OpRegion, AddPeer{ToStore: 2, PeerID: i}) re.True(oc.AddOperator(op)) - suite.checkRemoveOperatorSuccess(re, oc, op) + checkRemoveOperatorSuccess(re, oc, op) } tc.SetAllStoresLimit(storelimit.AddPeer, 60) for i := uint64(1); i <= 5; i++ { op = NewTestOperator(i, &metapb.RegionEpoch{}, OpRegion, AddPeer{ToStore: 2, PeerID: i}) re.True(oc.AddOperator(op)) - suite.checkRemoveOperatorSuccess(re, oc, op) + checkRemoveOperatorSuccess(re, oc, op) } op = NewTestOperator(1, &metapb.RegionEpoch{}, OpRegion, AddPeer{ToStore: 2, PeerID: 1}) re.False(oc.AddOperator(op)) @@ -452,7 +576,7 @@ func (suite *operatorControllerTestSuite) TestStoreLimit() { for i := uint64(1); i <= 5; i++ { op := NewTestOperator(1, &metapb.RegionEpoch{}, OpRegion, RemovePeer{FromStore: 2}) re.True(oc.AddOperator(op)) - suite.checkRemoveOperatorSuccess(re, oc, op) + checkRemoveOperatorSuccess(re, oc, op) } op = NewTestOperator(1, &metapb.RegionEpoch{}, OpRegion, RemovePeer{FromStore: 2}) re.False(oc.AddOperator(op)) @@ -462,13 +586,13 @@ func (suite *operatorControllerTestSuite) TestStoreLimit() { for i := uint64(1); i <= 10; i++ { op = NewTestOperator(i, &metapb.RegionEpoch{}, OpRegion, RemovePeer{FromStore: 2}) re.True(oc.AddOperator(op)) - suite.checkRemoveOperatorSuccess(re, oc, op) + checkRemoveOperatorSuccess(re, oc, op) } tc.SetAllStoresLimit(storelimit.RemovePeer, 60) for i := uint64(1); i <= 5; i++ { op = NewTestOperator(i, &metapb.RegionEpoch{}, OpRegion, RemovePeer{FromStore: 2}) re.True(oc.AddOperator(op)) - suite.checkRemoveOperatorSuccess(re, oc, op) + checkRemoveOperatorSuccess(re, oc, op) } op = NewTestOperator(1, &metapb.RegionEpoch{}, OpRegion, RemovePeer{FromStore: 2}) re.False(oc.AddOperator(op)) @@ -736,7 +860,7 @@ func newRegionInfo(id uint64, startKey, endKey string, size, keys int64, leader ) } -func (suite *operatorControllerTestSuite) checkRemoveOperatorSuccess(re *require.Assertions, oc *Controller, op *Operator) { +func checkRemoveOperatorSuccess(re *require.Assertions, oc *Controller, op *Operator) { re.True(oc.RemoveOperator(op)) re.True(op.IsEnd()) re.Equal(op, oc.GetOperatorStatus(op.RegionID()).Operator) @@ -786,7 +910,7 @@ func (suite *operatorControllerTestSuite) TestAddWaitingOperator() { batch = append(batch, addPeerOp(100)) added = controller.AddWaitingOperator(batch...) 
re.Equal(1, added) - re.NotNil(controller.operators[uint64(100)]) + re.NotNil(controller.GetOperator(uint64(100))) source := newRegionInfo(101, "1a", "1b", 1, 1, []uint64{101, 1}, []uint64{101, 1}) cluster.PutRegion(source) @@ -803,7 +927,7 @@ func (suite *operatorControllerTestSuite) TestAddWaitingOperator() { ID: "schedulelabel", Labels: []labeler.RegionLabel{{Key: "schedule", Value: "deny"}}, RuleType: labeler.KeyRange, - Data: []interface{}{map[string]interface{}{"start_key": "1a", "end_key": "1b"}}, + Data: []any{map[string]any{"start_key": "1a", "end_key": "1b"}}, }) re.True(labelerManager.ScheduleDisabled(source)) @@ -827,7 +951,44 @@ func (suite *operatorControllerTestSuite) TestInvalidStoreId() { RemovePeer{FromStore: 3, PeerID: 3, IsDownStore: false}, } op := NewTestOperator(1, &metapb.RegionEpoch{}, OpRegion, steps...) - re.True(oc.addOperatorLocked(op)) + re.True(oc.AddOperator(op)) // Although store 3 does not exist in PD, PD can also send op to TiKV. re.Equal(pdpb.OperatorStatus_RUNNING, oc.GetOperatorStatus(1).Status) } + +func TestConcurrentAddOperatorAndSetStoreLimit(t *testing.T) { + re := require.New(t) + opt := mockconfig.NewTestOptions() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + tc := mockcluster.NewCluster(ctx, opt) + stream := hbstream.NewTestHeartbeatStreams(ctx, tc.ID, tc, false /* no need to run */) + oc := NewController(ctx, tc.GetBasicCluster(), tc.GetSharedConfig(), stream) + + regionNum := 1000 + limit := 1600.0 + storeID := uint64(2) + for i := 1; i < 4; i++ { + tc.AddRegionStore(uint64(i), regionNum) + tc.SetStoreLimit(uint64(i), storelimit.AddPeer, limit) + } + for i := 1; i <= regionNum; i++ { + tc.AddLeaderRegion(uint64(i), 1, 3, 4) + } + + // Add operator and set store limit concurrently + var wg sync.WaitGroup + for i := 1; i < 10; i++ { + wg.Add(1) + go func(i uint64) { + defer wg.Done() + for j := 1; j < 10; j++ { + regionID := uint64(j) + i*100 + op := NewTestOperator(regionID, tc.GetRegion(regionID).GetRegionEpoch(), OpRegion, AddPeer{ToStore: storeID, PeerID: regionID}) + re.True(oc.AddOperator(op)) + tc.SetStoreLimit(storeID, storelimit.AddPeer, limit-float64(j)) // every goroutine set a different limit + } + }(uint64(i)) + } + wg.Wait() +} diff --git a/pkg/schedule/operator/operator_queue.go b/pkg/schedule/operator/operator_queue.go index 7765427793f..2233845724e 100644 --- a/pkg/schedule/operator/operator_queue.go +++ b/pkg/schedule/operator/operator_queue.go @@ -15,6 +15,8 @@ package operator import ( + "container/heap" + "sync" "time" ) @@ -35,12 +37,12 @@ func (opn operatorQueue) Swap(i, j int) { opn[i], opn[j] = opn[j], opn[i] } -func (opn *operatorQueue) Push(x interface{}) { +func (opn *operatorQueue) Push(x any) { item := x.(*operatorWithTime) *opn = append(*opn, item) } -func (opn *operatorQueue) Pop() interface{} { +func (opn *operatorQueue) Pop() any { old := *opn n := len(old) if n == 0 { @@ -50,3 +52,34 @@ func (opn *operatorQueue) Pop() interface{} { *opn = old[0 : n-1] return item } + +type concurrentHeapOpQueue struct { + sync.Mutex + heap operatorQueue +} + +func newConcurrentHeapOpQueue() *concurrentHeapOpQueue { + return &concurrentHeapOpQueue{heap: make(operatorQueue, 0)} +} + +func (ch *concurrentHeapOpQueue) Len() int { + ch.Lock() + defer ch.Unlock() + return len(ch.heap) +} + +func (ch *concurrentHeapOpQueue) Push(x *operatorWithTime) { + ch.Lock() + defer ch.Unlock() + heap.Push(&ch.heap, x) +} + +func (ch *concurrentHeapOpQueue) Pop() (*operatorWithTime, bool) { + ch.Lock() + defer 
ch.Unlock() + if len(ch.heap) == 0 { + return nil, false + } + x := heap.Pop(&ch.heap).(*operatorWithTime) + return x, true +} diff --git a/pkg/schedule/operator/operator_test.go b/pkg/schedule/operator/operator_test.go index 4719df9408b..1f44d813f1e 100644 --- a/pkg/schedule/operator/operator_test.go +++ b/pkg/schedule/operator/operator_test.go @@ -17,6 +17,7 @@ package operator import ( "context" "encoding/json" + "sync" "sync/atomic" "testing" "time" @@ -65,7 +66,7 @@ func (suite *operatorTestSuite) TearDownTest() { suite.cancel() } -func (suite *operatorTestSuite) newTestRegion(regionID uint64, leaderPeer uint64, peers ...[2]uint64) *core.RegionInfo { +func newTestRegion(regionID uint64, leaderPeer uint64, peers ...[2]uint64) *core.RegionInfo { var ( region metapb.Region leader *metapb.Peer @@ -87,7 +88,7 @@ func (suite *operatorTestSuite) newTestRegion(regionID uint64, leaderPeer uint64 func (suite *operatorTestSuite) TestOperatorStep() { re := suite.Require() - region := suite.newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2}) + region := newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2}) re.False(TransferLeader{FromStore: 1, ToStore: 2}.IsFinish(region)) re.True(TransferLeader{FromStore: 2, ToStore: 1}.IsFinish(region)) re.False(AddPeer{ToStore: 3, PeerID: 3}.IsFinish(region)) @@ -96,11 +97,7 @@ func (suite *operatorTestSuite) TestOperatorStep() { re.True(RemovePeer{FromStore: 3}.IsFinish(region)) } -func (suite *operatorTestSuite) newTestOperator(regionID uint64, kind OpKind, steps ...OpStep) *Operator { - return NewTestOperator(regionID, &metapb.RegionEpoch{}, kind, steps...) -} - -func (suite *operatorTestSuite) checkSteps(re *require.Assertions, op *Operator, steps []OpStep) { +func checkSteps(re *require.Assertions, op *Operator, steps []OpStep) { re.Len(steps, op.Len()) for i := range steps { re.Equal(steps[i], op.Step(i)) @@ -109,16 +106,16 @@ func (suite *operatorTestSuite) checkSteps(re *require.Assertions, op *Operator, func (suite *operatorTestSuite) TestOperator() { re := suite.Require() - region := suite.newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2}) + region := newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2}) // addPeer1, transferLeader1, removePeer3 steps := []OpStep{ AddPeer{ToStore: 1, PeerID: 1}, TransferLeader{FromStore: 3, ToStore: 1}, RemovePeer{FromStore: 3}, } - op := suite.newTestOperator(1, OpAdmin|OpLeader|OpRegion, steps...) + op := NewTestOperator(1, &metapb.RegionEpoch{}, OpAdmin|OpLeader|OpRegion, steps...) re.Equal(constant.Urgent, op.GetPriorityLevel()) - suite.checkSteps(re, op, steps) + checkSteps(re, op, steps) op.Start() re.Nil(op.Check(region)) @@ -132,9 +129,9 @@ func (suite *operatorTestSuite) TestOperator() { TransferLeader{FromStore: 2, ToStore: 1}, RemovePeer{FromStore: 2}, } - op = suite.newTestOperator(1, OpLeader|OpRegion, steps...) + op = NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader|OpRegion, steps...) re.Equal(constant.Medium, op.GetPriorityLevel()) - suite.checkSteps(re, op, steps) + checkSteps(re, op, steps) op.Start() re.Equal(RemovePeer{FromStore: 2}, op.Check(region)) re.Equal(int32(2), atomic.LoadInt32(&op.currentStep)) @@ -149,7 +146,7 @@ func (suite *operatorTestSuite) TestOperator() { // check short timeout for transfer leader only operators. steps = []OpStep{TransferLeader{FromStore: 2, ToStore: 1}} - op = suite.newTestOperator(1, OpLeader, steps...) + op = NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader, steps...) 
op.Start() re.False(op.CheckTimeout()) op.SetStatusReachTime(STARTED, op.GetStartTime().Add(-FastStepWaitTime-time.Second)) @@ -166,7 +163,7 @@ func (suite *operatorTestSuite) TestOperator() { func (suite *operatorTestSuite) TestInfluence() { re := suite.Require() - region := suite.newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2}) + region := newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2}) opInfluence := OpInfluence{StoresInfluence: make(map[uint64]*StoreInfluence)} storeOpInfluence := opInfluence.StoresInfluence storeOpInfluence[1] = &StoreInfluence{} @@ -309,7 +306,7 @@ func (suite *operatorTestSuite) TestCheckSuccess() { TransferLeader{FromStore: 2, ToStore: 1}, RemovePeer{FromStore: 2}, } - op := suite.newTestOperator(1, OpLeader|OpRegion, steps...) + op := NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader|OpRegion, steps...) re.Equal(CREATED, op.Status()) re.False(op.CheckSuccess()) re.True(op.Start()) @@ -324,7 +321,7 @@ func (suite *operatorTestSuite) TestCheckSuccess() { TransferLeader{FromStore: 2, ToStore: 1}, RemovePeer{FromStore: 2}, } - op := suite.newTestOperator(1, OpLeader|OpRegion, steps...) + op := NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader|OpRegion, steps...) op.currentStep = int32(len(op.steps)) re.Equal(CREATED, op.Status()) re.False(op.CheckSuccess()) @@ -342,7 +339,7 @@ func (suite *operatorTestSuite) TestCheckTimeout() { TransferLeader{FromStore: 2, ToStore: 1}, RemovePeer{FromStore: 2}, } - op := suite.newTestOperator(1, OpLeader|OpRegion, steps...) + op := NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader|OpRegion, steps...) re.Equal(CREATED, op.Status()) re.True(op.Start()) op.currentStep = int32(len(op.steps)) @@ -355,7 +352,7 @@ func (suite *operatorTestSuite) TestCheckTimeout() { TransferLeader{FromStore: 2, ToStore: 1}, RemovePeer{FromStore: 2}, } - op := suite.newTestOperator(1, OpLeader|OpRegion, steps...) + op := NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader|OpRegion, steps...) re.Equal(CREATED, op.Status()) re.True(op.Start()) op.currentStep = int32(len(op.steps)) @@ -372,7 +369,7 @@ func (suite *operatorTestSuite) TestStart() { TransferLeader{FromStore: 2, ToStore: 1}, RemovePeer{FromStore: 2}, } - op := suite.newTestOperator(1, OpLeader|OpRegion, steps...) + op := NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader|OpRegion, steps...) re.Equal(0, op.GetStartTime().Nanosecond()) re.Equal(CREATED, op.Status()) re.True(op.Start()) @@ -387,7 +384,7 @@ func (suite *operatorTestSuite) TestCheckExpired() { TransferLeader{FromStore: 2, ToStore: 1}, RemovePeer{FromStore: 2}, } - op := suite.newTestOperator(1, OpLeader|OpRegion, steps...) + op := NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader|OpRegion, steps...) re.False(op.CheckExpired()) re.Equal(CREATED, op.Status()) op.SetStatusReachTime(CREATED, time.Now().Add(-OperatorExpireTime)) @@ -398,30 +395,30 @@ func (suite *operatorTestSuite) TestCheckExpired() { func (suite *operatorTestSuite) TestCheck() { re := suite.Require() { - region := suite.newTestRegion(2, 2, [2]uint64{1, 1}, [2]uint64{2, 2}) + region := newTestRegion(2, 2, [2]uint64{1, 1}, [2]uint64{2, 2}) steps := []OpStep{ AddPeer{ToStore: 1, PeerID: 1}, TransferLeader{FromStore: 2, ToStore: 1}, RemovePeer{FromStore: 2}, } - op := suite.newTestOperator(2, OpLeader|OpRegion, steps...) + op := NewTestOperator(2, &metapb.RegionEpoch{}, OpLeader|OpRegion, steps...) 
re.True(op.Start()) re.NotNil(op.Check(region)) re.Equal(STARTED, op.Status()) - region = suite.newTestRegion(1, 1, [2]uint64{1, 1}) + region = newTestRegion(1, 1, [2]uint64{1, 1}) re.Nil(op.Check(region)) re.Equal(SUCCESS, op.Status()) } { - region := suite.newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2}) + region := newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2}) steps := []OpStep{ AddPeer{ToStore: 1, PeerID: 1}, TransferLeader{FromStore: 2, ToStore: 1}, RemovePeer{FromStore: 2}, } - op := suite.newTestOperator(1, OpLeader|OpRegion, steps...) + op := NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader|OpRegion, steps...) re.True(op.Start()) re.NotNil(op.Check(region)) re.Equal(STARTED, op.Status()) @@ -430,18 +427,18 @@ func (suite *operatorTestSuite) TestCheck() { re.Equal(TIMEOUT, op.Status()) } { - region := suite.newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2}) + region := newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2}) steps := []OpStep{ AddPeer{ToStore: 1, PeerID: 1}, TransferLeader{FromStore: 2, ToStore: 1}, RemovePeer{FromStore: 2}, } - op := suite.newTestOperator(1, OpLeader|OpRegion, steps...) + op := NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader|OpRegion, steps...) re.True(op.Start()) re.NotNil(op.Check(region)) re.Equal(STARTED, op.Status()) op.status.setTime(STARTED, time.Now().Add(-SlowStepWaitTime)) - region = suite.newTestRegion(1, 1, [2]uint64{1, 1}) + region = newTestRegion(1, 1, [2]uint64{1, 1}) re.Nil(op.Check(region)) re.Equal(SUCCESS, op.Status()) } @@ -454,28 +451,28 @@ func (suite *operatorTestSuite) TestSchedulerKind() { expect OpKind }{ { - op: suite.newTestOperator(1, OpAdmin|OpMerge|OpRegion), + op: NewTestOperator(1, &metapb.RegionEpoch{}, OpAdmin|OpMerge|OpRegion), expect: OpAdmin, }, { - op: suite.newTestOperator(1, OpMerge|OpLeader|OpRegion), + op: NewTestOperator(1, &metapb.RegionEpoch{}, OpMerge|OpLeader|OpRegion), expect: OpMerge, }, { - op: suite.newTestOperator(1, OpReplica|OpRegion), + op: NewTestOperator(1, &metapb.RegionEpoch{}, OpReplica|OpRegion), expect: OpReplica, }, { - op: suite.newTestOperator(1, OpSplit|OpRegion), + op: NewTestOperator(1, &metapb.RegionEpoch{}, OpSplit|OpRegion), expect: OpSplit, }, { - op: suite.newTestOperator(1, OpRange|OpRegion), + op: NewTestOperator(1, &metapb.RegionEpoch{}, OpRange|OpRegion), expect: OpRange, }, { - op: suite.newTestOperator(1, OpHotRegion|OpLeader|OpRegion), + op: NewTestOperator(1, &metapb.RegionEpoch{}, OpHotRegion|OpLeader|OpRegion), expect: OpHotRegion, }, { - op: suite.newTestOperator(1, OpRegion|OpLeader), + op: NewTestOperator(1, &metapb.RegionEpoch{}, OpRegion|OpLeader), expect: OpRegion, }, { - op: suite.newTestOperator(1, OpLeader), + op: NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader), expect: OpLeader, }, } @@ -534,7 +531,7 @@ func (suite *operatorTestSuite) TestOpStepTimeout() { func (suite *operatorTestSuite) TestRecord() { re := suite.Require() - operator := suite.newTestOperator(1, OpLeader, AddLearner{ToStore: 1, PeerID: 1}, RemovePeer{FromStore: 1, PeerID: 1}) + operator := NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader, AddLearner{ToStore: 1, PeerID: 1}, RemovePeer{FromStore: 1, PeerID: 1}) now := time.Now() time.Sleep(time.Second) ob := operator.Record(now) @@ -548,7 +545,7 @@ func (suite *operatorTestSuite) TestToJSONObject() { TransferLeader{FromStore: 3, ToStore: 1}, RemovePeer{FromStore: 3}, } - op := suite.newTestOperator(101, OpLeader|OpRegion, steps...) + op := NewTestOperator(101, &metapb.RegionEpoch{}, OpLeader|OpRegion, steps...) 
op.Start() obj := op.ToJSONObject() suite.Equal("test", obj.Desc) @@ -559,7 +556,7 @@ func (suite *operatorTestSuite) TestToJSONObject() { suite.Equal(STARTED, obj.Status) // Test SUCCESS status. - region := suite.newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2}) + region := newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2}) suite.Nil(op.Check(region)) suite.Equal(SUCCESS, op.Status()) obj = op.ToJSONObject() @@ -567,10 +564,34 @@ func (suite *operatorTestSuite) TestToJSONObject() { // Test TIMEOUT status. steps = []OpStep{TransferLeader{FromStore: 2, ToStore: 1}} - op = suite.newTestOperator(1, OpLeader, steps...) + op = NewTestOperator(1, &metapb.RegionEpoch{}, OpLeader, steps...) op.Start() op.SetStatusReachTime(STARTED, op.GetStartTime().Add(-FastStepWaitTime-time.Second)) suite.True(op.CheckTimeout()) obj = op.ToJSONObject() suite.Equal(TIMEOUT, obj.Status) } + +func TestOperatorCheckConcurrently(t *testing.T) { + re := require.New(t) + region := newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2}) + // addPeer1, transferLeader1, removePeer3 + steps := []OpStep{ + AddPeer{ToStore: 1, PeerID: 1}, + TransferLeader{FromStore: 3, ToStore: 1}, + RemovePeer{FromStore: 3}, + } + op := NewTestOperator(1, &metapb.RegionEpoch{}, OpAdmin|OpLeader|OpRegion, steps...) + re.Equal(constant.Urgent, op.GetPriorityLevel()) + checkSteps(re, op, steps) + op.Start() + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + defer wg.Done() + re.Nil(op.Check(region)) + }() + } + wg.Wait() +} diff --git a/pkg/schedule/operator/status_tracker.go b/pkg/schedule/operator/status_tracker.go index e103a74ccb3..a74d94b18a4 100644 --- a/pkg/schedule/operator/status_tracker.go +++ b/pkg/schedule/operator/status_tracker.go @@ -15,6 +15,7 @@ package operator import ( + "encoding/json" "time" "github.com/tikv/pd/pkg/utils/syncutil" @@ -64,9 +65,8 @@ func (trk *OpStatusTracker) getTime(s OpStatus) time.Time { return trk.reachTimes[s] } else if trk.current == s { return trk.reachTimes[firstEndStatus] - } else { - return time.Time{} } + return time.Time{} } // To transfer the current status to dst if this transition is valid, @@ -136,3 +136,35 @@ func (trk *OpStatusTracker) String() string { defer trk.rw.RUnlock() return OpStatusToString(trk.current) } + +type opAdditionalInfo struct { + syncutil.RWMutex + value map[string]string +} + +// SetAdditionalInfo sets additional info with key and value. +func (o *Operator) SetAdditionalInfo(key string, value string) { + o.additionalInfos.Lock() + defer o.additionalInfos.Unlock() + o.additionalInfos.value[key] = value +} + +// GetAdditionalInfo returns additional info with key. 
+func (o *Operator) GetAdditionalInfo(key string) string { + o.additionalInfos.RLock() + defer o.additionalInfos.RUnlock() + return o.additionalInfos.value[key] +} + +// LogAdditionalInfo returns additional info with string +func (o *Operator) LogAdditionalInfo() string { + o.additionalInfos.RLock() + defer o.additionalInfos.RUnlock() + if len(o.additionalInfos.value) != 0 { + additionalInfo, err := json.Marshal(o.additionalInfos.value) + if err == nil { + return string(additionalInfo) + } + } + return "" +} diff --git a/pkg/schedule/operator/status_tracker_test.go b/pkg/schedule/operator/status_tracker_test.go index e53b017229a..8c897d1e545 100644 --- a/pkg/schedule/operator/status_tracker_test.go +++ b/pkg/schedule/operator/status_tracker_test.go @@ -15,6 +15,8 @@ package operator import ( + "fmt" + "sync" "testing" "time" @@ -178,3 +180,26 @@ func checkReachTime(re *require.Assertions, trk *OpStatusTracker, reached ...OpS re.True(trk.ReachTimeOf(st).IsZero()) } } + +func TestAdditionalInfoConcurrent(t *testing.T) { + op := NewOperator("test", "test", 0, nil, OpAdmin, 0) + + var wg sync.WaitGroup + for i := 0; i < 1000; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + key := fmt.Sprintf("key%d", i) + value := fmt.Sprintf("value%d", i) + op.SetAdditionalInfo(key, value) + if op.GetAdditionalInfo(key) != value { + t.Errorf("unexpected value for key %s", key) + } + }(i) + } + wg.Wait() + + if logInfo := op.LogAdditionalInfo(); logInfo == "" { + t.Error("LogAdditionalInfo returned an empty string") + } +} diff --git a/pkg/schedule/operator/step.go b/pkg/schedule/operator/step.go index 6f14cbb326b..04e41028865 100644 --- a/pkg/schedule/operator/step.go +++ b/pkg/schedule/operator/step.go @@ -70,7 +70,7 @@ type TransferLeader struct { } // ConfVerChanged returns the delta value for version increased by this step. -func (tl TransferLeader) ConfVerChanged(_ *core.RegionInfo) uint64 { +func (TransferLeader) ConfVerChanged(_ *core.RegionInfo) uint64 { return 0 // transfer leader never change the conf version } @@ -122,12 +122,12 @@ func (tl TransferLeader) Influence(opInfluence OpInfluence, region *core.RegionI } // Timeout returns duration that current step may take. -func (tl TransferLeader) Timeout(regionSize int64) time.Duration { +func (TransferLeader) Timeout(regionSize int64) time.Duration { return fastStepWaitDuration(regionSize) } // GetCmd returns the schedule command for heartbeat response. -func (tl TransferLeader) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *hbstream.Operation { +func (tl TransferLeader) GetCmd(region *core.RegionInfo, _ bool) *hbstream.Operation { peers := make([]*metapb.Peer, 0, len(tl.ToStores)) for _, storeID := range tl.ToStores { peers = append(peers, region.GetStorePeer(storeID)) @@ -206,7 +206,7 @@ func (ap AddPeer) CheckInProgress(ci *core.BasicCluster, config config.SharedCon } // Timeout returns duration that current step may take. -func (ap AddPeer) Timeout(regionSize int64) time.Duration { +func (AddPeer) Timeout(regionSize int64) time.Duration { return slowStepWaitDuration(regionSize) } @@ -270,7 +270,7 @@ func (bw BecomeWitness) Influence(opInfluence OpInfluence, region *core.RegionIn } // Timeout returns duration that current step may take. 
-func (bw BecomeWitness) Timeout(regionSize int64) time.Duration { +func (BecomeWitness) Timeout(regionSize int64) time.Duration { return fastStepWaitDuration(regionSize) } @@ -338,12 +338,12 @@ func (bn BecomeNonWitness) Influence(opInfluence OpInfluence, region *core.Regio } // Timeout returns duration that current step may take. -func (bn BecomeNonWitness) Timeout(regionSize int64) time.Duration { +func (BecomeNonWitness) Timeout(regionSize int64) time.Duration { return slowStepWaitDuration(regionSize) } // GetCmd returns the schedule command for heartbeat response. -func (bn BecomeNonWitness) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *hbstream.Operation { +func (bn BecomeNonWitness) GetCmd(*core.RegionInfo, bool) *hbstream.Operation { return switchWitness(bn.PeerID, false) } @@ -518,7 +518,7 @@ func (al AddLearner) Influence(opInfluence OpInfluence, region *core.RegionInfo) } // Timeout returns duration that current step may take. -func (al AddLearner) Timeout(regionSize int64) time.Duration { +func (AddLearner) Timeout(regionSize int64) time.Duration { return slowStepWaitDuration(regionSize) } @@ -565,7 +565,7 @@ func (pl PromoteLearner) IsFinish(region *core.RegionInfo) bool { } // CheckInProgress checks if the step is in the progress of advancing. -func (pl PromoteLearner) CheckInProgress(_ *core.BasicCluster, config config.SharedConfigProvider, region *core.RegionInfo) error { +func (pl PromoteLearner) CheckInProgress(_ *core.BasicCluster, _ config.SharedConfigProvider, region *core.RegionInfo) error { peer := region.GetStorePeer(pl.ToStore) if peer.GetId() != pl.PeerID { return errors.New("peer does not exist") @@ -574,10 +574,10 @@ func (pl PromoteLearner) CheckInProgress(_ *core.BasicCluster, config config.Sha } // Influence calculates the store difference that current step makes. -func (pl PromoteLearner) Influence(_ OpInfluence, _ *core.RegionInfo) {} +func (PromoteLearner) Influence(OpInfluence, *core.RegionInfo) {} // Timeout returns duration that current step may take. -func (pl PromoteLearner) Timeout(regionSize int64) time.Duration { +func (PromoteLearner) Timeout(regionSize int64) time.Duration { return fastStepWaitDuration(regionSize) } @@ -617,7 +617,7 @@ func (rp RemovePeer) IsFinish(region *core.RegionInfo) bool { } // CheckInProgress checks if the step is in the progress of advancing. -func (rp RemovePeer) CheckInProgress(_ *core.BasicCluster, config config.SharedConfigProvider, region *core.RegionInfo) error { +func (rp RemovePeer) CheckInProgress(_ *core.BasicCluster, _ config.SharedConfigProvider, region *core.RegionInfo) error { if rp.FromStore == region.GetLeader().GetStoreId() { return errors.New("cannot remove leader peer") } @@ -648,7 +648,7 @@ func (rp RemovePeer) Influence(opInfluence OpInfluence, region *core.RegionInfo) } // Timeout returns duration that current step may take. -func (rp RemovePeer) Timeout(regionSize int64) time.Duration { +func (RemovePeer) Timeout(regionSize int64) time.Duration { return fastStepWaitDuration(regionSize) } @@ -674,7 +674,7 @@ type MergeRegion struct { } // ConfVerChanged returns the delta value for version increased by this step. -func (mr MergeRegion) ConfVerChanged(_ *core.RegionInfo) uint64 { +func (MergeRegion) ConfVerChanged(*core.RegionInfo) uint64 { return 0 } @@ -691,7 +691,7 @@ func (mr MergeRegion) IsFinish(region *core.RegionInfo) bool { } // CheckInProgress checks if the step is in the progress of advancing. 
-func (mr MergeRegion) CheckInProgress(_ *core.BasicCluster, config config.SharedConfigProvider, _ *core.RegionInfo) error { +func (MergeRegion) CheckInProgress(*core.BasicCluster, config.SharedConfigProvider, *core.RegionInfo) error { return nil } @@ -710,12 +710,12 @@ func (mr MergeRegion) Influence(opInfluence OpInfluence, region *core.RegionInfo // Timeout returns duration that current step may take. // The merge step need more time to finish but less than slow step. -func (mr MergeRegion) Timeout(regionSize int64) time.Duration { +func (MergeRegion) Timeout(regionSize int64) time.Duration { return fastStepWaitDuration(regionSize) * 10 } // GetCmd returns the schedule command for heartbeat response. -func (mr MergeRegion) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *hbstream.Operation { +func (mr MergeRegion) GetCmd(*core.RegionInfo, bool) *hbstream.Operation { if mr.IsPassive { return nil } @@ -734,7 +734,7 @@ type SplitRegion struct { } // ConfVerChanged returns the delta value for version increased by this step. -func (sr SplitRegion) ConfVerChanged(_ *core.RegionInfo) uint64 { +func (SplitRegion) ConfVerChanged(*core.RegionInfo) uint64 { return 0 } @@ -748,7 +748,7 @@ func (sr SplitRegion) IsFinish(region *core.RegionInfo) bool { } // Influence calculates the store difference that current step makes. -func (sr SplitRegion) Influence(opInfluence OpInfluence, region *core.RegionInfo) { +func (SplitRegion) Influence(opInfluence OpInfluence, region *core.RegionInfo) { for _, peer := range region.GetPeers() { inf := opInfluence.GetStoreInfluence(peer.GetStoreId()) inf.RegionCount++ @@ -759,17 +759,17 @@ func (sr SplitRegion) Influence(opInfluence OpInfluence, region *core.RegionInfo } // CheckInProgress checks if the step is in the progress of advancing. -func (sr SplitRegion) CheckInProgress(_ *core.BasicCluster, config config.SharedConfigProvider, _ *core.RegionInfo) error { +func (SplitRegion) CheckInProgress(*core.BasicCluster, config.SharedConfigProvider, *core.RegionInfo) error { return nil } // Timeout returns duration that current step may take. -func (sr SplitRegion) Timeout(regionSize int64) time.Duration { +func (SplitRegion) Timeout(regionSize int64) time.Duration { return fastStepWaitDuration(regionSize) } // GetCmd returns the schedule command for heartbeat response. -func (sr SplitRegion) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *hbstream.Operation { +func (sr SplitRegion) GetCmd(*core.RegionInfo, bool) *hbstream.Operation { return &hbstream.Operation{ SplitRegion: &pdpb.SplitRegion{ Policy: sr.Policy, @@ -814,7 +814,7 @@ func (dv DemoteVoter) IsFinish(region *core.RegionInfo) bool { } // Timeout returns duration that current step may take. -func (dv DemoteVoter) Timeout(regionSize int64) time.Duration { +func (DemoteVoter) Timeout(regionSize int64) time.Duration { return fastStepWaitDuration(regionSize) } @@ -884,7 +884,7 @@ func (cpe ChangePeerV2Enter) IsFinish(region *core.RegionInfo) bool { } // CheckInProgress checks if the step is in the progress of advancing. 
-func (cpe ChangePeerV2Enter) CheckInProgress(_ *core.BasicCluster, config config.SharedConfigProvider, region *core.RegionInfo) error { +func (cpe ChangePeerV2Enter) CheckInProgress(_ *core.BasicCluster, _ config.SharedConfigProvider, region *core.RegionInfo) error { inJointState, notInJointState := false, false for _, pl := range cpe.PromoteLearners { peer := region.GetStorePeer(pl.ToStore) @@ -932,7 +932,7 @@ func (cpe ChangePeerV2Enter) CheckInProgress(_ *core.BasicCluster, config config } // Influence calculates the store difference that current step makes. -func (cpe ChangePeerV2Enter) Influence(_ OpInfluence, _ *core.RegionInfo) {} +func (ChangePeerV2Enter) Influence(OpInfluence, *core.RegionInfo) {} // Timeout returns duration that current step may take. func (cpe ChangePeerV2Enter) Timeout(regionSize int64) time.Duration { @@ -1013,7 +1013,7 @@ func (cpl ChangePeerV2Leave) IsFinish(region *core.RegionInfo) bool { } // CheckInProgress checks if the step is in the progress of advancing. -func (cpl ChangePeerV2Leave) CheckInProgress(_ *core.BasicCluster, config config.SharedConfigProvider, region *core.RegionInfo) error { +func (cpl ChangePeerV2Leave) CheckInProgress(_ *core.BasicCluster, _ config.SharedConfigProvider, region *core.RegionInfo) error { inJointState, notInJointState, demoteLeader := false, false, false leaderStoreID := region.GetLeader().GetStoreId() @@ -1072,7 +1072,7 @@ func (cpl ChangePeerV2Leave) CheckInProgress(_ *core.BasicCluster, config config } // Influence calculates the store difference that current step makes. -func (cpl ChangePeerV2Leave) Influence(_ OpInfluence, _ *core.RegionInfo) {} +func (ChangePeerV2Leave) Influence(OpInfluence, *core.RegionInfo) {} // Timeout returns duration that current step may take. func (cpl ChangePeerV2Leave) Timeout(regionSize int64) time.Duration { @@ -1081,7 +1081,7 @@ func (cpl ChangePeerV2Leave) Timeout(regionSize int64) time.Duration { } // GetCmd returns the schedule command for heartbeat response. -func (cpl ChangePeerV2Leave) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *hbstream.Operation { +func (ChangePeerV2Leave) GetCmd(_ *core.RegionInfo, useConfChangeV2 bool) *hbstream.Operation { if !useConfChangeV2 { // only supported in ChangePeerV2 return nil diff --git a/pkg/schedule/operator/step_test.go b/pkg/schedule/operator/step_test.go index f362d988f89..014703d00f9 100644 --- a/pkg/schedule/operator/step_test.go +++ b/pkg/schedule/operator/step_test.go @@ -40,7 +40,7 @@ type testCase struct { Peers []*metapb.Peer // first is leader ConfVerChanged uint64 IsFinish bool - CheckInProgress func(err error, msgAndArgs ...interface{}) + CheckInProgress func(err error, msgAndArgs ...any) } func (suite *operatorStepTestSuite) SetupTest() { diff --git a/pkg/schedule/operator/waiting_operator.go b/pkg/schedule/operator/waiting_operator.go index 8f5c72b053b..f75dcf25cd8 100644 --- a/pkg/schedule/operator/waiting_operator.go +++ b/pkg/schedule/operator/waiting_operator.go @@ -16,6 +16,8 @@ package operator import ( "math/rand" + + "github.com/tikv/pd/pkg/utils/syncutil" ) // priorityWeight is used to represent the weight of different priorities of operators. @@ -24,6 +26,7 @@ var priorityWeight = []float64{1.0, 4.0, 9.0, 16.0} // WaitingOperator is an interface of waiting operators. 
type WaitingOperator interface { PutOperator(op *Operator) + PutMergeOperators(op []*Operator) GetOperator() []*Operator ListOperator() []*Operator } @@ -36,6 +39,7 @@ type bucket struct { // randBuckets is an implementation of waiting operators type randBuckets struct { + mu syncutil.Mutex totalWeight float64 buckets []*bucket } @@ -53,6 +57,8 @@ func newRandBuckets() *randBuckets { // PutOperator puts an operator into the random buckets. func (b *randBuckets) PutOperator(op *Operator) { + b.mu.Lock() + defer b.mu.Unlock() priority := op.GetPriorityLevel() bucket := b.buckets[priority] if len(bucket.ops) == 0 { @@ -61,8 +67,25 @@ func (b *randBuckets) PutOperator(op *Operator) { bucket.ops = append(bucket.ops, op) } +// PutMergeOperators puts two operators into the random buckets. +func (b *randBuckets) PutMergeOperators(ops []*Operator) { + b.mu.Lock() + defer b.mu.Unlock() + if len(ops) != 2 || (ops[0].Kind()&OpMerge == 0 || ops[1].Kind()&OpMerge == 0) { + return + } + priority := ops[0].GetPriorityLevel() + bucket := b.buckets[priority] + if len(bucket.ops) == 0 { + b.totalWeight += bucket.weight + } + bucket.ops = append(bucket.ops, ops...) +} + // ListOperator lists all operator in the random buckets. func (b *randBuckets) ListOperator() []*Operator { + b.mu.Lock() + defer b.mu.Unlock() var ops []*Operator for i := range b.buckets { bucket := b.buckets[i] @@ -73,6 +96,8 @@ func (b *randBuckets) ListOperator() []*Operator { // GetOperator gets an operator from the random buckets. func (b *randBuckets) GetOperator() []*Operator { + b.mu.Lock() + defer b.mu.Unlock() if b.totalWeight == 0 { return nil } @@ -106,12 +131,34 @@ func (b *randBuckets) GetOperator() []*Operator { // waitingOperatorStatus is used to limit the count of each kind of operators. type waitingOperatorStatus struct { + mu syncutil.Mutex ops map[string]uint64 } // newWaitingOperatorStatus creates a new waitingOperatorStatus. func newWaitingOperatorStatus() *waitingOperatorStatus { return &waitingOperatorStatus{ - make(map[string]uint64), + ops: make(map[string]uint64), } } + +// incCount increments the count of the given operator kind. +func (s *waitingOperatorStatus) incCount(kind string) { + s.mu.Lock() + defer s.mu.Unlock() + s.ops[kind]++ +} + +// decCount decrements the count of the given operator kind. +func (s *waitingOperatorStatus) decCount(kind string) { + s.mu.Lock() + defer s.mu.Unlock() + s.ops[kind]-- +} + +// getCount returns the count of the given operator kind.
+func (s *waitingOperatorStatus) getCount(kind string) uint64 { + s.mu.Lock() + defer s.mu.Unlock() + return s.ops[kind] +} diff --git a/pkg/schedule/placement/config.go b/pkg/schedule/placement/config.go index 00c0f94b94e..53cb0636536 100644 --- a/pkg/schedule/placement/config.go +++ b/pkg/schedule/placement/config.go @@ -180,7 +180,7 @@ func (p *RuleConfigPatch) commit() { p.c.adjust() } -func jsonEquals(a, b interface{}) bool { +func jsonEquals(a, b any) bool { aa, _ := json.Marshal(a) bb, _ := json.Marshal(b) return bytes.Equal(aa, bb) diff --git a/pkg/schedule/placement/fit.go b/pkg/schedule/placement/fit.go index d907bcd011a..30530462664 100644 --- a/pkg/schedule/placement/fit.go +++ b/pkg/schedule/placement/fit.go @@ -314,8 +314,8 @@ func pickPeersFromBinaryInt(candidates []*fitPeer, binaryNumber uint) []*fitPeer return selected } -func unSelectPeers(seleted []*fitPeer) { - for _, p := range seleted { +func unSelectPeers(selected []*fitPeer) { + for _, p := range selected { p.selected = false } } diff --git a/pkg/schedule/placement/fit_region_test.go b/pkg/schedule/placement/fit_region_test.go index 5bc62d9cc12..2006801e71a 100644 --- a/pkg/schedule/placement/fit_region_test.go +++ b/pkg/schedule/placement/fit_region_test.go @@ -299,26 +299,26 @@ func BenchmarkFitRegionWithMoreRulesAndStoreLabels(b *testing.B) { values := []string{} for id := 1; id < 100; id++ { values = append(values, fmt.Sprintf("value_%08d", id)) - labelContaint := LabelConstraint{ + labelConstraint := LabelConstraint{ Key: fmt.Sprintf("key_%08d", id), Op: NotIn, Values: values, } - rule.LabelConstraints = append(rule.LabelConstraints, labelContaint) + rule.LabelConstraints = append(rule.LabelConstraints, labelConstraint) } - // add an exclusive containt. + // add an exclusive constraint. values = append(values, "exclusive") - labelContaint := LabelConstraint{ + labelConstraint := LabelConstraint{ Key: "exclusive", Op: In, Values: values, } - rule.LabelConstraints = append(rule.LabelConstraints, labelContaint) + rule.LabelConstraints = append(rule.LabelConstraints, labelConstraint) rules = append(rules, rule) } - // create stores, with each stores has 101 normal labels(1 exclusive label). + // create stores, with each store has 101 normal labels(1 exclusive label). 
lists := make([]*core.StoreInfo, 0) - labels := []*metapb.StoreLabel{} + labels := make([]*metapb.StoreLabel, 0, 101) for labID := 0; labID < 100; labID++ { label := &metapb.StoreLabel{Key: fmt.Sprintf("store_%08d", labID), Value: fmt.Sprintf("value_%08d", labID)} labels = append(labels, label) @@ -349,7 +349,7 @@ func BenchmarkFitRegionWithMoreRulesAndStoreLabels(b *testing.B) { func BenchmarkFitRegionWithLocationLabels(b *testing.B) { region := mockRegion(5, 5) - rules := []*Rule{} + var rules []*Rule rule := &Rule{ GroupID: DefaultGroupID, ID: "followers", diff --git a/pkg/schedule/placement/fit_test.go b/pkg/schedule/placement/fit_test.go index 286dbcdacd5..cc49d25640c 100644 --- a/pkg/schedule/placement/fit_test.go +++ b/pkg/schedule/placement/fit_test.go @@ -47,7 +47,7 @@ func makeStores() StoreSet { if id == 1111 || id == 2111 || id == 3111 { labels["disk"] = "ssd" } - stores.SetStore(core.NewStoreInfoWithLabel(id, labels).Clone(core.SetLastHeartbeatTS(now))) + stores.PutStore(core.NewStoreInfoWithLabel(id, labels).Clone(core.SetLastHeartbeatTS(now))) } } } @@ -151,7 +151,7 @@ func TestReplace(t *testing.T) { } for _, tc := range testCases { region := makeRegion(tc.region) - var rules []*Rule + rules := make([]*Rule, 0, len(tc.rules)) for _, r := range tc.rules { rules = append(rules, makeRule(r)) } @@ -196,7 +196,7 @@ func TestFitRegion(t *testing.T) { for _, testCase := range testCases { region := makeRegion(testCase.region) - var rules []*Rule + rules := make([]*Rule, 0, len(testCase.rules)) for _, r := range testCase.rules { rules = append(rules, makeRule(r)) } @@ -215,7 +215,7 @@ func TestIsolationScore(t *testing.T) { as := assert.New(t) stores := makeStores() testCases := []struct { - checker func(interface{}, interface{}, ...interface{}) bool + checker func(any, any, ...any) bool peers1 []uint64 peers2 []uint64 }{ diff --git a/pkg/schedule/placement/region_rule_cache_test.go b/pkg/schedule/placement/region_rule_cache_test.go index 835203bed26..e951ea10cc5 100644 --- a/pkg/schedule/placement/region_rule_cache_test.go +++ b/pkg/schedule/placement/region_rule_cache_test.go @@ -226,7 +226,7 @@ func (manager *RegionRuleFitCacheManager) mockRegionRuleFitCache(region *core.Re } } -// nolint +// nolint:unparam func mockStores(num int) []*core.StoreInfo { stores := make([]*core.StoreInfo, 0, num) now := time.Now() @@ -237,7 +237,6 @@ func mockStores(num int) []*core.StoreInfo { return stores } -// nolint func mockStoresNoHeartbeat(num int) []*core.StoreInfo { stores := make([]*core.StoreInfo, 0, num) for i := 1; i <= num; i++ { diff --git a/pkg/schedule/placement/rule.go b/pkg/schedule/placement/rule.go index 75ccd509ee8..07054b7b1cd 100644 --- a/pkg/schedule/placement/rule.go +++ b/pkg/schedule/placement/rule.go @@ -90,7 +90,7 @@ func (r *Rule) String() string { // Clone returns a copy of Rule. func (r *Rule) Clone() *Rule { var clone Rule - json.Unmarshal([]byte(r.String()), &clone) + _ = json.Unmarshal([]byte(r.String()), &clone) clone.StartKey = append(r.StartKey[:0:0], r.StartKey...) clone.EndKey = append(r.EndKey[:0:0], r.EndKey...) 
return &clone diff --git a/pkg/schedule/placement/rule_list.go b/pkg/schedule/placement/rule_list.go index 9c43bb91627..73b2f5271a1 100644 --- a/pkg/schedule/placement/rule_list.go +++ b/pkg/schedule/placement/rule_list.go @@ -66,7 +66,7 @@ type ruleContainer interface { // rules indicates the map (rule's GroupID, ID) => rule func buildRuleList(rules ruleContainer) (ruleList, error) { builder := rangelist.NewBuilder() - builder.SetCompareFunc(func(a, b interface{}) int { + builder.SetCompareFunc(func(a, b any) int { return compareRule(a.(*Rule), b.(*Rule)) }) rules.iterateRules(func(r *Rule) { diff --git a/pkg/schedule/placement/rule_test.go b/pkg/schedule/placement/rule_test.go index 75d7bab23c9..c7b8dd97ef6 100644 --- a/pkg/schedule/placement/rule_test.go +++ b/pkg/schedule/placement/rule_test.go @@ -186,3 +186,30 @@ func TestBuildRuleList(t *testing.T) { re.Equal(testCase.expect.ranges, result.ranges) } } + +// startKey and endKey are json:"-" which means cannot be Unmarshal from json +// We need to take care of `Clone` method. +func TestRuleKeyClone(t *testing.T) { + re := require.New(t) + r := &Rule{ + StartKey: []byte{1, 2, 3}, + EndKey: []byte{4, 5, 6}, + } + + clone := r.Clone() + // Modify the original rule + r.StartKey[0] = 9 + r.EndKey[0] = 9 + + // The clone should not be affected + re.Equal([]byte{1, 2, 3}, clone.StartKey) + re.Equal([]byte{4, 5, 6}, clone.EndKey) + + // Modify the clone + clone.StartKey[0] = 8 + clone.EndKey[0] = 8 + + // The original rule should not be affected + re.Equal([]byte{9, 2, 3}, r.StartKey) + re.Equal([]byte{9, 5, 6}, r.EndKey) +} diff --git a/pkg/schedule/plan/balance_plan.go b/pkg/schedule/plan/balance_plan.go index 819a00f94e7..57396ffc80d 100644 --- a/pkg/schedule/plan/balance_plan.go +++ b/pkg/schedule/plan/balance_plan.go @@ -51,7 +51,7 @@ func (p *BalanceSchedulerPlan) GetStep() int { } // SetResource is used to set resource for current step. -func (p *BalanceSchedulerPlan) SetResource(resource interface{}) { +func (p *BalanceSchedulerPlan) SetResource(resource any) { switch p.Step { // for balance-region/leader scheduler, the first step is selecting stores as source candidates. case pickSource: @@ -66,7 +66,7 @@ func (p *BalanceSchedulerPlan) SetResource(resource interface{}) { } // SetResourceWithStep is used to set resource for specific step. -func (p *BalanceSchedulerPlan) SetResourceWithStep(resource interface{}, step int) { +func (p *BalanceSchedulerPlan) SetResourceWithStep(resource any, step int) { p.Step = step p.SetResource(resource) } diff --git a/pkg/schedule/plan/plan.go b/pkg/schedule/plan/plan.go index fcd5102012c..8a389b9b9e8 100644 --- a/pkg/schedule/plan/plan.go +++ b/pkg/schedule/plan/plan.go @@ -22,11 +22,11 @@ type Plan interface { GetResource(int) uint64 Clone(ops ...Option) Plan // generate plan for clone option - SetResource(interface{}) + SetResource(any) // SetResourceWithStep is used to set resource for specific step. // The meaning of step is different for different plans. 
// Such as balancePlan, pickSource = 0, pickRegion = 1, pickTarget = 2 - SetResourceWithStep(resource interface{}, step int) + SetResourceWithStep(resource any, step int) SetStatus(*Status) } @@ -82,14 +82,14 @@ func SetStatus(status *Status) Option { } // SetResource is used to generate Resource for plan -func SetResource(resource interface{}) Option { +func SetResource(resource any) Option { return func(plan Plan) { plan.SetResource(resource) } } // SetResourceWithStep is used to generate Resource for plan -func SetResourceWithStep(resource interface{}, step int) Option { +func SetResourceWithStep(resource any, step int) Option { return func(plan Plan) { plan.SetResourceWithStep(resource, step) } diff --git a/pkg/schedule/plan/status.go b/pkg/schedule/plan/status.go index 4242b631493..636b9ceaaca 100644 --- a/pkg/schedule/plan/status.go +++ b/pkg/schedule/plan/status.go @@ -49,7 +49,7 @@ const ( const ( // StatusStoreRejectLeader represents the store is restricted by the special configuration. e.g. reject label setting, evict leader/slow store scheduler. StatusStoreRejectLeader = iota + 300 - // StatusNotMatchIsolation represents the isolation cannot satisfy the requirement. + // StatusStoreNotMatchIsolation represents the isolation cannot satisfy the requirement. StatusStoreNotMatchIsolation ) @@ -189,7 +189,7 @@ func (s *Status) String() string { return StatusText(s.StatusCode) } -// IsNormal returns true if the status is noraml. +// IsNormal returns true if the status is normal. func (s *Status) IsNormal() bool { return int(s.StatusCode)/10 == 10 } diff --git a/pkg/schedule/rangelist/builder.go b/pkg/schedule/rangelist/builder.go index 92977d698e0..cb178585cb6 100644 --- a/pkg/schedule/rangelist/builder.go +++ b/pkg/schedule/rangelist/builder.go @@ -30,7 +30,7 @@ const ( type splitPoint struct { ty splitPointType key []byte - data interface{} + data any } // Builder is used to create key range list. @@ -45,12 +45,12 @@ func NewBuilder() *Builder { } // SetCompareFunc sets up the comparer to determine item order (ascending) for a key range. -func (b *Builder) SetCompareFunc(f func(a, b interface{}) int) { +func (b *Builder) SetCompareFunc(f func(a, b any) int) { b.compare = f } // AddItem pushes an item to key range list. -func (b *Builder) AddItem(start, end []byte, data interface{}) { +func (b *Builder) AddItem(start, end []byte, data any) { b.splitPoints = append(b.splitPoints, splitPoint{ty: tStart, key: start, data: data}) if len(end) > 0 { b.splitPoints = append(b.splitPoints, splitPoint{ty: tEnd, key: end, data: data}) @@ -59,10 +59,10 @@ func (b *Builder) AddItem(start, end []byte, data interface{}) { // An item slice that keeps items in ascending order. type sortedItems struct { - items []interface{} + items []any } -func (si *sortedItems) insertItem(item interface{}, comparer compareFunc) { +func (si *sortedItems) insertItem(item any, comparer compareFunc) { pos := len(si.items) if comparer != nil { pos = sort.Search(len(si.items), func(i int) bool { @@ -77,7 +77,7 @@ func (si *sortedItems) insertItem(item interface{}, comparer compareFunc) { si.items[pos] = item } -func (si *sortedItems) deleteItem(del interface{}) { +func (si *sortedItems) deleteItem(del any) { for i, item := range si.items { if item == del { si.items = append(si.items[:i], si.items[i+1:]...) 
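For reference, the rangelist hunks here only swap `interface{}` for `any`; the Builder/List behavior is unchanged. The following is a minimal usage sketch of that API under the migration. It assumes the package's Build() method (exercised by the tests further below), and the example function name is hypothetical, not part of this patch:

package rangelist

// exampleBuildAndQuery shows the any-based Builder/List API: items that share a
// segment are kept in the order defined by the compare func, while segment
// boundaries come from the start/end keys passed to AddItem.
func exampleBuildAndQuery() (int, []any) {
	b := NewBuilder()
	b.SetCompareFunc(func(a, b any) int {
		// Order items sharing a segment ascending by their int payload.
		switch {
		case a.(int) < b.(int):
			return -1
		case a.(int) > b.(int):
			return 1
		default:
			return 0
		}
	})
	b.AddItem([]byte("a"), []byte("c"), 1) // data values are plain `any`
	b.AddItem([]byte("b"), []byte("d"), 2)
	rl := b.Build() // assumed to return the built List, as in the package tests
	// A key inside the overlap ("b" <= key < "c") sees both items, in compare order.
	return rl.GetDataByKey([]byte("b"))
}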
diff --git a/pkg/schedule/rangelist/range_list.go b/pkg/schedule/rangelist/range_list.go index dce7d756f79..45e179520f3 100644 --- a/pkg/schedule/rangelist/range_list.go +++ b/pkg/schedule/rangelist/range_list.go @@ -19,11 +19,11 @@ import ( "sort" ) -type compareFunc func(a, b interface{}) int +type compareFunc func(a, b any) int type segment struct { startKey []byte - data []interface{} + data []any } // List manages a list of key ranges. @@ -37,12 +37,12 @@ func (l List) Len() int { } // Get returns key and items at the position. -func (l List) Get(i int) ([]byte, []interface{}) { +func (l List) Get(i int) ([]byte, []any) { return l.segments[i].startKey, l.segments[i].data } // GetDataByKey returns position and items by key. -func (l List) GetDataByKey(key []byte) (index int, data []interface{}) { +func (l List) GetDataByKey(key []byte) (index int, data []any) { i := sort.Search(len(l.segments), func(i int) bool { return bytes.Compare(l.segments[i].startKey, key) > 0 }) @@ -53,7 +53,7 @@ func (l List) GetDataByKey(key []byte) (index int, data []interface{}) { } // GetData returns position and items by key range. -func (l List) GetData(start, end []byte) (index int, data []interface{}) { +func (l List) GetData(start, end []byte) (index int, data []any) { i := sort.Search(len(l.segments), func(i int) bool { return bytes.Compare(l.segments[i].startKey, start) > 0 }) diff --git a/pkg/schedule/rangelist/range_list_test.go b/pkg/schedule/rangelist/range_list_test.go index d8517705153..5baa7f16f28 100644 --- a/pkg/schedule/rangelist/range_list_test.go +++ b/pkg/schedule/rangelist/range_list_test.go @@ -41,20 +41,20 @@ func TestRangeList(t *testing.T) { key, data := rl.Get(0) re.Nil(key) - re.Equal([]interface{}{1}, data) + re.Equal([]any{1}, data) i, data = rl.GetDataByKey([]byte("foo")) re.Equal(0, i) - re.Equal([]interface{}{1}, data) + re.Equal([]any{1}, data) i, data = rl.GetData([]byte("a"), []byte("b")) re.Equal(0, i) - re.Equal([]interface{}{1}, data) + re.Equal([]any{1}, data) re.Nil(rl.GetSplitKeys(nil, []byte("foo"))) } func TestRangeList2(t *testing.T) { re := require.New(t) b := NewBuilder() - b.SetCompareFunc(func(a, b interface{}) int { + b.SetCompareFunc(func(a, b any) int { if a.(int) > b.(int) { return 1 } @@ -80,7 +80,7 @@ func TestRangeList2(t *testing.T) { expectKeys := [][]byte{ {}, {'a'}, {'b'}, {'c'}, {'d'}, {'e'}, {'f'}, {'g'}, {'h'}, {'i'}, } - expectData := [][]interface{}{ + expectData := [][]any{ {2, 3}, {2, 3, 4}, {4}, {1, 4}, {4}, {}, {3}, {2, 3}, {2}, {}, } diff --git a/pkg/schedule/scatter/region_scatterer.go b/pkg/schedule/scatter/region_scatterer.go index 898c4d052a7..100b9eb764d 100644 --- a/pkg/schedule/scatter/region_scatterer.go +++ b/pkg/schedule/scatter/region_scatterer.go @@ -255,7 +255,7 @@ func (r *RegionScatterer) scatterRegions(regions map[uint64]*core.RegionInfo, fa continue } failpoint.Inject("scatterHbStreamsDrain", func() { - r.opController.GetHBStreams().Drain(1) + _ = r.opController.GetHBStreams().Drain(1) r.opController.RemoveOperator(op, operator.AdminStop) }) } @@ -399,8 +399,8 @@ func (r *RegionScatterer) scatterRegion(region *core.RegionInfo, group string, s if op != nil { scatterSuccessCounter.Inc() r.Put(targetPeers, targetLeader, group) - op.AdditionalInfos["group"] = group - op.AdditionalInfos["leader-picked-count"] = strconv.FormatUint(leaderStorePickedCount, 10) + op.SetAdditionalInfo("group", group) + op.SetAdditionalInfo("leader-picked-count", strconv.FormatUint(leaderStorePickedCount, 10)) op.SetPriorityLevel(constant.High) } 
return op, nil diff --git a/pkg/schedule/scatter/region_scatterer_test.go b/pkg/schedule/scatter/region_scatterer_test.go index af41ed04b76..89e55e5c9c7 100644 --- a/pkg/schedule/scatter/region_scatterer_test.go +++ b/pkg/schedule/scatter/region_scatterer_test.go @@ -216,7 +216,7 @@ func scatterSpecial(re *require.Assertions, numOrdinaryStores, numSpecialStores, leaderStoreID := region.GetLeader().GetStoreId() for _, peer := range region.GetPeers() { storeID := peer.GetStoreId() - store := tc.Stores.GetStore(storeID) + store := tc.GetStore(storeID) if store.GetLabelValue("engine") == "tiflash" { countSpecialPeers[storeID]++ } else { @@ -679,7 +679,7 @@ func TestSelectedStoresTooFewPeers(t *testing.T) { re.NoError(err) re.False(isPeerCountChanged(op)) if op != nil { - re.Equal(group, op.AdditionalInfos["group"]) + re.Equal(group, op.GetAdditionalInfo("group")) } } } diff --git a/pkg/schedule/schedulers/OWNERS b/pkg/schedule/schedulers/OWNERS new file mode 100644 index 00000000000..ae96e4f1f42 --- /dev/null +++ b/pkg/schedule/schedulers/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs at https://go.k8s.io/owners +options: + no_parent_owners: true +filters: + "(OWNERS|hot_region_config\\.go)$": + approvers: + - sig-critical-approvers-config diff --git a/pkg/schedule/schedulers/balance_leader.go b/pkg/schedule/schedulers/balance_leader.go index eb94752944b..910ed86c752 100644 --- a/pkg/schedule/schedulers/balance_leader.go +++ b/pkg/schedule/schedulers/balance_leader.go @@ -74,7 +74,7 @@ type balanceLeaderSchedulerConfig struct { Batch int `json:"batch"` } -func (conf *balanceLeaderSchedulerConfig) Update(data []byte) (int, interface{}) { +func (conf *balanceLeaderSchedulerConfig) Update(data []byte) (int, any) { conf.Lock() defer conf.Unlock() @@ -93,7 +93,7 @@ func (conf *balanceLeaderSchedulerConfig) Update(data []byte) (int, interface{}) log.Info("balance-leader-scheduler config is updated", zap.ByteString("old", oldConfig), zap.ByteString("new", newConfig)) return http.StatusOK, "Config is updated." } - m := make(map[string]interface{}) + m := make(map[string]any) if err := json.Unmarshal(data, &m); err != nil { return http.StatusInternalServerError, err.Error() } @@ -164,7 +164,7 @@ func (handler *balanceLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http handler.rd.JSON(w, httpCode, v) } -func (handler *balanceLeaderHandler) ListConfig(w http.ResponseWriter, r *http.Request) { +func (handler *balanceLeaderHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.Clone() handler.rd.JSON(w, http.StatusOK, conf) } @@ -208,6 +208,13 @@ func (l *balanceLeaderScheduler) ServeHTTP(w http.ResponseWriter, r *http.Reques // BalanceLeaderCreateOption is used to create a scheduler with an option. type BalanceLeaderCreateOption func(s *balanceLeaderScheduler) +// WithBalanceLeaderFilterCounterName sets the filter counter name for the scheduler. +func WithBalanceLeaderFilterCounterName(name string) BalanceLeaderCreateOption { + return func(s *balanceLeaderScheduler) { + s.filterCounter.SetScope(name) + } +} + // WithBalanceLeaderName sets the name for the scheduler. 
func WithBalanceLeaderName(name string) BalanceLeaderCreateOption { return func(s *balanceLeaderScheduler) { @@ -219,7 +226,7 @@ func (l *balanceLeaderScheduler) GetName() string { return l.name } -func (l *balanceLeaderScheduler) GetType() string { +func (*balanceLeaderScheduler) GetType() string { return BalanceLeaderType } @@ -354,6 +361,7 @@ func (l *balanceLeaderScheduler) Schedule(cluster sche.SchedulerCluster, dryRun if dryRun { collector = plan.NewCollector(basePlan) } + defer l.filterCounter.Flush() batch := l.conf.getBatch() balanceLeaderScheduleCounter.Inc() @@ -395,7 +403,6 @@ func (l *balanceLeaderScheduler) Schedule(cluster sche.SchedulerCluster, dryRun } } } - l.filterCounter.Flush() l.retryQuota.GC(append(sourceCandidate.stores, targetCandidate.stores...)) return result, collector.GetPlans() } @@ -553,7 +560,7 @@ func (l *balanceLeaderScheduler) createOperator(solver *solver, collector *plan. } solver.Step++ defer func() { solver.Step-- }() - op, err := operator.CreateTransferLeaderOperator(BalanceLeaderType, solver, solver.Region, solver.Region.GetLeader().GetStoreId(), solver.TargetStoreID(), []uint64{}, operator.OpLeader) + op, err := operator.CreateTransferLeaderOperator(BalanceLeaderType, solver, solver.Region, solver.TargetStoreID(), []uint64{}, operator.OpLeader) if err != nil { log.Debug("fail to create balance leader operator", errs.ZapError(err)) if collector != nil { @@ -567,7 +574,7 @@ func (l *balanceLeaderScheduler) createOperator(solver *solver, collector *plan. op.FinishedCounters = append(op.FinishedCounters, balanceDirectionCounter.WithLabelValues(l.GetName(), solver.SourceMetricLabel(), solver.TargetMetricLabel()), ) - op.AdditionalInfos["sourceScore"] = strconv.FormatFloat(solver.sourceScore, 'f', 2, 64) - op.AdditionalInfos["targetScore"] = strconv.FormatFloat(solver.targetScore, 'f', 2, 64) + op.SetAdditionalInfo("sourceScore", strconv.FormatFloat(solver.sourceScore, 'f', 2, 64)) + op.SetAdditionalInfo("targetScore", strconv.FormatFloat(solver.targetScore, 'f', 2, 64)) return op } diff --git a/pkg/schedule/schedulers/balance_region.go b/pkg/schedule/schedulers/balance_region.go index 1cef3a4615b..bfc1a236481 100644 --- a/pkg/schedule/schedulers/balance_region.go +++ b/pkg/schedule/schedulers/balance_region.go @@ -92,11 +92,18 @@ func WithBalanceRegionName(name string) BalanceRegionCreateOption { } } +// WithBalanceRegionFilterCounterName sets the filter counter name for the scheduler. 
+func WithBalanceRegionFilterCounterName(name string) BalanceRegionCreateOption { + return func(s *balanceRegionScheduler) { + s.filterCounter.SetScope(name) + } +} + func (s *balanceRegionScheduler) GetName() string { return s.conf.Name } -func (s *balanceRegionScheduler) GetType() string { +func (*balanceRegionScheduler) GetType() string { return BalanceRegionType } @@ -114,6 +121,7 @@ func (s *balanceRegionScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster func (s *balanceRegionScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { basePlan := plan.NewBalanceSchedulerPlan() + defer s.filterCounter.Flush() var collector *plan.Collector if dryRun { collector = plan.NewCollector(basePlan) @@ -217,7 +225,6 @@ func (s *balanceRegionScheduler) Schedule(cluster sche.SchedulerCluster, dryRun } s.retryQuota.Attenuate(solver.Source) } - s.filterCounter.Flush() s.retryQuota.GC(stores) return nil, collector.GetPlans() } @@ -278,8 +285,8 @@ func (s *balanceRegionScheduler) transferPeer(solver *solver, collector *plan.Co op.FinishedCounters = append(op.FinishedCounters, balanceDirectionCounter.WithLabelValues(s.GetName(), sourceLabel, targetLabel), ) - op.AdditionalInfos["sourceScore"] = strconv.FormatFloat(solver.sourceScore, 'f', 2, 64) - op.AdditionalInfos["targetScore"] = strconv.FormatFloat(solver.targetScore, 'f', 2, 64) + op.SetAdditionalInfo("sourceScore", strconv.FormatFloat(solver.sourceScore, 'f', 2, 64)) + op.SetAdditionalInfo("targetScore", strconv.FormatFloat(solver.targetScore, 'f', 2, 64)) return op } diff --git a/pkg/schedule/schedulers/balance_test.go b/pkg/schedule/schedulers/balance_test.go index 68332d7067e..26214ed5456 100644 --- a/pkg/schedule/schedulers/balance_test.go +++ b/pkg/schedule/schedulers/balance_test.go @@ -186,7 +186,7 @@ func TestTolerantRatio(t *testing.T) { kind constant.ScheduleKind expectTolerantResource func(constant.ScheduleKind) int64 }{ - {0, constant.ScheduleKind{Resource: constant.LeaderKind, Policy: constant.ByCount}, func(k constant.ScheduleKind) int64 { + {0, constant.ScheduleKind{Resource: constant.LeaderKind, Policy: constant.ByCount}, func(constant.ScheduleKind) int64 { return int64(leaderTolerantSizeRatio) }}, {0, constant.ScheduleKind{Resource: constant.LeaderKind, Policy: constant.BySize}, func(k constant.ScheduleKind) int64 { @@ -198,7 +198,7 @@ func TestTolerantRatio(t *testing.T) { {0, constant.ScheduleKind{Resource: constant.RegionKind, Policy: constant.BySize}, func(k constant.ScheduleKind) int64 { return int64(adjustTolerantRatio(tc, k) * float64(regionSize)) }}, - {10, constant.ScheduleKind{Resource: constant.LeaderKind, Policy: constant.ByCount}, func(k constant.ScheduleKind) int64 { + {10, constant.ScheduleKind{Resource: constant.LeaderKind, Policy: constant.ByCount}, func(constant.ScheduleKind) int64 { return int64(tc.GetScheduleConfig().TolerantSizeRatio) }}, {10, constant.ScheduleKind{Resource: constant.LeaderKind, Policy: constant.BySize}, func(k constant.ScheduleKind) int64 { @@ -697,7 +697,7 @@ func (suite *balanceLeaderRangeSchedulerTestSuite) TestReSortStores() { suite.tc.AddLeaderStore(4, 100) suite.tc.AddLeaderStore(5, 100) suite.tc.AddLeaderStore(6, 0) - stores := suite.tc.Stores.GetStores() + stores := suite.tc.GetStores() sort.Slice(stores, func(i, j int) bool { return stores[i].GetID() < stores[j].GetID() }) diff --git a/pkg/schedule/schedulers/balance_witness.go b/pkg/schedule/schedulers/balance_witness.go index 9994866ac50..aa97874409a 100644 --- 
a/pkg/schedule/schedulers/balance_witness.go +++ b/pkg/schedule/schedulers/balance_witness.go @@ -60,7 +60,7 @@ type balanceWitnessSchedulerConfig struct { Batch int `json:"batch"` } -func (conf *balanceWitnessSchedulerConfig) Update(data []byte) (int, interface{}) { +func (conf *balanceWitnessSchedulerConfig) Update(data []byte) (int, any) { conf.Lock() defer conf.Unlock() @@ -79,7 +79,7 @@ func (conf *balanceWitnessSchedulerConfig) Update(data []byte) (int, interface{} log.Info("balance-witness-scheduler config is updated", zap.ByteString("old", oldc), zap.ByteString("new", newc)) return http.StatusOK, "Config is updated." } - m := make(map[string]interface{}) + m := make(map[string]any) if err := json.Unmarshal(data, &m); err != nil { return http.StatusInternalServerError, err.Error() } @@ -150,7 +150,7 @@ func (handler *balanceWitnessHandler) UpdateConfig(w http.ResponseWriter, r *htt handler.rd.JSON(w, httpCode, v) } -func (handler *balanceWitnessHandler) ListConfig(w http.ResponseWriter, r *http.Request) { +func (handler *balanceWitnessHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.Clone() handler.rd.JSON(w, http.StatusOK, conf) } @@ -214,7 +214,7 @@ func (b *balanceWitnessScheduler) GetName() string { return b.name } -func (b *balanceWitnessScheduler) GetType() string { +func (*balanceWitnessScheduler) GetType() string { return BalanceWitnessType } @@ -378,7 +378,7 @@ func (b *balanceWitnessScheduler) createOperator(solver *solver, collector *plan b.counter.WithLabelValues("move-witness", solver.SourceMetricLabel()+"-out"), b.counter.WithLabelValues("move-witness", solver.TargetMetricLabel()+"-in"), ) - op.AdditionalInfos["sourceScore"] = strconv.FormatFloat(solver.sourceScore, 'f', 2, 64) - op.AdditionalInfos["targetScore"] = strconv.FormatFloat(solver.targetScore, 'f', 2, 64) + op.SetAdditionalInfo("sourceScore", strconv.FormatFloat(solver.sourceScore, 'f', 2, 64)) + op.SetAdditionalInfo("targetScore", strconv.FormatFloat(solver.targetScore, 'f', 2, 64)) return op } diff --git a/pkg/schedule/schedulers/base_scheduler.go b/pkg/schedule/schedulers/base_scheduler.go index f4c8c577767..f3772757ad3 100644 --- a/pkg/schedule/schedulers/base_scheduler.go +++ b/pkg/schedule/schedulers/base_scheduler.go @@ -68,32 +68,32 @@ func NewBaseScheduler(opController *operator.Controller) *BaseScheduler { return &BaseScheduler{OpController: opController} } -func (s *BaseScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { +func (*BaseScheduler) ServeHTTP(w http.ResponseWriter, _ *http.Request) { fmt.Fprintf(w, "not implements") } // GetMinInterval returns the minimal interval for the scheduler -func (s *BaseScheduler) GetMinInterval() time.Duration { +func (*BaseScheduler) GetMinInterval() time.Duration { return MinScheduleInterval } // EncodeConfig encode config for the scheduler -func (s *BaseScheduler) EncodeConfig() ([]byte, error) { +func (*BaseScheduler) EncodeConfig() ([]byte, error) { return EncodeConfig(nil) } // ReloadConfig reloads the config from the storage. // By default, the scheduler does not need to reload the config // if it doesn't support the dynamic configuration. 
-func (s *BaseScheduler) ReloadConfig() error { return nil } +func (*BaseScheduler) ReloadConfig() error { return nil } // GetNextInterval return the next interval for the scheduler -func (s *BaseScheduler) GetNextInterval(interval time.Duration) time.Duration { +func (*BaseScheduler) GetNextInterval(interval time.Duration) time.Duration { return intervalGrow(interval, MaxScheduleInterval, exponentialGrowth) } // PrepareConfig does some prepare work about config. -func (s *BaseScheduler) PrepareConfig(cluster sche.SchedulerCluster) error { return nil } +func (*BaseScheduler) PrepareConfig(sche.SchedulerCluster) error { return nil } // CleanConfig does some cleanup work about config. -func (s *BaseScheduler) CleanConfig(cluster sche.SchedulerCluster) {} +func (*BaseScheduler) CleanConfig(sche.SchedulerCluster) {} diff --git a/pkg/schedule/schedulers/diagnostic_recorder.go b/pkg/schedule/schedulers/diagnostic_recorder.go index b990bdc8f22..df57dbebe71 100644 --- a/pkg/schedule/schedulers/diagnostic_recorder.go +++ b/pkg/schedule/schedulers/diagnostic_recorder.go @@ -86,7 +86,7 @@ func (d *DiagnosticRecorder) GetLastResult() *DiagnosticResult { if d.results.Len() == 0 { return nil } - items := d.results.FromLastSameElems(func(i interface{}) (bool, string) { + items := d.results.FromLastSameElems(func(i any) (bool, string) { result, ok := i.(*DiagnosticResult) if result == nil { return ok, "" diff --git a/pkg/schedule/schedulers/evict_leader.go b/pkg/schedule/schedulers/evict_leader.go index d2759e47d98..3750834a82d 100644 --- a/pkg/schedule/schedulers/evict_leader.go +++ b/pkg/schedule/schedulers/evict_leader.go @@ -40,7 +40,7 @@ const ( EvictLeaderName = "evict-leader-scheduler" // EvictLeaderType is evict leader scheduler type. EvictLeaderType = "evict-leader" - // EvictLeaderBatchSize is the number of operators to to transfer + // EvictLeaderBatchSize is the number of operators to transfer // leaders by one scheduling EvictLeaderBatchSize = 3 lastStoreDeleteInfo = "The last store has been deleted" @@ -118,7 +118,7 @@ func (conf *evictLeaderSchedulerConfig) Persist() error { return conf.storage.SaveSchedulerConfig(name, data) } -func (conf *evictLeaderSchedulerConfig) getSchedulerName() string { +func (*evictLeaderSchedulerConfig) getSchedulerName() string { return EvictLeaderName } @@ -190,11 +190,11 @@ func (s *evictLeaderScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) s.handler.ServeHTTP(w, r) } -func (s *evictLeaderScheduler) GetName() string { +func (*evictLeaderScheduler) GetName() string { return EvictLeaderName } -func (s *evictLeaderScheduler) GetType() string { +func (*evictLeaderScheduler) GetType() string { return EvictLeaderType } @@ -251,7 +251,7 @@ func (s *evictLeaderScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) return allowed } -func (s *evictLeaderScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (s *evictLeaderScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { evictLeaderCounter.Inc() return scheduleEvictLeaderBatch(s.GetName(), s.GetType(), cluster, s.conf, EvictLeaderBatchSize), nil } @@ -338,7 +338,7 @@ func scheduleEvictLeaderOnce(name, typ string, cluster sche.SchedulerCluster, co for _, t := range targets { targetIDs = append(targetIDs, t.GetID()) } - op, err := operator.CreateTransferLeaderOperator(typ, cluster, region, region.GetLeader().GetStoreId(), target.GetID(), targetIDs, operator.OpLeader) + op, err := 
operator.CreateTransferLeaderOperator(typ, cluster, region, target.GetID(), targetIDs, operator.OpLeader) if err != nil { log.Debug("fail to create evict leader operator", errs.ZapError(err)) continue @@ -356,7 +356,7 @@ type evictLeaderHandler struct { } func (handler *evictLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { return } @@ -395,7 +395,7 @@ func (handler *evictLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R handler.rd.JSON(w, http.StatusOK, "The scheduler has been applied to the store.") } -func (handler *evictLeaderHandler) ListConfig(w http.ResponseWriter, r *http.Request) { +func (handler *evictLeaderHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.Clone() handler.rd.JSON(w, http.StatusOK, conf) } @@ -408,7 +408,7 @@ func (handler *evictLeaderHandler) DeleteConfig(w http.ResponseWriter, r *http.R return } - var resp interface{} + var resp any keyRanges := handler.config.getKeyRangesByID(id) succ, last := handler.config.removeStore(id) if succ { diff --git a/pkg/schedule/schedulers/evict_slow_store.go b/pkg/schedule/schedulers/evict_slow_store.go index 79715a6fd44..9b13e292c87 100644 --- a/pkg/schedule/schedulers/evict_slow_store.go +++ b/pkg/schedule/schedulers/evict_slow_store.go @@ -154,7 +154,7 @@ func newEvictSlowStoreHandler(config *evictSlowStoreSchedulerConfig) http.Handle } func (handler *evictSlowStoreHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { return } @@ -177,7 +177,7 @@ func (handler *evictSlowStoreHandler) UpdateConfig(w http.ResponseWriter, r *htt handler.rd.JSON(w, http.StatusOK, "Config updated.") } -func (handler *evictSlowStoreHandler) ListConfig(w http.ResponseWriter, r *http.Request) { +func (handler *evictSlowStoreHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.Clone() handler.rd.JSON(w, http.StatusOK, conf) } @@ -192,11 +192,11 @@ func (s *evictSlowStoreScheduler) ServeHTTP(w http.ResponseWriter, r *http.Reque s.handler.ServeHTTP(w, r) } -func (s *evictSlowStoreScheduler) GetName() string { +func (*evictSlowStoreScheduler) GetName() string { return EvictSlowStoreName } -func (s *evictSlowStoreScheduler) GetType() string { +func (*evictSlowStoreScheduler) GetType() string { return EvictSlowStoreType } @@ -280,9 +280,8 @@ func (s *evictSlowStoreScheduler) IsScheduleAllowed(cluster sche.SchedulerCluste return true } -func (s *evictSlowStoreScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (s *evictSlowStoreScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { evictSlowStoreCounter.Inc() - var ops []*operator.Operator if s.conf.evictStore() != 0 { store := cluster.GetStore(s.conf.evictStore()) @@ -298,7 +297,7 @@ func (s *evictSlowStoreScheduler) Schedule(cluster sche.SchedulerCluster, dryRun return s.schedulerEvictLeader(cluster), nil } s.cleanupEvictLeader(cluster) - return ops, nil + return nil, nil } var slowStore *core.StoreInfo @@ -311,14 +310,14 @@ func (s *evictSlowStoreScheduler) Schedule(cluster sche.SchedulerCluster, dryRun if (store.IsPreparing() || store.IsServing()) && store.IsSlow() { // Do nothing if there is more than one slow 
store. if slowStore != nil { - return ops, nil + return nil, nil } slowStore = store } } if slowStore == nil || slowStore.GetSlowScore() < slowStoreEvictThreshold { - return ops, nil + return nil, nil } // If there is only one slow store, evict leaders from that store. @@ -327,7 +326,7 @@ func (s *evictSlowStoreScheduler) Schedule(cluster sche.SchedulerCluster, dryRun err := s.prepareEvictLeader(cluster, slowStore.GetID()) if err != nil { log.Info("prepare for evicting leader failed", zap.Error(err), zap.Uint64("store-id", slowStore.GetID())) - return ops, nil + return nil, nil } return s.schedulerEvictLeader(cluster), nil } diff --git a/pkg/schedule/schedulers/evict_slow_trend.go b/pkg/schedule/schedulers/evict_slow_trend.go index 20c53219765..da3dbc24e95 100644 --- a/pkg/schedule/schedulers/evict_slow_trend.go +++ b/pkg/schedule/schedulers/evict_slow_trend.go @@ -240,7 +240,7 @@ func newEvictSlowTrendHandler(config *evictSlowTrendSchedulerConfig) http.Handle } func (handler *evictSlowTrendHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { return } @@ -263,7 +263,7 @@ func (handler *evictSlowTrendHandler) UpdateConfig(w http.ResponseWriter, r *htt handler.rd.JSON(w, http.StatusOK, "Config updated.") } -func (handler *evictSlowTrendHandler) ListConfig(w http.ResponseWriter, r *http.Request) { +func (handler *evictSlowTrendHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.Clone() handler.rd.JSON(w, http.StatusOK, conf) } @@ -274,7 +274,7 @@ type evictSlowTrendScheduler struct { handler http.Handler } -func (s *evictSlowTrendScheduler) GetNextInterval(interval time.Duration) time.Duration { +func (s *evictSlowTrendScheduler) GetNextInterval(time.Duration) time.Duration { var growthType intervalGrowthType // If it already found a slow node as candidate, the next interval should be shorter // to make the next scheduling as soon as possible. 
This adjustment will decrease the @@ -291,11 +291,11 @@ func (s *evictSlowTrendScheduler) ServeHTTP(w http.ResponseWriter, r *http.Reque s.handler.ServeHTTP(w, r) } -func (s *evictSlowTrendScheduler) GetName() string { +func (*evictSlowTrendScheduler) GetName() string { return EvictSlowTrendName } -func (s *evictSlowTrendScheduler) GetType() string { +func (*evictSlowTrendScheduler) GetType() string { return EvictSlowTrendType } @@ -384,7 +384,7 @@ func (s *evictSlowTrendScheduler) IsScheduleAllowed(cluster sche.SchedulerCluste return allowed } -func (s *evictSlowTrendScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (s *evictSlowTrendScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { schedulerCounter.WithLabelValues(s.GetName(), "schedule").Inc() var ops []*operator.Operator @@ -597,7 +597,7 @@ func checkStoreSlowerThanOthers(cluster sche.SchedulerCluster, target *core.Stor } slowTrend := store.GetSlowTrend() // Use `SlowTrend.ResultValue` at first, but not good, `CauseValue` is better - // Greater `CuaseValue` means slower + // Greater `CauseValue` means slower if slowTrend != nil && (targetSlowTrend.CauseValue-slowTrend.CauseValue) > alterEpsilon && slowTrend.CauseValue > alterEpsilon { slowerThanStoresNum += 1 } diff --git a/pkg/schedule/schedulers/evict_slow_trend_test.go b/pkg/schedule/schedulers/evict_slow_trend_test.go index 834ef337639..dd6807f4a85 100644 --- a/pkg/schedule/schedulers/evict_slow_trend_test.go +++ b/pkg/schedule/schedulers/evict_slow_trend_test.go @@ -105,7 +105,7 @@ func (suite *evictSlowTrendTestSuite) TestEvictSlowTrendBasicFuncs() { re.Equal(slowCandidate{}, es2.conf.evictCandidate) es2.conf.markCandidateRecovered() lastCapturedCandidate = es2.conf.lastCapturedCandidate() - re.Greater(lastCapturedCandidate.recoverTS.Compare(recoverTS), 0) + re.Positive(lastCapturedCandidate.recoverTS.Compare(recoverTS)) re.Equal(lastCapturedCandidate.storeID, store.GetID()) // Test capture another store 2 diff --git a/pkg/schedule/schedulers/grant_hot_region.go b/pkg/schedule/schedulers/grant_hot_region.go index 81399b58c58..56ed7cd730e 100644 --- a/pkg/schedule/schedulers/grant_hot_region.go +++ b/pkg/schedule/schedulers/grant_hot_region.go @@ -108,7 +108,7 @@ func (conf *grantHotRegionSchedulerConfig) Persist() error { return conf.storage.SaveSchedulerConfig(name, data) } -func (conf *grantHotRegionSchedulerConfig) getSchedulerName() string { +func (*grantHotRegionSchedulerConfig) getSchedulerName() string { return GrantHotRegionName } @@ -137,7 +137,8 @@ type grantHotRegionScheduler struct { // newGrantHotRegionScheduler creates an admin scheduler that transfers hot region peer to fixed store and hot region leader to one store. 
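Note on the assertion change in the evict-slow-trend test above: re.Positive(x) replaces re.Greater(x, 0), matching the testify-style cleanups made throughout this patch. A minimal, self-contained test sketch (the package and test names are illustrative, not part of PD):

    package schedulers_test

    import (
        "testing"

        "github.com/stretchr/testify/require"
    )

    // TestPositiveAssertion contrasts the two equivalent assertion forms; the
    // Positive variant reads better and prints a clearer failure message.
    func TestPositiveAssertion(t *testing.T) {
        re := require.New(t)
        cmp := 1 // stands in for recoverTS.Compare(otherTS)

        re.Positive(cmp)   // preferred form
        re.Greater(cmp, 0) // equivalent, but the longer spelling
    }
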
func newGrantHotRegionScheduler(opController *operator.Controller, conf *grantHotRegionSchedulerConfig) *grantHotRegionScheduler { - base := newBaseHotScheduler(opController) + base := newBaseHotScheduler(opController, + statistics.DefaultHistorySampleDuration, statistics.DefaultHistorySampleInterval) handler := newGrantHotRegionHandler(conf) ret := &grantHotRegionScheduler{ baseHotScheduler: base, @@ -147,11 +148,11 @@ func newGrantHotRegionScheduler(opController *operator.Controller, conf *grantHo return ret } -func (s *grantHotRegionScheduler) GetName() string { +func (*grantHotRegionScheduler) GetName() string { return GrantHotRegionName } -func (s *grantHotRegionScheduler) GetType() string { +func (*grantHotRegionScheduler) GetType() string { return GrantHotRegionType } @@ -203,7 +204,7 @@ type grantHotRegionHandler struct { } func (handler *grantHotRegionHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { return } @@ -255,7 +256,7 @@ func newGrantHotRegionHandler(config *grantHotRegionSchedulerConfig) http.Handle return router } -func (s *grantHotRegionScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (s *grantHotRegionScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { grantHotRegionCounter.Inc() rw := s.randomRWType() s.prepareForBalance(rw, cluster) @@ -351,7 +352,7 @@ func (s *grantHotRegionScheduler) transfer(cluster sche.SchedulerCluster, region dstStore := &metapb.Peer{StoreId: destStoreIDs[i]} if isLeader { - op, err = operator.CreateTransferLeaderOperator(GrantHotRegionType+"-leader", cluster, srcRegion, srcRegion.GetLeader().GetStoreId(), dstStore.StoreId, []uint64{}, operator.OpLeader) + op, err = operator.CreateTransferLeaderOperator(GrantHotRegionType+"-leader", cluster, srcRegion, dstStore.StoreId, []uint64{}, operator.OpLeader) } else { op, err = operator.CreateMovePeerOperator(GrantHotRegionType+"-move", cluster, srcRegion, operator.OpRegion|operator.OpLeader, srcStore.GetID(), dstStore) } diff --git a/pkg/schedule/schedulers/grant_leader.go b/pkg/schedule/schedulers/grant_leader.go index 13f7c5e28d5..5de898489d9 100644 --- a/pkg/schedule/schedulers/grant_leader.go +++ b/pkg/schedule/schedulers/grant_leader.go @@ -98,7 +98,7 @@ func (conf *grantLeaderSchedulerConfig) Persist() error { return conf.storage.SaveSchedulerConfig(name, data) } -func (conf *grantLeaderSchedulerConfig) getSchedulerName() string { +func (*grantLeaderSchedulerConfig) getSchedulerName() string { return GrantLeaderName } @@ -176,11 +176,11 @@ func (s *grantLeaderScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) s.handler.ServeHTTP(w, r) } -func (s *grantLeaderScheduler) GetName() string { +func (*grantLeaderScheduler) GetName() string { return GrantLeaderName } -func (s *grantLeaderScheduler) GetType() string { +func (*grantLeaderScheduler) GetType() string { return GrantLeaderType } @@ -235,7 +235,7 @@ func (s *grantLeaderScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) return allowed } -func (s *grantLeaderScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (s *grantLeaderScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { grantLeaderCounter.Inc() storeIDWithRanges := s.conf.getStoreIDWithRanges() ops := 
make([]*operator.Operator, 0, len(storeIDWithRanges)) @@ -248,7 +248,7 @@ func (s *grantLeaderScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bo continue } - op, err := operator.CreateForceTransferLeaderOperator(GrantLeaderType, cluster, region, region.GetLeader().GetStoreId(), id, operator.OpLeader) + op, err := operator.CreateForceTransferLeaderOperator(GrantLeaderType, cluster, region, id, operator.OpLeader) if err != nil { log.Debug("fail to create grant leader operator", errs.ZapError(err)) continue @@ -267,7 +267,7 @@ type grantLeaderHandler struct { } func (handler *grantLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { return } @@ -306,7 +306,7 @@ func (handler *grantLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R handler.rd.JSON(w, http.StatusOK, "The scheduler has been applied to the store.") } -func (handler *grantLeaderHandler) ListConfig(w http.ResponseWriter, r *http.Request) { +func (handler *grantLeaderHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.Clone() handler.rd.JSON(w, http.StatusOK, conf) } @@ -319,7 +319,7 @@ func (handler *grantLeaderHandler) DeleteConfig(w http.ResponseWriter, r *http.R return } - var resp interface{} + var resp any keyRanges := handler.config.getKeyRangesByID(id) succ, last := handler.config.removeStore(id) if succ { diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index fdd07e85145..5e5e254596a 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -127,13 +127,13 @@ type baseHotScheduler struct { updateWriteTime time.Time } -func newBaseHotScheduler(opController *operator.Controller) *baseHotScheduler { +func newBaseHotScheduler(opController *operator.Controller, sampleDuration time.Duration, sampleInterval time.Duration) *baseHotScheduler { base := NewBaseScheduler(opController) ret := &baseHotScheduler{ BaseScheduler: base, types: []utils.RWType{utils.Write, utils.Read}, regionPendings: make(map[uint64]*pendingInfluence), - stHistoryLoads: statistics.NewStoreHistoryLoads(utils.DimLen), + stHistoryLoads: statistics.NewStoreHistoryLoads(utils.DimLen, sampleDuration, sampleInterval), r: rand.New(rand.NewSource(time.Now().UnixNano())), } for ty := resourceType(0); ty < resourceTypeLen; ty++ { @@ -180,6 +180,10 @@ func (h *baseHotScheduler) prepareForBalance(rw utils.RWType, cluster sche.Sched } } +func (h *baseHotScheduler) updateHistoryLoadConfig(sampleDuration, sampleInterval time.Duration) { + h.stHistoryLoads = h.stHistoryLoads.UpdateConfig(sampleDuration, sampleInterval) +} + // summaryPendingInfluence calculate the summary of pending Influence for each store // and clean the region from regionInfluence if they have ended operator. // It makes each dim rate or count become `weight` times to the origin value. 
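The constructor change above threads the history-sample duration and interval into newBaseHotScheduler instead of relying on package-level defaults, and updateHistoryLoadConfig lets the scheduler pick up new values later. A self-contained sketch of that shape; historyLoads is a stand-in for statistics.StoreHistoryLoads, and the short-circuit when nothing changed is an assumption made for illustration:

    package main

    import (
        "fmt"
        "time"
    )

    // historyLoads is a stand-in for statistics.StoreHistoryLoads; only the
    // sampling parameters are modelled so the reconfiguration flow is visible.
    type historyLoads struct {
        sampleDuration time.Duration
        sampleInterval time.Duration
    }

    func newHistoryLoads(duration, interval time.Duration) *historyLoads {
        return &historyLoads{sampleDuration: duration, sampleInterval: interval}
    }

    // UpdateConfig keeps the existing holder when the parameters are unchanged
    // and otherwise returns a freshly sized one, so a scheduler can call it on
    // every dispatch without paying for a rebuild in the common case.
    func (h *historyLoads) UpdateConfig(duration, interval time.Duration) *historyLoads {
        if h.sampleDuration == duration && h.sampleInterval == interval {
            return h
        }
        return newHistoryLoads(duration, interval)
    }

    func main() {
        h := newHistoryLoads(5*time.Minute, 30*time.Second) // arbitrary example values
        h = h.UpdateConfig(5*time.Minute, 30*time.Second)   // unchanged: same holder
        h = h.UpdateConfig(10*time.Minute, 30*time.Second)  // changed: new holder
        fmt.Println(h.sampleDuration, h.sampleInterval)     // 10m0s 30s
    }
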
@@ -233,7 +237,8 @@ type hotScheduler struct { } func newHotScheduler(opController *operator.Controller, conf *hotRegionSchedulerConfig) *hotScheduler { - base := newBaseHotScheduler(opController) + base := newBaseHotScheduler(opController, + conf.GetHistorySampleDuration(), conf.GetHistorySampleInterval()) ret := &hotScheduler{ name: HotRegionName, baseHotScheduler: base, @@ -249,7 +254,7 @@ func (h *hotScheduler) GetName() string { return h.name } -func (h *hotScheduler) GetType() string { +func (*hotScheduler) GetType() string { return HotRegionType } @@ -292,6 +297,8 @@ func (h *hotScheduler) ReloadConfig() error { h.conf.RankFormulaVersion = newCfg.RankFormulaVersion h.conf.ForbidRWType = newCfg.ForbidRWType h.conf.SplitThresholds = newCfg.SplitThresholds + h.conf.HistorySampleDuration = newCfg.HistorySampleDuration + h.conf.HistorySampleInterval = newCfg.HistorySampleInterval return nil } @@ -299,11 +306,11 @@ func (h *hotScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { h.conf.ServeHTTP(w, r) } -func (h *hotScheduler) GetMinInterval() time.Duration { +func (*hotScheduler) GetMinInterval() time.Duration { return minHotScheduleInterval } -func (h *hotScheduler) GetNextInterval(interval time.Duration) time.Duration { +func (h *hotScheduler) GetNextInterval(time.Duration) time.Duration { return intervalGrow(h.GetMinInterval(), maxHotScheduleInterval, exponentialGrowth) } @@ -315,7 +322,7 @@ func (h *hotScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { return allowed } -func (h *hotScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (h *hotScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { hotSchedulerCounter.Inc() rw := h.randomRWType() return h.dispatch(rw, cluster), nil @@ -324,6 +331,7 @@ func (h *hotScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]* func (h *hotScheduler) dispatch(typ utils.RWType, cluster sche.SchedulerCluster) []*operator.Operator { h.Lock() defer h.Unlock() + h.updateHistoryLoadConfig(h.conf.GetHistorySampleDuration(), h.conf.GetHistorySampleInterval()) h.prepareForBalance(typ, cluster) // it can not move earlier to support to use api and metrics. 
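The hot-scheduler hunks above combine two pieces: ReloadConfig now copies the two new history-sample fields from the persisted config, and dispatch refreshes the derived history-load state from the config on every cycle while holding the scheduler lock. A compact sketch of that flow, with all type and field names illustrative rather than PD's own:

    package main

    import (
        "fmt"
        "sync"
        "time"
    )

    type schedulerConf struct {
        HistorySampleDuration time.Duration
        HistorySampleInterval time.Duration
    }

    type scheduler struct {
        sync.Mutex
        conf           schedulerConf
        sampleDuration time.Duration // derived state, refreshed from conf on dispatch
        sampleInterval time.Duration
    }

    // ReloadConfig copies field by field, as the hot scheduler does above, so
    // unrelated in-memory state on the config is preserved.
    func (s *scheduler) ReloadConfig(newCfg schedulerConf) {
        s.Lock()
        defer s.Unlock()
        s.conf.HistorySampleDuration = newCfg.HistorySampleDuration
        s.conf.HistorySampleInterval = newCfg.HistorySampleInterval
    }

    // dispatch pushes the current config into the derived state under the lock
    // before doing any scheduling work.
    func (s *scheduler) dispatch() {
        s.Lock()
        defer s.Unlock()
        s.sampleDuration, s.sampleInterval = s.conf.HistorySampleDuration, s.conf.HistorySampleInterval
        fmt.Println("scheduling with sample window", s.sampleDuration, "interval", s.sampleInterval)
    }

    func main() {
        s := &scheduler{}
        s.ReloadConfig(schedulerConf{HistorySampleDuration: 10 * time.Minute, HistorySampleInterval: time.Minute})
        s.dispatch()
    }
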
if h.conf.IsForbidRWType(typ) { @@ -1185,7 +1193,7 @@ func (bs *balanceSolver) checkHistoryByPriorityAndToleranceAnyOf(loads [][]float }) } -func (bs *balanceSolver) checkByPriorityAndToleranceFirstOnly(loads []float64, f func(int) bool) bool { +func (bs *balanceSolver) checkByPriorityAndToleranceFirstOnly(_ []float64, f func(int) bool) bool { return f(bs.firstPriority) } @@ -1538,20 +1546,12 @@ func (bs *balanceSolver) buildOperators() (ops []*operator.Operator) { targetLabel := strconv.FormatUint(dstStoreID, 10) dim := bs.rankToDimString() - var createOperator func(region *core.RegionInfo, srcStoreID, dstStoreID uint64) (op *operator.Operator, typ string, err error) - switch bs.rwTy { - case utils.Read: - createOperator = bs.createReadOperator - case utils.Write: - createOperator = bs.createWriteOperator - } - - currentOp, typ, err := createOperator(bs.cur.region, srcStoreID, dstStoreID) + currentOp, typ, err := bs.createOperator(bs.cur.region, srcStoreID, dstStoreID) if err == nil { bs.decorateOperator(currentOp, false, sourceLabel, targetLabel, typ, dim) ops = []*operator.Operator{currentOp} if bs.cur.revertRegion != nil { - currentOp, typ, err = createOperator(bs.cur.revertRegion, dstStoreID, srcStoreID) + currentOp, typ, err = bs.createOperator(bs.cur.revertRegion, dstStoreID, srcStoreID) if err == nil { bs.decorateOperator(currentOp, true, targetLabel, sourceLabel, typ, dim) ops = append(ops, currentOp) @@ -1653,8 +1653,8 @@ func (bs *balanceSolver) splitBucketsByLoad(region *core.RegionInfo, bucketStats } op := bs.splitBucketsOperator(region, [][]byte{splitKey}) if op != nil { - op.AdditionalInfos["accLoads"] = strconv.FormatUint(acc-stats[splitIdx-1].Loads[dim], 10) - op.AdditionalInfos["totalLoads"] = strconv.FormatUint(totalLoads, 10) + op.SetAdditionalInfo("accLoads", strconv.FormatUint(acc-stats[splitIdx-1].Loads[dim], 10)) + op.SetAdditionalInfo("totalLoads", strconv.FormatUint(totalLoads, 10)) } return op } @@ -1717,14 +1717,13 @@ func (bs *balanceSolver) createSplitOperator(regions []*core.RegionInfo, strateg return operators } -func (bs *balanceSolver) createReadOperator(region *core.RegionInfo, srcStoreID, dstStoreID uint64) (op *operator.Operator, typ string, err error) { +func (bs *balanceSolver) createOperator(region *core.RegionInfo, srcStoreID, dstStoreID uint64) (op *operator.Operator, typ string, err error) { if region.GetStorePeer(dstStoreID) != nil { typ = "transfer-leader" op, err = operator.CreateTransferLeaderOperator( - "transfer-hot-read-leader", + "transfer-hot-"+bs.rwTy.String()+"-leader", bs, region, - srcStoreID, dstStoreID, []uint64{}, operator.OpHotRegion) @@ -1734,7 +1733,7 @@ func (bs *balanceSolver) createReadOperator(region *core.RegionInfo, srcStoreID, if region.GetLeader().GetStoreId() == srcStoreID { typ = "move-leader" op, err = operator.CreateMoveLeaderOperator( - "move-hot-read-leader", + "move-hot-"+bs.rwTy.String()+"-leader", bs, region, operator.OpHotRegion, @@ -1743,7 +1742,7 @@ func (bs *balanceSolver) createReadOperator(region *core.RegionInfo, srcStoreID, } else { typ = "move-peer" op, err = operator.CreateMovePeerOperator( - "move-hot-read-peer", + "move-hot-"+bs.rwTy.String()+"-peer", bs, region, operator.OpHotRegion, @@ -1754,32 +1753,6 @@ func (bs *balanceSolver) createReadOperator(region *core.RegionInfo, srcStoreID, return } -func (bs *balanceSolver) createWriteOperator(region *core.RegionInfo, srcStoreID, dstStoreID uint64) (op *operator.Operator, typ string, err error) { - if region.GetStorePeer(dstStoreID) != nil { - typ = 
"transfer-leader" - op, err = operator.CreateTransferLeaderOperator( - "transfer-hot-write-leader", - bs, - region, - srcStoreID, - dstStoreID, - []uint64{}, - operator.OpHotRegion) - } else { - srcPeer := region.GetStorePeer(srcStoreID) // checked in `filterHotPeers` - dstPeer := &metapb.Peer{StoreId: dstStoreID, Role: srcPeer.Role} - typ = "move-peer" - op, err = operator.CreateMovePeerOperator( - "move-hot-write-peer", - bs, - region, - operator.OpHotRegion, - srcStoreID, - dstPeer) - } - return -} - func (bs *balanceSolver) decorateOperator(op *operator.Operator, isRevert bool, sourceLabel, targetLabel, typ, dim string) { op.SetPriorityLevel(constant.High) op.FinishedCounters = append(op.FinishedCounters, diff --git a/pkg/schedule/schedulers/hot_region_config.go b/pkg/schedule/schedulers/hot_region_config.go index 3f9f8b8c669..80d20ca65bb 100644 --- a/pkg/schedule/schedulers/hot_region_config.go +++ b/pkg/schedule/schedulers/hot_region_config.go @@ -26,10 +26,12 @@ import ( "github.com/tikv/pd/pkg/errs" sche "github.com/tikv/pd/pkg/schedule/core" "github.com/tikv/pd/pkg/slice" + "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/statistics/utils" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/reflectutil" "github.com/tikv/pd/pkg/utils/syncutil" + "github.com/tikv/pd/pkg/utils/typeutil" "github.com/tikv/pd/pkg/versioninfo" "github.com/unrolled/render" "go.uber.org/zap" @@ -76,6 +78,8 @@ func initHotRegionScheduleConfig() *hotRegionSchedulerConfig { RankFormulaVersion: "v2", ForbidRWType: "none", SplitThresholds: 0.2, + HistorySampleDuration: typeutil.NewDuration(statistics.DefaultHistorySampleDuration), + HistorySampleInterval: typeutil.NewDuration(statistics.DefaultHistorySampleInterval), } cfg.applyPrioritiesConfig(defaultPrioritiesConfig) return cfg @@ -104,6 +108,8 @@ func (conf *hotRegionSchedulerConfig) getValidConf() *hotRegionSchedulerConfig { RankFormulaVersion: conf.getRankFormulaVersionLocked(), ForbidRWType: conf.getForbidRWTypeLocked(), SplitThresholds: conf.SplitThresholds, + HistorySampleDuration: conf.HistorySampleDuration, + HistorySampleInterval: conf.HistorySampleInterval, } } @@ -147,6 +153,9 @@ type hotRegionSchedulerConfig struct { ForbidRWType string `json:"forbid-rw-type,omitempty"` // SplitThresholds is the threshold to split hot region if the first priority flow of on hot region exceeds it. 
SplitThresholds float64 `json:"split-thresholds"` + + HistorySampleDuration typeutil.Duration `json:"history-sample-duration"` + HistorySampleInterval typeutil.Duration `json:"history-sample-interval"` } func (conf *hotRegionSchedulerConfig) EncodeConfig() ([]byte, error) { @@ -305,6 +314,30 @@ func (conf *hotRegionSchedulerConfig) GetRankFormulaVersion() string { return conf.getRankFormulaVersionLocked() } +func (conf *hotRegionSchedulerConfig) GetHistorySampleDuration() time.Duration { + conf.RLock() + defer conf.RUnlock() + return conf.HistorySampleDuration.Duration +} + +func (conf *hotRegionSchedulerConfig) GetHistorySampleInterval() time.Duration { + conf.RLock() + defer conf.RUnlock() + return conf.HistorySampleInterval.Duration +} + +func (conf *hotRegionSchedulerConfig) SetHistorySampleDuration(d time.Duration) { + conf.Lock() + defer conf.Unlock() + conf.HistorySampleDuration = typeutil.NewDuration(d) +} + +func (conf *hotRegionSchedulerConfig) SetHistorySampleInterval(d time.Duration) { + conf.Lock() + defer conf.Unlock() + conf.HistorySampleInterval = typeutil.NewDuration(d) +} + func (conf *hotRegionSchedulerConfig) getRankFormulaVersionLocked() string { switch conf.RankFormulaVersion { case "v2": @@ -342,7 +375,7 @@ func (conf *hotRegionSchedulerConfig) ServeHTTP(w http.ResponseWriter, r *http.R router.ServeHTTP(w, r) } -func (conf *hotRegionSchedulerConfig) handleGetConfig(w http.ResponseWriter, r *http.Request) { +func (conf *hotRegionSchedulerConfig) handleGetConfig(w http.ResponseWriter, _ *http.Request) { conf.RLock() defer conf.RUnlock() rd := render.New(render.Options{IndentJSON: true}) @@ -426,7 +459,7 @@ func (conf *hotRegionSchedulerConfig) handleSetConfig(w http.ResponseWriter, r * return } - m := make(map[string]interface{}) + m := make(map[string]any) if err := json.Unmarshal(data, &m); err != nil { rd.JSON(w, http.StatusInternalServerError, err.Error()) return diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index 5b1bc3db4b4..304698c915e 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -35,17 +35,23 @@ import ( "github.com/tikv/pd/pkg/storage" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/operatorutil" + "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/pkg/utils/typeutil" "github.com/tikv/pd/pkg/versioninfo" ) func init() { + // TODO: remove this global variable in the future. + // And use a function to create hot schduler for test. schedulePeerPr = 1.0 - RegisterScheduler(utils.Write.String(), func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + // disable denoising in test. 
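The new HistorySampleDuration and HistorySampleInterval fields above are typeutil.Duration values built with typeutil.NewDuration and unwrapped through the embedded .Duration field. A small sketch of that wrapper shape as suggested by the usage in this diff; the real typeutil.Duration may carry extra (un)marshalling behaviour that is not modelled here, and the concrete durations are arbitrary:

    package main

    import (
        "fmt"
        "time"
    )

    // durationWrapper sketches a struct embedding time.Duration, built with a
    // small constructor and read back via the embedded field.
    type durationWrapper struct {
        time.Duration
    }

    func newDuration(d time.Duration) durationWrapper {
        return durationWrapper{Duration: d}
    }

    type hotConfSketch struct {
        HistorySampleDuration durationWrapper `json:"history-sample-duration"`
        HistorySampleInterval durationWrapper `json:"history-sample-interval"`
    }

    func main() {
        cfg := hotConfSketch{
            HistorySampleDuration: newDuration(10 * time.Minute),
            HistorySampleInterval: newDuration(30 * time.Second),
        }
        // The getters added above return the embedded time.Duration directly.
        fmt.Println(cfg.HistorySampleDuration.Duration, cfg.HistorySampleInterval.Duration)
    }
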
+ statistics.Denoising = false + statisticsInterval = 0 + RegisterScheduler(utils.Write.String(), func(opController *operator.Controller, _ endpoint.ConfigStorage, _ ConfigDecoder, _ ...func(string) error) (Scheduler, error) { cfg := initHotRegionScheduleConfig() return newHotWriteScheduler(opController, cfg), nil }) - RegisterScheduler(utils.Read.String(), func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(utils.Read.String(), func(opController *operator.Controller, _ endpoint.ConfigStorage, _ ConfigDecoder, _ ...func(string) error) (Scheduler, error) { return newHotReadScheduler(opController, initHotRegionScheduleConfig()), nil }) } @@ -68,6 +74,11 @@ func clearPendingInfluence(h *hotScheduler) { h.regionPendings = make(map[uint64]*pendingInfluence) } +func newTestRegion(id uint64) *core.RegionInfo { + peers := []*metapb.Peer{{Id: id*100 + 1, StoreId: 1}, {Id: id*100 + 2, StoreId: 2}, {Id: id*100 + 3, StoreId: 3}} + return core.NewRegionInfo(&metapb.Region{Id: id, Peers: peers}, peers[0]) +} + func TestUpgrade(t *testing.T) { re := require.New(t) cancel, _, _, oc := prepareSchedulersTest() @@ -136,7 +147,7 @@ func checkGCPendingOpInfos(re *require.Assertions, enablePlacementRules bool) { case movePeer: op, err = operator.CreateMovePeerOperator("move-peer-test", tc, region, operator.OpAdmin, 2, &metapb.Peer{Id: region.GetID()*10000 + 1, StoreId: 4}) case transferLeader: - op, err = operator.CreateTransferLeaderOperator("transfer-leader-test", tc, region, 1, 2, []uint64{}, operator.OpAdmin) + op, err = operator.CreateTransferLeaderOperator("transfer-leader-test", tc, region, 2, []uint64{}, operator.OpAdmin) } re.NoError(err) re.NotNil(op) @@ -191,26 +202,10 @@ func checkGCPendingOpInfos(re *require.Assertions, enablePlacementRules bool) { } } -func newTestRegion(id uint64) *core.RegionInfo { - peers := []*metapb.Peer{{Id: id*100 + 1, StoreId: 1}, {Id: id*100 + 2, StoreId: 2}, {Id: id*100 + 3, StoreId: 3}} - return core.NewRegionInfo(&metapb.Region{Id: id, Peers: peers}, peers[0]) -} - -func TestHotWriteRegionScheduleByteRateOnly(t *testing.T) { - re := require.New(t) - statistics.Denoising = false - statistics.HistorySampleDuration = 0 - statisticsInterval = 0 - checkHotWriteRegionScheduleByteRateOnly(re, false /* disable placement rules */) - checkHotWriteRegionScheduleByteRateOnly(re, true /* enable placement rules */) -} - func TestSplitIfRegionTooHot(t *testing.T) { re := require.New(t) - statistics.Denoising = false cancel, _, tc, oc := prepareSchedulersTest() defer cancel() - tc.SetHotRegionCacheHitsThreshold(1) hb, err := CreateScheduler(utils.Read.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) b := &metapb.Buckets{ @@ -281,9 +276,7 @@ func TestSplitIfRegionTooHot(t *testing.T) { func TestSplitBucketsBySize(t *testing.T) { re := require.New(t) - statistics.Denoising = false cancel, _, tc, oc := prepareSchedulersTest() - tc.SetHotRegionCacheHitsThreshold(1) tc.SetRegionBucketEnabled(true) defer cancel() hb, err := CreateScheduler(utils.Read.String(), oc, storage.NewStorageWithMemoryBackend(), nil) @@ -334,9 +327,7 @@ func TestSplitBucketsBySize(t *testing.T) { func TestSplitBucketsByLoad(t *testing.T) { re := require.New(t) - statistics.Denoising = false cancel, _, tc, oc := prepareSchedulersTest() - tc.SetHotRegionCacheHitsThreshold(1) tc.SetRegionBucketEnabled(true) defer cancel() hb, err := CreateScheduler(utils.Read.String(), oc, 
storage.NewStorageWithMemoryBackend(), nil) @@ -393,15 +384,76 @@ func TestSplitBucketsByLoad(t *testing.T) { } } +func TestHotWriteRegionScheduleByteRateOnly(t *testing.T) { + re := require.New(t) + checkHotWriteRegionScheduleByteRateOnly(re, false /* disable placement rules */) + checkHotWriteRegionScheduleByteRateOnly(re, true /* enable placement rules */) + checkHotWriteRegionPlacement(re, true) +} + +func checkHotWriteRegionPlacement(re *require.Assertions, enablePlacementRules bool) { + cancel, _, tc, oc := prepareSchedulersTest() + defer cancel() + tc.SetEnableUseJointConsensus(true) + tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.ConfChangeV2)) + tc.SetEnablePlacementRules(enablePlacementRules) + labels := []string{"zone", "host"} + tc.SetMaxReplicasWithLabel(enablePlacementRules, 3, labels...) + hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) + re.NoError(err) + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + + tc.AddLabelsStore(1, 2, map[string]string{"zone": "z1", "host": "h1"}) + tc.AddLabelsStore(2, 2, map[string]string{"zone": "z1", "host": "h2"}) + tc.AddLabelsStore(3, 2, map[string]string{"zone": "z2", "host": "h3"}) + tc.AddLabelsStore(4, 2, map[string]string{"zone": "z2", "host": "h4"}) + tc.AddLabelsStore(5, 2, map[string]string{"zone": "z2", "host": "h5"}) + tc.AddLabelsStore(6, 2, map[string]string{"zone": "z2", "host": "h6"}) + tc.RuleManager.SetRule(&placement.Rule{ + GroupID: "pd", ID: "leader", Role: placement.Leader, Count: 1, LabelConstraints: []placement.LabelConstraint{{Key: "zone", Op: "in", Values: []string{"z1"}}}, + }) + tc.RuleManager.SetRule(&placement.Rule{ + GroupID: "pd", ID: "voter", Role: placement.Follower, Count: 2, LabelConstraints: []placement.LabelConstraint{{Key: "zone", Op: "in", Values: []string{"z2"}}}, + }) + tc.RuleManager.DeleteRule("pd", "default") + + tc.UpdateStorageWrittenBytes(1, 10*units.MiB*utils.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenBytes(2, 0) + tc.UpdateStorageWrittenBytes(3, 6*units.MiB*utils.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenBytes(4, 3*units.MiB*utils.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenBytes(5, 3*units.MiB*utils.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenBytes(6, 6*units.MiB*utils.StoreHeartBeatReportInterval) + + // Region 1, 2 and 3 are hot regions. 
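The test hunks around here trade per-test writes to package globals (statistics.Denoising, statisticsInterval, SetHotRegionCacheHitsThreshold) for either a one-time init or an instance-level knob such as SetHistorySampleDuration(0). A contrast sketch of the two styles, with every name illustrative:

    package main

    import "fmt"

    // denoising mimics the old style: a package-level knob that every test in
    // the package sees and that leaks state between tests.
    var denoising = true

    // confStub and schedulerStub mimic the new style: the knob lives on the
    // instance, so a test only touches the scheduler it created.
    type confStub struct{ historySampleDuration int }

    func (c *confStub) SetHistorySampleDuration(d int) { c.historySampleDuration = d }

    type schedulerStub struct{ conf *confStub }

    func newSchedulerStub() *schedulerStub {
        return &schedulerStub{conf: &confStub{historySampleDuration: 300}}
    }

    func main() {
        denoising = false // global: every other test inherits this

        hb := newSchedulerStub()
        hb.conf.SetHistorySampleDuration(0) // local: scoped to this instance
        fmt.Println(denoising, hb.conf.historySampleDuration)
    }
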
+ addRegionInfo(tc, utils.Write, []testRegionInfo{ + {1, []uint64{1, 3, 5}, 512 * units.KiB, 0, 0}, + {2, []uint64{1, 4, 6}, 512 * units.KiB, 0, 0}, + {3, []uint64{1, 3, 6}, 512 * units.KiB, 0, 0}, + }) + ops, _ := hb.Schedule(tc, false) + re.NotEmpty(ops) + re.NotContains(ops[0].Step(1).String(), "transfer leader") + clearPendingInfluence(hb.(*hotScheduler)) + + tc.RuleManager.SetRule(&placement.Rule{ + GroupID: "pd", ID: "voter", Role: placement.Voter, Count: 2, LabelConstraints: []placement.LabelConstraint{{Key: "zone", Op: "in", Values: []string{"z2"}}}, + }) + tc.RuleManager.DeleteRule("pd", "follower") + ops, _ = hb.Schedule(tc, false) + re.NotEmpty(ops) + re.NotContains(ops[0].Step(1).String(), "transfer leader") +} + func checkHotWriteRegionScheduleByteRateOnly(re *require.Assertions, enablePlacementRules bool) { cancel, opt, tc, oc := prepareSchedulersTest() defer cancel() - tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) + tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.ConfChangeV2)) tc.SetEnablePlacementRules(enablePlacementRules) labels := []string{"zone", "host"} tc.SetMaxReplicasWithLabel(enablePlacementRules, 3, labels...) hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) tc.SetHotRegionCacheHitsThreshold(0) // Add stores 1, 2, 3, 4, 5, 6 with region counts 3, 2, 2, 2, 0, 0. @@ -453,12 +505,14 @@ func checkHotWriteRegionScheduleByteRateOnly(re *require.Assertions, enablePlace switch op.Len() { case 1: // balance by leader selected + re.Equal("transfer-hot-write-leader", op.Desc()) operatorutil.CheckTransferLeaderFrom(re, op, operator.OpHotRegion, 1) - case 4: + case 5: // balance by peer selected + re.Equal("move-hot-write-leader", op.Desc()) if op.RegionID() == 2 { // peer in store 1 of the region 2 can transfer to store 5 or store 6 because of the label - operatorutil.CheckTransferPeerWithLeaderTransferFrom(re, op, operator.OpHotRegion, 1) + operatorutil.CheckTransferPeerWithLeaderTransfer(re, op, operator.OpHotRegion, 1, 0) } else { // peer in store 1 of the region 1,3 can only transfer to store 6 operatorutil.CheckTransferPeerWithLeaderTransfer(re, op, operator.OpHotRegion, 1, 6) @@ -478,10 +532,10 @@ func checkHotWriteRegionScheduleByteRateOnly(re *require.Assertions, enablePlace ops, _ := hb.Schedule(tc, false) op := ops[0] clearPendingInfluence(hb.(*hotScheduler)) - re.Equal(4, op.Len()) + re.Equal(5, op.Len()) if op.RegionID() == 2 { // peer in store 1 of the region 2 can transfer to store 5 or store 6 because of the label - operatorutil.CheckTransferPeerWithLeaderTransferFrom(re, op, operator.OpHotRegion, 1) + operatorutil.CheckTransferPeerWithLeaderTransfer(re, op, operator.OpHotRegion, 1, 0) } else { // peer in store 1 of the region 1,3 can only transfer to store 6 operatorutil.CheckTransferPeerWithLeaderTransfer(re, op, operator.OpHotRegion, 1, 6) @@ -574,12 +628,9 @@ func checkHotWriteRegionScheduleByteRateOnly(re *require.Assertions, enablePlace func TestHotWriteRegionScheduleByteRateOnlyWithTiFlash(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 cancel, _, tc, oc := prepareSchedulersTest() defer cancel() - tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) - tc.SetHotRegionCacheHitsThreshold(0) + tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.ConfChangeV2)) re.NoError(tc.RuleManager.SetRules([]*placement.Rule{ 
{ GroupID: placement.DefaultGroupID, @@ -606,6 +657,7 @@ func TestHotWriteRegionScheduleByteRateOnlyWithTiFlash(t *testing.T) { sche, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb := sche.(*hotScheduler) + hb.conf.SetHistorySampleDuration(0) // Add TiKV stores 1, 2, 3, 4, 5, 6, 7 (Down) with region counts 3, 3, 2, 2, 0, 0, 0. // Add TiFlash stores 8, 9, 10, 11 with region counts 3, 1, 1, 0. @@ -671,9 +723,11 @@ func TestHotWriteRegionScheduleByteRateOnlyWithTiFlash(t *testing.T) { switch op.Len() { case 1: // balance by leader selected + re.Equal("transfer-hot-write-leader", op.Desc()) operatorutil.CheckTransferLeaderFrom(re, op, operator.OpHotRegion, 1) case 2: // balance by peer selected + re.Equal("move-hot-write-leader", op.Desc()) operatorutil.CheckTransferLearner(re, op, operator.OpHotRegion, 8, 10) default: re.FailNow("wrong op: " + op.String()) @@ -764,12 +818,14 @@ func TestHotWriteRegionScheduleByteRateOnlyWithTiFlash(t *testing.T) { switch op.Len() { case 1: // balance by leader selected + re.Equal("transfer-hot-write-leader", op.Desc()) operatorutil.CheckTransferLeaderFrom(re, op, operator.OpHotRegion, 1) - case 4: + case 5: // balance by peer selected + re.Equal("move-hot-write-leader", op.Desc()) if op.RegionID() == 2 { // peer in store 1 of the region 2 can transfer to store 5 or store 6 because of the label - operatorutil.CheckTransferPeerWithLeaderTransferFrom(re, op, operator.OpHotRegion, 1) + operatorutil.CheckTransferPeerWithLeaderTransfer(re, op, operator.OpHotRegion, 1, 0) } else { // peer in store 1 of the region 1,3 can only transfer to store 6 operatorutil.CheckTransferPeerWithLeaderTransfer(re, op, operator.OpHotRegion, 1, 6) @@ -789,16 +845,14 @@ func TestHotWriteRegionScheduleWithQuery(t *testing.T) { }() cancel, _, tc, oc := prepareSchedulersTest() defer cancel() - statistics.Denoising = false - statisticsInterval = 0 hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.QueryPriority, utils.BytePriority} + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) - tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) tc.AddRegionStore(3, 20) @@ -823,8 +877,6 @@ func TestHotWriteRegionScheduleWithQuery(t *testing.T) { func TestHotWriteRegionScheduleWithKeyRate(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 cancel, _, tc, oc := prepareSchedulersTest() defer cancel() @@ -834,8 +886,8 @@ func TestHotWriteRegionScheduleWithKeyRate(t *testing.T) { hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.KeyPriority, utils.BytePriority} hb.(*hotScheduler).conf.RankFormulaVersion = "v1" + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) - tc.SetHotRegionCacheHitsThreshold(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -882,8 +934,6 @@ func TestHotWriteRegionScheduleWithKeyRate(t *testing.T) { func TestHotWriteRegionScheduleUnhealthyStore(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 cancel, _, tc, oc := prepareSchedulersTest() defer cancel() @@ -892,7 +942,6 @@ func TestHotWriteRegionScheduleUnhealthyStore(t *testing.T) { 
hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) - tc.SetHotRegionCacheHitsThreshold(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -930,8 +979,6 @@ func TestHotWriteRegionScheduleUnhealthyStore(t *testing.T) { func TestHotWriteRegionScheduleCheckHot(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 cancel, _, tc, oc := prepareSchedulersTest() defer cancel() @@ -940,7 +987,6 @@ func TestHotWriteRegionScheduleCheckHot(t *testing.T) { hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) - tc.SetHotRegionCacheHitsThreshold(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -965,16 +1011,14 @@ func TestHotWriteRegionScheduleCheckHot(t *testing.T) { func TestHotWriteRegionScheduleWithLeader(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 cancel, _, tc, oc := prepareSchedulersTest() defer cancel() hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.KeyPriority, utils.BytePriority} + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) re.NoError(err) - tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) tc.AddRegionStore(3, 20) @@ -1027,8 +1071,6 @@ func TestHotWriteRegionScheduleWithLeader(t *testing.T) { func TestHotWriteRegionScheduleWithPendingInfluence(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 checkHotWriteRegionScheduleWithPendingInfluence(re, 0) // 0: byte rate checkHotWriteRegionScheduleWithPendingInfluence(re, 1) // 1: key rate } @@ -1040,13 +1082,13 @@ func checkHotWriteRegionScheduleWithPendingInfluence(re *require.Assertions, dim re.NoError(err) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.KeyPriority, utils.BytePriority} hb.(*hotScheduler).conf.RankFormulaVersion = "v1" + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) old := pendingAmpFactor pendingAmpFactor = 0.0 defer func() { pendingAmpFactor = old }() - tc.SetHotRegionCacheHitsThreshold(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -1101,9 +1143,11 @@ func checkHotWriteRegionScheduleWithPendingInfluence(re *require.Assertions, dim switch op.Len() { case 1: // balance by leader selected + re.Equal("transfer-hot-write-leader", op.Desc()) operatorutil.CheckTransferLeaderFrom(re, op, operator.OpHotRegion, 1) - case 4: + case 5: // balance by peer selected + re.Equal("move-hot-write-leader", op.Desc()) operatorutil.CheckTransferPeerWithLeaderTransfer(re, op, operator.OpHotRegion, 1, 4) cnt++ if cnt == 3 { @@ -1119,16 +1163,14 @@ func checkHotWriteRegionScheduleWithPendingInfluence(re *require.Assertions, dim func TestHotWriteRegionScheduleWithRuleEnabled(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 cancel, _, tc, oc := prepareSchedulersTest() defer cancel() tc.SetEnablePlacementRules(true) hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.KeyPriority, utils.BytePriority} + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) 
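These tests consistently reach scheduler-specific config through a type assertion on the value returned by CreateScheduler, e.g. hb.(*hotScheduler).conf.SetHistorySampleDuration(0). A self-contained sketch of that access pattern; Scheduler, hotStub, and hotConfStub are stand-ins, not PD types:

    package main

    import "fmt"

    // Scheduler is a stand-in for the schedulers.Scheduler interface; only the
    // method needed for the sketch is declared.
    type Scheduler interface {
        GetName() string
    }

    type hotConfStub struct{ historySampleDuration int }

    func (c *hotConfStub) SetHistorySampleDuration(d int) { c.historySampleDuration = d }

    type hotStub struct{ conf *hotConfStub }

    func (*hotStub) GetName() string { return "hot-stub" }

    // create returns the interface type, the way CreateScheduler does in tests.
    func create() Scheduler {
        return &hotStub{conf: &hotConfStub{historySampleDuration: 300}}
    }

    func main() {
        hb := create()
        // The type assertion exposes the concrete scheduler so its private
        // config can be tuned for the test.
        hb.(*hotStub).conf.SetHistorySampleDuration(0)
        fmt.Println(hb.GetName(), hb.(*hotStub).conf.historySampleDuration)
    }
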
- tc.SetHotRegionCacheHitsThreshold(0) key, err := hex.DecodeString("") re.NoError(err) // skip stddev check @@ -1210,7 +1252,7 @@ func TestHotReadRegionScheduleByteRateOnly(t *testing.T) { re.NoError(err) hb := scheduler.(*hotScheduler) hb.conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} - tc.SetHotRegionCacheHitsThreshold(0) + hb.conf.SetHistorySampleDuration(0) // Add stores 1, 2, 3, 4, 5 with region counts 3, 2, 2, 2, 0. tc.AddRegionStore(1, 3) @@ -1246,8 +1288,9 @@ func TestHotReadRegionScheduleByteRateOnly(t *testing.T) { {11, []uint64{1, 2, 3}, 7 * units.KiB, 0, 0}, }) - re.True(tc.IsRegionHot(tc.GetRegion(1))) - re.False(tc.IsRegionHot(tc.GetRegion(11))) + testutil.Eventually(re, func() bool { + return tc.IsRegionHot(tc.GetRegion(1)) && !tc.IsRegionHot(tc.GetRegion(11)) + }) // check randomly pick hot region r := tc.HotRegionsFromStore(2, utils.Read) re.Len(r, 3) @@ -1324,8 +1367,6 @@ func TestHotReadRegionScheduleByteRateOnly(t *testing.T) { func TestHotReadRegionScheduleWithQuery(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 cancel, _, tc, oc := prepareSchedulersTest() defer cancel() @@ -1334,8 +1375,8 @@ func TestHotReadRegionScheduleWithQuery(t *testing.T) { hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.RankFormulaVersion = "v1" + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) - tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) tc.AddRegionStore(3, 20) @@ -1359,8 +1400,6 @@ func TestHotReadRegionScheduleWithQuery(t *testing.T) { func TestHotReadRegionScheduleWithKeyRate(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 cancel, _, tc, oc := prepareSchedulersTest() defer cancel() @@ -1370,8 +1409,8 @@ func TestHotReadRegionScheduleWithKeyRate(t *testing.T) { hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) - tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) tc.AddRegionStore(3, 20) @@ -1417,8 +1456,6 @@ func TestHotReadRegionScheduleWithKeyRate(t *testing.T) { func TestHotReadRegionScheduleWithPendingInfluence(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 checkHotReadRegionScheduleWithPendingInfluence(re, 0) // 0: byte rate checkHotReadRegionScheduleWithPendingInfluence(re, 1) // 1: key rate } @@ -1434,13 +1471,13 @@ func checkHotReadRegionScheduleWithPendingInfluence(re *require.Assertions, dim hb.(*hotScheduler).conf.MinorDecRatio = 1 hb.(*hotScheduler).conf.DstToleranceRatio = 1 hb.(*hotScheduler).conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) old := pendingAmpFactor pendingAmpFactor = 0.0 defer func() { pendingAmpFactor = old }() - tc.SetHotRegionCacheHitsThreshold(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -1539,8 +1576,6 @@ func checkHotReadRegionScheduleWithPendingInfluence(re *require.Assertions, dim func TestHotReadWithEvictLeaderScheduler(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 cancel, _, tc, oc := prepareSchedulersTest() defer cancel() @@ -1550,7 +1585,6 @@ 
func TestHotReadWithEvictLeaderScheduler(t *testing.T) { hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.SetStrictPickingStore(false) hb.(*hotScheduler).conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} - tc.SetHotRegionCacheHitsThreshold(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -1582,7 +1616,6 @@ func TestHotCacheUpdateCache(t *testing.T) { re := require.New(t) cancel, _, tc, _ := prepareSchedulersTest() defer cancel() - tc.SetHotRegionCacheHitsThreshold(0) // For read flow addRegionInfo(tc, utils.Read, []testRegionInfo{ @@ -1649,7 +1682,6 @@ func TestHotCacheKeyThresholds(t *testing.T) { { // only a few regions cancel, _, tc, _ := prepareSchedulersTest() defer cancel() - tc.SetHotRegionCacheHitsThreshold(0) addRegionInfo(tc, utils.Read, []testRegionInfo{ {1, []uint64{1, 2, 3}, 0, 1, 0}, {2, []uint64{1, 2, 3}, 0, 1 * units.KiB, 0}, @@ -1721,7 +1753,6 @@ func TestHotCacheByteAndKey(t *testing.T) { re := require.New(t) cancel, _, tc, _ := prepareSchedulersTest() defer cancel() - tc.SetHotRegionCacheHitsThreshold(0) statistics.ThresholdsUpdateInterval = 0 defer func() { statistics.ThresholdsUpdateInterval = 8 * time.Second @@ -2011,13 +2042,10 @@ func checkSortResult(re *require.Assertions, regions []uint64, hotPeers []*stati func TestInfluenceByRWType(t *testing.T) { re := require.New(t) - statistics.HistorySampleDuration = 0 originValue := schedulePeerPr defer func() { schedulePeerPr = originValue }() - statistics.Denoising = false - statisticsInterval = 0 cancel, _, tc, oc := prepareSchedulersTest() defer cancel() @@ -2025,7 +2053,7 @@ func TestInfluenceByRWType(t *testing.T) { re.NoError(err) hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) - tc.SetHotRegionCacheHitsThreshold(0) + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -2139,8 +2167,6 @@ func checkHotReadPeerSchedule(re *require.Assertions, enablePlacementRules bool) func TestHotScheduleWithPriority(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 cancel, _, tc, oc := prepareSchedulersTest() defer cancel() @@ -2148,6 +2174,7 @@ func TestHotScheduleWithPriority(t *testing.T) { re.NoError(err) hb.(*hotScheduler).conf.SetDstToleranceRatio(1.05) hb.(*hotScheduler).conf.SetSrcToleranceRatio(1.05) + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) // skip stddev check origin := stddevThreshold stddevThreshold = -1.0 @@ -2155,7 +2182,6 @@ func TestHotScheduleWithPriority(t *testing.T) { stddevThreshold = origin }() - tc.SetHotRegionCacheHitsThreshold(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -2196,6 +2222,7 @@ func TestHotScheduleWithPriority(t *testing.T) { addRegionInfo(tc, utils.Read, []testRegionInfo{ {1, []uint64{1, 2, 3}, 2 * units.MiB, 2 * units.MiB, 0}, }) + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) hb.(*hotScheduler).conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} ops, _ = hb.Schedule(tc, false) re.Len(ops, 1) @@ -2209,6 +2236,7 @@ func TestHotScheduleWithPriority(t *testing.T) { hb, err = CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) hb.(*hotScheduler).conf.WriteLeaderPriorities = 
[]string{utils.KeyPriority, utils.BytePriority} hb.(*hotScheduler).conf.RankFormulaVersion = "v1" + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) re.NoError(err) // assert loose store picking @@ -2245,9 +2273,6 @@ func TestHotScheduleWithPriority(t *testing.T) { func TestHotScheduleWithStddev(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 - cancel, _, tc, oc := prepareSchedulersTest() defer cancel() hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) @@ -2256,13 +2281,13 @@ func TestHotScheduleWithStddev(t *testing.T) { hb.(*hotScheduler).conf.SetSrcToleranceRatio(1.0) hb.(*hotScheduler).conf.RankFormulaVersion = "v1" tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) - tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) tc.AddRegionStore(3, 20) tc.AddRegionStore(4, 20) tc.AddRegionStore(5, 20) hb.(*hotScheduler).conf.StrictPickingStore = false + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) // skip uniform cluster tc.UpdateStorageWrittenStats(1, 5*units.MiB*utils.StoreHeartBeatReportInterval, 5*units.MiB*utils.StoreHeartBeatReportInterval) @@ -2305,18 +2330,15 @@ func TestHotScheduleWithStddev(t *testing.T) { func TestHotWriteLeaderScheduleWithPriority(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 - cancel, _, tc, oc := prepareSchedulersTest() defer cancel() hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) - tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) tc.AddRegionStore(3, 20) @@ -2348,9 +2370,6 @@ func TestHotWriteLeaderScheduleWithPriority(t *testing.T) { func TestCompatibility(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 - cancel, _, tc, oc := prepareSchedulersTest() defer cancel() hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) @@ -2442,7 +2461,7 @@ func TestCompatibilityConfig(t *testing.T) { // from 4.0 or 5.0 or 5.1 cluster var data []byte storage := storage.NewStorageWithMemoryBackend() - data, err = EncodeConfig(map[string]interface{}{ + data, err = EncodeConfig(map[string]any{ "min-hot-byte-rate": 100, "min-hot-key-rate": 10, "max-zombie-rounds": 3, diff --git a/pkg/schedule/schedulers/hot_region_v2.go b/pkg/schedule/schedulers/hot_region_v2.go index 04ba0fc978f..50016231cad 100644 --- a/pkg/schedule/schedulers/hot_region_v2.go +++ b/pkg/schedule/schedulers/hot_region_v2.go @@ -138,7 +138,7 @@ func (bs *balanceSolver) filterUniformStoreV2() (string, bool) { if !bs.enableExpectation() { return "", false } - // Because region is available for src and dst, so stddev is the same for both, only need to calcurate one. + // Because region is available for src and dst, so stddev is the same for both, only need to calculate one. isUniformFirstPriority, isUniformSecondPriority := bs.isUniformFirstPriority(bs.cur.srcStore), bs.isUniformSecondPriority(bs.cur.srcStore) if isUniformFirstPriority && isUniformSecondPriority { // If both dims are enough uniform, any schedule is unnecessary. 
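Many hunks in this patch, including the hot_region_v2.go and init.go changes that follow, strip identifiers the compiler never reads: unused receivers lose their names, unused parameters become `_` or keep only their types, and interface{} is written as its alias any. A compact standalone sketch of all three, with illustrative names:

    package main

    import "fmt"

    type solverStub struct{}

    // Unused receiver: the type stays in the signature so the method set is
    // unchanged, but the name disappears because the body never reads it.
    func (*solverStub) version() string { return "v2" }

    // Unused parameter: keeping the type documents the call shape; the blank
    // identifier tells the reader (and linters) it is intentionally ignored.
    func schedule(_ bool) []string { return nil }

    // any is a built-in alias for interface{} since Go 1.18, so decoders written
    // as func(v any) error accept exactly the same arguments as before.
    func decode(v any) error {
        fmt.Printf("decoding %T\n", v)
        return nil
    }

    func main() {
        var s solverStub
        fmt.Println(s.version())
        fmt.Println(len(schedule(true)))
        _ = decode(map[string]any{"min-hot-byte-rate": 100})
    }
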
@@ -457,13 +457,13 @@ func (bs *balanceSolver) betterThanV2(old *solution) bool { if bs.cur.mainPeerStat != old.mainPeerStat { // We will firstly consider ensuring converge faster, secondly reduce oscillation if bs.resourceTy == writeLeader { - return bs.getRkCmpByPriorityV2(bs.firstPriority, bs.cur.firstScore, old.firstScore, + return getRkCmpByPriorityV2(bs.firstPriority, bs.cur.firstScore, old.firstScore, bs.cur.getPeersRateFromCache(bs.firstPriority), old.getPeersRateFromCache(bs.firstPriority)) > 0 } - firstCmp := bs.getRkCmpByPriorityV2(bs.firstPriority, bs.cur.firstScore, old.firstScore, + firstCmp := getRkCmpByPriorityV2(bs.firstPriority, bs.cur.firstScore, old.firstScore, bs.cur.getPeersRateFromCache(bs.firstPriority), old.getPeersRateFromCache(bs.firstPriority)) - secondCmp := bs.getRkCmpByPriorityV2(bs.secondPriority, bs.cur.secondScore, old.secondScore, + secondCmp := getRkCmpByPriorityV2(bs.secondPriority, bs.cur.secondScore, old.secondScore, bs.cur.getPeersRateFromCache(bs.secondPriority), old.getPeersRateFromCache(bs.secondPriority)) switch bs.cur.progressiveRank { case -4, -3, -2: // firstPriority @@ -482,7 +482,7 @@ func (bs *balanceSolver) betterThanV2(old *solution) bool { return false } -func (bs *balanceSolver) getRkCmpByPriorityV2(dim int, curScore, oldScore int, curPeersRate, oldPeersRate float64) int { +func getRkCmpByPriorityV2(dim int, curScore, oldScore int, curPeersRate, oldPeersRate float64) int { switch { case curScore > oldScore: return 1 diff --git a/pkg/schedule/schedulers/hot_region_v2_test.go b/pkg/schedule/schedulers/hot_region_v2_test.go index f5e21e02981..25d6d94f7b1 100644 --- a/pkg/schedule/schedulers/hot_region_v2_test.go +++ b/pkg/schedule/schedulers/hot_region_v2_test.go @@ -21,7 +21,6 @@ import ( "github.com/stretchr/testify/require" "github.com/tikv/pd/pkg/mock/mockcluster" "github.com/tikv/pd/pkg/schedule/operator" - "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/statistics/utils" "github.com/tikv/pd/pkg/storage" "github.com/tikv/pd/pkg/utils/operatorutil" @@ -33,17 +32,14 @@ func TestHotWriteRegionScheduleWithRevertRegionsDimSecond(t *testing.T) { re := require.New(t) cancel, _, tc, oc := prepareSchedulersTest() defer cancel() - statistics.Denoising = false - statisticsInterval = 0 - statistics.HistorySampleDuration = 0 sche, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil, nil) re.NoError(err) hb := sche.(*hotScheduler) hb.conf.SetDstToleranceRatio(0.0) hb.conf.SetSrcToleranceRatio(0.0) hb.conf.SetRankFormulaVersion("v1") + hb.conf.SetHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) - tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) tc.AddRegionStore(3, 20) @@ -96,16 +92,14 @@ func TestHotWriteRegionScheduleWithRevertRegionsDimFirst(t *testing.T) { re := require.New(t) cancel, _, tc, oc := prepareSchedulersTest() defer cancel() - statistics.Denoising = false - sche, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil, nil) re.NoError(err) hb := sche.(*hotScheduler) hb.conf.SetDstToleranceRatio(0.0) hb.conf.SetSrcToleranceRatio(0.0) hb.conf.SetRankFormulaVersion("v1") + hb.conf.SetHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) - tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) tc.AddRegionStore(3, 20) @@ -147,10 +141,6 @@ func 
TestHotWriteRegionScheduleWithRevertRegionsDimFirst(t *testing.T) { func TestHotWriteRegionScheduleWithRevertRegionsDimFirstOnly(t *testing.T) { // This is a test that searchRevertRegions finds a solution of rank -2. re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 - statistics.HistorySampleDuration = 0 - cancel, _, tc, oc := prepareSchedulersTest() defer cancel() sche, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil, nil) @@ -159,8 +149,8 @@ func TestHotWriteRegionScheduleWithRevertRegionsDimFirstOnly(t *testing.T) { hb.conf.SetDstToleranceRatio(0.0) hb.conf.SetSrcToleranceRatio(0.0) hb.conf.SetRankFormulaVersion("v1") + hb.conf.SetHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) - tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) tc.AddRegionStore(3, 20) @@ -211,10 +201,6 @@ func TestHotWriteRegionScheduleWithRevertRegionsDimFirstOnly(t *testing.T) { func TestHotReadRegionScheduleWithRevertRegionsDimSecond(t *testing.T) { // This is a test that searchRevertRegions finds a solution of rank -1. re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 - statistics.HistorySampleDuration = 0 - cancel, _, tc, oc := prepareSchedulersTest() defer cancel() sche, err := CreateScheduler(utils.Read.String(), oc, storage.NewStorageWithMemoryBackend(), nil, nil) @@ -223,8 +209,8 @@ func TestHotReadRegionScheduleWithRevertRegionsDimSecond(t *testing.T) { hb.conf.SetDstToleranceRatio(0.0) hb.conf.SetSrcToleranceRatio(0.0) hb.conf.SetRankFormulaVersion("v1") + hb.conf.SetHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) - tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) tc.AddRegionStore(3, 20) @@ -274,10 +260,6 @@ func TestHotReadRegionScheduleWithRevertRegionsDimSecond(t *testing.T) { func TestSkipUniformStore(t *testing.T) { re := require.New(t) - statistics.Denoising = false - statisticsInterval = 0 - statistics.HistorySampleDuration = 0 - cancel, _, tc, oc := prepareSchedulersTest() defer cancel() hb, err := CreateScheduler(utils.Read.String(), oc, storage.NewStorageWithMemoryBackend(), nil, nil) @@ -286,7 +268,7 @@ func TestSkipUniformStore(t *testing.T) { hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.SetRankFormulaVersion("v2") hb.(*hotScheduler).conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} - tc.SetHotRegionCacheHitsThreshold(0) + hb.(*hotScheduler).conf.SetHistorySampleDuration(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) tc.AddRegionStore(3, 20) @@ -438,7 +420,6 @@ func checkHotReadRegionScheduleWithSmallHotRegion(re *require.Assertions, highLo addOtherRegions func(*mockcluster.Cluster, *hotScheduler)) []*operator.Operator { cancel, _, tc, oc := prepareSchedulersTest() defer cancel() - statistics.Denoising = false sche, err := CreateScheduler(utils.Read.String(), oc, storage.NewStorageWithMemoryBackend(), nil, nil) re.NoError(err) hb := sche.(*hotScheduler) @@ -446,7 +427,6 @@ func checkHotReadRegionScheduleWithSmallHotRegion(re *require.Assertions, highLo hb.conf.SetDstToleranceRatio(1) hb.conf.SetRankFormulaVersion("v2") hb.conf.ReadPriorities = []string{utils.QueryPriority, utils.BytePriority} - tc.SetHotRegionCacheHitsThreshold(0) tc.AddRegionStore(1, 40) tc.AddRegionStore(2, 10) tc.AddRegionStore(3, 10) @@ -469,7 +449,6 @@ func 
checkHotReadRegionScheduleWithSmallHotRegion(re *require.Assertions, highLo } } addRegionInfo(tc, utils.Read, regions) - tc.SetHotRegionCacheHitsThreshold(1) addOtherRegions(tc, hb) ops, _ := hb.Schedule(tc, false) return ops diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index 57eb4b90985..6bca686404d 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -37,7 +37,7 @@ func Register() { func schedulersRegister() { // balance leader RegisterSliceDecoderBuilder(BalanceLeaderType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + return func(v any) error { conf, ok := v.(*balanceLeaderSchedulerConfig) if !ok { return errs.ErrScheduleConfigNotExist.FastGenByArgs() @@ -52,7 +52,7 @@ func schedulersRegister() { } }) - RegisterScheduler(BalanceLeaderType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(BalanceLeaderType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := &balanceLeaderSchedulerConfig{storage: storage} if err := decoder(conf); err != nil { return nil, err @@ -65,7 +65,7 @@ func schedulersRegister() { // balance region RegisterSliceDecoderBuilder(BalanceRegionType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + return func(v any) error { conf, ok := v.(*balanceRegionSchedulerConfig) if !ok { return errs.ErrScheduleConfigNotExist.FastGenByArgs() @@ -80,7 +80,7 @@ func schedulersRegister() { } }) - RegisterScheduler(BalanceRegionType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(BalanceRegionType, func(opController *operator.Controller, _ endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := &balanceRegionSchedulerConfig{} if err := decoder(conf); err != nil { return nil, err @@ -90,7 +90,7 @@ func schedulersRegister() { // balance witness RegisterSliceDecoderBuilder(BalanceWitnessType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + return func(v any) error { conf, ok := v.(*balanceWitnessSchedulerConfig) if !ok { return errs.ErrScheduleConfigNotExist.FastGenByArgs() @@ -105,7 +105,7 @@ func schedulersRegister() { } }) - RegisterScheduler(BalanceWitnessType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(BalanceWitnessType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := &balanceWitnessSchedulerConfig{storage: storage} if err := decoder(conf); err != nil { return nil, err @@ -118,7 +118,7 @@ func schedulersRegister() { // evict leader RegisterSliceDecoderBuilder(EvictLeaderType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + return func(v any) error { if len(args) != 1 { return errs.ErrSchedulerConfig.FastGenByArgs("id") } @@ -152,13 +152,13 @@ func schedulersRegister() { }) // evict slow store - RegisterSliceDecoderBuilder(EvictSlowStoreType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + RegisterSliceDecoderBuilder(EvictSlowStoreType, func([]string) 
ConfigDecoder { + return func(any) error { return nil } }) - RegisterScheduler(EvictSlowStoreType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(EvictSlowStoreType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := initEvictSlowStoreSchedulerConfig(storage) if err := decoder(conf); err != nil { return nil, err @@ -169,7 +169,7 @@ func schedulersRegister() { // grant hot region RegisterSliceDecoderBuilder(GrantHotRegionType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + return func(v any) error { if len(args) != 2 { return errs.ErrSchedulerConfig.FastGenByArgs("id") } @@ -198,7 +198,7 @@ func schedulersRegister() { } }) - RegisterScheduler(GrantHotRegionType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(GrantHotRegionType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := &grantHotRegionSchedulerConfig{StoreIDs: make([]uint64, 0), storage: storage} conf.cluster = opController.GetCluster() if err := decoder(conf); err != nil { @@ -208,15 +208,15 @@ func schedulersRegister() { }) // hot region - RegisterSliceDecoderBuilder(HotRegionType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + RegisterSliceDecoderBuilder(HotRegionType, func([]string) ConfigDecoder { + return func(any) error { return nil } }) - RegisterScheduler(HotRegionType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(HotRegionType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := initHotRegionScheduleConfig() - var data map[string]interface{} + var data map[string]any if err := decoder(&data); err != nil { return nil, err } @@ -236,7 +236,7 @@ func schedulersRegister() { // grant leader RegisterSliceDecoderBuilder(GrantLeaderType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + return func(v any) error { if len(args) != 1 { return errs.ErrSchedulerConfig.FastGenByArgs("id") } @@ -271,7 +271,7 @@ func schedulersRegister() { // label RegisterSliceDecoderBuilder(LabelType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + return func(v any) error { conf, ok := v.(*labelSchedulerConfig) if !ok { return errs.ErrScheduleConfigNotExist.FastGenByArgs() @@ -286,7 +286,7 @@ func schedulersRegister() { } }) - RegisterScheduler(LabelType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(LabelType, func(opController *operator.Controller, _ endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := &labelSchedulerConfig{} if err := decoder(conf); err != nil { return nil, err @@ -296,7 +296,7 @@ func schedulersRegister() { // random merge RegisterSliceDecoderBuilder(RandomMergeType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + return func(v any) error { conf, ok := 
v.(*randomMergeSchedulerConfig) if !ok { return errs.ErrScheduleConfigNotExist.FastGenByArgs() @@ -311,7 +311,7 @@ func schedulersRegister() { } }) - RegisterScheduler(RandomMergeType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(RandomMergeType, func(opController *operator.Controller, _ endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := &randomMergeSchedulerConfig{} if err := decoder(conf); err != nil { return nil, err @@ -322,7 +322,7 @@ func schedulersRegister() { // scatter range // args: [start-key, end-key, range-name]. RegisterSliceDecoderBuilder(ScatterRangeType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + return func(v any) error { if len(args) != 3 { return errs.ErrSchedulerConfig.FastGenByArgs("ranges and name") } @@ -340,7 +340,7 @@ func schedulersRegister() { } }) - RegisterScheduler(ScatterRangeType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(ScatterRangeType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := &scatterRangeSchedulerConfig{ storage: storage, } @@ -356,7 +356,7 @@ func schedulersRegister() { // shuffle hot region RegisterSliceDecoderBuilder(ShuffleHotRegionType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + return func(v any) error { conf, ok := v.(*shuffleHotRegionSchedulerConfig) if !ok { return errs.ErrScheduleConfigNotExist.FastGenByArgs() @@ -374,7 +374,7 @@ func schedulersRegister() { } }) - RegisterScheduler(ShuffleHotRegionType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(ShuffleHotRegionType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := &shuffleHotRegionSchedulerConfig{Limit: uint64(1)} if err := decoder(conf); err != nil { return nil, err @@ -385,7 +385,7 @@ func schedulersRegister() { // shuffle leader RegisterSliceDecoderBuilder(ShuffleLeaderType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + return func(v any) error { conf, ok := v.(*shuffleLeaderSchedulerConfig) if !ok { return errs.ErrScheduleConfigNotExist.FastGenByArgs() @@ -400,7 +400,7 @@ func schedulersRegister() { } }) - RegisterScheduler(ShuffleLeaderType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(ShuffleLeaderType, func(opController *operator.Controller, _ endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := &shuffleLeaderSchedulerConfig{} if err := decoder(conf); err != nil { return nil, err @@ -410,7 +410,7 @@ func schedulersRegister() { // shuffle region RegisterSliceDecoderBuilder(ShuffleRegionType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + return func(v any) error { conf, ok := v.(*shuffleRegionSchedulerConfig) if !ok { return errs.ErrScheduleConfigNotExist.FastGenByArgs() @@ -425,7 +425,7 @@ func schedulersRegister() { } }) - 
RegisterScheduler(ShuffleRegionType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(ShuffleRegionType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := &shuffleRegionSchedulerConfig{storage: storage} if err := decoder(conf); err != nil { return nil, err @@ -434,13 +434,13 @@ func schedulersRegister() { }) // split bucket - RegisterSliceDecoderBuilder(SplitBucketType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + RegisterSliceDecoderBuilder(SplitBucketType, func([]string) ConfigDecoder { + return func(any) error { return nil } }) - RegisterScheduler(SplitBucketType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(SplitBucketType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := initSplitBucketConfig() if err := decoder(conf); err != nil { return nil, err @@ -450,24 +450,24 @@ func schedulersRegister() { }) // transfer witness leader - RegisterSliceDecoderBuilder(TransferWitnessLeaderType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + RegisterSliceDecoderBuilder(TransferWitnessLeaderType, func([]string) ConfigDecoder { + return func(any) error { return nil } }) - RegisterScheduler(TransferWitnessLeaderType, func(opController *operator.Controller, _ endpoint.ConfigStorage, _ ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(TransferWitnessLeaderType, func(opController *operator.Controller, _ endpoint.ConfigStorage, _ ConfigDecoder, _ ...func(string) error) (Scheduler, error) { return newTransferWitnessLeaderScheduler(opController), nil }) // evict slow store by trend - RegisterSliceDecoderBuilder(EvictSlowTrendType, func(args []string) ConfigDecoder { - return func(v interface{}) error { + RegisterSliceDecoderBuilder(EvictSlowTrendType, func([]string) ConfigDecoder { + return func(any) error { return nil } }) - RegisterScheduler(EvictSlowTrendType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, removeSchedulerCb ...func(string) error) (Scheduler, error) { + RegisterScheduler(EvictSlowTrendType, func(opController *operator.Controller, storage endpoint.ConfigStorage, decoder ConfigDecoder, _ ...func(string) error) (Scheduler, error) { conf := initEvictSlowTrendSchedulerConfig(storage) if err := decoder(conf); err != nil { return nil, err diff --git a/pkg/schedule/schedulers/label.go b/pkg/schedule/schedulers/label.go index 90310bcf10e..24875e3e26a 100644 --- a/pkg/schedule/schedulers/label.go +++ b/pkg/schedule/schedulers/label.go @@ -68,7 +68,7 @@ func (s *labelScheduler) GetName() string { return s.conf.Name } -func (s *labelScheduler) GetType() string { +func (*labelScheduler) GetType() string { return LabelType } @@ -84,7 +84,7 @@ func (s *labelScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { return allowed } -func (s *labelScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (s *labelScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { labelCounter.Inc() stores := 
cluster.GetStores() rejectLeaderStores := make(map[uint64]struct{}) @@ -119,7 +119,7 @@ func (s *labelScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([ continue } - op, err := operator.CreateTransferLeaderOperator("label-reject-leader", cluster, region, id, target.GetID(), []uint64{}, operator.OpLeader) + op, err := operator.CreateTransferLeaderOperator("label-reject-leader", cluster, region, target.GetID(), []uint64{}, operator.OpLeader) if err != nil { log.Debug("fail to create transfer label reject leader operator", errs.ZapError(err)) return nil, nil diff --git a/pkg/schedule/schedulers/random_merge.go b/pkg/schedule/schedulers/random_merge.go index 44bb5081ef9..7fec0bd9530 100644 --- a/pkg/schedule/schedulers/random_merge.go +++ b/pkg/schedule/schedulers/random_merge.go @@ -70,7 +70,7 @@ func (s *randomMergeScheduler) GetName() string { return s.conf.Name } -func (s *randomMergeScheduler) GetType() string { +func (*randomMergeScheduler) GetType() string { return RandomMergeType } @@ -86,7 +86,7 @@ func (s *randomMergeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) return allowed } -func (s *randomMergeScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (s *randomMergeScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { randomMergeCounter.Inc() store := filter.NewCandidates(cluster.GetStores()). @@ -113,7 +113,7 @@ func (s *randomMergeScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bo return nil, nil } - if !s.allowMerge(cluster, region, target) { + if !allowMerge(cluster, region, target) { randomMergeNotAllowedCounter.Inc() return nil, nil } @@ -129,7 +129,7 @@ func (s *randomMergeScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bo return ops, nil } -func (s *randomMergeScheduler) allowMerge(cluster sche.SchedulerCluster, region, target *core.RegionInfo) bool { +func allowMerge(cluster sche.SchedulerCluster, region, target *core.RegionInfo) bool { if !filter.IsRegionHealthy(region) || !filter.IsRegionHealthy(target) { return false } diff --git a/pkg/schedule/schedulers/scatter_range.go b/pkg/schedule/schedulers/scatter_range.go index 977d8cff05c..daa3c5cc5c1 100644 --- a/pkg/schedule/schedulers/scatter_range.go +++ b/pkg/schedule/schedulers/scatter_range.go @@ -138,11 +138,13 @@ func newScatterRangeScheduler(opController *operator.Controller, config *scatter opController, &balanceLeaderSchedulerConfig{Ranges: []core.KeyRange{core.NewKeyRange("", "")}}, WithBalanceLeaderName("scatter-range-leader"), + WithBalanceLeaderFilterCounterName("scatter-range-leader"), ), balanceRegion: newBalanceRegionScheduler( opController, &balanceRegionSchedulerConfig{Ranges: []core.KeyRange{core.NewKeyRange("", "")}}, WithBalanceRegionName("scatter-range-region"), + WithBalanceRegionFilterCounterName("scatter-range-region"), ), } return scheduler @@ -156,7 +158,7 @@ func (l *scatterRangeScheduler) GetName() string { return l.name } -func (l *scatterRangeScheduler) GetType() string { +func (*scatterRangeScheduler) GetType() string { return ScatterRangeType } @@ -206,7 +208,7 @@ func (l *scatterRangeScheduler) allowBalanceRegion(cluster sche.SchedulerCluster return allowed } -func (l *scatterRangeScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (l *scatterRangeScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { scatterRangeCounter.Inc() // isolate a new 
cluster according to the key range c := genRangeCluster(cluster, l.config.GetStartKey(), l.config.GetEndKey()) @@ -245,7 +247,7 @@ type scatterRangeHandler struct { } func (handler *scatterRangeHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { return } @@ -282,7 +284,7 @@ func (handler *scatterRangeHandler) UpdateConfig(w http.ResponseWriter, r *http. handler.rd.JSON(w, http.StatusOK, nil) } -func (handler *scatterRangeHandler) ListConfig(w http.ResponseWriter, r *http.Request) { +func (handler *scatterRangeHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.Clone() handler.rd.JSON(w, http.StatusOK, conf) } diff --git a/pkg/schedule/schedulers/scheduler.go b/pkg/schedule/schedulers/scheduler.go index 38fc8f5607d..abace59a266 100644 --- a/pkg/schedule/schedulers/scheduler.go +++ b/pkg/schedule/schedulers/scheduler.go @@ -49,7 +49,7 @@ type Scheduler interface { } // EncodeConfig encode the custom config for each scheduler. -func EncodeConfig(v interface{}) ([]byte, error) { +func EncodeConfig(v any) ([]byte, error) { marshaled, err := json.Marshal(v) if err != nil { return nil, errs.ErrJSONMarshal.Wrap(err) @@ -58,7 +58,7 @@ func EncodeConfig(v interface{}) ([]byte, error) { } // DecodeConfig decode the custom config for each scheduler. -func DecodeConfig(data []byte, v interface{}) error { +func DecodeConfig(data []byte, v any) error { err := json.Unmarshal(data, v) if err != nil { return errs.ErrJSONUnmarshal.Wrap(err) @@ -67,10 +67,10 @@ func DecodeConfig(data []byte, v interface{}) error { } // ToPayload returns the payload of config. -func ToPayload(sches, configs []string) map[string]interface{} { - payload := make(map[string]interface{}) +func ToPayload(sches, configs []string) map[string]any { + payload := make(map[string]any) for i, sche := range sches { - var config interface{} + var config any err := DecodeConfig([]byte(configs[i]), &config) if err != nil { log.Error("failed to decode scheduler config", @@ -85,14 +85,14 @@ func ToPayload(sches, configs []string) map[string]interface{} { } // ConfigDecoder used to decode the config. -type ConfigDecoder func(v interface{}) error +type ConfigDecoder func(v any) error // ConfigSliceDecoderBuilder used to build slice decoder of the config. type ConfigSliceDecoderBuilder func([]string) ConfigDecoder // ConfigJSONDecoder used to build a json decoder of the config. 
func ConfigJSONDecoder(data []byte) ConfigDecoder { - return func(v interface{}) error { + return func(v any) error { return DecodeConfig(data, v) } } @@ -101,7 +101,7 @@ func ConfigJSONDecoder(data []byte) ConfigDecoder { func ConfigSliceDecoder(name string, args []string) ConfigDecoder { builder, ok := schedulerArgsToDecoder[name] if !ok { - return func(v interface{}) error { + return func(any) error { return errors.Errorf("the config decoder do not register for %s", name) } } diff --git a/pkg/schedule/schedulers/scheduler_controller.go b/pkg/schedule/schedulers/scheduler_controller.go index 818f02685ea..ea480a06845 100644 --- a/pkg/schedule/schedulers/scheduler_controller.go +++ b/pkg/schedule/schedulers/scheduler_controller.go @@ -48,7 +48,7 @@ type Controller struct { ctx context.Context cluster sche.SchedulerCluster storage endpoint.ConfigStorage - // schedulers is used to manage all schedulers, which will only be initialized + // schedulers are used to manage all schedulers, which will only be initialized // and used in the PD leader service mode now. schedulers map[string]*ScheduleController // schedulerHandlers is used to manage the HTTP handlers of schedulers, @@ -115,7 +115,7 @@ func (c *Controller) CollectSchedulerMetrics() { var allowScheduler float64 // If the scheduler is not allowed to schedule, it will disappear in Grafana panel. // See issue #1341. - if !s.IsPaused() && !c.isSchedulingHalted() { + if !s.IsPaused() && !c.cluster.IsSchedulingHalted() { allowScheduler = 1 } schedulerStatusGauge.WithLabelValues(s.Scheduler.GetName(), "allow").Set(allowScheduler) @@ -131,10 +131,6 @@ func (c *Controller) CollectSchedulerMetrics() { ruleStatusGauge.WithLabelValues("group_count").Set(float64(groupCnt)) } -func (c *Controller) isSchedulingHalted() bool { - return c.cluster.GetSchedulerConfig().IsSchedulingHalted() -} - // ResetSchedulerMetrics resets metrics of all schedulers. func ResetSchedulerMetrics() { schedulerStatusGauge.Reset() @@ -460,6 +456,7 @@ func (s *ScheduleController) Stop() { // Schedule tries to create some operators. func (s *ScheduleController) Schedule(diagnosable bool) []*operator.Operator { +retry: for i := 0; i < maxScheduleRetries; i++ { // no need to retry if schedule should stop to speed exit select { @@ -474,29 +471,27 @@ func (s *ScheduleController) Schedule(diagnosable bool) []*operator.Operator { if diagnosable { s.diagnosticRecorder.SetResultFromPlans(ops, plans) } - foundDisabled := false + if len(ops) == 0 { + continue + } + + // If we have schedule, reset interval to the minimal interval. + s.nextInterval = s.Scheduler.GetMinInterval() for _, op := range ops { - if labelMgr := s.cluster.GetRegionLabeler(); labelMgr != nil { - region := s.cluster.GetRegion(op.RegionID()) - if region == nil { - continue - } - if labelMgr.ScheduleDisabled(region) { - denySchedulersByLabelerCounter.Inc() - foundDisabled = true - break - } + region := s.cluster.GetRegion(op.RegionID()) + if region == nil { + continue retry } - } - if len(ops) > 0 { - // If we have schedule, reset interval to the minimal interval. 
- s.nextInterval = s.Scheduler.GetMinInterval() - // try regenerating operators - if foundDisabled { + labelMgr := s.cluster.GetRegionLabeler() + if labelMgr == nil { continue } - return ops + if labelMgr.ScheduleDisabled(region) { + denySchedulersByLabelerCounter.Inc() + continue retry + } } + return ops } s.nextInterval = s.Scheduler.GetNextInterval(s.nextInterval) return nil @@ -526,7 +521,7 @@ func (s *ScheduleController) AllowSchedule(diagnosable bool) bool { } return false } - if s.isSchedulingHalted() { + if s.cluster.IsSchedulingHalted() { if diagnosable { s.diagnosticRecorder.SetResultFromStatus(Halted) } @@ -541,10 +536,6 @@ func (s *ScheduleController) AllowSchedule(diagnosable bool) bool { return true } -func (s *ScheduleController) isSchedulingHalted() bool { - return s.cluster.GetSchedulerConfig().IsSchedulingHalted() -} - // IsPaused returns if a scheduler is paused. func (s *ScheduleController) IsPaused() bool { delayUntil := atomic.LoadInt64(&s.delayUntil) diff --git a/pkg/schedule/schedulers/scheduler_test.go b/pkg/schedule/schedulers/scheduler_test.go index 77c190ad943..1480d76b75b 100644 --- a/pkg/schedule/schedulers/scheduler_test.go +++ b/pkg/schedule/schedulers/scheduler_test.go @@ -31,6 +31,7 @@ import ( "github.com/tikv/pd/pkg/statistics/utils" "github.com/tikv/pd/pkg/storage" "github.com/tikv/pd/pkg/utils/operatorutil" + "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/pkg/versioninfo" ) @@ -46,6 +47,7 @@ func prepareSchedulersTest(needToRunStream ...bool) (context.CancelFunc, config. stream = hbstream.NewTestHeartbeatStreams(ctx, tc.ID, tc, needToRunStream[0]) } oc := operator.NewController(ctx, tc.GetBasicCluster(), tc.GetSchedulerConfig(), stream) + tc.SetHotRegionCacheHitsThreshold(1) return cancel, opt, tc, oc } @@ -183,7 +185,6 @@ func checkBalance(re *require.Assertions, enablePlacementRules bool) { tc.AddLeaderRegionWithWriteInfo(1, 1, 512*units.KiB*utils.RegionHeartBeatReportInterval, 0, 0, utils.RegionHeartBeatReportInterval, []uint64{2, 3}) tc.AddLeaderRegionWithWriteInfo(2, 1, 512*units.KiB*utils.RegionHeartBeatReportInterval, 0, 0, utils.RegionHeartBeatReportInterval, []uint64{3, 4}) tc.AddLeaderRegionWithWriteInfo(3, 1, 512*units.KiB*utils.RegionHeartBeatReportInterval, 0, 0, utils.RegionHeartBeatReportInterval, []uint64{2, 4}) - tc.SetHotRegionCacheHitsThreshold(0) // try to get an operator var ops []*operator.Operator @@ -218,8 +219,9 @@ func TestHotRegionScheduleAbnormalReplica(t *testing.T) { tc.AddRegionWithReadInfo(1, 1, 512*units.KiB*utils.StoreHeartBeatReportInterval, 0, 0, utils.StoreHeartBeatReportInterval, []uint64{2}) tc.AddRegionWithReadInfo(2, 2, 512*units.KiB*utils.StoreHeartBeatReportInterval, 0, 0, utils.StoreHeartBeatReportInterval, []uint64{1, 3}) tc.AddRegionWithReadInfo(3, 1, 512*units.KiB*utils.StoreHeartBeatReportInterval, 0, 0, utils.StoreHeartBeatReportInterval, []uint64{2, 3}) - tc.SetHotRegionCacheHitsThreshold(0) - re.True(tc.IsRegionHot(tc.GetRegion(1))) + testutil.Eventually(re, func() bool { + return tc.IsRegionHot(tc.GetRegion(1)) + }) re.False(hb.IsScheduleAllowed(tc)) } @@ -315,10 +317,7 @@ func TestSpecialUseHotRegion(t *testing.T) { cd := ConfigSliceDecoder(BalanceRegionType, []string{"", ""}) bs, err := CreateScheduler(BalanceRegionType, oc, storage, cd) re.NoError(err) - hs, err := CreateScheduler(utils.Write.String(), oc, storage, cd) - re.NoError(err) - tc.SetHotRegionCacheHitsThreshold(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 10) 
tc.AddRegionStore(2, 4) @@ -353,6 +352,8 @@ func TestSpecialUseHotRegion(t *testing.T) { tc.AddLeaderRegionWithWriteInfo(3, 1, 512*units.KiB*utils.RegionHeartBeatReportInterval, 0, 0, utils.RegionHeartBeatReportInterval, []uint64{2, 3}) tc.AddLeaderRegionWithWriteInfo(4, 2, 512*units.KiB*utils.RegionHeartBeatReportInterval, 0, 0, utils.RegionHeartBeatReportInterval, []uint64{1, 3}) tc.AddLeaderRegionWithWriteInfo(5, 3, 512*units.KiB*utils.RegionHeartBeatReportInterval, 0, 0, utils.RegionHeartBeatReportInterval, []uint64{1, 2}) + hs, err := CreateScheduler(utils.Write.String(), oc, storage, cd) + re.NoError(err) ops, _ = hs.Schedule(tc, false) re.Len(ops, 1) operatorutil.CheckTransferPeer(re, ops[0], operator.OpHotRegion, 1, 4) @@ -368,7 +369,6 @@ func TestSpecialUseReserved(t *testing.T) { bs, err := CreateScheduler(BalanceRegionType, oc, storage, cd) re.NoError(err) - tc.SetHotRegionCacheHitsThreshold(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 10) tc.AddRegionStore(2, 4) diff --git a/pkg/schedule/schedulers/shuffle_hot_region.go b/pkg/schedule/schedulers/shuffle_hot_region.go index cd5c40d4e07..726138e8f7a 100644 --- a/pkg/schedule/schedulers/shuffle_hot_region.go +++ b/pkg/schedule/schedulers/shuffle_hot_region.go @@ -95,7 +95,8 @@ type shuffleHotRegionScheduler struct { // newShuffleHotRegionScheduler creates an admin scheduler that random balance hot regions func newShuffleHotRegionScheduler(opController *operator.Controller, conf *shuffleHotRegionSchedulerConfig) Scheduler { - base := newBaseHotScheduler(opController) + base := newBaseHotScheduler(opController, + statistics.DefaultHistorySampleDuration, statistics.DefaultHistorySampleInterval) handler := newShuffleHotRegionHandler(conf) ret := &shuffleHotRegionScheduler{ baseHotScheduler: base, @@ -113,7 +114,7 @@ func (s *shuffleHotRegionScheduler) GetName() string { return s.conf.Name } -func (s *shuffleHotRegionScheduler) GetType() string { +func (*shuffleHotRegionScheduler) GetType() string { return ShuffleHotRegionType } @@ -156,7 +157,7 @@ func (s *shuffleHotRegionScheduler) IsScheduleAllowed(cluster sche.SchedulerClus return hotRegionAllowed && regionAllowed && leaderAllowed } -func (s *shuffleHotRegionScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (s *shuffleHotRegionScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { shuffleHotRegionCounter.Inc() rw := s.randomRWType() s.prepareForBalance(rw, cluster) @@ -227,7 +228,7 @@ type shuffleHotRegionHandler struct { } func (handler *shuffleHotRegionHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { return } @@ -249,7 +250,7 @@ func (handler *shuffleHotRegionHandler) UpdateConfig(w http.ResponseWriter, r *h handler.rd.JSON(w, http.StatusOK, nil) } -func (handler *shuffleHotRegionHandler) ListConfig(w http.ResponseWriter, r *http.Request) { +func (handler *shuffleHotRegionHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.Clone() handler.rd.JSON(w, http.StatusOK, conf) } diff --git a/pkg/schedule/schedulers/shuffle_leader.go b/pkg/schedule/schedulers/shuffle_leader.go index a6ff4baf65b..5b3dfd9fd20 100644 --- a/pkg/schedule/schedulers/shuffle_leader.go +++ b/pkg/schedule/schedulers/shuffle_leader.go @@ -71,7 +71,7 @@ func (s 
*shuffleLeaderScheduler) GetName() string { return s.conf.Name } -func (s *shuffleLeaderScheduler) GetType() string { +func (*shuffleLeaderScheduler) GetType() string { return ShuffleLeaderType } @@ -87,7 +87,7 @@ func (s *shuffleLeaderScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster return allowed } -func (s *shuffleLeaderScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (s *shuffleLeaderScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { // We shuffle leaders between stores by: // 1. random select a valid store. // 2. transfer a leader to the store. @@ -106,7 +106,7 @@ func (s *shuffleLeaderScheduler) Schedule(cluster sche.SchedulerCluster, dryRun shuffleLeaderNoFollowerCounter.Inc() return nil, nil } - op, err := operator.CreateTransferLeaderOperator(ShuffleLeaderType, cluster, region, region.GetLeader().GetId(), targetStore.GetID(), []uint64{}, operator.OpAdmin) + op, err := operator.CreateTransferLeaderOperator(ShuffleLeaderType, cluster, region, targetStore.GetID(), []uint64{}, operator.OpAdmin) if err != nil { log.Debug("fail to create shuffle leader operator", errs.ZapError(err)) return nil, nil diff --git a/pkg/schedule/schedulers/shuffle_region.go b/pkg/schedule/schedulers/shuffle_region.go index f9bed18d3fa..b1a100384ae 100644 --- a/pkg/schedule/schedulers/shuffle_region.go +++ b/pkg/schedule/schedulers/shuffle_region.go @@ -68,11 +68,11 @@ func (s *shuffleRegionScheduler) ServeHTTP(w http.ResponseWriter, r *http.Reques s.conf.ServeHTTP(w, r) } -func (s *shuffleRegionScheduler) GetName() string { +func (*shuffleRegionScheduler) GetName() string { return ShuffleRegionName } -func (s *shuffleRegionScheduler) GetType() string { +func (*shuffleRegionScheduler) GetType() string { return ShuffleRegionType } @@ -107,7 +107,7 @@ func (s *shuffleRegionScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster return allowed } -func (s *shuffleRegionScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (s *shuffleRegionScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { shuffleRegionCounter.Inc() region, oldPeer := s.scheduleRemovePeer(cluster) if region == nil { diff --git a/pkg/schedule/schedulers/shuffle_region_config.go b/pkg/schedule/schedulers/shuffle_region_config.go index 552d7ea8bce..bce64f743b8 100644 --- a/pkg/schedule/schedulers/shuffle_region_config.go +++ b/pkg/schedule/schedulers/shuffle_region_config.go @@ -77,7 +77,7 @@ func (conf *shuffleRegionSchedulerConfig) ServeHTTP(w http.ResponseWriter, r *ht router.ServeHTTP(w, r) } -func (conf *shuffleRegionSchedulerConfig) handleGetRoles(w http.ResponseWriter, r *http.Request) { +func (conf *shuffleRegionSchedulerConfig) handleGetRoles(w http.ResponseWriter, _ *http.Request) { rd := render.New(render.Options{IndentJSON: true}) rd.JSON(w, http.StatusOK, conf.GetRoles()) } diff --git a/pkg/schedule/schedulers/split_bucket.go b/pkg/schedule/schedulers/split_bucket.go index 5e31f58129c..32e57ec9b3d 100644 --- a/pkg/schedule/schedulers/split_bucket.go +++ b/pkg/schedule/schedulers/split_bucket.go @@ -138,7 +138,7 @@ func (h *splitBucketHandler) UpdateConfig(w http.ResponseWriter, r *http.Request return } - m := make(map[string]interface{}) + m := make(map[string]any) if err := json.Unmarshal(data, &m); err != nil { rd.JSON(w, http.StatusInternalServerError, err.Error()) return @@ -175,12 +175,12 @@ func 
newSplitBucketScheduler(opController *operator.Controller, conf *splitBucke } // GetName returns the name of the split bucket scheduler. -func (s *splitBucketScheduler) GetName() string { +func (*splitBucketScheduler) GetName() string { return SplitBucketName } // GetType returns the type of the split bucket scheduler. -func (s *splitBucketScheduler) GetType() string { +func (*splitBucketScheduler) GetType() string { return SplitBucketType } @@ -230,7 +230,7 @@ type splitBucketPlan struct { } // Schedule return operators if some bucket is too hot. -func (s *splitBucketScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (s *splitBucketScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { splitBucketScheduleCounter.Inc() conf := s.conf.Clone() plan := &splitBucketPlan{ @@ -297,7 +297,7 @@ func (s *splitBucketScheduler) splitBucket(plan *splitBucketPlan) []*operator.Op return nil } splitBucketNewOperatorCounter.Inc() - op.AdditionalInfos["hot-degree"] = strconv.FormatInt(int64(splitBucket.HotDegree), 10) + op.SetAdditionalInfo("hot-degree", strconv.FormatInt(int64(splitBucket.HotDegree), 10)) return []*operator.Operator{op} } return nil diff --git a/pkg/schedule/schedulers/transfer_witness_leader.go b/pkg/schedule/schedulers/transfer_witness_leader.go index c651a8ef872..9ba78985d13 100644 --- a/pkg/schedule/schedulers/transfer_witness_leader.go +++ b/pkg/schedule/schedulers/transfer_witness_leader.go @@ -60,19 +60,19 @@ func newTransferWitnessLeaderScheduler(opController *operator.Controller) Schedu } } -func (s *transferWitnessLeaderScheduler) GetName() string { +func (*transferWitnessLeaderScheduler) GetName() string { return TransferWitnessLeaderName } -func (s *transferWitnessLeaderScheduler) GetType() string { +func (*transferWitnessLeaderScheduler) GetType() string { return TransferWitnessLeaderType } -func (s *transferWitnessLeaderScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { +func (*transferWitnessLeaderScheduler) IsScheduleAllowed(sche.SchedulerCluster) bool { return true } -func (s *transferWitnessLeaderScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { +func (s *transferWitnessLeaderScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { transferWitnessLeaderCounter.Inc() return s.scheduleTransferWitnessLeaderBatch(s.GetName(), s.GetType(), cluster, transferWitnessLeaderBatchSize), nil } @@ -83,7 +83,7 @@ batchLoop: for i := 0; i < batchSize; i++ { select { case region := <-s.regions: - op, err := s.scheduleTransferWitnessLeader(name, typ, cluster, region) + op, err := scheduleTransferWitnessLeader(name, typ, cluster, region) if err != nil { log.Debug("fail to create transfer leader operator", errs.ZapError(err)) continue @@ -100,7 +100,7 @@ batchLoop: return ops } -func (s *transferWitnessLeaderScheduler) scheduleTransferWitnessLeader(name, typ string, cluster sche.SchedulerCluster, region *core.RegionInfo) (*operator.Operator, error) { +func scheduleTransferWitnessLeader(name, typ string, cluster sche.SchedulerCluster, region *core.RegionInfo) (*operator.Operator, error) { var filters []filter.Filter unhealthyPeerStores := make(map[uint64]struct{}) for _, peer := range region.GetDownPeers() { @@ -123,7 +123,7 @@ func (s *transferWitnessLeaderScheduler) scheduleTransferWitnessLeader(name, typ for _, t := range targets { targetIDs = append(targetIDs, t.GetID()) } - return 
operator.CreateTransferLeaderOperator(typ, cluster, region, region.GetLeader().GetStoreId(), target.GetID(), targetIDs, operator.OpWitnessLeader) + return operator.CreateTransferLeaderOperator(typ, cluster, region, target.GetID(), targetIDs, operator.OpWitnessLeader) } // RecvRegionInfo receives a checked region from coordinator diff --git a/pkg/schedule/splitter/region_splitter.go b/pkg/schedule/splitter/region_splitter.go index f0da8442a2c..aeab4b70cf0 100644 --- a/pkg/schedule/splitter/region_splitter.go +++ b/pkg/schedule/splitter/region_splitter.go @@ -108,6 +108,7 @@ func (r *RegionSplitter) splitRegionsByKeys(parCtx context.Context, splitKeys [] ticker.Stop() cancel() }() +outerLoop: for { select { case <-ticker.C: @@ -118,7 +119,7 @@ func (r *RegionSplitter) splitRegionsByKeys(parCtx context.Context, splitKeys [] r.handler.ScanRegionsByKeyRange(groupKeys, results) } case <-ctx.Done(): - break + break outerLoop } finished := true for _, groupKeys := range validGroups { diff --git a/pkg/schedule/splitter/region_splitter_test.go b/pkg/schedule/splitter/region_splitter_test.go index ebb8b225a9b..99fd53df1e5 100644 --- a/pkg/schedule/splitter/region_splitter_test.go +++ b/pkg/schedule/splitter/region_splitter_test.go @@ -37,7 +37,7 @@ func newMockSplitRegionsHandler() *mockSplitRegionsHandler { } // SplitRegionByKeys mock SplitRegionsHandler -func (m *mockSplitRegionsHandler) SplitRegionByKeys(region *core.RegionInfo, splitKeys [][]byte) error { +func (m *mockSplitRegionsHandler) SplitRegionByKeys(region *core.RegionInfo, _ [][]byte) error { m.regions[region.GetID()] = [2][]byte{ region.GetStartKey(), region.GetEndKey(), @@ -76,7 +76,7 @@ func (suite *regionSplitterTestSuite) SetupSuite() { suite.ctx, suite.cancel = context.WithCancel(context.Background()) } -func (suite *regionSplitterTestSuite) TearDownTest() { +func (suite *regionSplitterTestSuite) TearDownSuite() { suite.cancel() } diff --git a/pkg/slice/slice_test.go b/pkg/slice/slice_test.go index 1fe3fe79dcf..019cd49c46a 100644 --- a/pkg/slice/slice_test.go +++ b/pkg/slice/slice_test.go @@ -22,7 +22,6 @@ import ( ) func TestSlice(t *testing.T) { - t.Parallel() re := require.New(t) testCases := []struct { a []int @@ -45,7 +44,6 @@ func TestSlice(t *testing.T) { } func TestSliceContains(t *testing.T) { - t.Parallel() re := require.New(t) ss := []string{"a", "b", "c"} re.True(slice.Contains(ss, "a")) @@ -61,7 +59,6 @@ func TestSliceContains(t *testing.T) { } func TestSliceRemoveGenericTypes(t *testing.T) { - t.Parallel() re := require.New(t) ss := []string{"a", "b", "c"} ss = slice.Remove(ss, "a") @@ -77,7 +74,6 @@ func TestSliceRemoveGenericTypes(t *testing.T) { } func TestSliceRemove(t *testing.T) { - t.Parallel() re := require.New(t) is := []int64{} diff --git a/pkg/statistics/buckets/hot_bucket_task.go b/pkg/statistics/buckets/hot_bucket_task.go index d6a43a6f8ae..ff7c30a7d81 100644 --- a/pkg/statistics/buckets/hot_bucket_task.go +++ b/pkg/statistics/buckets/hot_bucket_task.go @@ -55,7 +55,7 @@ func NewCheckPeerTask(buckets *metapb.Buckets) flowBucketsItemTask { } } -func (t *checkBucketsTask) taskType() flowItemTaskKind { +func (*checkBucketsTask) taskType() flowItemTaskKind { return checkBucketsTaskType } @@ -79,7 +79,7 @@ func NewCollectBucketStatsTask(minDegree int, regionIDs ...uint64) *collectBucke } } -func (t *collectBucketStatsTask) taskType() flowItemTaskKind { +func (*collectBucketStatsTask) taskType() flowItemTaskKind { return collectBucketStatsTaskType } diff --git a/pkg/statistics/collector.go 
b/pkg/statistics/collector.go index e64b673803d..88986b93d4b 100644 --- a/pkg/statistics/collector.go +++ b/pkg/statistics/collector.go @@ -36,11 +36,11 @@ func newTikvCollector() storeCollector { return tikvCollector{} } -func (c tikvCollector) Engine() string { +func (tikvCollector) Engine() string { return core.EngineTiKV } -func (c tikvCollector) Filter(info *StoreSummaryInfo, kind constant.ResourceKind) bool { +func (tikvCollector) Filter(info *StoreSummaryInfo, kind constant.ResourceKind) bool { if info.IsTiFlash() { return false } @@ -53,7 +53,7 @@ func (c tikvCollector) Filter(info *StoreSummaryInfo, kind constant.ResourceKind return false } -func (c tikvCollector) GetLoads(storeLoads, peerLoadSum []float64, rwTy utils.RWType, kind constant.ResourceKind) (loads []float64) { +func (tikvCollector) GetLoads(storeLoads, peerLoadSum []float64, rwTy utils.RWType, kind constant.ResourceKind) (loads []float64) { loads = make([]float64, utils.DimLen) switch rwTy { case utils.Read: @@ -87,11 +87,11 @@ func newTiFlashCollector(isTraceRegionFlow bool) storeCollector { return tiflashCollector{isTraceRegionFlow: isTraceRegionFlow} } -func (c tiflashCollector) Engine() string { +func (tiflashCollector) Engine() string { return core.EngineTiFlash } -func (c tiflashCollector) Filter(info *StoreSummaryInfo, kind constant.ResourceKind) bool { +func (tiflashCollector) Filter(info *StoreSummaryInfo, kind constant.ResourceKind) bool { switch kind { case constant.LeaderKind: return false diff --git a/pkg/statistics/hot_cache.go b/pkg/statistics/hot_cache.go index 799fb240d10..3f076734a7b 100644 --- a/pkg/statistics/hot_cache.go +++ b/pkg/statistics/hot_cache.go @@ -17,6 +17,7 @@ package statistics import ( "context" + "github.com/pingcap/kvproto/pkg/metapb" "github.com/smallnest/chanx" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/statistics/utils" @@ -33,8 +34,8 @@ var ( // HotCache is a cache hold hot regions. type HotCache struct { ctx context.Context - writeCache *hotPeerCache - readCache *hotPeerCache + writeCache *HotPeerCache + readCache *HotPeerCache } // NewHotCache creates a new hot spot cache. 
@@ -50,7 +51,7 @@ func NewHotCache(ctx context.Context) *HotCache { } // CheckWriteAsync puts the flowItem into queue, and check it asynchronously -func (w *HotCache) CheckWriteAsync(task FlowItemTask) bool { +func (w *HotCache) CheckWriteAsync(task func(cache *HotPeerCache)) bool { if w.writeCache.taskQueue.Len() > chanMaxLength { return false } @@ -63,7 +64,7 @@ func (w *HotCache) CheckWriteAsync(task FlowItemTask) bool { } // CheckReadAsync puts the flowItem into queue, and check it asynchronously -func (w *HotCache) CheckReadAsync(task FlowItemTask) bool { +func (w *HotCache) CheckReadAsync(task func(cache *HotPeerCache)) bool { if w.readCache.taskQueue.Len() > chanMaxLength { return false } @@ -77,52 +78,86 @@ func (w *HotCache) CheckReadAsync(task FlowItemTask) bool { // RegionStats returns hot items according to kind func (w *HotCache) RegionStats(kind utils.RWType, minHotDegree int) map[uint64][]*HotPeerStat { - task := newCollectRegionStatsTask(minHotDegree) + ret := make(chan map[uint64][]*HotPeerStat, 1) + collectRegionStatsTask := func(cache *HotPeerCache) { + ret <- cache.RegionStats(minHotDegree) + } var succ bool switch kind { case utils.Write: - succ = w.CheckWriteAsync(task) + succ = w.CheckWriteAsync(collectRegionStatsTask) case utils.Read: - succ = w.CheckReadAsync(task) + succ = w.CheckReadAsync(collectRegionStatsTask) } if !succ { return nil } - return task.waitRet(w.ctx) + select { + case <-w.ctx.Done(): + return nil + case r := <-ret: + return r + } } // IsRegionHot checks if the region is hot. func (w *HotCache) IsRegionHot(region *core.RegionInfo, minHotDegree int) bool { - checkRegionHotWriteTask := newCheckRegionHotTask(region, minHotDegree) - checkRegionHotReadTask := newCheckRegionHotTask(region, minHotDegree) + retWrite := make(chan bool, 1) + retRead := make(chan bool, 1) + checkRegionHotWriteTask := func(cache *HotPeerCache) { + retWrite <- cache.isRegionHotWithAnyPeers(region, minHotDegree) + } + checkRegionHotReadTask := func(cache *HotPeerCache) { + retRead <- cache.isRegionHotWithAnyPeers(region, minHotDegree) + } succ1 := w.CheckWriteAsync(checkRegionHotWriteTask) succ2 := w.CheckReadAsync(checkRegionHotReadTask) if succ1 && succ2 { - return checkRegionHotWriteTask.waitRet(w.ctx) || checkRegionHotReadTask.waitRet(w.ctx) + select { + case <-w.ctx.Done(): + return false + case r := <-retWrite: + return r + case r := <-retRead: + return r + } } return false } // GetHotPeerStat returns hot peer stat with specified regionID and storeID. func (w *HotCache) GetHotPeerStat(kind utils.RWType, regionID, storeID uint64) *HotPeerStat { - task := newGetHotPeerStatTask(regionID, storeID) + ret := make(chan *HotPeerStat, 1) + getHotPeerStatTask := func(cache *HotPeerCache) { + ret <- cache.getHotPeerStat(regionID, storeID) + } + var succ bool switch kind { case utils.Read: - succ = w.CheckReadAsync(task) + succ = w.CheckReadAsync(getHotPeerStatTask) case utils.Write: - succ = w.CheckWriteAsync(task) + succ = w.CheckWriteAsync(getHotPeerStatTask) } if !succ { return nil } - return task.waitRet(w.ctx) + select { + case <-w.ctx.Done(): + return nil + case r := <-ret: + return r + } } // CollectMetrics collects the hot cache metrics. func (w *HotCache) CollectMetrics() { - w.CheckWriteAsync(newCollectMetricsTask()) - w.CheckReadAsync(newCollectMetricsTask()) + w.CheckWriteAsync(func(cache *HotPeerCache) { + cache.collectMetrics() + }) + w.CheckReadAsync(func(cache *HotPeerCache) { + cache.collectMetrics() + }) } // ResetHotCacheStatusMetrics resets the hot cache metrics. 
@@ -130,7 +165,7 @@ func ResetHotCacheStatusMetrics() { hotCacheStatusGauge.Reset() } -func (w *HotCache) updateItems(queue *chanx.UnboundedChan[FlowItemTask], runTask func(task FlowItemTask)) { +func (w *HotCache) updateItems(queue *chanx.UnboundedChan[func(*HotPeerCache)], runTask func(task func(*HotPeerCache))) { defer logutil.LogPanic() for { @@ -143,18 +178,18 @@ func (w *HotCache) updateItems(queue *chanx.UnboundedChan[FlowItemTask], runTask } } -func (w *HotCache) runReadTask(task FlowItemTask) { +func (w *HotCache) runReadTask(task func(cache *HotPeerCache)) { if task != nil { // TODO: do we need a run-task timeout to protect the queue won't be stuck by a task? - task.runTask(w.readCache) + task(w.readCache) readTaskMetrics.Set(float64(w.readCache.taskQueue.Len())) } } -func (w *HotCache) runWriteTask(task FlowItemTask) { +func (w *HotCache) runWriteTask(task func(cache *HotPeerCache)) { if task != nil { // TODO: do we need a run-task timeout to protect the queue won't be stuck by a task? - task.runTask(w.writeCache) + task(w.writeCache) writeTaskMetrics.Set(float64(w.writeCache.taskQueue.Len())) } } @@ -164,34 +199,34 @@ func (w *HotCache) runWriteTask(task FlowItemTask) { func (w *HotCache) Update(item *HotPeerStat, kind utils.RWType) { switch kind { case utils.Write: - w.writeCache.updateStat(item) + w.writeCache.UpdateStat(item) case utils.Read: - w.readCache.updateStat(item) + w.readCache.UpdateStat(item) } } // CheckWritePeerSync checks the write status, returns update items. // This is used for mockcluster, for test purpose. -func (w *HotCache) CheckWritePeerSync(peer *core.PeerInfo, region *core.RegionInfo) *HotPeerStat { - return w.writeCache.checkPeerFlow(peer, region) +func (w *HotCache) CheckWritePeerSync(region *core.RegionInfo, peers []*metapb.Peer, loads []float64, interval uint64) []*HotPeerStat { + return w.writeCache.CheckPeerFlow(region, peers, loads, interval) } // CheckReadPeerSync checks the read status, returns update items. // This is used for mockcluster, for test purpose. -func (w *HotCache) CheckReadPeerSync(peer *core.PeerInfo, region *core.RegionInfo) *HotPeerStat { - return w.readCache.checkPeerFlow(peer, region) +func (w *HotCache) CheckReadPeerSync(region *core.RegionInfo, peers []*metapb.Peer, loads []float64, interval uint64) []*HotPeerStat { + return w.readCache.CheckPeerFlow(region, peers, loads, interval) } // ExpiredReadItems returns the read items which are already expired. // This is used for mockcluster, for test purpose. func (w *HotCache) ExpiredReadItems(region *core.RegionInfo) []*HotPeerStat { - return w.readCache.collectExpiredItems(region) + return w.readCache.CollectExpiredItems(region) } // ExpiredWriteItems returns the write items which are already expired. // This is used for mockcluster, for test purpose. func (w *HotCache) ExpiredWriteItems(region *core.RegionInfo) []*HotPeerStat { - return w.writeCache.collectExpiredItems(region) + return w.writeCache.CollectExpiredItems(region) } // GetThresholds returns thresholds. diff --git a/pkg/statistics/hot_cache_task.go b/pkg/statistics/hot_cache_task.go deleted file mode 100644 index c84a292b4e7..00000000000 --- a/pkg/statistics/hot_cache_task.go +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright 2021 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statistics - -import ( - "context" - - "github.com/tikv/pd/pkg/core" -) - -// FlowItemTask indicates the task in flowItem queue -type FlowItemTask interface { - runTask(cache *hotPeerCache) -} - -type checkPeerTask struct { - peerInfo *core.PeerInfo - regionInfo *core.RegionInfo -} - -// NewCheckPeerTask creates task to update peerInfo -func NewCheckPeerTask(peerInfo *core.PeerInfo, regionInfo *core.RegionInfo) FlowItemTask { - return &checkPeerTask{ - peerInfo: peerInfo, - regionInfo: regionInfo, - } -} - -func (t *checkPeerTask) runTask(cache *hotPeerCache) { - stat := cache.checkPeerFlow(t.peerInfo, t.regionInfo) - if stat != nil { - cache.updateStat(stat) - } -} - -type checkExpiredTask struct { - region *core.RegionInfo -} - -// NewCheckExpiredItemTask creates task to collect expired items -func NewCheckExpiredItemTask(region *core.RegionInfo) FlowItemTask { - return &checkExpiredTask{ - region: region, - } -} - -func (t *checkExpiredTask) runTask(cache *hotPeerCache) { - expiredStats := cache.collectExpiredItems(t.region) - for _, stat := range expiredStats { - cache.updateStat(stat) - } -} - -type collectUnReportedPeerTask struct { - storeID uint64 - regions map[uint64]*core.RegionInfo - interval uint64 -} - -// NewCollectUnReportedPeerTask creates task to collect unreported peers -func NewCollectUnReportedPeerTask(storeID uint64, regions map[uint64]*core.RegionInfo, interval uint64) FlowItemTask { - return &collectUnReportedPeerTask{ - storeID: storeID, - regions: regions, - interval: interval, - } -} - -func (t *collectUnReportedPeerTask) runTask(cache *hotPeerCache) { - stats := cache.checkColdPeer(t.storeID, t.regions, t.interval) - for _, stat := range stats { - cache.updateStat(stat) - } -} - -type collectRegionStatsTask struct { - minDegree int - ret chan map[uint64][]*HotPeerStat -} - -func newCollectRegionStatsTask(minDegree int) *collectRegionStatsTask { - return &collectRegionStatsTask{ - minDegree: minDegree, - ret: make(chan map[uint64][]*HotPeerStat, 1), - } -} - -func (t *collectRegionStatsTask) runTask(cache *hotPeerCache) { - t.ret <- cache.RegionStats(t.minDegree) -} - -// TODO: do we need a wait-return timeout? -func (t *collectRegionStatsTask) waitRet(ctx context.Context) map[uint64][]*HotPeerStat { - select { - case <-ctx.Done(): - return nil - case ret := <-t.ret: - return ret - } -} - -type checkRegionHotTask struct { - region *core.RegionInfo - minHotDegree int - ret chan bool -} - -func newCheckRegionHotTask(region *core.RegionInfo, minDegree int) *checkRegionHotTask { - return &checkRegionHotTask{ - region: region, - minHotDegree: minDegree, - ret: make(chan bool, 1), - } -} - -func (t *checkRegionHotTask) runTask(cache *hotPeerCache) { - t.ret <- cache.isRegionHotWithAnyPeers(t.region, t.minHotDegree) -} - -// TODO: do we need a wait-return timeout? 
-func (t *checkRegionHotTask) waitRet(ctx context.Context) bool { - select { - case <-ctx.Done(): - return false - case r := <-t.ret: - return r - } -} - -type collectMetricsTask struct { -} - -func newCollectMetricsTask() *collectMetricsTask { - return &collectMetricsTask{} -} - -func (t *collectMetricsTask) runTask(cache *hotPeerCache) { - cache.collectMetrics() -} - -type getHotPeerStatTask struct { - regionID uint64 - storeID uint64 - ret chan *HotPeerStat -} - -func newGetHotPeerStatTask(regionID, storeID uint64) *getHotPeerStatTask { - return &getHotPeerStatTask{ - regionID: regionID, - storeID: storeID, - ret: make(chan *HotPeerStat, 1), - } -} - -func (t *getHotPeerStatTask) runTask(cache *hotPeerCache) { - t.ret <- cache.getHotPeerStat(t.regionID, t.storeID) -} - -// TODO: do we need a wait-return timeout? -func (t *getHotPeerStatTask) waitRet(ctx context.Context) *HotPeerStat { - select { - case <-ctx.Done(): - return nil - case r := <-t.ret: - return r - } -} diff --git a/pkg/statistics/hot_peer_cache.go b/pkg/statistics/hot_peer_cache.go index 0e35e0e23be..4db0c304bb9 100644 --- a/pkg/statistics/hot_peer_cache.go +++ b/pkg/statistics/hot_peer_cache.go @@ -57,27 +57,27 @@ type thresholds struct { metrics [utils.DimLen + 1]prometheus.Gauge // 0 is for byte, 1 is for key, 2 is for query, 3 is for total length. } -// hotPeerCache saves the hot peer's statistics. -type hotPeerCache struct { +// HotPeerCache saves the hot peer's statistics. +type HotPeerCache struct { kind utils.RWType peersOfStore map[uint64]*utils.TopN // storeID -> hot peers storesOfRegion map[uint64]map[uint64]struct{} // regionID -> storeIDs regionsOfStore map[uint64]map[uint64]struct{} // storeID -> regionIDs topNTTL time.Duration - taskQueue *chanx.UnboundedChan[FlowItemTask] + taskQueue *chanx.UnboundedChan[func(*HotPeerCache)] thresholdsOfStore map[uint64]*thresholds // storeID -> thresholds metrics map[uint64][utils.ActionTypeLen]prometheus.Gauge // storeID -> metrics // TODO: consider to remove store info when store is offline. 
} -// NewHotPeerCache creates a hotPeerCache -func NewHotPeerCache(ctx context.Context, kind utils.RWType) *hotPeerCache { - return &hotPeerCache{ +// NewHotPeerCache creates a HotPeerCache +func NewHotPeerCache(ctx context.Context, kind utils.RWType) *HotPeerCache { + return &HotPeerCache{ kind: kind, peersOfStore: make(map[uint64]*utils.TopN), storesOfRegion: make(map[uint64]map[uint64]struct{}), regionsOfStore: make(map[uint64]map[uint64]struct{}), - taskQueue: chanx.NewUnboundedChan[FlowItemTask](ctx, queueCap), + taskQueue: chanx.NewUnboundedChan[func(*HotPeerCache)](ctx, queueCap), thresholdsOfStore: make(map[uint64]*thresholds), topNTTL: time.Duration(3*kind.ReportInterval()) * time.Second, metrics: make(map[uint64][utils.ActionTypeLen]prometheus.Gauge), @@ -86,7 +86,7 @@ func NewHotPeerCache(ctx context.Context, kind utils.RWType) *hotPeerCache { // TODO: rename RegionStats as PeerStats // RegionStats returns hot items -func (f *hotPeerCache) RegionStats(minHotDegree int) map[uint64][]*HotPeerStat { +func (f *HotPeerCache) RegionStats(minHotDegree int) map[uint64][]*HotPeerStat { res := make(map[uint64][]*HotPeerStat) defaultAntiCount := f.kind.DefaultAntiCount() for storeID, peers := range f.peersOfStore { @@ -102,7 +102,7 @@ func (f *hotPeerCache) RegionStats(minHotDegree int) map[uint64][]*HotPeerStat { return res } -func (f *hotPeerCache) updateStat(item *HotPeerStat) { +func (f *HotPeerCache) UpdateStat(item *HotPeerStat) { switch item.actionType { case utils.Remove: f.removeItem(item) @@ -116,7 +116,7 @@ func (f *hotPeerCache) updateStat(item *HotPeerStat) { f.incMetrics(item.actionType, item.StoreID) } -func (f *hotPeerCache) incMetrics(action utils.ActionType, storeID uint64) { +func (f *HotPeerCache) incMetrics(action utils.ActionType, storeID uint64) { if _, ok := f.metrics[storeID]; !ok { store := storeTag(storeID) kind := f.kind.String() @@ -129,7 +129,7 @@ func (f *hotPeerCache) incMetrics(action utils.ActionType, storeID uint64) { f.metrics[storeID][action].Inc() } -func (f *hotPeerCache) collectPeerMetrics(loads []float64, interval uint64) { +func (f *HotPeerCache) collectPeerMetrics(loads []float64, interval uint64) { regionHeartbeatIntervalHist.Observe(float64(interval)) if interval == 0 { return @@ -153,8 +153,8 @@ func (f *hotPeerCache) collectPeerMetrics(loads []float64, interval uint64) { } } -// collectExpiredItems collects expired items, mark them as needDelete and puts them into inherit items -func (f *hotPeerCache) collectExpiredItems(region *core.RegionInfo) []*HotPeerStat { +// CollectExpiredItems collects expired items, mark them as needDelete and puts them into inherit items +func (f *HotPeerCache) CollectExpiredItems(region *core.RegionInfo) []*HotPeerStat { regionID := region.GetID() items := make([]*HotPeerStat, 0) if ids, ok := f.storesOfRegion[regionID]; ok { @@ -171,65 +171,68 @@ func (f *hotPeerCache) collectExpiredItems(region *core.RegionInfo) []*HotPeerSt return items } -// checkPeerFlow checks the flow information of a peer. -// Notice: checkPeerFlow couldn't be used concurrently. -// checkPeerFlow will update oldItem's rollingLoads into newItem, thus we should use write lock here. -func (f *hotPeerCache) checkPeerFlow(peer *core.PeerInfo, region *core.RegionInfo) *HotPeerStat { - interval := peer.GetInterval() +// CheckPeerFlow checks the flow information of a peer. +// Notice: CheckPeerFlow couldn't be used concurrently. +// CheckPeerFlow will update oldItem's rollingLoads into newItem, thus we should use write lock here. 
+func (f *HotPeerCache) CheckPeerFlow(region *core.RegionInfo, peers []*metapb.Peer, deltaLoads []float64, interval uint64) []*HotPeerStat { if Denoising && interval < HotRegionReportMinInterval { // for test or simulator purpose return nil } - storeID := peer.GetStoreId() - deltaLoads := peer.GetLoads() + f.collectPeerMetrics(deltaLoads, interval) // update metrics regionID := region.GetID() - oldItem := f.getOldHotPeerStat(regionID, storeID) - - // check whether the peer is allowed to be inherited - source := utils.Direct - if oldItem == nil { - for _, storeID := range f.getAllStoreIDs(region) { - oldItem = f.getOldHotPeerStat(regionID, storeID) - if oldItem != nil && oldItem.allowInherited { - source = utils.Inherit - break + + regionPeers := region.GetPeers() + stats := make([]*HotPeerStat, 0, len(peers)) + for _, peer := range peers { + storeID := peer.GetStoreId() + oldItem := f.getOldHotPeerStat(regionID, storeID) + + // check whether the peer is allowed to be inherited + source := utils.Direct + if oldItem == nil { + for _, storeID := range f.getAllStoreIDs(region) { + oldItem = f.getOldHotPeerStat(regionID, storeID) + if oldItem != nil && oldItem.allowInherited { + source = utils.Inherit + break + } } } - } - - // check new item whether is hot - if oldItem == nil { - regionStats := f.kind.RegionStats() - thresholds := f.calcHotThresholds(storeID) - isHot := slice.AnyOf(regionStats, func(i int) bool { - return deltaLoads[regionStats[i]]/float64(interval) >= thresholds[i] - }) - if !isHot { - return nil + // check new item whether is hot + if oldItem == nil { + regionStats := f.kind.RegionStats() + thresholds := f.calcHotThresholds(storeID) + isHot := slice.AnyOf(regionStats, func(i int) bool { + return deltaLoads[regionStats[i]]/float64(interval) >= thresholds[i] + }) + if !isHot { + continue + } } - } - - peers := region.GetPeers() - newItem := &HotPeerStat{ - StoreID: storeID, - RegionID: regionID, - Loads: f.kind.GetLoadRatesFromPeer(peer), - isLeader: region.GetLeader().GetStoreId() == storeID, - actionType: utils.Update, - stores: make([]uint64, len(peers)), - } - for i, peer := range peers { - newItem.stores[i] = peer.GetStoreId() - } - if oldItem == nil { - return f.updateNewHotPeerStat(newItem, deltaLoads, time.Duration(interval)*time.Second) + newItem := &HotPeerStat{ + StoreID: storeID, + RegionID: regionID, + Loads: f.kind.GetLoadRates(deltaLoads, interval), + isLeader: region.GetLeader().GetStoreId() == storeID, + actionType: utils.Update, + stores: make([]uint64, len(regionPeers)), + } + for i, peer := range regionPeers { + newItem.stores[i] = peer.GetStoreId() + } + if oldItem == nil { + stats = append(stats, f.updateNewHotPeerStat(newItem, deltaLoads, time.Duration(interval)*time.Second)) + continue + } + stats = append(stats, f.updateHotPeerStat(region, newItem, oldItem, deltaLoads, time.Duration(interval)*time.Second, source)) } - return f.updateHotPeerStat(region, newItem, oldItem, deltaLoads, time.Duration(interval)*time.Second, source) + return stats } -// checkColdPeer checks the collect the un-heartbeat peer and maintain it. -func (f *hotPeerCache) checkColdPeer(storeID uint64, reportRegions map[uint64]*core.RegionInfo, interval uint64) (ret []*HotPeerStat) { +// CheckColdPeer checks the collect the un-heartbeat peer and maintain it. 
+func (f *HotPeerCache) CheckColdPeer(storeID uint64, reportRegions map[uint64]*core.RegionInfo, interval uint64) (ret []*HotPeerStat) { // for test or simulator purpose if Denoising && interval < HotRegionReportMinInterval { return @@ -275,7 +278,7 @@ func (f *hotPeerCache) checkColdPeer(storeID uint64, reportRegions map[uint64]*c return } -func (f *hotPeerCache) collectMetrics() { +func (f *HotPeerCache) collectMetrics() { for _, thresholds := range f.thresholdsOfStore { thresholds.metrics[utils.ByteDim].Set(thresholds.rates[utils.ByteDim]) thresholds.metrics[utils.KeyDim].Set(thresholds.rates[utils.KeyDim]) @@ -284,7 +287,7 @@ func (f *hotPeerCache) collectMetrics() { } } -func (f *hotPeerCache) getOldHotPeerStat(regionID, storeID uint64) *HotPeerStat { +func (f *HotPeerCache) getOldHotPeerStat(regionID, storeID uint64) *HotPeerStat { if hotPeers, ok := f.peersOfStore[storeID]; ok { if v := hotPeers.Get(regionID); v != nil { return v.(*HotPeerStat) @@ -293,7 +296,7 @@ func (f *hotPeerCache) getOldHotPeerStat(regionID, storeID uint64) *HotPeerStat return nil } -func (f *hotPeerCache) calcHotThresholds(storeID uint64) []float64 { +func (f *HotPeerCache) calcHotThresholds(storeID uint64) []float64 { // check whether the thresholds is updated recently t, ok := f.thresholdsOfStore[storeID] if ok && time.Since(t.updatedTime) <= ThresholdsUpdateInterval { @@ -333,7 +336,7 @@ func (f *hotPeerCache) calcHotThresholds(storeID uint64) []float64 { } // gets the storeIDs, including old region and new region -func (f *hotPeerCache) getAllStoreIDs(region *core.RegionInfo) []uint64 { +func (f *HotPeerCache) getAllStoreIDs(region *core.RegionInfo) []uint64 { regionPeers := region.GetPeers() ret := make([]uint64, 0, len(regionPeers)) isInSlice := func(id uint64) bool { @@ -361,7 +364,7 @@ func (f *hotPeerCache) getAllStoreIDs(region *core.RegionInfo) []uint64 { return ret } -func (f *hotPeerCache) isOldColdPeer(oldItem *HotPeerStat, storeID uint64) bool { +func (f *HotPeerCache) isOldColdPeer(oldItem *HotPeerStat, storeID uint64) bool { isOldPeer := func() bool { for _, id := range oldItem.stores { if id == storeID { @@ -381,7 +384,7 @@ func (f *hotPeerCache) isOldColdPeer(oldItem *HotPeerStat, storeID uint64) bool return isOldPeer() && !isInHotCache() } -func (f *hotPeerCache) justTransferLeader(region *core.RegionInfo, oldItem *HotPeerStat) bool { +func (f *HotPeerCache) justTransferLeader(region *core.RegionInfo, oldItem *HotPeerStat) bool { if region == nil { return false } @@ -403,7 +406,7 @@ func (f *hotPeerCache) justTransferLeader(region *core.RegionInfo, oldItem *HotP return false } -func (f *hotPeerCache) isRegionHotWithAnyPeers(region *core.RegionInfo, hotDegree int) bool { +func (f *HotPeerCache) isRegionHotWithAnyPeers(region *core.RegionInfo, hotDegree int) bool { for _, peer := range region.GetPeers() { if f.isRegionHotWithPeer(region, peer, hotDegree) { return true @@ -412,7 +415,7 @@ func (f *hotPeerCache) isRegionHotWithAnyPeers(region *core.RegionInfo, hotDegre return false } -func (f *hotPeerCache) isRegionHotWithPeer(region *core.RegionInfo, peer *metapb.Peer, hotDegree int) bool { +func (f *HotPeerCache) isRegionHotWithPeer(region *core.RegionInfo, peer *metapb.Peer, hotDegree int) bool { if peer == nil { return false } @@ -422,7 +425,7 @@ func (f *hotPeerCache) isRegionHotWithPeer(region *core.RegionInfo, peer *metapb return false } -func (f *hotPeerCache) getHotPeerStat(regionID, storeID uint64) *HotPeerStat { +func (f *HotPeerCache) getHotPeerStat(regionID, storeID uint64) 
*HotPeerStat { if peers, ok := f.peersOfStore[storeID]; ok { if stat := peers.Get(regionID); stat != nil { return stat.(*HotPeerStat) @@ -431,7 +434,7 @@ func (f *hotPeerCache) getHotPeerStat(regionID, storeID uint64) *HotPeerStat { return nil } -func (f *hotPeerCache) updateHotPeerStat(region *core.RegionInfo, newItem, oldItem *HotPeerStat, deltaLoads []float64, interval time.Duration, source utils.SourceKind) *HotPeerStat { +func (f *HotPeerCache) updateHotPeerStat(region *core.RegionInfo, newItem, oldItem *HotPeerStat, deltaLoads []float64, interval time.Duration, source utils.SourceKind) *HotPeerStat { regionStats := f.kind.RegionStats() if source == utils.Inherit { @@ -451,7 +454,7 @@ func (f *hotPeerCache) updateHotPeerStat(region *core.RegionInfo, newItem, oldIt // For write stat, as the stat is send by region heartbeat, the first heartbeat will be skipped. // For read stat, as the stat is send by store heartbeat, the first heartbeat won't be skipped. if f.kind == utils.Write { - f.inheritItem(newItem, oldItem) + inheritItem(newItem, oldItem) return newItem } } else { @@ -465,25 +468,25 @@ func (f *hotPeerCache) updateHotPeerStat(region *core.RegionInfo, newItem, oldIt isFull := newItem.rollingLoads[0].isFull(f.interval()) // The intervals of dims are the same, so it is only necessary to determine whether any of them if !isFull { // not update hot degree and anti count - f.inheritItem(newItem, oldItem) + inheritItem(newItem, oldItem) } else { // If item is inCold, it means the pd didn't recv this item in the store heartbeat, // thus we make it colder if newItem.inCold { - f.coldItem(newItem, oldItem) + coldItem(newItem, oldItem) } else { thresholds := f.calcHotThresholds(newItem.StoreID) if f.isOldColdPeer(oldItem, newItem.StoreID) { if newItem.isHot(thresholds) { - f.initItem(newItem) + initItem(newItem, f.kind.DefaultAntiCount()) } else { newItem.actionType = utils.Remove } } else { if newItem.isHot(thresholds) { - f.hotItem(newItem, oldItem) + hotItem(newItem, oldItem, f.kind.DefaultAntiCount()) } else { - f.coldItem(newItem, oldItem) + coldItem(newItem, oldItem) } } } @@ -492,11 +495,11 @@ func (f *hotPeerCache) updateHotPeerStat(region *core.RegionInfo, newItem, oldIt return newItem } -func (f *hotPeerCache) updateNewHotPeerStat(newItem *HotPeerStat, deltaLoads []float64, interval time.Duration) *HotPeerStat { +func (f *HotPeerCache) updateNewHotPeerStat(newItem *HotPeerStat, deltaLoads []float64, interval time.Duration) *HotPeerStat { regionStats := f.kind.RegionStats() // interval is not 0 which is guaranteed by the caller. 
if interval.Seconds() >= float64(f.kind.ReportInterval()) { - f.initItem(newItem) + initItem(newItem, f.kind.DefaultAntiCount()) } newItem.actionType = utils.Add newItem.rollingLoads = make([]*dimStat, len(regionStats)) @@ -511,7 +514,7 @@ func (f *hotPeerCache) updateNewHotPeerStat(newItem *HotPeerStat, deltaLoads []f return newItem } -func (f *hotPeerCache) putItem(item *HotPeerStat) { +func (f *HotPeerCache) putItem(item *HotPeerStat) { peers, ok := f.peersOfStore[item.StoreID] if !ok { peers = utils.NewTopN(utils.DimLen, TopNN, f.topNTTL) @@ -532,7 +535,7 @@ func (f *hotPeerCache) putItem(item *HotPeerStat) { regions[item.RegionID] = struct{}{} } -func (f *hotPeerCache) removeItem(item *HotPeerStat) { +func (f *HotPeerCache) removeItem(item *HotPeerStat) { if peers, ok := f.peersOfStore[item.StoreID]; ok { peers.Remove(item.RegionID) } @@ -546,17 +549,17 @@ func (f *hotPeerCache) removeItem(item *HotPeerStat) { // removeAllItem removes all items of the cache. // It is used for test. -func (f *hotPeerCache) removeAllItem() { +func (f *HotPeerCache) removeAllItem() { for _, peers := range f.peersOfStore { for _, peer := range peers.GetAll() { item := peer.(*HotPeerStat) item.actionType = utils.Remove - f.updateStat(item) + f.UpdateStat(item) } } } -func (f *hotPeerCache) coldItem(newItem, oldItem *HotPeerStat) { +func coldItem(newItem, oldItem *HotPeerStat) { newItem.HotDegree = oldItem.HotDegree - 1 newItem.AntiCount = oldItem.AntiCount - 1 if newItem.AntiCount <= 0 { @@ -566,9 +569,9 @@ func (f *hotPeerCache) coldItem(newItem, oldItem *HotPeerStat) { } } -func (f *hotPeerCache) hotItem(newItem, oldItem *HotPeerStat) { +func hotItem(newItem, oldItem *HotPeerStat, defaultAntiCount int) { newItem.HotDegree = oldItem.HotDegree + 1 - if oldItem.AntiCount < f.kind.DefaultAntiCount() { + if oldItem.AntiCount < defaultAntiCount { newItem.AntiCount = oldItem.AntiCount + 1 } else { newItem.AntiCount = oldItem.AntiCount @@ -576,18 +579,18 @@ func (f *hotPeerCache) hotItem(newItem, oldItem *HotPeerStat) { newItem.allowInherited = true } -func (f *hotPeerCache) initItem(item *HotPeerStat) { +func initItem(item *HotPeerStat, defaultAntiCount int) { item.HotDegree = 1 - item.AntiCount = f.kind.DefaultAntiCount() + item.AntiCount = defaultAntiCount item.allowInherited = true } -func (f *hotPeerCache) inheritItem(newItem, oldItem *HotPeerStat) { +func inheritItem(newItem, oldItem *HotPeerStat) { newItem.HotDegree = oldItem.HotDegree newItem.AntiCount = oldItem.AntiCount } -func (f *hotPeerCache) interval() time.Duration { +func (f *HotPeerCache) interval() time.Duration { return time.Duration(f.kind.ReportInterval()) * time.Second } diff --git a/pkg/statistics/hot_peer_cache_test.go b/pkg/statistics/hot_peer_cache_test.go index 36f922d3830..ce4e352bc3d 100644 --- a/pkg/statistics/hot_peer_cache_test.go +++ b/pkg/statistics/hot_peer_cache_test.go @@ -93,7 +93,7 @@ func TestCache(t *testing.T) { } } -func orderingPeers(cache *hotPeerCache, region *core.RegionInfo) []*metapb.Peer { +func orderingPeers(cache *HotPeerCache, region *core.RegionInfo) []*metapb.Peer { var peers []*metapb.Peer for _, peer := range region.GetPeers() { if cache.getOldHotPeerStat(region.GetID(), peer.StoreId) != nil { @@ -105,30 +105,23 @@ func orderingPeers(cache *hotPeerCache, region *core.RegionInfo) []*metapb.Peer return peers } -func checkFlow(cache *hotPeerCache, region *core.RegionInfo, peers []*metapb.Peer) (res []*HotPeerStat) { +func checkFlow(cache *HotPeerCache, region *core.RegionInfo, peers []*metapb.Peer) (res 
[]*HotPeerStat) { reportInterval := region.GetInterval() interval := reportInterval.GetEndTimestamp() - reportInterval.GetStartTimestamp() - res = append(res, cache.collectExpiredItems(region)...) - for _, peer := range peers { - peerInfo := core.NewPeerInfo(peer, region.GetLoads(), interval) - item := cache.checkPeerFlow(peerInfo, region) - if item != nil { - res = append(res, item) - } - } - return res + res = append(res, cache.CollectExpiredItems(region)...) + return append(res, cache.CheckPeerFlow(region, peers, region.GetLoads(), interval)...) } -func updateFlow(cache *hotPeerCache, res []*HotPeerStat) []*HotPeerStat { +func updateFlow(cache *HotPeerCache, res []*HotPeerStat) []*HotPeerStat { for _, p := range res { - cache.updateStat(p) + cache.UpdateStat(p) } return res } -type check func(re *require.Assertions, cache *hotPeerCache, region *core.RegionInfo, expect ...int) (res []*HotPeerStat) +type check func(re *require.Assertions, cache *HotPeerCache, region *core.RegionInfo, expect ...int) (res []*HotPeerStat) -func checkAndUpdate(re *require.Assertions, cache *hotPeerCache, region *core.RegionInfo, expect ...int) (res []*HotPeerStat) { +func checkAndUpdate(re *require.Assertions, cache *HotPeerCache, region *core.RegionInfo, expect ...int) (res []*HotPeerStat) { res = checkFlow(cache, region, region.GetPeers()) if len(expect) != 0 { re.Len(res, expect[0]) @@ -138,7 +131,7 @@ func checkAndUpdate(re *require.Assertions, cache *hotPeerCache, region *core.Re // Check and update peers in the specified order that old item that he items that have not expired come first, and the items that have expired come second. // This order is also similar to the previous version. By the way the order in now version is random. -func checkAndUpdateWithOrdering(re *require.Assertions, cache *hotPeerCache, region *core.RegionInfo, expect ...int) (res []*HotPeerStat) { +func checkAndUpdateWithOrdering(re *require.Assertions, cache *HotPeerCache, region *core.RegionInfo, expect ...int) (res []*HotPeerStat) { res = checkFlow(cache, region, orderingPeers(cache, region)) if len(expect) != 0 { re.Len(res, expect[0]) @@ -146,7 +139,7 @@ func checkAndUpdateWithOrdering(re *require.Assertions, cache *hotPeerCache, reg return updateFlow(cache, res) } -func checkAndUpdateSkipOne(re *require.Assertions, cache *hotPeerCache, region *core.RegionInfo, expect ...int) (res []*HotPeerStat) { +func checkAndUpdateSkipOne(re *require.Assertions, cache *HotPeerCache, region *core.RegionInfo, expect ...int) (res []*HotPeerStat) { res = checkFlow(cache, region, region.GetPeers()[1:]) if len(expect) != 0 { re.Len(res, expect[0]) @@ -154,7 +147,7 @@ func checkAndUpdateSkipOne(re *require.Assertions, cache *hotPeerCache, region * return updateFlow(cache, res) } -func checkHit(re *require.Assertions, cache *hotPeerCache, region *core.RegionInfo, kind utils.RWType, actionType utils.ActionType) { +func checkHit(re *require.Assertions, cache *HotPeerCache, region *core.RegionInfo, kind utils.RWType, actionType utils.ActionType) { var peers []*metapb.Peer if kind == utils.Read { peers = []*metapb.Peer{region.GetLeader()} @@ -178,7 +171,7 @@ func checkOp(re *require.Assertions, ret []*HotPeerStat, storeID uint64, actionT } // checkIntervalSum checks whether the interval sum of the peers are different. 
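The refactor above turns coldItem, hotItem, initItem, and inheritItem into free functions that take the default anti count as a parameter instead of reading it from the cache. A pared-down sketch of how those transitions drive a peer's hot degree and eventual removal (the types and the remove flag here are simplified stand-ins):

```go
package main

import "fmt"

// stat keeps only the fields the helpers below touch.
type stat struct {
	HotDegree, AntiCount int
	allowInherited       bool
	remove               bool // stands in for actionType = utils.Remove
}

func initItem(it *stat, defaultAntiCount int) {
	it.HotDegree, it.AntiCount, it.allowInherited = 1, defaultAntiCount, true
}

func hotItem(newIt, oldIt *stat, defaultAntiCount int) {
	newIt.HotDegree = oldIt.HotDegree + 1
	newIt.AntiCount = oldIt.AntiCount
	if oldIt.AntiCount < defaultAntiCount {
		newIt.AntiCount = oldIt.AntiCount + 1
	}
	newIt.allowInherited = true
}

func coldItem(newIt, oldIt *stat) {
	newIt.HotDegree = oldIt.HotDegree - 1
	newIt.AntiCount = oldIt.AntiCount - 1
	if newIt.AntiCount <= 0 {
		newIt.remove = true
	} else {
		newIt.allowInherited = true
	}
}

func main() {
	const defaultAntiCount = 2
	it := &stat{}
	initItem(it, defaultAntiCount)
	for i := 0; i < 3; i++ { // three hot reports in a row
		next := &stat{}
		hotItem(next, it, defaultAntiCount)
		it = next
	}
	for !it.remove { // then cold reports until the peer is evicted
		next := &stat{}
		coldItem(next, it)
		it = next
	}
	fmt.Println(it.HotDegree, it.AntiCount, it.remove) // 2 0 true
}
```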
-func checkIntervalSum(cache *hotPeerCache, region *core.RegionInfo) bool { +func checkIntervalSum(cache *HotPeerCache, region *core.RegionInfo) bool { var intervalSums []int for _, peer := range region.GetPeers() { oldItem := cache.getOldHotPeerStat(region.GetID(), peer.StoreId) @@ -318,13 +311,13 @@ func TestUpdateHotPeerStat(t *testing.T) { }() // skip interval=0 - interval := 0 + interval := uint64(0) deltaLoads := []float64{0.0, 0.0, 0.0} utils.MinHotThresholds[utils.RegionReadBytes] = 0.0 utils.MinHotThresholds[utils.RegionReadKeys] = 0.0 utils.MinHotThresholds[utils.RegionReadQueryNum] = 0.0 - newItem := cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) + newItem := cache.CheckPeerFlow(region, []*metapb.Peer{peer}, deltaLoads, interval) re.Nil(newItem) // new peer, interval is larger than report interval, but no hot @@ -333,8 +326,8 @@ func TestUpdateHotPeerStat(t *testing.T) { utils.MinHotThresholds[utils.RegionReadBytes] = 1.0 utils.MinHotThresholds[utils.RegionReadKeys] = 1.0 utils.MinHotThresholds[utils.RegionReadQueryNum] = 1.0 - newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) - re.Nil(newItem) + newItem = cache.CheckPeerFlow(region, []*metapb.Peer{peer}, deltaLoads, interval) + re.Empty(newItem) // new peer, interval is less than report interval interval = 4 @@ -342,50 +335,49 @@ func TestUpdateHotPeerStat(t *testing.T) { utils.MinHotThresholds[utils.RegionReadBytes] = 0.0 utils.MinHotThresholds[utils.RegionReadKeys] = 0.0 utils.MinHotThresholds[utils.RegionReadQueryNum] = 0.0 - newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) + newItem = cache.CheckPeerFlow(region, []*metapb.Peer{peer}, deltaLoads, interval) re.NotNil(newItem) - re.Equal(0, newItem.HotDegree) - re.Equal(0, newItem.AntiCount) + re.Equal(0, newItem[0].HotDegree) + re.Equal(0, newItem[0].AntiCount) // sum of interval is less than report interval - interval = 4 deltaLoads = []float64{60.0, 60.0, 60.0} - cache.updateStat(newItem) - newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) - re.Equal(0, newItem.HotDegree) - re.Equal(0, newItem.AntiCount) + cache.UpdateStat(newItem[0]) + newItem = cache.CheckPeerFlow(region, []*metapb.Peer{peer}, deltaLoads, interval) + re.Equal(0, newItem[0].HotDegree) + re.Equal(0, newItem[0].AntiCount) // sum of interval is larger than report interval, and hot - newItem.AntiCount = utils.Read.DefaultAntiCount() - cache.updateStat(newItem) - newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) - re.Equal(1, newItem.HotDegree) - re.Equal(2*m, newItem.AntiCount) + newItem[0].AntiCount = utils.Read.DefaultAntiCount() + cache.UpdateStat(newItem[0]) + newItem = cache.CheckPeerFlow(region, []*metapb.Peer{peer}, deltaLoads, interval) + re.Equal(1, newItem[0].HotDegree) + re.Equal(2*m, newItem[0].AntiCount) // sum of interval is less than report interval - cache.updateStat(newItem) - newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) - re.Equal(1, newItem.HotDegree) - re.Equal(2*m, newItem.AntiCount) + cache.UpdateStat(newItem[0]) + newItem = cache.CheckPeerFlow(region, []*metapb.Peer{peer}, deltaLoads, interval) + re.Equal(1, newItem[0].HotDegree) + re.Equal(2*m, newItem[0].AntiCount) // sum of interval is larger than report interval, and hot interval = 10 - cache.updateStat(newItem) - newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, 
uint64(interval)), region) - re.Equal(2, newItem.HotDegree) - re.Equal(2*m, newItem.AntiCount) + cache.UpdateStat(newItem[0]) + newItem = cache.CheckPeerFlow(region, []*metapb.Peer{peer}, deltaLoads, interval) + re.Equal(2, newItem[0].HotDegree) + re.Equal(2*m, newItem[0].AntiCount) // sum of interval is larger than report interval, and cold utils.MinHotThresholds[utils.RegionReadBytes] = 10.0 utils.MinHotThresholds[utils.RegionReadKeys] = 10.0 utils.MinHotThresholds[utils.RegionReadQueryNum] = 10.0 - cache.updateStat(newItem) - newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) - re.Equal(1, newItem.HotDegree) - re.Equal(2*m-1, newItem.AntiCount) + cache.UpdateStat(newItem[0]) + newItem = cache.CheckPeerFlow(region, []*metapb.Peer{peer}, deltaLoads, interval) + re.Equal(1, newItem[0].HotDegree) + re.Equal(2*m-1, newItem[0].AntiCount) // sum of interval is larger than report interval, and cold for i := 0; i < 2*m-1; i++ { - cache.updateStat(newItem) - newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) + cache.UpdateStat(newItem[0]) + newItem = cache.CheckPeerFlow(region, []*metapb.Peer{peer}, deltaLoads, interval) } - re.Less(newItem.HotDegree, 0) - re.Equal(0, newItem.AntiCount) - re.Equal(utils.Remove, newItem.actionType) + re.Negative(newItem[0].HotDegree) + re.Equal(0, newItem[0].AntiCount) + re.Equal(utils.Remove, newItem[0].actionType) } func TestThresholdWithUpdateHotPeerStat(t *testing.T) { @@ -430,7 +422,7 @@ func testMetrics(re *require.Assertions, interval, byteRate, expectThreshold flo } else { item = cache.updateHotPeerStat(nil, newItem, oldItem, loads, time.Duration(interval)*time.Second, utils.Direct) } - cache.updateStat(item) + cache.UpdateStat(item) } thresholds := cache.calcHotThresholds(storeID) if i < TopNN { @@ -529,7 +521,7 @@ func TestRemoveFromCacheRandom(t *testing.T) { } } -func checkCoolDown(re *require.Assertions, cache *hotPeerCache, region *core.RegionInfo, expect bool) { +func checkCoolDown(re *require.Assertions, cache *HotPeerCache, region *core.RegionInfo, expect bool) { item := cache.getOldHotPeerStat(region.GetID(), region.GetLeader().GetStoreId()) re.Equal(expect, item.IsNeedCoolDownTransferLeader(3, cache.kind)) } @@ -688,10 +680,9 @@ func TestHotPeerCacheTopNThreshold(t *testing.T) { StartTimestamp: start, EndTimestamp: end, })) - newPeer := core.NewPeerInfo(meta.Peers[0], region.GetLoads(), end-start) - stat := cache.checkPeerFlow(newPeer, newRegion) - if stat != nil { - cache.updateStat(stat) + stats := cache.CheckPeerFlow(newRegion, newRegion.GetPeers(), newRegion.GetLoads(), end-start) + for _, stat := range stats { + cache.UpdateStat(stat) } } if ThresholdsUpdateInterval == 0 { @@ -717,22 +708,11 @@ func TestHotPeerCacheTopNThreshold(t *testing.T) { func BenchmarkCheckRegionFlow(b *testing.B) { cache := NewHotPeerCache(context.Background(), utils.Read) region := buildRegion(utils.Read, 3, 10) - peerInfos := make([]*core.PeerInfo, 0) - for _, peer := range region.GetPeers() { - peerInfo := core.NewPeerInfo(peer, region.GetLoads(), 10) - peerInfos = append(peerInfos, peerInfo) - } b.ResetTimer() for i := 0; i < b.N; i++ { - items := make([]*HotPeerStat, 0) - for _, peerInfo := range peerInfos { - item := cache.checkPeerFlow(peerInfo, region) - if item != nil { - items = append(items, item) - } - } - for _, ret := range items { - cache.updateStat(ret) + stats := cache.CheckPeerFlow(region, region.GetPeers(), region.GetLoads(), 10) + for _, stat := range stats { + 
cache.UpdateStat(stat) } } } diff --git a/pkg/statistics/region_collection.go b/pkg/statistics/region_collection.go index 21af8e152fd..30197dd43ea 100644 --- a/pkg/statistics/region_collection.go +++ b/pkg/statistics/region_collection.go @@ -22,6 +22,7 @@ import ( sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/utils/syncutil" + "go.uber.org/zap" ) // RegionInfoProvider is an interface to provide the region information. @@ -31,7 +32,9 @@ type RegionInfoProvider interface { } // RegionStatisticType represents the type of the region's status. -type RegionStatisticType uint32 +type RegionStatisticType uint16 + +const emptyStatistic = RegionStatisticType(0) // region status type const ( @@ -78,7 +81,6 @@ var ( // RegionInfoWithTS is used to record the extra timestamp status of a region. type RegionInfoWithTS struct { - id uint64 startMissVoterPeerTS int64 startDownPeerTS int64 } @@ -88,7 +90,7 @@ type RegionStatistics struct { syncutil.RWMutex rip RegionInfoProvider conf sc.CheckerConfigProvider - stats map[RegionStatisticType]map[uint64]*RegionInfoWithTS + stats map[RegionStatisticType]map[uint64]any index map[uint64]RegionStatisticType ruleManager *placement.RuleManager } @@ -103,11 +105,11 @@ func NewRegionStatistics( rip: rip, conf: conf, ruleManager: ruleManager, - stats: make(map[RegionStatisticType]map[uint64]*RegionInfoWithTS), + stats: make(map[RegionStatisticType]map[uint64]any), index: make(map[uint64]RegionStatisticType), } for _, typ := range regionStatisticTypes { - r.stats[typ] = make(map[uint64]*RegionInfoWithTS) + r.stats[typ] = make(map[uint64]any) } return r } @@ -148,14 +150,37 @@ func (r *RegionStatistics) deleteEntry(deleteIndex RegionStatisticType, regionID // due to some special state types. func (r *RegionStatistics) RegionStatsNeedUpdate(region *core.RegionInfo) bool { regionID := region.GetID() + if !r.isObserved(regionID) { + return true + } if r.IsRegionStatsType(regionID, OversizedRegion) != region.IsOversized(int64(r.conf.GetRegionMaxSize()), int64(r.conf.GetRegionMaxKeys())) { return true } + + if r.IsRegionStatsType(regionID, PendingPeer) != (len(region.GetPendingPeers()) != 0) { + return true + } + if r.IsRegionStatsType(regionID, DownPeer) != (len(region.GetDownPeers()) != 0) { + return true + } + if r.IsRegionStatsType(regionID, LearnerPeer) != (len(region.GetLearners()) != 0) { + return true + } + + // merge return r.IsRegionStatsType(regionID, UndersizedRegion) != region.NeedMerge(int64(r.conf.GetMaxMergeRegionSize()), int64(r.conf.GetMaxMergeRegionKeys())) } +// isObserved returns whether the region is observed. And it also shows whether PD received heartbeat of this region. +func (r *RegionStatistics) isObserved(id uint64) bool { + r.RLock() + defer r.RUnlock() + _, ok := r.index[id] + return ok +} + // Observe records the current regions' status. func (r *RegionStatistics) Observe(region *core.RegionInfo, stores []*core.StoreInfo) { r.Lock() @@ -164,7 +189,6 @@ func (r *RegionStatistics) Observe(region *core.RegionInfo, stores []*core.Store desiredReplicas = r.conf.GetMaxReplicas() desiredVoters = desiredReplicas peerTypeIndex RegionStatisticType - deleteIndex RegionStatisticType ) // Check if the region meets count requirements of its rules. 
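RegionStatsNeedUpdate above now also reports unobserved regions and any mismatch between the cached flags and the region's current pending/down/learner state. A small sketch of that comparison, assuming the usual 1 << iota layout for the statistic flags:

```go
package main

import "fmt"

type statisticType uint16

// Illustrative flags, assuming the usual 1 << iota layout of RegionStatisticType.
const (
	pendingPeer statisticType = 1 << iota
	downPeer
	oversizedRegion
)

// index caches, per region ID, which statistic types the region matched the
// last time it was observed; a missing entry means PD has not seen the region.
var index = map[uint64]statisticType{}

func isType(regionID uint64, typ statisticType) bool {
	return index[regionID]&typ != 0
}

// needsUpdate mirrors the idea of RegionStatsNeedUpdate: an unobserved region
// always needs an update, and so does any region whose current state disagrees
// with the cached flags.
func needsUpdate(regionID uint64, pendingNow, downNow, oversizedNow bool) bool {
	if _, ok := index[regionID]; !ok {
		return true
	}
	return isType(regionID, pendingPeer) != pendingNow ||
		isType(regionID, downPeer) != downNow ||
		isType(regionID, oversizedRegion) != oversizedNow
}

func main() {
	fmt.Println(needsUpdate(1, false, false, false)) // true: never observed

	index[1] = downPeer                             // pretend Observe recorded a down peer
	fmt.Println(needsUpdate(1, false, true, false)) // false: nothing changed
	fmt.Println(needsUpdate(1, true, true, false))  // true: a pending peer appeared
}
```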
if r.conf.IsPlacementRulesEnabled() { @@ -182,68 +206,115 @@ func (r *RegionStatistics) Observe(region *core.RegionInfo, stores []*core.Store } } } + + peers := region.GetPeers() + downPeers := region.GetDownPeers() + pendingPeers := region.GetPendingPeers() + learners := region.GetLearners() + voters := region.GetVoters() + regionSize := region.GetApproximateSize() + regionMaxSize := int64(r.conf.GetRegionMaxSize()) + regionMaxKeys := int64(r.conf.GetRegionMaxKeys()) + maxMergeRegionSize := int64(r.conf.GetMaxMergeRegionSize()) + maxMergeRegionKeys := int64(r.conf.GetMaxMergeRegionKeys()) + leaderIsWitness := region.GetLeader().GetIsWitness() + // Better to make sure once any of these conditions changes, it will trigger the heartbeat `save_cache`. // Otherwise, the state may be out-of-date for a long time, which needs another way to apply the change ASAP. // For example, see `RegionStatsNeedUpdate` above to know how `OversizedRegion` and `UndersizedRegion` are updated. - conditions := map[RegionStatisticType]bool{ - MissPeer: len(region.GetPeers()) < desiredReplicas, - ExtraPeer: len(region.GetPeers()) > desiredReplicas, - DownPeer: len(region.GetDownPeers()) > 0, - PendingPeer: len(region.GetPendingPeers()) > 0, - OfflinePeer: func() bool { - for _, store := range stores { - if store.IsRemoving() { - peer := region.GetStorePeer(store.GetID()) - if peer != nil { - return true - } - } + var conditions RegionStatisticType + if len(peers) < desiredReplicas { + conditions |= MissPeer + } + if len(peers) > desiredReplicas { + conditions |= ExtraPeer + } + if len(downPeers) > 0 { + conditions |= DownPeer + } + if len(pendingPeers) > 0 { + conditions |= PendingPeer + } + for _, store := range stores { + if store.IsRemoving() { + peer := region.GetStorePeer(store.GetID()) + if peer != nil { + conditions |= OfflinePeer + break } - return false - }(), - LearnerPeer: len(region.GetLearners()) > 0, - EmptyRegion: region.GetApproximateSize() <= core.EmptyRegionApproximateSize, - OversizedRegion: region.IsOversized( - int64(r.conf.GetRegionMaxSize()), - int64(r.conf.GetRegionMaxKeys()), - ), - UndersizedRegion: region.NeedMerge( - int64(r.conf.GetMaxMergeRegionSize()), - int64(r.conf.GetMaxMergeRegionKeys()), - ), - WitnessLeader: region.GetLeader().GetIsWitness(), + } + } + if len(learners) > 0 { + conditions |= LearnerPeer + } + if regionSize <= core.EmptyRegionApproximateSize { + conditions |= EmptyRegion + } + if region.IsOversized(regionMaxSize, regionMaxKeys) { + conditions |= OversizedRegion + } + if region.NeedMerge(maxMergeRegionSize, maxMergeRegionKeys) { + conditions |= UndersizedRegion + } + if leaderIsWitness { + conditions |= WitnessLeader } // Check if the region meets any of the conditions and update the corresponding info. 
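The map of boolean conditions is replaced above by a single bit set that is built with |= and later walked one bit at a time; stale entries are found with the AND NOT operator. A short sketch of both idioms with illustrative flag values:

```go
package main

import "fmt"

type statisticType uint16

const typeCount = 4 // illustrative; the real loop runs over len(regionStatisticTypes)

func main() {
	// Flags matched at the previous observation and at the current one.
	oldIndex := statisticType(0b0110)
	newIndex := statisticType(0b0011)

	// Walk every known type by testing one bit at a time instead of ranging
	// over a map of conditions.
	for i := 0; i < typeCount; i++ {
		cond := statisticType(1 << i)
		if newIndex&cond != 0 {
			fmt.Printf("type %04b still holds\n", cond)
		}
	}

	// AND NOT keeps exactly the bits that were set before but are not set now,
	// i.e. the per-type map entries that must be deleted for this region.
	deleteIndex := oldIndex &^ newIndex
	fmt.Printf("delete %04b\n", deleteIndex) // 0100
}
```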
regionID := region.GetID() - for typ, c := range conditions { - if c { - info := r.stats[typ][regionID] + for i := 0; i < len(regionStatisticTypes); i++ { + condition := RegionStatisticType(1 << i) + if conditions&condition == 0 { + continue + } + info := r.stats[condition][regionID] + // The condition is met + switch condition { + case MissPeer: if info == nil { - info = &RegionInfoWithTS{id: regionID} + info = &RegionInfoWithTS{} } - if typ == DownPeer { - if info.startDownPeerTS != 0 { - regionDownPeerDuration.Observe(float64(time.Now().Unix() - info.startDownPeerTS)) + if len(voters) < desiredVoters { + if info.(*RegionInfoWithTS).startMissVoterPeerTS != 0 { + regionMissVoterPeerDuration.Observe(float64(time.Now().Unix() - info.(*RegionInfoWithTS).startMissVoterPeerTS)) } else { - info.startDownPeerTS = time.Now().Unix() - } - } else if typ == MissPeer && len(region.GetVoters()) < desiredVoters { - if info.startMissVoterPeerTS != 0 { - regionMissVoterPeerDuration.Observe(float64(time.Now().Unix() - info.startMissVoterPeerTS)) - } else { - info.startMissVoterPeerTS = time.Now().Unix() + info.(*RegionInfoWithTS).startMissVoterPeerTS = time.Now().Unix() } } - - r.stats[typ][regionID] = info - peerTypeIndex |= typ + case DownPeer: + if info == nil { + info = &RegionInfoWithTS{} + } + if info.(*RegionInfoWithTS).startDownPeerTS != 0 { + regionDownPeerDuration.Observe(float64(time.Now().Unix() - info.(*RegionInfoWithTS).startDownPeerTS)) + } else { + info.(*RegionInfoWithTS).startDownPeerTS = time.Now().Unix() + logDownPeerWithNoDisconnectedStore(region, stores) + } + case ExtraPeer: + fallthrough + case PendingPeer: + fallthrough + case OfflinePeer: + fallthrough + case LearnerPeer: + fallthrough + case EmptyRegion: + fallthrough + case OversizedRegion: + fallthrough + case UndersizedRegion: + fallthrough + case WitnessLeader: + info = struct{}{} } + r.stats[condition][regionID] = info + peerTypeIndex |= condition } // Remove the info if any of the conditions are not met any more. - if oldIndex, ok := r.index[regionID]; ok { - deleteIndex = oldIndex &^ peerTypeIndex + if oldIndex, ok := r.index[regionID]; ok && oldIndex > emptyStatistic { + deleteIndex := oldIndex &^ peerTypeIndex + r.deleteEntry(deleteIndex, regionID) } - r.deleteEntry(deleteIndex, regionID) r.index[regionID] = peerTypeIndex } @@ -252,7 +323,10 @@ func (r *RegionStatistics) ClearDefunctRegion(regionID uint64) { r.Lock() defer r.Unlock() if oldIndex, ok := r.index[regionID]; ok { - r.deleteEntry(oldIndex, regionID) + delete(r.index, regionID) + if oldIndex > emptyStatistic { + r.deleteEntry(oldIndex, regionID) + } } } @@ -413,3 +487,24 @@ func notIsolatedStoresWithLabel(stores []*core.StoreInfo, label string) [][]*cor } return res } + +// logDownPeerWithNoDisconnectedStore logs down peers on connected stores. +// It won't log down peer when any store of the replica is disconnected which is +// used to avoid too many logs when a store is disconnected. +// TODO: it's not a good way to log down peer during process region heartbeat, we should handle it in another way. 
+// region: the region which has down peer +// stores: all stores that the region has peer on them +func logDownPeerWithNoDisconnectedStore(region *core.RegionInfo, stores []*core.StoreInfo) { + for _, store := range stores { + if store.IsDisconnected() { + return + } + } + for _, p := range region.GetDownPeers() { + log.Warn("region has down peer on connected store", + zap.Uint64("region-id", region.GetID()), + zap.Uint64("down-peer", p.GetPeer().GetId()), + zap.Uint64("down-seconds", p.GetDownSeconds()), + zap.Uint64("store-id", p.GetPeer().GetStoreId())) + } +} diff --git a/pkg/statistics/region_collection_test.go b/pkg/statistics/region_collection_test.go index cbbf7672bee..64a625a04e2 100644 --- a/pkg/statistics/region_collection_test.go +++ b/pkg/statistics/region_collection_test.go @@ -269,3 +269,43 @@ func TestRegionLabelIsolationLevel(t *testing.T) { re.Equal(res, labelLevelStats.labelCounter[i]) } } + +func BenchmarkObserve(b *testing.B) { + // Setup + store := storage.NewStorageWithMemoryBackend() + manager := placement.NewRuleManager(context.Background(), store, nil, nil) + manager.Initialize(3, []string{"zone", "rack", "host"}, "") + opt := mockconfig.NewTestOptions() + opt.SetPlacementRuleEnabled(false) + peers := []*metapb.Peer{ + {Id: 4, StoreId: 1}, + {Id: 5, StoreId: 2}, + {Id: 6, StoreId: 3}, + } + + metaStores := []*metapb.Store{ + {Id: 1, Address: "mock://tikv-1"}, + {Id: 2, Address: "mock://tikv-2"}, + {Id: 3, Address: "mock://tikv-3"}, + } + + stores := make([]*core.StoreInfo, 0, len(metaStores)) + for _, m := range metaStores { + s := core.NewStoreInfo(m) + stores = append(stores, s) + } + + regionNum := uint64(1000000) + regions := make([]*core.RegionInfo, 0, regionNum) + for i := uint64(1); i <= regionNum; i++ { + r := &metapb.Region{Id: i, Peers: peers, StartKey: []byte{byte(i)}, EndKey: []byte{byte(i + 1)}} + regions = append(regions, core.NewRegionInfo(r, peers[0])) + } + regionStats := NewRegionStatistics(nil, opt, manager) + + b.ResetTimer() + // Run the Observe function b.N times + for i := 0; i < b.N; i++ { + regionStats.Observe(regions[i%int(regionNum)], stores) + } +} diff --git a/pkg/statistics/slow_stat.go b/pkg/statistics/slow_stat.go index 4079043d154..cc579b3d90b 100644 --- a/pkg/statistics/slow_stat.go +++ b/pkg/statistics/slow_stat.go @@ -15,8 +15,6 @@ package statistics import ( - "context" - "github.com/tikv/pd/pkg/utils/syncutil" ) @@ -26,7 +24,7 @@ type SlowStat struct { } // NewSlowStat creates the container to hold slow nodes' statistics. -func NewSlowStat(ctx context.Context) *SlowStat { +func NewSlowStat() *SlowStat { return &SlowStat{ SlowStoresStats: NewSlowStoresStats(), } diff --git a/pkg/statistics/store_collection.go b/pkg/statistics/store_collection.go index aacd45338d1..4f76ffb0b5f 100644 --- a/pkg/statistics/store_collection.go +++ b/pkg/statistics/store_collection.go @@ -147,7 +147,7 @@ func (s *storeStatistics) Observe(store *core.StoreInfo) { } } -func (s *storeStatistics) ObserveHotStat(store *core.StoreInfo, stats *StoresStats) { +func ObserveHotStat(store *core.StoreInfo, stats *StoresStats) { // Store flows. 
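logDownPeerWithNoDisconnectedStore above deliberately stays silent while any replica's store is disconnected so a single store outage does not flood the log. A simplified sketch of that guard with stand-in types:

```go
package main

import "fmt"

// store and downPeer are pared-down stand-ins for core.StoreInfo and the
// region's down-peer records.
type store struct {
	id           uint64
	disconnected bool
}
type downPeer struct{ peerID, storeID, downSeconds uint64 }

// logDownPeers mirrors the guard: if any replica's store is disconnected,
// return without logging; otherwise report every down peer.
func logDownPeers(regionID uint64, down []downPeer, stores []store) {
	for _, s := range stores {
		if s.disconnected {
			return
		}
	}
	for _, p := range down {
		fmt.Printf("region %d has down peer %d on connected store %d (down %ds)\n",
			regionID, p.peerID, p.storeID, p.downSeconds)
	}
}

func main() {
	stores := []store{{id: 1}, {id: 2}, {id: 3}}
	down := []downPeer{{peerID: 5, storeID: 2, downSeconds: 120}}
	logDownPeers(7, down, stores) // logs: all stores are connected

	stores[1].disconnected = true
	logDownPeers(7, down, stores) // silent: store 2 is disconnected
}
```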
storeAddress := store.GetAddress() id := strconv.FormatUint(store.GetID(), 10) @@ -309,10 +309,6 @@ func (m *storeStatisticsMap) Observe(store *core.StoreInfo) { m.stats.Observe(store) } -func (m *storeStatisticsMap) ObserveHotStat(store *core.StoreInfo, stats *StoresStats) { - m.stats.ObserveHotStat(store, stats) -} - func (m *storeStatisticsMap) Collect() { m.stats.Collect() } diff --git a/pkg/statistics/store_collection_test.go b/pkg/statistics/store_collection_test.go index 054e55a9fda..64a02a54bb4 100644 --- a/pkg/statistics/store_collection_test.go +++ b/pkg/statistics/store_collection_test.go @@ -68,7 +68,7 @@ func TestStoreStatistics(t *testing.T) { storeStats := NewStoreStatisticsMap(opt) for _, store := range stores { storeStats.Observe(store) - storeStats.ObserveHotStat(store, storesStats) + ObserveHotStat(store, storesStats) } stats := storeStats.stats @@ -98,7 +98,7 @@ func TestSummaryStoreInfos(t *testing.T) { rw := utils.Read kind := constant.LeaderKind collector := newTikvCollector() - storeHistoryLoad := NewStoreHistoryLoads(utils.DimLen) + storeHistoryLoad := NewStoreHistoryLoads(utils.DimLen, DefaultHistorySampleDuration, DefaultHistorySampleInterval) storeInfos := make(map[uint64]*StoreSummaryInfo) storeLoads := make(map[uint64][]float64) for _, storeID := range []int{1, 3} { @@ -130,7 +130,7 @@ func TestSummaryStoreInfos(t *testing.T) { } // case 2: put many elements into history load - historySampleInterval = 0 + storeHistoryLoad.sampleDuration = 0 for i := 1; i < 10; i++ { details = summaryStoresLoadByEngine(storeInfos, storeLoads, storeHistoryLoad, nil, rw, kind, collector) expect := []float64{2, 4, 10} diff --git a/pkg/statistics/store_load.go b/pkg/statistics/store_load.go index 79417b65b7e..c468024e3d6 100644 --- a/pkg/statistics/store_load.go +++ b/pkg/statistics/store_load.go @@ -245,24 +245,29 @@ func MaxLoad(a, b *StoreLoad) *StoreLoad { } } -var ( - // historySampleInterval is the sampling interval for history load. - historySampleInterval = 30 * time.Second - // HistorySampleDuration is the duration for saving history load. - HistorySampleDuration = 5 * time.Minute - defaultSize = 10 +const ( + // DefaultHistorySampleInterval is the sampling interval for history load. + DefaultHistorySampleInterval = 30 * time.Second + // DefaultHistorySampleDuration is the duration for saving history load. + DefaultHistorySampleDuration = 5 * time.Minute ) // StoreHistoryLoads records the history load of a store. type StoreHistoryLoads struct { // loads[read/write][leader/follower]-->[store id]-->history load - loads [utils.RWTypeLen][constant.ResourceKindLen]map[uint64]*storeHistoryLoad - dim int + loads [utils.RWTypeLen][constant.ResourceKindLen]map[uint64]*storeHistoryLoad + dim int + sampleInterval time.Duration + sampleDuration time.Duration } // NewStoreHistoryLoads creates a StoreHistoryLoads. -func NewStoreHistoryLoads(dim int) *StoreHistoryLoads { - st := StoreHistoryLoads{dim: dim} +func NewStoreHistoryLoads(dim int, sampleDuration time.Duration, sampleInterval time.Duration) *StoreHistoryLoads { + st := StoreHistoryLoads{ + dim: dim, + sampleDuration: sampleDuration, + sampleInterval: sampleInterval, + } for i := utils.RWType(0); i < utils.RWTypeLen; i++ { for j := constant.ResourceKind(0); j < constant.ResourceKindLen; j++ { st.loads[i][j] = make(map[uint64]*storeHistoryLoad) @@ -272,20 +277,24 @@ func NewStoreHistoryLoads(dim int) *StoreHistoryLoads { } // Add adds the store load to the history. 
-func (s *StoreHistoryLoads) Add(storeID uint64, rwTp utils.RWType, kind constant.ResourceKind, loads []float64) { +func (s *StoreHistoryLoads) Add(storeID uint64, rwTp utils.RWType, kind constant.ResourceKind, pointLoad []float64) { load, ok := s.loads[rwTp][kind][storeID] if !ok { - size := defaultSize - if historySampleInterval != 0 { - size = int(HistorySampleDuration / historySampleInterval) + size := int(DefaultHistorySampleDuration / DefaultHistorySampleInterval) + if s.sampleInterval != 0 { + size = int(s.sampleDuration / s.sampleInterval) + } + if s.sampleDuration == 0 { + size = 0 } - load = newStoreHistoryLoad(size, s.dim) + load = newStoreHistoryLoad(size, s.dim, s.sampleInterval) s.loads[rwTp][kind][storeID] = load } - load.add(loads) + load.add(pointLoad) } // Get returns the store loads from the history, not one time point. +// In another word, the result is [dim][time]. func (s *StoreHistoryLoads) Get(storeID uint64, rwTp utils.RWType, kind constant.ResourceKind) [][]float64 { load, ok := s.loads[rwTp][kind][storeID] if !ok { @@ -294,36 +303,46 @@ func (s *StoreHistoryLoads) Get(storeID uint64, rwTp utils.RWType, kind constant return load.get() } +// UpdateConfig updates the sample duration and interval. +func (s *StoreHistoryLoads) UpdateConfig(sampleDuration time.Duration, sampleInterval time.Duration) *StoreHistoryLoads { + if s.sampleDuration == sampleDuration && s.sampleInterval == sampleInterval { + return s + } + return NewStoreHistoryLoads(s.dim, sampleDuration, sampleInterval) +} + type storeHistoryLoad struct { update time.Time // loads is a circular buffer. // [dim] --> [1,2,3...] - loads [][]float64 - size int - count int + loads [][]float64 + size int + count int + sampleInterval time.Duration } -func newStoreHistoryLoad(size int, dim int) *storeHistoryLoad { +func newStoreHistoryLoad(size int, dimLen int, sampleInterval time.Duration) *storeHistoryLoad { return &storeHistoryLoad{ - loads: make([][]float64, dim), - size: size, + loads: make([][]float64, dimLen), + size: size, + sampleInterval: sampleInterval, } } // add adds the store load to the history. // eg. add([1,2,3]) --> [][]float64{{1}, {2}, {3}} -func (s *storeHistoryLoad) add(loads []float64) { +func (s *storeHistoryLoad) add(pointLoad []float64) { // reject if the loads length is not equal to the dimension. 
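storeHistoryLoad above keeps one fixed-size ring per dimension and drops samples that arrive faster than the sampling interval. A self-contained sketch of that ring buffer with illustrative durations:

```go
package main

import (
	"fmt"
	"time"
)

// historyLoad is a pared-down version of storeHistoryLoad: one fixed-size ring
// per dimension, refreshed at most once per sampleInterval.
type historyLoad struct {
	update         time.Time
	loads          [][]float64 // [dim][ring position]
	size, count    int
	sampleInterval time.Duration
}

func newHistoryLoad(dimLen int, sampleDuration, sampleInterval time.Duration) *historyLoad {
	size := 0
	if sampleInterval != 0 && sampleDuration != 0 {
		size = int(sampleDuration / sampleInterval)
	}
	return &historyLoad{loads: make([][]float64, dimLen), size: size, sampleInterval: sampleInterval}
}

// add drops samples that arrive within the sampling interval and writes the
// rest into the ring at count % size, so old samples are overwritten once the
// window is full.
func (h *historyLoad) add(pointLoad []float64) {
	if time.Since(h.update) < h.sampleInterval || h.size == 0 || len(pointLoad) != len(h.loads) {
		return
	}
	if h.count == 0 {
		for dim := range h.loads {
			h.loads[dim] = make([]float64, h.size)
		}
	}
	for dim, v := range pointLoad {
		h.loads[dim][h.count%h.size] = v
	}
	h.count++
	h.update = time.Now()
}

func main() {
	// 5-slot window: one minute of history sampled every 12 seconds (illustrative values).
	h := newHistoryLoad(3, time.Minute, 12*time.Second)
	h.add([]float64{1, 2, 3})
	h.add([]float64{4, 5, 6}) // rejected: arrives within the same sampling interval
	fmt.Println(h.loads)      // [[1 0 0 0 0] [2 0 0 0 0] [3 0 0 0 0]]
}
```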
- if time.Since(s.update) < historySampleInterval || s.size == 0 || len(loads) != len(s.loads) { + if time.Since(s.update) < s.sampleInterval || s.size == 0 || len(pointLoad) != len(s.loads) { return } if s.count == 0 { - for i := range s.loads { - s.loads[i] = make([]float64, s.size) + for dim := range s.loads { + s.loads[dim] = make([]float64, s.size) } } - for i, v := range loads { - s.loads[i][s.count%s.size] = v + for dim, v := range pointLoad { + s.loads[dim][s.count%s.size] = v } s.count++ s.update = time.Now() diff --git a/pkg/statistics/store_load_test.go b/pkg/statistics/store_load_test.go index 67f2dff9cf9..67c9e53482f 100644 --- a/pkg/statistics/store_load_test.go +++ b/pkg/statistics/store_load_test.go @@ -16,6 +16,7 @@ package statistics import ( "testing" + "time" "github.com/stretchr/testify/require" "github.com/tikv/pd/pkg/core/constant" @@ -24,8 +25,7 @@ import ( func TestHistoryLoads(t *testing.T) { re := require.New(t) - historySampleInterval = 0 - historyLoads := NewStoreHistoryLoads(utils.DimLen) + historyLoads := NewStoreHistoryLoads(utils.DimLen, DefaultHistorySampleDuration, 0) loads := []float64{1.0, 2.0, 3.0} rwTp := utils.Read kind := constant.LeaderKind @@ -43,4 +43,20 @@ func TestHistoryLoads(t *testing.T) { expectLoads[utils.QueryDim][i] = 3.0 } re.EqualValues(expectLoads, historyLoads.Get(1, rwTp, kind)) + + historyLoads = NewStoreHistoryLoads(utils.DimLen, time.Millisecond, time.Millisecond) + historyLoads.Add(1, rwTp, kind, loads) + re.Len(historyLoads.Get(1, rwTp, kind)[0], 1) + + historyLoads = NewStoreHistoryLoads(utils.DimLen, time.Millisecond, time.Second) + historyLoads.Add(1, rwTp, kind, loads) + re.Empty(historyLoads.Get(1, rwTp, kind)[0]) + + historyLoads = NewStoreHistoryLoads(utils.DimLen, 0, time.Second) + historyLoads.Add(1, rwTp, kind, loads) + re.Empty(historyLoads.Get(1, rwTp, kind)[0]) + + historyLoads = NewStoreHistoryLoads(utils.DimLen, 0, 0) + historyLoads.Add(1, rwTp, kind, loads) + re.Empty(historyLoads.Get(1, rwTp, kind)[0]) } diff --git a/pkg/statistics/store_test.go b/pkg/statistics/store_test.go index a0e7140a882..ccf85caaa72 100644 --- a/pkg/statistics/store_test.go +++ b/pkg/statistics/store_test.go @@ -24,7 +24,7 @@ import ( "github.com/tikv/pd/pkg/core" ) -func TestFilterUnhealtyStore(t *testing.T) { +func TestFilterUnhealthyStore(t *testing.T) { re := require.New(t) stats := NewStoresStats() cluster := core.NewBasicCluster() diff --git a/pkg/statistics/utils/kind.go b/pkg/statistics/utils/kind.go index 4d44b8d57e1..089732f759f 100644 --- a/pkg/statistics/utils/kind.go +++ b/pkg/statistics/utils/kind.go @@ -14,10 +14,6 @@ package utils -import ( - "github.com/tikv/pd/pkg/core" -) - const ( // BytePriority indicates hot-region-scheduler prefer byte dim BytePriority = "byte" @@ -230,10 +226,8 @@ func (rw RWType) DefaultAntiCount() int { } } -// GetLoadRatesFromPeer gets the load rates of the read or write type from PeerInfo. -func (rw RWType) GetLoadRatesFromPeer(peer *core.PeerInfo) []float64 { - deltaLoads := peer.GetLoads() - interval := peer.GetInterval() +// GetLoadRates gets the load rates of the read or write type. 
+func (rw RWType) GetLoadRates(deltaLoads []float64, interval uint64) []float64 { loads := make([]float64, DimLen) for dim, k := range rw.RegionStats() { loads[dim] = deltaLoads[k] / float64(interval) diff --git a/pkg/statistics/utils/topn.go b/pkg/statistics/utils/topn.go index 916bbb82f92..7ab6c6eaf3e 100644 --- a/pkg/statistics/utils/topn.go +++ b/pkg/statistics/utils/topn.go @@ -261,14 +261,14 @@ func (hp *indexedHeap) Swap(i, j int) { } // Implementing heap.Interface. -func (hp *indexedHeap) Push(x interface{}) { +func (hp *indexedHeap) Push(x any) { item := x.(TopNItem) hp.index[item.ID()] = hp.Len() hp.items = append(hp.items, item) } // Implementing heap.Interface. -func (hp *indexedHeap) Pop() interface{} { +func (hp *indexedHeap) Pop() any { l := hp.Len() item := hp.items[l-1] hp.items = hp.items[:l-1] diff --git a/pkg/storage/endpoint/config.go b/pkg/storage/endpoint/config.go index edfdcbca9a3..820778e36ff 100644 --- a/pkg/storage/endpoint/config.go +++ b/pkg/storage/endpoint/config.go @@ -25,8 +25,8 @@ import ( // ConfigStorage defines the storage operations on the config. type ConfigStorage interface { // Persisted config will be stored in the storage. - LoadConfig(cfg interface{}) (bool, error) - SaveConfig(cfg interface{}) error + LoadConfig(cfg any) (bool, error) + SaveConfig(cfg any) error // Each scheduler has its own customized config, so we need to store them separately. LoadAllSchedulerConfigs() ([]string, []string, error) LoadSchedulerConfig(schedulerName string) (string, error) @@ -37,7 +37,7 @@ type ConfigStorage interface { var _ ConfigStorage = (*StorageEndpoint)(nil) // LoadConfig loads config from configPath then unmarshal it to cfg. -func (se *StorageEndpoint) LoadConfig(cfg interface{}) (bool, error) { +func (se *StorageEndpoint) LoadConfig(cfg any) (bool, error) { value, err := se.Load(configPath) if err != nil || value == "" { return false, err @@ -50,7 +50,7 @@ func (se *StorageEndpoint) LoadConfig(cfg interface{}) (bool, error) { } // SaveConfig stores marshallable cfg to the configPath. -func (se *StorageEndpoint) SaveConfig(cfg interface{}) error { +func (se *StorageEndpoint) SaveConfig(cfg any) error { return se.saveJSON(configPath, cfg) } diff --git a/pkg/storage/endpoint/key_path.go b/pkg/storage/endpoint/key_path.go index 69b8d0f2f8e..dbcd9690419 100644 --- a/pkg/storage/endpoint/key_path.go +++ b/pkg/storage/endpoint/key_path.go @@ -149,24 +149,23 @@ func storeRegionWeightPath(storeID uint64) string { // RegionPath returns the region meta info key path with the given region ID. 
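The indexedHeap change above is part of the interface{} to any migration; Push and Pop keep the heap.Interface shape, only spelled with the alias. A minimal heap.Interface implementation written the same way:

```go
package main

import (
	"container/heap"
	"fmt"
)

// intHeap is a minimal heap.Interface implementation; since Go 1.18 the
// Push/Pop parameters can be spelled `any`, the alias used in the diff above.
type intHeap []int

func (h intHeap) Len() int           { return len(h) }
func (h intHeap) Less(i, j int) bool { return h[i] < h[j] }
func (h intHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }

// Push and Pop are called by container/heap and operate on the raw slice;
// callers go through heap.Push / heap.Pop instead.
func (h *intHeap) Push(x any) { *h = append(*h, x.(int)) }
func (h *intHeap) Pop() any {
	old := *h
	n := len(old)
	item := old[n-1]
	*h = old[:n-1]
	return item
}

func main() {
	h := &intHeap{5, 2, 8}
	heap.Init(h)
	heap.Push(h, 1)
	fmt.Println(heap.Pop(h), heap.Pop(h)) // 1 2
}
```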
func RegionPath(regionID uint64) string { var buf strings.Builder + buf.Grow(len(regionPathPrefix) + 1 + keyLen) // Preallocate memory + buf.WriteString(regionPathPrefix) buf.WriteString("/") s := strconv.FormatUint(regionID, 10) - if len(s) > keyLen { - s = s[len(s)-keyLen:] - } else { - b := make([]byte, keyLen) + b := make([]byte, keyLen) + copy(b, s) + if len(s) < keyLen { diff := keyLen - len(s) - for i := 0; i < keyLen; i++ { - if i < diff { - b[i] = 48 - } else { - b[i] = s[i-diff] - } + copy(b[diff:], s) + for i := 0; i < diff; i++ { + b[i] = '0' } - s = string(b) + } else if len(s) > keyLen { + copy(b, s[len(s)-keyLen:]) } - buf.WriteString(s) + buf.Write(b) return buf.String() } diff --git a/pkg/storage/endpoint/keyspace.go b/pkg/storage/endpoint/keyspace.go index 77c81b2c8d6..30540e49a2e 100644 --- a/pkg/storage/endpoint/keyspace.go +++ b/pkg/storage/endpoint/keyspace.go @@ -48,7 +48,7 @@ type KeyspaceStorage interface { var _ KeyspaceStorage = (*StorageEndpoint)(nil) // SaveKeyspaceMeta adds a save keyspace meta operation to target transaction. -func (se *StorageEndpoint) SaveKeyspaceMeta(txn kv.Txn, meta *keyspacepb.KeyspaceMeta) error { +func (*StorageEndpoint) SaveKeyspaceMeta(txn kv.Txn, meta *keyspacepb.KeyspaceMeta) error { metaPath := KeyspaceMetaPath(meta.GetId()) metaVal, err := proto.Marshal(meta) if err != nil { @@ -59,7 +59,7 @@ func (se *StorageEndpoint) SaveKeyspaceMeta(txn kv.Txn, meta *keyspacepb.Keyspac // LoadKeyspaceMeta load and return keyspace meta specified by id. // If keyspace does not exist or error occurs, returned meta will be nil. -func (se *StorageEndpoint) LoadKeyspaceMeta(txn kv.Txn, id uint32) (*keyspacepb.KeyspaceMeta, error) { +func (*StorageEndpoint) LoadKeyspaceMeta(txn kv.Txn, id uint32) (*keyspacepb.KeyspaceMeta, error) { metaPath := KeyspaceMetaPath(id) metaVal, err := txn.Load(metaPath) if err != nil || metaVal == "" { @@ -74,7 +74,7 @@ func (se *StorageEndpoint) LoadKeyspaceMeta(txn kv.Txn, id uint32) (*keyspacepb. } // SaveKeyspaceID saves keyspace ID to the path specified by keyspace name. -func (se *StorageEndpoint) SaveKeyspaceID(txn kv.Txn, id uint32, name string) error { +func (*StorageEndpoint) SaveKeyspaceID(txn kv.Txn, id uint32, name string) error { idPath := KeyspaceIDPath(name) idVal := strconv.FormatUint(uint64(id), SpaceIDBase) return txn.Save(idPath, idVal) @@ -83,7 +83,7 @@ func (se *StorageEndpoint) SaveKeyspaceID(txn kv.Txn, id uint32, name string) er // LoadKeyspaceID loads keyspace ID from the path specified by keyspace name. // An additional boolean is returned to indicate whether target id exists, // it returns false if target id not found, or if error occurred. -func (se *StorageEndpoint) LoadKeyspaceID(txn kv.Txn, name string) (bool, uint32, error) { +func (*StorageEndpoint) LoadKeyspaceID(txn kv.Txn, name string) (bool, uint32, error) { idPath := KeyspaceIDPath(name) idVal, err := txn.Load(idPath) // Failed to load the keyspaceID if loading operation errored, or if keyspace does not exist. @@ -99,7 +99,7 @@ func (se *StorageEndpoint) LoadKeyspaceID(txn kv.Txn, name string) (bool, uint32 // LoadRangeKeyspace loads keyspaces starting at startID. // limit specifies the limit of loaded keyspaces. 
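The rewritten RegionPath above preallocates the builder and emits the region ID as a fixed-width, zero-padded string so keys sort in ID order. A sketch of the same shape; the prefix and the keyLen value here are assumptions for illustration only:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// keyLen and regionPathPrefix are assumed values for this sketch; the real
// constants live elsewhere in the endpoint package.
const (
	keyLen           = 20
	regionPathPrefix = "raft/r"
)

// regionPath mirrors the shape of the rewritten RegionPath: preallocate the
// builder, then emit the ID left-padded with '0' (or truncated to its lowest
// keyLen digits) so keys sort lexicographically in ID order.
func regionPath(regionID uint64) string {
	var buf strings.Builder
	buf.Grow(len(regionPathPrefix) + 1 + keyLen)
	buf.WriteString(regionPathPrefix)
	buf.WriteString("/")

	s := strconv.FormatUint(regionID, 10)
	b := make([]byte, keyLen)
	if len(s) < keyLen {
		diff := keyLen - len(s)
		for i := 0; i < diff; i++ {
			b[i] = '0'
		}
		copy(b[diff:], s)
	} else {
		copy(b, s[len(s)-keyLen:])
	}
	buf.Write(b)
	return buf.String()
}

func main() {
	fmt.Println(regionPath(42)) // raft/r/00000000000000000042
}
```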
-func (se *StorageEndpoint) LoadRangeKeyspace(txn kv.Txn, startID uint32, limit int) ([]*keyspacepb.KeyspaceMeta, error) { +func (*StorageEndpoint) LoadRangeKeyspace(txn kv.Txn, startID uint32, limit int) ([]*keyspacepb.KeyspaceMeta, error) { startKey := KeyspaceMetaPath(startID) endKey := clientv3.GetPrefixRangeEnd(KeyspaceMetaPrefix()) keys, values, err := txn.LoadRange(startKey, endKey, limit) diff --git a/pkg/storage/endpoint/meta.go b/pkg/storage/endpoint/meta.go index d83e2b386c8..33482da512f 100644 --- a/pkg/storage/endpoint/meta.go +++ b/pkg/storage/endpoint/meta.go @@ -236,7 +236,7 @@ func (se *StorageEndpoint) DeleteRegion(region *metapb.Region) error { } // Flush flushes the pending data to the underlying storage backend. -func (se *StorageEndpoint) Flush() error { return nil } +func (*StorageEndpoint) Flush() error { return nil } // Close closes the underlying storage backend. -func (se *StorageEndpoint) Close() error { return nil } +func (*StorageEndpoint) Close() error { return nil } diff --git a/pkg/storage/endpoint/replication_status.go b/pkg/storage/endpoint/replication_status.go index 0a14770ff47..3cfaaefb9a4 100644 --- a/pkg/storage/endpoint/replication_status.go +++ b/pkg/storage/endpoint/replication_status.go @@ -22,14 +22,14 @@ import ( // ReplicationStatusStorage defines the storage operations on the replication status. type ReplicationStatusStorage interface { - LoadReplicationStatus(mode string, status interface{}) (bool, error) - SaveReplicationStatus(mode string, status interface{}) error + LoadReplicationStatus(mode string, status any) (bool, error) + SaveReplicationStatus(mode string, status any) error } var _ ReplicationStatusStorage = (*StorageEndpoint)(nil) // LoadReplicationStatus loads replication status by mode. -func (se *StorageEndpoint) LoadReplicationStatus(mode string, status interface{}) (bool, error) { +func (se *StorageEndpoint) LoadReplicationStatus(mode string, status any) (bool, error) { v, err := se.Load(replicationModePath(mode)) if err != nil || v == "" { return false, err @@ -42,6 +42,6 @@ func (se *StorageEndpoint) LoadReplicationStatus(mode string, status interface{} } // SaveReplicationStatus stores replication status by mode. -func (se *StorageEndpoint) SaveReplicationStatus(mode string, status interface{}) error { +func (se *StorageEndpoint) SaveReplicationStatus(mode string, status any) error { return se.saveJSON(replicationModePath(mode), status) } diff --git a/pkg/storage/endpoint/resource_group.go b/pkg/storage/endpoint/resource_group.go index 150ea77a1c7..e777ea635c6 100644 --- a/pkg/storage/endpoint/resource_group.go +++ b/pkg/storage/endpoint/resource_group.go @@ -24,9 +24,9 @@ type ResourceGroupStorage interface { SaveResourceGroupSetting(name string, msg proto.Message) error DeleteResourceGroupSetting(name string) error LoadResourceGroupStates(f func(k, v string)) error - SaveResourceGroupStates(name string, obj interface{}) error + SaveResourceGroupStates(name string, obj any) error DeleteResourceGroupStates(name string) error - SaveControllerConfig(config interface{}) error + SaveControllerConfig(config any) error LoadControllerConfig() (string, error) } @@ -48,7 +48,7 @@ func (se *StorageEndpoint) LoadResourceGroupSettings(f func(k, v string)) error } // SaveResourceGroupStates stores a resource group to storage. 
-func (se *StorageEndpoint) SaveResourceGroupStates(name string, obj interface{}) error { +func (se *StorageEndpoint) SaveResourceGroupStates(name string, obj any) error { return se.saveJSON(resourceGroupStateKeyPath(name), obj) } @@ -63,7 +63,7 @@ func (se *StorageEndpoint) LoadResourceGroupStates(f func(k, v string)) error { } // SaveControllerConfig stores the resource controller config to storage. -func (se *StorageEndpoint) SaveControllerConfig(config interface{}) error { +func (se *StorageEndpoint) SaveControllerConfig(config any) error { return se.saveJSON(controllerConfigPath, config) } diff --git a/pkg/storage/endpoint/rule.go b/pkg/storage/endpoint/rule.go index b0827fda477..84ad6ee1352 100644 --- a/pkg/storage/endpoint/rule.go +++ b/pkg/storage/endpoint/rule.go @@ -31,11 +31,11 @@ type RuleStorage interface { // We need to use txn to avoid concurrent modification. // And it is helpful for the scheduling server to watch the rule. - SaveRule(txn kv.Txn, ruleKey string, rule interface{}) error + SaveRule(txn kv.Txn, ruleKey string, rule any) error DeleteRule(txn kv.Txn, ruleKey string) error - SaveRuleGroup(txn kv.Txn, groupID string, group interface{}) error + SaveRuleGroup(txn kv.Txn, groupID string, group any) error DeleteRuleGroup(txn kv.Txn, groupID string) error - SaveRegionRule(txn kv.Txn, ruleKey string, rule interface{}) error + SaveRegionRule(txn kv.Txn, ruleKey string, rule any) error DeleteRegionRule(txn kv.Txn, ruleKey string) error RunInTxn(ctx context.Context, f func(txn kv.Txn) error) error @@ -44,12 +44,12 @@ type RuleStorage interface { var _ RuleStorage = (*StorageEndpoint)(nil) // SaveRule stores a rule cfg to the rulesPath. -func (se *StorageEndpoint) SaveRule(txn kv.Txn, ruleKey string, rule interface{}) error { +func (*StorageEndpoint) SaveRule(txn kv.Txn, ruleKey string, rule any) error { return saveJSONInTxn(txn, ruleKeyPath(ruleKey), rule) } // DeleteRule removes a rule from storage. -func (se *StorageEndpoint) DeleteRule(txn kv.Txn, ruleKey string) error { +func (*StorageEndpoint) DeleteRule(txn kv.Txn, ruleKey string) error { return txn.Remove(ruleKeyPath(ruleKey)) } @@ -59,12 +59,12 @@ func (se *StorageEndpoint) LoadRuleGroups(f func(k, v string)) error { } // SaveRuleGroup stores a rule group config to storage. -func (se *StorageEndpoint) SaveRuleGroup(txn kv.Txn, groupID string, group interface{}) error { +func (*StorageEndpoint) SaveRuleGroup(txn kv.Txn, groupID string, group any) error { return saveJSONInTxn(txn, ruleGroupIDPath(groupID), group) } // DeleteRuleGroup removes a rule group from storage. -func (se *StorageEndpoint) DeleteRuleGroup(txn kv.Txn, groupID string) error { +func (*StorageEndpoint) DeleteRuleGroup(txn kv.Txn, groupID string) error { return txn.Remove(ruleGroupIDPath(groupID)) } @@ -74,12 +74,12 @@ func (se *StorageEndpoint) LoadRegionRules(f func(k, v string)) error { } // SaveRegionRule saves a region rule to the storage. -func (se *StorageEndpoint) SaveRegionRule(txn kv.Txn, ruleKey string, rule interface{}) error { +func (*StorageEndpoint) SaveRegionRule(txn kv.Txn, ruleKey string, rule any) error { return saveJSONInTxn(txn, regionLabelKeyPath(ruleKey), rule) } // DeleteRegionRule removes a region rule from storage. 
-func (se *StorageEndpoint) DeleteRegionRule(txn kv.Txn, ruleKey string) error { +func (*StorageEndpoint) DeleteRegionRule(txn kv.Txn, ruleKey string) error { return txn.Remove(regionLabelKeyPath(ruleKey)) } diff --git a/pkg/storage/endpoint/service_middleware.go b/pkg/storage/endpoint/service_middleware.go index 2becbf3686e..23095900755 100644 --- a/pkg/storage/endpoint/service_middleware.go +++ b/pkg/storage/endpoint/service_middleware.go @@ -22,14 +22,14 @@ import ( // ServiceMiddlewareStorage defines the storage operations on the service middleware. type ServiceMiddlewareStorage interface { - LoadServiceMiddlewareConfig(cfg interface{}) (bool, error) - SaveServiceMiddlewareConfig(cfg interface{}) error + LoadServiceMiddlewareConfig(cfg any) (bool, error) + SaveServiceMiddlewareConfig(cfg any) error } var _ ServiceMiddlewareStorage = (*StorageEndpoint)(nil) // LoadServiceMiddlewareConfig loads service middleware config from serviceMiddlewarePath then unmarshal it to cfg. -func (se *StorageEndpoint) LoadServiceMiddlewareConfig(cfg interface{}) (bool, error) { +func (se *StorageEndpoint) LoadServiceMiddlewareConfig(cfg any) (bool, error) { value, err := se.Load(serviceMiddlewarePath) if err != nil || value == "" { return false, err @@ -42,6 +42,6 @@ func (se *StorageEndpoint) LoadServiceMiddlewareConfig(cfg interface{}) (bool, e } // SaveServiceMiddlewareConfig stores marshallable cfg to the serviceMiddlewarePath. -func (se *StorageEndpoint) SaveServiceMiddlewareConfig(cfg interface{}) error { +func (se *StorageEndpoint) SaveServiceMiddlewareConfig(cfg any) error { return se.saveJSON(serviceMiddlewarePath, cfg) } diff --git a/pkg/storage/endpoint/tso_keyspace_group.go b/pkg/storage/endpoint/tso_keyspace_group.go index 39a08afe937..d24b6e0dd1a 100644 --- a/pkg/storage/endpoint/tso_keyspace_group.go +++ b/pkg/storage/endpoint/tso_keyspace_group.go @@ -20,6 +20,7 @@ import ( "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/storage/kv" + "github.com/tikv/pd/pkg/utils/typeutil" "go.etcd.io/etcd/clientv3" ) @@ -80,6 +81,14 @@ type KeyspaceGroupMember struct { Priority int `json:"priority"` } +// CompareAddress compares the address with the given address. +// It compares the address without the scheme. +// Otherwise, it will not work when we update the scheme from http to https. +// Issue: https://github.com/tikv/pd/issues/8284 +func (m *KeyspaceGroupMember) CompareAddress(addr string) bool { + return typeutil.EqualBaseURLs(m.Address, addr) +} + // SplitState defines the split state of a keyspace group. type SplitState struct { // SplitSource is the current keyspace group ID from which the keyspace group is split. @@ -163,7 +172,7 @@ type KeyspaceGroupStorage interface { var _ KeyspaceGroupStorage = (*StorageEndpoint)(nil) // LoadKeyspaceGroup loads the keyspace group by ID. -func (se *StorageEndpoint) LoadKeyspaceGroup(txn kv.Txn, id uint32) (*KeyspaceGroup, error) { +func (*StorageEndpoint) LoadKeyspaceGroup(txn kv.Txn, id uint32) (*KeyspaceGroup, error) { value, err := txn.Load(KeyspaceGroupIDPath(id)) if err != nil || value == "" { return nil, err @@ -176,12 +185,12 @@ func (se *StorageEndpoint) LoadKeyspaceGroup(txn kv.Txn, id uint32) (*KeyspaceGr } // SaveKeyspaceGroup saves the keyspace group. -func (se *StorageEndpoint) SaveKeyspaceGroup(txn kv.Txn, kg *KeyspaceGroup) error { +func (*StorageEndpoint) SaveKeyspaceGroup(txn kv.Txn, kg *KeyspaceGroup) error { return saveJSONInTxn(txn, KeyspaceGroupIDPath(kg.ID), kg) } // DeleteKeyspaceGroup deletes the keyspace group. 
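CompareAddress above compares member addresses while ignoring the scheme so an http-to-https upgrade does not break the match. A hypothetical sketch of such a comparison using net/url; it is not the actual typeutil.EqualBaseURLs implementation:

```go
package main

import (
	"fmt"
	"net/url"
)

// equalIgnoringScheme shows one way to compare two member addresses while
// ignoring the URL scheme. This is an illustrative helper, not the real
// typeutil.EqualBaseURLs.
func equalIgnoringScheme(a, b string) bool {
	ua, errA := url.Parse(a)
	ub, errB := url.Parse(b)
	if errA != nil || errB != nil {
		return a == b // fall back to a literal comparison on parse failure
	}
	return ua.Host == ub.Host && ua.Path == ub.Path
}

func main() {
	fmt.Println(equalIgnoringScheme("http://pd-1:2379", "https://pd-1:2379")) // true
	fmt.Println(equalIgnoringScheme("http://pd-1:2379", "http://pd-2:2379"))  // false
}
```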
-func (se *StorageEndpoint) DeleteKeyspaceGroup(txn kv.Txn, id uint32) error { +func (*StorageEndpoint) DeleteKeyspaceGroup(txn kv.Txn, id uint32) error { return txn.Remove(KeyspaceGroupIDPath(id)) } diff --git a/pkg/storage/endpoint/util.go b/pkg/storage/endpoint/util.go index cf1e4ef2315..06535490206 100644 --- a/pkg/storage/endpoint/util.go +++ b/pkg/storage/endpoint/util.go @@ -46,11 +46,11 @@ func (se *StorageEndpoint) saveProto(key string, msg proto.Message) error { return se.Save(key, string(value)) } -func (se *StorageEndpoint) saveJSON(key string, data interface{}) error { +func (se *StorageEndpoint) saveJSON(key string, data any) error { return saveJSONInTxn(se /* use the same interface */, key, data) } -func saveJSONInTxn(txn kv.Txn, key string, data interface{}) error { +func saveJSONInTxn(txn kv.Txn, key string, data any) error { value, err := json.Marshal(data) if err != nil { return errs.ErrJSONMarshal.Wrap(err).GenWithStackByArgs() diff --git a/pkg/storage/hot_region_storage.go b/pkg/storage/hot_region_storage.go index 0393035c85b..c08825dbba1 100644 --- a/pkg/storage/hot_region_storage.go +++ b/pkg/storage/hot_region_storage.go @@ -171,7 +171,9 @@ func (h *HotRegionStorage) backgroundDelete() { there may be residual hot regions, you can remove it manually, [pd-dir]/data/hot-region.`) continue } - h.delete(int(curReservedDays)) + if err := h.delete(int(curReservedDays)); err != nil { + log.Error("delete hot region meet error", errs.ZapError(err)) + } case <-h.hotRegionInfoCtx.Done(): return } diff --git a/pkg/storage/hot_region_storage_test.go b/pkg/storage/hot_region_storage_test.go index 629c638c1ff..1486fb8271d 100644 --- a/pkg/storage/hot_region_storage_test.go +++ b/pkg/storage/hot_region_storage_test.go @@ -172,14 +172,14 @@ func TestHotRegionWrite(t *testing.T) { func TestHotRegionDelete(t *testing.T) { re := require.New(t) defaultRemainDay := 7 - defaultDelteData := 30 + defaultDeleteData := 30 deleteDate := time.Now().AddDate(0, 0, 0) packHotRegionInfo := &MockPackHotRegionInfo{} store, clean, err := newTestHotRegionStorage(10*time.Minute, uint64(defaultRemainDay), packHotRegionInfo) re.NoError(err) defer clean() historyHotRegions := make([]HistoryHotRegion, 0) - for i := 0; i < defaultDelteData; i++ { + for i := 0; i < defaultDeleteData; i++ { historyHotRegion := HistoryHotRegion{ UpdateTime: deleteDate.UnixNano() / int64(time.Millisecond), RegionID: 1, diff --git a/pkg/storage/leveldb_backend.go b/pkg/storage/leveldb_backend.go index d25044e9c20..8fb1db196c1 100644 --- a/pkg/storage/leveldb_backend.go +++ b/pkg/storage/leveldb_backend.go @@ -18,9 +18,7 @@ import ( "context" "time" - "github.com/gogo/protobuf/proto" "github.com/pingcap/failpoint" - "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" "github.com/syndtr/goleveldb/leveldb" "github.com/tikv/pd/pkg/encryption" @@ -32,25 +30,27 @@ import ( ) const ( - // DefaultFlushRegionRate is the ttl to sync the regions to region storage. - defaultFlushRegionRate = 3 * time.Second - // DefaultBatchSize is the batch size to save the regions to region storage. + // defaultFlushRate is the default interval to flush the data into the local storage. + defaultFlushRate = 3 * time.Second + // defaultBatchSize is the default batch size to save the data to the local storage. defaultBatchSize = 100 + // defaultDirtyFlushTick + defaultDirtyFlushTick = time.Second ) // levelDBBackend is a storage backend that stores data in LevelDB, -// which is mainly used by the PD region storage. 
+// which is mainly used to store the PD Region meta information. type levelDBBackend struct { *endpoint.StorageEndpoint - ekm *encryption.Manager - mu syncutil.RWMutex - batchRegions map[string]*metapb.Region - batchSize int - cacheSize int - flushRate time.Duration - flushTime time.Time - regionStorageCtx context.Context - regionStorageCancel context.CancelFunc + ekm *encryption.Manager + mu syncutil.RWMutex + batch map[string][]byte + batchSize int + cacheSize int + flushRate time.Duration + flushTime time.Time + ctx context.Context + cancel context.CancelFunc } // newLevelDBBackend is used to create a new LevelDB backend. @@ -63,23 +63,19 @@ func newLevelDBBackend( if err != nil { return nil, err } - regionStorageCtx, regionStorageCancel := context.WithCancel(ctx) lb := &levelDBBackend{ - StorageEndpoint: endpoint.NewStorageEndpoint(levelDB, ekm), - ekm: ekm, - batchSize: defaultBatchSize, - flushRate: defaultFlushRegionRate, - batchRegions: make(map[string]*metapb.Region, defaultBatchSize), - flushTime: time.Now().Add(defaultFlushRegionRate), - regionStorageCtx: regionStorageCtx, - regionStorageCancel: regionStorageCancel, + StorageEndpoint: endpoint.NewStorageEndpoint(levelDB, ekm), + ekm: ekm, + batchSize: defaultBatchSize, + flushRate: defaultFlushRate, + batch: make(map[string][]byte, defaultBatchSize), + flushTime: time.Now().Add(defaultFlushRate), } + lb.ctx, lb.cancel = context.WithCancel(ctx) go lb.backgroundFlush() return lb, nil } -var dirtyFlushTick = time.Second - func (lb *levelDBBackend) backgroundFlush() { defer logutil.LogPanic() @@ -87,14 +83,14 @@ func (lb *levelDBBackend) backgroundFlush() { isFlush bool err error ) - ticker := time.NewTicker(dirtyFlushTick) + ticker := time.NewTicker(defaultDirtyFlushTick) defer ticker.Stop() for { select { case <-ticker.C: lb.mu.RLock() isFlush = lb.flushTime.Before(time.Now()) - failpoint.Inject("regionStorageFastFlush", func() { + failpoint.Inject("levelDBStorageFastFlush", func() { isFlush = true }) lb.mu.RUnlock() @@ -102,42 +98,32 @@ func (lb *levelDBBackend) backgroundFlush() { continue } if err = lb.Flush(); err != nil { - log.Error("flush regions meet error", errs.ZapError(err)) + log.Error("flush data meet error", errs.ZapError(err)) } - case <-lb.regionStorageCtx.Done(): + case <-lb.ctx.Done(): return } } } -func (lb *levelDBBackend) SaveRegion(region *metapb.Region) error { - region, err := encryption.EncryptRegion(region, lb.ekm) - if err != nil { - return err - } +// SaveIntoBatch saves the key-value pair into the batch cache, and it will +// only be saved to the underlying storage when the `Flush` method is +// called or the cache is full. +func (lb *levelDBBackend) SaveIntoBatch(key string, value []byte) error { lb.mu.Lock() defer lb.mu.Unlock() if lb.cacheSize < lb.batchSize-1 { - lb.batchRegions[endpoint.RegionPath(region.GetId())] = region + lb.batch[key] = value lb.cacheSize++ lb.flushTime = time.Now().Add(lb.flushRate) return nil } - lb.batchRegions[endpoint.RegionPath(region.GetId())] = region - err = lb.flushLocked() - - if err != nil { - return err - } - return nil -} - -func (lb *levelDBBackend) DeleteRegion(region *metapb.Region) error { - return lb.Remove(endpoint.RegionPath(region.GetId())) + lb.batch[key] = value + return lb.flushLocked() } -// Flush saves the cache region to the underlying storage. +// Flush saves the batch cache to the underlying storage. 
func (lb *levelDBBackend) Flush() error { lb.mu.Lock() defer lb.mu.Unlock() @@ -145,38 +131,32 @@ func (lb *levelDBBackend) Flush() error { } func (lb *levelDBBackend) flushLocked() error { - if err := lb.saveRegions(lb.batchRegions); err != nil { + if err := lb.saveBatchLocked(); err != nil { return err } lb.cacheSize = 0 - lb.batchRegions = make(map[string]*metapb.Region, lb.batchSize) + lb.batch = make(map[string][]byte, lb.batchSize) return nil } -func (lb *levelDBBackend) saveRegions(regions map[string]*metapb.Region) error { +func (lb *levelDBBackend) saveBatchLocked() error { batch := new(leveldb.Batch) - - for key, r := range regions { - value, err := proto.Marshal(r) - if err != nil { - return errs.ErrProtoMarshal.Wrap(err).GenWithStackByCause() - } + for key, value := range lb.batch { batch.Put([]byte(key), value) } - if err := lb.Base.(*kv.LevelDBKV).Write(batch, nil); err != nil { return errs.ErrLevelDBWrite.Wrap(err).GenWithStackByCause() } return nil } -// Close closes the LevelDB kv. It will call Flush() once before closing. +// Close will gracefully close the LevelDB backend and flush the data to the underlying storage before closing. func (lb *levelDBBackend) Close() error { err := lb.Flush() if err != nil { - log.Error("meet error before close the region storage", errs.ZapError(err)) + log.Error("meet error before closing the leveldb storage", errs.ZapError(err)) } - lb.regionStorageCancel() + lb.cancel() err = lb.Base.(*kv.LevelDBKV).Close() if err != nil { return errs.ErrLevelDBClose.Wrap(err).GenWithStackByArgs() diff --git a/pkg/storage/leveldb_backend_test.go b/pkg/storage/leveldb_backend_test.go new file mode 100644 index 00000000000..45af7201c85 --- /dev/null +++ b/pkg/storage/leveldb_backend_test.go @@ -0,0 +1,123 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/tikv/pd/pkg/utils/testutil" +) + +func TestLevelDBBackend(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + backend, err := newLevelDBBackend(ctx, t.TempDir(), nil) + re.NoError(err) + re.NotNil(backend) + key, value := "k1", "v1" + // Save without flush. + err = backend.SaveIntoBatch(key, []byte(value)) + re.NoError(err) + val, err := backend.Load(key) + re.NoError(err) + re.Empty(val) + // Flush and load. + err = backend.Flush() + re.NoError(err) + val, err = backend.Load(key) + re.NoError(err) + re.Equal(value, val) + // Delete and load. + err = backend.Remove(key) + re.NoError(err) + val, err = backend.Load(key) + re.NoError(err) + re.Empty(val) + // Save twice without flush. 
+ err = backend.SaveIntoBatch(key, []byte(value)) + re.NoError(err) + val, err = backend.Load(key) + re.NoError(err) + re.Empty(val) + value = "v2" + err = backend.SaveIntoBatch(key, []byte(value)) + re.NoError(err) + val, err = backend.Load(key) + re.NoError(err) + re.Empty(val) + // Delete before flush. + err = backend.Remove(key) + re.NoError(err) + val, err = backend.Load(key) + re.NoError(err) + re.Empty(val) + // Flush and load. + err = backend.Flush() + re.NoError(err) + val, err = backend.Load(key) + re.NoError(err) + re.Equal(value, val) + // Delete and load. + err = backend.Remove(key) + re.NoError(err) + val, err = backend.Load(key) + re.NoError(err) + re.Empty(val) + // Test the background flush. + backend.flushRate = defaultDirtyFlushTick + err = backend.SaveIntoBatch(key, []byte(value)) + re.NoError(err) + val, err = backend.Load(key) + re.NoError(err) + re.Empty(val) + testutil.Eventually(re, func() bool { + val, err = backend.Load(key) + re.NoError(err) + return value == val + }, testutil.WithWaitFor(defaultDirtyFlushTick*5), testutil.WithTickInterval(defaultDirtyFlushTick/2)) + err = backend.Remove(key) + re.NoError(err) + val, err = backend.Load(key) + re.NoError(err) + re.Empty(val) + backend.flushRate = defaultFlushRate + // Test the flush when the cache is full. + backend.flushRate = time.Minute + for i := 0; i < backend.batchSize; i++ { + key, value = fmt.Sprintf("k%d", i), fmt.Sprintf("v%d", i) + err = backend.SaveIntoBatch(key, []byte(value)) + re.NoError(err) + if i < backend.batchSize-1 { + // The cache is not full yet. + val, err = backend.Load(key) + re.NoError(err) + re.Empty(val) + } else { + // The cache is full, and the flush is triggered. + val, err = backend.Load(key) + re.NoError(err) + re.Equal(value, val) + } + } + backend.flushRate = defaultFlushRate + // Close the backend. + err = backend.Close() + re.NoError(err) +} diff --git a/pkg/storage/region_storage.go b/pkg/storage/region_storage.go new file mode 100644 index 00000000000..11bc6a7cc21 --- /dev/null +++ b/pkg/storage/region_storage.go @@ -0,0 +1,79 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "context" + + "github.com/gogo/protobuf/proto" + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/encryption" + "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/storage/endpoint" + "github.com/tikv/pd/pkg/storage/kv" +) + +// RegionStorage is a storage for the PD region meta information based on LevelDB, +// which will override the default implementation of the `endpoint.RegionStorage`. +type RegionStorage struct { + kv.Base + backend *levelDBBackend +} + +var _ endpoint.RegionStorage = (*RegionStorage)(nil) + +func newRegionStorage(backend *levelDBBackend) *RegionStorage { + return &RegionStorage{Base: backend.Base, backend: backend} +} + +// LoadRegion implements the `endpoint.RegionStorage` interface. 
+func (s *RegionStorage) LoadRegion(regionID uint64, region *metapb.Region) (bool, error) { + return s.backend.LoadRegion(regionID, region) +} + +// LoadRegions implements the `endpoint.RegionStorage` interface. +func (s *RegionStorage) LoadRegions(ctx context.Context, f func(region *core.RegionInfo) []*core.RegionInfo) error { + return s.backend.LoadRegions(ctx, f) +} + +// SaveRegion implements the `endpoint.RegionStorage` interface. +// Instead of saving the region directly, it will encrypt the region and then save it in batch. +func (s *RegionStorage) SaveRegion(region *metapb.Region) error { + encryptedRegion, err := encryption.EncryptRegion(region, s.backend.ekm) + if err != nil { + return err + } + value, err := proto.Marshal(encryptedRegion) + if err != nil { + return errs.ErrProtoMarshal.Wrap(err).GenWithStackByCause() + } + return s.backend.SaveIntoBatch(endpoint.RegionPath(region.GetId()), value) +} + +// DeleteRegion implements the `endpoint.RegionStorage` interface. +func (s *RegionStorage) DeleteRegion(region *metapb.Region) error { + return s.backend.Remove((endpoint.RegionPath(region.GetId()))) +} + +// Flush implements the `endpoint.RegionStorage` interface. +func (s *RegionStorage) Flush() error { + return s.backend.Flush() +} + +// Close implements the `endpoint.RegionStorage` interface. +func (s *RegionStorage) Close() error { + return s.backend.Close() +} diff --git a/pkg/storage/region_storage_test.go b/pkg/storage/region_storage_test.go new file mode 100644 index 00000000000..f6670f8c82e --- /dev/null +++ b/pkg/storage/region_storage_test.go @@ -0,0 +1,95 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "context" + "testing" + + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/stretchr/testify/require" + "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/storage/endpoint" +) + +func TestRegionStorage(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + var ( + regionStorage endpoint.RegionStorage + err error + ) + regionStorage, err = NewRegionStorageWithLevelDBBackend(ctx, t.TempDir(), nil) + re.NoError(err) + re.NotNil(regionStorage) + // Load regions from the storage. + regions := make([]*core.RegionInfo, 0) + appendRegionFunc := func(region *core.RegionInfo) []*core.RegionInfo { + regions = append(regions, region) + return nil + } + err = regionStorage.LoadRegions(ctx, appendRegionFunc) + re.NoError(err) + re.Empty(regions) + // Save regions to the storage. + region1 := newTestRegionMeta(1) + err = regionStorage.SaveRegion(region1) + re.NoError(err) + region2 := newTestRegionMeta(2) + err = regionStorage.SaveRegion(region2) + re.NoError(err) + regions = make([]*core.RegionInfo, 0) + err = regionStorage.LoadRegions(ctx, appendRegionFunc) + re.NoError(err) + re.Empty(regions) + // Flush and load. 
+ err = regionStorage.Flush() + re.NoError(err) + regions = make([]*core.RegionInfo, 0) + err = regionStorage.LoadRegions(ctx, appendRegionFunc) + re.NoError(err) + re.Len(regions, 2) + re.Equal(region1, regions[0].GetMeta()) + re.Equal(region2, regions[1].GetMeta()) + newRegion := &metapb.Region{} + ok, err := regionStorage.LoadRegion(3, newRegion) + re.NoError(err) + re.False(ok) + ok, err = regionStorage.LoadRegion(1, newRegion) + re.NoError(err) + re.True(ok) + re.Equal(region1, newRegion) + ok, err = regionStorage.LoadRegion(2, newRegion) + re.NoError(err) + re.True(ok) + re.Equal(region2, newRegion) + // Delete and load. + err = regionStorage.DeleteRegion(region1) + re.NoError(err) + regions = make([]*core.RegionInfo, 0) + err = regionStorage.LoadRegions(ctx, appendRegionFunc) + re.NoError(err) + re.Len(regions, 1) + re.Equal(region2, regions[0].GetMeta()) + ok, err = regionStorage.LoadRegion(2, newRegion) + re.NoError(err) + re.True(ok) + re.Equal(region2, newRegion) + re.Equal(regions[0].GetMeta(), newRegion) + // Close the storage. + err = regionStorage.Close() + re.NoError(err) +} diff --git a/pkg/storage/storage.go b/pkg/storage/storage.go index aba01dfa806..5e006133d22 100644 --- a/pkg/storage/storage.go +++ b/pkg/storage/storage.go @@ -57,13 +57,18 @@ func NewStorageWithEtcdBackend(client *clientv3.Client, rootPath string) Storage return newEtcdBackend(client, rootPath) } -// NewStorageWithLevelDBBackend creates a new storage with LevelDB backend. -func NewStorageWithLevelDBBackend( +// NewRegionStorageWithLevelDBBackend will create a specialized storage to +// store region meta information based on a LevelDB backend. +func NewRegionStorageWithLevelDBBackend( ctx context.Context, filePath string, ekm *encryption.Manager, -) (Storage, error) { - return newLevelDBBackend(ctx, filePath, ekm) +) (*RegionStorage, error) { + levelDBBackend, err := newLevelDBBackend(ctx, filePath, ekm) + if err != nil { + return nil, err + } + return newRegionStorage(levelDBBackend), nil } // TODO: support other KV storage backends like BadgerDB in the future. @@ -88,15 +93,14 @@ func NewCoreStorage(defaultStorage Storage, regionStorage endpoint.RegionStorage } } -// TryGetLocalRegionStorage gets the local region storage. Returns nil if not present. -func TryGetLocalRegionStorage(s Storage) endpoint.RegionStorage { +// RetrieveRegionStorage retrieve the region storage from the given storage. +// If it's a `coreStorage`, it will return the regionStorage inside, otherwise it will return the original storage. 
+func RetrieveRegionStorage(s Storage) endpoint.RegionStorage { switch ps := s.(type) { case *coreStorage: return ps.regionStorage - case *levelDBBackend, *memoryStorage: - return ps default: - return nil + return ps } } diff --git a/pkg/storage/storage_test.go b/pkg/storage/storage_test.go index dbb5a03b264..460489ecd10 100644 --- a/pkg/storage/storage_test.go +++ b/pkg/storage/storage_test.go @@ -100,7 +100,7 @@ func TestLoadStores(t *testing.T) { n := 10 stores := mustSaveStores(re, storage, n) - re.NoError(storage.LoadStores(cache.SetStore)) + re.NoError(storage.LoadStores(cache.PutStore)) re.Equal(n, cache.GetStoreCount()) for _, store := range cache.GetMetaStores() { @@ -117,7 +117,7 @@ func TestStoreWeight(t *testing.T) { mustSaveStores(re, storage, n) re.NoError(storage.SaveStoreWeight(1, 2.0, 3.0)) re.NoError(storage.SaveStoreWeight(2, 0.2, 0.3)) - re.NoError(storage.LoadStores(cache.SetStore)) + re.NoError(storage.LoadStores(cache.PutStore)) leaderWeights := []float64{1.0, 2.0, 0.2} regionWeights := []float64{1.0, 3.0, 0.3} for i := 0; i < n; i++ { @@ -209,6 +209,57 @@ func TestLoadMinServiceGCSafePoint(t *testing.T) { re.Equal(uint64(2), ssp.SafePoint) } +func TestTryGetLocalRegionStorage(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + // Memory backend integrated into core storage. + defaultStorage := NewStorageWithMemoryBackend() + var regionStorage endpoint.RegionStorage = NewStorageWithMemoryBackend() + coreStorage := NewCoreStorage(defaultStorage, regionStorage) + storage := RetrieveRegionStorage(coreStorage) + re.NotNil(storage) + re.Equal(regionStorage, storage) + // RegionStorage with LevelDB backend integrated into core storage. + defaultStorage = NewStorageWithMemoryBackend() + regionStorage, err := NewRegionStorageWithLevelDBBackend(ctx, t.TempDir(), nil) + re.NoError(err) + coreStorage = NewCoreStorage(defaultStorage, regionStorage) + storage = RetrieveRegionStorage(coreStorage) + re.NotNil(storage) + re.Equal(regionStorage, storage) + // Raw LevelDB backend integrated into core storage. + defaultStorage = NewStorageWithMemoryBackend() + regionStorage, err = newLevelDBBackend(ctx, t.TempDir(), nil) + re.NoError(err) + coreStorage = NewCoreStorage(defaultStorage, regionStorage) + storage = RetrieveRegionStorage(coreStorage) + re.NotNil(storage) + re.Equal(regionStorage, storage) + defaultStorage = NewStorageWithMemoryBackend() + regionStorage, err = newLevelDBBackend(ctx, t.TempDir(), nil) + re.NoError(err) + coreStorage = NewCoreStorage(defaultStorage, regionStorage) + storage = RetrieveRegionStorage(coreStorage) + re.NotNil(storage) + re.Equal(regionStorage, storage) + // Without core storage. 
+ defaultStorage = NewStorageWithMemoryBackend() + storage = RetrieveRegionStorage(defaultStorage) + re.NotNil(storage) + re.Equal(defaultStorage, storage) + defaultStorage, err = newLevelDBBackend(ctx, t.TempDir(), nil) + re.NoError(err) + storage = RetrieveRegionStorage(defaultStorage) + re.NotNil(storage) + re.Equal(defaultStorage, storage) + defaultStorage, err = newLevelDBBackend(ctx, t.TempDir(), nil) + re.NoError(err) + storage = RetrieveRegionStorage(defaultStorage) + re.NotNil(storage) + re.Equal(defaultStorage, storage) +} + func TestLoadRegions(t *testing.T) { re := require.New(t) storage := NewStorageWithMemoryBackend() @@ -367,7 +418,7 @@ func randomMerge(regions []*metapb.Region, n int, ratio int) { } } -func saveRegions(lb *levelDBBackend, n int, ratio int) error { +func saveRegions(storage endpoint.RegionStorage, n int, ratio int) error { keys := generateKeys(n) regions := make([]*metapb.Region, 0, n) for i := uint64(0); i < uint64(n); i++ { @@ -398,36 +449,36 @@ func saveRegions(lb *levelDBBackend, n int, ratio int) error { } for _, region := range regions { - err := lb.SaveRegion(region) + err := storage.SaveRegion(region) if err != nil { return err } } - return lb.Flush() + return storage.Flush() } func benchmarkLoadRegions(b *testing.B, n int, ratio int) { re := require.New(b) ctx := context.Background() dir := b.TempDir() - lb, err := newLevelDBBackend(ctx, dir, nil) + regionStorage, err := NewRegionStorageWithLevelDBBackend(ctx, dir, nil) if err != nil { b.Fatal(err) } cluster := core.NewBasicCluster() - err = saveRegions(lb, n, ratio) + err = saveRegions(regionStorage, n, ratio) if err != nil { b.Fatal(err) } defer func() { - err = lb.Close() + err = regionStorage.Close() if err != nil { b.Fatal(err) } }() b.ResetTimer() - err = lb.LoadRegions(context.Background(), cluster.CheckAndPutRegion) + err = regionStorage.LoadRegions(ctx, cluster.CheckAndPutRegion) re.NoError(err) } diff --git a/pkg/syncer/client.go b/pkg/syncer/client.go index 00dd8c5107d..00fa8dc389b 100644 --- a/pkg/syncer/client.go +++ b/pkg/syncer/client.go @@ -26,6 +26,7 @@ import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/ratelimit" "github.com/tikv/pd/pkg/storage" "github.com/tikv/pd/pkg/utils/grpcutil" "github.com/tikv/pd/pkg/utils/logutil" @@ -205,7 +206,13 @@ func (s *RegionSyncer) StartSyncWithLeader(addr string) { log.Debug("region is stale", zap.Stringer("origin", origin.GetMeta()), errs.ZapError(err)) continue } - _, saveKV, _, _ := regionGuide(region, origin) + ctx := &core.MetaProcessContext{ + Context: ctx, + TaskRunner: ratelimit.NewSyncRunner(), + Tracer: core.NewNoopHeartbeatProcessTracer(), + // no limit for followers. 
+ } + saveKV, _, _, _ := regionGuide(ctx, region, origin) overlaps := bc.PutRegion(region) if hasBuckets { diff --git a/pkg/syncer/client_test.go b/pkg/syncer/client_test.go index ba389b5de6d..e7be77d2bb0 100644 --- a/pkg/syncer/client_test.go +++ b/pkg/syncer/client_test.go @@ -34,7 +34,7 @@ import ( func TestLoadRegion(t *testing.T) { re := require.New(t) tempDir := t.TempDir() - rs, err := storage.NewStorageWithLevelDBBackend(context.Background(), tempDir, nil) + rs, err := storage.NewRegionStorageWithLevelDBBackend(context.Background(), tempDir, nil) re.NoError(err) server := &mockServer{ @@ -62,7 +62,7 @@ func TestLoadRegion(t *testing.T) { func TestErrorCode(t *testing.T) { re := require.New(t) tempDir := t.TempDir() - rs, err := storage.NewStorageWithLevelDBBackend(context.Background(), tempDir, nil) + rs, err := storage.NewRegionStorageWithLevelDBBackend(context.Background(), tempDir, nil) re.NoError(err) server := &mockServer{ ctx: context.Background(), @@ -71,7 +71,7 @@ func TestErrorCode(t *testing.T) { } ctx, cancel := context.WithCancel(context.TODO()) rc := NewRegionSyncer(server) - conn, err := grpcutil.GetClientConn(ctx, "127.0.0.1", nil) + conn, err := grpcutil.GetClientConn(ctx, "http://127.0.0.1", nil) re.NoError(err) cancel() _, err = rc.syncRegion(ctx, conn) @@ -91,7 +91,7 @@ func (s *mockServer) LoopContext() context.Context { return s.ctx } -func (s *mockServer) ClusterID() uint64 { +func (*mockServer) ClusterID() uint64 { return 1 } @@ -107,7 +107,7 @@ func (s *mockServer) GetStorage() storage.Storage { return s.storage } -func (s *mockServer) Name() string { +func (*mockServer) Name() string { return "mock-server" } @@ -115,7 +115,7 @@ func (s *mockServer) GetRegions() []*core.RegionInfo { return s.bc.GetRegions() } -func (s *mockServer) GetTLSConfig() *grpcutil.TLSConfig { +func (*mockServer) GetTLSConfig() *grpcutil.TLSConfig { return &grpcutil.TLSConfig{} } diff --git a/pkg/syncer/server.go b/pkg/syncer/server.go index 4fb38614de0..ccc32b13303 100644 --- a/pkg/syncer/server.go +++ b/pkg/syncer/server.go @@ -88,19 +88,16 @@ type RegionSyncer struct { streamingRunning atomic.Bool } -// NewRegionSyncer returns a region syncer. -// The final consistency is ensured by the heartbeat. -// Strong consistency is not guaranteed. -// Usually open the region syncer in huge cluster and the server -// no longer etcd but go-leveldb. +// NewRegionSyncer returns a region syncer that ensures final consistency through the heartbeat, +// but it does not guarantee strong consistency. Using the same storage backend of the region storage. func NewRegionSyncer(s Server) *RegionSyncer { - localRegionStorage := storage.TryGetLocalRegionStorage(s.GetStorage()) - if localRegionStorage == nil { + regionStorage := storage.RetrieveRegionStorage(s.GetStorage()) + if regionStorage == nil { return nil } syncer := &RegionSyncer{ server: s, - history: newHistoryBuffer(defaultHistoryBufferSize, localRegionStorage.(kv.Base)), + history: newHistoryBuffer(defaultHistoryBufferSize, regionStorage.(kv.Base)), limit: ratelimit.NewRateLimiter(defaultBucketRate, defaultBucketCapacity), tlsConfig: s.GetTLSConfig(), } diff --git a/pkg/tso/admin.go b/pkg/tso/admin.go index f19d8e71d05..bc9fd1f853d 100644 --- a/pkg/tso/admin.go +++ b/pkg/tso/admin.go @@ -67,7 +67,7 @@ func NewAdminHandler(handler Handler, rd *render.Render) *AdminHandler { // during EBS based restore, we call this to make sure ts of pd >= resolved_ts in backup. 
func (h *AdminHandler) ResetTS(w http.ResponseWriter, r *http.Request) { handler := h.handler - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } diff --git a/pkg/tso/allocator_manager.go b/pkg/tso/allocator_manager.go index 251a3aaf2e6..62a4fb97a57 100644 --- a/pkg/tso/allocator_manager.go +++ b/pkg/tso/allocator_manager.go @@ -108,7 +108,7 @@ type ElectionMember interface { // MemberValue returns the member value. MemberValue() string // GetMember returns the current member - GetMember() interface{} + GetMember() any // Client returns the etcd client. Client() *clientv3.Client // IsLeader returns whether the participant is the leader or not by checking its @@ -325,6 +325,12 @@ func (am *AllocatorManager) close() { allocatorGroup.allocator.(*GlobalTSOAllocator).close() } + for _, cc := range am.localAllocatorConn.clientConns { + if err := cc.Close(); err != nil { + log.Error("failed to close allocator manager grpc clientConn", errs.ZapError(errs.ErrCloseGRPCConn, err)) + } + } + am.cancel() am.svcLoopWG.Wait() @@ -618,11 +624,13 @@ func (am *AllocatorManager) campaignAllocatorLeader( dcLocationInfo *pdpb.GetDCLocationInfoResponse, isNextLeader bool, ) { - log.Info("start to campaign local tso allocator leader", + logger := log.With( logutil.CondUint32("keyspace-group-id", am.kgID, am.kgID > 0), zap.String("dc-location", allocator.GetDCLocation()), zap.Any("dc-location-info", dcLocationInfo), - zap.String("name", am.member.Name())) + zap.String("name", am.member.Name()), + ) + logger.Info("start to campaign local tso allocator leader") cmps := make([]clientv3.Cmp, 0) nextLeaderKey := am.nextLeaderKey(allocator.GetDCLocation()) if !isNextLeader { @@ -642,18 +650,9 @@ func (am *AllocatorManager) campaignAllocatorLeader( }) if err := allocator.CampaignAllocatorLeader(am.leaderLease, cmps...); err != nil { if err.Error() == errs.ErrEtcdTxnConflict.Error() { - log.Info("failed to campaign local tso allocator leader due to txn conflict, another allocator may campaign successfully", - logutil.CondUint32("keyspace-group-id", am.kgID, am.kgID > 0), - zap.String("dc-location", allocator.GetDCLocation()), - zap.Any("dc-location-info", dcLocationInfo), - zap.String("name", am.member.Name())) + logger.Info("failed to campaign local tso allocator leader due to txn conflict, another allocator may campaign successfully") } else { - log.Error("failed to campaign local tso allocator leader due to etcd error", - logutil.CondUint32("keyspace-group-id", am.kgID, am.kgID > 0), - zap.String("dc-location", allocator.GetDCLocation()), - zap.Any("dc-location-info", dcLocationInfo), - zap.String("name", am.member.Name()), - errs.ZapError(err)) + logger.Error("failed to campaign local tso allocator leader due to etcd error", errs.ZapError(err)) } return } @@ -664,44 +663,25 @@ func (am *AllocatorManager) campaignAllocatorLeader( defer am.ResetAllocatorGroup(allocator.GetDCLocation()) // Maintain the Local TSO Allocator leader go allocator.KeepAllocatorLeader(ctx) - log.Info("campaign local tso allocator leader ok", - logutil.CondUint32("keyspace-group-id", am.kgID, am.kgID > 0), - zap.String("dc-location", allocator.GetDCLocation()), - zap.Any("dc-location-info", dcLocationInfo), - zap.String("name", am.member.Name())) - log.Info("initialize the local TSO allocator", - logutil.CondUint32("keyspace-group-id", am.kgID, am.kgID > 0), - zap.String("dc-location", allocator.GetDCLocation()), - zap.Any("dc-location-info", 
dcLocationInfo), - zap.String("name", am.member.Name())) + logger.Info("Complete campaign local tso allocator leader, begin to initialize the local TSO allocator") if err := allocator.Initialize(int(dcLocationInfo.Suffix)); err != nil { - log.Error("failed to initialize the local TSO allocator", - logutil.CondUint32("keyspace-group-id", am.kgID, am.kgID > 0), - zap.String("dc-location", allocator.GetDCLocation()), - zap.Any("dc-location-info", dcLocationInfo), - errs.ZapError(err)) + log.Error("failed to initialize the local TSO allocator", errs.ZapError(err)) return } if dcLocationInfo.GetMaxTs().GetPhysical() != 0 { if err := allocator.WriteTSO(dcLocationInfo.GetMaxTs()); err != nil { - log.Error("failed to write the max local TSO after member changed", - logutil.CondUint32("keyspace-group-id", am.kgID, am.kgID > 0), - zap.String("dc-location", allocator.GetDCLocation()), - zap.Any("dc-location-info", dcLocationInfo), - errs.ZapError(err)) + log.Error("failed to write the max local TSO after member changed", errs.ZapError(err)) return } } am.compareAndSetMaxSuffix(dcLocationInfo.Suffix) allocator.EnableAllocatorLeader() // The next leader is me, delete it to finish campaigning - am.deleteNextLeaderID(allocator.GetDCLocation()) - log.Info("local tso allocator leader is ready to serve", - logutil.CondUint32("keyspace-group-id", am.kgID, am.kgID > 0), - zap.String("dc-location", allocator.GetDCLocation()), - zap.Any("dc-location-info", dcLocationInfo), - zap.String("name", am.member.Name())) + if err := am.deleteNextLeaderID(allocator.GetDCLocation()); err != nil { + logger.Warn("failed to delete next leader key after campaign local tso allocator leader", errs.ZapError(err)) + } + logger.Info("local tso allocator leader is ready to serve") leaderTicker := time.NewTicker(mcsutils.LeaderTickInterval) defer leaderTicker.Stop() @@ -710,20 +690,12 @@ func (am *AllocatorManager) campaignAllocatorLeader( select { case <-leaderTicker.C: if !allocator.IsAllocatorLeader() { - log.Info("no longer a local tso allocator leader because lease has expired, local tso allocator leader will step down", - logutil.CondUint32("keyspace-group-id", am.kgID, am.kgID > 0), - zap.String("dc-location", allocator.GetDCLocation()), - zap.Any("dc-location-info", dcLocationInfo), - zap.String("name", am.member.Name())) + logger.Info("no longer a local tso allocator leader because lease has expired, local tso allocator leader will step down") return } case <-ctx.Done(): // Server is closed and it should return nil. - log.Info("server is closed, reset the local tso allocator", - logutil.CondUint32("keyspace-group-id", am.kgID, am.kgID > 0), - zap.String("dc-location", allocator.GetDCLocation()), - zap.Any("dc-location-info", dcLocationInfo), - zap.String("name", am.member.Name())) + logger.Info("server is closed, reset the local tso allocator") return } } diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index a37bcc73881..f90dc5f26fe 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -187,7 +187,7 @@ func (gta *GlobalTSOAllocator) Initialize(int) error { gta.tsoAllocatorRoleGauge.Set(1) // The suffix of a Global TSO should always be 0. gta.timestampOracle.suffix = 0 - return gta.timestampOracle.SyncTimestamp(gta.member.GetLeadership()) + return gta.timestampOracle.SyncTimestamp() } // IsInitialize is used to indicates whether this allocator is initialized. 
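// The allocator manager refactor above builds one contextual logger with log.With
// and reuses it, instead of repeating the same zap fields on every log call.
// A minimal, hedged sketch of that pattern, using only the pingcap/log and zap
// APIs already present in this diff; the field values are illustrative, not PD's.
package main

import (
	"errors"

	"github.com/pingcap/log"
	"go.uber.org/zap"
)

func main() {
	logger := log.With(
		zap.String("dc-location", "dc-1"),    // illustrative value
		zap.String("name", "pd-allocator-0"), // illustrative value
	)
	logger.Info("start to campaign local tso allocator leader")
	// Attach the error once at the call site, as campaignAllocatorLeader does above.
	if err := errors.New("etcd txn conflict"); err != nil {
		logger.Error("failed to campaign local tso allocator leader due to etcd error", zap.Error(err))
	}
}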
@@ -197,7 +197,7 @@ func (gta *GlobalTSOAllocator) IsInitialize() bool { // UpdateTSO is used to update the TSO in memory and the time window in etcd. func (gta *GlobalTSOAllocator) UpdateTSO() error { - return gta.timestampOracle.UpdateTimestamp(gta.member.GetLeadership()) + return gta.timestampOracle.UpdateTimestamp() } // SetTSO sets the physical part with given TSO. diff --git a/pkg/tso/keyspace_group_manager.go b/pkg/tso/keyspace_group_manager.go index 77967a97ef7..ae4cca83374 100644 --- a/pkg/tso/keyspace_group_manager.go +++ b/pkg/tso/keyspace_group_manager.go @@ -290,7 +290,7 @@ func (s *state) getNextPrimaryToReset( if member.Priority > maxPriority { maxPriority = member.Priority } - if member.Address == localAddress { + if member.CompareAddress(localAddress) { localPriority = member.Priority } } @@ -398,7 +398,7 @@ func NewKeyspaceGroupManager( etcdClient *clientv3.Client, httpClient *http.Client, electionNamePrefix string, - tsoServiceKey string, + clusterID uint64, legacySvcRootPath string, tsoSvcRootPath string, cfg ServiceConfig, @@ -417,7 +417,7 @@ func NewKeyspaceGroupManager( etcdClient: etcdClient, httpClient: httpClient, electionNamePrefix: electionNamePrefix, - tsoServiceKey: tsoServiceKey, + tsoServiceKey: discovery.TSOPath(clusterID), legacySvcRootPath: legacySvcRootPath, tsoSvcRootPath: tsoSvcRootPath, primaryPriorityCheckInterval: defaultPrimaryPriorityCheckInterval, @@ -624,7 +624,7 @@ func (kgm *KeyspaceGroupManager) primaryPriorityCheckLoop() { member, kg, localPriority, nextGroupID := kgm.getNextPrimaryToReset(groupID, kgm.tsoServiceID.ServiceAddr) if member != nil { aliveTSONodes := make(map[string]struct{}) - kgm.tsoNodes.Range(func(key, _ interface{}) bool { + kgm.tsoNodes.Range(func(key, _ any) bool { aliveTSONodes[key.(string)] = struct{}{} return true }) @@ -667,14 +667,14 @@ func (kgm *KeyspaceGroupManager) primaryPriorityCheckLoop() { func (kgm *KeyspaceGroupManager) isAssignedToMe(group *endpoint.KeyspaceGroup) bool { return slice.AnyOf(group.Members, func(i int) bool { - return group.Members[i].Address == kgm.tsoServiceID.ServiceAddr + return group.Members[i].CompareAddress(kgm.tsoServiceID.ServiceAddr) }) } // updateKeyspaceGroup applies the given keyspace group. If the keyspace group is just assigned to // this host/pod, it will join the primary election. 
func (kgm *KeyspaceGroupManager) updateKeyspaceGroup(group *endpoint.KeyspaceGroup) { - if err := kgm.checkKeySpaceGroupID(group.ID); err != nil { + if err := checkKeySpaceGroupID(group.ID); err != nil { log.Warn("keyspace group ID is invalid, ignore it", zap.Error(err)) return } @@ -751,7 +751,7 @@ func (kgm *KeyspaceGroupManager) updateKeyspaceGroup(group *endpoint.KeyspaceGro kgm.groupUpdateRetryList[group.ID] = group return } - participant.SetCampaignChecker(func(leadership *election.Leadership) bool { + participant.SetCampaignChecker(func(*election.Leadership) bool { return splitSourceAM.GetMember().IsLeader() }) } @@ -997,7 +997,7 @@ func (kgm *KeyspaceGroupManager) exitElectionMembership(group *endpoint.Keyspace // GetAllocatorManager returns the AllocatorManager of the given keyspace group func (kgm *KeyspaceGroupManager) GetAllocatorManager(keyspaceGroupID uint32) (*AllocatorManager, error) { - if err := kgm.checkKeySpaceGroupID(keyspaceGroupID); err != nil { + if err := checkKeySpaceGroupID(keyspaceGroupID); err != nil { return nil, err } if am, _ := kgm.getKeyspaceGroupMeta(keyspaceGroupID); am != nil { @@ -1022,7 +1022,7 @@ func (kgm *KeyspaceGroupManager) FindGroupByKeyspaceID( func (kgm *KeyspaceGroupManager) GetElectionMember( keyspaceID, keyspaceGroupID uint32, ) (ElectionMember, error) { - if err := kgm.checkKeySpaceGroupID(keyspaceGroupID); err != nil { + if err := checkKeySpaceGroupID(keyspaceGroupID); err != nil { return nil, err } am, _, _, err := kgm.getKeyspaceGroupMetaWithCheck(keyspaceID, keyspaceGroupID) @@ -1052,7 +1052,7 @@ func (kgm *KeyspaceGroupManager) HandleTSORequest( keyspaceID, keyspaceGroupID uint32, dcLocation string, count uint32, ) (ts pdpb.Timestamp, curKeyspaceGroupID uint32, err error) { - if err := kgm.checkKeySpaceGroupID(keyspaceGroupID); err != nil { + if err := checkKeySpaceGroupID(keyspaceGroupID); err != nil { return pdpb.Timestamp{}, keyspaceGroupID, err } am, _, curKeyspaceGroupID, err := kgm.getKeyspaceGroupMetaWithCheck(keyspaceID, keyspaceGroupID) @@ -1086,7 +1086,7 @@ func (kgm *KeyspaceGroupManager) HandleTSORequest( return ts, curKeyspaceGroupID, err } -func (kgm *KeyspaceGroupManager) checkKeySpaceGroupID(id uint32) error { +func checkKeySpaceGroupID(id uint32) error { if id < mcsutils.MaxKeyspaceGroupCountInUse { return nil } @@ -1439,7 +1439,7 @@ func (kgm *KeyspaceGroupManager) groupSplitPatroller() { defer kgm.wg.Done() patrolInterval := groupPatrolInterval failpoint.Inject("fastGroupSplitPatroller", func() { - patrolInterval = 200 * time.Millisecond + patrolInterval = 3 * time.Second }) ticker := time.NewTicker(patrolInterval) defer ticker.Stop() diff --git a/pkg/tso/keyspace_group_manager_test.go b/pkg/tso/keyspace_group_manager_test.go index d3d5f8256e6..0e237fb32f0 100644 --- a/pkg/tso/keyspace_group_manager_test.go +++ b/pkg/tso/keyspace_group_manager_test.go @@ -22,12 +22,12 @@ import ( "path" "reflect" "sort" + "strconv" "strings" "sync" "testing" "time" - "github.com/google/uuid" "github.com/pingcap/failpoint" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" @@ -69,7 +69,7 @@ func (suite *keyspaceGroupManagerTestSuite) SetupSuite() { suite.ctx, suite.cancel = context.WithCancel(context.Background()) suite.ClusterID = rand.Uint64() servers, client, clean := etcdutil.NewTestEtcdCluster(t, 1) - suite.backendEndpoints, suite.etcdClient, suite.clean = servers[0].Config().LCUrls[0].String(), client, clean + suite.backendEndpoints, suite.etcdClient, suite.clean = 
servers[0].Config().ListenClientUrls[0].String(), client, clean suite.cfg = suite.createConfig() } @@ -152,15 +152,16 @@ func (suite *keyspaceGroupManagerTestSuite) TestNewKeyspaceGroupManager() { re := suite.Require() tsoServiceID := &discovery.ServiceRegistryEntry{ServiceAddr: suite.cfg.AdvertiseListenAddr} - guid := uuid.New().String() - tsoServiceKey := discovery.ServicePath(guid, "tso") - legacySvcRootPath := path.Join("/pd", guid) - tsoSvcRootPath := path.Join(mcsutils.MicroserviceRootPath, guid, "tso") - electionNamePrefix := "tso-server-" + guid + clusterID := rand.Uint64() + clusterIDStr := strconv.FormatUint(clusterID, 10) + + legacySvcRootPath := path.Join("/pd", clusterIDStr) + tsoSvcRootPath := path.Join(mcsutils.MicroserviceRootPath, clusterIDStr, "tso") + electionNamePrefix := "tso-server-" + clusterIDStr kgm := NewKeyspaceGroupManager( suite.ctx, tsoServiceID, suite.etcdClient, nil, electionNamePrefix, - tsoServiceKey, legacySvcRootPath, tsoSvcRootPath, suite.cfg) + clusterID, legacySvcRootPath, tsoSvcRootPath, suite.cfg) defer kgm.Close() err := kgm.Initialize() re.NoError(err) @@ -757,7 +758,7 @@ func (suite *keyspaceGroupManagerTestSuite) runTestLoadKeyspaceGroupsAssignment( if assignToMe { svcAddrs = append(svcAddrs, mgr.tsoServiceID.ServiceAddr) } else { - svcAddrs = append(svcAddrs, uuid.NewString()) + svcAddrs = append(svcAddrs, fmt.Sprintf("test-%d", rand.Uint64())) } addKeyspaceGroupAssignment( suite.ctx, suite.etcdClient, uint32(j), mgr.legacySvcRootPath, @@ -787,23 +788,23 @@ func (suite *keyspaceGroupManagerTestSuite) runTestLoadKeyspaceGroupsAssignment( func (suite *keyspaceGroupManagerTestSuite) newUniqueKeyspaceGroupManager( loadKeyspaceGroupsBatchSize int64, // set to 0 to use the default value ) *KeyspaceGroupManager { - return suite.newKeyspaceGroupManager(loadKeyspaceGroupsBatchSize, uuid.New().String(), suite.cfg) + return suite.newKeyspaceGroupManager(loadKeyspaceGroupsBatchSize, rand.Uint64(), suite.cfg) } func (suite *keyspaceGroupManagerTestSuite) newKeyspaceGroupManager( loadKeyspaceGroupsBatchSize int64, // set to 0 to use the default value - uniqueStr string, + clusterID uint64, cfg *TestServiceConfig, ) *KeyspaceGroupManager { tsoServiceID := &discovery.ServiceRegistryEntry{ServiceAddr: cfg.GetAdvertiseListenAddr()} - tsoServiceKey := discovery.ServicePath(uniqueStr, "tso") - legacySvcRootPath := path.Join("/pd", uniqueStr) - tsoSvcRootPath := path.Join(mcsutils.MicroserviceRootPath, uniqueStr, "tso") + clusterIDStr := strconv.FormatUint(clusterID, 10) + legacySvcRootPath := path.Join("/pd", clusterIDStr) + tsoSvcRootPath := path.Join(mcsutils.MicroserviceRootPath, clusterIDStr, "tso") electionNamePrefix := "kgm-test-" + cfg.GetAdvertiseListenAddr() kgm := NewKeyspaceGroupManager( suite.ctx, tsoServiceID, suite.etcdClient, nil, electionNamePrefix, - tsoServiceKey, legacySvcRootPath, tsoSvcRootPath, cfg) + clusterID, legacySvcRootPath, tsoSvcRootPath, cfg) if loadKeyspaceGroupsBatchSize != 0 { kgm.loadKeyspaceGroupsBatchSize = loadKeyspaceGroupsBatchSize } @@ -890,7 +891,7 @@ func collectAssignedKeyspaceGroupIDs(re *require.Assertions, kgm *KeyspaceGroupM re.Equal(i, int(am.kgID)) re.Equal(i, int(kg.ID)) for _, m := range kg.Members { - if m.Address == kgm.tsoServiceID.ServiceAddr { + if m.CompareAddress(kgm.tsoServiceID.ServiceAddr) { ids = append(ids, uint32(i)) break } @@ -1043,18 +1044,20 @@ func (suite *keyspaceGroupManagerTestSuite) TestPrimaryPriorityChange() { var err error defaultPriority := mcsutils.DefaultKeyspaceGroupReplicaPriority - 
uniqueStr := uuid.New().String() - rootPath := path.Join("/pd", uniqueStr) + clusterID := rand.Uint64() + clusterIDStr := strconv.FormatUint(clusterID, 10) + + rootPath := path.Join("/pd", clusterIDStr) cfg1 := suite.createConfig() cfg2 := suite.createConfig() svcAddr1 := cfg1.GetAdvertiseListenAddr() svcAddr2 := cfg2.GetAdvertiseListenAddr() // Register TSO server 1 - err = suite.registerTSOServer(re, uniqueStr, svcAddr1, cfg1) + err = suite.registerTSOServer(re, clusterIDStr, svcAddr1, cfg1) re.NoError(err) defer func() { - re.NoError(suite.deregisterTSOServer(uniqueStr, svcAddr1)) + re.NoError(suite.deregisterTSOServer(clusterIDStr, svcAddr1)) }() // Create three keyspace groups on two TSO servers with default replica priority. @@ -1067,7 +1070,7 @@ func (suite *keyspaceGroupManagerTestSuite) TestPrimaryPriorityChange() { // Create the first TSO server which loads all three keyspace groups created above. // All primaries should be on the first TSO server. - mgr1 := suite.newKeyspaceGroupManager(1, uniqueStr, cfg1) + mgr1 := suite.newKeyspaceGroupManager(1, clusterID, cfg1) re.NotNil(mgr1) defer mgr1.Close() err = mgr1.Initialize() @@ -1099,9 +1102,9 @@ func (suite *keyspaceGroupManagerTestSuite) TestPrimaryPriorityChange() { checkTSO(ctx, re, &wg, mgr1, ids) // Create the Second TSO server. - err = suite.registerTSOServer(re, uniqueStr, svcAddr2, cfg2) + err = suite.registerTSOServer(re, clusterIDStr, svcAddr2, cfg2) re.NoError(err) - mgr2 := suite.newKeyspaceGroupManager(1, uniqueStr, cfg2) + mgr2 := suite.newKeyspaceGroupManager(1, clusterID, cfg2) re.NotNil(mgr2) err = mgr2.Initialize() re.NoError(err) @@ -1110,17 +1113,17 @@ func (suite *keyspaceGroupManagerTestSuite) TestPrimaryPriorityChange() { // Shutdown the second TSO server. mgr2.Close() - re.NoError(suite.deregisterTSOServer(uniqueStr, svcAddr2)) + re.NoError(suite.deregisterTSOServer(clusterIDStr, svcAddr2)) // The primaries should move back to the first TSO server. waitForPrimariesServing(re, []*KeyspaceGroupManager{mgr1, mgr1, mgr1}, ids) // Restart the Second TSO server. - err = suite.registerTSOServer(re, uniqueStr, svcAddr2, cfg2) + err = suite.registerTSOServer(re, clusterIDStr, svcAddr2, cfg2) re.NoError(err) defer func() { - re.NoError(suite.deregisterTSOServer(uniqueStr, svcAddr2)) + re.NoError(suite.deregisterTSOServer(clusterIDStr, svcAddr2)) }() - mgr2 = suite.newKeyspaceGroupManager(1, uniqueStr, cfg2) + mgr2 = suite.newKeyspaceGroupManager(1, clusterID, cfg2) re.NotNil(mgr2) defer mgr2.Close() err = mgr2.Initialize() @@ -1209,5 +1212,5 @@ func waitForPrimariesServing( } } return true - }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) + }, testutil.WithWaitFor(10*time.Second), testutil.WithTickInterval(50*time.Millisecond)) } diff --git a/pkg/tso/local_allocator.go b/pkg/tso/local_allocator.go index 9d244d2531d..e9019bf2bf3 100644 --- a/pkg/tso/local_allocator.go +++ b/pkg/tso/local_allocator.go @@ -101,7 +101,7 @@ func (lta *LocalTSOAllocator) GetDCLocation() string { func (lta *LocalTSOAllocator) Initialize(suffix int) error { lta.tsoAllocatorRoleGauge.Set(1) lta.timestampOracle.suffix = suffix - return lta.timestampOracle.SyncTimestamp(lta.leadership) + return lta.timestampOracle.SyncTimestamp() } // IsInitialize is used to indicates whether this allocator is initialized. @@ -112,7 +112,7 @@ func (lta *LocalTSOAllocator) IsInitialize() bool { // UpdateTSO is used to update the TSO in memory and the time window in etcd // for all local TSO allocators this PD server hold. 
func (lta *LocalTSOAllocator) UpdateTSO() error { - return lta.timestampOracle.UpdateTimestamp(lta.leadership) + return lta.timestampOracle.UpdateTimestamp() } // SetTSO sets the physical part with given TSO. @@ -139,7 +139,7 @@ func (lta *LocalTSOAllocator) Reset() { } // setAllocatorLeader sets the current Local TSO Allocator leader. -func (lta *LocalTSOAllocator) setAllocatorLeader(member interface{}) { +func (lta *LocalTSOAllocator) setAllocatorLeader(member any) { lta.allocatorLeader.Store(member) } diff --git a/pkg/tso/tso.go b/pkg/tso/tso.go index 5ad786678c4..bcb3169e73c 100644 --- a/pkg/tso/tso.go +++ b/pkg/tso/tso.go @@ -156,7 +156,7 @@ func (t *timestampOracle) GetTimestampPath() string { } // SyncTimestamp is used to synchronize the timestamp. -func (t *timestampOracle) SyncTimestamp(leadership *election.Leadership) error { +func (t *timestampOracle) SyncTimestamp() error { log.Info("start to sync timestamp", logutil.CondUint32("keyspace-group-id", t.keyspaceGroupID, t.keyspaceGroupID > 0)) t.metrics.syncEvent.Inc() @@ -311,7 +311,7 @@ func (t *timestampOracle) resetUserTimestampInner(leadership *election.Leadershi // // NOTICE: this function should be called after the TSO in memory has been initialized // and should not be called when the TSO in memory has been reset anymore. -func (t *timestampOracle) UpdateTimestamp(leadership *election.Leadership) error { +func (t *timestampOracle) UpdateTimestamp() error { if !t.isInitialized() { return errs.ErrUpdateTimestamp.FastGenByArgs("timestamp in memory has not been initialized") } diff --git a/pkg/unsaferecovery/unsafe_recovery_controller.go b/pkg/unsaferecovery/unsafe_recovery_controller.go index aa45ba6a2bd..89cd6e6393c 100644 --- a/pkg/unsaferecovery/unsafe_recovery_controller.go +++ b/pkg/unsaferecovery/unsafe_recovery_controller.go @@ -34,6 +34,7 @@ import ( sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/pkg/utils/syncutil" + "github.com/tikv/pd/pkg/utils/typeutil" "go.uber.org/zap" ) @@ -106,7 +107,7 @@ const ( type cluster interface { core.StoreSetInformer - DropCacheAllRegion() + ResetRegionCache() AllocID() (uint64, error) BuryStore(storeID uint64, forceBury bool) error GetSchedulerConfig() sc.SchedulerConfigProvider @@ -492,12 +493,11 @@ func (u *Controller) GetStage() stage { } func (u *Controller) changeStage(stage stage) { - u.stage = stage - // Halt and resume the scheduling once the running state changed. - running := isRunning(stage) - if opt := u.cluster.GetSchedulerConfig(); opt.IsSchedulingHalted() != running { - opt.SetHaltScheduling(running, "online-unsafe-recovery") + // If the running stage changes, update the scheduling allowance status to add or remove "online-unsafe-recovery" halt. 
+ if running := isRunning(stage); running != isRunning(u.stage) { + u.cluster.GetSchedulerConfig().SetSchedulingAllowanceStatus(running, "online-unsafe-recovery") } + u.stage = stage var output StageOutput output.Time = time.Now().Format("2006-01-02 15:04:05.000") @@ -544,7 +544,7 @@ func (u *Controller) changeStage(stage stage) { case Finished: if u.step > 1 { // == 1 means no operation has done, no need to invalid cache - u.cluster.DropCacheAllRegion() + u.cluster.ResetRegionCache() } output.Info = "Unsafe recovery Finished" output.Details = u.getAffectedTableDigest() @@ -780,6 +780,12 @@ func (r *regionItem) IsRaftStale(origin *regionItem, u *Controller) bool { func(a, b *regionItem) int { return int(a.report.GetRaftState().GetHardState().GetTerm()) - int(b.report.GetRaftState().GetHardState().GetTerm()) }, + // choose the peer has maximum applied index or last index. + func(a, b *regionItem) int { + maxIdxA := typeutil.MaxUint64(a.report.GetRaftState().GetLastIndex(), a.report.AppliedIndex) + maxIdxB := typeutil.MaxUint64(b.report.GetRaftState().GetLastIndex(), b.report.AppliedIndex) + return int(maxIdxA - maxIdxB) + }, func(a, b *regionItem) int { return int(a.report.GetRaftState().GetLastIndex()) - int(b.report.GetRaftState().GetLastIndex()) }, diff --git a/pkg/unsaferecovery/unsafe_recovery_controller_test.go b/pkg/unsaferecovery/unsafe_recovery_controller_test.go index 956b9b8729c..cce38285212 100644 --- a/pkg/unsaferecovery/unsafe_recovery_controller_test.go +++ b/pkg/unsaferecovery/unsafe_recovery_controller_test.go @@ -1755,7 +1755,7 @@ func TestRunning(t *testing.T) { re.True(recoveryController.IsRunning()) } -func TestEpochComparsion(t *testing.T) { +func TestEpochComparison(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -1856,3 +1856,105 @@ func newTestStores(n uint64, version string) []*core.StoreInfo { func getTestDeployPath(storeID uint64) string { return fmt.Sprintf("test/store%d", storeID) } + +func TestSelectLeader(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + opts := mockconfig.NewTestOptions() + cluster := mockcluster.NewCluster(ctx, opts) + coordinator := schedule.NewCoordinator(ctx, cluster, hbstream.NewTestHeartbeatStreams(ctx, cluster.ID, cluster, true)) + coordinator.Run() + stores := newTestStores(6, "6.0.0") + labels := []*metapb.StoreLabel{ + { + Key: core.EngineKey, + Value: core.EngineTiFlash, + }, + } + stores[5].IsTiFlash() + core.SetStoreLabels(labels)(stores[5]) + for _, store := range stores { + cluster.PutStore(store) + } + recoveryController := NewController(cluster) + + cases := []struct { + peers []*regionItem + leaderID uint64 + }{ + { + peers: []*regionItem{ + newPeer(1, 1, 10, 5, 4), + newPeer(2, 2, 9, 9, 8), + }, + leaderID: 2, + }, + { + peers: []*regionItem{ + newPeer(1, 1, 10, 10, 9), + newPeer(2, 1, 8, 8, 15), + newPeer(3, 1, 12, 11, 11), + }, + leaderID: 2, + }, + { + peers: []*regionItem{ + newPeer(1, 1, 9, 9, 11), + newPeer(2, 1, 10, 8, 7), + newPeer(3, 1, 11, 7, 6), + }, + leaderID: 3, + }, + { + peers: []*regionItem{ + newPeer(1, 1, 11, 11, 8), + newPeer(2, 1, 11, 10, 10), + newPeer(3, 1, 11, 9, 8), + }, + leaderID: 1, + }, + { + peers: []*regionItem{ + newPeer(6, 1, 11, 11, 9), + newPeer(1, 1, 11, 11, 8), + newPeer(2, 1, 11, 10, 10), + newPeer(3, 1, 11, 9, 8), + }, + leaderID: 1, + }, + } + + for i, c := range cases { + peersMap := map[uint64][]*regionItem{ + 1: c.peers, + } + region := &metapb.Region{ 
+ Id: 1, + } + leader := recoveryController.selectLeader(peersMap, region) + re.Equal(leader.Region().Id, c.leaderID, "case: %d", i) + } +} + +func newPeer(storeID, term, lastIndex, committedIndex, appliedIndex uint64) *regionItem { + return ®ionItem{ + storeID: storeID, + report: &pdpb.PeerReport{ + RaftState: &raft_serverpb.RaftLocalState{ + HardState: &eraftpb.HardState{ + Term: term, + Commit: committedIndex, + }, + LastIndex: lastIndex, + }, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: storeID, + }, + }, + AppliedIndex: appliedIndex, + }, + } +} diff --git a/pkg/utils/apiutil/apiutil.go b/pkg/utils/apiutil/apiutil.go index f8ca1174b3b..2503ba9aecf 100644 --- a/pkg/utils/apiutil/apiutil.go +++ b/pkg/utils/apiutil/apiutil.go @@ -27,9 +27,12 @@ import ( "path" "strconv" "strings" + "sync" "time" + "github.com/gin-gonic/gin" "github.com/gorilla/mux" + "github.com/joho/godotenv" "github.com/pingcap/errcode" "github.com/pingcap/errors" "github.com/pingcap/log" @@ -67,6 +70,17 @@ const ( chunkSize = 4096 ) +var once sync.Once + +func init() { + once.Do(func() { + // See https://github.com/pingcap/tidb-dashboard/blob/f8ecb64e3d63f4ed91c3dca7a04362418ade01d8/pkg/apiserver/apiserver.go#L84 + // These global modification will be effective only for the first invoke. + _ = godotenv.Load() + gin.SetMode(gin.ReleaseMode) + }) +} + // DeferClose captures the error returned from closing (if an error occurs). // This is designed to be used in a defer statement. func DeferClose(c io.Closer, err *error) { @@ -102,14 +116,14 @@ func TagJSONError(err error) error { func ErrorResp(rd *render.Render, w http.ResponseWriter, err error) { if err == nil { log.Error("nil is given to errorResp") - rd.JSON(w, http.StatusInternalServerError, "nil error") + _ = rd.JSON(w, http.StatusInternalServerError, "nil error") return } if errCode := errcode.CodeChain(err); errCode != nil { w.Header().Set("TiDB-Error-Code", errCode.Code().CodeStr().String()) - rd.JSON(w, errCode.Code().HTTPCode(), errcode.NewJSONFormat(errCode)) + _ = rd.JSON(w, errCode.Code().HTTPCode(), errcode.NewJSONFormat(errCode)) } else { - rd.JSON(w, http.StatusInternalServerError, err.Error()) + _ = rd.JSON(w, http.StatusInternalServerError, err.Error()) } } @@ -280,7 +294,7 @@ func ParseUint64VarsField(vars map[string]string, varName string) (uint64, *Fiel } // CollectEscapeStringOption is used to collect string using escaping from input map for given option -func CollectEscapeStringOption(option string, input map[string]interface{}, collectors ...func(v string)) error { +func CollectEscapeStringOption(option string, input map[string]any, collectors ...func(v string)) error { if v, ok := input[option].(string); ok { value, err := url.QueryUnescape(v) if err != nil { @@ -295,7 +309,7 @@ func CollectEscapeStringOption(option string, input map[string]interface{}, coll } // CollectStringOption is used to collect string using from input map for given option -func CollectStringOption(option string, input map[string]interface{}, collectors ...func(v string)) error { +func CollectStringOption(option string, input map[string]any, collectors ...func(v string)) error { if v, ok := input[option].(string); ok { for _, c := range collectors { c(v) @@ -306,7 +320,7 @@ func CollectStringOption(option string, input map[string]interface{}, collectors } // ParseKey is used to parse interface into []byte and string -func ParseKey(name string, input map[string]interface{}) ([]byte, string, error) { +func ParseKey(name string, input 
map[string]any) ([]byte, string, error) { k, ok := input[name] if !ok { return nil, "", fmt.Errorf("missing %s", name) @@ -322,9 +336,33 @@ func ParseKey(name string, input map[string]interface{}) ([]byte, string, error) return returned, rawKey, nil } +// ParseHexKeys decodes hexadecimal src into DecodedLen(len(src)) bytes if the format is "hex". +// +// ParseHexKeys expects that each key contains only +// hexadecimal characters and each key has even length. +// If existing one key is malformed, ParseHexKeys returns +// the original bytes. +func ParseHexKeys(format string, keys [][]byte) (decodedBytes [][]byte, err error) { + if format != "hex" { + return keys, nil + } + + for _, key := range keys { + // We can use the source slice itself as the destination + // because the decode loop increments by one and then the 'seen' byte is not used anymore. + // Reference to hex.DecodeString() + n, err := hex.Decode(key, key) + if err != nil { + return keys, err + } + decodedBytes = append(decodedBytes, key[:n]) + } + return decodedBytes, nil +} + // ReadJSON reads a JSON data from r and then closes it. // An error due to invalid json will be returned as a JSONError -func ReadJSON(r io.ReadCloser, data interface{}) error { +func ReadJSON(r io.ReadCloser, data any) error { var err error defer DeferClose(r, &err) b, err := io.ReadAll(r) @@ -342,7 +380,7 @@ func ReadJSON(r io.ReadCloser, data interface{}) error { // ReadJSONRespondError writes json into data. // On error respond with a 400 Bad Request -func ReadJSONRespondError(rd *render.Render, w http.ResponseWriter, body io.ReadCloser, data interface{}) error { +func ReadJSONRespondError(rd *render.Render, w http.ResponseWriter, body io.ReadCloser, data any) error { err := ReadJSON(body, data) if err == nil { return nil @@ -427,16 +465,15 @@ func (p *customReverseProxies) ServeHTTP(w http.ResponseWriter, r *http.Request) log.Error("request failed", errs.ZapError(errs.ErrSendRequest, err)) continue } - defer resp.Body.Close() var reader io.ReadCloser switch resp.Header.Get("Content-Encoding") { case "gzip": reader, err = gzip.NewReader(resp.Body) if err != nil { log.Error("failed to parse response with gzip compress", zap.Error(err)) + resp.Body.Close() continue } - defer reader.Close() default: reader = resp.Body } @@ -460,6 +497,8 @@ func (p *customReverseProxies) ServeHTTP(w http.ResponseWriter, r *http.Request) break } } + resp.Body.Close() + reader.Close() if err != nil { log.Error("write failed", errs.ZapError(errs.ErrWriteHTTPBody, err), zap.String("target-address", url.String())) // try next url. 
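A minimal usage sketch of the ParseHexKeys helper added above. The keys are arbitrary examples and the import path follows this repository's module layout; note that decoding happens in place on the provided slices, so a failed decode hands back the original (possibly partially decoded) keys together with the error, as the new test below exercises.

package main

import (
	"fmt"

	"github.com/tikv/pd/pkg/utils/apiutil"
)

func main() {
	// "7a" decodes to 0x7a and "7485" to 0x74 0x85; both keys are valid hex,
	// so ParseHexKeys returns the decoded byte slices and a nil error.
	decoded, err := apiutil.ParseHexKeys("hex", [][]byte{[]byte("7a"), []byte("7485")})
	fmt.Println(decoded, err) // [[122] [116 133]] <nil>

	// Any format other than "hex" returns the keys untouched.
	raw, _ := apiutil.ParseHexKeys("raw", [][]byte{[]byte("abc")})
	fmt.Println(string(raw[0])) // abc
}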
diff --git a/pkg/utils/apiutil/apiutil_test.go b/pkg/utils/apiutil/apiutil_test.go index 106d3fb21cb..3e8a998d5fd 100644 --- a/pkg/utils/apiutil/apiutil_test.go +++ b/pkg/utils/apiutil/apiutil_test.go @@ -26,7 +26,6 @@ import ( ) func TestJsonRespondErrorOk(t *testing.T) { - t.Parallel() re := require.New(t) rd := render.New(render.Options{ IndentJSON: true, @@ -45,7 +44,6 @@ func TestJsonRespondErrorOk(t *testing.T) { } func TestJsonRespondErrorBadInput(t *testing.T) { - t.Parallel() re := require.New(t) rd := render.New(render.Options{ IndentJSON: true, @@ -71,7 +69,6 @@ func TestJsonRespondErrorBadInput(t *testing.T) { } func TestGetIPPortFromHTTPRequest(t *testing.T) { - t.Parallel() re := require.New(t) testCases := []struct { @@ -207,3 +204,39 @@ func TestGetIPPortFromHTTPRequest(t *testing.T) { re.Equal(testCase.port, port, "case %d", idx) } } + +func TestParseHexKeys(t *testing.T) { + re := require.New(t) + // Test for hex format + hexBytes := [][]byte{[]byte(""), []byte("67"), []byte("0001020304050607"), []byte("08090a0b0c0d0e0f"), []byte("f0f1f2f3f4f5f6f7")} + parseKeys, err := ParseHexKeys("hex", hexBytes) + re.NoError(err) + expectedBytes := [][]byte{[]byte(""), []byte("g"), []byte("\x00\x01\x02\x03\x04\x05\x06\x07"), []byte("\x08\t\n\x0b\x0c\r\x0e\x0f"), []byte("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7")} + re.Equal(expectedBytes, parseKeys) + // Test for other format NOT hex + hexBytes = [][]byte{[]byte("hello")} + parseKeys, err = ParseHexKeys("other", hexBytes) + re.NoError(err) + re.Len(parseKeys, 1) + re.Equal([]byte("hello"), parseKeys[0]) + // Test for wrong key + hexBytes = [][]byte{[]byte("world")} + parseKeys, err = ParseHexKeys("hex", hexBytes) + re.Error(err) + re.Len(parseKeys, 1) + re.Equal([]byte("world"), parseKeys[0]) + // Test for the first key is not valid, but the second key is valid + hexBytes = [][]byte{[]byte("world"), []byte("0001020304050607")} + parseKeys, err = ParseHexKeys("hex", hexBytes) + re.Error(err) + re.Len(parseKeys, 2) + re.Equal([]byte("world"), parseKeys[0]) + re.NotEqual([]byte("\x00\x01\x02\x03\x04\x05\x06\x07"), parseKeys[1]) + // Test for the first key is valid, but the second key is not valid + hexBytes = [][]byte{[]byte("0001020304050607"), []byte("world")} + parseKeys, err = ParseHexKeys("hex", hexBytes) + re.Error(err) + re.Len(parseKeys, 2) + re.NotEqual([]byte("\x00\x01\x02\x03\x04\x05\x06\x07"), parseKeys[0]) + re.Equal([]byte("world"), parseKeys[1]) +} diff --git a/pkg/utils/apiutil/multiservicesapi/middleware.go b/pkg/utils/apiutil/multiservicesapi/middleware.go index ed34ecc6afb..4343adcc981 100644 --- a/pkg/utils/apiutil/multiservicesapi/middleware.go +++ b/pkg/utils/apiutil/multiservicesapi/middleware.go @@ -48,8 +48,8 @@ func ServiceRedirector() gin.HandlerFunc { // Prevent more than one redirection. 
if name := c.Request.Header.Get(ServiceRedirectorHeader); len(name) != 0 { - log.Error("redirect but server is not primary", zap.String("from", name), zap.String("server", svr.Name()), errs.ZapError(errs.ErrRedirect)) - c.AbortWithStatusJSON(http.StatusInternalServerError, errs.ErrRedirect.FastGenByArgs().Error()) + log.Error("redirect but server is not primary", zap.String("from", name), zap.String("server", svr.Name()), errs.ZapError(errs.ErrRedirectToNotPrimary)) + c.AbortWithStatusJSON(http.StatusInternalServerError, errs.ErrRedirectToNotPrimary.FastGenByArgs().Error()) return } diff --git a/pkg/utils/apiutil/serverapi/middleware.go b/pkg/utils/apiutil/serverapi/middleware.go old mode 100755 new mode 100644 index 2432e15c967..0718702b5a5 --- a/pkg/utils/apiutil/serverapi/middleware.go +++ b/pkg/utils/apiutil/serverapi/middleware.go @@ -18,7 +18,9 @@ import ( "net/http" "net/url" "strings" + "time" + "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" mcsutils "github.com/tikv/pd/pkg/mcs/utils" @@ -204,20 +206,25 @@ func (h *redirector) ServeHTTP(w http.ResponseWriter, r *http.Request, next http clientUrls = append(clientUrls, targetAddr) // Add a header to the response, it is used to mark whether the request has been forwarded to the micro service. w.Header().Add(apiutil.XForwardedToMicroServiceHeader, "true") - } else { - leader := h.s.GetMember().GetLeader() + } else if name := r.Header.Get(apiutil.PDRedirectorHeader); len(name) == 0 { + leader := h.waitForLeader(r) + // The leader has not been elected yet. if leader == nil { - http.Error(w, "no leader", http.StatusServiceUnavailable) + http.Error(w, errs.ErrRedirectNoLeader.FastGenByArgs().Error(), http.StatusServiceUnavailable) return } - clientUrls = leader.GetClientUrls() - // Prevent more than one redirection among PD/API servers. - if name := r.Header.Get(apiutil.PDRedirectorHeader); len(name) != 0 { - log.Error("redirect but server is not leader", zap.String("from", name), zap.String("server", h.s.Name()), errs.ZapError(errs.ErrRedirect)) - http.Error(w, errs.ErrRedirectToNotLeader.FastGenByArgs().Error(), http.StatusInternalServerError) + // If the leader is the current server now, we can handle the request directly. + if h.s.GetMember().IsLeader() || leader.GetName() == h.s.Name() { + next(w, r) return } + clientUrls = leader.GetClientUrls() r.Header.Set(apiutil.PDRedirectorHeader, h.s.Name()) + } else { + // Prevent more than one redirection among PD/API servers. + log.Error("redirect but server is not leader", zap.String("from", name), zap.String("server", h.s.Name()), errs.ZapError(errs.ErrRedirectToNotLeader)) + http.Error(w, errs.ErrRedirectToNotLeader.FastGenByArgs().Error(), http.StatusInternalServerError) + return } urls := make([]url.URL, 0, len(clientUrls)) @@ -233,3 +240,38 @@ func (h *redirector) ServeHTTP(w http.ResponseWriter, r *http.Request, next http client := h.s.GetHTTPClient() apiutil.NewCustomReverseProxies(client, urls).ServeHTTP(w, r) } + +const ( + backoffMaxDelay = 3 * time.Second + backoffInterval = 100 * time.Millisecond +) + +// If current server does not have a leader, backoff to increase the chance of success. 
+func (h *redirector) waitForLeader(r *http.Request) (leader *pdpb.Member) { + var ( + interval = backoffInterval + maxDelay = backoffMaxDelay + curDelay = time.Duration(0) + ) + for { + leader = h.s.GetMember().GetLeader() + if leader != nil { + return + } + select { + case <-time.After(interval): + curDelay += interval + if curDelay >= maxDelay { + return + } + interval *= 2 + if curDelay+interval > maxDelay { + interval = maxDelay - curDelay + } + case <-r.Context().Done(): + return + case <-h.s.LoopContext().Done(): + return + } + } +} diff --git a/pkg/utils/assertutil/assertutil.go b/pkg/utils/assertutil/assertutil.go index 9eb2719b220..74b831d5472 100644 --- a/pkg/utils/assertutil/assertutil.go +++ b/pkg/utils/assertutil/assertutil.go @@ -19,7 +19,7 @@ import "github.com/stretchr/testify/require" // Checker accepts the injection of check functions and context from test files. // Any check function should be set before usage unless the test will fail. type Checker struct { - IsNil func(obtained interface{}) + IsNil func(obtained any) FailNow func() } @@ -34,14 +34,14 @@ func CheckerWithNilAssert(re *require.Assertions) *Checker { checker.FailNow = func() { re.FailNow("should be nil") } - checker.IsNil = func(obtained interface{}) { + checker.IsNil = func(obtained any) { re.Nil(obtained) } return checker } // AssertNil calls the injected IsNil assertion. -func (c *Checker) AssertNil(obtained interface{}) { +func (c *Checker) AssertNil(obtained any) { if c.IsNil == nil { c.FailNow() return diff --git a/pkg/utils/assertutil/assertutil_test.go b/pkg/utils/assertutil/assertutil_test.go index 84bd21cef05..076cdd2ac93 100644 --- a/pkg/utils/assertutil/assertutil_test.go +++ b/pkg/utils/assertutil/assertutil_test.go @@ -22,7 +22,6 @@ import ( ) func TestNilFail(t *testing.T) { - t.Parallel() re := require.New(t) var failErr error checker := NewChecker() diff --git a/pkg/utils/configutil/configutil.go b/pkg/utils/configutil/configutil.go index 978edce7764..086f74ff842 100644 --- a/pkg/utils/configutil/configutil.go +++ b/pkg/utils/configutil/configutil.go @@ -96,7 +96,7 @@ func PrintConfigCheckMsg(w io.Writer, warningMsgs []string) { } // ConfigFromFile loads config from file. -func ConfigFromFile(c interface{}, path string) (*toml.MetaData, error) { +func ConfigFromFile(c any, path string) (*toml.MetaData, error) { meta, err := toml.DecodeFile(path, c) return &meta, errors.WithStack(err) } @@ -171,3 +171,10 @@ func AdjustPath(p *string) { *p = absPath } } + +// AdjustBool adjusts the value of a bool variable. +func AdjustBool(v *bool, defValue bool) { + if !*v { + *v = defValue + } +} diff --git a/pkg/utils/etcdutil/etcdutil.go b/pkg/utils/etcdutil/etcdutil.go index 7db4d1ea56a..2b89c8b4da6 100644 --- a/pkg/utils/etcdutil/etcdutil.go +++ b/pkg/utils/etcdutil/etcdutil.go @@ -88,7 +88,7 @@ func CheckClusterID(localClusterID types.ID, um types.URLsMap, tlsConfig *tls.Co trp := &http.Transport{ TLSClientConfig: tlsConfig, } - remoteCluster, gerr := etcdserver.GetClusterFromRemotePeers(nil, []string{u}, trp) + remoteCluster, gerr := etcdserver.GetClusterFromRemotePeers(nil, []string{u}, trp, true) trp.CloseIdleConnections() if gerr != nil { // Do not return error, because other members may be not ready. @@ -253,7 +253,7 @@ func newClient(tlsConfig *tls.Config, endpoints ...string) (*clientv3.Client, er } // CreateEtcdClient creates etcd v3 client with detecting endpoints. 
-func CreateEtcdClient(tlsConfig *tls.Config, acURLs []url.URL) (*clientv3.Client, error) { +func CreateEtcdClient(tlsConfig *tls.Config, acURLs []url.URL, sourceOpt ...string) (*clientv3.Client, error) { urls := make([]string, 0, len(acURLs)) for _, u := range acURLs { urls = append(urls, u.String()) @@ -270,7 +270,11 @@ func CreateEtcdClient(tlsConfig *tls.Config, acURLs []url.URL) (*clientv3.Client failpoint.Inject("closeTick", func() { failpoint.Return(client, err) }) - initHealthChecker(tickerInterval, tlsConfig, client) + source := "default-etcd-client" + if len(sourceOpt) > 0 { + source = sourceOpt[0] + } + initHealthChecker(tickerInterval, tlsConfig, client, source) return client, err } @@ -414,6 +418,8 @@ type LoopWatcher struct { // updateClientCh is used to update the etcd client. // It's only used for testing. updateClientCh chan *clientv3.Client + // watchChTimeoutDuration is the timeout duration for a watchChan. + watchChTimeoutDuration time.Duration } // NewLoopWatcher creates a new LoopWatcher. @@ -444,6 +450,7 @@ func NewLoopWatcher( loadRetryTimes: defaultLoadFromEtcdRetryTimes, loadBatchSize: maxLoadBatchSize, watchChangeRetryInterval: defaultEtcdRetryInterval, + watchChTimeoutDuration: WatchChTimeoutDuration, } } @@ -593,7 +600,7 @@ func (lw *LoopWatcher) watch(ctx context.Context, revision int64) (nextRevision cancel() // If no message comes from an etcd watchChan for WatchChTimeoutDuration, // create a new one and need not to reset lastReceivedResponseTime. - if time.Since(lastReceivedResponseTime) >= WatchChTimeoutDuration { + if time.Since(lastReceivedResponseTime) >= lw.watchChTimeoutDuration { log.Warn("watch channel is blocked for a long time, recreating a new one in watch loop", zap.Duration("timeout", time.Since(lastReceivedResponseTime)), zap.Int64("revision", revision), zap.String("name", lw.name), zap.String("key", lw.key)) diff --git a/pkg/utils/etcdutil/etcdutil_test.go b/pkg/utils/etcdutil/etcdutil_test.go index 55af4c92a2d..c402081fa2f 100644 --- a/pkg/utils/etcdutil/etcdutil_test.go +++ b/pkg/utils/etcdutil/etcdutil_test.go @@ -172,28 +172,48 @@ func TestEtcdClientSync(t *testing.T) { servers, client1, clean := NewTestEtcdCluster(t, 1) defer clean() etcd1, cfg1 := servers[0], servers[0].Config() + defer etcd1.Close() // Add a new member. etcd2 := MustAddEtcdMember(t, &cfg1, client1) defer etcd2.Close() checkMembers(re, client1, []*embed.Etcd{etcd1, etcd2}) - testutil.Eventually(re, func() bool { - // wait for etcd client sync endpoints - return len(client1.Endpoints()) == 2 - }) + // wait for etcd client sync endpoints + checkEtcdEndpointNum(re, client1, 2) + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + // remove one member that is not the one we connected to. + resp, err := ListEtcdMembers(ctx, client1) + re.NoError(err) + + var memIDToRemove uint64 + for _, m := range resp.Members { + if m.ID != resp.Header.MemberId { + memIDToRemove = m.ID + break + } + } - // Remove the first member and close the etcd1. - _, err := RemoveEtcdMember(client1, uint64(etcd1.Server.ID())) + _, err = RemoveEtcdMember(client1, memIDToRemove) re.NoError(err) - etcd1.Close() // Check the client can get the new member with the new endpoints. 
+ checkEtcdEndpointNum(re, client1, 1) + + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/utils/etcdutil/fastTick")) +} + +func checkEtcdEndpointNum(re *require.Assertions, client *clientv3.Client, num int) { testutil.Eventually(re, func() bool { - // wait for etcd client sync endpoints - return len(client1.Endpoints()) == 1 + return len(client.Endpoints()) == num }) +} - re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/utils/etcdutil/fastTick")) +func checkEtcdClientHealth(re *require.Assertions, client *clientv3.Client) { + testutil.Eventually(re, func() bool { + return IsHealthy(context.Background(), client) + }) } func TestEtcdScaleInAndOut(t *testing.T) { @@ -204,10 +224,10 @@ func TestEtcdScaleInAndOut(t *testing.T) { etcd1, cfg1 := servers[0], servers[0].Config() // Create two etcd clients with etcd1 as endpoint. - client1, err := CreateEtcdClient(nil, cfg1.LCUrls) // execute member change operation with this client + client1, err := CreateEtcdClient(nil, cfg1.ListenClientUrls) // execute member change operation with this client re.NoError(err) defer client1.Close() - client2, err := CreateEtcdClient(nil, cfg1.LCUrls) // check member change with this client + client2, err := CreateEtcdClient(nil, cfg1.ListenClientUrls) // check member change with this client re.NoError(err) defer client2.Close() @@ -228,25 +248,21 @@ func TestRandomKillEtcd(t *testing.T) { // Start a etcd server. etcds, client1, clean := NewTestEtcdCluster(t, 3) defer clean() - testutil.Eventually(re, func() bool { - return len(client1.Endpoints()) == 3 - }) + checkEtcdEndpointNum(re, client1, 3) // Randomly kill an etcd server and restart it cfgs := []embed.Config{etcds[0].Config(), etcds[1].Config(), etcds[2].Config()} - for i := 0; i < 10; i++ { + for i := 0; i < len(cfgs)*2; i++ { killIndex := rand.Intn(len(etcds)) etcds[killIndex].Close() - testutil.Eventually(re, func() bool { - return IsHealthy(context.Background(), client1) - }) + checkEtcdEndpointNum(re, client1, 2) + checkEtcdClientHealth(re, client1) etcd, err := embed.StartEtcd(&cfgs[killIndex]) re.NoError(err) <-etcd.Server.ReadyNotify() etcds[killIndex] = etcd - testutil.Eventually(re, func() bool { - return IsHealthy(context.Background(), client1) - }) + checkEtcdEndpointNum(re, client1, 3) + checkEtcdClientHealth(re, client1) } for _, etcd := range etcds { if etcd != nil { @@ -282,9 +298,11 @@ func checkEtcdWithHangLeader(t *testing.T) error { // Create a proxy to etcd1. proxyAddr := tempurl.Alloc() var enableDiscard atomic.Bool - go proxyWithDiscard(re, cfg1.LCUrls[0].String(), proxyAddr, &enableDiscard) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go proxyWithDiscard(ctx, re, cfg1.ListenClientUrls[0].String(), proxyAddr, &enableDiscard) - // Create a etcd client with etcd1 as endpoint. + // Create an etcd client with etcd1 as endpoint. urls, err := types.NewURLs([]string{proxyAddr}) re.NoError(err) client1, err := CreateEtcdClient(nil, urls) @@ -295,39 +313,56 @@ func checkEtcdWithHangLeader(t *testing.T) error { etcd2 := MustAddEtcdMember(t, &cfg1, client1) defer etcd2.Close() checkMembers(re, client1, []*embed.Etcd{etcd1, etcd2}) - time.Sleep(1 * time.Second) // wait for etcd client sync endpoints // Hang the etcd1 and wait for the client to connect to etcd2. 
enableDiscard.Store(true) - time.Sleep(time.Second) + time.Sleep(3 * time.Second) _, err = EtcdKVGet(client1, "test/key1") return err } -func proxyWithDiscard(re *require.Assertions, server, proxy string, enableDiscard *atomic.Bool) { +func proxyWithDiscard(ctx context.Context, re *require.Assertions, server, proxy string, enableDiscard *atomic.Bool) { server = strings.TrimPrefix(server, "http://") proxy = strings.TrimPrefix(proxy, "http://") l, err := net.Listen("tcp", proxy) re.NoError(err) + defer l.Close() for { - connect, err := l.Accept() - re.NoError(err) - go func(connect net.Conn) { - serverConnect, err := net.Dial("tcp", server) - re.NoError(err) - pipe(connect, serverConnect, enableDiscard) - }(connect) + type accepted struct { + conn net.Conn + err error + } + accept := make(chan accepted, 1) + go func() { + // closed by `l.Close()` + conn, err := l.Accept() + accept <- accepted{conn, err} + }() + + select { + case <-ctx.Done(): + return + case a := <-accept: + if a.err != nil { + return + } + go func(connect net.Conn) { + serverConnect, err := net.DialTimeout("tcp", server, 3*time.Second) + re.NoError(err) + pipe(ctx, connect, serverConnect, enableDiscard) + }(a.conn) + } } } -func pipe(src net.Conn, dst net.Conn, enableDiscard *atomic.Bool) { +func pipe(ctx context.Context, src net.Conn, dst net.Conn, enableDiscard *atomic.Bool) { errChan := make(chan error, 1) go func() { - err := ioCopy(src, dst, enableDiscard) + err := ioCopy(ctx, src, dst, enableDiscard) errChan <- err }() go func() { - err := ioCopy(dst, src, enableDiscard) + err := ioCopy(ctx, dst, src, enableDiscard) errChan <- err }() <-errChan @@ -335,28 +370,32 @@ func pipe(src net.Conn, dst net.Conn, enableDiscard *atomic.Bool) { src.Close() } -func ioCopy(dst io.Writer, src io.Reader, enableDiscard *atomic.Bool) (err error) { +func ioCopy(ctx context.Context, dst io.Writer, src io.Reader, enableDiscard *atomic.Bool) error { buffer := make([]byte, 32*1024) for { - if enableDiscard.Load() { - io.Copy(io.Discard, src) - } - readNum, errRead := src.Read(buffer) - if readNum > 0 { - writeNum, errWrite := dst.Write(buffer[:readNum]) - if errWrite != nil { - return errWrite + select { + case <-ctx.Done(): + return nil + default: + if enableDiscard.Load() { + _, err := io.Copy(io.Discard, src) + return err } - if readNum != writeNum { - return io.ErrShortWrite + readNum, errRead := src.Read(buffer) + if readNum > 0 { + writeNum, errWrite := dst.Write(buffer[:readNum]) + if errWrite != nil { + return errWrite + } + if readNum != writeNum { + return io.ErrShortWrite + } + } + if errRead != nil { + return errRead } - } - if errRead != nil { - err = errRead - break } } - return err } type loopWatcherTestSuite struct { @@ -383,7 +422,7 @@ func (suite *loopWatcherTestSuite) SetupSuite() { suite.config = NewTestSingleConfig() suite.config.Dir = suite.T().TempDir() suite.startEtcd(re) - suite.client, err = CreateEtcdClient(nil, suite.config.LCUrls) + suite.client, err = CreateEtcdClient(nil, suite.config.ListenClientUrls) re.NoError(err) suite.cleans = append(suite.cleans, func() { suite.client.Close() @@ -412,7 +451,7 @@ func (suite *loopWatcherTestSuite) TestLoadNoExistedKey() { cache[string(kv.Key)] = struct{}{} return nil }, - func(kv *mvccpb.KeyValue) error { return nil }, + func(*mvccpb.KeyValue) error { return nil }, func([]*clientv3.Event) error { return nil }, false, /* withPrefix */ ) @@ -426,9 +465,9 @@ func (suite *loopWatcherTestSuite) TestLoadWithLimitChange() { re := suite.Require() 
re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/utils/etcdutil/meetEtcdError", `return()`)) cache := make(map[string]struct{}) - for i := 0; i < int(maxLoadBatchSize)*2; i++ { + testutil.GenerateTestDataConcurrently(int(maxLoadBatchSize)*2, func(i int) { suite.put(re, fmt.Sprintf("TestLoadWithLimitChange%d", i), "") - } + }) watcher := NewLoopWatcher( suite.ctx, &suite.wg, @@ -440,7 +479,7 @@ func (suite *loopWatcherTestSuite) TestLoadWithLimitChange() { cache[string(kv.Key)] = struct{}{} return nil }, - func(kv *mvccpb.KeyValue) error { return nil }, + func(*mvccpb.KeyValue) error { return nil }, func([]*clientv3.Event) error { return nil }, true, /* withPrefix */ ) @@ -533,7 +572,7 @@ func (suite *loopWatcherTestSuite) TestWatcherLoadLimit() { cache = append(cache, string(kv.Key)) return nil }, - func(kv *mvccpb.KeyValue) error { + func(*mvccpb.KeyValue) error { return nil }, func([]*clientv3.Event) error { @@ -557,9 +596,9 @@ func (suite *loopWatcherTestSuite) TestWatcherLoadLargeKey() { count := 65536 ctx, cancel := context.WithCancel(suite.ctx) defer cancel() - for i := 0; i < count; i++ { + testutil.GenerateTestDataConcurrently(count, func(i int) { suite.put(re, fmt.Sprintf("TestWatcherLoadLargeKey/test-%d", i), "") - } + }) cache := make([]string, 0) watcher := NewLoopWatcher( ctx, @@ -572,7 +611,7 @@ func (suite *loopWatcherTestSuite) TestWatcherLoadLargeKey() { cache = append(cache, string(kv.Key)) return nil }, - func(kv *mvccpb.KeyValue) error { + func(*mvccpb.KeyValue) error { return nil }, func([]*clientv3.Event) error { @@ -615,7 +654,7 @@ func (suite *loopWatcherTestSuite) TestWatcherBreak() { } return nil }, - func(kv *mvccpb.KeyValue) error { return nil }, + func(*mvccpb.KeyValue) error { return nil }, func([]*clientv3.Event) error { return nil }, false, /* withPrefix */ ) @@ -642,7 +681,7 @@ func (suite *loopWatcherTestSuite) TestWatcherBreak() { // Case2: close the etcd client and put a new value after watcher restarts suite.client.Close() - suite.client, err = CreateEtcdClient(nil, suite.config.LCUrls) + suite.client, err = CreateEtcdClient(nil, suite.config.ListenClientUrls) re.NoError(err) watcher.updateClientCh <- suite.client suite.put(re, "TestWatcherBreak", "2") @@ -650,7 +689,7 @@ func (suite *loopWatcherTestSuite) TestWatcherBreak() { // Case3: close the etcd client and put a new value before watcher restarts suite.client.Close() - suite.client, err = CreateEtcdClient(nil, suite.config.LCUrls) + suite.client, err = CreateEtcdClient(nil, suite.config.ListenClientUrls) re.NoError(err) suite.put(re, "TestWatcherBreak", "3") watcher.updateClientCh <- suite.client @@ -658,7 +697,7 @@ func (suite *loopWatcherTestSuite) TestWatcherBreak() { // Case4: close the etcd client and put a new value with compact suite.client.Close() - suite.client, err = CreateEtcdClient(nil, suite.config.LCUrls) + suite.client, err = CreateEtcdClient(nil, suite.config.ListenClientUrls) re.NoError(err) suite.put(re, "TestWatcherBreak", "4") resp, err := EtcdKVGet(suite.client, "TestWatcherBreak") @@ -693,11 +732,12 @@ func (suite *loopWatcherTestSuite) TestWatcherRequestProgress() { "test", "TestWatcherChanBlock", func([]*clientv3.Event) error { return nil }, - func(kv *mvccpb.KeyValue) error { return nil }, - func(kv *mvccpb.KeyValue) error { return nil }, + func(*mvccpb.KeyValue) error { return nil }, + func(*mvccpb.KeyValue) error { return nil }, func([]*clientv3.Event) error { return nil }, false, /* withPrefix */ ) + watcher.watchChTimeoutDuration = 2 * RequestProgressInterval 
suite.wg.Add(1) go func() { diff --git a/pkg/utils/etcdutil/health_checker.go b/pkg/utils/etcdutil/health_checker.go index 9ba7efa5903..c5ece5a0804 100644 --- a/pkg/utils/etcdutil/health_checker.go +++ b/pkg/utils/etcdutil/health_checker.go @@ -22,6 +22,7 @@ import ( "time" "github.com/pingcap/log" + "github.com/prometheus/client_golang/prometheus" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/pkg/utils/typeutil" @@ -29,34 +30,52 @@ import ( "go.uber.org/zap" ) +const pickedCountThreshold = 3 + // healthyClient will wrap an etcd client and record its last health time. // The etcd client inside will only maintain one connection to the etcd server // to make sure each healthyClient could be used to check the health of a certain // etcd endpoint without involving the load balancer of etcd client. type healthyClient struct { *clientv3.Client - lastHealth time.Time + lastHealth time.Time + healthState prometheus.Gauge + latency prometheus.Observer } // healthChecker is used to check the health of etcd endpoints. Inside the checker, // we will maintain a map from each available etcd endpoint to its healthyClient. type healthChecker struct { + source string tickerInterval time.Duration tlsConfig *tls.Config // Store as endpoint(string) -> *healthyClient healthyClients sync.Map + // evictedEps records the endpoints which are evicted from the last health patrol, + // the value is the count the endpoint being picked continuously after evicted. + // Store as endpoint(string) -> pickedCount(int) + evictedEps sync.Map // client is the etcd client the health checker is guarding, it will be set with // the checked healthy endpoints dynamically and periodically. client *clientv3.Client + + endpointCountState prometheus.Gauge } // initHealthChecker initializes the health checker for etcd client. -func initHealthChecker(tickerInterval time.Duration, tlsConfig *tls.Config, client *clientv3.Client) { +func initHealthChecker( + tickerInterval time.Duration, + tlsConfig *tls.Config, + client *clientv3.Client, + source string, +) { healthChecker := &healthChecker{ - tickerInterval: tickerInterval, - tlsConfig: tlsConfig, - client: client, + source: source, + tickerInterval: tickerInterval, + tlsConfig: tlsConfig, + client: client, + endpointCountState: etcdStateGauge.WithLabelValues(source, endpointLabel), } // A health checker has the same lifetime with the given etcd client. ctx := client.Ctx() @@ -74,7 +93,8 @@ func (checker *healthChecker) syncer(ctx context.Context) { for { select { case <-ctx.Done(): - log.Info("etcd client is closed, exit update endpoint goroutine") + log.Info("etcd client is closed, exit the endpoint syncer goroutine", + zap.String("source", checker.source)) return case <-ticker.C: checker.update() @@ -90,13 +110,13 @@ func (checker *healthChecker) inspector(ctx context.Context) { for { select { case <-ctx.Done(): - log.Info("etcd client is closed, exit health check goroutine") + log.Info("etcd client is closed, exit the health inspector goroutine", + zap.String("source", checker.source)) checker.close() return case <-ticker.C: - lastEps := checker.client.Endpoints() - healthyEps := checker.patrol(ctx) - if len(healthyEps) == 0 { + lastEps, pickedEps, changed := checker.patrol(ctx) + if len(pickedEps) == 0 { // when no endpoint could be used, try to reset endpoints to update connect rather // than delete them to avoid there is no any endpoint in client. // Note: reset endpoints will trigger sub-connection closed, and then trigger reconnection. 
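A brief sketch of how a caller can tag the etcd client it creates so that the health checker's logs and metrics above carry a meaningful source label. The endpoint URL and the "tso-primary-watcher" label are made-up examples; omitting the argument falls back to "default-etcd-client" as wired in CreateEtcdClient.

package main

import (
	"net/url"

	"github.com/tikv/pd/pkg/utils/etcdutil"
)

func main() {
	// Hypothetical endpoint; in PD this normally comes from the server config.
	u, _ := url.Parse("http://127.0.0.1:2379")
	// The new variadic sourceOpt names the guarded client for the health checker.
	client, err := etcdutil.CreateEtcdClient(nil /* no TLS */, []url.URL{*u}, "tso-primary-watcher")
	if err != nil {
		panic(err)
	}
	defer client.Close()
}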
@@ -104,29 +124,32 @@ func (checker *healthChecker) inspector(ctx context.Context) { // and it cannot recover as soon as possible. if time.Since(lastAvailable) > etcdServerDisconnectedTimeout { log.Info("no available endpoint, try to reset endpoints", - zap.Strings("last-endpoints", lastEps)) - resetClientEndpoints(checker.client, lastEps...) - } - } else { - if !typeutil.AreStringSlicesEquivalent(healthyEps, lastEps) { - oldNum, newNum := len(lastEps), len(healthyEps) - checker.client.SetEndpoints(healthyEps...) - etcdStateGauge.WithLabelValues("endpoints").Set(float64(newNum)) - log.Info("update endpoints", - zap.String("num-change", fmt.Sprintf("%d->%d", oldNum, newNum)), zap.Strings("last-endpoints", lastEps), - zap.Strings("endpoints", checker.client.Endpoints())) + zap.String("source", checker.source)) + resetClientEndpoints(checker.client, lastEps...) } - lastAvailable = time.Now() + continue + } + if changed { + oldNum, newNum := len(lastEps), len(pickedEps) + checker.client.SetEndpoints(pickedEps...) + checker.endpointCountState.Set(float64(newNum)) + log.Info("update endpoints", + zap.String("num-change", fmt.Sprintf("%d->%d", oldNum, newNum)), + zap.Strings("last-endpoints", lastEps), + zap.Strings("endpoints", checker.client.Endpoints()), + zap.String("source", checker.source)) } + lastAvailable = time.Now() } } } func (checker *healthChecker) close() { - checker.healthyClients.Range(func(key, value interface{}) bool { - client := value.(*healthyClient) - client.Close() + checker.healthyClients.Range(func(_, value any) bool { + healthyCli := value.(*healthyClient) + healthyCli.healthState.Set(0) + healthyCli.Client.Close() return true }) } @@ -137,46 +160,196 @@ func resetClientEndpoints(client *clientv3.Client, endpoints ...string) { client.SetEndpoints(endpoints...) } +type healthProbe struct { + ep string + took time.Duration +} + // See https://github.com/etcd-io/etcd/blob/85b640cee793e25f3837c47200089d14a8392dc7/etcdctl/ctlv3/command/ep_command.go#L105-L145 -func (checker *healthChecker) patrol(ctx context.Context) []string { +func (checker *healthChecker) patrol(ctx context.Context) ([]string, []string, bool) { var ( - count = checker.clientCount() - hch = make(chan string, count) - healthyList = make([]string, 0, count) - wg sync.WaitGroup + count = checker.clientCount() + probeCh = make(chan healthProbe, count) + wg sync.WaitGroup ) - checker.healthyClients.Range(func(key, value interface{}) bool { + checker.healthyClients.Range(func(key, value any) bool { wg.Add(1) - go func(key, value interface{}) { + go func(key, value any) { defer wg.Done() defer logutil.LogPanic() var ( - ep = key.(string) - client = value.(*healthyClient) + ep = key.(string) + healthyCli = value.(*healthyClient) + client = healthyCli.Client + healthState = healthyCli.healthState + latency = healthyCli.latency + start = time.Now() ) - if IsHealthy(ctx, client.Client) { - hch <- ep - checker.storeClient(ep, &healthyClient{ - Client: client.Client, - lastHealth: time.Now(), - }) + // Check the health of the endpoint. + healthy := IsHealthy(ctx, client) + took := time.Since(start) + latency.Observe(took.Seconds()) + if !healthy { + healthState.Set(0) + log.Warn("etcd endpoint is unhealthy", + zap.String("endpoint", ep), + zap.Duration("took", took), + zap.String("source", checker.source)) return } + healthState.Set(1) + // If the endpoint is healthy, update its last health time. + checker.storeClient(ep, client, start) + // Send the healthy probe result to the channel. 
+ probeCh <- healthProbe{ep, took} }(key, value) return true }) wg.Wait() - close(hch) - for h := range hch { - healthyList = append(healthyList, h) + close(probeCh) + var ( + lastEps = checker.client.Endpoints() + pickedEps = checker.pickEps(probeCh) + ) + if len(pickedEps) > 0 { + checker.updateEvictedEps(lastEps, pickedEps) + pickedEps = checker.filterEps(pickedEps) + } + return lastEps, pickedEps, !typeutil.AreStringSlicesEquivalent(lastEps, pickedEps) +} + +// Divide the acceptable latency range into several parts, and pick the endpoints which +// are in the first acceptable latency range. Currently, we only take the latency of the +// last health check into consideration, and maybe in the future we could introduce more +// factors to help improving the selection strategy. +func (checker *healthChecker) pickEps(probeCh <-chan healthProbe) []string { + var ( + count = len(probeCh) + pickedEps = make([]string, 0, count) + ) + if count == 0 { + return pickedEps + } + // Consume the `probeCh` to build a reusable slice. + probes := make([]healthProbe, 0, count) + for probe := range probeCh { + probes = append(probes, probe) + } + // Take the default value as an example, if we have 3 endpoints with latency like: + // - A: 175ms + // - B: 50ms + // - C: 2.5s + // the distribution will be like: + // - [0, 1s) -> {A, B} + // - [1s, 2s) + // - [2s, 3s) -> {C} + // - ... + // - [9s, 10s) + // Then the picked endpoints will be {A, B} and if C is in the last used endpoints, it will be evicted later. + factor := int(DefaultRequestTimeout / DefaultSlowRequestTime) + for i := 0; i < factor; i++ { + minLatency, maxLatency := DefaultSlowRequestTime*time.Duration(i), DefaultSlowRequestTime*time.Duration(i+1) + for _, probe := range probes { + if minLatency <= probe.took && probe.took < maxLatency { + log.Debug("pick healthy etcd endpoint within acceptable latency range", + zap.Duration("min-latency", minLatency), + zap.Duration("max-latency", maxLatency), + zap.Duration("took", probe.took), + zap.String("endpoint", probe.ep), + zap.String("source", checker.source)) + pickedEps = append(pickedEps, probe.ep) + } + } + if len(pickedEps) > 0 { + break + } + } + return pickedEps +} + +func (checker *healthChecker) updateEvictedEps(lastEps, pickedEps []string) { + // Create a set of picked endpoints for faster lookup + pickedSet := make(map[string]bool, len(pickedEps)) + for _, ep := range pickedEps { + pickedSet[ep] = true + } + // Reset the count to 0 if it's in `evictedEps` but not in `pickedEps`. + checker.evictedEps.Range(func(key, value any) bool { + ep := key.(string) + count := value.(int) + if count > 0 && !pickedSet[ep] { + checker.evictedEps.Store(ep, 0) + log.Info("reset evicted etcd endpoint picked count", + zap.String("endpoint", ep), + zap.Int("previous-count", count), + zap.String("source", checker.source)) + } + return true + }) + // Find all endpoints which are in `lastEps` and `healthyClients` but not in `pickedEps`, + // and add them to the `evictedEps`. + for _, ep := range lastEps { + if pickedSet[ep] { + continue + } + if hc := checker.loadClient(ep); hc == nil { + continue + } + checker.evictedEps.Store(ep, 0) + log.Info("evicted etcd endpoint found", + zap.String("endpoint", ep), + zap.String("source", checker.source)) + } + // Find all endpoints which are in both `pickedEps` and `evictedEps` to + // increase their picked count. + for _, ep := range pickedEps { + if count, ok := checker.evictedEps.Load(ep); ok { + // Increase the count the endpoint being picked continuously. 
+ checker.evictedEps.Store(ep, count.(int)+1) + log.Info("evicted etcd endpoint picked again", + zap.Int("picked-count-threshold", pickedCountThreshold), + zap.Int("picked-count", count.(int)+1), + zap.String("endpoint", ep), + zap.String("source", checker.source)) + } + } +} + +// Filter out the endpoints that are in evictedEps and have not been continuously picked +// for `pickedCountThreshold` times still, this is to ensure the evicted endpoints truly +// become available before adding them back to the client. +func (checker *healthChecker) filterEps(eps []string) []string { + pickedEps := make([]string, 0, len(eps)) + for _, ep := range eps { + if count, ok := checker.evictedEps.Load(ep); ok { + if count.(int) < pickedCountThreshold { + continue + } + checker.evictedEps.Delete(ep) + log.Info("add evicted etcd endpoint back", + zap.Int("picked-count-threshold", pickedCountThreshold), + zap.Int("picked-count", count.(int)), + zap.String("endpoint", ep), + zap.String("source", checker.source)) + } + pickedEps = append(pickedEps, ep) + } + // If the pickedEps is empty, it means all endpoints are evicted, + // to gain better availability, just use the original picked endpoints. + if len(pickedEps) == 0 { + log.Warn("all etcd endpoints are evicted, use the picked endpoints directly", + zap.Strings("endpoints", eps), + zap.String("source", checker.source)) + return eps } - return healthyList + return pickedEps } func (checker *healthChecker) update() { - eps := syncUrls(checker.client) + eps := checker.syncURLs() if len(eps) == 0 { - log.Warn("no available etcd endpoint returned by etcd cluster") + log.Warn("no available etcd endpoint returned by etcd cluster", + zap.String("source", checker.source)) return } epMap := make(map[string]struct{}, len(eps)) @@ -189,7 +362,7 @@ func (checker *healthChecker) update() { for ep := range epMap { client := checker.loadClient(ep) if client == nil { - checker.addClient(ep, time.Now()) + checker.initClient(ep) continue } since := time.Since(client.lastHealth) @@ -197,7 +370,8 @@ func (checker *healthChecker) update() { if since > etcdServerOfflineTimeout { log.Info("etcd server might be offline, try to remove it", zap.Duration("since-last-health", since), - zap.String("endpoint", ep)) + zap.String("endpoint", ep), + zap.String("source", checker.source)) checker.removeClient(ep) continue } @@ -205,15 +379,18 @@ func (checker *healthChecker) update() { if since > etcdServerDisconnectedTimeout { log.Info("etcd server might be disconnected, try to reconnect", zap.Duration("since-last-health", since), - zap.String("endpoint", ep)) + zap.String("endpoint", ep), + zap.String("source", checker.source)) resetClientEndpoints(client.Client, ep) } } // Clean up the stale clients which are not in the etcd cluster anymore. 
- checker.healthyClients.Range(func(key, value interface{}) bool { + checker.healthyClients.Range(func(key, _ any) bool { ep := key.(string) if _, ok := epMap[ep]; !ok { - log.Info("remove stale etcd client", zap.String("endpoint", ep)) + log.Info("remove stale etcd client", + zap.String("endpoint", ep), + zap.String("source", checker.source)) checker.removeClient(ep) } return true @@ -222,7 +399,7 @@ func (checker *healthChecker) update() { func (checker *healthChecker) clientCount() int { count := 0 - checker.healthyClients.Range(func(_, _ interface{}) bool { + checker.healthyClients.Range(func(_, _ any) bool { count++ return true }) @@ -236,40 +413,48 @@ func (checker *healthChecker) loadClient(ep string) *healthyClient { return nil } -func (checker *healthChecker) addClient(ep string, lastHealth time.Time) { +func (checker *healthChecker) initClient(ep string) { client, err := newClient(checker.tlsConfig, ep) if err != nil { log.Error("failed to create etcd healthy client", zap.String("endpoint", ep), + zap.String("source", checker.source), zap.Error(err)) return } - checker.healthyClients.Store(ep, &healthyClient{ - Client: client, - lastHealth: lastHealth, - }) + checker.storeClient(ep, client, time.Now()) } -func (checker *healthChecker) storeClient(ep string, client *healthyClient) { - checker.healthyClients.Store(ep, client) +func (checker *healthChecker) storeClient(ep string, client *clientv3.Client, lastHealth time.Time) { + checker.healthyClients.Store(ep, &healthyClient{ + Client: client, + lastHealth: lastHealth, + healthState: etcdStateGauge.WithLabelValues(checker.source, ep), + latency: etcdEndpointLatency.WithLabelValues(checker.source, ep), + }) } func (checker *healthChecker) removeClient(ep string) { if client, ok := checker.healthyClients.LoadAndDelete(ep); ok { - err := client.(*healthyClient).Close() - if err != nil { + healthyCli := client.(*healthyClient) + healthyCli.healthState.Set(0) + if err := healthyCli.Close(); err != nil { log.Error("failed to close etcd healthy client", zap.String("endpoint", ep), + zap.String("source", checker.source), zap.Error(err)) } } + checker.evictedEps.Delete(ep) } // See https://github.com/etcd-io/etcd/blob/85b640cee793e25f3837c47200089d14a8392dc7/clientv3/client.go#L170-L183 -func syncUrls(client *clientv3.Client) (eps []string) { - resp, err := ListEtcdMembers(clientv3.WithRequireLeader(client.Ctx()), client) +func (checker *healthChecker) syncURLs() (eps []string) { + resp, err := ListEtcdMembers(clientv3.WithRequireLeader(checker.client.Ctx()), checker.client) if err != nil { - log.Error("failed to list members", errs.ZapError(err)) + log.Error("failed to list members", + zap.String("source", checker.source), + errs.ZapError(err)) return nil } for _, m := range resp.Members { diff --git a/pkg/utils/etcdutil/health_checker_test.go b/pkg/utils/etcdutil/health_checker_test.go new file mode 100644 index 00000000000..07a8024e63c --- /dev/null +++ b/pkg/utils/etcdutil/health_checker_test.go @@ -0,0 +1,389 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package etcdutil + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +type testCase struct { + healthProbes []healthProbe + expectedEvictedEps map[string]int + expectedPickedEps []string +} + +func check(re *require.Assertions, testCases []*testCase) { + checker := &healthChecker{} + lastEps := []string{} + for idx, tc := range testCases { + // Send the health probes to the channel. + probeCh := make(chan healthProbe, len(tc.healthProbes)) + for _, probe := range tc.healthProbes { + probeCh <- probe + // Mock that all the endpoints are healthy. + checker.healthyClients.LoadOrStore(probe.ep, &healthyClient{}) + } + close(probeCh) + // Pick and filter the endpoints. + pickedEps := checker.pickEps(probeCh) + checker.updateEvictedEps(lastEps, pickedEps) + pickedEps = checker.filterEps(pickedEps) + // Check the evicted states after finishing picking. + count := 0 + checker.evictedEps.Range(func(key, value any) bool { + count++ + ep := key.(string) + times := value.(int) + re.Equal(tc.expectedEvictedEps[ep], times, "case %d ep %s", idx, ep) + return true + }) + re.Len(tc.expectedEvictedEps, count, "case %d", idx) + re.Equal(tc.expectedPickedEps, pickedEps, "case %d", idx) + lastEps = pickedEps + } +} + +// Test the endpoint picking and evicting logic. +func TestPickEps(t *testing.T) { + re := require.New(t) + testCases := []*testCase{ + // {} -> {A, B} + { + []healthProbe{ + { + ep: "A", + took: time.Millisecond, + }, + { + ep: "B", + took: time.Millisecond, + }, + }, + map[string]int{}, + []string{"A", "B"}, + }, + // {A, B} -> {A, B, C} + { + []healthProbe{ + { + ep: "A", + took: time.Millisecond, + }, + { + ep: "B", + took: time.Millisecond, + }, + { + ep: "C", + took: time.Millisecond, + }, + }, + map[string]int{}, + []string{"A", "B", "C"}, + }, + // {A, B, C} -> {A, B, C} + { + []healthProbe{ + { + ep: "A", + took: time.Millisecond, + }, + { + ep: "B", + took: time.Millisecond, + }, + { + ep: "C", + took: time.Millisecond, + }, + }, + map[string]int{}, + []string{"A", "B", "C"}, + }, + // {A, B, C} -> {C} + { + []healthProbe{ + { + ep: "C", + took: time.Millisecond, + }, + }, + map[string]int{"A": 0, "B": 0}, + []string{"C"}, + }, + // {C} -> {A, B, C} + { + []healthProbe{ + { + ep: "A", + took: time.Millisecond, + }, + { + ep: "B", + took: time.Millisecond, + }, + { + ep: "C", + took: time.Millisecond, + }, + }, + map[string]int{"A": 1, "B": 1}, + []string{"C"}, + }, + // {C} -> {B, C} + { + []healthProbe{ + { + ep: "B", + took: time.Millisecond, + }, + { + ep: "C", + took: time.Millisecond, + }, + }, + map[string]int{"A": 0, "B": 2}, + []string{"C"}, + }, + // {C} -> {A, B, C} + { + []healthProbe{ + { + ep: "A", + took: time.Millisecond, + }, + { + ep: "B", + took: time.Millisecond, + }, + { + ep: "C", + took: time.Millisecond, + }, + }, + map[string]int{"A": 1}, + []string{"B", "C"}, + }, + // {B, C} -> {D} + { + []healthProbe{ + { + ep: "D", + took: time.Millisecond, + }, + }, + map[string]int{"A": 0, "B": 0, "C": 0}, + []string{"D"}, + }, + // {D} -> {B, C} + { + []healthProbe{ + { + ep: "B", + took: time.Millisecond, + }, + { + ep: "C", + took: time.Millisecond, + }, + }, + map[string]int{"A": 0, "B": 1, "C": 1, "D": 0}, + []string{"B", "C"}, + }, + // {B, C} -> {A, B, C} + { + []healthProbe{ + { + ep: "A", + took: time.Millisecond, + }, + { + ep: "B", + took: time.Millisecond, + }, + { + ep: "C", + took: time.Millisecond, + }, + }, + 
map[string]int{"A": 1, "B": 2, "C": 2, "D": 0}, + []string{"A", "B", "C"}, + }, + // {A, B, C} -> {A, C, E} + { + []healthProbe{ + { + ep: "A", + took: time.Millisecond, + }, + { + ep: "C", + took: time.Millisecond, + }, + { + ep: "E", + took: time.Millisecond, + }, + }, + map[string]int{"A": 2, "B": 0, "D": 0}, + []string{"C", "E"}, + }, + } + check(re, testCases) +} + +func TestLatencyPick(t *testing.T) { + re := require.New(t) + testCases := []*testCase{ + // {} -> {A, B} + { + []healthProbe{ + { + ep: "A", + took: time.Millisecond, + }, + { + ep: "B", + took: time.Millisecond, + }, + }, + map[string]int{}, + []string{"A", "B"}, + }, + // {A, B} -> {A, B, C} + { + []healthProbe{ + { + ep: "A", + took: time.Millisecond, + }, + { + ep: "B", + took: time.Millisecond, + }, + { + ep: "C", + took: time.Second, + }, + }, + map[string]int{}, + []string{"A", "B"}, + }, + // {A, B} -> {A, B, C} + { + []healthProbe{ + { + ep: "A", + took: time.Second, + }, + { + ep: "B", + took: time.Second, + }, + { + ep: "C", + took: 2 * time.Second, + }, + }, + map[string]int{}, + []string{"A", "B"}, + }, + // {A, B} -> {A, B, C} + { + []healthProbe{ + { + ep: "A", + took: time.Second, + }, + { + ep: "B", + took: 2 * time.Second, + }, + { + ep: "C", + took: 3 * time.Second, + }, + }, + map[string]int{"B": 0}, + []string{"A"}, + }, + // {A} -> {A, B, C} + { + []healthProbe{ + { + ep: "A", + took: time.Second, + }, + { + ep: "B", + took: time.Second, + }, + { + ep: "C", + took: time.Millisecond, + }, + }, + map[string]int{"A": 0, "B": 0}, + []string{"C"}, + }, + // {C} -> {A, B, C} + { + []healthProbe{ + { + ep: "A", + took: time.Millisecond, + }, + { + ep: "B", + took: time.Millisecond, + }, + { + ep: "C", + took: time.Second, + }, + }, + map[string]int{"A": 1, "B": 1, "C": 0}, + []string{"A", "B"}, + }, + } + check(re, testCases) +} + +func TestUpdateEvictedEpsAfterRemoval(t *testing.T) { + re := require.New(t) + var ( + checker = &healthChecker{} + lastEps = []string{"A", "B", "C"} + pickedEps = []string{"A", "C"} + ) + // All endpoints are healthy. + for _, ep := range lastEps { + checker.healthyClients.Store(ep, &healthyClient{}) + } + checker.updateEvictedEps(lastEps, pickedEps) + // B should be evicted. + _, ok := checker.evictedEps.Load("B") + re.True(ok) + // Remove the endpoint B to mock member removal. + checker.healthyClients.Delete("B") + checker.evictedEps.Delete("B") + checker.updateEvictedEps(lastEps, pickedEps) + // B should not be evicted since it has been removed. 
+ _, ok = checker.evictedEps.Load("B") + re.False(ok) +} diff --git a/pkg/utils/etcdutil/metrics.go b/pkg/utils/etcdutil/metrics.go index f78e0864ba2..5d0eae1607e 100644 --- a/pkg/utils/etcdutil/metrics.go +++ b/pkg/utils/etcdutil/metrics.go @@ -16,14 +16,32 @@ package etcdutil import "github.com/prometheus/client_golang/prometheus" -var etcdStateGauge = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: "pd", - Subsystem: "server", - Name: "etcd_client", - Help: "Etcd client states.", - }, []string{"type"}) +var ( + sourceLabel = "source" + typeLabel = "type" + endpointLabel = "endpoint" +) + +var ( + etcdStateGauge = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "pd", + Subsystem: "server", + Name: "etcd_client", + Help: "Etcd client states.", + }, []string{sourceLabel, typeLabel}) + + etcdEndpointLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "pd", + Subsystem: "server", + Name: "etcd_endpoint_latency_seconds", + Help: "Bucketed histogram of latency of health check.", + Buckets: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + }, []string{sourceLabel, endpointLabel}) +) func init() { prometheus.MustRegister(etcdStateGauge) + prometheus.MustRegister(etcdEndpointLatency) } diff --git a/pkg/utils/etcdutil/testutil.go b/pkg/utils/etcdutil/testutil.go index 57f7200ecb8..13c10260a40 100644 --- a/pkg/utils/etcdutil/testutil.go +++ b/pkg/utils/etcdutil/testutil.go @@ -38,20 +38,20 @@ func NewTestSingleConfig() *embed.Config { cfg.LogOutputs = []string{"stdout"} pu, _ := url.Parse(tempurl.Alloc()) - cfg.LPUrls = []url.URL{*pu} - cfg.APUrls = cfg.LPUrls + cfg.ListenPeerUrls = []url.URL{*pu} + cfg.AdvertisePeerUrls = cfg.ListenPeerUrls cu, _ := url.Parse(tempurl.Alloc()) - cfg.LCUrls = []url.URL{*cu} - cfg.ACUrls = cfg.LCUrls + cfg.ListenClientUrls = []url.URL{*cu} + cfg.AdvertiseClientUrls = cfg.ListenClientUrls cfg.StrictReconfigCheck = false - cfg.InitialCluster = fmt.Sprintf("%s=%s", cfg.Name, &cfg.LPUrls[0]) + cfg.InitialCluster = fmt.Sprintf("%s=%s", cfg.Name, &cfg.ListenPeerUrls[0]) cfg.ClusterState = embed.ClusterStateFlagNew return cfg } func genRandName() string { - return "test_etcd_" + strconv.FormatInt(time.Now().UnixNano()%10000, 10) + return "test_etcd_" + strconv.FormatInt(time.Now().UnixNano(), 10) } // NewTestEtcdCluster is used to create a etcd cluster for the unit test purpose. @@ -63,7 +63,7 @@ func NewTestEtcdCluster(t *testing.T, count int) (servers []*embed.Etcd, etcdCli cfg.Dir = t.TempDir() etcd, err := embed.StartEtcd(cfg) re.NoError(err) - etcdClient, err = CreateEtcdClient(nil, cfg.LCUrls) + etcdClient, err = CreateEtcdClient(nil, cfg.ListenClientUrls) re.NoError(err) <-etcd.Server.ReadyNotify() servers = append(servers, etcd) @@ -101,9 +101,9 @@ func MustAddEtcdMember(t *testing.T, cfg1 *embed.Config, client *clientv3.Client cfg2 := NewTestSingleConfig() cfg2.Dir = t.TempDir() cfg2.Name = genRandName() - cfg2.InitialCluster = cfg1.InitialCluster + fmt.Sprintf(",%s=%s", cfg2.Name, &cfg2.LPUrls[0]) + cfg2.InitialCluster = cfg1.InitialCluster + fmt.Sprintf(",%s=%s", cfg2.Name, &cfg2.ListenPeerUrls[0]) cfg2.ClusterState = embed.ClusterStateFlagExisting - peerURL := cfg2.LPUrls[0].String() + peerURL := cfg2.ListenPeerUrls[0].String() addResp, err := AddEtcdMember(client, []string{peerURL}) re.NoError(err) // Check the client can get the new member. 
@@ -122,18 +122,27 @@ func MustAddEtcdMember(t *testing.T, cfg1 *embed.Config, client *clientv3.Client func checkMembers(re *require.Assertions, client *clientv3.Client, etcds []*embed.Etcd) { // Check the client can get the new member. - listResp, err := ListEtcdMembers(client.Ctx(), client) - re.NoError(err) - re.Len(listResp.Members, len(etcds)) - inList := func(m *etcdserverpb.Member) bool { - for _, etcd := range etcds { - if m.ID == uint64(etcd.Server.ID()) { - return true + testutil.Eventually(re, func() bool { + listResp, err := ListEtcdMembers(client.Ctx(), client) + if err != nil { + return false + } + if len(etcds) != len(listResp.Members) { + return false + } + inList := func(m *etcdserverpb.Member) bool { + for _, etcd := range etcds { + if m.ID == uint64(etcd.Server.ID()) { + return true + } } + return false } - return false - } - for _, m := range listResp.Members { - re.True(inList(m)) - } + for _, m := range listResp.Members { + if !inList(m) { + return false + } + } + return true + }) } diff --git a/pkg/utils/grpcutil/grpcutil.go b/pkg/utils/grpcutil/grpcutil.go index 1bfb64868f3..5633533ae4a 100644 --- a/pkg/utils/grpcutil/grpcutil.go +++ b/pkg/utils/grpcutil/grpcutil.go @@ -30,8 +30,10 @@ import ( "go.etcd.io/etcd/pkg/transport" "go.uber.org/zap" "google.golang.org/grpc" + "google.golang.org/grpc/backoff" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" + "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/metadata" ) @@ -138,7 +140,7 @@ func (s TLSConfig) GetOneAllowedCN() (string, error) { // ctx will be noop. Users should call ClientConn.Close to terminate all the // pending operations after this function returns. func GetClientConn(ctx context.Context, addr string, tlsCfg *tls.Config, do ...grpc.DialOption) (*grpc.ClientConn, error) { - opt := grpc.WithInsecure() + opt := grpc.WithTransportCredentials(insecure.NewCredentials()) if tlsCfg != nil { creds := credentials.NewTLS(tlsCfg) opt = grpc.WithTransportCredentials(creds) @@ -147,7 +149,18 @@ func GetClientConn(ctx context.Context, addr string, tlsCfg *tls.Config, do ...g if err != nil { return nil, errs.ErrURLParse.Wrap(err).GenWithStackByCause() } - cc, err := grpc.DialContext(ctx, u.Host, append(do, opt)...) + // Here we use a shorter MaxDelay to make the connection recover faster. + // The default MaxDelay is 120s, which is too long for us. + backoffOpts := grpc.WithConnectParams(grpc.ConnectParams{ + Backoff: backoff.Config{ + BaseDelay: time.Second, + Multiplier: 1.6, + Jitter: 0.2, + MaxDelay: 3 * time.Second, + }, + }) + do = append(do, opt, backoffOpts) + cc, err := grpc.DialContext(ctx, u.Host, do...) if err != nil { return nil, errs.ErrGRPCDial.Wrap(err).GenWithStackByCause() } @@ -156,8 +169,8 @@ func GetClientConn(ctx context.Context, addr string, tlsCfg *tls.Config, do ...g // BuildForwardContext creates a context with receiver metadata information. // It is used in client side. -func BuildForwardContext(ctx context.Context, addr string) context.Context { - md := metadata.Pairs(ForwardMetadataKey, addr) +func BuildForwardContext(ctx context.Context, url string) context.Context { + md := metadata.Pairs(ForwardMetadataKey, url) return metadata.NewOutgoingContext(ctx, md) } @@ -173,13 +186,9 @@ func ResetForwardContext(ctx context.Context) context.Context { // GetForwardedHost returns the forwarded host in metadata. 
func GetForwardedHost(ctx context.Context) string { - md, ok := metadata.FromIncomingContext(ctx) - if !ok { - log.Debug("failed to get gRPC incoming metadata when getting forwarded host") - return "" - } - if t, ok := md[ForwardMetadataKey]; ok { - return t[0] + s := metadata.ValueFromIncomingContext(ctx, ForwardMetadataKey) + if len(s) > 0 { + return s[0] } return "" } @@ -252,11 +261,11 @@ func CheckStream(ctx context.Context, cancel context.CancelFunc, done chan struc // NeedRebuildConnection checks if the error is a connection error. func NeedRebuildConnection(err error) bool { - return err == io.EOF || + return (err != nil) && (err == io.EOF || strings.Contains(err.Error(), codes.Unavailable.String()) || // Unavailable indicates the service is currently unavailable. This is a most likely a transient condition. strings.Contains(err.Error(), codes.DeadlineExceeded.String()) || // DeadlineExceeded means operation expired before completion. strings.Contains(err.Error(), codes.Internal.String()) || // Internal errors. strings.Contains(err.Error(), codes.Unknown.String()) || // Unknown error. - strings.Contains(err.Error(), codes.ResourceExhausted.String()) // ResourceExhausted is returned when either the client or the server has exhausted their resources. + strings.Contains(err.Error(), codes.ResourceExhausted.String())) // ResourceExhausted is returned when either the client or the server has exhausted their resources. // Besides, we don't need to rebuild the connection if the code is Canceled, which means the client cancelled the request. } diff --git a/pkg/utils/grpcutil/grpcutil_test.go b/pkg/utils/grpcutil/grpcutil_test.go index 21b7e1a4acb..99cbeae6cde 100644 --- a/pkg/utils/grpcutil/grpcutil_test.go +++ b/pkg/utils/grpcutil/grpcutil_test.go @@ -1,6 +1,7 @@ package grpcutil import ( + "context" "os" "os/exec" "path" @@ -9,6 +10,7 @@ import ( "github.com/pingcap/errors" "github.com/stretchr/testify/require" "github.com/tikv/pd/pkg/errs" + "google.golang.org/grpc/metadata" ) var ( @@ -37,7 +39,6 @@ func TestToTLSConfig(t *testing.T) { } }() - t.Parallel() re := require.New(t) tlsConfig := TLSConfig{ KeyPath: path.Join(certPath, "pd-server-key.pem"), @@ -67,3 +68,14 @@ func TestToTLSConfig(t *testing.T) { _, err = tlsConfig.ToTLSConfig() re.True(errors.ErrorEqual(err, errs.ErrCryptoAppendCertsFromPEM)) } + +func BenchmarkGetForwardedHost(b *testing.B) { + // Without forwarded host key + md := metadata.Pairs("test", "example.com") + ctx := metadata.NewIncomingContext(context.Background(), md) + + // Run the GetForwardedHost function b.N times + for i := 0; i < b.N; i++ { + GetForwardedHost(ctx) + } +} diff --git a/pkg/utils/jsonutil/jsonutil.go b/pkg/utils/jsonutil/jsonutil.go index 32549a2698f..141dbb570ce 100644 --- a/pkg/utils/jsonutil/jsonutil.go +++ b/pkg/utils/jsonutil/jsonutil.go @@ -22,8 +22,8 @@ import ( ) // AddKeyValue is used to add a key value pair into `old` -func AddKeyValue(old interface{}, key string, value interface{}) (updated bool, found bool, err error) { - data, err := json.Marshal(map[string]interface{}{key: value}) +func AddKeyValue(old any, key string, value any) (updated bool, found bool, err error) { + data, err := json.Marshal(map[string]any{key: value}) if err != nil { return false, false, err } @@ -31,7 +31,7 @@ func AddKeyValue(old interface{}, key string, value interface{}) (updated bool, } // MergeJSONObject is used to merge a marshaled json object into v -func MergeJSONObject(v interface{}, data []byte) (updated bool, found bool, err error) { +func 
MergeJSONObject(v any, data []byte) (updated bool, found bool, err error) { old, _ := json.Marshal(v) if err := json.Unmarshal(data, v); err != nil { return false, false, err @@ -40,7 +40,7 @@ func MergeJSONObject(v interface{}, data []byte) (updated bool, found bool, err if !bytes.Equal(old, new) { return true, true, nil } - m := make(map[string]interface{}) + m := make(map[string]any) if err := json.Unmarshal(data, &m); err != nil { return false, false, err } diff --git a/pkg/utils/jsonutil/jsonutil_test.go b/pkg/utils/jsonutil/jsonutil_test.go index a046fbaf70a..1e8c21917ba 100644 --- a/pkg/utils/jsonutil/jsonutil_test.go +++ b/pkg/utils/jsonutil/jsonutil_test.go @@ -31,7 +31,6 @@ type testJSONStructLevel2 struct { } func TestJSONUtil(t *testing.T) { - t.Parallel() re := require.New(t) father := &testJSONStructLevel1{ Name: "father", diff --git a/pkg/utils/keyutil/util_test.go b/pkg/utils/keyutil/util_test.go index 374faa1f797..7bcb0a49c6f 100644 --- a/pkg/utils/keyutil/util_test.go +++ b/pkg/utils/keyutil/util_test.go @@ -21,7 +21,6 @@ import ( ) func TestKeyUtil(t *testing.T) { - t.Parallel() re := require.New(t) startKey := []byte("a") endKey := []byte("b") diff --git a/pkg/utils/logutil/log.go b/pkg/utils/logutil/log.go index 8c0977818fa..ff6ffa7af9a 100644 --- a/pkg/utils/logutil/log.go +++ b/pkg/utils/logutil/log.go @@ -149,7 +149,7 @@ type stringer struct { } // String implement fmt.Stringer -func (s stringer) String() string { +func (stringer) String() string { return "?" } diff --git a/pkg/utils/logutil/log_test.go b/pkg/utils/logutil/log_test.go index fd46acbdda3..650ba62fe9d 100644 --- a/pkg/utils/logutil/log_test.go +++ b/pkg/utils/logutil/log_test.go @@ -23,7 +23,6 @@ import ( ) func TestStringToZapLogLevel(t *testing.T) { - t.Parallel() re := require.New(t) re.Equal(zapcore.FatalLevel, StringToZapLogLevel("fatal")) re.Equal(zapcore.ErrorLevel, StringToZapLogLevel("ERROR")) @@ -35,13 +34,12 @@ func TestStringToZapLogLevel(t *testing.T) { } func TestRedactLog(t *testing.T) { - t.Parallel() re := require.New(t) testCases := []struct { name string - arg interface{} + arg any enableRedactLog bool - expect interface{} + expect any }{ { name: "string arg, enable redact", diff --git a/pkg/utils/metricutil/metricutil_test.go b/pkg/utils/metricutil/metricutil_test.go index b817eb0112d..a5c183abc20 100644 --- a/pkg/utils/metricutil/metricutil_test.go +++ b/pkg/utils/metricutil/metricutil_test.go @@ -23,7 +23,6 @@ import ( ) func TestCamelCaseToSnakeCase(t *testing.T) { - t.Parallel() re := require.New(t) inputs := []struct { name string @@ -56,7 +55,7 @@ func TestCamelCaseToSnakeCase(t *testing.T) { } } -func TestCoverage(t *testing.T) { +func TestCoverage(_ *testing.T) { cfgs := []*MetricConfig{ { PushJob: "j1", diff --git a/pkg/utils/netutil/address_test.go b/pkg/utils/netutil/address_test.go index faa3e2e1d04..127c9a6d0f7 100644 --- a/pkg/utils/netutil/address_test.go +++ b/pkg/utils/netutil/address_test.go @@ -22,7 +22,6 @@ import ( ) func TestResolveLoopBackAddr(t *testing.T) { - t.Parallel() re := require.New(t) nodes := []struct { address string @@ -40,7 +39,6 @@ func TestResolveLoopBackAddr(t *testing.T) { } func TestIsEnableHttps(t *testing.T) { - t.Parallel() re := require.New(t) re.False(IsEnableHTTPS(http.DefaultClient)) httpClient := &http.Client{ diff --git a/pkg/utils/operatorutil/operator_check.go b/pkg/utils/operatorutil/operator_check.go index f6517be29d7..61efd84ef1a 100644 --- a/pkg/utils/operatorutil/operator_check.go +++ 
b/pkg/utils/operatorutil/operator_check.go @@ -41,7 +41,7 @@ func CheckTransferLeaderFrom(re *require.Assertions, op *operator.Operator, kind func CheckMultiTargetTransferLeader(re *require.Assertions, op *operator.Operator, kind operator.OpKind, sourceID uint64, targetIDs []uint64) { re.NotNil(op) re.Equal(1, op.Len()) - expectedOps := make([]interface{}, 0, len(targetIDs)) + expectedOps := make([]any, 0, len(targetIDs)) for _, targetID := range targetIDs { expectedOps = append(expectedOps, operator.TransferLeader{FromStore: sourceID, ToStore: targetID, ToStores: targetIDs}) } @@ -65,11 +65,27 @@ func trimTransferLeaders(op *operator.Operator) (steps []operator.OpStep, lastLe // CheckTransferPeer checks if the operator is to transfer peer between the specified source and target stores. func CheckTransferPeer(re *require.Assertions, op *operator.Operator, kind operator.OpKind, sourceID, targetID uint64) { re.NotNil(op) + var addLearnerTo, removePeerFrom uint64 steps, _ := trimTransferLeaders(op) - re.Len(steps, 3) - re.Equal(targetID, steps[0].(operator.AddLearner).ToStore) - re.IsType(operator.PromoteLearner{}, steps[1]) - re.Equal(sourceID, steps[2].(operator.RemovePeer).FromStore) + switch len(steps) { + case 3: // without joint consensus + re.IsType(operator.AddLearner{}, steps[0]) + re.IsType(operator.PromoteLearner{}, steps[1]) + re.IsType(operator.RemovePeer{}, steps[2]) + addLearnerTo = steps[0].(operator.AddLearner).ToStore + removePeerFrom = steps[2].(operator.RemovePeer).FromStore + case 4: // with joint consensus + re.IsType(operator.AddLearner{}, steps[0]) + re.IsType(operator.ChangePeerV2Enter{}, steps[1]) + re.IsType(operator.ChangePeerV2Leave{}, steps[2]) + re.IsType(operator.RemovePeer{}, steps[3]) + addLearnerTo = steps[0].(operator.AddLearner).ToStore + removePeerFrom = steps[3].(operator.RemovePeer).FromStore + default: + re.FailNow("unexpected operator steps") + } + re.Equal(sourceID, removePeerFrom) + re.Equal(targetID, addLearnerTo) kind |= operator.OpRegion re.Equal(kind, op.Kind()&kind) } @@ -88,32 +104,36 @@ func CheckTransferLearner(re *require.Assertions, op *operator.Operator, kind op // CheckTransferPeerWithLeaderTransfer checks if the operator is to transfer // peer between the specified source and target stores and it meanwhile // transfers the leader out of source store. +// If targetID is 0, it means the operator is to transfer peer to any store. 
func CheckTransferPeerWithLeaderTransfer(re *require.Assertions, op *operator.Operator, kind operator.OpKind, sourceID, targetID uint64) { re.NotNil(op) + var addLearnerTo, removePeerFrom uint64 steps, lastLeader := trimTransferLeaders(op) - re.Len(steps, 3) - re.Equal(targetID, steps[0].(operator.AddLearner).ToStore) - re.IsType(operator.PromoteLearner{}, steps[1]) - re.Equal(sourceID, steps[2].(operator.RemovePeer).FromStore) + switch len(steps) { + case 3: // without joint consensus + re.IsType(operator.AddLearner{}, steps[0]) + re.IsType(operator.PromoteLearner{}, steps[1]) + re.IsType(operator.RemovePeer{}, steps[2]) + addLearnerTo = steps[0].(operator.AddLearner).ToStore + removePeerFrom = steps[2].(operator.RemovePeer).FromStore + case 4: // with joint consensus + re.IsType(operator.AddLearner{}, steps[0]) + re.IsType(operator.ChangePeerV2Enter{}, steps[1]) + re.IsType(operator.ChangePeerV2Leave{}, steps[2]) + re.IsType(operator.RemovePeer{}, steps[3]) + addLearnerTo = steps[0].(operator.AddLearner).ToStore + removePeerFrom = steps[3].(operator.RemovePeer).FromStore + default: + re.FailNow("unexpected operator steps") + } re.NotZero(lastLeader) re.NotEqual(sourceID, lastLeader) kind |= operator.OpRegion re.Equal(kind, op.Kind()&kind) -} - -// CheckTransferPeerWithLeaderTransferFrom checks if the operator is to transfer -// peer out of the specified store and it meanwhile transfers the leader out of -// the store. -func CheckTransferPeerWithLeaderTransferFrom(re *require.Assertions, op *operator.Operator, kind operator.OpKind, sourceID uint64) { - re.NotNil(op) - steps, lastLeader := trimTransferLeaders(op) - re.IsType(operator.AddLearner{}, steps[0]) - re.IsType(operator.PromoteLearner{}, steps[1]) - re.Equal(sourceID, steps[2].(operator.RemovePeer).FromStore) - re.NotZero(lastLeader) - re.NotEqual(sourceID, lastLeader) - kind |= operator.OpRegion | operator.OpLeader - re.Equal(kind, op.Kind()&kind) + re.Equal(sourceID, removePeerFrom) + if targetID != 0 { + re.Equal(targetID, addLearnerTo) + } } // CheckAddPeer checks if the operator is to add peer on specified store. 
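For context on the reworked checks in pkg/utils/operatorutil above, a minimal test sketch follows; it is hypothetical (the store IDs, the operator value, and the import paths are assumptions, not part of this patch) and only illustrates how the helpers are now meant to be called: CheckTransferPeer accepts both the 3-step and the 4-step joint-consensus shapes, and CheckTransferPeerWithLeaderTransfer treats targetID == 0 as "any destination store".

package example

import (
	"github.com/stretchr/testify/require"
	"github.com/tikv/pd/pkg/schedule/operator"
	"github.com/tikv/pd/pkg/utils/operatorutil"
)

// verifyTransferPeer is a hypothetical helper; op would come from the scheduler under test.
func verifyTransferPeer(re *require.Assertions, op *operator.Operator) {
	// Passes for both AddLearner/PromoteLearner/RemovePeer and the joint-consensus
	// AddLearner/ChangePeerV2Enter/ChangePeerV2Leave/RemovePeer step sequences.
	operatorutil.CheckTransferPeer(re, op, operator.OpRegion, 1, 2)
	// targetID == 0 only pins the source store; any destination store is accepted.
	operatorutil.CheckTransferPeerWithLeaderTransfer(re, op, operator.OpRegion, 1, 0)
}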
diff --git a/pkg/utils/reflectutil/tag.go b/pkg/utils/reflectutil/tag.go index 3671519f96b..1d04d25502b 100644 --- a/pkg/utils/reflectutil/tag.go +++ b/pkg/utils/reflectutil/tag.go @@ -44,7 +44,7 @@ func FindJSONFullTagByChildTag(t reflect.Type, tag string) string { } // FindSameFieldByJSON is used to check whether there is same field between `m` and `v` -func FindSameFieldByJSON(v interface{}, m map[string]interface{}) bool { +func FindSameFieldByJSON(v any, m map[string]any) bool { t := reflect.TypeOf(v).Elem() for i := 0; i < t.NumField(); i++ { jsonTag := t.Field(i).Tag.Get("json") diff --git a/pkg/utils/reflectutil/tag_test.go b/pkg/utils/reflectutil/tag_test.go index d8619898ea5..3e49e093912 100644 --- a/pkg/utils/reflectutil/tag_test.go +++ b/pkg/utils/reflectutil/tag_test.go @@ -35,7 +35,6 @@ type testStruct3 struct { } func TestFindJSONFullTagByChildTag(t *testing.T) { - t.Parallel() re := require.New(t) key := "enable" result := FindJSONFullTagByChildTag(reflect.TypeOf(testStruct1{}), key) @@ -51,21 +50,19 @@ func TestFindJSONFullTagByChildTag(t *testing.T) { } func TestFindSameFieldByJSON(t *testing.T) { - t.Parallel() re := require.New(t) - input := map[string]interface{}{ + input := map[string]any{ "name": "test2", } t2 := testStruct2{} re.True(FindSameFieldByJSON(&t2, input)) - input = map[string]interface{}{ + input = map[string]any{ "enable": "test2", } re.False(FindSameFieldByJSON(&t2, input)) } func TestFindFieldByJSONTag(t *testing.T) { - t.Parallel() re := require.New(t) t1 := testStruct1{} t2 := testStruct2{} diff --git a/pkg/utils/requestutil/context.go b/pkg/utils/requestutil/context.go index 1fdbac08a97..c49b94399d0 100644 --- a/pkg/utils/requestutil/context.go +++ b/pkg/utils/requestutil/context.go @@ -44,7 +44,7 @@ func WithEndTime(parent context.Context, endTime int64) context.Context { return context.WithValue(parent, endTimeKey, endTime) } -// EndTimeFrom returns the value of the excution info key on the ctx +// EndTimeFrom returns the value of the execution info key on the ctx func EndTimeFrom(ctx context.Context) (int64, bool) { info, ok := ctx.Value(endTimeKey).(int64) return info, ok diff --git a/pkg/utils/requestutil/context_test.go b/pkg/utils/requestutil/context_test.go index 298fc1ff8a3..e6bdcd7be46 100644 --- a/pkg/utils/requestutil/context_test.go +++ b/pkg/utils/requestutil/context_test.go @@ -24,7 +24,6 @@ import ( ) func TestRequestInfo(t *testing.T) { - t.Parallel() re := require.New(t) ctx := context.Background() _, ok := RequestInfoFrom(ctx) @@ -53,7 +52,6 @@ func TestRequestInfo(t *testing.T) { } func TestEndTime(t *testing.T) { - t.Parallel() re := require.New(t) ctx := context.Background() _, ok := EndTimeFrom(ctx) diff --git a/pkg/utils/tempurl/check_env_dummy.go b/pkg/utils/tempurl/check_env_dummy.go index 85f527ea6fe..58d889bbfd6 100644 --- a/pkg/utils/tempurl/check_env_dummy.go +++ b/pkg/utils/tempurl/check_env_dummy.go @@ -16,6 +16,6 @@ package tempurl -func environmentCheck(addr string) bool { +func environmentCheck(_ string) bool { return true } diff --git a/pkg/utils/tempurl/tempurl.go b/pkg/utils/tempurl/tempurl.go index 421513ff001..cd5cd498f95 100644 --- a/pkg/utils/tempurl/tempurl.go +++ b/pkg/utils/tempurl/tempurl.go @@ -16,7 +16,10 @@ package tempurl import ( "fmt" + "io" "net" + "net/http" + "os" "time" "github.com/pingcap/log" @@ -29,6 +32,9 @@ var ( testAddrMap = make(map[string]struct{}) ) +// reference: /pd/tools/pd-ut/alloc/server.go +const AllocURLFromUT = "allocURLFromUT" + // Alloc allocates a local URL for testing. 
func Alloc() string { for i := 0; i < 10; i++ { @@ -42,6 +48,9 @@ func Alloc() string { } func tryAllocTestURL() string { + if url := getFromUT(); url != "" { + return url + } l, err := net.Listen("tcp", "127.0.0.1:0") if err != nil { log.Fatal("listen failed", errs.ZapError(err)) @@ -63,3 +72,26 @@ func tryAllocTestURL() string { testAddrMap[addr] = struct{}{} return addr } + +func getFromUT() string { + addr := os.Getenv(AllocURLFromUT) + if addr == "" { + return "" + } + + req, err := http.NewRequest(http.MethodGet, addr, nil) + if err != nil { + return "" + } + resp, err := http.DefaultClient.Do(req) + if err != nil || resp.StatusCode != http.StatusOK { + return "" + } + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + if err != nil { + return "" + } + url := string(body) + return url +} diff --git a/pkg/utils/testutil/api_check.go b/pkg/utils/testutil/api_check.go index 58934bf08f6..0b714204500 100644 --- a/pkg/utils/testutil/api_check.go +++ b/pkg/utils/testutil/api_check.go @@ -43,7 +43,7 @@ func StatusNotOK(re *require.Assertions) func([]byte, int, http.Header) { } // ExtractJSON is used to check whether given data can be extracted successfully. -func ExtractJSON(re *require.Assertions, data interface{}) func([]byte, int, http.Header) { +func ExtractJSON(re *require.Assertions, data any) func([]byte, int, http.Header) { return func(resp []byte, _ int, _ http.Header) { re.NoError(json.Unmarshal(resp, data), "resp: "+string(resp)) } @@ -56,6 +56,13 @@ func StringContain(re *require.Assertions, sub string) func([]byte, int, http.He } } +// StringNotContain is used to check whether response context doesn't contain given string. +func StringNotContain(re *require.Assertions, sub string) func([]byte, int, http.Header) { + return func(resp []byte, _ int, _ http.Header) { + re.NotContains(string(resp), sub, "resp: "+string(resp)) + } +} + // StringEqual is used to check whether response context equal given string. func StringEqual(re *require.Assertions, str string) func([]byte, int, http.Header) { return func(resp []byte, _ int, _ http.Header) { @@ -78,7 +85,7 @@ func WithoutHeader(re *require.Assertions, key string) func([]byte, int, http.He } // ReadGetJSON is used to do get request and check whether given data can be extracted successfully. -func ReadGetJSON(re *require.Assertions, client *http.Client, url string, data interface{}, checkOpts ...func([]byte, int, http.Header)) error { +func ReadGetJSON(re *require.Assertions, client *http.Client, url string, data any, checkOpts ...func([]byte, int, http.Header)) error { resp, err := apiutil.GetJSON(client, url, nil) if err != nil { return err @@ -88,12 +95,13 @@ func ReadGetJSON(re *require.Assertions, client *http.Client, url string, data i } // ReadGetJSONWithBody is used to do get request with input and check whether given data can be extracted successfully. -func ReadGetJSONWithBody(re *require.Assertions, client *http.Client, url string, input []byte, data interface{}, checkOpts ...func([]byte, int, http.Header)) error { +func ReadGetJSONWithBody(re *require.Assertions, client *http.Client, url string, input []byte, data any, checkOpts ...func([]byte, int, http.Header)) error { resp, err := apiutil.GetJSON(client, url, input) if err != nil { return err } - return checkResp(resp, StatusOK(re), ExtractJSON(re, data)) + checkOpts = append(checkOpts, StatusOK(re), ExtractJSON(re, data)) + return checkResp(resp, checkOpts...) } // CheckPostJSON is used to do post request and do check options. 
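A short usage sketch for the pkg/utils/testutil/api_check.go change above; it is hypothetical (the helper name, URL, and marker string are illustrative, not part of this patch). With this change, ReadGetJSONWithBody appends the default StatusOK and ExtractJSON checks after any caller-supplied options, so extra assertions such as the new StringNotContain can be passed through:

package example

import (
	"net/http"

	"github.com/stretchr/testify/require"
	tu "github.com/tikv/pd/pkg/utils/testutil"
)

// fetchWithoutMarker is a hypothetical helper; "some-marker" stands in for any string
// the response body must not contain.
func fetchWithoutMarker(re *require.Assertions, client *http.Client, url string, body []byte, out any) error {
	// The caller-provided check runs in addition to the default StatusOK and ExtractJSON checks.
	return tu.ReadGetJSONWithBody(re, client, url, body, out,
		tu.StringNotContain(re, "some-marker"))
}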
diff --git a/pkg/utils/testutil/leak.go b/pkg/utils/testutil/leak.go index d1329aef0e6..ba2ebb7fcb0 100644 --- a/pkg/utils/testutil/leak.go +++ b/pkg/utils/testutil/leak.go @@ -21,6 +21,7 @@ var LeakOptions = []goleak.Option{ goleak.IgnoreTopFunction("github.com/syndtr/goleveldb/leveldb.(*DB).mpoolDrain"), goleak.IgnoreTopFunction("google.golang.org/grpc.(*ccBalancerWrapper).watcher"), goleak.IgnoreTopFunction("google.golang.org/grpc.(*addrConn).resetTransport"), + goleak.IgnoreTopFunction("google.golang.org/grpc/internal/grpcsync.(*CallbackSerializer).run"), goleak.IgnoreTopFunction("go.etcd.io/etcd/pkg/logutil.(*MergeLogger).outputLoop"), goleak.IgnoreTopFunction("sync.runtime_notifyListWait"), // TODO: remove the below options once we fixed the http connection leak problems diff --git a/pkg/utils/testutil/testutil.go b/pkg/utils/testutil/testutil.go index a48db0bd60f..cef952353bc 100644 --- a/pkg/utils/testutil/testutil.go +++ b/pkg/utils/testutil/testutil.go @@ -16,13 +16,16 @@ package testutil import ( "os" + "runtime" "strings" + "sync" "time" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" "github.com/stretchr/testify/require" "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" ) const ( @@ -77,7 +80,7 @@ func NewRequestHeader(clusterID uint64) *pdpb.RequestHeader { // MustNewGrpcClient must create a new PD grpc client. func MustNewGrpcClient(re *require.Assertions, addr string) pdpb.PDClient { - conn, err := grpc.Dial(strings.TrimPrefix(addr, "http://"), grpc.WithInsecure()) + conn, err := grpc.Dial(strings.TrimPrefix(addr, "http://"), grpc.WithTransportCredentials(insecure.NewCredentials())) re.NoError(err) return pdpb.NewPDClient(conn) } @@ -100,3 +103,24 @@ func InitTempFileLogger(level string) (fname string) { log.ReplaceGlobals(lg, p) return fname } + +// GenerateTestDataConcurrently generates test data concurrently. 
+func GenerateTestDataConcurrently(count int, f func(int)) { + var wg sync.WaitGroup + tasks := make(chan int, count) + workers := runtime.NumCPU() + for w := 0; w < workers; w++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := range tasks { + f(i) + } + }() + } + for i := 0; i < count; i++ { + tasks <- i + } + close(tasks) + wg.Wait() +} diff --git a/pkg/utils/tsoutil/tso_dispatcher.go b/pkg/utils/tsoutil/tso_dispatcher.go index 6d1ee2ace28..9dfb2515dc1 100644 --- a/pkg/utils/tsoutil/tso_dispatcher.go +++ b/pkg/utils/tsoutil/tso_dispatcher.go @@ -128,7 +128,7 @@ func (s *TSODispatcher) dispatch( case <-dispatcherCtx.Done(): return } - err = s.processRequests(forwardStream, requests[:pendingTSOReqCount], tsoProtoFactory) + err = s.processRequests(forwardStream, requests[:pendingTSOReqCount]) close(done) if err != nil { log.Error("proxy forward tso error", @@ -155,7 +155,7 @@ func (s *TSODispatcher) dispatch( } } -func (s *TSODispatcher) processRequests(forwardStream stream, requests []Request, tsoProtoFactory ProtoFactory) error { +func (s *TSODispatcher) processRequests(forwardStream stream, requests []Request) error { // Merge the requests count := uint32(0) for _, request := range requests { @@ -163,7 +163,7 @@ func (s *TSODispatcher) processRequests(forwardStream stream, requests []Request } start := time.Now() - resp, err := requests[0].process(forwardStream, count, tsoProtoFactory) + resp, err := requests[0].process(forwardStream, count) if err != nil { return err } @@ -184,7 +184,7 @@ func addLogical(logical, count int64, suffixBits uint32) int64 { return logical + count<<suffixBits } diff --git a/scripts/ci-subtask.sh b/scripts/ci-subtask.sh --- a/scripts/ci-subtask.sh +++ b/scripts/ci-subtask.sh -ROOT_PATH=../../ - -if [[ $2 -gt 10 ]]; then - integrations_dir=./tests/integrations - integrations_tasks=($(find "$integrations_dir" -mindepth 1 -maxdepth 1 -type d)) - # Currently, we only have 3 integration tests, so we can hardcode the task index. - for t in ${integrations_tasks[@]}; do - if [[ "$t" = "$integrations_dir/client" && "$2" = 11 ]]; then - cd ./client && make ci-test-job && cd .. && cat ./client/covprofile >> covprofile - cd $integrations_dir && make ci-test-job test_name=client - cd $ROOT_PATH && cat $integrations_dir/client/covprofile >> covprofile - break - elif [[ "$t" = "$integrations_dir/tso" && "$2" = 12 ]]; then - cd $integrations_dir && make ci-test-job test_name=tso - cd $ROOT_PATH && cat $integrations_dir/tso/covprofile >> covprofile - break - elif [[ "$t" = "$integrations_dir/mcs" && "$2" = 13 ]]; then - cd $integrations_dir && make ci-test-job test_name=mcs - cd $ROOT_PATH && cat $integrations_dir/mcs/covprofile >> covprofile - break - fi - done -else - # Get package test list. - packages=($(go list ./...)) - dirs=($(find . -iname "*_test.go" -exec dirname {} \; | sort -u | sed -e "s/^\./github.com\/tikv\/pd/")) - tasks=($(comm -12 <(printf "%s\n" "${packages[@]}") <(printf "%s\n" "${dirs[@]}"))) - - weight() { - [[ $1 == "github.com/tikv/pd/server/api" ]] && return 30 - [[ $1 == "github.com/tikv/pd/pkg/schedule" ]] && return 30 - [[ $1 == "github.com/tikv/pd/pkg/core" ]] && return 30 - [[ $1 == "github.com/tikv/pd/tests/server/api" ]] && return 30 - [[ $1 == "github.com/tikv/pd/tools" ]] && return 30 - [[ $1 =~ "pd/tests" ]] && return 5 - return 1 - } - - # Create an associative array to store the weight of each task. - declare -A task_weights - for t in ${tasks[@]}; do - weight $t - task_weights[$t]=$? - done - - # Sort tasks by weight in descending order.
- tasks=($(printf "%s\n" "${tasks[@]}" | sort -rn)) - - scores=($(seq "$1" | xargs -I{} echo 0)) - - res=() - for t in ${tasks[@]}; do - min_i=0 - for i in ${!scores[@]}; do - [[ ${scores[i]} -lt ${scores[$min_i]} ]] && min_i=$i - done - scores[$min_i]=$((${scores[$min_i]} + ${task_weights[$t]})) - [[ $(($min_i + 1)) -eq $2 ]] && res+=($t) - done - - CGO_ENABLED=1 go test -timeout=15m -tags deadlock -race -covermode=atomic -coverprofile=covprofile -coverpkg=./... ${res[@]} -fi +ROOT_PATH_COV=$(pwd)/covprofile +# Currently, we only have 3 integration tests, so we can hardcode the task index. +integrations_dir=$(pwd)/tests/integrations + +case $1 in + 1) + # unit tests ignore `tests` + ./bin/pd-ut run --race --ignore tests --coverprofile $ROOT_PATH_COV || exit 1 + ;; + 2) + # unit tests only in `tests` + ./bin/pd-ut run tests --race --coverprofile $ROOT_PATH_COV || exit 1 + ;; + 3) + # tools tests + cd ./tools && make ci-test-job && cat covprofile >> $ROOT_PATH_COV || exit 1 + ;; + 4) + # integration test client + ./bin/pd-ut it run client --race --coverprofile $ROOT_PATH_COV || exit 1 + # client tests + cd ./client && make ci-test-job && cat covprofile >> $ROOT_PATH_COV || exit 1 + ;; + 5) + # integration test tso + ./bin/pd-ut it run tso --race --coverprofile $ROOT_PATH_COV || exit 1 + ;; + 6) + # integration test mcs + ./bin/pd-ut it run mcs --race --coverprofile $ROOT_PATH_COV || exit 1 + ;; +esac diff --git a/scripts/dashboard-version b/scripts/dashboard-version index 6525f437517..9b2a3898256 100644 --- a/scripts/dashboard-version +++ b/scripts/dashboard-version @@ -1,3 +1,3 @@ # This file is updated by running scripts/update-dashboard.sh # Don't edit it manullay -7.6.0-41f7c801 +8.0.0-9768844f diff --git a/server/api/admin.go b/server/api/admin.go index 49fe7cdc567..dd81985b514 100644 --- a/server/api/admin.go +++ b/server/api/admin.go @@ -60,7 +60,7 @@ func (h *adminHandler) DeleteRegionCache(w http.ResponseWriter, r *http.Request) h.rd.JSON(w, http.StatusBadRequest, err.Error()) return } - rc.DropCacheRegion(regionID) + rc.RemoveRegionIfExist(regionID) if h.svr.IsServiceIndependent(utils.SchedulingServiceName) { err = h.DeleteRegionCacheInSchedulingServer(regionID) } @@ -100,7 +100,7 @@ func (h *adminHandler) DeleteRegionStorage(w http.ResponseWriter, r *http.Reques return } // Remove region from cache. 
- rc.DropCacheRegion(regionID) + rc.RemoveRegionIfExist(regionID) if h.svr.IsServiceIndependent(utils.SchedulingServiceName) { err = h.DeleteRegionCacheInSchedulingServer(regionID) } @@ -116,7 +116,7 @@ func (h *adminHandler) DeleteRegionStorage(w http.ResponseWriter, r *http.Reques func (h *adminHandler) DeleteAllRegionCache(w http.ResponseWriter, r *http.Request) { var err error rc := getCluster(r) - rc.DropCacheAllRegion() + rc.ResetRegionCache() if h.svr.IsServiceIndependent(utils.SchedulingServiceName) { err = h.DeleteRegionCacheInSchedulingServer() } @@ -148,7 +148,7 @@ func (h *adminHandler) SavePersistFile(w http.ResponseWriter, r *http.Request) { h.rd.Text(w, http.StatusOK, "") } -func (h *adminHandler) MarkSnapshotRecovering(w http.ResponseWriter, r *http.Request) { +func (h *adminHandler) MarkSnapshotRecovering(w http.ResponseWriter, _ *http.Request) { if err := h.svr.MarkSnapshotRecovering(); err != nil { _ = h.rd.Text(w, http.StatusInternalServerError, err.Error()) return @@ -179,7 +179,7 @@ func (h *adminHandler) UnmarkSnapshotRecovering(w http.ResponseWriter, r *http.R // RecoverAllocID recover base alloc id // body should be in {"id": "123"} format func (h *adminHandler) RecoverAllocID(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } diff --git a/server/api/admin_test.go b/server/api/admin_test.go index 050aa9cfb32..f3b3dd64bd3 100644 --- a/server/api/admin_test.go +++ b/server/api/admin_test.go @@ -181,12 +181,12 @@ func (suite *adminTestSuite) TestPersistFile() { func makeTS(offset time.Duration) uint64 { physical := time.Now().Add(offset).UnixNano() / int64(time.Millisecond) - return uint64(physical << 18) + return uint64(physical) << 18 } func (suite *adminTestSuite) TestResetTS() { re := suite.Require() - args := make(map[string]interface{}) + args := make(map[string]any) t1 := makeTS(time.Hour) url := fmt.Sprintf("%s/admin/reset-ts", suite.urlPrefix) args["tso"] = fmt.Sprintf("%d", t1) diff --git a/server/api/cluster.go b/server/api/cluster.go index fcf972d56a7..2c29101dd2d 100644 --- a/server/api/cluster.go +++ b/server/api/cluster.go @@ -38,7 +38,7 @@ func newClusterHandler(svr *server.Server, rd *render.Render) *clusterHandler { // @Produce json // @Success 200 {object} metapb.Cluster // @Router /cluster [get] -func (h *clusterHandler) GetCluster(w http.ResponseWriter, r *http.Request) { +func (h *clusterHandler) GetCluster(w http.ResponseWriter, _ *http.Request) { h.rd.JSON(w, http.StatusOK, h.svr.GetCluster()) } @@ -48,7 +48,7 @@ func (h *clusterHandler) GetCluster(w http.ResponseWriter, r *http.Request) { // @Success 200 {object} cluster.Status // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /cluster/status [get] -func (h *clusterHandler) GetClusterStatus(w http.ResponseWriter, r *http.Request) { +func (h *clusterHandler) GetClusterStatus(w http.ResponseWriter, _ *http.Request) { status, err := h.svr.GetClusterStatus() if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) diff --git a/server/api/config.go b/server/api/config.go index 6037de650a0..c8233f8d5eb 100644 --- a/server/api/config.go +++ b/server/api/config.go @@ -83,7 +83,7 @@ func (h *confHandler) GetConfig(w http.ResponseWriter, r *http.Request) { // @Success 200 {object} config.Config // @Failure 500 {string} string "PD server failed to proceed the request." 
// @Router /config/default [get] -func (h *confHandler) GetDefaultConfig(w http.ResponseWriter, r *http.Request) { +func (h *confHandler) GetDefaultConfig(w http.ResponseWriter, _ *http.Request) { config := config.NewConfig() err := config.Adjust(nil, false) if err != nil { @@ -113,7 +113,7 @@ func (h *confHandler) SetConfig(w http.ResponseWriter, r *http.Request) { return } - conf := make(map[string]interface{}) + conf := make(map[string]any) if err := json.Unmarshal(data, &conf); err != nil { h.rd.JSON(w, http.StatusBadRequest, err.Error()) return @@ -157,7 +157,7 @@ func (h *confHandler) SetConfig(w http.ResponseWriter, r *http.Request) { h.rd.JSON(w, http.StatusOK, "The config is updated.") } -func (h *confHandler) updateConfig(cfg *config.Config, key string, value interface{}) error { +func (h *confHandler) updateConfig(cfg *config.Config, key string, value any) error { kp := strings.Split(key, ".") switch kp[0] { case "schedule": @@ -187,7 +187,7 @@ func (h *confHandler) updateConfig(cfg *config.Config, key string, value interfa return errors.Errorf("config prefix %s not found", kp[0]) } -func (h *confHandler) updateKeyspaceConfig(config *config.Config, key string, value interface{}) error { +func (h *confHandler) updateKeyspaceConfig(config *config.Config, key string, value any) error { updated, found, err := jsonutil.AddKeyValue(&config.Keyspace, key, value) if err != nil { return err @@ -203,7 +203,7 @@ func (h *confHandler) updateKeyspaceConfig(config *config.Config, key string, va return err } -func (h *confHandler) updateMicroServiceConfig(config *config.Config, key string, value interface{}) error { +func (h *confHandler) updateMicroServiceConfig(config *config.Config, key string, value any) error { updated, found, err := jsonutil.AddKeyValue(&config.MicroService, key, value) if err != nil { return err @@ -219,7 +219,7 @@ func (h *confHandler) updateMicroServiceConfig(config *config.Config, key string return err } -func (h *confHandler) updateSchedule(config *config.Config, key string, value interface{}) error { +func (h *confHandler) updateSchedule(config *config.Config, key string, value any) error { updated, found, err := jsonutil.AddKeyValue(&config.Schedule, key, value) if err != nil { return err @@ -235,7 +235,7 @@ func (h *confHandler) updateSchedule(config *config.Config, key string, value in return err } -func (h *confHandler) updateReplication(config *config.Config, key string, value interface{}) error { +func (h *confHandler) updateReplication(config *config.Config, key string, value any) error { updated, found, err := jsonutil.AddKeyValue(&config.Replication, key, value) if err != nil { return err @@ -251,8 +251,8 @@ func (h *confHandler) updateReplication(config *config.Config, key string, value return err } -func (h *confHandler) updateReplicationModeConfig(config *config.Config, key []string, value interface{}) error { - cfg := make(map[string]interface{}) +func (h *confHandler) updateReplicationModeConfig(config *config.Config, key []string, value any) error { + cfg := make(map[string]any) cfg = getConfigMap(cfg, key, value) data, err := json.Marshal(cfg) if err != nil { @@ -273,7 +273,7 @@ func (h *confHandler) updateReplicationModeConfig(config *config.Config, key []s return err } -func (h *confHandler) updatePDServerConfig(config *config.Config, key string, value interface{}) error { +func (h *confHandler) updatePDServerConfig(config *config.Config, key string, value any) error { updated, found, err := jsonutil.AddKeyValue(&config.PDServerCfg, key, value) if 
err != nil { return err @@ -289,7 +289,7 @@ func (h *confHandler) updatePDServerConfig(config *config.Config, key string, va return err } -func (h *confHandler) updateLogLevel(kp []string, value interface{}) error { +func (h *confHandler) updateLogLevel(kp []string, value any) error { if len(kp) != 2 || kp[1] != "level" { return errors.Errorf("only support changing log level") } @@ -304,7 +304,7 @@ func (h *confHandler) updateLogLevel(kp []string, value interface{}) error { return errors.Errorf("input value %v is illegal", value) } -func (h *confHandler) updateClusterVersion(value interface{}) error { +func (h *confHandler) updateClusterVersion(value any) error { if version, ok := value.(string); ok { err := h.svr.SetClusterVersion(version) if err != nil { @@ -315,13 +315,13 @@ func (h *confHandler) updateClusterVersion(value interface{}) error { return errors.Errorf("input value %v is illegal", value) } -func getConfigMap(cfg map[string]interface{}, key []string, value interface{}) map[string]interface{} { +func getConfigMap(cfg map[string]any, key []string, value any) map[string]any { if len(key) == 1 { cfg[key[0]] = value return cfg } - subConfig := make(map[string]interface{}) + subConfig := make(map[string]any) cfg[key[0]] = getConfigMap(subConfig, key[1:], value) return cfg } @@ -366,7 +366,7 @@ func (h *confHandler) SetScheduleConfig(w http.ResponseWriter, r *http.Request) return } - conf := make(map[string]interface{}) + conf := make(map[string]any) if err := json.Unmarshal(data, &conf); err != nil { h.rd.JSON(w, http.StatusBadRequest, err.Error()) return @@ -447,7 +447,7 @@ func (h *confHandler) SetReplicationConfig(w http.ResponseWriter, r *http.Reques // @Produce json // @Success 200 {object} config.LabelPropertyConfig // @Router /config/label-property [get] -func (h *confHandler) GetLabelPropertyConfig(w http.ResponseWriter, r *http.Request) { +func (h *confHandler) GetLabelPropertyConfig(w http.ResponseWriter, _ *http.Request) { h.rd.JSON(w, http.StatusOK, h.svr.GetLabelProperty()) } @@ -487,7 +487,7 @@ func (h *confHandler) SetLabelPropertyConfig(w http.ResponseWriter, r *http.Requ // @Produce json // @Success 200 {object} semver.Version // @Router /config/cluster-version [get] -func (h *confHandler) GetClusterVersion(w http.ResponseWriter, r *http.Request) { +func (h *confHandler) GetClusterVersion(w http.ResponseWriter, _ *http.Request) { h.rd.JSON(w, http.StatusOK, h.svr.GetClusterVersion()) } @@ -524,7 +524,7 @@ func (h *confHandler) SetClusterVersion(w http.ResponseWriter, r *http.Request) // @Produce json // @Success 200 {object} config.ReplicationModeConfig // @Router /config/replication-mode [get] -func (h *confHandler) GetReplicationModeConfig(w http.ResponseWriter, r *http.Request) { +func (h *confHandler) GetReplicationModeConfig(w http.ResponseWriter, _ *http.Request) { h.rd.JSON(w, http.StatusOK, h.svr.GetReplicationModeConfig()) } @@ -554,7 +554,7 @@ func (h *confHandler) SetReplicationModeConfig(w http.ResponseWriter, r *http.Re // @Produce json // @Success 200 {object} config.PDServerConfig // @Router /config/pd-server [get] -func (h *confHandler) GetPDServerConfig(w http.ResponseWriter, r *http.Request) { +func (h *confHandler) GetPDServerConfig(w http.ResponseWriter, _ *http.Request) { h.rd.JSON(w, http.StatusOK, h.svr.GetPDServerConfig()) } diff --git a/server/api/diagnostic_test.go b/server/api/diagnostic_test.go index 8ba77f1267d..8c4089a8710 100644 --- a/server/api/diagnostic_test.go +++ b/server/api/diagnostic_test.go @@ -36,7 +36,7 @@ type 
diagnosticTestSuite struct { cleanup tu.CleanupFunc urlPrefix string configPrefix string - schedulerPrifex string + schedulerPrefix string } func TestDiagnosticTestSuite(t *testing.T) { @@ -50,7 +50,7 @@ func (suite *diagnosticTestSuite) SetupSuite() { addr := suite.svr.GetAddr() suite.urlPrefix = fmt.Sprintf("%s%s/api/v1/schedulers/diagnostic", addr, apiPrefix) - suite.schedulerPrifex = fmt.Sprintf("%s%s/api/v1/schedulers", addr, apiPrefix) + suite.schedulerPrefix = fmt.Sprintf("%s%s/api/v1/schedulers", addr, apiPrefix) suite.configPrefix = fmt.Sprintf("%s%s/api/v1/config", addr, apiPrefix) mustBootstrapCluster(re, suite.svr) @@ -84,7 +84,7 @@ func (suite *diagnosticTestSuite) TestSchedulerDiagnosticAPI() { re.NoError(tu.ReadGetJSON(re, testDialClient, addr, cfg)) re.True(cfg.Schedule.EnableDiagnostic) - ms := map[string]interface{}{ + ms := map[string]any{ "enable-diagnostic": "true", "max-replicas": 1, } @@ -104,33 +104,35 @@ func (suite *diagnosticTestSuite) TestSchedulerDiagnosticAPI() { evictLeaderURL := suite.urlPrefix + "/" + schedulers.EvictLeaderName re.NoError(tu.CheckGetJSON(testDialClient, evictLeaderURL, nil, tu.StatusNotOK(re))) - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = schedulers.BalanceRegionName body, err := json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, suite.schedulerPrifex, body, tu.StatusOK(re)) + err = tu.CheckPostJSON(testDialClient, suite.schedulerPrefix, body, tu.StatusOK(re)) re.NoError(err) suite.checkStatus("pending", balanceRegionURL) - input = make(map[string]interface{}) + input = make(map[string]any) input["delay"] = 30 pauseArgs, err := json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, suite.schedulerPrifex+"/"+schedulers.BalanceRegionName, pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(testDialClient, suite.schedulerPrefix+"/"+schedulers.BalanceRegionName, pauseArgs, tu.StatusOK(re)) re.NoError(err) suite.checkStatus("paused", balanceRegionURL) input["delay"] = 0 pauseArgs, err = json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, suite.schedulerPrifex+"/"+schedulers.BalanceRegionName, pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(testDialClient, suite.schedulerPrefix+"/"+schedulers.BalanceRegionName, pauseArgs, tu.StatusOK(re)) re.NoError(err) suite.checkStatus("pending", balanceRegionURL) + fmt.Println("before put region") mustPutRegion(re, suite.svr, 1000, 1, []byte("a"), []byte("b"), core.SetApproximateSize(60)) + fmt.Println("after put region") suite.checkStatus("normal", balanceRegionURL) - deleteURL := fmt.Sprintf("%s/%s", suite.schedulerPrifex, schedulers.BalanceRegionName) + deleteURL := fmt.Sprintf("%s/%s", suite.schedulerPrefix, schedulers.BalanceRegionName) err = tu.CheckDelete(testDialClient, deleteURL, tu.StatusOK(re)) re.NoError(err) suite.checkStatus("disabled", balanceRegionURL) diff --git a/server/api/health.go b/server/api/health.go index fbbc4a3672f..93f95e8745d 100644 --- a/server/api/health.go +++ b/server/api/health.go @@ -48,7 +48,7 @@ func newHealthHandler(svr *server.Server, rd *render.Render) *healthHandler { // @Success 200 {array} Health // @Failure 500 {string} string "PD server failed to proceed the request." 
// @Router /health [get] -func (h *healthHandler) GetHealthStatus(w http.ResponseWriter, r *http.Request) { +func (h *healthHandler) GetHealthStatus(w http.ResponseWriter, _ *http.Request) { client := h.svr.GetClient() members, err := cluster.GetMembers(client) if err != nil { @@ -75,4 +75,4 @@ func (h *healthHandler) GetHealthStatus(w http.ResponseWriter, r *http.Request) // @Summary Ping PD servers. // @Router /ping [get] -func (h *healthHandler) Ping(w http.ResponseWriter, r *http.Request) {} +func (*healthHandler) Ping(http.ResponseWriter, *http.Request) {} diff --git a/server/api/health_test.go b/server/api/health_test.go index 6d2caec12cd..89a9627bc37 100644 --- a/server/api/health_test.go +++ b/server/api/health_test.go @@ -26,7 +26,7 @@ import ( ) func checkSliceResponse(re *require.Assertions, body []byte, cfgs []*config.Config, unhealthy string) { - got := []Health{} + var got []Health re.NoError(json.Unmarshal(body, &got)) re.Len(cfgs, len(got)) diff --git a/server/api/hot_status.go b/server/api/hot_status.go index f352e21254b..e4dbf96b904 100644 --- a/server/api/hot_status.go +++ b/server/api/hot_status.go @@ -101,7 +101,7 @@ func (h *hotStatusHandler) getHotRegions(typ utils.RWType, w http.ResponseWriter // @Success 200 {object} handler.HotStoreStats // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /hotspot/stores [get] -func (h *hotStatusHandler) GetHotStores(w http.ResponseWriter, r *http.Request) { +func (h *hotStatusHandler) GetHotStores(w http.ResponseWriter, _ *http.Request) { stats, err := h.Handler.GetHotStores() if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) diff --git a/server/api/label.go b/server/api/label.go index b7f279d86cc..ead6b30ae26 100644 --- a/server/api/label.go +++ b/server/api/label.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/metapb" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/response" "github.com/tikv/pd/server" "github.com/unrolled/render" ) @@ -65,7 +66,7 @@ func (h *labelsHandler) GetLabels(w http.ResponseWriter, r *http.Request) { // @Param name query string true "name of store label filter" // @Param value query string true "value of store label filter" // @Produce json -// @Success 200 {object} StoresInfo +// @Success 200 {object} response.StoresInfo // @Failure 500 {string} string "PD server failed to proceed the request." 
// @Router /labels/stores [get] func (h *labelsHandler) GetStoresByLabel(w http.ResponseWriter, r *http.Request) { @@ -79,8 +80,8 @@ func (h *labelsHandler) GetStoresByLabel(w http.ResponseWriter, r *http.Request) } stores := rc.GetMetaStores() - storesInfo := &StoresInfo{ - Stores: make([]*StoreInfo, 0, len(stores)), + storesInfo := &response.StoresInfo{ + Stores: make([]*response.StoreInfo, 0, len(stores)), } stores = filter.filter(stores) @@ -92,7 +93,7 @@ func (h *labelsHandler) GetStoresByLabel(w http.ResponseWriter, r *http.Request) return } - storeInfo := newStoreInfo(h.svr.GetScheduleConfig(), store) + storeInfo := response.BuildStoreInfo(h.svr.GetScheduleConfig(), store) storesInfo.Stores = append(storesInfo.Stores, storeInfo) } storesInfo.Count = len(storesInfo.Stores) diff --git a/server/api/label_test.go b/server/api/label_test.go index 13474f89f22..a8599273d5c 100644 --- a/server/api/label_test.go +++ b/server/api/label_test.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/response" tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server" "github.com/tikv/pd/server/config" @@ -178,7 +179,7 @@ func (suite *labelsStoreTestSuite) TestStoresLabelFilter() { } for _, testCase := range testCases { url := fmt.Sprintf("%s/labels/stores?name=%s&value=%s", suite.urlPrefix, testCase.name, testCase.value) - info := new(StoresInfo) + info := new(response.StoresInfo) err := tu.ReadGetJSON(re, testDialClient, url, info) re.NoError(err) checkStoresInfo(re, info.Stores, testCase.want) diff --git a/server/api/member.go b/server/api/member.go index df8c0aee622..10b7a06e121 100644 --- a/server/api/member.go +++ b/server/api/member.go @@ -51,7 +51,7 @@ func newMemberHandler(svr *server.Server, rd *render.Render) *memberHandler { // @Success 200 {object} pdpb.GetMembersResponse // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /members [get] -func (h *memberHandler) GetMembers(w http.ResponseWriter, r *http.Request) { +func (h *memberHandler) GetMembers(w http.ResponseWriter, _ *http.Request) { members, err := getMembers(h.svr) if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) @@ -238,7 +238,7 @@ func (h *memberHandler) SetMemberPropertyByName(w http.ResponseWriter, r *http.R return } - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } @@ -276,7 +276,7 @@ func newLeaderHandler(svr *server.Server, rd *render.Render) *leaderHandler { // @Produce json // @Success 200 {object} pdpb.Member // @Router /leader [get] -func (h *leaderHandler) GetLeader(w http.ResponseWriter, r *http.Request) { +func (h *leaderHandler) GetLeader(w http.ResponseWriter, _ *http.Request) { h.rd.JSON(w, http.StatusOK, h.svr.GetLeader()) } @@ -286,7 +286,7 @@ func (h *leaderHandler) GetLeader(w http.ResponseWriter, r *http.Request) { // @Success 200 {string} string "The resign command is submitted." // @Failure 500 {string} string "PD server failed to proceed the request." 
// @Router /leader/resign [post] -func (h *leaderHandler) ResignLeader(w http.ResponseWriter, r *http.Request) { +func (h *leaderHandler) ResignLeader(w http.ResponseWriter, _ *http.Request) { err := h.svr.GetMember().ResignEtcdLeader(h.svr.Context(), h.svr.Name(), "") if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) diff --git a/server/api/member_test.go b/server/api/member_test.go index 65c0ff67360..d3318081e62 100644 --- a/server/api/member_test.go +++ b/server/api/member_test.go @@ -67,7 +67,7 @@ func relaxEqualStings(re *require.Assertions, a, b []string) { re.Equal(sortedStringB, sortedStringA) } -func (suite *memberTestSuite) checkListResponse(re *require.Assertions, body []byte, cfgs []*config.Config) { +func checkListResponse(re *require.Assertions, body []byte, cfgs []*config.Config) { got := make(map[string][]*pdpb.Member) json.Unmarshal(body, &got) re.Len(cfgs, len(got["members"])) @@ -92,7 +92,7 @@ func (suite *memberTestSuite) TestMemberList() { buf, err := io.ReadAll(resp.Body) re.NoError(err) resp.Body.Close() - suite.checkListResponse(re, buf, suite.cfgs) + checkListResponse(re, buf, suite.cfgs) } } @@ -158,26 +158,7 @@ func (suite *memberTestSuite) changeLeaderPeerUrls(leader *pdpb.Member, id uint6 resp.Body.Close() } -type resignTestSuite struct { - suite.Suite - cfgs []*config.Config - servers []*server.Server - clean testutil.CleanupFunc -} - -func TestResignTestSuite(t *testing.T) { - suite.Run(t, new(resignTestSuite)) -} - -func (suite *resignTestSuite) SetupSuite() { - suite.cfgs, suite.servers, suite.clean = mustNewCluster(suite.Require(), 1) -} - -func (suite *resignTestSuite) TearDownSuite() { - suite.clean() -} - -func (suite *resignTestSuite) TestResignMyself() { +func (suite *memberTestSuite) TestResignMyself() { re := suite.Require() addr := suite.cfgs[0].ClientUrls + apiPrefix + "/api/v1/leader/resign" resp, err := testDialClient.Post(addr, "", nil) diff --git a/server/api/operator.go b/server/api/operator.go index 049a343d3bd..c2529240804 100644 --- a/server/api/operator.go +++ b/server/api/operator.go @@ -101,6 +101,21 @@ func (h *operatorHandler) GetOperators(w http.ResponseWriter, r *http.Request) { } } +// @Tags operator +// @Summary Cancel all pending operators. +// @Produce json +// @Success 200 {string} string "All pending operators are canceled." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /operators [delete] +func (h *operatorHandler) DeleteOperators(w http.ResponseWriter, _ *http.Request) { + if err := h.RemoveOperators(); err != nil { + h.r.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + + h.r.JSON(w, http.StatusOK, "All pending operators are canceled.") +} + // FIXME: details of input json body params // @Tags operator // @Summary Create an operator. @@ -112,7 +127,7 @@ func (h *operatorHandler) GetOperators(w http.ResponseWriter, r *http.Request) { // @Failure 500 {string} string "PD server failed to proceed the request." 
// @Router /operators [post] func (h *operatorHandler) CreateOperator(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(h.r, w, r.Body, &input); err != nil { return } diff --git a/server/api/plugin_disable.go b/server/api/plugin_disable.go index 2676dbb91e2..289a140a4d6 100644 --- a/server/api/plugin_disable.go +++ b/server/api/plugin_disable.go @@ -26,16 +26,16 @@ import ( type pluginHandler struct{} -func newPluginHandler(_ *server.Handler, _ *render.Render) *pluginHandler { +func newPluginHandler(*server.Handler, *render.Render) *pluginHandler { return &pluginHandler{} } -func (h *pluginHandler) LoadPlugin(w http.ResponseWriter, r *http.Request) { +func (*pluginHandler) LoadPlugin(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusNotImplemented) w.Write([]byte("load plugin is disabled, please `PLUGIN=1 $(MAKE) pd-server` first")) } -func (h *pluginHandler) UnloadPlugin(w http.ResponseWriter, r *http.Request) { +func (*pluginHandler) UnloadPlugin(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusNotImplemented) w.Write([]byte("unload plugin is disabled, please `PLUGIN=1 $(MAKE) pd-server` first")) } diff --git a/server/api/pprof.go b/server/api/pprof.go index 900c48f8368..bd41a34a6f0 100644 --- a/server/api/pprof.go +++ b/server/api/pprof.go @@ -148,63 +148,63 @@ func (h *pprofHandler) PProfZip(w http.ResponseWriter, r *http.Request) { // @Tags debug // @Summary debug profile of PD servers. // @Router /debug/pprof/profile [get] -func (h *pprofHandler) PProfProfile(w http.ResponseWriter, r *http.Request) { +func (*pprofHandler) PProfProfile(w http.ResponseWriter, r *http.Request) { pp.Profile(w, r) } // @Tags debug // @Summary debug trace of PD servers. // @Router /debug/pprof/trace [get] -func (h *pprofHandler) PProfTrace(w http.ResponseWriter, r *http.Request) { +func (*pprofHandler) PProfTrace(w http.ResponseWriter, r *http.Request) { pp.Trace(w, r) } // @Tags debug // @Summary debug symbol of PD servers. // @Router /debug/pprof/symbol [get] -func (h *pprofHandler) PProfSymbol(w http.ResponseWriter, r *http.Request) { +func (*pprofHandler) PProfSymbol(w http.ResponseWriter, r *http.Request) { pp.Symbol(w, r) } // @Tags debug // @Summary debug heap of PD servers. // @Router /debug/pprof/heap [get] -func (h *pprofHandler) PProfHeap(w http.ResponseWriter, r *http.Request) { +func (*pprofHandler) PProfHeap(w http.ResponseWriter, r *http.Request) { pp.Handler("heap").ServeHTTP(w, r) } // @Tags debug // @Summary debug mutex of PD servers. // @Router /debug/pprof/mutex [get] -func (h *pprofHandler) PProfMutex(w http.ResponseWriter, r *http.Request) { +func (*pprofHandler) PProfMutex(w http.ResponseWriter, r *http.Request) { pp.Handler("mutex").ServeHTTP(w, r) } // @Tags debug // @Summary debug allocs of PD servers. // @Router /debug/pprof/allocs [get] -func (h *pprofHandler) PProfAllocs(w http.ResponseWriter, r *http.Request) { +func (*pprofHandler) PProfAllocs(w http.ResponseWriter, r *http.Request) { pp.Handler("allocs").ServeHTTP(w, r) } // @Tags debug // @Summary debug block of PD servers. // @Router /debug/pprof/block [get] -func (h *pprofHandler) PProfBlock(w http.ResponseWriter, r *http.Request) { +func (*pprofHandler) PProfBlock(w http.ResponseWriter, r *http.Request) { pp.Handler("block").ServeHTTP(w, r) } // @Tags debug // @Summary debug goroutine of PD servers. 
// @Router /debug/pprof/goroutine [get] -func (h *pprofHandler) PProfGoroutine(w http.ResponseWriter, r *http.Request) { +func (*pprofHandler) PProfGoroutine(w http.ResponseWriter, r *http.Request) { pp.Handler("goroutine").ServeHTTP(w, r) } // @Tags debug // @Summary debug threadcreate of PD servers. // @Router /debug/pprof/threadcreate [get] -func (h *pprofHandler) PProfThreadcreate(w http.ResponseWriter, r *http.Request) { +func (*pprofHandler) PProfThreadcreate(w http.ResponseWriter, r *http.Request) { pp.Handler("threadcreate").ServeHTTP(w, r) } diff --git a/server/api/pprof_test.go b/server/api/pprof_test.go index a1acd84dcb6..b43feeab108 100644 --- a/server/api/pprof_test.go +++ b/server/api/pprof_test.go @@ -53,7 +53,7 @@ func (suite *profTestSuite) TearDownSuite() { func (suite *profTestSuite) TestGetZip() { re := suite.Require() - rsp, err := testDialClient.Get(suite.urlPrefix + "/pprof/zip?" + "seconds=5s") + rsp, err := testDialClient.Get(suite.urlPrefix + "/pprof/zip?" + "seconds=5") re.NoError(err) defer rsp.Body.Close() body, err := io.ReadAll(rsp.Body) diff --git a/server/api/region.go b/server/api/region.go index 740a2c84dde..c6bc3d9e699 100644 --- a/server/api/region.go +++ b/server/api/region.go @@ -16,8 +16,6 @@ package api import ( "container/heap" - "context" - "encoding/hex" "fmt" "net/http" "net/url" @@ -26,14 +24,11 @@ import ( "strings" "github.com/gorilla/mux" - jwriter "github.com/mailru/easyjson/jwriter" "github.com/pingcap/errors" - "github.com/pingcap/kvproto/pkg/metapb" - "github.com/pingcap/kvproto/pkg/pdpb" - "github.com/pingcap/kvproto/pkg/replication_modepb" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/keyspace" + "github.com/tikv/pd/pkg/response" "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/pkg/utils/typeutil" @@ -41,190 +36,6 @@ import ( "github.com/unrolled/render" ) -// MetaPeer is api compatible with *metapb.Peer. -// NOTE: This type is exported by HTTP API. Please pay more attention when modifying it. -type MetaPeer struct { - *metapb.Peer - // RoleName is `Role.String()`. - // Since Role is serialized as int by json by default, - // introducing it will make the output of pd-ctl easier to identify Role. - RoleName string `json:"role_name"` - // IsLearner is `Role == "Learner"`. - // Since IsLearner was changed to Role in kvproto in 5.0, this field was introduced to ensure api compatibility. - IsLearner bool `json:"is_learner,omitempty"` -} - -func (m *MetaPeer) setDefaultIfNil() { - if m.Peer == nil { - m.Peer = &metapb.Peer{ - Id: m.GetId(), - StoreId: m.GetStoreId(), - Role: m.GetRole(), - IsWitness: m.GetIsWitness(), - } - } -} - -// PDPeerStats is api compatible with *pdpb.PeerStats. -// NOTE: This type is exported by HTTP API. Please pay more attention when modifying it. 
-type PDPeerStats struct { - *pdpb.PeerStats - Peer MetaPeer `json:"peer"` -} - -func (s *PDPeerStats) setDefaultIfNil() { - if s.PeerStats == nil { - s.PeerStats = &pdpb.PeerStats{ - Peer: s.GetPeer(), - DownSeconds: s.GetDownSeconds(), - } - } - s.Peer.setDefaultIfNil() -} - -func fromPeer(peer *metapb.Peer) MetaPeer { - if peer == nil { - return MetaPeer{} - } - return MetaPeer{ - Peer: peer, - RoleName: peer.GetRole().String(), - IsLearner: core.IsLearner(peer), - } -} - -func fromPeerSlice(peers []*metapb.Peer) []MetaPeer { - if peers == nil { - return nil - } - slice := make([]MetaPeer, len(peers)) - for i, peer := range peers { - slice[i] = fromPeer(peer) - } - return slice -} - -func fromPeerStats(peer *pdpb.PeerStats) PDPeerStats { - return PDPeerStats{ - PeerStats: peer, - Peer: fromPeer(peer.Peer), - } -} - -func fromPeerStatsSlice(peers []*pdpb.PeerStats) []PDPeerStats { - if peers == nil { - return nil - } - slice := make([]PDPeerStats, len(peers)) - for i, peer := range peers { - slice[i] = fromPeerStats(peer) - } - return slice -} - -// RegionInfo records detail region info for api usage. -// NOTE: This type is exported by HTTP API. Please pay more attention when modifying it. -// easyjson:json -type RegionInfo struct { - ID uint64 `json:"id"` - StartKey string `json:"start_key"` - EndKey string `json:"end_key"` - RegionEpoch *metapb.RegionEpoch `json:"epoch,omitempty"` - Peers []MetaPeer `json:"peers,omitempty"` - - Leader MetaPeer `json:"leader,omitempty"` - DownPeers []PDPeerStats `json:"down_peers,omitempty"` - PendingPeers []MetaPeer `json:"pending_peers,omitempty"` - CPUUsage uint64 `json:"cpu_usage"` - WrittenBytes uint64 `json:"written_bytes"` - ReadBytes uint64 `json:"read_bytes"` - WrittenKeys uint64 `json:"written_keys"` - ReadKeys uint64 `json:"read_keys"` - ApproximateSize int64 `json:"approximate_size"` - ApproximateKeys int64 `json:"approximate_keys"` - Buckets []string `json:"buckets,omitempty"` - - ReplicationStatus *ReplicationStatus `json:"replication_status,omitempty"` -} - -// ReplicationStatus represents the replication mode status of the region. -// NOTE: This type is exported by HTTP API. Please pay more attention when modifying it. -type ReplicationStatus struct { - State string `json:"state"` - StateID uint64 `json:"state_id"` -} - -func fromPBReplicationStatus(s *replication_modepb.RegionReplicationStatus) *ReplicationStatus { - if s == nil { - return nil - } - return &ReplicationStatus{ - State: s.GetState().String(), - StateID: s.GetStateId(), - } -} - -// NewAPIRegionInfo create a new API RegionInfo. -func NewAPIRegionInfo(r *core.RegionInfo) *RegionInfo { - return InitRegion(r, &RegionInfo{}) -} - -// InitRegion init a new API RegionInfo from the core.RegionInfo. 
-func InitRegion(r *core.RegionInfo, s *RegionInfo) *RegionInfo { - if r == nil { - return nil - } - - s.ID = r.GetID() - s.StartKey = core.HexRegionKeyStr(r.GetStartKey()) - s.EndKey = core.HexRegionKeyStr(r.GetEndKey()) - s.RegionEpoch = r.GetRegionEpoch() - s.Peers = fromPeerSlice(r.GetPeers()) - s.Leader = fromPeer(r.GetLeader()) - s.DownPeers = fromPeerStatsSlice(r.GetDownPeers()) - s.PendingPeers = fromPeerSlice(r.GetPendingPeers()) - s.CPUUsage = r.GetCPUUsage() - s.WrittenBytes = r.GetBytesWritten() - s.WrittenKeys = r.GetKeysWritten() - s.ReadBytes = r.GetBytesRead() - s.ReadKeys = r.GetKeysRead() - s.ApproximateSize = r.GetApproximateSize() - s.ApproximateKeys = r.GetApproximateKeys() - s.ReplicationStatus = fromPBReplicationStatus(r.GetReplicationStatus()) - s.Buckets = nil - - keys := r.GetBuckets().GetKeys() - if len(keys) > 0 { - s.Buckets = make([]string, len(keys)) - for i, key := range keys { - s.Buckets[i] = core.HexRegionKeyStr(key) - } - } - return s -} - -// Adjust is only used in testing, in order to compare the data from json deserialization. -func (r *RegionInfo) Adjust() { - for _, peer := range r.DownPeers { - // Since api.PDPeerStats uses the api.MetaPeer type variable Peer to overwrite PeerStats.Peer, - // it needs to be restored after deserialization to be completely consistent with the original. - peer.PeerStats.Peer = peer.Peer.Peer - } -} - -// RegionsInfo contains some regions with the detailed region info. -type RegionsInfo struct { - Count int `json:"count"` - Regions []RegionInfo `json:"regions"` -} - -// Adjust is only used in testing, in order to compare the data from json deserialization. -func (s *RegionsInfo) Adjust() { - for _, r := range s.Regions { - r.Adjust() - } -} - type regionHandler struct { svr *server.Server rd *render.Render @@ -241,7 +52,7 @@ func newRegionHandler(svr *server.Server, rd *render.Render) *regionHandler { // @Summary Search for a region by region ID. // @Param id path integer true "Region Id" // @Produce json -// @Success 200 {object} RegionInfo +// @Success 200 {object} response.RegionInfo // @Failure 400 {string} string "The input is invalid." // @Router /region/id/{id} [get] func (h *regionHandler) GetRegionByID(w http.ResponseWriter, r *http.Request) { @@ -256,7 +67,7 @@ func (h *regionHandler) GetRegionByID(w http.ResponseWriter, r *http.Request) { } regionInfo := rc.GetRegion(regionID) - b, err := marshalRegionInfoJSON(r.Context(), regionInfo) + b, err := response.MarshalRegionInfoJSON(r.Context(), regionInfo) if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return @@ -269,30 +80,26 @@ func (h *regionHandler) GetRegionByID(w http.ResponseWriter, r *http.Request) { // @Summary Search for a region by a key. 
GetRegion is named to be consistent with gRPC // @Param key path string true "Region key" // @Produce json -// @Success 200 {object} RegionInfo +// @Success 200 {object} response.RegionInfo // @Router /region/key/{key} [get] func (h *regionHandler) GetRegion(w http.ResponseWriter, r *http.Request) { rc := getCluster(r) vars := mux.Vars(r) - key := vars["key"] - key, err := url.QueryUnescape(key) + key, err := url.QueryUnescape(vars["key"]) if err != nil { h.rd.JSON(w, http.StatusBadRequest, err.Error()) return } // decode hex if query has params with hex format - formatStr := r.URL.Query().Get("format") - if formatStr == "hex" { - keyBytes, err := hex.DecodeString(key) - if err != nil { - h.rd.JSON(w, http.StatusBadRequest, err.Error()) - return - } - key = string(keyBytes) + paramsByte := [][]byte{[]byte(key)} + paramsByte, err = apiutil.ParseHexKeys(r.URL.Query().Get("format"), paramsByte) + if err != nil { + h.rd.JSON(w, http.StatusBadRequest, err.Error()) + return } - regionInfo := rc.GetRegionByKey([]byte(key)) - b, err := marshalRegionInfoJSON(r.Context(), regionInfo) + regionInfo := rc.GetRegionByKey(paramsByte[0]) + b, err := response.MarshalRegionInfoJSON(r.Context(), regionInfo) if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return @@ -335,81 +142,15 @@ func newRegionsHandler(svr *server.Server, rd *render.Render) *regionsHandler { } } -// marshalRegionInfoJSON marshals region to bytes in `RegionInfo`'s JSON format. -// It is used to reduce the cost of JSON serialization. -func marshalRegionInfoJSON(ctx context.Context, r *core.RegionInfo) ([]byte, error) { - out := &jwriter.Writer{} - - region := &RegionInfo{} - select { - case <-ctx.Done(): - // Return early, avoid the unnecessary computation. - // See more details in https://github.com/tikv/pd/issues/6835 - return nil, ctx.Err() - default: - } - - covertAPIRegionInfo(r, region, out) - return out.Buffer.BuildBytes(), out.Error -} - -// marshalRegionsInfoJSON marshals regions to bytes in `RegionsInfo`'s JSON format. -// It is used to reduce the cost of JSON serialization. -func marshalRegionsInfoJSON(ctx context.Context, regions []*core.RegionInfo) ([]byte, error) { - out := &jwriter.Writer{} - out.RawByte('{') - - out.RawString("\"count\":") - out.Int(len(regions)) - - out.RawString(",\"regions\":") - out.RawByte('[') - region := &RegionInfo{} - for i, r := range regions { - select { - case <-ctx.Done(): - // Return early, avoid the unnecessary computation. - // See more details in https://github.com/tikv/pd/issues/6835 - return nil, ctx.Err() - default: - } - if i > 0 { - out.RawByte(',') - } - covertAPIRegionInfo(r, region, out) - } - out.RawByte(']') - - out.RawByte('}') - return out.Buffer.BuildBytes(), out.Error -} - -func covertAPIRegionInfo(r *core.RegionInfo, region *RegionInfo, out *jwriter.Writer) { - InitRegion(r, region) - // EasyJSON will not check anonymous struct pointer field and will panic if the field is nil. - // So we need to set the field to default value explicitly when the anonymous struct pointer is nil. - region.Leader.setDefaultIfNil() - for i := range region.Peers { - region.Peers[i].setDefaultIfNil() - } - for i := range region.PendingPeers { - region.PendingPeers[i].setDefaultIfNil() - } - for i := range region.DownPeers { - region.DownPeers[i].setDefaultIfNil() - } - region.MarshalEasyJSON(out) -} - // @Tags region // @Summary List all regions in the cluster. 
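apiutil.ParseHexKeys itself is not shown in this diff. Assuming it simply hex-decodes every key when format is "hex" and passes keys through otherwise, which is what the replaced inline code did, a stand-in looks roughly like this:

package main

import (
	"encoding/hex"
	"fmt"
)

// parseHexKeys is a stand-in for apiutil.ParseHexKeys as used above:
// when format is "hex", every key is hex-decoded; otherwise keys pass through untouched.
func parseHexKeys(format string, keys [][]byte) ([][]byte, error) {
	if format != "hex" {
		return keys, nil
	}
	decoded := make([][]byte, 0, len(keys))
	for _, k := range keys {
		b, err := hex.DecodeString(string(k))
		if err != nil {
			return keys, err
		}
		decoded = append(decoded, b)
	}
	return decoded, nil
}

func main() {
	keys, err := parseHexKeys("hex", [][]byte{[]byte("61")}) // "61" is hex for "a"
	fmt.Printf("%q %v\n", keys, err)                         // ["a"] <nil>
}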
// @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Router /regions [get] func (h *regionsHandler) GetRegions(w http.ResponseWriter, r *http.Request) { rc := getCluster(r) regions := rc.GetRegions() - b, err := marshalRegionsInfoJSON(r.Context(), regions) + b, err := response.MarshalRegionsInfoJSON(r.Context(), regions) if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return @@ -423,21 +164,27 @@ func (h *regionsHandler) GetRegions(w http.ResponseWriter, r *http.Request) { // @Param endkey query string true "Region range end key" // @Param limit query integer false "Limit count" default(16) // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 400 {string} string "The input is invalid." // @Router /regions/key [get] func (h *regionsHandler) ScanRegions(w http.ResponseWriter, r *http.Request) { rc := getCluster(r) - startKey := r.URL.Query().Get("key") - endKey := r.URL.Query().Get("end_key") - limit, err := h.AdjustLimit(r.URL.Query().Get("limit")) + query := r.URL.Query() + paramsByte := [][]byte{[]byte(query.Get("key")), []byte(query.Get("end_key"))} + paramsByte, err := apiutil.ParseHexKeys(query.Get("format"), paramsByte) if err != nil { h.rd.JSON(w, http.StatusBadRequest, err.Error()) return } - regions := rc.ScanRegions([]byte(startKey), []byte(endKey), limit) - b, err := marshalRegionsInfoJSON(r.Context(), regions) + limit, err := h.AdjustLimit(query.Get("limit")) + if err != nil { + h.rd.JSON(w, http.StatusBadRequest, err.Error()) + return + } + + regions := rc.ScanRegions(paramsByte[0], paramsByte[1], limit) + b, err := response.MarshalRegionsInfoJSON(r.Context(), regions) if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return @@ -448,19 +195,19 @@ func (h *regionsHandler) ScanRegions(w http.ResponseWriter, r *http.Request) { // @Tags region // @Summary Get count of regions. // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Router /regions/count [get] func (h *regionsHandler) GetRegionCount(w http.ResponseWriter, r *http.Request) { rc := getCluster(r) count := rc.GetTotalRegionCount() - h.rd.JSON(w, http.StatusOK, &RegionsInfo{Count: count}) + h.rd.JSON(w, http.StatusOK, &response.RegionsInfo{Count: count}) } // @Tags region // @Summary List all regions of a specific store. // @Param id path integer true "Store Id" // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 400 {string} string "The input is invalid." 
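With ScanRegions now routed through apiutil.ParseHexKeys, both boundary keys can be sent hex-encoded. A small client-side sketch of building that query; only the path and parameter names come from the handler above, the PD address is hypothetical:

package main

import (
	"encoding/hex"
	"fmt"
	"net/url"
)

func main() {
	q := url.Values{}
	q.Set("key", hex.EncodeToString([]byte("b")))
	q.Set("end_key", hex.EncodeToString([]byte("g")))
	q.Set("limit", "16")
	q.Set("format", "hex")

	u := url.URL{
		Scheme:   "http",
		Host:     "127.0.0.1:2379", // hypothetical PD address
		Path:     "/pd/api/v1/regions/key",
		RawQuery: q.Encode(),
	}
	fmt.Println(u.String())
}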
// @Router /regions/store/{id} [get] func (h *regionsHandler) GetStoreRegions(w http.ResponseWriter, r *http.Request) { @@ -472,8 +219,18 @@ func (h *regionsHandler) GetStoreRegions(w http.ResponseWriter, r *http.Request) h.rd.JSON(w, http.StatusBadRequest, err.Error()) return } - regions := rc.GetStoreRegions(uint64(id)) - b, err := marshalRegionsInfoJSON(r.Context(), regions) + // get type from query + typ := r.URL.Query().Get("type") + if len(typ) == 0 { + typ = string(core.AllInSubTree) + } + + regions, err := rc.GetStoreRegionsByTypeInSubTree(uint64(id), core.SubTreeRegionType(typ)) + if err != nil { + h.rd.JSON(w, http.StatusBadRequest, err.Error()) + return + } + b, err := response.MarshalRegionsInfoJSON(r.Context(), regions) if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return @@ -486,7 +243,7 @@ func (h *regionsHandler) GetStoreRegions(w http.ResponseWriter, r *http.Request) // @Param keyspace_id query string true "Keyspace ID" // @Param limit query integer false "Limit count" default(16) // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 400 {string} string "The input is invalid." // @Router /regions/keyspace/id/{id} [get] func (h *regionsHandler) GetKeyspaceRegions(w http.ResponseWriter, r *http.Request) { @@ -521,7 +278,7 @@ func (h *regionsHandler) GetKeyspaceRegions(w http.ResponseWriter, r *http.Reque txnRegion := rc.ScanRegions(regionBound.TxnLeftBound, regionBound.TxnRightBound, limit-len(regions)) regions = append(regions, txnRegion...) } - b, err := marshalRegionsInfoJSON(r.Context(), regions) + b, err := response.MarshalRegionsInfoJSON(r.Context(), regions) if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return @@ -532,7 +289,7 @@ func (h *regionsHandler) GetKeyspaceRegions(w http.ResponseWriter, r *http.Reque // @Tags region // @Summary List all regions that miss peer. // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/miss-peer [get] func (h *regionsHandler) GetMissPeerRegions(w http.ResponseWriter, r *http.Request) { @@ -550,7 +307,7 @@ func (h *regionsHandler) getRegionsByType( h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } - b, err := marshalRegionsInfoJSON(r.Context(), regions) + b, err := response.MarshalRegionsInfoJSON(r.Context(), regions) if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return @@ -561,7 +318,7 @@ func (h *regionsHandler) getRegionsByType( // @Tags region // @Summary List all regions that has extra peer. // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/extra-peer [get] func (h *regionsHandler) GetExtraPeerRegions(w http.ResponseWriter, r *http.Request) { @@ -571,7 +328,7 @@ func (h *regionsHandler) GetExtraPeerRegions(w http.ResponseWriter, r *http.Requ // @Tags region // @Summary List all regions that has pending peer. // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 500 {string} string "PD server failed to proceed the request." 
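GetStoreRegions now honors an optional type query parameter, defaulting to core.AllInSubTree when it is absent. A minimal sketch of that defaulting, with plain strings standing in for core.SubTreeRegionType; the literal values "all" and "leader" are assumptions, only the constant name appears in this hunk:

package main

import (
	"fmt"
	"net/url"
)

// allInSubTree stands in for core.AllInSubTree; the real value lives in pkg/core.
const allInSubTree = "all"

// regionTypeFromQuery mirrors the defaulting done in GetStoreRegions above.
func regionTypeFromQuery(query url.Values) string {
	typ := query.Get("type")
	if len(typ) == 0 {
		typ = allInSubTree
	}
	return typ
}

func main() {
	q, _ := url.ParseQuery("id=1")
	fmt.Println(regionTypeFromQuery(q)) // all
	q, _ = url.ParseQuery("id=1&type=leader")
	fmt.Println(regionTypeFromQuery(q)) // leader
}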
// @Router /regions/check/pending-peer [get] func (h *regionsHandler) GetPendingPeerRegions(w http.ResponseWriter, r *http.Request) { @@ -581,7 +338,7 @@ func (h *regionsHandler) GetPendingPeerRegions(w http.ResponseWriter, r *http.Re // @Tags region // @Summary List all regions that has down peer. // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/down-peer [get] func (h *regionsHandler) GetDownPeerRegions(w http.ResponseWriter, r *http.Request) { @@ -591,7 +348,7 @@ func (h *regionsHandler) GetDownPeerRegions(w http.ResponseWriter, r *http.Reque // @Tags region // @Summary List all regions that has learner peer. // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/learner-peer [get] func (h *regionsHandler) GetLearnerPeerRegions(w http.ResponseWriter, r *http.Request) { @@ -601,7 +358,7 @@ func (h *regionsHandler) GetLearnerPeerRegions(w http.ResponseWriter, r *http.Re // @Tags region // @Summary List all regions that has offline peer. // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/offline-peer [get] func (h *regionsHandler) GetOfflinePeerRegions(w http.ResponseWriter, r *http.Request) { @@ -611,7 +368,7 @@ func (h *regionsHandler) GetOfflinePeerRegions(w http.ResponseWriter, r *http.Re // @Tags region // @Summary List all regions that are oversized. // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/oversized-region [get] func (h *regionsHandler) GetOverSizedRegions(w http.ResponseWriter, r *http.Request) { @@ -621,7 +378,7 @@ func (h *regionsHandler) GetOverSizedRegions(w http.ResponseWriter, r *http.Requ // @Tags region // @Summary List all regions that are undersized. // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/undersized-region [get] func (h *regionsHandler) GetUndersizedRegions(w http.ResponseWriter, r *http.Request) { @@ -631,7 +388,7 @@ func (h *regionsHandler) GetUndersizedRegions(w http.ResponseWriter, r *http.Req // @Tags region // @Summary List all empty regions. // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/empty-region [get] func (h *regionsHandler) GetEmptyRegions(w http.ResponseWriter, r *http.Request) { @@ -742,7 +499,7 @@ func calHist(bound int, list *[]int64) *[]*histItem { } // @Tags region -// @Summary List all range holes whitout any region info. +// @Summary List all range holes without any region info. // @Produce json // @Success 200 {object} [][]string // @Router /regions/range-holes [get] @@ -755,7 +512,7 @@ func (h *regionsHandler) GetRangeHoles(w http.ResponseWriter, r *http.Request) { // @Summary List sibling regions of a specific region. 
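The range-holes endpoint keeps its [][]string payload, each inner pair being the start and end key of a hole. A tiny sketch of decoding it; the sample JSON is illustrative only:

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Illustrative response body: one hole between two hex-encoded keys.
	body := []byte(`[["7480000000000000ff0a", "7520000000000000ff0b"]]`)

	var holes [][]string
	if err := json.Unmarshal(body, &holes); err != nil {
		panic(err)
	}
	for _, h := range holes {
		fmt.Printf("hole: [%s, %s)\n", h[0], h[1])
	}
}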
// @Param id path integer true "Region Id" // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 400 {string} string "The input is invalid." // @Failure 404 {string} string "The region does not exist." // @Router /regions/sibling/{id} [get] @@ -775,7 +532,7 @@ func (h *regionsHandler) GetRegionSiblings(w http.ResponseWriter, r *http.Reques } left, right := rc.GetAdjacentRegions(region) - b, err := marshalRegionsInfoJSON(r.Context(), []*core.RegionInfo{left, right}) + b, err := response.MarshalRegionsInfoJSON(r.Context(), []*core.RegionInfo{left, right}) if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return @@ -792,29 +549,55 @@ const ( // @Summary List regions with the highest write flow. // @Param limit query integer false "Limit count" default(16) // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 400 {string} string "The input is invalid." // @Router /regions/writeflow [get] func (h *regionsHandler) GetTopWriteFlowRegions(w http.ResponseWriter, r *http.Request) { h.GetTopNRegions(w, r, func(a, b *core.RegionInfo) bool { return a.GetBytesWritten() < b.GetBytesWritten() }) } +// @Tags region +// @Summary List regions with the highest write flow. +// @Param limit query integer false "Limit count" default(16) +// @Produce json +// @Success 200 {object} response.RegionsInfo +// @Failure 400 {string} string "The input is invalid." +// @Router /regions/writequery [get] +func (h *regionsHandler) GetTopWriteQueryRegions(w http.ResponseWriter, r *http.Request) { + h.GetTopNRegions(w, r, func(a, b *core.RegionInfo) bool { + return a.GetWriteQueryNum() < b.GetWriteQueryNum() + }) +} + // @Tags region // @Summary List regions with the highest read flow. // @Param limit query integer false "Limit count" default(16) // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 400 {string} string "The input is invalid." // @Router /regions/readflow [get] func (h *regionsHandler) GetTopReadFlowRegions(w http.ResponseWriter, r *http.Request) { h.GetTopNRegions(w, r, func(a, b *core.RegionInfo) bool { return a.GetBytesRead() < b.GetBytesRead() }) } +// @Tags region +// @Summary List regions with the highest write flow. +// @Param limit query integer false "Limit count" default(16) +// @Produce json +// @Success 200 {object} response.RegionsInfo +// @Failure 400 {string} string "The input is invalid." +// @Router /regions/readquery [get] +func (h *regionsHandler) GetTopReadQueryRegions(w http.ResponseWriter, r *http.Request) { + h.GetTopNRegions(w, r, func(a, b *core.RegionInfo) bool { + return a.GetReadQueryNum() < b.GetReadQueryNum() + }) +} + // @Tags region // @Summary List regions with the largest conf version. // @Param limit query integer false "Limit count" default(16) // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 400 {string} string "The input is invalid." // @Router /regions/confver [get] func (h *regionsHandler) GetTopConfVerRegions(w http.ResponseWriter, r *http.Request) { @@ -827,7 +610,7 @@ func (h *regionsHandler) GetTopConfVerRegions(w http.ResponseWriter, r *http.Req // @Summary List regions with the largest version. 
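The new writequery/readquery handlers follow the same pattern as the flow-based ones: each supplies only a less closure and delegates ranking to GetTopNRegions. A self-contained sketch of that pattern over a toy region type, not PD's actual implementation:

package main

import (
	"fmt"
	"sort"
)

type regionStat struct {
	ID           uint64
	BytesWritten uint64
	ReadQueryNum uint64
}

// topN returns the n regions ranked highest by the given less function,
// mirroring how each handler above only supplies a comparator.
func topN(regions []regionStat, less func(a, b regionStat) bool, n int) []regionStat {
	sorted := append([]regionStat(nil), regions...)
	sort.Slice(sorted, func(i, j int) bool { return less(sorted[j], sorted[i]) }) // descending
	if n > len(sorted) {
		n = len(sorted)
	}
	return sorted[:n]
}

func main() {
	regions := []regionStat{{ID: 1, BytesWritten: 100}, {ID: 2, BytesWritten: 600}, {ID: 3, BytesWritten: 500}}
	byWrite := func(a, b regionStat) bool { return a.BytesWritten < b.BytesWritten }
	for _, r := range topN(regions, byWrite, 2) {
		fmt.Println(r.ID) // 2, then 3
	}
}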
// @Param limit query integer false "Limit count" default(16) // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 400 {string} string "The input is invalid." // @Router /regions/version [get] func (h *regionsHandler) GetTopVersionRegions(w http.ResponseWriter, r *http.Request) { @@ -840,7 +623,7 @@ func (h *regionsHandler) GetTopVersionRegions(w http.ResponseWriter, r *http.Req // @Summary List regions with the largest size. // @Param limit query integer false "Limit count" default(16) // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 400 {string} string "The input is invalid." // @Router /regions/size [get] func (h *regionsHandler) GetTopSizeRegions(w http.ResponseWriter, r *http.Request) { @@ -853,7 +636,7 @@ func (h *regionsHandler) GetTopSizeRegions(w http.ResponseWriter, r *http.Reques // @Summary List regions with the largest keys. // @Param limit query integer false "Limit count" default(16) // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 400 {string} string "The input is invalid." // @Router /regions/keys [get] func (h *regionsHandler) GetTopKeysRegions(w http.ResponseWriter, r *http.Request) { @@ -866,7 +649,7 @@ func (h *regionsHandler) GetTopKeysRegions(w http.ResponseWriter, r *http.Reques // @Summary List regions with the highest CPU usage. // @Param limit query integer false "Limit count" default(16) // @Produce json -// @Success 200 {object} RegionsInfo +// @Success 200 {object} response.RegionsInfo // @Failure 400 {string} string "The input is invalid." // @Router /regions/cpu [get] func (h *regionsHandler) GetTopCPURegions(w http.ResponseWriter, r *http.Request) { @@ -885,7 +668,7 @@ func (h *regionsHandler) GetTopCPURegions(w http.ResponseWriter, r *http.Request // @Failure 400 {string} string "The input is invalid." // @Router /regions/accelerate-schedule [post] func (h *regionsHandler) AccelerateRegionsScheduleInRange(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } @@ -920,7 +703,7 @@ func (h *regionsHandler) AccelerateRegionsScheduleInRange(w http.ResponseWriter, // @Failure 400 {string} string "The input is invalid." // @Router /regions/accelerate-schedule/batch [post] func (h *regionsHandler) AccelerateRegionsScheduleInRanges(w http.ResponseWriter, r *http.Request) { - var input []map[string]interface{} + var input []map[string]any if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } @@ -965,7 +748,7 @@ func (h *regionsHandler) GetTopNRegions(w http.ResponseWriter, r *http.Request, return } regions := TopNRegions(rc.GetRegions(), less, limit) - b, err := marshalRegionsInfoJSON(r.Context(), regions) + b, err := response.MarshalRegionsInfoJSON(r.Context(), regions) if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return @@ -982,7 +765,7 @@ func (h *regionsHandler) GetTopNRegions(w http.ResponseWriter, r *http.Request, // @Failure 400 {string} string "The input is invalid." 
// @Router /regions/scatter [post] func (h *regionsHandler) ScatterRegions(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } @@ -1002,7 +785,7 @@ func (h *regionsHandler) ScatterRegions(w http.ResponseWriter, r *http.Request) if !ok { return 0, nil, errors.New("regions_id is invalid") } - return h.ScatterRegionsByID(ids, group, retryLimit, false) + return h.ScatterRegionsByID(ids, group, retryLimit) }() if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) @@ -1021,7 +804,7 @@ func (h *regionsHandler) ScatterRegions(w http.ResponseWriter, r *http.Request) // @Failure 400 {string} string "The input is invalid." // @Router /regions/split [post] func (h *regionsHandler) SplitRegions(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } @@ -1030,7 +813,7 @@ func (h *regionsHandler) SplitRegions(w http.ResponseWriter, r *http.Request) { h.rd.JSON(w, http.StatusBadRequest, "split_keys should be provided.") return } - rawSplitKeys := s.([]interface{}) + rawSplitKeys := s.([]any) if len(rawSplitKeys) < 1 { h.rd.JSON(w, http.StatusBadRequest, "empty split keys.") return @@ -1058,13 +841,13 @@ func (h *RegionHeap) Less(i, j int) bool { return h.less(h.regions[i], h.regions func (h *RegionHeap) Swap(i, j int) { h.regions[i], h.regions[j] = h.regions[j], h.regions[i] } // Push pushes an element x onto the heap. -func (h *RegionHeap) Push(x interface{}) { +func (h *RegionHeap) Push(x any) { h.regions = append(h.regions, x.(*core.RegionInfo)) } // Pop removes the minimum element (according to Less) from the heap and returns // it. 
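SplitRegions expects a JSON object whose split_keys field is a non-empty array (decoded above as []any). A minimal sketch of building such a request; the PD address and the key values are placeholders:

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	body := map[string]any{
		// placeholder keys; real requests use keys from the target range
		"split_keys": []string{"bbb", "ccc", "ddd"},
	}
	data, _ := json.Marshal(body)

	// hypothetical PD address; the path comes from the /regions/split route
	req, _ := http.NewRequest(http.MethodPost, "http://127.0.0.1:2379/pd/api/v1/regions/split", bytes.NewReader(data))
	req.Header.Set("Content-Type", "application/json")
	fmt.Println(req.Method, req.URL, string(data))
}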
-func (h *RegionHeap) Pop() interface{} { +func (h *RegionHeap) Pop() any { pos := len(h.regions) - 1 x := h.regions[pos] h.regions = h.regions[:pos] diff --git a/server/api/region_label_test.go b/server/api/region_label_test.go index 811c83df678..1ed0997b8e7 100644 --- a/server/api/region_label_test.go +++ b/server/api/region_label_test.go @@ -108,10 +108,10 @@ func (suite *regionLabelTestSuite) TestGetSet() { re.Equal([]*labeler.LabelRule{rules[1], rules[2]}, resp) } -func makeKeyRanges(keys ...string) []interface{} { - var res []interface{} +func makeKeyRanges(keys ...string) []any { + var res []any for i := 0; i < len(keys); i += 2 { - res = append(res, map[string]interface{}{"start_key": keys[i], "end_key": keys[i+1]}) + res = append(res, map[string]any{"start_key": keys[i], "end_key": keys[i+1]}) } return res } diff --git a/server/api/region_test.go b/server/api/region_test.go index 4838e24d632..88632232175 100644 --- a/server/api/region_test.go +++ b/server/api/region_test.go @@ -32,57 +32,12 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/response" "github.com/tikv/pd/pkg/utils/apiutil" tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server" ) -func TestPeer(t *testing.T) { - re := require.New(t) - peers := []*metapb.Peer{ - {Id: 1, StoreId: 10, Role: metapb.PeerRole_Voter}, - {Id: 2, StoreId: 20, Role: metapb.PeerRole_Learner}, - {Id: 3, StoreId: 30, Role: metapb.PeerRole_IncomingVoter}, - {Id: 4, StoreId: 40, Role: metapb.PeerRole_DemotingVoter}, - } - // float64 is the default numeric type for JSON - expected := []map[string]interface{}{ - {"id": float64(1), "store_id": float64(10), "role_name": "Voter"}, - {"id": float64(2), "store_id": float64(20), "role": float64(1), "role_name": "Learner", "is_learner": true}, - {"id": float64(3), "store_id": float64(30), "role": float64(2), "role_name": "IncomingVoter"}, - {"id": float64(4), "store_id": float64(40), "role": float64(3), "role_name": "DemotingVoter"}, - } - - data, err := json.Marshal(fromPeerSlice(peers)) - re.NoError(err) - var ret []map[string]interface{} - re.NoError(json.Unmarshal(data, &ret)) - re.Equal(expected, ret) -} - -func TestPeerStats(t *testing.T) { - re := require.New(t) - peers := []*pdpb.PeerStats{ - {Peer: &metapb.Peer{Id: 1, StoreId: 10, Role: metapb.PeerRole_Voter}, DownSeconds: 0}, - {Peer: &metapb.Peer{Id: 2, StoreId: 20, Role: metapb.PeerRole_Learner}, DownSeconds: 1}, - {Peer: &metapb.Peer{Id: 3, StoreId: 30, Role: metapb.PeerRole_IncomingVoter}, DownSeconds: 2}, - {Peer: &metapb.Peer{Id: 4, StoreId: 40, Role: metapb.PeerRole_DemotingVoter}, DownSeconds: 3}, - } - // float64 is the default numeric type for JSON - expected := []map[string]interface{}{ - {"peer": map[string]interface{}{"id": float64(1), "store_id": float64(10), "role_name": "Voter"}}, - {"peer": map[string]interface{}{"id": float64(2), "store_id": float64(20), "role": float64(1), "role_name": "Learner", "is_learner": true}, "down_seconds": float64(1)}, - {"peer": map[string]interface{}{"id": float64(3), "store_id": float64(30), "role": float64(2), "role_name": "IncomingVoter"}, "down_seconds": float64(2)}, - {"peer": map[string]interface{}{"id": float64(4), "store_id": float64(40), "role": float64(3), "role_name": "DemotingVoter"}, "down_seconds": float64(3)}, - } - - data, err := json.Marshal(fromPeerStatsSlice(peers)) - re.NoError(err) - var ret []map[string]interface{} - re.NoError(json.Unmarshal(data, &ret)) - 
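The removed tests here, like the live ones that follow, decode responses into map[string]any and assert float64 values because encoding/json stores every JSON number as float64 when the destination is an untyped map. A quick demonstration:

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	var m map[string]any
	_ = json.Unmarshal([]byte(`{"written_bytes": 500, "id": 2}`), &m)
	// Both values come back as float64, regardless of how they looked in the JSON.
	fmt.Printf("%T %v\n", m["written_bytes"], m["written_bytes"]) // float64 500
	fmt.Printf("%T %v\n", m["id"], m["id"])                       // float64 2
}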
re.Equal(expected, ret) -} - type regionTestSuite struct { suite.Suite svr *server.Server @@ -124,32 +79,32 @@ func (suite *regionTestSuite) TestRegion() { re := suite.Require() mustRegionHeartbeat(re, suite.svr, r) url := fmt.Sprintf("%s/region/id/%d", suite.urlPrefix, r.GetID()) - r1 := &RegionInfo{} - r1m := make(map[string]interface{}) + r1 := &response.RegionInfo{} + r1m := make(map[string]any) re.NoError(tu.ReadGetJSON(re, testDialClient, url, r1)) r1.Adjust() - re.Equal(NewAPIRegionInfo(r), r1) + re.Equal(response.NewAPIRegionInfo(r), r1) re.NoError(tu.ReadGetJSON(re, testDialClient, url, &r1m)) re.Equal(float64(r.GetBytesWritten()), r1m["written_bytes"].(float64)) re.Equal(float64(r.GetKeysWritten()), r1m["written_keys"].(float64)) re.Equal(float64(r.GetBytesRead()), r1m["read_bytes"].(float64)) re.Equal(float64(r.GetKeysRead()), r1m["read_keys"].(float64)) - keys := r1m["buckets"].([]interface{}) + keys := r1m["buckets"].([]any) re.Len(keys, 2) re.Equal(core.HexRegionKeyStr([]byte("a")), keys[0].(string)) re.Equal(core.HexRegionKeyStr([]byte("b")), keys[1].(string)) url = fmt.Sprintf("%s/region/key/%s", suite.urlPrefix, "a") - r2 := &RegionInfo{} + r2 := &response.RegionInfo{} re.NoError(tu.ReadGetJSON(re, testDialClient, url, r2)) r2.Adjust() - re.Equal(NewAPIRegionInfo(r), r2) + re.Equal(response.NewAPIRegionInfo(r), r2) url = fmt.Sprintf("%s/region/key/%s?format=hex", suite.urlPrefix, hex.EncodeToString([]byte("a"))) - r2 = &RegionInfo{} + r2 = &response.RegionInfo{} re.NoError(tu.ReadGetJSON(re, testDialClient, url, r2)) r2.Adjust() - re.Equal(NewAPIRegionInfo(r), r2) + re.Equal(response.NewAPIRegionInfo(r), r2) } func (suite *regionTestSuite) TestRegionCheck() { @@ -164,36 +119,36 @@ func (suite *regionTestSuite) TestRegionCheck() { re := suite.Require() mustRegionHeartbeat(re, suite.svr, r) url := fmt.Sprintf("%s/region/id/%d", suite.urlPrefix, r.GetID()) - r1 := &RegionInfo{} + r1 := &response.RegionInfo{} re.NoError(tu.ReadGetJSON(re, testDialClient, url, r1)) r1.Adjust() - re.Equal(NewAPIRegionInfo(r), r1) + re.Equal(response.NewAPIRegionInfo(r), r1) url = fmt.Sprintf("%s/regions/check/%s", suite.urlPrefix, "down-peer") - r2 := &RegionsInfo{} + r2 := &response.RegionsInfo{} re.NoError(tu.ReadGetJSON(re, testDialClient, url, r2)) r2.Adjust() - re.Equal(&RegionsInfo{Count: 1, Regions: []RegionInfo{*NewAPIRegionInfo(r)}}, r2) + re.Equal(&response.RegionsInfo{Count: 1, Regions: []response.RegionInfo{*response.NewAPIRegionInfo(r)}}, r2) url = fmt.Sprintf("%s/regions/check/%s", suite.urlPrefix, "pending-peer") - r3 := &RegionsInfo{} + r3 := &response.RegionsInfo{} re.NoError(tu.ReadGetJSON(re, testDialClient, url, r3)) r3.Adjust() - re.Equal(&RegionsInfo{Count: 1, Regions: []RegionInfo{*NewAPIRegionInfo(r)}}, r3) + re.Equal(&response.RegionsInfo{Count: 1, Regions: []response.RegionInfo{*response.NewAPIRegionInfo(r)}}, r3) url = fmt.Sprintf("%s/regions/check/%s", suite.urlPrefix, "offline-peer") - r4 := &RegionsInfo{} + r4 := &response.RegionsInfo{} re.NoError(tu.ReadGetJSON(re, testDialClient, url, r4)) r4.Adjust() - re.Equal(&RegionsInfo{Count: 0, Regions: []RegionInfo{}}, r4) + re.Equal(&response.RegionsInfo{Count: 0, Regions: []response.RegionInfo{}}, r4) r = r.Clone(core.SetApproximateSize(1)) mustRegionHeartbeat(re, suite.svr, r) url = fmt.Sprintf("%s/regions/check/%s", suite.urlPrefix, "empty-region") - r5 := &RegionsInfo{} + r5 := &response.RegionsInfo{} re.NoError(tu.ReadGetJSON(re, testDialClient, url, r5)) r5.Adjust() - re.Equal(&RegionsInfo{Count: 1, Regions: 
[]RegionInfo{*NewAPIRegionInfo(r)}}, r5) + re.Equal(&response.RegionsInfo{Count: 1, Regions: []response.RegionInfo{*response.NewAPIRegionInfo(r)}}, r5) r = r.Clone(core.SetApproximateSize(1)) mustRegionHeartbeat(re, suite.svr, r) @@ -211,10 +166,12 @@ func (suite *regionTestSuite) TestRegionCheck() { histKeys := []*histItem{{Start: 1000, End: 1999, Count: 1}} re.Equal(histKeys, r7) + // ref https://github.com/tikv/pd/issues/3558, we should change size to pass `NeedUpdate` for observing. + r = r.Clone(core.SetApproximateKeys(0)) mustPutStore(re, suite.svr, 2, metapb.StoreState_Offline, metapb.NodeState_Removing, []*metapb.StoreLabel{}) mustRegionHeartbeat(re, suite.svr, r) url = fmt.Sprintf("%s/regions/check/%s", suite.urlPrefix, "offline-peer") - r8 := &RegionsInfo{} + r8 := &response.RegionsInfo{} re.NoError(tu.ReadGetJSON(re, testDialClient, url, r8)) r4.Adjust() re.Equal(1, r8.Count) @@ -223,7 +180,7 @@ func (suite *regionTestSuite) TestRegionCheck() { func (suite *regionTestSuite) TestRegions() { re := suite.Require() - r := NewAPIRegionInfo(core.NewRegionInfo(&metapb.Region{Id: 1}, nil)) + r := response.NewAPIRegionInfo(core.NewRegionInfo(&metapb.Region{Id: 1}, nil)) re.Nil(r.Leader.Peer) re.Empty(r.Leader.RoleName) @@ -232,13 +189,13 @@ func (suite *regionTestSuite) TestRegions() { core.NewTestRegionInfo(3, 1, []byte("b"), []byte("c"), core.SetApproximateKeys(10), core.SetApproximateSize(10)), core.NewTestRegionInfo(4, 2, []byte("c"), []byte("d"), core.SetApproximateKeys(10), core.SetApproximateSize(10)), } - regions := make([]RegionInfo, 0, len(rs)) + regions := make([]response.RegionInfo, 0, len(rs)) for _, r := range rs { - regions = append(regions, *NewAPIRegionInfo(r)) + regions = append(regions, *response.NewAPIRegionInfo(r)) mustRegionHeartbeat(re, suite.svr, r) } url := fmt.Sprintf("%s/regions", suite.urlPrefix) - regionsInfo := &RegionsInfo{} + regionsInfo := &response.RegionsInfo{} err := tu.ReadGetJSON(re, testDialClient, url, regionsInfo) re.NoError(err) re.Len(regions, regionsInfo.Count) @@ -263,7 +220,7 @@ func (suite *regionTestSuite) TestStoreRegions() { regionIDs := []uint64{2, 3} url := fmt.Sprintf("%s/regions/store/%d", suite.urlPrefix, 1) - r4 := &RegionsInfo{} + r4 := &response.RegionsInfo{} err := tu.ReadGetJSON(re, testDialClient, url, r4) re.NoError(err) re.Len(regionIDs, r4.Count) @@ -274,7 +231,7 @@ func (suite *regionTestSuite) TestStoreRegions() { regionIDs = []uint64{4} url = fmt.Sprintf("%s/regions/store/%d", suite.urlPrefix, 2) - r5 := &RegionsInfo{} + r5 := &response.RegionsInfo{} err = tu.ReadGetJSON(re, testDialClient, url, r5) re.NoError(err) re.Len(regionIDs, r5.Count) @@ -284,7 +241,7 @@ func (suite *regionTestSuite) TestStoreRegions() { regionIDs = []uint64{} url = fmt.Sprintf("%s/regions/store/%d", suite.urlPrefix, 3) - r6 := &RegionsInfo{} + r6 := &response.RegionsInfo{} err = tu.ReadGetJSON(re, testDialClient, url, r6) re.NoError(err) re.Len(regionIDs, r6.Count) @@ -299,13 +256,13 @@ func (suite *regionTestSuite) TestTop() { mustRegionHeartbeat(re, suite.svr, r2) r3 := core.NewTestRegionInfo(3, 1, []byte("c"), []byte("d"), core.SetWrittenBytes(500), core.SetReadBytes(800), core.SetRegionConfVer(3), core.SetRegionVersion(2)) mustRegionHeartbeat(re, suite.svr, r3) - suite.checkTopRegions(re, fmt.Sprintf("%s/regions/writeflow", suite.urlPrefix), []uint64{2, 1, 3}) - suite.checkTopRegions(re, fmt.Sprintf("%s/regions/readflow", suite.urlPrefix), []uint64{1, 3, 2}) - suite.checkTopRegions(re, fmt.Sprintf("%s/regions/writeflow?limit=2", 
suite.urlPrefix), []uint64{2, 1}) - suite.checkTopRegions(re, fmt.Sprintf("%s/regions/confver", suite.urlPrefix), []uint64{3, 2, 1}) - suite.checkTopRegions(re, fmt.Sprintf("%s/regions/confver?limit=2", suite.urlPrefix), []uint64{3, 2}) - suite.checkTopRegions(re, fmt.Sprintf("%s/regions/version", suite.urlPrefix), []uint64{2, 3, 1}) - suite.checkTopRegions(re, fmt.Sprintf("%s/regions/version?limit=2", suite.urlPrefix), []uint64{2, 3}) + checkTopRegions(re, fmt.Sprintf("%s/regions/writeflow", suite.urlPrefix), []uint64{2, 1, 3}) + checkTopRegions(re, fmt.Sprintf("%s/regions/readflow", suite.urlPrefix), []uint64{1, 3, 2}) + checkTopRegions(re, fmt.Sprintf("%s/regions/writeflow?limit=2", suite.urlPrefix), []uint64{2, 1}) + checkTopRegions(re, fmt.Sprintf("%s/regions/confver", suite.urlPrefix), []uint64{3, 2, 1}) + checkTopRegions(re, fmt.Sprintf("%s/regions/confver?limit=2", suite.urlPrefix), []uint64{3, 2}) + checkTopRegions(re, fmt.Sprintf("%s/regions/version", suite.urlPrefix), []uint64{2, 3, 1}) + checkTopRegions(re, fmt.Sprintf("%s/regions/version?limit=2", suite.urlPrefix), []uint64{2, 3}) // Top size. baseOpt := []core.RegionCreateOption{core.SetRegionConfVer(3), core.SetRegionVersion(3)} opt := core.SetApproximateSize(1000) @@ -317,8 +274,8 @@ func (suite *regionTestSuite) TestTop() { opt = core.SetApproximateSize(800) r3 = core.NewTestRegionInfo(3, 1, []byte("c"), []byte("d"), append(baseOpt, opt)...) mustRegionHeartbeat(re, suite.svr, r3) - suite.checkTopRegions(re, fmt.Sprintf("%s/regions/size?limit=2", suite.urlPrefix), []uint64{1, 2}) - suite.checkTopRegions(re, fmt.Sprintf("%s/regions/size", suite.urlPrefix), []uint64{1, 2, 3}) + checkTopRegions(re, fmt.Sprintf("%s/regions/size?limit=2", suite.urlPrefix), []uint64{1, 2}) + checkTopRegions(re, fmt.Sprintf("%s/regions/size", suite.urlPrefix), []uint64{1, 2, 3}) // Top CPU usage. baseOpt = []core.RegionCreateOption{core.SetRegionConfVer(4), core.SetRegionVersion(4)} opt = core.SetCPUUsage(100) @@ -330,12 +287,12 @@ func (suite *regionTestSuite) TestTop() { opt = core.SetCPUUsage(500) r3 = core.NewTestRegionInfo(3, 1, []byte("c"), []byte("d"), append(baseOpt, opt)...) 
mustRegionHeartbeat(re, suite.svr, r3) - suite.checkTopRegions(re, fmt.Sprintf("%s/regions/cpu?limit=2", suite.urlPrefix), []uint64{3, 2}) - suite.checkTopRegions(re, fmt.Sprintf("%s/regions/cpu", suite.urlPrefix), []uint64{3, 2, 1}) + checkTopRegions(re, fmt.Sprintf("%s/regions/cpu?limit=2", suite.urlPrefix), []uint64{3, 2}) + checkTopRegions(re, fmt.Sprintf("%s/regions/cpu", suite.urlPrefix), []uint64{3, 2, 1}) } -func (suite *regionTestSuite) checkTopRegions(re *require.Assertions, url string, regionIDs []uint64) { - regions := &RegionsInfo{} +func checkTopRegions(re *require.Assertions, url string, regionIDs []uint64) { + regions := &response.RegionsInfo{} err := tu.ReadGetJSON(re, testDialClient, url, regions) re.NoError(err) re.Len(regionIDs, regions.Count) @@ -375,29 +332,36 @@ func TestRegionsWithKillRequest(t *testing.T) { addr := svr.GetAddr() url := fmt.Sprintf("%s%s/api/v1/regions", addr, apiPrefix) mustBootstrapCluster(re, svr) + regionCount := 100000 - for i := 0; i < regionCount; i++ { + tu.GenerateTestDataConcurrently(regionCount, func(i int) { r := core.NewTestRegionInfo(uint64(i+2), 1, []byte(fmt.Sprintf("%09d", i)), []byte(fmt.Sprintf("%09d", i+1)), core.SetApproximateKeys(10), core.SetApproximateSize(10)) mustRegionHeartbeat(re, svr, r) - } + }) ctx, cancel := context.WithCancel(context.Background()) req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, http.NoBody) re.NoError(err) - respCh := make(chan *http.Response) + doneCh := make(chan struct{}) go func() { - resp, err := testDialClient.Do(req) // nolint:bodyclose + resp, err := testDialClient.Do(req) + defer func() { + if resp != nil { + resp.Body.Close() + } + }() re.Error(err) re.Contains(err.Error(), "context canceled") - respCh <- resp + re.Nil(resp) + doneCh <- struct{}{} }() time.Sleep(100 * time.Millisecond) // wait for the request to be sent - cancel() // close the request - resp := <-respCh - re.Nil(resp) + cancel() + <-doneCh + close(doneCh) } type getRegionTestSuite struct { @@ -431,7 +395,7 @@ func (suite *getRegionTestSuite) TestRegionKey() { r := core.NewTestRegionInfo(99, 1, []byte{0xFF, 0xFF, 0xAA}, []byte{0xFF, 0xFF, 0xCC}, core.SetWrittenBytes(500), core.SetReadBytes(800), core.SetRegionConfVer(3), core.SetRegionVersion(2)) mustRegionHeartbeat(re, suite.svr, r) url := fmt.Sprintf("%s/region/key/%s", suite.urlPrefix, url.QueryEscape(string([]byte{0xFF, 0xFF, 0xBB}))) - RegionInfo := &RegionInfo{} + RegionInfo := &response.RegionInfo{} err := tu.ReadGetJSON(re, testDialClient, url, RegionInfo) re.NoError(err) re.Equal(RegionInfo.ID, r.GetID()) @@ -452,7 +416,7 @@ func (suite *getRegionTestSuite) TestScanRegionByKeys() { url := fmt.Sprintf("%s/regions/key?key=%s", suite.urlPrefix, "b") regionIDs := []uint64{3, 4, 5, 99} - regions := &RegionsInfo{} + regions := &response.RegionsInfo{} err := tu.ReadGetJSON(re, testDialClient, url, regions) re.NoError(err) re.Len(regionIDs, regions.Count) @@ -461,7 +425,7 @@ func (suite *getRegionTestSuite) TestScanRegionByKeys() { } url = fmt.Sprintf("%s/regions/key?key=%s", suite.urlPrefix, "d") regionIDs = []uint64{4, 5, 99} - regions = &RegionsInfo{} + regions = &response.RegionsInfo{} err = tu.ReadGetJSON(re, testDialClient, url, regions) re.NoError(err) re.Len(regionIDs, regions.Count) @@ -470,7 +434,7 @@ func (suite *getRegionTestSuite) TestScanRegionByKeys() { } url = fmt.Sprintf("%s/regions/key?key=%s", suite.urlPrefix, "g") regionIDs = []uint64{5, 99} - regions = &RegionsInfo{} + regions = &response.RegionsInfo{} err = tu.ReadGetJSON(re, 
testDialClient, url, regions) re.NoError(err) re.Len(regionIDs, regions.Count) @@ -479,7 +443,7 @@ func (suite *getRegionTestSuite) TestScanRegionByKeys() { } url = fmt.Sprintf("%s/regions/key?end_key=%s", suite.urlPrefix, "e") regionIDs = []uint64{2, 3, 4} - regions = &RegionsInfo{} + regions = &response.RegionsInfo{} err = tu.ReadGetJSON(re, testDialClient, url, regions) re.NoError(err) re.Len(regionIDs, regions.Count) @@ -488,7 +452,7 @@ func (suite *getRegionTestSuite) TestScanRegionByKeys() { } url = fmt.Sprintf("%s/regions/key?key=%s&end_key=%s", suite.urlPrefix, "b", "g") regionIDs = []uint64{3, 4} - regions = &RegionsInfo{} + regions = &response.RegionsInfo{} err = tu.ReadGetJSON(re, testDialClient, url, regions) re.NoError(err) re.Len(regionIDs, regions.Count) @@ -497,13 +461,48 @@ func (suite *getRegionTestSuite) TestScanRegionByKeys() { } url = fmt.Sprintf("%s/regions/key?key=%s&end_key=%s", suite.urlPrefix, "b", []byte{0xFF, 0xFF, 0xCC}) regionIDs = []uint64{3, 4, 5, 99} - regions = &RegionsInfo{} + regions = &response.RegionsInfo{} + err = tu.ReadGetJSON(re, testDialClient, url, regions) + re.NoError(err) + re.Len(regionIDs, regions.Count) + for i, v := range regionIDs { + re.Equal(regions.Regions[i].ID, v) + } + url = fmt.Sprintf("%s/regions/key?key=%s&format=hex", suite.urlPrefix, hex.EncodeToString([]byte("b"))) + regionIDs = []uint64{3, 4, 5, 99} + regions = &response.RegionsInfo{} + err = tu.ReadGetJSON(re, testDialClient, url, regions) + re.NoError(err) + re.Len(regionIDs, regions.Count) + for i, v := range regionIDs { + re.Equal(regions.Regions[i].ID, v) + } + url = fmt.Sprintf("%s/regions/key?key=%s&end_key=%s&format=hex", + suite.urlPrefix, hex.EncodeToString([]byte("b")), hex.EncodeToString([]byte("g"))) + regionIDs = []uint64{3, 4} + regions = &response.RegionsInfo{} + err = tu.ReadGetJSON(re, testDialClient, url, regions) + re.NoError(err) + re.Len(regionIDs, regions.Count) + for i, v := range regionIDs { + re.Equal(regions.Regions[i].ID, v) + } + url = fmt.Sprintf("%s/regions/key?key=%s&end_key=%s&format=hex", + suite.urlPrefix, hex.EncodeToString([]byte("b")), hex.EncodeToString([]byte{0xFF, 0xFF, 0xCC})) + regionIDs = []uint64{3, 4, 5, 99} + regions = &response.RegionsInfo{} err = tu.ReadGetJSON(re, testDialClient, url, regions) re.NoError(err) re.Len(regionIDs, regions.Count) for i, v := range regionIDs { re.Equal(regions.Regions[i].ID, v) } + // test invalid key + url = fmt.Sprintf("%s/regions/key?key=%s&format=hex", suite.urlPrefix, "invalid") + err = tu.CheckGetJSON(testDialClient, url, nil, + tu.Status(re, http.StatusBadRequest), + tu.StringEqual(re, "\"encoding/hex: invalid byte: U+0069 'i'\"\n")) + re.NoError(err) } // Start a new test suite to prevent from being interfered by other tests. 
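The new negative case relies on the exact message encoding/hex returns for a non-hex byte; the quoted string in the assertion is produced by hex.InvalidByteError:

package main

import (
	"encoding/hex"
	"fmt"
)

func main() {
	_, err := hex.DecodeString("invalid")
	fmt.Println(err) // encoding/hex: invalid byte: U+0069 'i'
}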
@@ -605,9 +604,9 @@ func TestRegionsInfoMarshal(t *testing.T) { core.SetReadKeys(10), core.SetWrittenKeys(10)), }, } - regionsInfo := &RegionsInfo{} + regionsInfo := &response.RegionsInfo{} for _, regions := range cases { - b, err := marshalRegionsInfoJSON(context.Background(), regions) + b, err := response.MarshalRegionsInfoJSON(context.Background(), regions) re.NoError(err) err = json.Unmarshal(b, regionsInfo) re.NoError(err) @@ -648,7 +647,7 @@ func BenchmarkGetRegions(b *testing.B) { mustRegionHeartbeat(re, svr, r) } resp, _ := apiutil.GetJSON(testDialClient, url, nil) - regions := &RegionsInfo{} + regions := &response.RegionsInfo{} err := json.NewDecoder(resp.Body).Decode(regions) re.NoError(err) re.Equal(regionCount, regions.Count) diff --git a/server/api/router.go b/server/api/router.go index fc963d6a2f9..553332e96af 100644 --- a/server/api/router.go +++ b/server/api/router.go @@ -71,7 +71,7 @@ func createIndentRender() *render.Render { }) } -func getFunctionName(f interface{}) string { +func getFunctionName(f any) string { strs := strings.Split(runtime.FuncForPC(reflect.ValueOf(f).Pointer()).Name(), ".") return strings.Split(strs[len(strs)-1], "-")[0] } @@ -133,6 +133,7 @@ func createRouter(prefix string, svr *server.Server) *mux.Router { operatorHandler := newOperatorHandler(handler, rd) registerFunc(apiRouter, "/operators", operatorHandler.GetOperators, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(apiRouter, "/operators", operatorHandler.CreateOperator, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/operators", operatorHandler.DeleteOperators, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) registerFunc(apiRouter, "/operators/records", operatorHandler.GetOperatorRecords, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(apiRouter, "/operators/{region_id}", operatorHandler.GetOperatorsByRegion, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(apiRouter, "/operators/{region_id}", operatorHandler.DeleteOperatorByRegion, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) @@ -256,7 +257,9 @@ func createRouter(prefix string, svr *server.Server) *mux.Router { registerFunc(clusterRouter, "/regions/store/{id}", regionsHandler.GetStoreRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/keyspace/id/{id}", regionsHandler.GetKeyspaceRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/writeflow", regionsHandler.GetTopWriteFlowRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/writequery", regionsHandler.GetTopWriteQueryRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/readflow", regionsHandler.GetTopReadFlowRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/readquery", regionsHandler.GetTopReadQueryRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/confver", regionsHandler.GetTopConfVerRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/version", regionsHandler.GetTopVersionRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/size", regionsHandler.GetTopSizeRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) @@ 
-383,9 +386,9 @@ func createRouter(prefix string, svr *server.Server) *mux.Router { // Deprecated: use /pd/api/v1/health instead. rootRouter.HandleFunc("/health", healthHandler.GetHealthStatus).Methods(http.MethodGet) // Deprecated: use /pd/api/v1/ping instead. - rootRouter.HandleFunc("/ping", func(w http.ResponseWriter, r *http.Request) {}).Methods(http.MethodGet) + rootRouter.HandleFunc("/ping", func(http.ResponseWriter, *http.Request) {}).Methods(http.MethodGet) - rootRouter.Walk(func(route *mux.Route, router *mux.Router, ancestors []*mux.Route) error { + rootRouter.Walk(func(route *mux.Route, _ *mux.Router, _ []*mux.Route) error { serviceLabel := route.GetName() methods, _ := route.GetMethods() path, _ := route.GetPathTemplate() diff --git a/server/api/scheduler.go b/server/api/scheduler.go index e28e852b006..b1d3e8c07af 100644 --- a/server/api/scheduler.go +++ b/server/api/scheduler.go @@ -70,7 +70,7 @@ func (h *schedulerHandler) GetSchedulers(w http.ResponseWriter, r *http.Request) // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /schedulers [post] func (h *schedulerHandler) CreateScheduler(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(h.r, w, r.Body, &input); err != nil { return } @@ -142,10 +142,23 @@ func (h *schedulerHandler) CreateScheduler(w http.ResponseWriter, r *http.Reques return } - case schedulers.GrantLeaderName: - h.addEvictOrGrant(w, input, schedulers.GrantLeaderName) - case schedulers.EvictLeaderName: - h.addEvictOrGrant(w, input, schedulers.EvictLeaderName) + case schedulers.GrantLeaderName, schedulers.EvictLeaderName: + storeID, ok := input["store_id"].(float64) + if !ok { + h.r.JSON(w, http.StatusBadRequest, "missing store id") + return + } + exist, err := h.AddEvictOrGrant(storeID, name) + if err != nil { + h.r.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + // we should ensure whether it is the first time to create evict-leader-scheduler + // or just update the evict-leader. + if exist { + h.r.JSON(w, http.StatusOK, "The scheduler has been applied to the store.") + return + } case schedulers.ShuffleLeaderName: if err := h.AddShuffleLeaderScheduler(); err != nil { h.r.JSON(w, http.StatusInternalServerError, err.Error()) @@ -204,18 +217,6 @@ func (h *schedulerHandler) CreateScheduler(w http.ResponseWriter, r *http.Reques h.r.JSON(w, http.StatusOK, "The scheduler is created.") } -func (h *schedulerHandler) addEvictOrGrant(w http.ResponseWriter, input map[string]interface{}, name string) { - storeID, ok := input["store_id"].(float64) - if !ok { - h.r.JSON(w, http.StatusBadRequest, "missing store id") - return - } - err := h.AddEvictOrGrant(storeID, name) - if err != nil { - h.r.JSON(w, http.StatusInternalServerError, err.Error()) - } -} - // @Tags scheduler // @Summary Delete a scheduler. // @Param name path string true "The name of the scheduler." 
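With grant-leader and evict-leader merged into one case, posting the same scheduler again for another store updates the existing scheduler and answers "The scheduler has been applied to the store." A hedged sketch of such a request: the name field and the literal scheduler name are assumptions, since only the store_id handling is visible in this hunk.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	payload := map[string]any{
		"name":     "evict-leader-scheduler", // assumed value of schedulers.EvictLeaderName
		"store_id": 1,
	}
	data, _ := json.Marshal(payload)

	// hypothetical PD address; the path comes from the /schedulers route
	req, _ := http.NewRequest(http.MethodPost, "http://127.0.0.1:2379/pd/api/v1/schedulers", bytes.NewReader(data))
	req.Header.Set("Content-Type", "application/json")
	fmt.Println(req.Method, req.URL, string(data))
}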
diff --git a/server/api/server.go b/server/api/server.go index ad614593b2f..7b7066c4f77 100644 --- a/server/api/server.go +++ b/server/api/server.go @@ -51,7 +51,7 @@ func NewHandler(_ context.Context, svr *server.Server) (http.Handler, apiutil.AP // "/checker/{name}", http.MethodPost // "/checker/{name}", http.MethodGet // "/schedulers", http.MethodGet - // "/schedulers/{name}", http.MethodPost + // "/schedulers/{name}", http.MethodPost, which is to be used to pause or resume the scheduler rather than create a new scheduler // "/schedulers/diagnostic/{name}", http.MethodGet // "/scheduler-config", http.MethodGet // "/hotspot/regions/read", http.MethodGet @@ -62,6 +62,8 @@ func NewHandler(_ context.Context, svr *server.Server) (http.Handler, apiutil.AP // Following requests are **not** redirected: // "/schedulers", http.MethodPost // "/schedulers/{name}", http.MethodDelete + // Because the writing of all the config of the scheduling service is in the API server, + // we should not post and delete the scheduler directly in the scheduling service. router.PathPrefix(apiPrefix).Handler(negroni.New( serverapi.NewRuntimeServiceValidator(svr, group), serverapi.NewRedirector(svr, @@ -163,7 +165,7 @@ func NewHandler(_ context.Context, svr *server.Server) (http.Handler, apiutil.AP mcs.SchedulingServiceName, []string{http.MethodGet}), serverapi.MicroserviceRedirectRule( - prefix+"/schedulers/", // Note: this means "/schedulers/{name}" + prefix+"/schedulers/", // Note: this means "/schedulers/{name}", which is to be used to pause or resume the scheduler scheapi.APIPathPrefix+"/schedulers", mcs.SchedulingServiceName, []string{http.MethodPost}), diff --git a/server/api/service_gc_safepoint.go b/server/api/service_gc_safepoint.go index 270edca58bf..d6bb153eb6f 100644 --- a/server/api/service_gc_safepoint.go +++ b/server/api/service_gc_safepoint.go @@ -16,6 +16,7 @@ package api import ( "net/http" + "time" "github.com/gorilla/mux" "github.com/tikv/pd/pkg/storage/endpoint" @@ -38,8 +39,9 @@ func newServiceGCSafepointHandler(svr *server.Server, rd *render.Render) *servic // ListServiceGCSafepoint is the response for list service GC safepoint. // NOTE: This type is exported by HTTP API. Please pay more attention when modifying it. type ListServiceGCSafepoint struct { - ServiceGCSafepoints []*endpoint.ServiceSafePoint `json:"service_gc_safe_points"` - GCSafePoint uint64 `json:"gc_safe_point"` + ServiceGCSafepoints []*endpoint.ServiceSafePoint `json:"service_gc_safe_points"` + MinServiceGcSafepoint uint64 `json:"min_service_gc_safe_point,omitempty"` + GCSafePoint uint64 `json:"gc_safe_point"` } // @Tags service_gc_safepoint @@ -48,7 +50,7 @@ type ListServiceGCSafepoint struct { // @Success 200 {array} ListServiceGCSafepoint // @Failure 500 {string} string "PD server failed to proceed the request." 
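ListServiceGCSafepoint gains min_service_gc_safe_point, computed in the hunk that follows as the smallest safepoint among services whose safepoint has not yet expired, or 0 when none qualifies. The selection logic in isolation, with a small stand-in for endpoint.ServiceSafePoint:

package main

import (
	"fmt"
	"time"
)

// serviceSafePoint is a stand-in carrying just the two fields the loop reads.
type serviceSafePoint struct {
	SafePoint uint64
	ExpiredAt int64 // unix seconds
}

// minServiceGCSafepoint mirrors the selection done in GetGCSafePoint in the next hunk.
func minServiceGCSafepoint(ssps []serviceSafePoint, now time.Time) uint64 {
	var minSSp *serviceSafePoint
	for i := range ssps {
		ssp := &ssps[i]
		if (minSSp == nil || minSSp.SafePoint > ssp.SafePoint) && ssp.ExpiredAt > now.Unix() {
			minSSp = ssp
		}
	}
	if minSSp == nil {
		return 0
	}
	return minSSp.SafePoint
}

func main() {
	now := time.Now()
	ssps := []serviceSafePoint{
		{SafePoint: 3, ExpiredAt: now.Add(time.Hour).Unix()},
		{SafePoint: 1, ExpiredAt: now.Add(-time.Hour).Unix()}, // expired, ignored
		{SafePoint: 2, ExpiredAt: now.Add(time.Hour).Unix()},
	}
	fmt.Println(minServiceGCSafepoint(ssps, now)) // 2
}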
// @Router /gc/safepoint [get] -func (h *serviceGCSafepointHandler) GetGCSafePoint(w http.ResponseWriter, r *http.Request) { +func (h *serviceGCSafepointHandler) GetGCSafePoint(w http.ResponseWriter, _ *http.Request) { storage := h.svr.GetStorage() gcSafepoint, err := storage.LoadGCSafePoint() if err != nil { @@ -60,9 +62,21 @@ func (h *serviceGCSafepointHandler) GetGCSafePoint(w http.ResponseWriter, r *htt h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } + var minSSp *endpoint.ServiceSafePoint + for _, ssp := range ssps { + if (minSSp == nil || minSSp.SafePoint > ssp.SafePoint) && + ssp.ExpiredAt > time.Now().Unix() { + minSSp = ssp + } + } + minServiceGcSafepoint := uint64(0) + if minSSp != nil { + minServiceGcSafepoint = minSSp.SafePoint + } list := ListServiceGCSafepoint{ - GCSafePoint: gcSafepoint, - ServiceGCSafepoints: ssps, + GCSafePoint: gcSafepoint, + ServiceGCSafepoints: ssps, + MinServiceGcSafepoint: minServiceGcSafepoint, } h.rd.JSON(w, http.StatusOK, list) } diff --git a/server/api/service_gc_safepoint_test.go b/server/api/service_gc_safepoint_test.go index c2d0603ac49..6325babd438 100644 --- a/server/api/service_gc_safepoint_test.go +++ b/server/api/service_gc_safepoint_test.go @@ -77,7 +77,8 @@ func (suite *serviceGCSafepointTestSuite) TestServiceGCSafepoint() { SafePoint: 3, }, }, - GCSafePoint: 1, + GCSafePoint: 1, + MinServiceGcSafepoint: 1, } for _, ssp := range list.ServiceGCSafepoints { err := storage.SaveServiceGCSafePoint(ssp) diff --git a/server/api/service_middleware.go b/server/api/service_middleware.go index 41d2f6601f0..ecd41eede08 100644 --- a/server/api/service_middleware.go +++ b/server/api/service_middleware.go @@ -50,7 +50,7 @@ func newServiceMiddlewareHandler(svr *server.Server, rd *render.Render) *service // @Produce json // @Success 200 {object} config.Config // @Router /service-middleware/config [get] -func (h *serviceMiddlewareHandler) GetServiceMiddlewareConfig(w http.ResponseWriter, r *http.Request) { +func (h *serviceMiddlewareHandler) GetServiceMiddlewareConfig(w http.ResponseWriter, _ *http.Request) { h.rd.JSON(w, http.StatusOK, h.svr.GetServiceMiddlewareConfig()) } @@ -72,7 +72,7 @@ func (h *serviceMiddlewareHandler) SetServiceMiddlewareConfig(w http.ResponseWri return } - conf := make(map[string]interface{}) + conf := make(map[string]any) if err := json.Unmarshal(data, &conf); err != nil { h.rd.JSON(w, http.StatusBadRequest, err.Error()) return @@ -104,7 +104,7 @@ func (h *serviceMiddlewareHandler) SetServiceMiddlewareConfig(w http.ResponseWri h.rd.JSON(w, http.StatusOK, "The service-middleware config is updated.") } -func (h *serviceMiddlewareHandler) updateServiceMiddlewareConfig(cfg *config.ServiceMiddlewareConfig, key string, value interface{}) error { +func (h *serviceMiddlewareHandler) updateServiceMiddlewareConfig(cfg *config.ServiceMiddlewareConfig, key string, value any) error { kp := strings.Split(key, ".") switch kp[0] { case "audit": @@ -117,7 +117,7 @@ func (h *serviceMiddlewareHandler) updateServiceMiddlewareConfig(cfg *config.Ser return errors.Errorf("config prefix %s not found", kp[0]) } -func (h *serviceMiddlewareHandler) updateAudit(config *config.ServiceMiddlewareConfig, key string, value interface{}) error { +func (h *serviceMiddlewareHandler) updateAudit(config *config.ServiceMiddlewareConfig, key string, value any) error { updated, found, err := jsonutil.AddKeyValue(&config.AuditConfig, key, value) if err != nil { return err @@ -140,9 +140,9 @@ func (h *serviceMiddlewareHandler) updateAudit(config 
*config.ServiceMiddlewareC // @Success 200 {string} string // @Failure 400 {string} string "The input is invalid." // @Failure 500 {string} string "config item not found" -// @Router /service-middleware/config/rate-limit [POST] +// @Router /service-middleware/config/rate-limit [post] func (h *serviceMiddlewareHandler) SetRateLimitConfig(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } @@ -236,9 +236,9 @@ func (h *serviceMiddlewareHandler) SetRateLimitConfig(w http.ResponseWriter, r * // @Success 200 {string} string // @Failure 400 {string} string "The input is invalid." // @Failure 500 {string} string "config item not found" -// @Router /service-middleware/config/grpc-rate-limit [POST] +// @Router /service-middleware/config/grpc-rate-limit [post] func (h *serviceMiddlewareHandler) SetGRPCRateLimitConfig(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } diff --git a/server/api/service_middleware_test.go b/server/api/service_middleware_test.go index c4db58c39ae..7d5e0db98be 100644 --- a/server/api/service_middleware_test.go +++ b/server/api/service_middleware_test.go @@ -61,7 +61,7 @@ func (suite *auditMiddlewareTestSuite) TestConfigAuditSwitch() { re.NoError(tu.ReadGetJSON(re, testDialClient, addr, sc)) re.True(sc.EnableAudit) - ms := map[string]interface{}{ + ms := map[string]any{ "audit.enable-audit": "false", } postData, err := json.Marshal(ms) @@ -70,7 +70,7 @@ func (suite *auditMiddlewareTestSuite) TestConfigAuditSwitch() { sc = &config.ServiceMiddlewareConfig{} re.NoError(tu.ReadGetJSON(re, testDialClient, addr, sc)) re.False(sc.EnableAudit) - ms = map[string]interface{}{ + ms = map[string]any{ "enable-audit": "true", } postData, err = json.Marshal(ms) @@ -81,25 +81,25 @@ func (suite *auditMiddlewareTestSuite) TestConfigAuditSwitch() { re.True(sc.EnableAudit) // test empty - ms = map[string]interface{}{} + ms = map[string]any{} postData, err = json.Marshal(ms) re.NoError(err) re.NoError(tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re), tu.StringContain(re, "The input is empty."))) - ms = map[string]interface{}{ + ms = map[string]any{ "audit": "false", } postData, err = json.Marshal(ms) re.NoError(err) re.NoError(tu.CheckPostJSON(testDialClient, addr, postData, tu.Status(re, http.StatusBadRequest), tu.StringEqual(re, "config item audit not found"))) re.NoError(failpoint.Enable("github.com/tikv/pd/server/config/persistServiceMiddlewareFail", "return(true)")) - ms = map[string]interface{}{ + ms = map[string]any{ "audit.enable-audit": "false", } postData, err = json.Marshal(ms) re.NoError(err) re.NoError(tu.CheckPostJSON(testDialClient, addr, postData, tu.Status(re, http.StatusBadRequest))) re.NoError(failpoint.Disable("github.com/tikv/pd/server/config/persistServiceMiddlewareFail")) - ms = map[string]interface{}{ + ms = map[string]any{ "audit.audit": "false", } postData, err = json.Marshal(ms) @@ -135,7 +135,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateRateLimitConfig() { urlPrefix := fmt.Sprintf("%s%s/api/v1/service-middleware/config/rate-limit", suite.svr.GetAddr(), apiPrefix) // test empty type - input := make(map[string]interface{}) + input := make(map[string]any) input["type"] = 123 jsonBody, err := json.Marshal(input) re.NoError(err) @@ -143,7 +143,7 @@ func (suite 
*rateLimitConfigTestSuite) TestUpdateRateLimitConfig() { tu.Status(re, http.StatusBadRequest), tu.StringEqual(re, "\"The type is empty.\"\n")) re.NoError(err) // test invalid type - input = make(map[string]interface{}) + input = make(map[string]any) input["type"] = "url" jsonBody, err = json.Marshal(input) re.NoError(err) @@ -152,7 +152,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateRateLimitConfig() { re.NoError(err) // test empty label - input = make(map[string]interface{}) + input = make(map[string]any) input["type"] = "label" input["label"] = "" jsonBody, err = json.Marshal(input) @@ -161,7 +161,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateRateLimitConfig() { tu.Status(re, http.StatusBadRequest), tu.StringEqual(re, "\"The label is empty.\"\n")) re.NoError(err) // test no label matched - input = make(map[string]interface{}) + input = make(map[string]any) input["type"] = "label" input["label"] = "TestLabel" jsonBody, err = json.Marshal(input) @@ -171,7 +171,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateRateLimitConfig() { re.NoError(err) // test empty path - input = make(map[string]interface{}) + input = make(map[string]any) input["type"] = "path" input["path"] = "" jsonBody, err = json.Marshal(input) @@ -181,7 +181,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateRateLimitConfig() { re.NoError(err) // test path but no label matched - input = make(map[string]interface{}) + input = make(map[string]any) input["type"] = "path" input["path"] = "/pd/api/v1/test" jsonBody, err = json.Marshal(input) @@ -191,7 +191,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateRateLimitConfig() { re.NoError(err) // no change - input = make(map[string]interface{}) + input = make(map[string]any) input["type"] = "label" input["label"] = "GetHealthStatus" jsonBody, err = json.Marshal(input) @@ -201,7 +201,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateRateLimitConfig() { re.NoError(err) // change concurrency - input = make(map[string]interface{}) + input = make(map[string]any) input["type"] = "path" input["path"] = "/pd/api/v1/health" input["method"] = http.MethodGet @@ -219,7 +219,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateRateLimitConfig() { re.NoError(err) // change qps - input = make(map[string]interface{}) + input = make(map[string]any) input["type"] = "path" input["path"] = "/pd/api/v1/health" input["method"] = http.MethodGet @@ -230,7 +230,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateRateLimitConfig() { tu.StatusOK(re), tu.StringContain(re, "QPS rate limiter is changed.")) re.NoError(err) - input = make(map[string]interface{}) + input = make(map[string]any) input["type"] = "path" input["path"] = "/pd/api/v1/health" input["method"] = http.MethodGet @@ -250,7 +250,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateRateLimitConfig() { re.NoError(err) // change both - input = make(map[string]interface{}) + input = make(map[string]any) input["type"] = "path" input["path"] = "/pd/api/v1/debug/pprof/profile" input["qps"] = 100 @@ -272,7 +272,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateRateLimitConfig() { limiter.Update("SetRateLimitConfig", ratelimit.AddLabelAllowList()) // Allow list - input = make(map[string]interface{}) + input = make(map[string]any) input["type"] = "label" input["label"] = "SetRateLimitConfig" input["qps"] = 100 @@ -289,7 +289,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateGRPCRateLimitConfig() { re := suite.Require() // test empty label - input := make(map[string]interface{}) + input := make(map[string]any) 
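The rate-limit endpoint addresses a limiter either by label or by path plus method, optionally carrying concurrency and qps, which are exactly the keys exercised in these tests. A sketch of the path-based payload:

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	input := map[string]any{
		"type":        "path",
		"path":        "/pd/api/v1/health",
		"method":      http.MethodGet,
		"concurrency": 100,
		"qps":         100,
	}
	data, _ := json.Marshal(input)
	// POSTed to /pd/api/v1/service-middleware/config/rate-limit in the tests above.
	fmt.Println(string(data))
}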
input["label"] = "" jsonBody, err := json.Marshal(input) re.NoError(err) @@ -297,7 +297,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateGRPCRateLimitConfig() { tu.Status(re, http.StatusBadRequest), tu.StringEqual(re, "\"The label is empty.\"\n")) re.NoError(err) // test no label matched - input = make(map[string]interface{}) + input = make(map[string]any) input["label"] = "TestLabel" jsonBody, err = json.Marshal(input) re.NoError(err) @@ -306,7 +306,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateGRPCRateLimitConfig() { re.NoError(err) // no change - input = make(map[string]interface{}) + input = make(map[string]any) input["label"] = "StoreHeartbeat" jsonBody, err = json.Marshal(input) re.NoError(err) @@ -315,7 +315,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateGRPCRateLimitConfig() { re.NoError(err) // change concurrency - input = make(map[string]interface{}) + input = make(map[string]any) input["label"] = "StoreHeartbeat" input["concurrency"] = 100 jsonBody, err = json.Marshal(input) @@ -331,7 +331,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateGRPCRateLimitConfig() { re.NoError(err) // change qps - input = make(map[string]interface{}) + input = make(map[string]any) input["label"] = "StoreHeartbeat" input["qps"] = 100 jsonBody, err = json.Marshal(input) @@ -340,7 +340,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateGRPCRateLimitConfig() { tu.StatusOK(re), tu.StringContain(re, "QPS rate limiter is changed.")) re.NoError(err) - input = make(map[string]interface{}) + input = make(map[string]any) input["label"] = "StoreHeartbeat" input["qps"] = 0.3 jsonBody, err = json.Marshal(input) @@ -358,7 +358,7 @@ func (suite *rateLimitConfigTestSuite) TestUpdateGRPCRateLimitConfig() { re.NoError(err) // change both - input = make(map[string]interface{}) + input = make(map[string]any) input["label"] = "GetStore" input["qps"] = 100 input["concurrency"] = 100 @@ -384,7 +384,7 @@ func (suite *rateLimitConfigTestSuite) TestConfigRateLimitSwitch() { re.True(sc.RateLimitConfig.EnableRateLimit) re.True(sc.GRPCRateLimitConfig.EnableRateLimit) - ms := map[string]interface{}{ + ms := map[string]any{ "enable-rate-limit": "false", "enable-grpc-rate-limit": "false", } @@ -395,7 +395,7 @@ func (suite *rateLimitConfigTestSuite) TestConfigRateLimitSwitch() { re.NoError(tu.ReadGetJSON(re, testDialClient, addr, sc)) re.False(sc.RateLimitConfig.EnableRateLimit) re.False(sc.GRPCRateLimitConfig.EnableRateLimit) - ms = map[string]interface{}{ + ms = map[string]any{ "enable-rate-limit": "true", "enable-grpc-rate-limit": "true", } @@ -408,18 +408,18 @@ func (suite *rateLimitConfigTestSuite) TestConfigRateLimitSwitch() { re.True(sc.GRPCRateLimitConfig.EnableRateLimit) // test empty - ms = map[string]interface{}{} + ms = map[string]any{} postData, err = json.Marshal(ms) re.NoError(err) re.NoError(tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re), tu.StringContain(re, "The input is empty."))) - ms = map[string]interface{}{ + ms = map[string]any{ "rate-limit": "false", } postData, err = json.Marshal(ms) re.NoError(err) re.NoError(tu.CheckPostJSON(testDialClient, addr, postData, tu.Status(re, http.StatusBadRequest), tu.StringEqual(re, "config item rate-limit not found"))) re.NoError(failpoint.Enable("github.com/tikv/pd/server/config/persistServiceMiddlewareFail", "return(true)")) - ms = map[string]interface{}{ + ms = map[string]any{ "rate-limit.enable-rate-limit": "false", "grpc-rate-limit.enable-grpc-rate-limit": "false", } @@ -427,7 +427,7 @@ func (suite *rateLimitConfigTestSuite) 
TestConfigRateLimitSwitch() { re.NoError(err) re.NoError(tu.CheckPostJSON(testDialClient, addr, postData, tu.Status(re, http.StatusBadRequest))) re.NoError(failpoint.Disable("github.com/tikv/pd/server/config/persistServiceMiddlewareFail")) - ms = map[string]interface{}{ + ms = map[string]any{ "rate-limit.rate-limit": "false", } postData, err = json.Marshal(ms) @@ -440,10 +440,10 @@ func (suite *rateLimitConfigTestSuite) TestConfigLimiterConfigByOriginAPI() { // this test case is used to test updating `limiter-config` by origin API simply addr := fmt.Sprintf("%s/service-middleware/config", suite.urlPrefix) dimensionConfig := ratelimit.DimensionConfig{QPS: 1} - limiterConfig := map[string]interface{}{ + limiterConfig := map[string]any{ "CreateOperator": dimensionConfig, } - ms := map[string]interface{}{ + ms := map[string]any{ "limiter-config": limiterConfig, } postData, err := json.Marshal(ms) diff --git a/server/api/stats.go b/server/api/stats.go index 915d33ddfdf..5aa8fcb72a6 100644 --- a/server/api/stats.go +++ b/server/api/stats.go @@ -47,7 +47,7 @@ func (h *statsHandler) GetRegionStatus(w http.ResponseWriter, r *http.Request) { startKey, endKey := r.URL.Query().Get("start_key"), r.URL.Query().Get("end_key") var stats *statistics.RegionStats if r.URL.Query().Has("count") { - stats = rc.GetRegionCount([]byte(startKey), []byte(endKey)) + stats = rc.GetRegionStatsCount([]byte(startKey), []byte(endKey)) } else { stats = rc.GetRegionStatsByRange([]byte(startKey), []byte(endKey)) } diff --git a/server/api/stats_test.go b/server/api/stats_test.go index a4d8ef200a4..1485f9eb5af 100644 --- a/server/api/stats_test.go +++ b/server/api/stats_test.go @@ -193,9 +193,9 @@ func (suite *statsTestSuite) TestRegionStats() { args := fmt.Sprintf("?start_key=%s&end_key=%s&%s", data.startKey, data.endKey, query) res, err := testDialClient.Get(statsURL + args) re.NoError(err) - defer res.Body.Close() stats := &statistics.RegionStats{} err = apiutil.ReadJSON(res.Body, stats) + res.Body.Close() re.NoError(err) re.Equal(data.expect.Count, stats.Count) if query != "count" { diff --git a/server/api/status.go b/server/api/status.go index ba8f3b484dd..e25f5da5287 100644 --- a/server/api/status.go +++ b/server/api/status.go @@ -38,7 +38,7 @@ func newStatusHandler(svr *server.Server, rd *render.Render) *statusHandler { // @Produce json // @Success 200 {object} versioninfo.Status // @Router /status [get] -func (h *statusHandler) GetPDStatus(w http.ResponseWriter, r *http.Request) { +func (h *statusHandler) GetPDStatus(w http.ResponseWriter, _ *http.Request) { version := versioninfo.Status{ BuildTS: versioninfo.PDBuildTS, GitHash: versioninfo.PDGitHash, diff --git a/server/api/status_test.go b/server/api/status_test.go index 065618efb6c..5444fda77b4 100644 --- a/server/api/status_test.go +++ b/server/api/status_test.go @@ -33,7 +33,7 @@ func checkStatusResponse(re *require.Assertions, body []byte) { func TestStatus(t *testing.T) { re := require.New(t) - cfgs, _, clean := mustNewCluster(re, 3) + cfgs, _, clean := mustNewCluster(re, 1) defer clean() for _, cfg := range cfgs { diff --git a/server/api/store.go b/server/api/store.go index 44e178c23fd..1d0da0e9825 100644 --- a/server/api/store.go +++ b/server/api/store.go @@ -26,140 +26,16 @@ import ( "github.com/pingcap/errcode" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/metapb" - "github.com/tikv/pd/pkg/core" - "github.com/tikv/pd/pkg/core/constant" "github.com/tikv/pd/pkg/core/storelimit" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/response" sc 
"github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/utils/apiutil" - "github.com/tikv/pd/pkg/utils/typeutil" "github.com/tikv/pd/server" "github.com/unrolled/render" ) -// MetaStore contains meta information about a store. -type MetaStore struct { - *metapb.Store - StateName string `json:"state_name"` -} - -// SlowTrend contains slow trend information about a store. -type SlowTrend struct { - // CauseValue is the slow trend detecting raw input, it changes by the performance and pressure along time of the store. - // The value itself is not important, what matter is: - // - The comparition result from store to store. - // - The change magnitude along time (represented by CauseRate). - // Currently it's one of store's internal latency (duration of waiting in the task queue of raftstore.store). - CauseValue float64 `json:"cause_value"` - // CauseRate is for mesuring the change magnitude of CauseValue of the store, - // - CauseRate > 0 means the store is become slower currently - // - CauseRate < 0 means the store is become faster currently - // - CauseRate == 0 means the store's performance and pressure does not have significant changes - CauseRate float64 `json:"cause_rate"` - // ResultValue is the current gRPC QPS of the store. - ResultValue float64 `json:"result_value"` - // ResultRate is for mesuring the change magnitude of ResultValue of the store. - ResultRate float64 `json:"result_rate"` -} - -// StoreStatus contains status about a store. -type StoreStatus struct { - Capacity typeutil.ByteSize `json:"capacity"` - Available typeutil.ByteSize `json:"available"` - UsedSize typeutil.ByteSize `json:"used_size"` - LeaderCount int `json:"leader_count"` - LeaderWeight float64 `json:"leader_weight"` - LeaderScore float64 `json:"leader_score"` - LeaderSize int64 `json:"leader_size"` - RegionCount int `json:"region_count"` - RegionWeight float64 `json:"region_weight"` - RegionScore float64 `json:"region_score"` - RegionSize int64 `json:"region_size"` - LearnerCount int `json:"learner_count,omitempty"` - WitnessCount int `json:"witness_count,omitempty"` - SlowScore uint64 `json:"slow_score,omitempty"` - SlowTrend *SlowTrend `json:"slow_trend,omitempty"` - SendingSnapCount uint32 `json:"sending_snap_count,omitempty"` - ReceivingSnapCount uint32 `json:"receiving_snap_count,omitempty"` - IsBusy bool `json:"is_busy,omitempty"` - StartTS *time.Time `json:"start_ts,omitempty"` - LastHeartbeatTS *time.Time `json:"last_heartbeat_ts,omitempty"` - Uptime *typeutil.Duration `json:"uptime,omitempty"` -} - -// StoreInfo contains information about a store. 
-type StoreInfo struct { - Store *MetaStore `json:"store"` - Status *StoreStatus `json:"status"` -} - -const ( - disconnectedName = "Disconnected" - downStateName = "Down" -) - -func newStoreInfo(opt *sc.ScheduleConfig, store *core.StoreInfo) *StoreInfo { - var slowTrend *SlowTrend - coreSlowTrend := store.GetSlowTrend() - if coreSlowTrend != nil { - slowTrend = &SlowTrend{coreSlowTrend.CauseValue, coreSlowTrend.CauseRate, coreSlowTrend.ResultValue, coreSlowTrend.ResultRate} - } - s := &StoreInfo{ - Store: &MetaStore{ - Store: store.GetMeta(), - StateName: store.GetState().String(), - }, - Status: &StoreStatus{ - Capacity: typeutil.ByteSize(store.GetCapacity()), - Available: typeutil.ByteSize(store.GetAvailable()), - UsedSize: typeutil.ByteSize(store.GetUsedSize()), - LeaderCount: store.GetLeaderCount(), - LeaderWeight: store.GetLeaderWeight(), - LeaderScore: store.LeaderScore(constant.StringToSchedulePolicy(opt.LeaderSchedulePolicy), 0), - LeaderSize: store.GetLeaderSize(), - RegionCount: store.GetRegionCount(), - RegionWeight: store.GetRegionWeight(), - RegionScore: store.RegionScore(opt.RegionScoreFormulaVersion, opt.HighSpaceRatio, opt.LowSpaceRatio, 0), - RegionSize: store.GetRegionSize(), - LearnerCount: store.GetLearnerCount(), - WitnessCount: store.GetWitnessCount(), - SlowScore: store.GetSlowScore(), - SlowTrend: slowTrend, - SendingSnapCount: store.GetSendingSnapCount(), - ReceivingSnapCount: store.GetReceivingSnapCount(), - IsBusy: store.IsBusy(), - }, - } - - if store.GetStoreStats() != nil { - startTS := store.GetStartTime() - s.Status.StartTS = &startTS - } - if lastHeartbeat := store.GetLastHeartbeatTS(); !lastHeartbeat.IsZero() { - s.Status.LastHeartbeatTS = &lastHeartbeat - } - if upTime := store.GetUptime(); upTime > 0 { - duration := typeutil.NewDuration(upTime) - s.Status.Uptime = &duration - } - - if store.GetState() == metapb.StoreState_Up { - if store.DownTime() > opt.MaxStoreDownTime.Duration { - s.Store.StateName = downStateName - } else if store.IsDisconnected() { - s.Store.StateName = disconnectedName - } - } - return s -} - -// StoresInfo records stores' info. -type StoresInfo struct { - Count int `json:"count"` - Stores []*StoreInfo `json:"stores"` -} - type storeHandler struct { handler *server.Handler rd *render.Render @@ -176,7 +52,7 @@ func newStoreHandler(handler *server.Handler, rd *render.Render) *storeHandler { // @Summary Get a store's information. // @Param id path integer true "Store Id" // @Produce json -// @Success 200 {object} StoreInfo +// @Success 200 {object} response.StoreInfo // @Failure 400 {string} string "The input is invalid." // @Failure 404 {string} string "The store does not exist." // @Failure 500 {string} string "PD server failed to proceed the request." 
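Note on the map[string]interface{} -> map[string]any rewrites running through these handlers and tests: `any` has been a built-in alias for `interface{}` since Go 1.18, so the change is purely a spelling one and JSON decoding behaves identically. A minimal, self-contained illustration (the key name echoes the audit tests above; everything else is made up for the example):

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Decoding a request body into map[string]any behaves exactly like the
	// old map[string]interface{} form; only the spelling changes.
	payload := []byte(`{"audit.enable-audit": "false"}`)

	var viaAny map[string]any
	var viaIface map[string]interface{}
	if err := json.Unmarshal(payload, &viaAny); err != nil {
		panic(err)
	}
	if err := json.Unmarshal(payload, &viaIface); err != nil {
		panic(err)
	}
	fmt.Println(viaAny["audit.enable-audit"] == viaIface["audit.enable-audit"]) // prints true
}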
@@ -196,7 +72,7 @@ func (h *storeHandler) GetStore(w http.ResponseWriter, r *http.Request) { return } - storeInfo := newStoreInfo(h.handler.GetScheduleConfig(), store) + storeInfo := response.BuildStoreInfo(h.handler.GetScheduleConfig(), store) h.rd.JSON(w, http.StatusOK, storeInfo) } @@ -382,7 +258,7 @@ func (h *storeHandler) SetStoreWeight(w http.ResponseWriter, r *http.Request) { return } - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } @@ -442,7 +318,7 @@ func (h *storeHandler) SetStoreLimit(w http.ResponseWriter, r *http.Request) { return } - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } @@ -529,7 +405,7 @@ func (h *storesHandler) RemoveTombStone(w http.ResponseWriter, r *http.Request) // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /stores/limit [post] func (h *storesHandler) SetAllStoresLimit(w http.ResponseWriter, r *http.Request) { - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } @@ -576,7 +452,7 @@ func (h *storesHandler) SetAllStoresLimit(w http.ResponseWriter, r *http.Request } } } else { - labelMap := input["labels"].(map[string]interface{}) + labelMap := input["labels"].(map[string]any) labels := make([]*metapb.StoreLabel, 0, len(input)) for k, v := range labelMap { labels = append(labels, &metapb.StoreLabel{ @@ -738,15 +614,15 @@ func (h *storesHandler) GetStoresProgress(w http.ResponseWriter, r *http.Request // @Summary Get all stores in the cluster. // @Param state query array true "Specify accepted store states." // @Produce json -// @Success 200 {object} StoresInfo +// @Success 200 {object} response.StoresInfo // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /stores [get] // @Deprecated Better to use /stores/check instead. func (h *storesHandler) GetAllStores(w http.ResponseWriter, r *http.Request) { rc := getCluster(r) stores := rc.GetMetaStores() - StoresInfo := &StoresInfo{ - Stores: make([]*StoreInfo, 0, len(stores)), + StoresInfo := &response.StoresInfo{ + Stores: make([]*response.StoreInfo, 0, len(stores)), } urlFilter, err := newStoreStateFilter(r.URL) @@ -764,7 +640,7 @@ func (h *storesHandler) GetAllStores(w http.ResponseWriter, r *http.Request) { return } - storeInfo := newStoreInfo(h.GetScheduleConfig(), store) + storeInfo := response.BuildStoreInfo(h.GetScheduleConfig(), store) StoresInfo.Stores = append(StoresInfo.Stores, storeInfo) } StoresInfo.Count = len(StoresInfo.Stores) @@ -776,17 +652,17 @@ func (h *storesHandler) GetAllStores(w http.ResponseWriter, r *http.Request) { // @Summary Get all stores by states in the cluster. // @Param state query array true "Specify accepted store states." // @Produce json -// @Success 200 {object} StoresInfo +// @Success 200 {object} response.StoresInfo // @Failure 500 {string} string "PD server failed to proceed the request." 
// @Router /stores/check [get] func (h *storesHandler) GetStoresByState(w http.ResponseWriter, r *http.Request) { rc := getCluster(r) stores := rc.GetMetaStores() - StoresInfo := &StoresInfo{ - Stores: make([]*StoreInfo, 0, len(stores)), + StoresInfo := &response.StoresInfo{ + Stores: make([]*response.StoreInfo, 0, len(stores)), } - lowerStateName := []string{strings.ToLower(downStateName), strings.ToLower(disconnectedName)} + lowerStateName := []string{strings.ToLower(response.DownStateName), strings.ToLower(response.DisconnectedName)} for _, v := range metapb.StoreState_name { lowerStateName = append(lowerStateName, strings.ToLower(v)) } @@ -812,7 +688,7 @@ func (h *storesHandler) GetStoresByState(w http.ResponseWriter, r *http.Request) return } - storeInfo := newStoreInfo(h.GetScheduleConfig(), store) + storeInfo := response.BuildStoreInfo(h.GetScheduleConfig(), store) if queryStates != nil && !slice.Contains(queryStates, strings.ToLower(storeInfo.Store.StateName)) { continue } @@ -868,7 +744,7 @@ func (filter *storeStateFilter) filter(stores []*metapb.Store) []*metapb.Store { return ret } -func getStoreLimitType(input map[string]interface{}) ([]storelimit.Type, error) { +func getStoreLimitType(input map[string]any) ([]storelimit.Type, error) { typeNameIface, ok := input["type"] var err error if ok { diff --git a/server/api/store_test.go b/server/api/store_test.go index 9a81244cfcb..0fb970b42ed 100644 --- a/server/api/store_test.go +++ b/server/api/store_test.go @@ -30,6 +30,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/response" tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/pkg/utils/typeutil" "github.com/tikv/pd/pkg/versioninfo" @@ -50,7 +51,7 @@ func TestStoreTestSuite(t *testing.T) { suite.Run(t, new(storeTestSuite)) } -func (suite *storeTestSuite) requestStatusBody(re *require.Assertions, client *http.Client, method string, url string) int { +func requestStatusBody(re *require.Assertions, client *http.Client, method string, url string) int { req, err := http.NewRequest(method, url, http.NoBody) re.NoError(err) resp, err := client.Do(req) @@ -116,7 +117,7 @@ func (suite *storeTestSuite) TearDownSuite() { suite.cleanup() } -func checkStoresInfo(re *require.Assertions, ss []*StoreInfo, want []*metapb.Store) { +func checkStoresInfo(re *require.Assertions, ss []*response.StoreInfo, want []*metapb.Store) { re.Len(ss, len(want)) mapWant := make(map[uint64]*metapb.Store) for _, s := range want { @@ -135,32 +136,32 @@ func checkStoresInfo(re *require.Assertions, ss []*StoreInfo, want []*metapb.Sto func (suite *storeTestSuite) TestStoresList() { url := fmt.Sprintf("%s/stores", suite.urlPrefix) - info := new(StoresInfo) + info := new(response.StoresInfo) re := suite.Require() err := tu.ReadGetJSON(re, testDialClient, url, info) re.NoError(err) checkStoresInfo(re, info.Stores, suite.stores[:3]) url = fmt.Sprintf("%s/stores/check?state=up", suite.urlPrefix) - info = new(StoresInfo) + info = new(response.StoresInfo) err = tu.ReadGetJSON(re, testDialClient, url, info) re.NoError(err) checkStoresInfo(re, info.Stores, suite.stores[:2]) url = fmt.Sprintf("%s/stores/check?state=offline", suite.urlPrefix) - info = new(StoresInfo) + info = new(response.StoresInfo) err = tu.ReadGetJSON(re, testDialClient, url, info) re.NoError(err) checkStoresInfo(re, info.Stores, suite.stores[2:3]) url = fmt.Sprintf("%s/stores/check?state=tombstone", suite.urlPrefix) - info = new(StoresInfo) + info = 
new(response.StoresInfo) err = tu.ReadGetJSON(re, testDialClient, url, info) re.NoError(err) checkStoresInfo(re, info.Stores, suite.stores[3:]) url = fmt.Sprintf("%s/stores/check?state=tombstone&state=offline", suite.urlPrefix) - info = new(StoresInfo) + info = new(response.StoresInfo) err = tu.ReadGetJSON(re, testDialClient, url, info) re.NoError(err) checkStoresInfo(re, info.Stores, suite.stores[2:]) @@ -181,7 +182,7 @@ func (suite *storeTestSuite) TestStoresList() { re.NoError(err) url = fmt.Sprintf("%s/stores/check?state=down", suite.urlPrefix) - info = new(StoresInfo) + info = new(response.StoresInfo) err = tu.ReadGetJSON(re, testDialClient, url, info) re.NoError(err) checkStoresInfo(re, info.Stores, []*metapb.Store{store}) @@ -195,7 +196,7 @@ func (suite *storeTestSuite) TestStoresList() { re.NoError(err) url = fmt.Sprintf("%s/stores/check?state=disconnected", suite.urlPrefix) - info = new(StoresInfo) + info = new(response.StoresInfo) err = tu.ReadGetJSON(re, testDialClient, url, info) re.NoError(err) checkStoresInfo(re, info.Stores, []*metapb.Store{store}) @@ -215,20 +216,20 @@ func (suite *storeTestSuite) TestStoreGet() { }, }, ) - info := new(StoreInfo) + info := new(response.StoreInfo) err := tu.ReadGetJSON(re, testDialClient, url, info) re.NoError(err) capacity, _ := units.RAMInBytes("1.636TiB") available, _ := units.RAMInBytes("1.555TiB") re.Equal(capacity, int64(info.Status.Capacity)) re.Equal(available, int64(info.Status.Available)) - checkStoresInfo(re, []*StoreInfo{info}, suite.stores[:1]) + checkStoresInfo(re, []*response.StoreInfo{info}, suite.stores[:1]) } func (suite *storeTestSuite) TestStoreLabel() { url := fmt.Sprintf("%s/store/1", suite.urlPrefix) re := suite.Require() - var info StoreInfo + var info response.StoreInfo err := tu.ReadGetJSON(re, testDialClient, url, &info) re.NoError(err) re.Empty(info.Store.Labels) @@ -320,31 +321,31 @@ func (suite *storeTestSuite) TestStoreDelete() { } for _, testCase := range testCases { url := fmt.Sprintf("%s/store/%d", suite.urlPrefix, testCase.id) - status := suite.requestStatusBody(re, testDialClient, http.MethodDelete, url) + status := requestStatusBody(re, testDialClient, http.MethodDelete, url) re.Equal(testCase.status, status) } // store 6 origin status:offline url := fmt.Sprintf("%s/store/6", suite.urlPrefix) - store := new(StoreInfo) + store := new(response.StoreInfo) err := tu.ReadGetJSON(re, testDialClient, url, store) re.NoError(err) re.False(store.Store.PhysicallyDestroyed) re.Equal(metapb.StoreState_Offline, store.Store.State) // up store success because it is offline but not physically destroyed - status := suite.requestStatusBody(re, testDialClient, http.MethodPost, fmt.Sprintf("%s/state?state=Up", url)) + status := requestStatusBody(re, testDialClient, http.MethodPost, fmt.Sprintf("%s/state?state=Up", url)) re.Equal(http.StatusOK, status) - status = suite.requestStatusBody(re, testDialClient, http.MethodGet, url) + status = requestStatusBody(re, testDialClient, http.MethodGet, url) re.Equal(http.StatusOK, status) - store = new(StoreInfo) + store = new(response.StoreInfo) err = tu.ReadGetJSON(re, testDialClient, url, store) re.NoError(err) re.Equal(metapb.StoreState_Up, store.Store.State) re.False(store.Store.PhysicallyDestroyed) // offline store with physically destroyed - status = suite.requestStatusBody(re, testDialClient, http.MethodDelete, fmt.Sprintf("%s?force=true", url)) + status = requestStatusBody(re, testDialClient, http.MethodDelete, fmt.Sprintf("%s?force=true", url)) re.Equal(http.StatusOK, status) 
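Because the store response types now live in pkg/response with their JSON shape unchanged, code outside server/api can decode the store endpoint into them directly. A hedged sketch under that assumption; the /pd/api/v1 prefix matches the urlPrefix used by the tests above, while pdAddr, the helper name, and the printed fields are illustrative:

package example

import (
	"encoding/json"
	"fmt"
	"net/http"

	"github.com/tikv/pd/pkg/response"
)

// fetchStore decodes GET {pdAddr}/pd/api/v1/store/{id} into the relocated
// response.StoreInfo type. Field names follow the MetaStore/StoreStatus
// definitions this PR moves into pkg/response.
func fetchStore(pdAddr string, id uint64) (*response.StoreInfo, error) {
	resp, err := http.Get(fmt.Sprintf("%s/pd/api/v1/store/%d", pdAddr, id))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	info := new(response.StoreInfo)
	if err := json.NewDecoder(resp.Body).Decode(info); err != nil {
		return nil, err
	}
	fmt.Println(info.Store.StateName, info.Status.RegionCount)
	return info, nil
}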
err = tu.ReadGetJSON(re, testDialClient, url, store) re.NoError(err) @@ -352,7 +353,7 @@ func (suite *storeTestSuite) TestStoreDelete() { re.True(store.Store.PhysicallyDestroyed) // try to up store again failed because it is physically destroyed - status = suite.requestStatusBody(re, testDialClient, http.MethodPost, fmt.Sprintf("%s/state?state=Up", url)) + status = requestStatusBody(re, testDialClient, http.MethodPost, fmt.Sprintf("%s/state?state=Up", url)) re.Equal(http.StatusBadRequest, status) // reset store 6 suite.cleanup() @@ -366,13 +367,13 @@ func (suite *storeTestSuite) TestStoreSetState() { mustPutStore(re, suite.svr, uint64(id), metapb.StoreState_Up, metapb.NodeState_Serving, nil) } url := fmt.Sprintf("%s/store/1", suite.urlPrefix) - info := StoreInfo{} + info := response.StoreInfo{} err := tu.ReadGetJSON(re, testDialClient, url, &info) re.NoError(err) re.Equal(metapb.StoreState_Up, info.Store.State) // Set to Offline. - info = StoreInfo{} + info = response.StoreInfo{} err = tu.CheckPostJSON(testDialClient, url+"/state?state=Offline", nil, tu.StatusOK(re)) re.NoError(err) err = tu.ReadGetJSON(re, testDialClient, url, &info) @@ -380,14 +381,14 @@ func (suite *storeTestSuite) TestStoreSetState() { re.Equal(metapb.StoreState_Offline, info.Store.State) // store not found - info = StoreInfo{} + info = response.StoreInfo{} err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/store/10086/state?state=Offline", nil, tu.StatusNotOK(re)) re.NoError(err) // Invalid state. invalidStates := []string{"Foo", "Tombstone"} for _, state := range invalidStates { - info = StoreInfo{} + info = response.StoreInfo{} err = tu.CheckPostJSON(testDialClient, url+"/state?state="+state, nil, tu.StatusNotOK(re)) re.NoError(err) err := tu.ReadGetJSON(re, testDialClient, url, &info) @@ -396,7 +397,7 @@ func (suite *storeTestSuite) TestStoreSetState() { } // Set back to Up. 
- info = StoreInfo{} + info = response.StoreInfo{} err = tu.CheckPostJSON(testDialClient, url+"/state?state=Up", nil, tu.StatusOK(re)) re.NoError(err) err = tu.ReadGetJSON(re, testDialClient, url, &info) @@ -458,16 +459,16 @@ func (suite *storeTestSuite) TestDownState() { core.SetStoreStats(&pdpb.StoreStats{}), core.SetLastHeartbeatTS(time.Now()), ) - storeInfo := newStoreInfo(suite.svr.GetScheduleConfig(), store) + storeInfo := response.BuildStoreInfo(suite.svr.GetScheduleConfig(), store) re.Equal(metapb.StoreState_Up.String(), storeInfo.Store.StateName) newStore := store.Clone(core.SetLastHeartbeatTS(time.Now().Add(-time.Minute * 2))) - storeInfo = newStoreInfo(suite.svr.GetScheduleConfig(), newStore) - re.Equal(disconnectedName, storeInfo.Store.StateName) + storeInfo = response.BuildStoreInfo(suite.svr.GetScheduleConfig(), newStore) + re.Equal(response.DisconnectedName, storeInfo.Store.StateName) newStore = store.Clone(core.SetLastHeartbeatTS(time.Now().Add(-time.Hour * 2))) - storeInfo = newStoreInfo(suite.svr.GetScheduleConfig(), newStore) - re.Equal(downStateName, storeInfo.Store.StateName) + storeInfo = response.BuildStoreInfo(suite.svr.GetScheduleConfig(), newStore) + re.Equal(response.DownStateName, storeInfo.Store.StateName) } func (suite *storeTestSuite) TestGetAllLimit() { @@ -509,7 +510,7 @@ func (suite *storeTestSuite) TestGetAllLimit() { re := suite.Require() for _, testCase := range testCases { suite.T().Logf(testCase.name) - info := make(map[uint64]interface{}, 4) + info := make(map[uint64]any, 4) err := tu.ReadGetJSON(re, testDialClient, testCase.url, &info) re.NoError(err) re.Len(info, len(testCase.expectedStores)) @@ -524,7 +525,7 @@ func (suite *storeTestSuite) TestStoreLimitTTL() { re := suite.Require() // add peer url := fmt.Sprintf("%s/store/1/limit?ttlSecond=%v", suite.urlPrefix, 5) - data := map[string]interface{}{ + data := map[string]any{ "type": "add-peer", "rate": 999, } @@ -533,7 +534,7 @@ func (suite *storeTestSuite) TestStoreLimitTTL() { err = tu.CheckPostJSON(testDialClient, url, postData, tu.StatusOK(re)) re.NoError(err) // remove peer - data = map[string]interface{}{ + data = map[string]any{ "type": "remove-peer", "rate": 998, } @@ -543,7 +544,7 @@ func (suite *storeTestSuite) TestStoreLimitTTL() { re.NoError(err) // all store limit add peer url = fmt.Sprintf("%s/stores/limit?ttlSecond=%v", suite.urlPrefix, 3) - data = map[string]interface{}{ + data = map[string]any{ "type": "add-peer", "rate": 997, } @@ -552,7 +553,7 @@ func (suite *storeTestSuite) TestStoreLimitTTL() { err = tu.CheckPostJSON(testDialClient, url, postData, tu.StatusOK(re)) re.NoError(err) // all store limit remove peer - data = map[string]interface{}{ + data = map[string]any{ "type": "remove-peer", "rate": 996, } diff --git a/server/api/trend.go b/server/api/trend.go index d75086d267d..6a32b875f89 100644 --- a/server/api/trend.go +++ b/server/api/trend.go @@ -18,6 +18,7 @@ import ( "net/http" "time" + "github.com/tikv/pd/pkg/response" "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/statistics/utils" "github.com/tikv/pd/pkg/utils/apiutil" @@ -136,7 +137,7 @@ func (h *trendHandler) getTrendStores() ([]trendStore, error) { } trendStores := make([]trendStore, 0, len(stores)) for _, store := range stores { - info := newStoreInfo(h.svr.GetScheduleConfig(), store) + info := response.BuildStoreInfo(h.svr.GetScheduleConfig(), store) s := trendStore{ ID: info.Store.GetId(), Address: info.Store.GetAddress(), @@ -149,14 +150,14 @@ func (h *trendHandler) getTrendStores() ([]trendStore, 
error) { LastHeartbeatTS: info.Status.LastHeartbeatTS, Uptime: info.Status.Uptime, } - s.HotReadFlow, s.HotReadRegionFlows = h.getStoreFlow(hotRead.AsLeader, store.GetID()) - s.HotWriteFlow, s.HotWriteRegionFlows = h.getStoreFlow(hotWrite.AsPeer, store.GetID()) + s.HotReadFlow, s.HotReadRegionFlows = getStoreFlow(hotRead.AsLeader, store.GetID()) + s.HotWriteFlow, s.HotWriteRegionFlows = getStoreFlow(hotWrite.AsPeer, store.GetID()) trendStores = append(trendStores, s) } return trendStores, nil } -func (h *trendHandler) getStoreFlow(stats statistics.StoreHotPeersStat, storeID uint64) (storeByteFlow float64, regionByteFlows []float64) { +func getStoreFlow(stats statistics.StoreHotPeersStat, storeID uint64) (storeByteFlow float64, regionByteFlows []float64) { if stats == nil { return } diff --git a/server/api/unsafe_operation.go b/server/api/unsafe_operation.go index 11fd59933eb..dc41ec336e3 100644 --- a/server/api/unsafe_operation.go +++ b/server/api/unsafe_operation.go @@ -43,10 +43,10 @@ func newUnsafeOperationHandler(svr *server.Server, rd *render.Render) *unsafeOpe // Success 200 {string} string "Request has been accepted." // Failure 400 {string} string "The input is invalid." // Failure 500 {string} string "PD server failed to proceed the request." -// @Router /admin/unsafe/remove-failed-stores [POST] +// @Router /admin/unsafe/remove-failed-stores [post] func (h *unsafeOperationHandler) RemoveFailedStores(w http.ResponseWriter, r *http.Request) { rc := getCluster(r) - var input map[string]interface{} + var input map[string]any if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { h.rd.JSON(w, http.StatusBadRequest, err.Error()) return @@ -81,7 +81,7 @@ func (h *unsafeOperationHandler) RemoveFailedStores(w http.ResponseWriter, r *ht // @Summary Show the current status of failed stores removal. 
// @Produce json // Success 200 {object} []StageOutput -// @Router /admin/unsafe/remove-failed-stores/show [GET] +// @Router /admin/unsafe/remove-failed-stores/show [get] func (h *unsafeOperationHandler) GetFailedStoresRemovalStatus(w http.ResponseWriter, r *http.Request) { rc := getCluster(r) h.rd.JSON(w, http.StatusOK, rc.GetUnsafeRecoveryController().Show()) diff --git a/server/api/unsafe_operation_test.go b/server/api/unsafe_operation_test.go index 37c1506f6a7..e708a93abfc 100644 --- a/server/api/unsafe_operation_test.go +++ b/server/api/unsafe_operation_test.go @@ -56,25 +56,25 @@ func (suite *unsafeOperationTestSuite) TearDownTest() { func (suite *unsafeOperationTestSuite) TestRemoveFailedStores() { re := suite.Require() - input := map[string]interface{}{"stores": []uint64{}} + input := map[string]any{"stores": []uint64{}} data, _ := json.Marshal(input) err := tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/remove-failed-stores", data, tu.StatusNotOK(re), tu.StringEqual(re, "\"[PD:unsaferecovery:ErrUnsafeRecoveryInvalidInput]invalid input no store specified\"\n")) re.NoError(err) - input = map[string]interface{}{"stores": []string{"abc", "def"}} + input = map[string]any{"stores": []string{"abc", "def"}} data, _ = json.Marshal(input) err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/remove-failed-stores", data, tu.StatusNotOK(re), tu.StringEqual(re, "\"Store ids are invalid\"\n")) re.NoError(err) - input = map[string]interface{}{"stores": []uint64{1, 2}} + input = map[string]any{"stores": []uint64{1, 2}} data, _ = json.Marshal(input) err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/remove-failed-stores", data, tu.StatusNotOK(re), tu.StringEqual(re, "\"[PD:unsaferecovery:ErrUnsafeRecoveryInvalidInput]invalid input store 2 doesn't exist\"\n")) re.NoError(err) - input = map[string]interface{}{"stores": []uint64{1}} + input = map[string]any{"stores": []uint64{1}} data, _ = json.Marshal(input) err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/remove-failed-stores", data, tu.StatusOK(re)) re.NoError(err) @@ -88,13 +88,13 @@ func (suite *unsafeOperationTestSuite) TestRemoveFailedStores() { func (suite *unsafeOperationTestSuite) TestRemoveFailedStoresAutoDetect() { re := suite.Require() - input := map[string]interface{}{"auto-detect": false} + input := map[string]any{"auto-detect": false} data, _ := json.Marshal(input) err := tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/remove-failed-stores", data, tu.StatusNotOK(re), tu.StringEqual(re, "\"Store ids are invalid\"\n")) re.NoError(err) - input = map[string]interface{}{"auto-detect": true} + input = map[string]any{"auto-detect": true} data, _ = json.Marshal(input) err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/remove-failed-stores", data, tu.StatusOK(re)) re.NoError(err) diff --git a/server/api/version.go b/server/api/version.go index 5bf3aeb272c..99a8a7376e5 100644 --- a/server/api/version.go +++ b/server/api/version.go @@ -43,7 +43,7 @@ func newVersionHandler(rd *render.Render) *versionHandler { // @Produce json // @Success 200 {object} version // @Router /version [get] -func (h *versionHandler) GetVersion(w http.ResponseWriter, r *http.Request) { +func (h *versionHandler) GetVersion(w http.ResponseWriter, _ *http.Request) { version := &version{ Version: versioninfo.PDReleaseVersion, } diff --git a/server/apiv2/handlers/micro_service.go b/server/apiv2/handlers/micro_service.go index 3c2be3748d4..fd44665530f 100644 --- a/server/apiv2/handlers/micro_service.go +++ b/server/apiv2/handlers/micro_service.go 
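For context on the payloads exercised by these tests: the remove-failed-stores endpoint accepts either an explicit store list or the auto-detect flag, and nothing else is required. A hedged client-side sketch; the /pd/api/v1/admin/unsafe prefix is assumed from the @Router annotation above plus PD's usual base path, and pdAddr and the helper name are illustrative:

package example

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// removeFailedStores posts to the online unsafe recovery endpoint shown above.
// Pass storeIDs, or leave it empty and set autoDetect, mirroring the two
// payload shapes used by the tests in this diff.
func removeFailedStores(pdAddr string, storeIDs []uint64, autoDetect bool) error {
	input := map[string]any{}
	if autoDetect {
		input["auto-detect"] = true
	} else {
		input["stores"] = storeIDs
	}
	data, err := json.Marshal(input)
	if err != nil {
		return err
	}
	resp, err := http.Post(pdAddr+"/pd/api/v1/admin/unsafe/remove-failed-stores", "application/json", bytes.NewReader(data))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status: %s", resp.Status)
	}
	return nil
}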
@@ -26,30 +26,52 @@ import ( // RegisterMicroService registers microservice handler to the router. func RegisterMicroService(r *gin.RouterGroup) { router := r.Group("ms") - router.Use(middlewares.BootstrapChecker()) router.GET("members/:service", GetMembers) + router.GET("primary/:service", GetPrimary) } // GetMembers gets all members of the cluster for the specified service. // @Tags members // @Summary Get all members of the cluster for the specified service. // @Produce json -// @Success 200 {object} []string +// @Success 200 {object} []discovery.ServiceRegistryEntry // @Router /ms/members/{service} [get] func GetMembers(c *gin.Context) { svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) if !svr.IsAPIServiceMode() { - c.AbortWithStatusJSON(http.StatusServiceUnavailable, "not support micro service") + c.AbortWithStatusJSON(http.StatusNotFound, "not support micro service") return } if service := c.Param("service"); len(service) > 0 { - addrs, err := discovery.GetMSMembers(service, svr.GetClient()) + entries, err := discovery.GetMSMembers(service, svr.GetClient()) if err != nil { c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error()) return } - c.IndentedJSON(http.StatusOK, addrs) + c.IndentedJSON(http.StatusOK, entries) + return + } + + c.AbortWithStatusJSON(http.StatusInternalServerError, "please specify service") +} + +// GetPrimary gets the primary member of the specified service. +// @Tags primary +// @Summary Get the primary member of the specified service. +// @Produce json +// @Success 200 {object} string +// @Router /ms/primary/{service} [get] +func GetPrimary(c *gin.Context) { + svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) + if !svr.IsAPIServiceMode() { + c.AbortWithStatusJSON(http.StatusNotFound, "not support micro service") + return + } + + if service := c.Param("service"); len(service) > 0 { + addr, _ := svr.GetServicePrimaryAddr(c.Request.Context(), service) + c.IndentedJSON(http.StatusOK, addr) return } diff --git a/server/apiv2/handlers/tso_keyspace_group.go b/server/apiv2/handlers/tso_keyspace_group.go index a9f042687f6..ed3d37c27a9 100644 --- a/server/apiv2/handlers/tso_keyspace_group.go +++ b/server/apiv2/handlers/tso_keyspace_group.go @@ -413,8 +413,16 @@ func AllocNodesForKeyspaceGroup(c *gin.Context) { c.AbortWithStatusJSON(http.StatusBadRequest, "existed replica is larger than the new replica") return } + + // check if nodes exist + existMembers := make(map[string]struct{}) + for _, member := range keyspaceGroup.Members { + if exist, addr := manager.IsExistNode(member.Address); exist { + existMembers[addr] = struct{}{} + } + } // get the nodes - nodes, err := manager.AllocNodesForKeyspaceGroup(id, allocParams.Replica) + nodes, err := manager.AllocNodesForKeyspaceGroup(id, existMembers, allocParams.Replica) if err != nil { c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error()) return @@ -453,14 +461,9 @@ func SetNodesForKeyspaceGroup(c *gin.Context) { c.AbortWithStatusJSON(http.StatusBadRequest, "keyspace group does not exist") return } - // check if nodes is less than default replica count - if len(setParams.Nodes) < utils.DefaultKeyspaceGroupReplicaCount { - c.AbortWithStatusJSON(http.StatusBadRequest, "invalid num of nodes") - return - } // check if node exists for _, node := range setParams.Nodes { - if !manager.IsExistNode(node) { + if exist, _ := manager.IsExistNode(node); !exist { c.AbortWithStatusJSON(http.StatusBadRequest, "node does not exist") return } @@ -512,7 +515,7 @@ func 
SetPriorityForKeyspaceGroup(c *gin.Context) { // check if node exists members := kg.Members if slice.NoneOf(members, func(i int) bool { - return members[i].Address == node + return members[i].CompareAddress(node) }) { c.AbortWithStatusJSON(http.StatusBadRequest, "tso node does not exist in the keyspace group") } diff --git a/server/apiv2/middlewares/redirector.go b/server/apiv2/middlewares/redirector.go index 37c06de1585..9c2c4081175 100644 --- a/server/apiv2/middlewares/redirector.go +++ b/server/apiv2/middlewares/redirector.go @@ -43,8 +43,8 @@ func Redirector() gin.HandlerFunc { // Prevent more than one redirection. if name := c.Request.Header.Get(apiutil.PDRedirectorHeader); len(name) != 0 { - log.Error("redirect but server is not leader", zap.String("from", name), zap.String("server", svr.Name()), errs.ZapError(errs.ErrRedirect)) - c.AbortWithStatusJSON(http.StatusInternalServerError, errs.ErrRedirect.FastGenByArgs().Error()) + log.Error("redirect but server is not leader", zap.String("from", name), zap.String("server", svr.Name()), errs.ZapError(errs.ErrRedirectToNotLeader)) + c.AbortWithStatusJSON(http.StatusInternalServerError, errs.ErrRedirectToNotLeader.FastGenByArgs().Error()) return } diff --git a/server/apiv2/router.go b/server/apiv2/router.go index fd3ce38c0e4..781a1cca87b 100644 --- a/server/apiv2/router.go +++ b/server/apiv2/router.go @@ -17,18 +17,14 @@ package apiv2 import ( "context" "net/http" - "sync" "github.com/gin-gonic/gin" - "github.com/joho/godotenv" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/server" "github.com/tikv/pd/server/apiv2/handlers" "github.com/tikv/pd/server/apiv2/middlewares" ) -var once sync.Once - var group = apiutil.APIServiceGroup{ Name: "core", IsCore: true, @@ -49,12 +45,6 @@ const apiV2Prefix = "/pd/api/v2/" // @license.url http://www.apache.org/licenses/LICENSE-2.0.html // @BasePath /pd/api/v2 func NewV2Handler(_ context.Context, svr *server.Server) (http.Handler, apiutil.APIServiceGroup, error) { - once.Do(func() { - // See https://github.com/pingcap/tidb-dashboard/blob/f8ecb64e3d63f4ed91c3dca7a04362418ade01d8/pkg/apiserver/apiserver.go#L84 - // These global modification will be effective only for the first invoke. - _ = godotenv.Load() - gin.SetMode(gin.ReleaseMode) - }) router := gin.New() router.Use(func(c *gin.Context) { c.Set(middlewares.ServerContextKey, svr) diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index abb8af80a92..534d8361b2a 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -21,6 +21,7 @@ import ( "io" "math" "net/http" + "runtime" "strconv" "strings" "sync" @@ -44,6 +45,7 @@ import ( mcsutils "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/memory" "github.com/tikv/pd/pkg/progress" + "github.com/tikv/pd/pkg/ratelimit" "github.com/tikv/pd/pkg/replication" sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/schedule/hbstream" @@ -103,6 +105,11 @@ const ( // minSnapshotDurationSec is the minimum duration that a store can tolerate. // It should enlarge the limiter if the snapshot's duration is less than this value. minSnapshotDurationSec = 5 + + // heartbeat relative const + heartbeatTaskRunner = "heartbeat-async" + miscTaskRunner = "misc-async" + logTaskRunner = "log-async" ) // Server is the interface for cluster. 
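The /ms routes registered above are only served in API service mode; /ms/primary/{service} returns the primary's address as a single JSON string, while /ms/members/{service} now returns full registry entries instead of bare address strings. A hedged sketch of querying both; the /pd/api/v2 prefix comes from apiv2/router.go above, while "tso" as the service name, pdAddr, and the loose member decoding are illustrative:

package example

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// queryTSO hits the two discovery routes registered above. They are only
// served when PD runs as an API service; otherwise both return 404.
func queryTSO(pdAddr string) error {
	// The primary address comes back as a single JSON-encoded string.
	primaryResp, err := http.Get(pdAddr + "/pd/api/v2/ms/primary/tso")
	if err != nil {
		return err
	}
	defer primaryResp.Body.Close()
	var primary string
	if err := json.NewDecoder(primaryResp.Body).Decode(&primary); err != nil {
		return err
	}
	fmt.Println("tso primary:", primary)

	// Members come back as a JSON array of registry entries; decode loosely
	// here instead of importing discovery.ServiceRegistryEntry.
	membersResp, err := http.Get(pdAddr + "/pd/api/v2/ms/members/tso")
	if err != nil {
		return err
	}
	defer membersResp.Body.Close()
	var members []map[string]any
	if err := json.NewDecoder(membersResp.Body).Decode(&members); err != nil {
		return err
	}
	fmt.Println("tso members:", len(members))
	return nil
}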
@@ -136,6 +143,8 @@ type RaftCluster struct { ctx context.Context cancel context.CancelFunc + *core.BasicCluster // cached cluster info + etcdClient *clientv3.Client httpClient *http.Client @@ -152,7 +161,6 @@ type RaftCluster struct { // This below fields are all read-only, we cannot update itself after the raft cluster starts. clusterID uint64 id id.Allocator - core *core.BasicCluster // cached cluster info opt *config.PersistOptions limiter *StoreLimiter *schedulingController @@ -166,6 +174,13 @@ type RaftCluster struct { keyspaceGroupManager *keyspace.GroupManager independentServices sync.Map hbstreams *hbstream.HeartbeatStreams + + // heartbeatRunner is used to process the subtree update task asynchronously. + heartbeatRunner ratelimit.Runner + // miscRunner is used to process the statistics and persistent tasks asynchronously. + miscRunner ratelimit.Runner + // logRunner is used to process the log asynchronously. + logRunner ratelimit.Runner } // Status saves some state information. @@ -182,13 +197,16 @@ type Status struct { func NewRaftCluster(ctx context.Context, clusterID uint64, basicCluster *core.BasicCluster, storage storage.Storage, regionSyncer *syncer.RegionSyncer, etcdClient *clientv3.Client, httpClient *http.Client) *RaftCluster { return &RaftCluster{ - serverCtx: ctx, - clusterID: clusterID, - regionSyncer: regionSyncer, - httpClient: httpClient, - etcdClient: etcdClient, - core: basicCluster, - storage: storage, + serverCtx: ctx, + clusterID: clusterID, + regionSyncer: regionSyncer, + httpClient: httpClient, + etcdClient: etcdClient, + BasicCluster: basicCluster, + storage: storage, + heartbeatRunner: ratelimit.NewConcurrentRunner(heartbeatTaskRunner, ratelimit.NewConcurrencyLimiter(uint64(runtime.NumCPU()*2)), time.Minute), + miscRunner: ratelimit.NewConcurrentRunner(miscTaskRunner, ratelimit.NewConcurrencyLimiter(uint64(runtime.NumCPU()*2)), time.Minute), + logRunner: ratelimit.NewConcurrentRunner(logTaskRunner, ratelimit.NewConcurrencyLimiter(uint64(runtime.NumCPU()*2)), time.Minute), } } @@ -234,10 +252,10 @@ func (c *RaftCluster) LoadClusterStatus() (*Status, error) { } func (c *RaftCluster) isInitialized() bool { - if c.core.GetTotalRegionCount() > 1 { + if c.GetTotalRegionCount() > 1 { return true } - region := c.core.GetRegionByKey(nil) + region := c.GetRegionByKey(nil) return region != nil && len(region.GetVoters()) >= int(c.opt.GetReplicationConfig().MaxReplicas) && len(region.GetPendingPeers()) == 0 @@ -278,7 +296,7 @@ func (c *RaftCluster) InitCluster( return err } } - c.schedulingController = newSchedulingController(c.ctx, c.core, c.opt, c.ruleManager) + c.schedulingController = newSchedulingController(c.ctx, c.BasicCluster, c.opt, c.ruleManager) return nil } @@ -291,7 +309,6 @@ func (c *RaftCluster) Start(s Server) error { log.Warn("raft cluster has already been started") return nil } - c.isAPIServiceMode = s.IsAPIServiceMode() err := c.InitCluster(s.GetAllocator(), s.GetPersistOptions(), s.GetHBStreams(), s.GetKeyspaceGroupManager()) if err != nil { @@ -347,6 +364,9 @@ func (c *RaftCluster) Start(s Server) error { go c.startGCTuner() c.running = true + c.heartbeatRunner.Start() + c.miscRunner.Start() + c.logRunner.Start() return nil } @@ -625,9 +645,9 @@ func (c *RaftCluster) LoadClusterInfo() (*RaftCluster, error) { return nil, nil } - c.core.ResetStores() + c.ResetStores() start := time.Now() - if err := c.storage.LoadStores(c.core.PutStore); err != nil { + if err := c.storage.LoadStores(c.PutStore); err != nil { return nil, err } log.Info("load stores", 
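The three runners added to RaftCluster above all follow the same lifecycle: construct with a concurrency limiter, Start alongside the cluster, hand keyed tasks to RunTask, and Stop on shutdown (processRegionHeartbeat further down is the main producer). A sketch of that pattern, assuming the ratelimit API exactly as it is used in this diff; the task body, the region ID, and the reading of the time.Minute argument are illustrative assumptions:

package example

import (
	"runtime"
	"time"

	"github.com/tikv/pd/pkg/ratelimit"
)

// runnerSketch mirrors the lifecycle wired into RaftCluster above: the
// constructor, Start/Stop, RunTask, and WithRetained calls are the ones this
// diff uses; everything else is made up for the example.
func runnerSketch() {
	runner := ratelimit.NewConcurrentRunner(
		"heartbeat-async", // same name constant as heartbeatTaskRunner above
		ratelimit.NewConcurrencyLimiter(uint64(runtime.NumCPU()*2)),
		time.Minute, // assumed to bound how long a queued task may linger
	)
	runner.Start()
	defer runner.Stop()

	regionID := uint64(42)
	_ = runner.RunTask(regionID, ratelimit.UpdateSubTree, func() {
		// asynchronous work keyed by region, e.g. updating the region subtree
	}, ratelimit.WithRetained(true))
}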
@@ -638,11 +658,11 @@ func (c *RaftCluster) LoadClusterInfo() (*RaftCluster, error) { start = time.Now() // used to load region from kv storage to cache storage. - if err = storage.TryLoadRegionsOnce(c.ctx, c.storage, c.core.CheckAndPutRegion); err != nil { + if err = storage.TryLoadRegionsOnce(c.ctx, c.storage, c.CheckAndPutRegion); err != nil { return nil, err } log.Info("load regions", - zap.Int("count", c.core.GetTotalRegionCount()), + zap.Int("count", c.GetTotalRegionCount()), zap.Duration("cost", time.Since(start)), ) @@ -710,7 +730,7 @@ func (c *RaftCluster) runUpdateStoreStats() { case <-ticker.C: // Update related stores. start := time.Now() - c.core.UpdateAllStoreStatus() + c.UpdateAllStoreStatus() updateStoreStatsGauge.Set(time.Since(start).Seconds()) } } @@ -740,6 +760,9 @@ func (c *RaftCluster) Stop() { if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { c.stopSchedulingJobs() } + c.heartbeatRunner.Stop() + c.miscRunner.Stop() + c.logRunner.Stop() c.Unlock() c.wg.Wait() @@ -829,6 +852,14 @@ func (c *RaftCluster) SetPDServerConfig(cfg *config.PDServerConfig) { c.opt.SetPDServerConfig(cfg) } +// IsSchedulingHalted returns whether the scheduling is halted. +// Currently, the PD scheduling is halted when: +// - The `HaltScheduling` persist option is set to true. +// - Online unsafe recovery is running. +func (c *RaftCluster) IsSchedulingHalted() bool { + return c.opt.IsSchedulingHalted() || c.unsafeRecoveryController.IsRunning() +} + // GetUnsafeRecoveryController returns the unsafe recovery controller. func (c *RaftCluster) GetUnsafeRecoveryController() *unsaferecovery.Controller { return c.unsafeRecoveryController @@ -838,8 +869,6 @@ func (c *RaftCluster) GetUnsafeRecoveryController() *unsaferecovery.Controller { func (c *RaftCluster) HandleStoreHeartbeat(heartbeat *pdpb.StoreHeartbeatRequest, resp *pdpb.StoreHeartbeatResponse) error { stats := heartbeat.GetStats() storeID := stats.GetStoreId() - c.Lock() - defer c.Unlock() store := c.GetStore(storeID) if store == nil { return errors.Errorf("store %v not found", storeID) @@ -887,10 +916,10 @@ func (c *RaftCluster) HandleStoreHeartbeat(heartbeat *pdpb.StoreHeartbeatRequest newStore = newStore.Clone(core.SetLastPersistTime(nowTime)) } } - if store := c.core.GetStore(storeID); store != nil { + if store := c.GetStore(storeID); store != nil { statistics.UpdateStoreHeartbeatMetrics(store) } - c.core.PutStore(newStore) + c.PutStore(newStore) var ( regions map[uint64]*core.RegionInfo interval uint64 @@ -929,8 +958,13 @@ func (c *RaftCluster) HandleStoreHeartbeat(heartbeat *pdpb.StoreHeartbeatRequest utils.RegionWriteKeys: 0, utils.RegionWriteQueryNum: 0, } - peerInfo := core.NewPeerInfo(peer, loads, interval) - c.hotStat.CheckReadAsync(statistics.NewCheckPeerTask(peerInfo, region)) + checkReadPeerTask := func(cache *statistics.HotPeerCache) { + stats := cache.CheckPeerFlow(region, []*metapb.Peer{peer}, loads, interval) + for _, stat := range stats { + cache.UpdateStat(stat) + } + } + c.hotStat.CheckReadAsync(checkReadPeerTask) } } for _, stat := range stats.GetSnapshotStats() { @@ -953,14 +987,20 @@ func (c *RaftCluster) HandleStoreHeartbeat(heartbeat *pdpb.StoreHeartbeatRequest } if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { // Here we will compare the reported regions with the previous hot peers to decide if it is still hot. 
- c.hotStat.CheckReadAsync(statistics.NewCollectUnReportedPeerTask(storeID, regions, interval)) + collectUnReportedPeerTask := func(cache *statistics.HotPeerCache) { + stats := cache.CheckColdPeer(storeID, regions, interval) + for _, stat := range stats { + cache.UpdateStat(stat) + } + } + c.hotStat.CheckReadAsync(collectUnReportedPeerTask) } return nil } // processReportBuckets update the bucket information. func (c *RaftCluster) processReportBuckets(buckets *metapb.Buckets) error { - region := c.core.GetRegion(buckets.GetRegionId()) + region := c.GetRegion(buckets.GetRegionId()) if region == nil { regionCacheMissCounter.Inc() return errors.Errorf("region %v not found", buckets.GetRegionId()) @@ -988,37 +1028,63 @@ func (c *RaftCluster) processReportBuckets(buckets *metapb.Buckets) error { } var regionGuide = core.GenerateRegionGuideFunc(true) +var syncRunner = ratelimit.NewSyncRunner() // processRegionHeartbeat updates the region information. -func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { - origin, _, err := c.core.PreCheckPutRegion(region) +func (c *RaftCluster) processRegionHeartbeat(ctx *core.MetaProcessContext, region *core.RegionInfo) error { + tracer := ctx.Tracer + origin, _, err := c.PreCheckPutRegion(region) + tracer.OnPreCheckFinished() if err != nil { return err } + region.Inherit(origin, c.GetStoreConfig().IsEnableRegionBucket()) if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { cluster.HandleStatsAsync(c, region) } - + tracer.OnAsyncHotStatsFinished() hasRegionStats := c.regionStats != nil // Save to storage if meta is updated, except for flashback. // Save to cache if meta or leader is updated, or contains any down/pending peer. - // Mark isNew if the region in cache does not have leader. - isNew, saveKV, saveCache, needSync := regionGuide(region, origin) - if !saveKV && !saveCache && !isNew { + saveKV, saveCache, needSync, retained := regionGuide(ctx, region, origin) + tracer.OnRegionGuideFinished() + regionID := region.GetID() + if !saveKV && !saveCache { // Due to some config changes need to update the region stats as well, // so we do some extra checks here. + // TODO: Due to the accuracy requirements of the API "/regions/check/xxx", + // region stats needs to be collected in API mode. + // We need to think of a better way to reduce this part of the cost in the future. if hasRegionStats && c.regionStats.RegionStatsNeedUpdate(region) { - c.regionStats.Observe(region, c.getRegionStoresLocked(region)) + _ = ctx.MiscRunner.RunTask( + regionID, + ratelimit.ObserveRegionStatsAsync, + func() { + if c.regionStats.RegionStatsNeedUpdate(region) { + cluster.Collect(c, region, hasRegionStats) + } + }, + ) + } + // region is not updated to the subtree. + if origin.GetRef() < 2 { + _ = ctx.TaskRunner.RunTask( + regionID, + ratelimit.UpdateSubTree, + func() { + c.CheckAndPutSubTree(region) + }, + ratelimit.WithRetained(true), + ) } return nil } - failpoint.Inject("concurrentRegionHeartbeat", func() { time.Sleep(500 * time.Millisecond) }) - + tracer.OnSaveCacheBegin() var overlaps []*core.RegionInfo if saveCache { failpoint.Inject("decEpoch", func() { @@ -1028,42 +1094,73 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { // check its validation again here. // // However, it can't solve the race condition of concurrent heartbeats from the same region. 
- if overlaps, err = c.core.AtomicCheckAndPutRegion(region); err != nil { + if overlaps, err = c.CheckAndPutRootTree(ctx, region); err != nil { + tracer.OnSaveCacheFinished() return err } + _ = ctx.TaskRunner.RunTask( + regionID, + ratelimit.UpdateSubTree, + func() { + c.CheckAndPutSubTree(region) + }, + ratelimit.WithRetained(retained), + ) + tracer.OnUpdateSubTreeFinished() + if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { - cluster.HandleOverlaps(c, overlaps) + _ = ctx.MiscRunner.RunTask( + regionID, + ratelimit.HandleOverlaps, + func() { + cluster.HandleOverlaps(c, overlaps) + }, + ) } regionUpdateCacheEventCounter.Inc() } - isPrepared := true - if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { - isPrepared = c.IsPrepared() - } - cluster.Collect(c, region, c.GetRegionStores(region), hasRegionStats, isNew, isPrepared) + tracer.OnSaveCacheFinished() + // handle region stats + _ = ctx.MiscRunner.RunTask( + regionID, + ratelimit.CollectRegionStatsAsync, + func() { + // TODO: Due to the accuracy requirements of the API "/regions/check/xxx", + // region stats needs to be collected in API mode. + // We need to think of a better way to reduce this part of the cost in the future. + cluster.Collect(c, region, hasRegionStats) + }, + ) + tracer.OnCollectRegionStatsFinished() if c.storage != nil { - // If there are concurrent heartbeats from the same region, the last write will win even if - // writes to storage in the critical area. So don't use mutex to protect it. - // Not successfully saved to storage is not fatal, it only leads to longer warm-up - // after restart. Here we only log the error then go on updating cache. - for _, item := range overlaps { - if err := c.storage.DeleteRegion(item.GetMeta()); err != nil { - log.Error("failed to delete region from storage", - zap.Uint64("region-id", item.GetID()), - logutil.ZapRedactStringer("region-meta", core.RegionToHexMeta(item.GetMeta())), - errs.ZapError(err)) - } - } if saveKV { - if err := c.storage.SaveRegion(region.GetMeta()); err != nil { - log.Error("failed to save region to storage", - zap.Uint64("region-id", region.GetID()), - logutil.ZapRedactStringer("region-meta", core.RegionToHexMeta(region.GetMeta())), - errs.ZapError(err)) - } - regionUpdateKVEventCounter.Inc() + _ = ctx.MiscRunner.RunTask( + regionID, + ratelimit.SaveRegionToKV, + func() { + // If there are concurrent heartbeats from the same region, the last write will win even if + // writes to storage in the critical area. So don't use mutex to protect it. + // Not successfully saved to storage is not fatal, it only leads to longer warm-up + // after restart. Here we only log the error then go on updating cache. 
+ for _, item := range overlaps { + if err := c.storage.DeleteRegion(item.GetMeta()); err != nil { + log.Error("failed to delete region from storage", + zap.Uint64("region-id", item.GetID()), + logutil.ZapRedactStringer("region-meta", core.RegionToHexMeta(item.GetMeta())), + errs.ZapError(err)) + } + } + if err := c.storage.SaveRegion(region.GetMeta()); err != nil { + log.Error("failed to save region to storage", + zap.Uint64("region-id", region.GetID()), + logutil.ZapRedactStringer("region-meta", core.RegionToHexMeta(region.GetMeta())), + errs.ZapError(err)) + } + regionUpdateKVEventCounter.Inc() + }, + ) } } @@ -1073,7 +1170,6 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { default: } } - return nil } @@ -1089,153 +1185,7 @@ func (c *RaftCluster) putMetaLocked(meta *metapb.Cluster) error { // GetBasicCluster returns the basic cluster. func (c *RaftCluster) GetBasicCluster() *core.BasicCluster { - return c.core -} - -// GetRegionByKey gets regionInfo by region key from cluster. -func (c *RaftCluster) GetRegionByKey(regionKey []byte) *core.RegionInfo { - return c.core.GetRegionByKey(regionKey) -} - -// GetPrevRegionByKey gets previous region and leader peer by the region key from cluster. -func (c *RaftCluster) GetPrevRegionByKey(regionKey []byte) *core.RegionInfo { - return c.core.GetPrevRegionByKey(regionKey) -} - -// ScanRegions scans region with start key, until the region contains endKey, or -// total number greater than limit. -func (c *RaftCluster) ScanRegions(startKey, endKey []byte, limit int) []*core.RegionInfo { - return c.core.ScanRegions(startKey, endKey, limit) -} - -// GetRegion searches for a region by ID. -func (c *RaftCluster) GetRegion(regionID uint64) *core.RegionInfo { - return c.core.GetRegion(regionID) -} - -// GetMetaRegions gets regions from cluster. -func (c *RaftCluster) GetMetaRegions() []*metapb.Region { - return c.core.GetMetaRegions() -} - -// GetRegions returns all regions' information in detail. -func (c *RaftCluster) GetRegions() []*core.RegionInfo { - return c.core.GetRegions() -} - -// ValidRegion is used to decide if the region is valid. -func (c *RaftCluster) ValidRegion(region *metapb.Region) error { - return c.core.ValidRegion(region) -} - -// GetTotalRegionCount returns total count of regions -func (c *RaftCluster) GetTotalRegionCount() int { - return c.core.GetTotalRegionCount() -} - -// GetStoreRegions returns all regions' information with a given storeID. -func (c *RaftCluster) GetStoreRegions(storeID uint64) []*core.RegionInfo { - return c.core.GetStoreRegions(storeID) -} - -// RandLeaderRegions returns some random regions that has leader on the store. -func (c *RaftCluster) RandLeaderRegions(storeID uint64, ranges []core.KeyRange) []*core.RegionInfo { - return c.core.RandLeaderRegions(storeID, ranges) -} - -// RandFollowerRegions returns some random regions that has a follower on the store. -func (c *RaftCluster) RandFollowerRegions(storeID uint64, ranges []core.KeyRange) []*core.RegionInfo { - return c.core.RandFollowerRegions(storeID, ranges) -} - -// RandPendingRegions returns some random regions that has a pending peer on the store. -func (c *RaftCluster) RandPendingRegions(storeID uint64, ranges []core.KeyRange) []*core.RegionInfo { - return c.core.RandPendingRegions(storeID, ranges) -} - -// RandLearnerRegions returns some random regions that has a learner peer on the store. 
-func (c *RaftCluster) RandLearnerRegions(storeID uint64, ranges []core.KeyRange) []*core.RegionInfo { - return c.core.RandLearnerRegions(storeID, ranges) -} - -// RandWitnessRegions returns some random regions that has a witness peer on the store. -func (c *RaftCluster) RandWitnessRegions(storeID uint64, ranges []core.KeyRange) []*core.RegionInfo { - return c.core.RandWitnessRegions(storeID, ranges) -} - -// GetLeaderStore returns all stores that contains the region's leader peer. -func (c *RaftCluster) GetLeaderStore(region *core.RegionInfo) *core.StoreInfo { - return c.core.GetLeaderStore(region) -} - -// GetNonWitnessVoterStores returns all stores that contains the region's non-witness voter peer. -func (c *RaftCluster) GetNonWitnessVoterStores(region *core.RegionInfo) []*core.StoreInfo { - return c.core.GetNonWitnessVoterStores(region) -} - -// GetFollowerStores returns all stores that contains the region's follower peer. -func (c *RaftCluster) GetFollowerStores(region *core.RegionInfo) []*core.StoreInfo { - return c.core.GetFollowerStores(region) -} - -// GetRegionStores returns all stores that contains the region's peer. -func (c *RaftCluster) GetRegionStores(region *core.RegionInfo) []*core.StoreInfo { - return c.core.GetRegionStores(region) -} - -// GetStoreCount returns the count of stores. -func (c *RaftCluster) GetStoreCount() int { - return c.core.GetStoreCount() -} - -// GetStoreRegionCount returns the number of regions for a given store. -func (c *RaftCluster) GetStoreRegionCount(storeID uint64) int { - return c.core.GetStoreRegionCount(storeID) -} - -// GetAverageRegionSize returns the average region approximate size. -func (c *RaftCluster) GetAverageRegionSize() int64 { - return c.core.GetAverageRegionSize() -} - -// DropCacheRegion removes a region from the cache. -func (c *RaftCluster) DropCacheRegion(id uint64) { - c.core.RemoveRegionIfExist(id) -} - -// DropCacheAllRegion removes all regions from the cache. -func (c *RaftCluster) DropCacheAllRegion() { - c.core.ResetRegionCache() -} - -// GetMetaStores gets stores from cluster. -func (c *RaftCluster) GetMetaStores() []*metapb.Store { - return c.core.GetMetaStores() -} - -// GetStores returns all stores in the cluster. -func (c *RaftCluster) GetStores() []*core.StoreInfo { - return c.core.GetStores() -} - -// GetLeaderStoreByRegionID returns the leader store of the given region. -func (c *RaftCluster) GetLeaderStoreByRegionID(regionID uint64) *core.StoreInfo { - return c.core.GetLeaderStoreByRegionID(regionID) -} - -// GetStore gets store from cluster. -func (c *RaftCluster) GetStore(storeID uint64) *core.StoreInfo { - return c.core.GetStore(storeID) -} - -// GetAdjacentRegions returns regions' information that are adjacent with the specific region ID. -func (c *RaftCluster) GetAdjacentRegions(region *core.RegionInfo) (*core.RegionInfo, *core.RegionInfo) { - return c.core.GetAdjacentRegions(region) -} - -// GetRangeHoles returns all range holes, i.e the key ranges without any region info. -func (c *RaftCluster) GetRangeHoles() [][]string { - return c.core.GetRangeHoles() + return c.BasicCluster } // UpdateStoreLabels updates a store's location labels @@ -1271,8 +1221,8 @@ func (c *RaftCluster) DeleteStoreLabel(storeID uint64, labelKey string) error { return c.putStoreImpl(newStore, true) } -// PutStore puts a store. -func (c *RaftCluster) PutStore(store *metapb.Store) error { +// PutMetaStore puts a store. 
+func (c *RaftCluster) PutMetaStore(store *metapb.Store) error { if err := c.putStoreImpl(store, false); err != nil { return err } @@ -1285,9 +1235,6 @@ func (c *RaftCluster) PutStore(store *metapb.Store) error { // If 'force' is true, the store's labels will overwrite those labels which already existed in the store. // If 'force' is false, the store's labels will merge into those labels which already existed in the store. func (c *RaftCluster) putStoreImpl(store *metapb.Store, force bool) error { - c.Lock() - defer c.Unlock() - if store.GetId() == 0 { return errors.Errorf("invalid put store %v", store) } @@ -1329,7 +1276,7 @@ func (c *RaftCluster) putStoreImpl(store *metapb.Store, force bool) error { if err := c.checkStoreLabels(s); err != nil { return err } - return c.putStoreLocked(s) + return c.setStore(s) } func (c *RaftCluster) checkStoreVersion(store *metapb.Store) error { @@ -1374,9 +1321,6 @@ func (c *RaftCluster) checkStoreLabels(s *core.StoreInfo) error { // RemoveStore marks a store as offline in cluster. // State transition: Up -> Offline. func (c *RaftCluster) RemoveStore(storeID uint64, physicallyDestroyed bool) error { - c.Lock() - defer c.Unlock() - store := c.GetStore(storeID) if store == nil { return errs.ErrStoreNotFound.FastGenByArgs(storeID) @@ -1401,21 +1345,22 @@ func (c *RaftCluster) RemoveStore(storeID uint64, physicallyDestroyed bool) erro zap.Uint64("store-id", storeID), zap.String("store-address", newStore.GetAddress()), zap.Bool("physically-destroyed", newStore.IsPhysicallyDestroyed())) - err := c.putStoreLocked(newStore) - if err == nil { - regionSize := float64(c.core.GetStoreRegionSize(storeID)) - c.resetProgress(storeID, store.GetAddress()) - c.progressManager.AddProgress(encodeRemovingProgressKey(storeID), regionSize, regionSize, nodeStateCheckJobInterval) - // record the current store limit in memory - c.prevStoreLimit[storeID] = map[storelimit.Type]float64{ - storelimit.AddPeer: c.GetStoreLimitByType(storeID, storelimit.AddPeer), - storelimit.RemovePeer: c.GetStoreLimitByType(storeID, storelimit.RemovePeer), - } - // TODO: if the persist operation encounters error, the "Unlimited" will be rollback. - // And considering the store state has changed, RemoveStore is actually successful. - _ = c.SetStoreLimit(storeID, storelimit.RemovePeer, storelimit.Unlimited) + + if err := c.setStore(newStore); err != nil { + return err } - return err + regionSize := float64(c.GetStoreRegionSize(storeID)) + c.resetProgress(storeID, store.GetAddress()) + c.progressManager.AddProgress(encodeRemovingProgressKey(storeID), regionSize, regionSize, nodeStateCheckJobInterval, progress.WindowDurationOption(c.GetCoordinator().GetPatrolRegionsDuration())) + // record the current store limit in memory + c.prevStoreLimit[storeID] = map[storelimit.Type]float64{ + storelimit.AddPeer: c.GetStoreLimitByType(storeID, storelimit.AddPeer), + storelimit.RemovePeer: c.GetStoreLimitByType(storeID, storelimit.RemovePeer), + } + // TODO: if the persist operation encounters error, the "Unlimited" will be rollback. + // And considering the store state has changed, RemoveStore is actually successful. + _ = c.SetStoreLimit(storeID, storelimit.RemovePeer, storelimit.Unlimited) + return nil } func (c *RaftCluster) checkReplicaBeforeOfflineStore(storeID uint64) error { @@ -1466,9 +1411,6 @@ func (c *RaftCluster) getUpStores() []uint64 { // BuryStore marks a store as tombstone in cluster. // If forceBury is false, the store should be offlined and emptied before calling this func. 
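The long run of deleted one-line wrappers above (GetRegionByKey, ScanRegions, GetStoreCount, RandLeaderRegions, and the rest) works because RaftCluster now embeds core.BasicCluster directly, as the new GetBasicCluster body (return c.BasicCluster) shows, so those methods are promoted onto RaftCluster automatically. A minimal, self-contained sketch of the mechanism follows; these are not PD's real types, only an illustration of Go method promotion through embedding:

package main

import "fmt"

// BasicCluster stands in for core.BasicCluster; only one method is shown.
type BasicCluster struct{}

func (*BasicCluster) GetStoreCount() int { return 3 }

// RaftCluster embeds the pointer, so BasicCluster's exported methods are
// promoted and the hand-written delegation wrappers can simply be deleted.
type RaftCluster struct {
	*BasicCluster
}

func main() {
	c := &RaftCluster{BasicCluster: &BasicCluster{}}
	fmt.Println(c.GetStoreCount()) // resolves to (*BasicCluster).GetStoreCount
}

The renamed PutMetaStore (taking *metapb.Store) then avoids colliding with the promoted PutStore (taking *core.StoreInfo) that comes from the embedded BasicCluster.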
func (c *RaftCluster) BuryStore(storeID uint64, forceBury bool) error { - c.Lock() - defer c.Unlock() - store := c.GetStore(storeID) if store == nil { return errs.ErrStoreNotFound.FastGenByArgs(storeID) @@ -1493,8 +1435,8 @@ func (c *RaftCluster) BuryStore(storeID uint64, forceBury bool) error { zap.String("store-address", newStore.GetAddress()), zap.String("state", store.GetState().String()), zap.Bool("physically-destroyed", store.IsPhysicallyDestroyed())) - err := c.putStoreLocked(newStore) - c.onStoreVersionChangeLocked() + err := c.setStore(newStore) + c.OnStoreVersionChange() if err == nil { // clean up the residual information. delete(c.prevStoreLimit, storeID) @@ -1510,40 +1452,6 @@ func (c *RaftCluster) BuryStore(storeID uint64, forceBury bool) error { return err } -// PauseLeaderTransfer prevents the store from been selected as source or -// target store of TransferLeader. -func (c *RaftCluster) PauseLeaderTransfer(storeID uint64) error { - return c.core.PauseLeaderTransfer(storeID) -} - -// ResumeLeaderTransfer cleans a store's pause state. The store can be selected -// as source or target of TransferLeader again. -func (c *RaftCluster) ResumeLeaderTransfer(storeID uint64) { - c.core.ResumeLeaderTransfer(storeID) -} - -// SlowStoreEvicted marks a store as a slow store and prevents transferring -// leader to the store -func (c *RaftCluster) SlowStoreEvicted(storeID uint64) error { - return c.core.SlowStoreEvicted(storeID) -} - -// SlowTrendEvicted marks a store as a slow store by trend and prevents transferring -// leader to the store -func (c *RaftCluster) SlowTrendEvicted(storeID uint64) error { - return c.core.SlowTrendEvicted(storeID) -} - -// SlowTrendRecovered cleans the evicted by slow trend state of a store. -func (c *RaftCluster) SlowTrendRecovered(storeID uint64) { - c.core.SlowTrendRecovered(storeID) -} - -// SlowStoreRecovered cleans the evicted state of a store. -func (c *RaftCluster) SlowStoreRecovered(storeID uint64) { - c.core.SlowStoreRecovered(storeID) -} - // NeedAwakenAllRegionsInStore checks whether we should do AwakenRegions operation. func (c *RaftCluster) NeedAwakenAllRegionsInStore(storeID uint64) (needAwaken bool, slowStoreIDs []uint64) { store := c.GetStore(storeID) @@ -1575,9 +1483,6 @@ func (c *RaftCluster) NeedAwakenAllRegionsInStore(storeID uint64) (needAwaken bo // UpStore up a store from offline func (c *RaftCluster) UpStore(storeID uint64) error { - c.Lock() - defer c.Unlock() - store := c.GetStore(storeID) if store == nil { return errs.ErrStoreNotFound.FastGenByArgs(storeID) @@ -1608,7 +1513,7 @@ func (c *RaftCluster) UpStore(storeID uint64) error { log.Warn("store has been up", zap.Uint64("store-id", storeID), zap.String("store-address", newStore.GetAddress())) - err := c.putStoreLocked(newStore) + err := c.setStore(newStore) if err == nil { if exist { // persist the store limit @@ -1622,9 +1527,6 @@ func (c *RaftCluster) UpStore(storeID uint64) error { // ReadyToServe change store's node state to Serving. 
func (c *RaftCluster) ReadyToServe(storeID uint64) error { - c.Lock() - defer c.Unlock() - store := c.GetStore(storeID) if store == nil { return errs.ErrStoreNotFound.FastGenByArgs(storeID) @@ -1646,7 +1548,7 @@ func (c *RaftCluster) ReadyToServe(storeID uint64) error { log.Info("store has changed to serving", zap.Uint64("store-id", storeID), zap.String("store-address", newStore.GetAddress())) - err := c.putStoreLocked(newStore) + err := c.setStore(newStore) if err == nil { c.resetProgress(storeID, store.GetAddress()) } @@ -1669,16 +1571,16 @@ func (c *RaftCluster) SetStoreWeight(storeID uint64, leaderWeight, regionWeight core.SetRegionWeight(regionWeight), ) - return c.putStoreLocked(newStore) + return c.setStore(newStore) } -func (c *RaftCluster) putStoreLocked(store *core.StoreInfo) error { +func (c *RaftCluster) setStore(store *core.StoreInfo) error { if c.storage != nil { if err := c.storage.SaveStoreMeta(store.GetMeta()); err != nil { return err } } - c.core.PutStore(store) + c.PutStore(store) if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { c.updateStoreStatistics(store.GetID(), store.IsSlow()) } @@ -1744,11 +1646,11 @@ func (c *RaftCluster) checkStores() { offlineStore := store.GetMeta() id := offlineStore.GetId() - regionSize := c.core.GetStoreRegionSize(id) + regionSize := c.GetStoreRegionSize(id) if c.IsPrepared() { c.updateProgress(id, store.GetAddress(), removingAction, float64(regionSize), float64(regionSize), false /* dec */) } - regionCount := c.core.GetStoreRegionCount(id) + regionCount := c.GetStoreRegionCount(id) // If the store is empty, it can be buried. if regionCount == 0 { if err := c.BuryStore(id, false); err != nil { @@ -1776,7 +1678,7 @@ func (c *RaftCluster) checkStores() { func (c *RaftCluster) getThreshold(stores []*core.StoreInfo, store *core.StoreInfo) float64 { start := time.Now() if !c.opt.IsPlacementRulesEnabled() { - regionSize := c.core.GetRegionSizeByRange([]byte(""), []byte("")) * int64(c.opt.GetMaxReplicas()) + regionSize := c.GetRegionSizeByRange([]byte(""), []byte("")) * int64(c.opt.GetMaxReplicas()) weight := getStoreTopoWeight(store, stores, c.opt.GetLocationLabels(), c.opt.GetMaxReplicas()) return float64(regionSize) * weight * 0.9 } @@ -1816,7 +1718,7 @@ func (c *RaftCluster) calculateRange(stores []*core.StoreInfo, store *core.Store matchStores = append(matchStores, s) } } - regionSize := c.core.GetRegionSizeByRange(startKey, endKey) * int64(rule.Count) + regionSize := c.GetRegionSizeByRange(startKey, endKey) * int64(rule.Count) weight := getStoreTopoWeight(store, matchStores, rule.LocationLabels, rule.Count) storeSize += float64(regionSize) * weight log.Debug("calculate range result", @@ -1846,7 +1748,7 @@ func getStoreTopoWeight(store *core.StoreInfo, stores []*core.StoreInfo, locatio if slice.Contains(validLabels, label.Key) { weight /= float64(len(topo)) } - topo = topo[label.Value].(map[string]interface{}) + topo = topo[label.Value].(map[string]any) } else { break } @@ -1855,8 +1757,8 @@ func getStoreTopoWeight(store *core.StoreInfo, stores []*core.StoreInfo, locatio return weight / sameLocationStoreNum } -func buildTopology(s *core.StoreInfo, stores []*core.StoreInfo, locationLabels []string, count int) (map[string]interface{}, []string, float64, bool) { - topology := make(map[string]interface{}) +func buildTopology(s *core.StoreInfo, stores []*core.StoreInfo, locationLabels []string, count int) (map[string]any, []string, float64, bool) { + topology := make(map[string]any) sameLocationStoreNum := 1.0 totalLabelCount := 
make([]int, len(locationLabels)) for _, store := range stores { @@ -1913,7 +1815,7 @@ func getSortedLabels(storeLabels []*metapb.StoreLabel, locationLabels []string) } // updateTopology records stores' topology in the `topology` variable. -func updateTopology(topology map[string]interface{}, sortedLabels []*metapb.StoreLabel) []int { +func updateTopology(topology map[string]any, sortedLabels []*metapb.StoreLabel) []int { labelCount := make([]int, len(sortedLabels)) if len(sortedLabels) == 0 { return labelCount @@ -1921,36 +1823,38 @@ func updateTopology(topology map[string]interface{}, sortedLabels []*metapb.Stor topo := topology for i, l := range sortedLabels { if _, exist := topo[l.Value]; !exist { - topo[l.Value] = make(map[string]interface{}) + topo[l.Value] = make(map[string]any) labelCount[i] += 1 } - topo = topo[l.Value].(map[string]interface{}) + topo = topo[l.Value].(map[string]any) } return labelCount } func (c *RaftCluster) updateProgress(storeID uint64, storeAddress, action string, current, remaining float64, isInc bool) { storeLabel := strconv.FormatUint(storeID, 10) - var progress string + var progressName string + var opts []progress.Option switch action { case removingAction: - progress = encodeRemovingProgressKey(storeID) + progressName = encodeRemovingProgressKey(storeID) + opts = []progress.Option{progress.WindowDurationOption(c.GetCoordinator().GetPatrolRegionsDuration())} case preparingAction: - progress = encodePreparingProgressKey(storeID) + progressName = encodePreparingProgressKey(storeID) } - if exist := c.progressManager.AddProgress(progress, current, remaining, nodeStateCheckJobInterval); !exist { + if exist := c.progressManager.AddProgress(progressName, current, remaining, nodeStateCheckJobInterval, opts...); !exist { return } - c.progressManager.UpdateProgress(progress, current, remaining, isInc) - process, ls, cs, err := c.progressManager.Status(progress) + c.progressManager.UpdateProgress(progressName, current, remaining, isInc, opts...) + progress, leftSeconds, currentSpeed, err := c.progressManager.Status(progressName) if err != nil { - log.Error("get progress status failed", zap.String("progress", progress), zap.Float64("remaining", remaining), errs.ZapError(err)) + log.Error("get progress status failed", zap.String("progress", progressName), zap.Float64("remaining", remaining), errs.ZapError(err)) return } - storesProgressGauge.WithLabelValues(storeAddress, storeLabel, action).Set(process) - storesSpeedGauge.WithLabelValues(storeAddress, storeLabel, action).Set(cs) - storesETAGauge.WithLabelValues(storeAddress, storeLabel, action).Set(ls) + storesProgressGauge.WithLabelValues(storeAddress, storeLabel, action).Set(progress) + storesSpeedGauge.WithLabelValues(storeAddress, storeLabel, action).Set(currentSpeed) + storesETAGauge.WithLabelValues(storeAddress, storeLabel, action).Set(leftSeconds) } func (c *RaftCluster) resetProgress(storeID uint64, storeAddress string) { @@ -1980,13 +1884,10 @@ func encodePreparingProgressKey(storeID uint64) string { // RemoveTombStoneRecords removes the tombStone Records. 
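The updateProgress rewrite above threads a variadic []progress.Option (here progress.WindowDurationOption(c.GetCoordinator().GetPatrolRegionsDuration())) into AddProgress and UpdateProgress. The following is only a hedged sketch of the functional-options pattern those call sites rely on; the real pkg/progress types, fields, and defaults will differ:

package progresssketch

import "time"

// Option mutates per-progress settings; WindowDurationOption is modeled on the
// call sites above, but the concrete field it sets here is an assumption.
type Option func(*entry)

type entry struct {
	windowDuration time.Duration
}

func WindowDurationOption(d time.Duration) Option {
	return func(e *entry) { e.windowDuration = d }
}

type Manager struct {
	entries map[string]*entry
}

func NewManager() *Manager { return &Manager{entries: make(map[string]*entry)} }

// AddProgress registers a progress entry, applies caller-supplied options, and
// reports whether the entry already existed (mirroring the usage above).
func (m *Manager) AddProgress(name string, current, remaining float64, interval time.Duration, opts ...Option) (exist bool) {
	if _, exist = m.entries[name]; exist {
		return exist
	}
	e := &entry{windowDuration: interval}
	for _, opt := range opts {
		opt(e) // e.g. widen the speed window to the patrol-regions duration
	}
	m.entries[name] = e
	return false
}

Elsewhere in the same hunks, map[string]interface{} becomes map[string]any; any is the built-in alias for interface{} (type any = interface{}), so that part is purely cosmetic.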
func (c *RaftCluster) RemoveTombStoneRecords() error { - c.Lock() - defer c.Unlock() - var failedStores []uint64 for _, store := range c.GetStores() { if store.IsRemoved() { - if c.core.GetStoreRegionCount(store.GetID()) > 0 { + if c.GetStoreRegionCount(store.GetID()) > 0 { log.Warn("skip removing tombstone", zap.Stringer("store", store.GetMeta())) failedStores = append(failedStores, store.GetID()) continue @@ -2024,7 +1925,7 @@ func (c *RaftCluster) deleteStore(store *core.StoreInfo) error { return err } } - c.core.DeleteStore(store) + c.DeleteStore(store) return nil } @@ -2052,7 +1953,7 @@ func (c *RaftCluster) collectHealthStatus() { } } -func (c *RaftCluster) resetHealthStatus() { +func (*RaftCluster) resetHealthStatus() { healthStatusGauge.Reset() } @@ -2063,24 +1964,8 @@ func (c *RaftCluster) resetProgressIndicator() { storesETAGauge.Reset() } -func (c *RaftCluster) getRegionStoresLocked(region *core.RegionInfo) []*core.StoreInfo { - stores := make([]*core.StoreInfo, 0, len(region.GetPeers())) - for _, p := range region.GetPeers() { - if store := c.core.GetStore(p.StoreId); store != nil { - stores = append(stores, store) - } - } - return stores -} - // OnStoreVersionChange changes the version of the cluster when needed. func (c *RaftCluster) OnStoreVersionChange() { - c.RLock() - defer c.RUnlock() - c.onStoreVersionChangeLocked() -} - -func (c *RaftCluster) onStoreVersionChangeLocked() { var minVersion *semver.Version stores := c.GetStores() for _, s := range stores { @@ -2138,13 +2023,13 @@ func (c *RaftCluster) PutMetaCluster(meta *metapb.Cluster) error { // GetRegionStatsByRange returns region statistics from cluster. func (c *RaftCluster) GetRegionStatsByRange(startKey, endKey []byte) *statistics.RegionStats { - return statistics.GetRegionStats(c.core.ScanRegions(startKey, endKey, -1)) + return statistics.GetRegionStats(c.ScanRegions(startKey, endKey, -1)) } -// GetRegionCount returns the number of regions in the range. -func (c *RaftCluster) GetRegionCount(startKey, endKey []byte) *statistics.RegionStats { +// GetRegionStatsCount returns the number of regions in the range. +func (c *RaftCluster) GetRegionStatsCount(startKey, endKey []byte) *statistics.RegionStats { stats := &statistics.RegionStats{} - stats.Count = c.core.GetRegionCount(startKey, endKey) + stats.Count = c.GetRegionCount(startKey, endKey) return stats } @@ -2156,7 +2041,7 @@ func (c *RaftCluster) putRegion(region *core.RegionInfo) error { return err } } - c.core.PutRegion(region) + c.PutRegion(region) return nil } @@ -2211,7 +2096,7 @@ func (c *RaftCluster) AddStoreLimit(store *metapb.Store) { func (c *RaftCluster) RemoveStoreLimit(storeID uint64) { cfg := c.opt.GetScheduleConfig().Clone() for _, limitType := range storelimit.TypeNameValue { - c.core.ResetStoreLimit(storeID, limitType) + c.ResetStoreLimit(storeID, limitType) } delete(cfg.StoreLimit, storeID) c.opt.SetScheduleConfig(cfg) @@ -2231,16 +2116,13 @@ func (c *RaftCluster) RemoveStoreLimit(storeID uint64) { // SetMinResolvedTS sets up a store with min resolved ts. 
func (c *RaftCluster) SetMinResolvedTS(storeID, minResolvedTS uint64) error { - c.Lock() - defer c.Unlock() - store := c.GetStore(storeID) if store == nil { return errs.ErrStoreNotFound.FastGenByArgs(storeID) } newStore := store.Clone(core.SetMinResolvedTS(minResolvedTS)) - c.core.PutStore(newStore) + c.PutStore(newStore) return nil } diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index d5931394c1b..baf862131a5 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -32,7 +32,9 @@ import ( "github.com/pingcap/kvproto/pkg/eraftpb" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/pingcap/log" "github.com/stretchr/testify/require" + "github.com/tikv/pd/pkg/cluster" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/core/constant" "github.com/tikv/pd/pkg/core/storelimit" @@ -50,9 +52,11 @@ import ( "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/schedule/schedulers" + "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/statistics/utils" "github.com/tikv/pd/pkg/storage" + "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/pkg/utils/operatorutil" "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/pkg/utils/typeutil" @@ -91,7 +95,7 @@ func TestStoreHeartbeat(t *testing.T) { } re.Error(cluster.HandleStoreHeartbeat(req, resp)) - re.NoError(cluster.putStoreLocked(store)) + re.NoError(cluster.setStore(store)) re.Equal(i+1, cluster.GetStoreCount()) re.Equal(int64(0), store.GetLastHeartbeatTS().UnixNano()) @@ -213,7 +217,7 @@ func TestFilterUnhealthyStore(t *testing.T) { Available: 50, RegionCount: 1, } - re.NoError(cluster.putStoreLocked(store)) + re.NoError(cluster.setStore(store)) re.NoError(cluster.HandleStoreHeartbeat(req, resp)) re.NotNil(cluster.hotStat.GetRollingStoreStats(store.GetID())) } @@ -226,7 +230,7 @@ func TestFilterUnhealthyStore(t *testing.T) { RegionCount: 1, } newStore := store.Clone(core.SetStoreState(metapb.StoreState_Tombstone)) - re.NoError(cluster.putStoreLocked(newStore)) + re.NoError(cluster.setStore(newStore)) re.NoError(cluster.HandleStoreHeartbeat(req, resp)) re.Nil(cluster.hotStat.GetRollingStoreStats(store.GetID())) } @@ -251,7 +255,7 @@ func TestSetOfflineStore(t *testing.T) { // Put 6 stores. for _, store := range newTestStores(6, "2.0.0") { - re.NoError(cluster.PutStore(store.GetMeta())) + re.NoError(cluster.PutMetaStore(store.GetMeta())) } // store 1: up -> offline @@ -293,7 +297,7 @@ func TestSetOfflineStore(t *testing.T) { // test clean up tombstone store toCleanStore := cluster.GetStore(1).Clone().GetMeta() toCleanStore.LastHeartbeat = time.Now().Add(-40 * 24 * time.Hour).UnixNano() - cluster.PutStore(toCleanStore) + cluster.PutMetaStore(toCleanStore) cluster.checkStores() re.Nil(cluster.GetStore(1)) } @@ -310,7 +314,7 @@ func TestSetOfflineWithReplica(t *testing.T) { // Put 4 stores. for _, store := range newTestStores(4, "2.0.0") { - re.NoError(cluster.PutStore(store.GetMeta())) + re.NoError(cluster.PutMetaStore(store.GetMeta())) } re.NoError(cluster.RemoveStore(2, false)) @@ -349,7 +353,7 @@ func TestSetOfflineStoreWithEvictLeader(t *testing.T) { // Put 3 stores. 
for _, store := range newTestStores(3, "2.0.0") { - re.NoError(cluster.PutStore(store.GetMeta())) + re.NoError(cluster.PutMetaStore(store.GetMeta())) } _, err = addEvictLeaderScheduler(cluster, 1) @@ -376,7 +380,7 @@ func TestForceBuryStore(t *testing.T) { stores := newTestStores(2, "5.3.0") stores[1] = stores[1].Clone(core.SetLastHeartbeatTS(time.Now())) for _, store := range stores { - re.NoError(cluster.PutStore(store.GetMeta())) + re.NoError(cluster.PutMetaStore(store.GetMeta())) } re.NoError(cluster.BuryStore(uint64(1), true)) re.Error(cluster.BuryStore(uint64(2), true)) @@ -394,7 +398,7 @@ func TestReuseAddress(t *testing.T) { cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) // Put 4 stores. for _, store := range newTestStores(4, "2.0.0") { - re.NoError(cluster.PutStore(store.GetMeta())) + re.NoError(cluster.PutMetaStore(store.GetMeta())) } // store 1: up // store 2: offline @@ -418,9 +422,9 @@ func TestReuseAddress(t *testing.T) { if storeInfo.IsPhysicallyDestroyed() || storeInfo.IsRemoved() { // try to start a new store with the same address with store which is physically destroyed or tombstone should be success - re.NoError(cluster.PutStore(newStore)) + re.NoError(cluster.PutMetaStore(newStore)) } else { - re.Error(cluster.PutStore(newStore)) + re.Error(cluster.PutMetaStore(newStore)) } } } @@ -448,7 +452,7 @@ func TestUpStore(t *testing.T) { // Put 5 stores. for _, store := range newTestStores(5, "5.0.0") { - re.NoError(cluster.PutStore(store.GetMeta())) + re.NoError(cluster.PutMetaStore(store.GetMeta())) } // set store 1 offline @@ -488,7 +492,7 @@ func TestRemovingProcess(t *testing.T) { // Put 5 stores. stores := newTestStores(5, "5.0.0") for _, store := range stores { - re.NoError(cluster.PutStore(store.GetMeta())) + re.NoError(cluster.PutMetaStore(store.GetMeta())) } regions := newTestRegions(100, 5, 1) var regionInStore1 []*core.RegionInfo @@ -516,7 +520,7 @@ func TestRemovingProcess(t *testing.T) { if i >= 5 { break } - cluster.DropCacheRegion(region.GetID()) + cluster.RemoveRegionIfExist(region.GetID()) i++ } cluster.checkStores() @@ -551,13 +555,13 @@ func TestDeleteStoreUpdatesClusterVersion(t *testing.T) { // Put 3 new 4.0.9 stores. for _, store := range newTestStores(3, "4.0.9") { - re.NoError(cluster.PutStore(store.GetMeta())) + re.NoError(cluster.PutMetaStore(store.GetMeta())) } re.Equal("4.0.9", cluster.GetClusterVersion()) // Upgrade 2 stores to 5.0.0. 
for _, store := range newTestStores(2, "5.0.0") { - re.NoError(cluster.PutStore(store.GetMeta())) + re.NoError(cluster.PutMetaStore(store.GetMeta())) } re.Equal("4.0.9", cluster.GetClusterVersion()) @@ -580,14 +584,14 @@ func TestStoreClusterVersion(t *testing.T) { s1.Version = "5.0.1" s2.Version = "5.0.3" s3.Version = "5.0.5" - re.NoError(cluster.PutStore(s2)) + re.NoError(cluster.PutMetaStore(s2)) re.Equal(s2.Version, cluster.GetClusterVersion()) - re.NoError(cluster.PutStore(s1)) + re.NoError(cluster.PutMetaStore(s1)) // the cluster version should be 5.0.1(the min one) re.Equal(s1.Version, cluster.GetClusterVersion()) - re.NoError(cluster.PutStore(s3)) + re.NoError(cluster.PutMetaStore(s3)) // the cluster version should be 5.0.1(the min one) re.Equal(s1.Version, cluster.GetClusterVersion()) } @@ -630,7 +634,7 @@ func TestRegionHeartbeatHotStat(t *testing.T) { region := core.NewRegionInfo(regionMeta, leader, core.WithInterval(&pdpb.TimeInterval{StartTimestamp: 0, EndTimestamp: utils.RegionHeartBeatReportInterval}), core.SetWrittenBytes(30000*10), core.SetWrittenKeys(300000*10)) - err = cluster.processRegionHeartbeat(region) + err = cluster.processRegionHeartbeat(core.ContextTODO(), region) re.NoError(err) // wait HotStat to update items time.Sleep(time.Second) @@ -643,7 +647,7 @@ func TestRegionHeartbeatHotStat(t *testing.T) { StoreId: 4, } region = region.Clone(core.WithRemoveStorePeer(2), core.WithAddPeer(newPeer)) - err = cluster.processRegionHeartbeat(region) + err = cluster.processRegionHeartbeat(core.ContextTODO(), region) re.NoError(err) // wait HotStat to update items time.Sleep(time.Second) @@ -677,11 +681,11 @@ func TestBucketHeartbeat(t *testing.T) { n, np := uint64(2), uint64(2) regions := newTestRegions(n, n, np) for _, store := range stores { - re.NoError(cluster.putStoreLocked(store)) + re.NoError(cluster.setStore(store)) } - re.NoError(cluster.processRegionHeartbeat(regions[0])) - re.NoError(cluster.processRegionHeartbeat(regions[1])) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), regions[0])) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), regions[1])) re.Nil(cluster.GetRegion(uint64(1)).GetBuckets()) re.NoError(cluster.processReportBuckets(buckets)) re.Equal(buckets, cluster.GetRegion(uint64(1)).GetBuckets()) @@ -700,13 +704,13 @@ func TestBucketHeartbeat(t *testing.T) { // case5: region update should inherit buckets. newRegion := regions[1].Clone(core.WithIncConfVer(), core.SetBuckets(nil)) opt.SetRegionBucketEnabled(true) - re.NoError(cluster.processRegionHeartbeat(newRegion)) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), newRegion)) re.Len(cluster.GetRegion(uint64(1)).GetBuckets().GetKeys(), 2) // case6: disable region bucket in opt.SetRegionBucketEnabled(false) newRegion2 := regions[1].Clone(core.WithIncConfVer(), core.SetBuckets(nil)) - re.NoError(cluster.processRegionHeartbeat(newRegion2)) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), newRegion2)) re.Nil(cluster.GetRegion(uint64(1)).GetBuckets()) re.Empty(cluster.GetRegion(uint64(1)).GetBuckets().GetKeys()) } @@ -727,31 +731,31 @@ func TestRegionHeartbeat(t *testing.T) { regions := newTestRegions(n, n, np) for _, store := range stores { - re.NoError(cluster.putStoreLocked(store)) + re.NoError(cluster.setStore(store)) } for i, region := range regions { // region does not exist. 
- re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) // region is the same, not updated. - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) origin := region // region is updated. region = origin.Clone(core.WithIncVersion()) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) // region is stale (Version). stale := origin.Clone(core.WithIncConfVer()) - re.Error(cluster.processRegionHeartbeat(stale)) - checkRegions(re, cluster.core, regions[:i+1]) + re.Error(cluster.processRegionHeartbeat(core.ContextTODO(), stale)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) // region is updated @@ -760,14 +764,14 @@ func TestRegionHeartbeat(t *testing.T) { core.WithIncConfVer(), ) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) // region is stale (ConfVer). stale = origin.Clone(core.WithIncConfVer()) - re.Error(cluster.processRegionHeartbeat(stale)) - checkRegions(re, cluster.core, regions[:i+1]) + re.Error(cluster.processRegionHeartbeat(core.ContextTODO(), stale)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) // Add a down peer. @@ -778,39 +782,39 @@ func TestRegionHeartbeat(t *testing.T) { }, })) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) // Add a pending peer. region = region.Clone(core.WithPendingPeers([]*metapb.Peer{region.GetPeers()[rand.Intn(len(region.GetPeers()))]})) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) // Clear down peers. region = region.Clone(core.WithDownPeers(nil)) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) // Clear pending peers. region = region.Clone(core.WithPendingPeers(nil)) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) // Remove peers. 
origin = region region = origin.Clone(core.SetPeers(region.GetPeers()[:1])) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) // Add peers. region = origin regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) // Change one peer to witness @@ -819,48 +823,48 @@ func TestRegionHeartbeat(t *testing.T) { core.WithIncConfVer(), ) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) // Change leader. region = region.Clone(core.WithLeader(region.GetPeers()[1])) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) // Change ApproximateSize. region = region.Clone(core.SetApproximateSize(144)) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) // Change ApproximateKeys. region = region.Clone(core.SetApproximateKeys(144000)) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) // Change bytes written. region = region.Clone(core.SetWrittenBytes(24000)) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) // Change bytes read. 
region = region.Clone(core.SetReadBytes(1080000)) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) // Flashback region = region.Clone(core.WithFlashback(true, 1)) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) region = region.Clone(core.WithFlashback(false, 0)) regions[i] = region - re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core, regions[:i+1]) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region)) + checkRegions(re, cluster.BasicCluster, regions[:i+1]) } regionCounts := make(map[uint64]int) @@ -892,10 +896,10 @@ func TestRegionHeartbeat(t *testing.T) { time.Sleep(50 * time.Millisecond) for _, store := range cluster.GetStores() { - re.Equal(cluster.core.GetStoreLeaderCount(store.GetID()), store.GetLeaderCount()) - re.Equal(cluster.core.GetStoreRegionCount(store.GetID()), store.GetRegionCount()) - re.Equal(cluster.core.GetStoreLeaderRegionSize(store.GetID()), store.GetLeaderSize()) - re.Equal(cluster.core.GetStoreRegionSize(store.GetID()), store.GetRegionSize()) + re.Equal(cluster.GetStoreLeaderCount(store.GetID()), store.GetLeaderCount()) + re.Equal(cluster.GetStoreRegionCount(store.GetID()), store.GetRegionCount()) + re.Equal(cluster.GetStoreLeaderRegionSize(store.GetID()), store.GetLeaderSize()) + re.Equal(cluster.GetStoreRegionSize(store.GetID()), store.GetRegionSize()) } // Test with storage. @@ -915,7 +919,7 @@ func TestRegionHeartbeat(t *testing.T) { core.WithNewRegionID(10000), core.WithDecVersion(), ) - re.Error(cluster.processRegionHeartbeat(overlapRegion)) + re.Error(cluster.processRegionHeartbeat(core.ContextTODO(), overlapRegion)) region := &metapb.Region{} ok, err := storage.LoadRegion(regions[n-1].GetID(), region) re.True(ok) @@ -939,7 +943,16 @@ func TestRegionHeartbeat(t *testing.T) { core.WithStartKey(regions[n-2].GetStartKey()), core.WithNewRegionID(regions[n-1].GetID()+1), ) - re.NoError(cluster.processRegionHeartbeat(overlapRegion)) + tracer := core.NewHeartbeatProcessTracer() + tracer.Begin() + ctx := core.ContextTODO() + ctx.Tracer = tracer + re.NoError(cluster.processRegionHeartbeat(ctx, overlapRegion)) + tracer.OnAllStageFinished() + re.Condition(func() bool { + fileds := tracer.LogFields() + return slice.AllOf(fileds, func(i int) bool { return fileds[i].Integer > 0 }) + }, "should have stats") region = &metapb.Region{} ok, err = storage.LoadRegion(regions[n-1].GetID(), region) re.False(ok) @@ -968,7 +981,9 @@ func TestRegionFlowChanged(t *testing.T) { regions := []*core.RegionInfo{core.NewTestRegionInfo(1, 1, []byte{}, []byte{})} processRegions := func(regions []*core.RegionInfo) { for _, r := range regions { - cluster.processRegionHeartbeat(r) + mctx := core.ContextTODO() + mctx.Context = ctx + cluster.processRegionHeartbeat(mctx, r) } } regions = core.SplitRegions(regions) @@ -1004,7 +1019,7 @@ func TestRegionSizeChanged(t *testing.T) { core.SetApproximateKeys(curMaxMergeKeys-1), core.SetSource(core.Heartbeat), ) - cluster.processRegionHeartbeat(region) + cluster.processRegionHeartbeat(core.ContextTODO(), region) regionID := region.GetID() re.True(cluster.regionStats.IsRegionStatsType(regionID, 
statistics.UndersizedRegion)) // Test ApproximateSize and ApproximateKeys change. @@ -1014,16 +1029,16 @@ func TestRegionSizeChanged(t *testing.T) { core.SetApproximateKeys(curMaxMergeKeys+1), core.SetSource(core.Heartbeat), ) - cluster.processRegionHeartbeat(region) + cluster.processRegionHeartbeat(core.ContextTODO(), region) re.False(cluster.regionStats.IsRegionStatsType(regionID, statistics.UndersizedRegion)) // Test MaxMergeRegionSize and MaxMergeRegionKeys change. cluster.opt.SetMaxMergeRegionSize(uint64(curMaxMergeSize + 2)) cluster.opt.SetMaxMergeRegionKeys(uint64(curMaxMergeKeys + 2)) - cluster.processRegionHeartbeat(region) + cluster.processRegionHeartbeat(core.ContextTODO(), region) re.True(cluster.regionStats.IsRegionStatsType(regionID, statistics.UndersizedRegion)) cluster.opt.SetMaxMergeRegionSize(uint64(curMaxMergeSize)) cluster.opt.SetMaxMergeRegionKeys(uint64(curMaxMergeKeys)) - cluster.processRegionHeartbeat(region) + cluster.processRegionHeartbeat(core.ContextTODO(), region) re.False(cluster.regionStats.IsRegionStatsType(regionID, statistics.UndersizedRegion)) } @@ -1086,11 +1101,11 @@ func TestConcurrentRegionHeartbeat(t *testing.T) { re.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/concurrentRegionHeartbeat", "return(true)")) go func() { defer wg.Done() - cluster.processRegionHeartbeat(source) + cluster.processRegionHeartbeat(core.ContextTODO(), source) }() time.Sleep(100 * time.Millisecond) re.NoError(failpoint.Disable("github.com/tikv/pd/server/cluster/concurrentRegionHeartbeat")) - re.NoError(cluster.processRegionHeartbeat(target)) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), target)) wg.Wait() checkRegion(re, cluster.GetRegionByKey([]byte{}), target) } @@ -1120,7 +1135,7 @@ func TestRegionLabelIsolationLevel(t *testing.T) { State: metapb.StoreState_Up, Labels: labels, } - re.NoError(cluster.putStoreLocked(core.NewStoreInfo(store))) + re.NoError(cluster.setStore(core.NewStoreInfo(store))) } peers := make([]*metapb.Peer, 0, 4) @@ -1152,7 +1167,7 @@ func TestRegionLabelIsolationLevel(t *testing.T) { func heartbeatRegions(re *require.Assertions, cluster *RaftCluster, regions []*core.RegionInfo) { // Heartbeat and check region one by one. for _, r := range regions { - re.NoError(cluster.processRegionHeartbeat(r)) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), r)) checkRegion(re, cluster.GetRegion(r.GetID()), r) checkRegion(re, cluster.GetRegionByKey(r.GetStartKey()), r) @@ -1189,7 +1204,7 @@ func TestHeartbeatSplit(t *testing.T) { // 1: [nil, nil) region1 := core.NewRegionInfo(&metapb.Region{Id: 1, RegionEpoch: &metapb.RegionEpoch{Version: 1, ConfVer: 1}}, nil) - re.NoError(cluster.processRegionHeartbeat(region1)) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region1)) checkRegion(re, cluster.GetRegionByKey([]byte("foo")), region1) // split 1 to 2: [nil, m) 1: [m, nil), sync 2 first. @@ -1198,12 +1213,12 @@ func TestHeartbeatSplit(t *testing.T) { core.WithIncVersion(), ) region2 := core.NewRegionInfo(&metapb.Region{Id: 2, EndKey: []byte("m"), RegionEpoch: &metapb.RegionEpoch{Version: 1, ConfVer: 1}}, nil) - re.NoError(cluster.processRegionHeartbeat(region2)) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region2)) checkRegion(re, cluster.GetRegionByKey([]byte("a")), region2) // [m, nil) is missing before r1's heartbeat. 
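Throughout these test hunks, processRegionHeartbeat gains a leading context argument, usually core.ContextTODO(); the overlap-region test above additionally attaches a heartbeat tracer to it. A condensed sketch of that call shape, reusing only identifiers that appear in the hunks, with the surrounding test setup assumed:

// ctx carries per-heartbeat state; the tracer records per-stage timings that
// the test later asserts on via tracer.LogFields().
ctx := core.ContextTODO()
tracer := core.NewHeartbeatProcessTracer()
tracer.Begin()
ctx.Tracer = tracer
if err := cluster.processRegionHeartbeat(ctx, region); err != nil {
	// stale epochs and other invalid reports are rejected with an error
}
tracer.OnAllStageFinished()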
re.Nil(cluster.GetRegionByKey([]byte("z"))) - re.NoError(cluster.processRegionHeartbeat(region1)) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region1)) checkRegion(re, cluster.GetRegionByKey([]byte("z")), region1) // split 1 to 3: [m, q) 1: [q, nil), sync 1 first. @@ -1212,12 +1227,12 @@ func TestHeartbeatSplit(t *testing.T) { core.WithIncVersion(), ) region3 := core.NewRegionInfo(&metapb.Region{Id: 3, StartKey: []byte("m"), EndKey: []byte("q"), RegionEpoch: &metapb.RegionEpoch{Version: 1, ConfVer: 1}}, nil) - re.NoError(cluster.processRegionHeartbeat(region1)) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region1)) checkRegion(re, cluster.GetRegionByKey([]byte("z")), region1) checkRegion(re, cluster.GetRegionByKey([]byte("a")), region2) // [m, q) is missing before r3's heartbeat. re.Nil(cluster.GetRegionByKey([]byte("n"))) - re.NoError(cluster.processRegionHeartbeat(region3)) + re.NoError(cluster.processRegionHeartbeat(core.ContextTODO(), region3)) checkRegion(re, cluster.GetRegionByKey([]byte("n")), region3) } @@ -1283,7 +1298,7 @@ func TestOfflineAndMerge(t *testing.T) { // Put 4 stores. for _, store := range newTestStores(4, "5.0.0") { - re.NoError(cluster.PutStore(store.GetMeta())) + re.NoError(cluster.PutMetaStore(store.GetMeta())) } peers := []*metapb.Peer{ @@ -1338,7 +1353,7 @@ func TestStoreConfigUpdate(t *testing.T) { tc := newTestCluster(ctx, opt) stores := newTestStores(5, "2.0.0") for _, s := range stores { - re.NoError(tc.putStoreLocked(s)) + re.NoError(tc.setStore(s)) } re.Len(tc.getUpStores(), 5) // Case1: big region. @@ -1408,7 +1423,7 @@ func TestSyncConfigContext(t *testing.T) { tc := newTestCluster(ctx, opt) tc.httpClient = &http.Client{} - server := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, req *http.Request) { + server := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, _ *http.Request) { time.Sleep(time.Second * 100) cfg := &sc.StoreConfig{} b, err := json.Marshal(cfg) @@ -1423,7 +1438,7 @@ func TestSyncConfigContext(t *testing.T) { })) stores := newTestStores(1, "2.0.0") for _, s := range stores { - re.NoError(tc.putStoreLocked(s)) + re.NoError(tc.setStore(s)) } // trip schema header now := time.Now() @@ -1445,7 +1460,7 @@ func TestStoreConfigSync(t *testing.T) { tc := newTestCluster(ctx, opt) stores := newTestStores(5, "2.0.0") for _, s := range stores { - re.NoError(tc.putStoreLocked(s)) + re.NoError(tc.setStore(s)) } re.Len(tc.getUpStores(), 5) @@ -1490,7 +1505,7 @@ func TestUpdateStorePendingPeerCount(t *testing.T) { tc.RaftCluster.coordinator = schedule.NewCoordinator(ctx, tc.RaftCluster, nil) stores := newTestStores(5, "2.0.0") for _, s := range stores { - re.NoError(tc.putStoreLocked(s)) + re.NoError(tc.setStore(s)) } tc.RaftCluster.wg.Add(1) go tc.RaftCluster.runUpdateStoreStats() @@ -1513,11 +1528,11 @@ func TestUpdateStorePendingPeerCount(t *testing.T) { }, } origin := core.NewRegionInfo(&metapb.Region{Id: 1, Peers: peers[:3]}, peers[0], core.WithPendingPeers(peers[1:3])) - re.NoError(tc.processRegionHeartbeat(origin)) + re.NoError(tc.processRegionHeartbeat(core.ContextTODO(), origin)) time.Sleep(50 * time.Millisecond) checkPendingPeerCount([]int{0, 1, 1, 0}, tc.RaftCluster, re) newRegion := core.NewRegionInfo(&metapb.Region{Id: 1, Peers: peers[1:]}, peers[1], core.WithPendingPeers(peers[3:4])) - re.NoError(tc.processRegionHeartbeat(newRegion)) + re.NoError(tc.processRegionHeartbeat(core.ContextTODO(), newRegion)) time.Sleep(50 * time.Millisecond) checkPendingPeerCount([]int{0, 
0, 0, 1}, tc.RaftCluster, re) } @@ -1665,7 +1680,7 @@ func TestCalculateStoreSize1(t *testing.T) { }, }...) s := store.Clone(core.SetStoreLabels(labels)) - re.NoError(cluster.PutStore(s.GetMeta())) + re.NoError(cluster.PutMetaStore(s.GetMeta())) } cluster.ruleManager.SetRule( @@ -1749,7 +1764,7 @@ func TestCalculateStoreSize2(t *testing.T) { } labels = append(labels, []*metapb.StoreLabel{{Key: "rack", Value: "r1"}, {Key: "host", Value: "h1"}}...) s := store.Clone(core.SetStoreLabels(labels)) - re.NoError(cluster.PutStore(s.GetMeta())) + re.NoError(cluster.PutMetaStore(s.GetMeta())) } cluster.ruleManager.SetRule( @@ -1799,7 +1814,7 @@ func TestStores(t *testing.T) { id := store.GetID() re.Nil(cache.GetStore(id)) re.Error(cache.PauseLeaderTransfer(id)) - cache.SetStore(store) + cache.PutStore(store) re.Equal(store, cache.GetStore(id)) re.Equal(i+1, cache.GetStoreCount()) re.NoError(cache.PauseLeaderTransfer(id)) @@ -1830,7 +1845,7 @@ func Test(t *testing.T) { _, opts, err := newTestScheduleConfig() re.NoError(err) tc := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opts, storage.NewStorageWithMemoryBackend()) - cache := tc.core + cache := tc.BasicCluster for i := uint64(0); i < n; i++ { region := regions[i] @@ -1948,7 +1963,7 @@ func TestAwakenStore(t *testing.T) { stores := newTestStores(n, "6.5.0") re.True(stores[0].NeedAwakenStore()) for _, store := range stores { - re.NoError(cluster.PutStore(store.GetMeta())) + re.NoError(cluster.PutMetaStore(store.GetMeta())) } for i := uint64(1); i <= n; i++ { re.False(cluster.slowStat.ExistsSlowStores()) @@ -1958,7 +1973,7 @@ func TestAwakenStore(t *testing.T) { now := time.Now() store4 := stores[0].Clone(core.SetLastHeartbeatTS(now), core.SetLastAwakenTime(now.Add(-11*time.Minute))) - re.NoError(cluster.putStoreLocked(store4)) + re.NoError(cluster.setStore(store4)) store1 := cluster.GetStore(1) re.True(store1.NeedAwakenStore()) @@ -2000,7 +2015,7 @@ func TestUpdateAndDeleteLabel(t *testing.T) { cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) stores := newTestStores(1, "6.5.1") for _, store := range stores { - re.NoError(cluster.PutStore(store.GetMeta())) + re.NoError(cluster.PutMetaStore(store.GetMeta())) } re.Empty(cluster.GetStore(1).GetLabels()) // Update label. @@ -2092,7 +2107,7 @@ func TestUpdateAndDeleteLabel(t *testing.T) { newStore := typeutil.DeepClone(cluster.GetStore(1).GetMeta(), core.StoreFactory) newStore.Labels = nil // Store rebooting will call PutStore. - err = cluster.PutStore(newStore) + err = cluster.PutMetaStore(newStore) re.NoError(err) // Check the label after rebooting. 
re.Equal([]*metapb.StoreLabel{{Key: "mode", Value: "readonly"}}, cluster.GetStore(1).GetLabels()) @@ -2128,7 +2143,8 @@ func newTestRaftCluster( opt *config.PersistOptions, s storage.Storage, ) *RaftCluster { - rc := &RaftCluster{serverCtx: ctx, core: core.NewBasicCluster(), storage: s} + opt.GetScheduleConfig().EnableHeartbeatConcurrentRunner = false + rc := &RaftCluster{serverCtx: ctx, BasicCluster: core.NewBasicCluster(), storage: s} rc.InitCluster(id, opt, nil, nil) rc.ruleManager = placement.NewRuleManager(ctx, storage.NewStorageWithMemoryBackend(), rc, opt) if opt.IsPlacementRulesEnabled() { @@ -2137,7 +2153,7 @@ func newTestRaftCluster( panic(err) } } - rc.schedulingController = newSchedulingController(rc.ctx, rc.core, rc.opt, rc.ruleManager) + rc.schedulingController = newSchedulingController(rc.ctx, rc.BasicCluster, rc.opt, rc.ruleManager) return rc } @@ -2279,10 +2295,6 @@ func checkStaleRegion(origin *metapb.Region, region *metapb.Region) error { return nil } -func newTestOperator(regionID uint64, regionEpoch *metapb.RegionEpoch, kind operator.OpKind, steps ...operator.OpStep) *operator.Operator { - return operator.NewTestOperator(regionID, regionEpoch, kind, steps...) -} - func (c *testCluster) AllocPeer(storeID uint64) (*metapb.Peer, error) { id, err := c.AllocID() if err != nil { @@ -2314,7 +2326,7 @@ func (c *testCluster) addRegionStore(storeID uint64, regionCount int, regionSize c.SetStoreLimit(storeID, storelimit.RemovePeer, 60) c.Lock() defer c.Unlock() - return c.putStoreLocked(newStore) + return c.setStore(newStore) } func (c *testCluster) addLeaderRegion(regionID uint64, leaderStoreID uint64, followerStoreIDs ...uint64) error { @@ -2337,7 +2349,7 @@ func (c *testCluster) updateLeaderCount(storeID uint64, leaderCount int) error { ) c.Lock() defer c.Unlock() - return c.putStoreLocked(newStore) + return c.setStore(newStore) } func (c *testCluster) addLeaderStore(storeID uint64, leaderCount int) error { @@ -2353,7 +2365,7 @@ func (c *testCluster) addLeaderStore(storeID uint64, leaderCount int) error { c.SetStoreLimit(storeID, storelimit.RemovePeer, 60) c.Lock() defer c.Unlock() - return c.putStoreLocked(newStore) + return c.setStore(newStore) } func (c *testCluster) setStoreDown(storeID uint64) error { @@ -2364,7 +2376,7 @@ func (c *testCluster) setStoreDown(storeID uint64) error { ) c.Lock() defer c.Unlock() - return c.putStoreLocked(newStore) + return c.setStore(newStore) } func (c *testCluster) setStoreOffline(storeID uint64) error { @@ -2372,7 +2384,7 @@ func (c *testCluster) setStoreOffline(storeID uint64) error { newStore := store.Clone(core.SetStoreState(metapb.StoreState_Offline, false)) c.Lock() defer c.Unlock() - return c.putStoreLocked(newStore) + return c.setStore(newStore) } func (c *testCluster) LoadRegion(regionID uint64, followerStoreIDs ...uint64) error { @@ -2395,19 +2407,19 @@ func TestBasic(t *testing.T) { re.NoError(tc.addLeaderRegion(1, 1)) - op1 := newTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpLeader) + op1 := operator.NewTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpLeader) oc.AddWaitingOperator(op1) re.Equal(uint64(1), oc.OperatorCount(operator.OpLeader)) re.Equal(op1.RegionID(), oc.GetOperator(1).RegionID()) // Region 1 already has an operator, cannot add another one. 
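With the local newTestOperator wrapper deleted above, the tests below construct operators through the exported helper directly. A one-line sketch of the replacement call, using only the signature visible in the removed wrapper (regionID, epoch, and kind come from the test fixture; steps are optional):

op := operator.NewTestOperator(regionID, region.GetRegionEpoch(), operator.OpLeader)
oc.AddWaitingOperator(op)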
- op2 := newTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpRegion) + op2 := operator.NewTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpRegion) oc.AddWaitingOperator(op2) re.Equal(uint64(0), oc.OperatorCount(operator.OpRegion)) // Remove the operator manually, then we can add a new operator. re.True(oc.RemoveOperator(op1)) - op3 := newTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpRegion) + op3 := operator.NewTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpRegion) oc.AddWaitingOperator(op3) re.Equal(uint64(1), oc.OperatorCount(operator.OpRegion)) re.Equal(op3.RegionID(), oc.GetOperator(1).RegionID()) @@ -2508,7 +2520,7 @@ func TestCollectMetricsConcurrent(t *testing.T) { } schedule.ResetHotSpotMetrics() schedulers.ResetSchedulerMetrics() - rc.resetSchedulingMetrics() + resetSchedulingMetrics() wg.Wait() } @@ -2559,7 +2571,7 @@ func TestCollectMetrics(t *testing.T) { re.Equal(status1, status2) schedule.ResetHotSpotMetrics() schedulers.ResetSchedulerMetrics() - rc.resetSchedulingMetrics() + resetSchedulingMetrics() } func prepare(setCfg func(*sc.ScheduleConfig), setTc func(*testCluster), run func(*schedule.Coordinator), re *require.Assertions) (*testCluster, *schedule.Coordinator, func()) { @@ -2660,7 +2672,7 @@ func TestCheckRegionWithScheduleDeny(t *testing.T) { ID: "schedulelabel", Labels: []labeler.RegionLabel{{Key: "schedule", Value: "deny"}}, RuleType: labeler.KeyRange, - Data: []interface{}{map[string]interface{}{"start_key": "", "end_key": ""}}, + Data: []any{map[string]any{"start_key": "", "end_key": ""}}, }) // should allow to do rule checker @@ -2700,7 +2712,7 @@ func TestCheckerIsBusy(t *testing.T) { re.NoError(tc.addLeaderRegion(regionID, 1)) switch operatorKind { case operator.OpReplica: - op := newTestOperator(regionID, tc.GetRegion(regionID).GetRegionEpoch(), operatorKind) + op := operator.NewTestOperator(regionID, tc.GetRegion(regionID).GetRegionEpoch(), operatorKind) re.Equal(1, co.GetOperatorController().AddWaitingOperator(op)) case operator.OpRegion | operator.OpMerge: if regionID%2 == 1 { @@ -2950,13 +2962,13 @@ func TestShouldRun(t *testing.T) { for _, testCase := range testCases { r := tc.GetRegion(testCase.regionID) nr := r.Clone(core.WithLeader(r.GetPeers()[0]), core.SetSource(core.Heartbeat)) - re.NoError(tc.processRegionHeartbeat(nr)) + re.NoError(tc.processRegionHeartbeat(core.ContextTODO(), nr)) re.Equal(testCase.ShouldRun, co.ShouldRun()) } nr := &metapb.Region{Id: 6, Peers: []*metapb.Peer{}} newRegion := core.NewRegionInfo(nr, nil, core.SetSource(core.Heartbeat)) - re.Error(tc.processRegionHeartbeat(newRegion)) - re.Equal(7, tc.core.GetClusterNotFromStorageRegionsCnt()) + re.Error(tc.processRegionHeartbeat(core.ContextTODO(), newRegion)) + re.Equal(7, tc.GetClusterNotFromStorageRegionsCnt()) } func TestShouldRunWithNonLeaderRegions(t *testing.T) { @@ -2993,13 +3005,13 @@ func TestShouldRunWithNonLeaderRegions(t *testing.T) { for _, testCase := range testCases { r := tc.GetRegion(testCase.regionID) nr := r.Clone(core.WithLeader(r.GetPeers()[0]), core.SetSource(core.Heartbeat)) - re.NoError(tc.processRegionHeartbeat(nr)) + re.NoError(tc.processRegionHeartbeat(core.ContextTODO(), nr)) re.Equal(testCase.ShouldRun, co.ShouldRun()) } nr := &metapb.Region{Id: 9, Peers: []*metapb.Peer{}} newRegion := core.NewRegionInfo(nr, nil, core.SetSource(core.Heartbeat)) - re.Error(tc.processRegionHeartbeat(newRegion)) - re.Equal(9, tc.core.GetClusterNotFromStorageRegionsCnt()) + 
re.Error(tc.processRegionHeartbeat(core.ContextTODO(), newRegion)) + re.Equal(9, tc.GetClusterNotFromStorageRegionsCnt()) // Now, after server is prepared, there exist some regions with no leader. re.Equal(uint64(0), tc.GetRegion(10).GetLeader().GetStoreId()) @@ -3015,8 +3027,6 @@ func TestAddScheduler(t *testing.T) { re.NoError(controller.RemoveScheduler(schedulers.BalanceLeaderName)) re.NoError(controller.RemoveScheduler(schedulers.BalanceRegionName)) re.NoError(controller.RemoveScheduler(schedulers.HotRegionName)) - re.NoError(controller.RemoveScheduler(schedulers.BalanceWitnessName)) - re.NoError(controller.RemoveScheduler(schedulers.TransferWitnessLeaderName)) re.NoError(controller.RemoveScheduler(schedulers.EvictSlowStoreName)) re.Empty(controller.GetSchedulerNames()) @@ -3040,7 +3050,7 @@ func TestAddScheduler(t *testing.T) { re.NoError(err) conf, err := bl.EncodeConfig() re.NoError(err) - data := make(map[string]interface{}) + data := make(map[string]any) err = json.Unmarshal(conf, &data) re.NoError(err) batch := data["batch"].(float64) @@ -3058,7 +3068,7 @@ func TestAddScheduler(t *testing.T) { re.NoError(err) conf, err = hb.EncodeConfig() re.NoError(err) - data = make(map[string]interface{}) + data = make(map[string]any) re.NoError(json.Unmarshal(conf, &data)) re.Contains(data, "enable-for-tiflash") re.Equal("true", data["enable-for-tiflash"].(string)) @@ -3112,8 +3122,6 @@ func TestPersistScheduler(t *testing.T) { re.NoError(controller.RemoveScheduler(schedulers.BalanceLeaderName)) re.NoError(controller.RemoveScheduler(schedulers.BalanceRegionName)) re.NoError(controller.RemoveScheduler(schedulers.HotRegionName)) - re.NoError(controller.RemoveScheduler(schedulers.BalanceWitnessName)) - re.NoError(controller.RemoveScheduler(schedulers.TransferWitnessLeaderName)) re.NoError(controller.RemoveScheduler(schedulers.EvictSlowStoreName)) // only remains 2 items with independent config. 
re.Len(controller.GetSchedulerNames(), 2) @@ -3226,8 +3234,6 @@ func TestRemoveScheduler(t *testing.T) { re.NoError(controller.RemoveScheduler(schedulers.BalanceRegionName)) re.NoError(controller.RemoveScheduler(schedulers.HotRegionName)) re.NoError(controller.RemoveScheduler(schedulers.GrantLeaderName)) - re.NoError(controller.RemoveScheduler(schedulers.BalanceWitnessName)) - re.NoError(controller.RemoveScheduler(schedulers.TransferWitnessLeaderName)) re.NoError(controller.RemoveScheduler(schedulers.EvictSlowStoreName)) // all removed sches, _, err = storage.LoadAllSchedulerConfigs() @@ -3372,10 +3378,10 @@ func TestOperatorCount(t *testing.T) { re.NoError(tc.addLeaderRegion(1, 1)) re.NoError(tc.addLeaderRegion(2, 2)) { - op1 := newTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpLeader) + op1 := operator.NewTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpLeader) oc.AddWaitingOperator(op1) re.Equal(uint64(1), oc.OperatorCount(operator.OpLeader)) // 1:leader - op2 := newTestOperator(2, tc.GetRegion(2).GetRegionEpoch(), operator.OpLeader) + op2 := operator.NewTestOperator(2, tc.GetRegion(2).GetRegionEpoch(), operator.OpLeader) oc.AddWaitingOperator(op2) re.Equal(uint64(2), oc.OperatorCount(operator.OpLeader)) // 1:leader, 2:leader re.True(oc.RemoveOperator(op1)) @@ -3383,11 +3389,11 @@ func TestOperatorCount(t *testing.T) { } { - op1 := newTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpRegion) + op1 := operator.NewTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpRegion) oc.AddWaitingOperator(op1) re.Equal(uint64(1), oc.OperatorCount(operator.OpRegion)) // 1:region 2:leader re.Equal(uint64(1), oc.OperatorCount(operator.OpLeader)) - op2 := newTestOperator(2, tc.GetRegion(2).GetRegionEpoch(), operator.OpRegion) + op2 := operator.NewTestOperator(2, tc.GetRegion(2).GetRegionEpoch(), operator.OpRegion) op2.SetPriorityLevel(constant.High) oc.AddWaitingOperator(op2) re.Equal(uint64(2), oc.OperatorCount(operator.OpRegion)) // 1:region 2:region @@ -3468,19 +3474,20 @@ func TestStoreOverloadedWithReplace(t *testing.T) { tc.putRegion(region) region = tc.GetRegion(2).Clone(core.SetApproximateSize(60)) tc.putRegion(region) - op1 := newTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpRegion, operator.AddPeer{ToStore: 1, PeerID: 1}) + op1 := operator.NewTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpRegion, operator.AddPeer{ToStore: 1, PeerID: 1}) re.True(oc.AddOperator(op1)) - op2 := newTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpRegion, operator.AddPeer{ToStore: 2, PeerID: 2}) + op2 := operator.NewTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpRegion, operator.AddPeer{ToStore: 2, PeerID: 2}) op2.SetPriorityLevel(constant.High) re.True(oc.AddOperator(op2)) - op3 := newTestOperator(1, tc.GetRegion(2).GetRegionEpoch(), operator.OpRegion, operator.AddPeer{ToStore: 1, PeerID: 3}) + op3 := operator.NewTestOperator(1, tc.GetRegion(2).GetRegionEpoch(), operator.OpRegion, operator.AddPeer{ToStore: 1, PeerID: 3}) re.False(oc.AddOperator(op3)) ops, _ := lb.Schedule(tc, false /* dryRun */) re.Empty(ops) - // sleep 2 seconds to make sure that token is filled up - time.Sleep(2 * time.Second) - ops, _ = lb.Schedule(tc, false /* dryRun */) - re.NotEmpty(ops) + // make sure that token is filled up + testutil.Eventually(re, func() bool { + ops, _ = lb.Schedule(tc, false /* dryRun */) + return len(ops) != 0 + }) } func TestDownStoreLimit(t *testing.T) { @@ -3534,7 +3541,7 @@ type mockLimitScheduler struct { kind 
operator.OpKind } -func (s *mockLimitScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { +func (s *mockLimitScheduler) IsScheduleAllowed(sche.SchedulerCluster) bool { return s.counter.OperatorCount(s.kind) < s.limit } @@ -3566,11 +3573,11 @@ func TestController(t *testing.T) { // count = 0 { re.True(sc.AllowSchedule(false)) - op1 := newTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpLeader) + op1 := operator.NewTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpLeader) re.Equal(1, oc.AddWaitingOperator(op1)) // count = 1 re.True(sc.AllowSchedule(false)) - op2 := newTestOperator(2, tc.GetRegion(2).GetRegionEpoch(), operator.OpLeader) + op2 := operator.NewTestOperator(2, tc.GetRegion(2).GetRegionEpoch(), operator.OpLeader) re.Equal(1, oc.AddWaitingOperator(op2)) // count = 2 re.False(sc.AllowSchedule(false)) @@ -3579,10 +3586,10 @@ func TestController(t *testing.T) { re.True(sc.AllowSchedule(false)) } - op11 := newTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpLeader) + op11 := operator.NewTestOperator(1, tc.GetRegion(1).GetRegionEpoch(), operator.OpLeader) // add a PriorityKind operator will remove old operator { - op3 := newTestOperator(2, tc.GetRegion(2).GetRegionEpoch(), operator.OpHotRegion) + op3 := operator.NewTestOperator(2, tc.GetRegion(2).GetRegionEpoch(), operator.OpHotRegion) op3.SetPriorityLevel(constant.High) re.Equal(1, oc.AddWaitingOperator(op11)) re.False(sc.AllowSchedule(false)) @@ -3593,10 +3600,10 @@ func TestController(t *testing.T) { // add a admin operator will remove old operator { - op2 := newTestOperator(2, tc.GetRegion(2).GetRegionEpoch(), operator.OpLeader) + op2 := operator.NewTestOperator(2, tc.GetRegion(2).GetRegionEpoch(), operator.OpLeader) re.Equal(1, oc.AddWaitingOperator(op2)) re.False(sc.AllowSchedule(false)) - op4 := newTestOperator(2, tc.GetRegion(2).GetRegionEpoch(), operator.OpAdmin) + op4 := operator.NewTestOperator(2, tc.GetRegion(2).GetRegionEpoch(), operator.OpAdmin) op4.SetPriorityLevel(constant.High) re.Equal(1, oc.AddWaitingOperator(op4)) re.True(sc.AllowSchedule(false)) @@ -3605,7 +3612,7 @@ func TestController(t *testing.T) { // test wrong region id. 
{ - op5 := newTestOperator(3, &metapb.RegionEpoch{}, operator.OpHotRegion) + op5 := operator.NewTestOperator(3, &metapb.RegionEpoch{}, operator.OpHotRegion) re.Equal(0, oc.AddWaitingOperator(op5)) } @@ -3616,12 +3623,12 @@ func TestController(t *testing.T) { ConfVer: tc.GetRegion(1).GetRegionEpoch().GetConfVer(), } { - op6 := newTestOperator(1, epoch, operator.OpLeader) + op6 := operator.NewTestOperator(1, epoch, operator.OpLeader) re.Equal(0, oc.AddWaitingOperator(op6)) } epoch.Version-- { - op6 := newTestOperator(1, epoch, operator.OpLeader) + op6 := operator.NewTestOperator(1, epoch, operator.OpLeader) re.Equal(1, oc.AddWaitingOperator(op6)) re.True(oc.RemoveOperator(op6)) } @@ -3651,7 +3658,8 @@ func TestInterval(t *testing.T) { func waitAddLearner(re *require.Assertions, stream mockhbstream.HeartbeatStream, region *core.RegionInfo, storeID uint64) *core.RegionInfo { var res *pdpb.RegionHeartbeatResponse testutil.Eventually(re, func() bool { - if res = stream.Recv().(*pdpb.RegionHeartbeatResponse); res != nil { + if r := stream.Recv(); r != nil { + res = r.(*pdpb.RegionHeartbeatResponse) return res.GetRegionId() == region.GetID() && res.GetChangePeer().GetChangeType() == eraftpb.ConfChangeType_AddLearnerNode && res.GetChangePeer().GetPeer().GetStoreId() == storeID @@ -3667,7 +3675,8 @@ func waitAddLearner(re *require.Assertions, stream mockhbstream.HeartbeatStream, func waitPromoteLearner(re *require.Assertions, stream mockhbstream.HeartbeatStream, region *core.RegionInfo, storeID uint64) *core.RegionInfo { var res *pdpb.RegionHeartbeatResponse testutil.Eventually(re, func() bool { - if res = stream.Recv().(*pdpb.RegionHeartbeatResponse); res != nil { + if r := stream.Recv(); r != nil { + res = r.(*pdpb.RegionHeartbeatResponse) return res.GetRegionId() == region.GetID() && res.GetChangePeer().GetChangeType() == eraftpb.ConfChangeType_AddNode && res.GetChangePeer().GetPeer().GetStoreId() == storeID @@ -3684,7 +3693,8 @@ func waitPromoteLearner(re *require.Assertions, stream mockhbstream.HeartbeatStr func waitRemovePeer(re *require.Assertions, stream mockhbstream.HeartbeatStream, region *core.RegionInfo, storeID uint64) *core.RegionInfo { var res *pdpb.RegionHeartbeatResponse testutil.Eventually(re, func() bool { - if res = stream.Recv().(*pdpb.RegionHeartbeatResponse); res != nil { + if r := stream.Recv(); r != nil { + res = r.(*pdpb.RegionHeartbeatResponse) return res.GetRegionId() == region.GetID() && res.GetChangePeer().GetChangeType() == eraftpb.ConfChangeType_RemoveNode && res.GetChangePeer().GetPeer().GetStoreId() == storeID @@ -3700,7 +3710,8 @@ func waitRemovePeer(re *require.Assertions, stream mockhbstream.HeartbeatStream, func waitTransferLeader(re *require.Assertions, stream mockhbstream.HeartbeatStream, region *core.RegionInfo, storeID uint64) *core.RegionInfo { var res *pdpb.RegionHeartbeatResponse testutil.Eventually(re, func() bool { - if res = stream.Recv().(*pdpb.RegionHeartbeatResponse); res != nil { + if r := stream.Recv(); r != nil { + res = r.(*pdpb.RegionHeartbeatResponse) if res.GetRegionId() == region.GetID() { for _, peer := range append(res.GetTransferLeader().GetPeers(), res.GetTransferLeader().GetPeer()) { if peer.GetStoreId() == storeID { @@ -3722,3 +3733,94 @@ func waitNoResponse(re *require.Assertions, stream mockhbstream.HeartbeatStream) return res == nil }) } + +func BenchmarkHandleStatsAsync(b *testing.B) { + // Setup: create a new instance of Cluster + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + _, opt, _ := 
newTestScheduleConfig() + c := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) + c.coordinator = schedule.NewCoordinator(ctx, c, nil) + c.SetPrepared() + region := core.NewRegionInfo(&metapb.Region{ + Id: 1, + RegionEpoch: &metapb.RegionEpoch{ + ConfVer: 1, + Version: 1, + }, + StartKey: []byte{byte(2)}, + EndKey: []byte{byte(3)}, + Peers: []*metapb.Peer{{Id: 11, StoreId: uint64(1)}}, + }, nil, + core.SetApproximateSize(10), + core.SetReportInterval(0, 10), + ) + // Reset timer after setup + b.ResetTimer() + // Run HandleStatsAsync b.N times + for i := 0; i < b.N; i++ { + cluster.HandleStatsAsync(c, region) + } +} + +func BenchmarkHandleRegionHeartbeat(b *testing.B) { + // Setup: create a new instance of Cluster + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + _, opt, _ := newTestScheduleConfig() + c := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) + c.coordinator = schedule.NewCoordinator(ctx, c, nil) + c.SetPrepared() + log.SetLevel(logutil.StringToZapLogLevel("fatal")) + peers := []*metapb.Peer{ + {Id: 11, StoreId: 1}, + {Id: 22, StoreId: 2}, + {Id: 33, StoreId: 3}, + } + queryStats := &pdpb.QueryStats{ + Get: 5, + Coprocessor: 6, + Scan: 7, + Put: 8, + Delete: 9, + DeleteRange: 10, + AcquirePessimisticLock: 11, + Rollback: 12, + Prewrite: 13, + Commit: 14, + } + interval := &pdpb.TimeInterval{StartTimestamp: 0, EndTimestamp: 10} + downPeers := []*pdpb.PeerStats{{Peer: peers[1], DownSeconds: 100}, {Peer: peers[2], DownSeconds: 100}} + pendingPeers := []*metapb.Peer{peers[1], peers[2]} + + var requests []*pdpb.RegionHeartbeatRequest + for i := 0; i < 1000000; i++ { + request := &pdpb.RegionHeartbeatRequest{ + Region: &metapb.Region{Id: 10, Peers: peers, StartKey: []byte{byte(i)}, EndKey: []byte{byte(i + 1)}}, + Leader: peers[0], + DownPeers: downPeers, + PendingPeers: pendingPeers, + BytesWritten: 10, + BytesRead: 20, + KeysWritten: 100, + KeysRead: 200, + ApproximateSize: 30 * units.MiB, + ApproximateKeys: 300, + Interval: interval, + QueryStats: queryStats, + Term: 1, + CpuUsage: 100, + } + requests = append(requests, request) + } + flowRoundDivisor := opt.GetPDServerConfig().FlowRoundByDigit + + // Reset timer after setup + b.ResetTimer() + // Run HandleRegionHeartbeat b.N times + for i := 0; i < b.N; i++ { + region := core.RegionFromHeartbeat(requests[i], flowRoundDivisor) + c.HandleRegionHeartbeat(region) + } +} diff --git a/server/cluster/cluster_worker.go b/server/cluster/cluster_worker.go index 74a445ad78e..39720e7d765 100644 --- a/server/cluster/cluster_worker.go +++ b/server/cluster/cluster_worker.go @@ -24,6 +24,7 @@ import ( "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/errs" mcsutils "github.com/tikv/pd/pkg/mcs/utils" + "github.com/tikv/pd/pkg/ratelimit" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/statistics/buckets" "github.com/tikv/pd/pkg/utils/logutil" @@ -34,9 +35,32 @@ import ( // HandleRegionHeartbeat processes RegionInfo reports from client. 
func (c *RaftCluster) HandleRegionHeartbeat(region *core.RegionInfo) error { - if err := c.processRegionHeartbeat(region); err != nil { + tracer := core.NewNoopHeartbeatProcessTracer() + if c.GetScheduleConfig().EnableHeartbeatBreakdownMetrics { + tracer = core.NewHeartbeatProcessTracer() + } + defer tracer.Release() + var taskRunner, miscRunner, logRunner ratelimit.Runner + taskRunner, miscRunner, logRunner = syncRunner, syncRunner, syncRunner + if c.GetScheduleConfig().EnableHeartbeatConcurrentRunner { + taskRunner = c.heartbeatRunner + miscRunner = c.miscRunner + logRunner = c.logRunner + } + + ctx := &core.MetaProcessContext{ + Context: c.ctx, + Tracer: tracer, + TaskRunner: taskRunner, + MiscRunner: miscRunner, + LogRunner: logRunner, + } + tracer.Begin() + if err := c.processRegionHeartbeat(ctx, region); err != nil { + tracer.OnAllStageFinished() return err } + tracer.OnAllStageFinished() if c.IsServiceIndependent(mcsutils.SchedulingServiceName) { return nil @@ -47,7 +71,7 @@ func (c *RaftCluster) HandleRegionHeartbeat(region *core.RegionInfo) error { // HandleAskSplit handles the split request. func (c *RaftCluster) HandleAskSplit(request *pdpb.AskSplitRequest) (*pdpb.AskSplitResponse, error) { - if c.isSchedulingHalted() { + if c.IsSchedulingHalted() { return nil, errs.ErrSchedulingIsHalted.FastGenByArgs() } if !c.opt.IsTikvRegionSplitEnabled() { @@ -90,13 +114,9 @@ func (c *RaftCluster) HandleAskSplit(request *pdpb.AskSplitRequest) (*pdpb.AskSp return split, nil } -func (c *RaftCluster) isSchedulingHalted() bool { - return c.opt.IsSchedulingHalted() -} - // HandleAskBatchSplit handles the batch split request. func (c *RaftCluster) HandleAskBatchSplit(request *pdpb.AskBatchSplitRequest) (*pdpb.AskBatchSplitResponse, error) { - if c.isSchedulingHalted() { + if c.IsSchedulingHalted() { return nil, errs.ErrSchedulingIsHalted.FastGenByArgs() } if !c.opt.IsTikvRegionSplitEnabled() { @@ -152,7 +172,7 @@ func (c *RaftCluster) HandleAskBatchSplit(request *pdpb.AskBatchSplitRequest) (* return resp, nil } -func (c *RaftCluster) checkSplitRegion(left *metapb.Region, right *metapb.Region) error { +func checkSplitRegion(left *metapb.Region, right *metapb.Region) error { if left == nil || right == nil { return errors.New("invalid split region") } @@ -168,7 +188,7 @@ func (c *RaftCluster) checkSplitRegion(left *metapb.Region, right *metapb.Region return errors.New("invalid split region") } -func (c *RaftCluster) checkSplitRegions(regions []*metapb.Region) error { +func checkSplitRegions(regions []*metapb.Region) error { if len(regions) <= 1 { return errors.New("invalid split region") } @@ -187,11 +207,11 @@ func (c *RaftCluster) checkSplitRegions(regions []*metapb.Region) error { } // HandleReportSplit handles the report split request. -func (c *RaftCluster) HandleReportSplit(request *pdpb.ReportSplitRequest) (*pdpb.ReportSplitResponse, error) { +func (*RaftCluster) HandleReportSplit(request *pdpb.ReportSplitRequest) (*pdpb.ReportSplitResponse, error) { left := request.GetLeft() right := request.GetRight() - err := c.checkSplitRegion(left, right) + err := checkSplitRegion(left, right) if err != nil { log.Warn("report split region is invalid", logutil.ZapRedactStringer("left-region", core.RegionToHexMeta(left)), @@ -211,11 +231,11 @@ func (c *RaftCluster) HandleReportSplit(request *pdpb.ReportSplitRequest) (*pdpb } // HandleBatchReportSplit handles the batch report split request. 
-func (c *RaftCluster) HandleBatchReportSplit(request *pdpb.ReportBatchSplitRequest) (*pdpb.ReportBatchSplitResponse, error) { +func (*RaftCluster) HandleBatchReportSplit(request *pdpb.ReportBatchSplitRequest) (*pdpb.ReportBatchSplitResponse, error) { regions := request.GetRegions() hrm := core.RegionsToHexMeta(regions) - err := c.checkSplitRegions(regions) + err := checkSplitRegions(regions) if err != nil { log.Warn("report batch split region is invalid", zap.Stringer("region-meta", hrm), diff --git a/server/cluster/scheduling_controller.go b/server/cluster/scheduling_controller.go index a36e7159cfd..ca846eaa885 100644 --- a/server/cluster/scheduling_controller.go +++ b/server/cluster/scheduling_controller.go @@ -68,7 +68,7 @@ func newSchedulingController(parentCtx context.Context, basicCluster *core.Basic opt: opt, labelStats: statistics.NewLabelStatistics(), hotStat: statistics.NewHotStat(parentCtx), - slowStat: statistics.NewSlowStat(parentCtx), + slowStat: statistics.NewSlowStat(), regionStats: statistics.NewRegionStatistics(basicCluster, opt, ruleManager), } } @@ -158,7 +158,7 @@ func (sc *schedulingController) runSchedulingMetricsCollectionJob() { select { case <-sc.ctx.Done(): log.Info("scheduling metrics are reset") - sc.resetSchedulingMetrics() + resetSchedulingMetrics() log.Info("scheduling metrics collection job has been stopped") return case <-ticker.C: @@ -167,7 +167,7 @@ func (sc *schedulingController) runSchedulingMetricsCollectionJob() { } } -func (sc *schedulingController) resetSchedulingMetrics() { +func resetSchedulingMetrics() { statistics.Reset() schedulers.ResetSchedulerMetrics() schedule.ResetHotSpotMetrics() @@ -182,7 +182,7 @@ func (sc *schedulingController) collectSchedulingMetrics() { stores := sc.GetStores() for _, s := range stores { statsMap.Observe(s) - statsMap.ObserveHotStat(s, sc.hotStat.StoresStats) + statistics.ObserveHotStat(s, sc.hotStat.StoresStats) } statsMap.Collect() sc.coordinator.GetSchedulersController().CollectSchedulerMetrics() @@ -194,6 +194,8 @@ func (sc *schedulingController) collectSchedulingMetrics() { sc.labelStats.Collect() // collect hot cache metrics sc.hotStat.CollectMetrics() + // collect the lock metrics + sc.CollectWaitLockMetrics() } func (sc *schedulingController) removeStoreStatistics(storeID uint64) { diff --git a/server/config/OWNERS b/server/config/OWNERS new file mode 100644 index 00000000000..179de4843e6 --- /dev/null +++ b/server/config/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs at https://go.k8s.io/owners +options: + no_parent_owners: true +filters: + "(OWNERS|(config|service_middleware_config)\\.go)$": + approvers: + - sig-critical-approvers-config diff --git a/server/config/config.go b/server/config/config.go index 25e13d59652..95d5dcb3257 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -207,7 +207,6 @@ const ( defaultLeaderPriorityCheckInterval = time.Minute defaultUseRegionStorage = true - defaultTraceRegionFlow = true defaultFlowRoundByDigit = 3 // KB maxTraceFlowRoundByDigit = 5 // 0.1 MB defaultMaxResetTSGap = 24 * time.Hour @@ -235,6 +234,7 @@ const ( minTSOUpdatePhysicalInterval = 1 * time.Millisecond defaultLogFormat = "text" + defaultLogLevel = "info" defaultServerMemoryLimit = 0 minServerMemoryLimit = 0 @@ -469,10 +469,6 @@ func (c *Config) Adjust(meta *toml.MetaData, reloading bool) error { c.Security.Encryption.Adjust() - if len(c.Log.Format) == 0 { - c.Log.Format = defaultLogFormat - } - c.Controller.Adjust(configMetaData.Child("controller")) return nil @@ -482,6 +478,8 @@ func (c 
*Config) adjustLog(meta *configutil.ConfigMetaData) { if !meta.IsDefined("disable-error-verbose") { c.Log.DisableErrorVerbose = defaultDisableErrorVerbose } + configutil.AdjustString(&c.Log.Format, defaultLogFormat) + configutil.AdjustString(&c.Log.Level, defaultLogLevel) } // Clone returns a cloned configuration. @@ -548,9 +546,6 @@ func (c *PDServerConfig) adjust(meta *configutil.ConfigMetaData) error { if !meta.IsDefined("dashboard-address") { c.DashboardAddress = defaultDashboardAddress } - if !meta.IsDefined("trace-region-flow") { - c.TraceRegionFlow = defaultTraceRegionFlow - } if !meta.IsDefined("flow-round-by-digit") { configutil.AdjustInt(&c.FlowRoundByDigit, defaultFlowRoundByDigit) } @@ -748,22 +743,22 @@ func (c *Config) GenEmbedEtcdConfig() (*embed.Config, error) { cfg.Logger = "zap" var err error - cfg.LPUrls, err = parseUrls(c.PeerUrls) + cfg.ListenPeerUrls, err = parseUrls(c.PeerUrls) if err != nil { return nil, err } - cfg.APUrls, err = parseUrls(c.AdvertisePeerUrls) + cfg.AdvertisePeerUrls, err = parseUrls(c.AdvertisePeerUrls) if err != nil { return nil, err } - cfg.LCUrls, err = parseUrls(c.ClientUrls) + cfg.ListenClientUrls, err = parseUrls(c.ClientUrls) if err != nil { return nil, err } - cfg.ACUrls, err = parseUrls(c.AdvertiseClientUrls) + cfg.AdvertiseClientUrls, err = parseUrls(c.AdvertiseClientUrls) if err != nil { return nil, err } diff --git a/server/config/config_test.go b/server/config/config_test.go index 69cfafd8d36..d7abfe0746a 100644 --- a/server/config/config_test.go +++ b/server/config/config_test.go @@ -246,6 +246,22 @@ tso-update-physical-interval = "15s" re.NoError(err) re.Equal(maxTSOUpdatePhysicalInterval, cfg.TSOUpdatePhysicalInterval.Duration) + + cfgData = ` +[log] +level = "debug" +` + flagSet = pflag.NewFlagSet("testlog", pflag.ContinueOnError) + flagSet.StringP("log-level", "L", "info", "log level: debug, info, warn, error, fatal (default 'info')") + flagSet.Parse(nil) + cfg = NewConfig() + err = cfg.Parse(flagSet) + re.NoError(err) + meta, err = toml.Decode(cfgData, &cfg) + re.NoError(err) + err = cfg.Adjust(&meta, false) + re.NoError(err) + re.Equal("debug", cfg.Log.Level) } func TestMigrateFlags(t *testing.T) { diff --git a/server/config/persist_options.go b/server/config/persist_options.go index e383f519e63..6f5dc50f205 100644 --- a/server/config/persist_options.go +++ b/server/config/persist_options.go @@ -257,27 +257,27 @@ func IsSupportedTTLConfig(key string) bool { // GetMaxSnapshotCount returns the number of the max snapshot which is allowed to send. func (o *PersistOptions) GetMaxSnapshotCount() uint64 { - return o.getTTLUintOr(sc.MaxSnapshotCountKey, o.GetScheduleConfig().MaxSnapshotCount) + return o.getTTLNumberOr(sc.MaxSnapshotCountKey, o.GetScheduleConfig().MaxSnapshotCount) } // GetMaxPendingPeerCount returns the number of the max pending peers. func (o *PersistOptions) GetMaxPendingPeerCount() uint64 { - return o.getTTLUintOr(sc.MaxPendingPeerCountKey, o.GetScheduleConfig().MaxPendingPeerCount) + return o.getTTLNumberOr(sc.MaxPendingPeerCountKey, o.GetScheduleConfig().MaxPendingPeerCount) } // GetMaxMergeRegionSize returns the max region size. func (o *PersistOptions) GetMaxMergeRegionSize() uint64 { - return o.getTTLUintOr(sc.MaxMergeRegionSizeKey, o.GetScheduleConfig().MaxMergeRegionSize) + return o.getTTLNumberOr(sc.MaxMergeRegionSizeKey, o.GetScheduleConfig().MaxMergeRegionSize) } // GetMaxMergeRegionKeys returns the max number of keys. // It returns size * 10000 if the key of max-merge-region-Keys doesn't exist. 
func (o *PersistOptions) GetMaxMergeRegionKeys() uint64 { - keys, exist, err := o.getTTLUint(sc.MaxMergeRegionKeysKey) + keys, exist, err := o.getTTLNumber(sc.MaxMergeRegionKeysKey) if exist && err == nil { return keys } - size, exist, err := o.getTTLUint(sc.MaxMergeRegionSizeKey) + size, exist, err := o.getTTLNumber(sc.MaxMergeRegionSizeKey) if exist && err == nil { return size * 10000 } @@ -419,32 +419,32 @@ func (o *PersistOptions) GetMaxStorePreparingTime() time.Duration { // GetLeaderScheduleLimit returns the limit for leader schedule. func (o *PersistOptions) GetLeaderScheduleLimit() uint64 { - return o.getTTLUintOr(sc.LeaderScheduleLimitKey, o.GetScheduleConfig().LeaderScheduleLimit) + return o.getTTLNumberOr(sc.LeaderScheduleLimitKey, o.GetScheduleConfig().LeaderScheduleLimit) } // GetRegionScheduleLimit returns the limit for region schedule. func (o *PersistOptions) GetRegionScheduleLimit() uint64 { - return o.getTTLUintOr(sc.RegionScheduleLimitKey, o.GetScheduleConfig().RegionScheduleLimit) + return o.getTTLNumberOr(sc.RegionScheduleLimitKey, o.GetScheduleConfig().RegionScheduleLimit) } // GetWitnessScheduleLimit returns the limit for region schedule. func (o *PersistOptions) GetWitnessScheduleLimit() uint64 { - return o.getTTLUintOr(sc.WitnessScheduleLimitKey, o.GetScheduleConfig().WitnessScheduleLimit) + return o.getTTLNumberOr(sc.WitnessScheduleLimitKey, o.GetScheduleConfig().WitnessScheduleLimit) } // GetReplicaScheduleLimit returns the limit for replica schedule. func (o *PersistOptions) GetReplicaScheduleLimit() uint64 { - return o.getTTLUintOr(sc.ReplicaRescheduleLimitKey, o.GetScheduleConfig().ReplicaScheduleLimit) + return o.getTTLNumberOr(sc.ReplicaRescheduleLimitKey, o.GetScheduleConfig().ReplicaScheduleLimit) } // GetMergeScheduleLimit returns the limit for merge schedule. func (o *PersistOptions) GetMergeScheduleLimit() uint64 { - return o.getTTLUintOr(sc.MergeScheduleLimitKey, o.GetScheduleConfig().MergeScheduleLimit) + return o.getTTLNumberOr(sc.MergeScheduleLimitKey, o.GetScheduleConfig().MergeScheduleLimit) } // GetHotRegionScheduleLimit returns the limit for hot region schedule. func (o *PersistOptions) GetHotRegionScheduleLimit() uint64 { - return o.getTTLUintOr(sc.HotRegionScheduleLimitKey, o.GetScheduleConfig().HotRegionScheduleLimit) + return o.getTTLNumberOr(sc.HotRegionScheduleLimitKey, o.GetScheduleConfig().HotRegionScheduleLimit) } // GetStoreLimit returns the limit of a store. @@ -547,7 +547,7 @@ func (o *PersistOptions) GetRegionScoreFormulaVersion() string { // GetSchedulerMaxWaitingOperator returns the number of the max waiting operators. func (o *PersistOptions) GetSchedulerMaxWaitingOperator() uint64 { - return o.getTTLUintOr(sc.SchedulerMaxWaitingOperatorKey, o.GetScheduleConfig().SchedulerMaxWaitingOperator) + return o.getTTLNumberOr(sc.SchedulerMaxWaitingOperatorKey, o.GetScheduleConfig().SchedulerMaxWaitingOperator) } // GetLeaderSchedulePolicy is to get leader schedule policy. @@ -801,7 +801,7 @@ func (o *PersistOptions) Reload(storage endpoint.ConfigStorage) error { if err != nil { return err } - o.adjustScheduleCfg(&cfg.Schedule) + adjustScheduleCfg(&cfg.Schedule) // Some fields may not be stored in the storage, we need to calculate them manually. 
cfg.StoreConfig.Adjust() cfg.PDServerCfg.MigrateDeprecatedFlags() @@ -819,7 +819,7 @@ func (o *PersistOptions) Reload(storage endpoint.ConfigStorage) error { return nil } -func (o *PersistOptions) adjustScheduleCfg(scheduleCfg *sc.ScheduleConfig) { +func adjustScheduleCfg(scheduleCfg *sc.ScheduleConfig) { // In case we add new default schedulers. for _, ps := range sc.DefaultSchedulers { if slice.NoneOf(scheduleCfg.Schedulers, func(i int) bool { @@ -870,17 +870,29 @@ func (o *PersistOptions) SetTTLData(parCtx context.Context, client *clientv3.Cli return nil } -func (o *PersistOptions) getTTLUint(key string) (uint64, bool, error) { +// getTTLNumber try to parse uint64 from ttl storage first, if failed, try to parse float64 +func (o *PersistOptions) getTTLNumber(key string) (uint64, bool, error) { stringForm, ok := o.GetTTLData(key) if !ok { return 0, false, nil } r, err := strconv.ParseUint(stringForm, 10, 64) - return r, true, err + if err == nil { + return r, true, nil + } + // try to parse float64 + // json unmarshal will convert number(such as `uint64(math.MaxInt32)`) to float64 + f, err := strconv.ParseFloat(stringForm, 64) + if err != nil { + return 0, false, err + } + return uint64(f), true, nil } -func (o *PersistOptions) getTTLUintOr(key string, defaultValue uint64) uint64 { - if v, ok, err := o.getTTLUint(key); ok { +// getTTLNumberOr try to parse uint64 from ttl storage first, if failed, try to parse float64. +// If both failed, return defaultValue. +func (o *PersistOptions) getTTLNumberOr(key string, defaultValue uint64) uint64 { + if v, ok, err := o.getTTLNumber(key); ok { if err == nil { return v } @@ -975,11 +987,8 @@ func (o *PersistOptions) SetAllStoresLimitTTL(ctx context.Context, client *clien var haltSchedulingStatus = schedulingAllowanceStatusGauge.WithLabelValues("halt-scheduling") -// SetHaltScheduling set HaltScheduling. -func (o *PersistOptions) SetHaltScheduling(halt bool, source string) { - v := o.GetScheduleConfig().Clone() - v.HaltScheduling = halt - o.SetScheduleConfig(v) +// SetSchedulingAllowanceStatus sets the scheduling allowance status to help distinguish the source of the halt. +func (*PersistOptions) SetSchedulingAllowanceStatus(halt bool, source string) { if halt { haltSchedulingStatus.Set(1) schedulingAllowanceStatusGauge.WithLabelValues(source).Set(1) @@ -989,6 +998,14 @@ func (o *PersistOptions) SetHaltScheduling(halt bool, source string) { } } +// SetHaltScheduling set HaltScheduling. +func (o *PersistOptions) SetHaltScheduling(halt bool, source string) { + v := o.GetScheduleConfig().Clone() + v.HaltScheduling = halt + o.SetScheduleConfig(v) + o.SetSchedulingAllowanceStatus(halt, source) +} + // IsSchedulingHalted returns if PD scheduling is halted. 
func (o *PersistOptions) IsSchedulingHalted() bool { if o == nil { diff --git a/server/forward.go b/server/forward.go index 65750fcd4be..650833e1fc1 100644 --- a/server/forward.go +++ b/server/forward.go @@ -32,13 +32,14 @@ import ( "github.com/tikv/pd/pkg/utils/grpcutil" "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/pkg/utils/tsoutil" + "github.com/tikv/pd/server/cluster" "go.uber.org/zap" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) -func (s *GrpcServer) forwardTSORequest( +func forwardTSORequest( ctx context.Context, request *pdpb.TsoRequest, forwardStream tsopb.TSO_TsoClient) (*tsopb.TsoResponse, error) { @@ -89,6 +90,7 @@ func (s *GrpcServer) forwardTSO(stream pdpb.PD_TsoServer) error { forwardStream tsopb.TSO_TsoClient forwardCtx context.Context cancelForward context.CancelFunc + tsoStreamErr error lastForwardedHost string ) defer func() { @@ -96,6 +98,9 @@ func (s *GrpcServer) forwardTSO(stream pdpb.PD_TsoServer) error { if cancelForward != nil { cancelForward() } + if grpcutil.NeedRebuildConnection(tsoStreamErr) { + s.closeDelegateClient(lastForwardedHost) + } }() maxConcurrentTSOProxyStreamings := int32(s.GetMaxConcurrentTSOProxyStreamings()) @@ -131,7 +136,8 @@ func (s *GrpcServer) forwardTSO(stream pdpb.PD_TsoServer) error { forwardedHost, ok := s.GetServicePrimaryAddr(stream.Context(), utils.TSOServiceName) if !ok || len(forwardedHost) == 0 { - return errors.WithStack(ErrNotFoundTSOAddr) + tsoStreamErr = errors.WithStack(ErrNotFoundTSOAddr) + return tsoStreamErr } if forwardStream == nil || lastForwardedHost != forwardedHost { if cancelForward != nil { @@ -140,18 +146,21 @@ func (s *GrpcServer) forwardTSO(stream pdpb.PD_TsoServer) error { clientConn, err := s.getDelegateClient(s.ctx, forwardedHost) if err != nil { - return errors.WithStack(err) + tsoStreamErr = errors.WithStack(err) + return tsoStreamErr } - forwardStream, forwardCtx, cancelForward, err = s.createTSOForwardStream(stream.Context(), clientConn) + forwardStream, forwardCtx, cancelForward, err = createTSOForwardStream(stream.Context(), clientConn) if err != nil { - return errors.WithStack(err) + tsoStreamErr = errors.WithStack(err) + return tsoStreamErr } lastForwardedHost = forwardedHost } tsopbResp, err := s.forwardTSORequestWithDeadLine(forwardCtx, cancelForward, forwardStream, request, tsDeadlineCh) if err != nil { - return errors.WithStack(err) + tsoStreamErr = errors.WithStack(err) + return tsoStreamErr } // The error types defined for tsopb and pdpb are different, so we need to convert them. 
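The forwardTSO changes above record the last stream error in tsoStreamErr so that the deferred cleanup can call closeDelegateClient when grpcutil.NeedRebuildConnection reports the connection as broken; the next request then dials a fresh delegate instead of reusing a dead one. A minimal standalone sketch of that cache-and-invalidate pattern follows; conn, needRebuild, and delegate are assumed placeholders for illustration, not PD or gRPC types.

```go
package main

import (
	"errors"
	"fmt"
	"sync"
)

// conn stands in for *grpc.ClientConn in this sketch.
type conn struct{ host string }

func (*conn) Close() error { return nil }

// errUnavailable marks errors after which the cached connection is useless.
var errUnavailable = errors.New("transport is closing")

// needRebuild is a placeholder for grpcutil.NeedRebuildConnection.
func needRebuild(err error) bool { return errors.Is(err, errUnavailable) }

type delegate struct {
	conns sync.Map // host -> *conn
}

func (d *delegate) get(host string) *conn {
	if c, ok := d.conns.Load(host); ok {
		return c.(*conn)
	}
	c := &conn{host: host} // real code would dial here
	d.conns.Store(host, c)
	return c
}

// close mirrors closeDelegateClient: drop the cached connection so the
// next request builds a fresh one instead of reusing a broken stream.
func (d *delegate) close(host string) {
	if c, ok := d.conns.LoadAndDelete(host); ok {
		c.(*conn).Close()
	}
}

func (d *delegate) forward(host string, call func(*conn) error) error {
	var streamErr error
	defer func() {
		if needRebuild(streamErr) {
			d.close(host)
		}
	}()
	streamErr = call(d.get(host))
	return streamErr
}

func main() {
	d := &delegate{}
	_ = d.forward("tso-1", func(*conn) error { return errUnavailable })
	_, cached := d.conns.Load("tso-1")
	fmt.Println("connection still cached:", cached) // false: evicted, rebuilt on demand
}
```

Routing the eviction through the deferred check covers every return path after the error is assigned, without adding a close call at each early return.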
@@ -201,7 +210,7 @@ func (s *GrpcServer) forwardTSORequestWithDeadLine( } start := time.Now() - resp, err := s.forwardTSORequest(forwardCtx, request, forwardStream) + resp, err := forwardTSORequest(forwardCtx, request, forwardStream) close(done) if err != nil { if strings.Contains(err.Error(), errs.NotLeaderErr) { @@ -214,7 +223,7 @@ func (s *GrpcServer) forwardTSORequestWithDeadLine( return resp, nil } -func (s *GrpcServer) createTSOForwardStream(ctx context.Context, client *grpc.ClientConn) (tsopb.TSO_TsoClient, context.Context, context.CancelFunc, error) { +func createTSOForwardStream(ctx context.Context, client *grpc.ClientConn) (tsopb.TSO_TsoClient, context.Context, context.CancelFunc, error) { done := make(chan struct{}) forwardCtx, cancelForward := context.WithCancel(ctx) go grpcutil.CheckStream(forwardCtx, cancelForward, done) @@ -232,7 +241,7 @@ func (s *GrpcServer) createRegionHeartbeatForwardStream(client *grpc.ClientConn) return forwardStream, cancel, err } -func (s *GrpcServer) createRegionHeartbeatSchedulingStream(ctx context.Context, client *grpc.ClientConn) (schedulingpb.Scheduling_RegionHeartbeatClient, context.Context, context.CancelFunc, error) { +func createRegionHeartbeatSchedulingStream(ctx context.Context, client *grpc.ClientConn) (schedulingpb.Scheduling_RegionHeartbeatClient, context.Context, context.CancelFunc, error) { done := make(chan struct{}) forwardCtx, cancelForward := context.WithCancel(ctx) go grpcutil.CheckStream(forwardCtx, cancelForward, done) @@ -241,7 +250,7 @@ func (s *GrpcServer) createRegionHeartbeatSchedulingStream(ctx context.Context, return forwardStream, forwardCtx, cancelForward, err } -func forwardRegionHeartbeatToScheduling(forwardStream schedulingpb.Scheduling_RegionHeartbeatClient, server *heartbeatServer, errCh chan error) { +func forwardRegionHeartbeatToScheduling(rc *cluster.RaftCluster, forwardStream schedulingpb.Scheduling_RegionHeartbeatClient, server *heartbeatServer, errCh chan error) { defer logutil.LogPanic() defer close(errCh) for { @@ -254,6 +263,10 @@ func forwardRegionHeartbeatToScheduling(forwardStream schedulingpb.Scheduling_Re errCh <- errors.WithStack(err) return } + // TODO: find a better way to halt scheduling immediately. + if rc.IsSchedulingHalted() { + continue + } // The error types defined for schedulingpb and pdpb are different, so we need to convert them. 
var pdpbErr *pdpb.Error schedulingpbErr := resp.GetHeader().GetError() @@ -363,25 +376,13 @@ func (s *GrpcServer) getDelegateClient(ctx context.Context, forwardedHost string return conn.(*grpc.ClientConn), nil } -func (s *GrpcServer) getForwardedHost(ctx, streamCtx context.Context, serviceName ...string) (forwardedHost string, err error) { - if s.IsAPIServiceMode() { - var ok bool - if len(serviceName) == 0 { - return "", ErrNotFoundService - } - forwardedHost, ok = s.GetServicePrimaryAddr(ctx, serviceName[0]) - if !ok || len(forwardedHost) == 0 { - switch serviceName[0] { - case utils.TSOServiceName: - return "", ErrNotFoundTSOAddr - case utils.SchedulingServiceName: - return "", ErrNotFoundSchedulingAddr - } - } - } else if fh := grpcutil.GetForwardedHost(streamCtx); !s.isLocalRequest(fh) { - forwardedHost = fh +func (s *GrpcServer) closeDelegateClient(forwardedHost string) { + client, ok := s.clientConns.LoadAndDelete(forwardedHost) + if !ok { + return } - return forwardedHost, nil + client.(*grpc.ClientConn).Close() + log.Debug("close delegate client connection", zap.String("forwarded-host", forwardedHost)) } func (s *GrpcServer) isLocalRequest(host string) bool { @@ -406,7 +407,7 @@ func (s *GrpcServer) getGlobalTSO(ctx context.Context) (pdpb.Timestamp, error) { } request := &tsopb.TsoRequest{ Header: &tsopb.RequestHeader{ - ClusterId: s.clusterID, + ClusterId: s.ClusterID(), KeyspaceId: utils.DefaultKeyspaceID, KeyspaceGroupId: utils.DefaultKeyspaceGroupID, }, diff --git a/server/gc_service.go b/server/gc_service.go index 90333654e5e..db3879a74fb 100644 --- a/server/gc_service.go +++ b/server/gc_service.go @@ -35,7 +35,7 @@ import ( // GetGCSafePointV2 return gc safe point for the given keyspace. func (s *GrpcServer) GetGCSafePointV2(ctx context.Context, request *pdpb.GetGCSafePointV2Request) (*pdpb.GetGCSafePointV2Response, error) { - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).GetGCSafePointV2(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -60,7 +60,7 @@ func (s *GrpcServer) GetGCSafePointV2(ctx context.Context, request *pdpb.GetGCSa // UpdateGCSafePointV2 update gc safe point for the given keyspace. func (s *GrpcServer) UpdateGCSafePointV2(ctx context.Context, request *pdpb.UpdateGCSafePointV2Request) (*pdpb.UpdateGCSafePointV2Response, error) { - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).UpdateGCSafePointV2(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -97,7 +97,7 @@ func (s *GrpcServer) UpdateGCSafePointV2(ctx context.Context, request *pdpb.Upda // UpdateServiceSafePointV2 update service safe point for the given keyspace. func (s *GrpcServer) UpdateServiceSafePointV2(ctx context.Context, request *pdpb.UpdateServiceSafePointV2Request) (*pdpb.UpdateServiceSafePointV2Response, error) { - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).UpdateServiceSafePointV2(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -194,7 +194,7 @@ func (s *GrpcServer) WatchGCSafePointV2(request *pdpb.WatchGCSafePointV2Request, // GetAllGCSafePointV2 return all gc safe point v2. 
func (s *GrpcServer) GetAllGCSafePointV2(ctx context.Context, request *pdpb.GetAllGCSafePointV2Request) (*pdpb.GetAllGCSafePointV2Response, error) { - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).GetAllGCSafePointV2(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { diff --git a/server/grpc_service.go b/server/grpc_service.go index ef7020f7fee..e16fa4a8d4f 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -232,14 +232,14 @@ type request interface { GetHeader() *pdpb.RequestHeader } -type forwardFn func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) +type forwardFn func(ctx context.Context, client *grpc.ClientConn) (any, error) -func (s *GrpcServer) unaryMiddleware(ctx context.Context, req request, fn forwardFn) (rsp interface{}, err error) { +func (s *GrpcServer) unaryMiddleware(ctx context.Context, req request, fn forwardFn) (rsp any, err error) { return s.unaryFollowerMiddleware(ctx, req, fn, nil) } // unaryFollowerMiddleware adds the check of followers enable compared to unaryMiddleware. -func (s *GrpcServer) unaryFollowerMiddleware(ctx context.Context, req request, fn forwardFn, allowFollower *bool) (rsp interface{}, err error) { +func (s *GrpcServer) unaryFollowerMiddleware(ctx context.Context, req request, fn forwardFn, allowFollower *bool) (rsp any, err error) { failpoint.Inject("customTimeout", func() { time.Sleep(5 * time.Second) }) @@ -259,7 +259,7 @@ func (s *GrpcServer) unaryFollowerMiddleware(ctx context.Context, req request, f } // GetClusterInfo implements gRPC PDServer. -func (s *GrpcServer) GetClusterInfo(ctx context.Context, _ *pdpb.GetClusterInfoRequest) (*pdpb.GetClusterInfoResponse, error) { +func (s *GrpcServer) GetClusterInfo(context.Context, *pdpb.GetClusterInfoRequest) (*pdpb.GetClusterInfoResponse, error) { // Here we purposely do not check the cluster ID because the client does not know the correct cluster ID // at startup and needs to get the cluster ID with the first request (i.e. GetMembers). 
if s.IsClosed() { @@ -301,7 +301,7 @@ func (s *GrpcServer) GetMinTS( }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).GetMinTS(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -547,9 +547,8 @@ func (s *GrpcServer) Tso(stream pdpb.PD_TsoServer) error { return errors.WithStack(err) } - if forwardedHost, err := s.getForwardedHost(ctx, stream.Context(), utils.TSOServiceName); err != nil { - return err - } else if len(forwardedHost) > 0 { + forwardedHost := grpcutil.GetForwardedHost(stream.Context()) + if !s.isLocalRequest(forwardedHost) { clientConn, err := s.getDelegateClient(s.ctx, forwardedHost) if err != nil { return errors.WithStack(err) @@ -557,7 +556,7 @@ func (s *GrpcServer) Tso(stream pdpb.PD_TsoServer) error { if errCh == nil { doneCh = make(chan struct{}) - defer close(doneCh) + defer close(doneCh) // nolint errCh = make(chan error) } @@ -571,18 +570,18 @@ func (s *GrpcServer) Tso(stream pdpb.PD_TsoServer) error { if s.IsClosed() { return status.Errorf(codes.Unknown, "server not started") } - if request.GetHeader().GetClusterId() != s.clusterID { + if clusterID := s.ClusterID(); request.GetHeader().GetClusterId() != clusterID { return status.Errorf(codes.FailedPrecondition, - "mismatch cluster id, need %d but got %d", s.clusterID, request.GetHeader().GetClusterId()) + "mismatch cluster id, need %d but got %d", clusterID, request.GetHeader().GetClusterId()) } count := request.GetCount() ctx, task := trace.NewTask(ctx, "tso") ts, err := s.tsoAllocatorManager.HandleRequest(ctx, request.GetDcLocation(), count) task.End() + tsoHandleDuration.Observe(time.Since(start).Seconds()) if err != nil { return status.Errorf(codes.Unknown, err.Error()) } - tsoHandleDuration.Observe(time.Since(start).Seconds()) response := &pdpb.TsoResponse{ Header: s.header(), Timestamp: &ts, @@ -607,7 +606,7 @@ func (s *GrpcServer) Bootstrap(ctx context.Context, request *pdpb.BootstrapReque }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).Bootstrap(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -651,7 +650,7 @@ func (s *GrpcServer) IsBootstrapped(ctx context.Context, request *pdpb.IsBootstr }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).IsBootstrapped(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -680,7 +679,7 @@ func (s *GrpcServer) AllocID(ctx context.Context, request *pdpb.AllocIDRequest) }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).AllocID(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -704,7 +703,7 @@ func (s *GrpcServer) AllocID(ctx context.Context, request *pdpb.AllocIDRequest) } // IsSnapshotRecovering implements gRPC PDServer. 
-func (s *GrpcServer) IsSnapshotRecovering(ctx context.Context, request *pdpb.IsSnapshotRecoveringRequest) (*pdpb.IsSnapshotRecoveringResponse, error) { +func (s *GrpcServer) IsSnapshotRecovering(ctx context.Context, _ *pdpb.IsSnapshotRecoveringRequest) (*pdpb.IsSnapshotRecoveringResponse, error) { if s.GetServiceMiddlewarePersistOptions().IsGRPCRateLimitEnabled() { fName := currentFunction() limiter := s.GetGRPCRateLimiter() @@ -742,7 +741,7 @@ func (s *GrpcServer) GetStore(ctx context.Context, request *pdpb.GetStoreRequest }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).GetStore(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -798,7 +797,7 @@ func (s *GrpcServer) PutStore(ctx context.Context, request *pdpb.PutStoreRequest }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).PutStore(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -827,14 +826,14 @@ func (s *GrpcServer) PutStore(ctx context.Context, request *pdpb.PutStoreRequest }, nil } - if err := rc.PutStore(store); err != nil { + if err := rc.PutMetaStore(store); err != nil { return &pdpb.PutStoreResponse{ Header: s.wrapErrorToHeader(pdpb.ErrorType_UNKNOWN, err.Error()), }, nil } log.Info("put store ok", zap.Stringer("store", store)) - CheckPDVersion(s.persistOptions) + CheckPDVersionWithClusterVersion(s.persistOptions) return &pdpb.PutStoreResponse{ Header: s.header(), @@ -855,7 +854,7 @@ func (s *GrpcServer) GetAllStores(ctx context.Context, request *pdpb.GetAllStore }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).GetAllStores(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -900,7 +899,7 @@ func (s *GrpcServer) StoreHeartbeat(ctx context.Context, request *pdpb.StoreHear }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).StoreHeartbeat(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -1170,7 +1169,7 @@ func (s *GrpcServer) ReportBuckets(stream pdpb.PD_ReportBucketsServer) error { func (s *GrpcServer) RegionHeartbeat(stream pdpb.PD_RegionHeartbeatServer) error { var ( server = &heartbeatServer{stream: stream} - flowRoundOption = core.WithFlowRoundByDigit(s.persistOptions.GetPDServerConfig().FlowRoundByDigit) + flowRoundDivisor = s.persistOptions.GetPDServerConfig().FlowRoundByDigit cancel context.CancelFunc lastBind time.Time errCh chan error @@ -1265,11 +1264,11 @@ func (s *GrpcServer) RegionHeartbeat(stream pdpb.PD_RegionHeartbeatServer) error regionHeartbeatCounter.WithLabelValues(storeAddress, storeLabel, "report", "bind").Inc() s.hbStreams.BindStream(storeID, server) // refresh FlowRoundByDigit - flowRoundOption = core.WithFlowRoundByDigit(s.persistOptions.GetPDServerConfig().FlowRoundByDigit) + flowRoundDivisor = s.persistOptions.GetPDServerConfig().FlowRoundByDigit lastBind = time.Now() } - region := core.RegionFromHeartbeat(request, flowRoundOption) + region := core.RegionFromHeartbeat(request, 
flowRoundDivisor) if region.GetLeader() == nil { log.Error("invalid request, the leader is nil", zap.Reflect("request", request), errs.ZapError(errs.ErrLeaderNil)) regionHeartbeatCounter.WithLabelValues(storeAddress, storeLabel, "report", "invalid-leader").Inc() @@ -1294,7 +1293,6 @@ func (s *GrpcServer) RegionHeartbeat(stream pdpb.PD_RegionHeartbeatServer) error continue } start := time.Now() - err = rc.HandleRegionHeartbeat(region) if err != nil { regionHeartbeatCounter.WithLabelValues(storeAddress, storeLabel, "report", "err").Inc() @@ -1302,7 +1300,6 @@ func (s *GrpcServer) RegionHeartbeat(stream pdpb.PD_RegionHeartbeatServer) error s.hbStreams.SendErr(pdpb.ErrorType_UNKNOWN, msg, request.GetLeader()) continue } - regionHeartbeatHandleDuration.WithLabelValues(storeAddress, storeLabel).Observe(time.Since(start).Seconds()) regionHeartbeatCounter.WithLabelValues(storeAddress, storeLabel, "report", "ok").Inc() @@ -1332,6 +1329,7 @@ func (s *GrpcServer) RegionHeartbeat(stream pdpb.PD_RegionHeartbeatServer) error if cancel != nil { cancel() } + client, err := s.getDelegateClient(s.ctx, forwardedSchedulingHost) if err != nil { errRegionHeartbeatClient.Inc() @@ -1339,7 +1337,7 @@ func (s *GrpcServer) RegionHeartbeat(stream pdpb.PD_RegionHeartbeatServer) error continue } log.Debug("create scheduling forwarding stream", zap.String("forwarded-host", forwardedSchedulingHost)) - forwardSchedulingStream, _, cancel, err = s.createRegionHeartbeatSchedulingStream(stream.Context(), client) + forwardSchedulingStream, _, cancel, err = createRegionHeartbeatSchedulingStream(stream.Context(), client) if err != nil { errRegionHeartbeatStream.Inc() log.Debug("failed to create stream", zap.Error(err)) @@ -1347,7 +1345,7 @@ func (s *GrpcServer) RegionHeartbeat(stream pdpb.PD_RegionHeartbeatServer) error } lastForwardedSchedulingHost = forwardedSchedulingHost forwardErrCh = make(chan error, 1) - go forwardRegionHeartbeatToScheduling(forwardSchedulingStream, server, forwardErrCh) + go forwardRegionHeartbeatToScheduling(rc, forwardSchedulingStream, server, forwardErrCh) } schedulingpbReq := &schedulingpb.RegionHeartbeatRequest{ Header: &schedulingpb.RequestHeader{ @@ -1370,6 +1368,9 @@ func (s *GrpcServer) RegionHeartbeat(stream pdpb.PD_RegionHeartbeatServer) error } if err := forwardSchedulingStream.Send(schedulingpbReq); err != nil { forwardSchedulingStream = nil + if grpcutil.NeedRebuildConnection(err) { + s.closeDelegateClient(lastForwardedSchedulingHost) + } errRegionHeartbeatSend.Inc() log.Error("failed to send request to scheduling service", zap.Error(err)) } @@ -1400,7 +1401,7 @@ func (s *GrpcServer) GetRegion(ctx context.Context, request *pdpb.GetRegionReque }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).GetRegion(ctx, request) } followerHandle := new(bool) @@ -1461,7 +1462,7 @@ func (s *GrpcServer) GetPrevRegion(ctx context.Context, request *pdpb.GetRegionR }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).GetPrevRegion(ctx, request) } followerHandle := new(bool) @@ -1520,7 +1521,7 @@ func (s *GrpcServer) GetRegionByID(ctx context.Context, request *pdpb.GetRegionB }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client 
*grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).GetRegionByID(ctx, request) } followerHandle := new(bool) @@ -1581,7 +1582,7 @@ func (s *GrpcServer) ScanRegions(ctx context.Context, request *pdpb.ScanRegionsR }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).ScanRegions(ctx, request) } followerHandle := new(bool) @@ -1639,7 +1640,7 @@ func (s *GrpcServer) AskSplit(ctx context.Context, request *pdpb.AskSplitRequest }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).AskSplit(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -1709,12 +1710,12 @@ func (s *GrpcServer) AskBatchSplit(ctx context.Context, request *pdpb.AskBatchSp if err != nil { // reset to let it be updated in the next request s.schedulingClient.CompareAndSwap(forwardCli, &schedulingClient{}) - return s.convertAskSplitResponse(resp), err + return convertAskSplitResponse(resp), err } - return s.convertAskSplitResponse(resp), nil + return convertAskSplitResponse(resp), nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).AskBatchSplit(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -1767,7 +1768,7 @@ func (s *GrpcServer) ReportSplit(ctx context.Context, request *pdpb.ReportSplitR }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).ReportSplit(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -1805,7 +1806,7 @@ func (s *GrpcServer) ReportBatchSplit(ctx context.Context, request *pdpb.ReportB }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).ReportBatchSplit(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -1844,7 +1845,7 @@ func (s *GrpcServer) GetClusterConfig(ctx context.Context, request *pdpb.GetClus }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).GetClusterConfig(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -1876,7 +1877,7 @@ func (s *GrpcServer) PutClusterConfig(ctx context.Context, request *pdpb.PutClus }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).PutClusterConfig(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -1927,9 +1928,9 @@ func (s *GrpcServer) ScatterRegion(ctx context.Context, request *pdpb.ScatterReg cli := forwardCli.getClient() if cli != nil { var regionsID []uint64 - // nolint + // nolint:staticcheck if request.GetRegionId() != 0 { - // nolint + // nolint:staticcheck regionsID = []uint64{request.GetRegionId()} } else { regionsID = request.GetRegionsId() @@ -1954,13 
+1955,13 @@ func (s *GrpcServer) ScatterRegion(ctx context.Context, request *pdpb.ScatterReg errScatterRegionSend.Inc() // reset to let it be updated in the next request s.schedulingClient.CompareAndSwap(forwardCli, &schedulingClient{}) - return s.convertScatterResponse(resp), err + return convertScatterResponse(resp), err } - return s.convertScatterResponse(resp), nil + return convertScatterResponse(resp), nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).ScatterRegion(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -1985,11 +1986,10 @@ func (s *GrpcServer) ScatterRegion(ctx context.Context, request *pdpb.ScatterReg }, nil } // TODO: Deprecate it use `request.GetRegionsID`. - //nolint + // nolint:staticcheck region := rc.GetRegion(request.GetRegionId()) if region == nil { if request.GetRegion() == nil { - //nolint return &pdpb.ScatterRegionResponse{ Header: s.wrapErrorToHeader(pdpb.ErrorType_REGION_NOT_FOUND, "region %d not found"), @@ -2031,7 +2031,7 @@ func (s *GrpcServer) GetGCSafePoint(ctx context.Context, request *pdpb.GetGCSafe }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).GetGCSafePoint(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -2090,7 +2090,7 @@ func (s *GrpcServer) UpdateGCSafePoint(ctx context.Context, request *pdpb.Update }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).UpdateGCSafePoint(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -2139,7 +2139,7 @@ func (s *GrpcServer) UpdateServiceGCSafePoint(ctx context.Context, request *pdpb }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).UpdateServiceGCSafePoint(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -2216,12 +2216,12 @@ func (s *GrpcServer) GetOperator(ctx context.Context, request *pdpb.GetOperatorR errGetOperatorSend.Inc() // reset to let it be updated in the next request s.schedulingClient.CompareAndSwap(forwardCli, &schedulingClient{}) - return s.convertOperatorResponse(resp), err + return convertOperatorResponse(resp), err } - return s.convertOperatorResponse(resp), nil + return convertOperatorResponse(resp), nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).GetOperator(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -2276,17 +2276,18 @@ func (s *GrpcServer) validateRoleInRequest(ctx context.Context, header *pdpb.Req } *allowFollower = true } - if header.GetClusterId() != s.clusterID { - return status.Errorf(codes.FailedPrecondition, "mismatch cluster id, need %d but got %d", s.clusterID, header.GetClusterId()) + if clusterID := s.ClusterID(); header.GetClusterId() != clusterID { + return status.Errorf(codes.FailedPrecondition, "mismatch cluster id, need %d but got %d", clusterID, header.GetClusterId()) } 
return nil } func (s *GrpcServer) header() *pdpb.ResponseHeader { - if s.clusterID == 0 { + clusterID := s.ClusterID() + if clusterID == 0 { return s.wrapErrorToHeader(pdpb.ErrorType_NOT_BOOTSTRAPPED, "cluster id is not ready") } - return &pdpb.ResponseHeader{ClusterId: s.clusterID} + return &pdpb.ResponseHeader{ClusterId: clusterID} } func (s *GrpcServer) wrapErrorToHeader(errorType pdpb.ErrorType, message string) *pdpb.ResponseHeader { @@ -2298,7 +2299,7 @@ func (s *GrpcServer) wrapErrorToHeader(errorType pdpb.ErrorType, message string) func (s *GrpcServer) errorHeader(err *pdpb.Error) *pdpb.ResponseHeader { return &pdpb.ResponseHeader{ - ClusterId: s.clusterID, + ClusterId: s.ClusterID(), Error: err, } } @@ -2332,7 +2333,7 @@ func (s *GrpcServer) regionNotFound() *pdpb.ResponseHeader { }) } -func (s *GrpcServer) convertHeader(header *schedulingpb.ResponseHeader) *pdpb.ResponseHeader { +func convertHeader(header *schedulingpb.ResponseHeader) *pdpb.ResponseHeader { switch header.GetError().GetType() { case schedulingpb.ErrorType_UNKNOWN: if strings.Contains(header.GetError().GetMessage(), "region not found") { @@ -2356,23 +2357,23 @@ func (s *GrpcServer) convertHeader(header *schedulingpb.ResponseHeader) *pdpb.Re } } -func (s *GrpcServer) convertSplitResponse(resp *schedulingpb.SplitRegionsResponse) *pdpb.SplitRegionsResponse { +func convertSplitResponse(resp *schedulingpb.SplitRegionsResponse) *pdpb.SplitRegionsResponse { return &pdpb.SplitRegionsResponse{ - Header: s.convertHeader(resp.GetHeader()), + Header: convertHeader(resp.GetHeader()), FinishedPercentage: resp.GetFinishedPercentage(), } } -func (s *GrpcServer) convertScatterResponse(resp *schedulingpb.ScatterRegionsResponse) *pdpb.ScatterRegionResponse { +func convertScatterResponse(resp *schedulingpb.ScatterRegionsResponse) *pdpb.ScatterRegionResponse { return &pdpb.ScatterRegionResponse{ - Header: s.convertHeader(resp.GetHeader()), + Header: convertHeader(resp.GetHeader()), FinishedPercentage: resp.GetFinishedPercentage(), } } -func (s *GrpcServer) convertOperatorResponse(resp *schedulingpb.GetOperatorResponse) *pdpb.GetOperatorResponse { +func convertOperatorResponse(resp *schedulingpb.GetOperatorResponse) *pdpb.GetOperatorResponse { return &pdpb.GetOperatorResponse{ - Header: s.convertHeader(resp.GetHeader()), + Header: convertHeader(resp.GetHeader()), RegionId: resp.GetRegionId(), Desc: resp.GetDesc(), Kind: resp.GetKind(), @@ -2380,9 +2381,9 @@ func (s *GrpcServer) convertOperatorResponse(resp *schedulingpb.GetOperatorRespo } } -func (s *GrpcServer) convertAskSplitResponse(resp *schedulingpb.AskBatchSplitResponse) *pdpb.AskBatchSplitResponse { +func convertAskSplitResponse(resp *schedulingpb.AskBatchSplitResponse) *pdpb.AskBatchSplitResponse { return &pdpb.AskBatchSplitResponse{ - Header: s.convertHeader(resp.GetHeader()), + Header: convertHeader(resp.GetHeader()), Ids: resp.GetIds(), } } @@ -2535,13 +2536,13 @@ func (s *GrpcServer) SplitRegions(ctx context.Context, request *pdpb.SplitRegion errSplitRegionsSend.Inc() // reset to let it be updated in the next request s.schedulingClient.CompareAndSwap(forwardCli, &schedulingClient{}) - return s.convertSplitResponse(resp), err + return convertSplitResponse(resp), err } - return s.convertSplitResponse(resp), nil + return convertSplitResponse(resp), nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).SplitRegions(ctx, request) } if rsp, 
err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -2564,7 +2565,7 @@ func (s *GrpcServer) SplitRegions(ctx context.Context, request *pdpb.SplitRegion // SplitAndScatterRegions split regions by the given split keys, and scatter regions. // Only regions which split successfully will be scattered. -// scatterFinishedPercentage indicates the percentage of successfully splited regions that are scattered. +// scatterFinishedPercentage indicates the percentage of successfully split regions that are scattered. func (s *GrpcServer) SplitAndScatterRegions(ctx context.Context, request *pdpb.SplitAndScatterRegionsRequest) (*pdpb.SplitAndScatterRegionsResponse, error) { if s.GetServiceMiddlewarePersistOptions().IsGRPCRateLimitEnabled() { fName := currentFunction() @@ -2577,7 +2578,7 @@ func (s *GrpcServer) SplitAndScatterRegions(ctx context.Context, request *pdpb.S }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).SplitAndScatterRegions(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -2806,7 +2807,7 @@ func (s *GrpcServer) WatchGlobalConfig(req *pdpb.WatchGlobalConfigRequest, serve return err } } - ctx, cancel := context.WithCancel(s.Context()) + ctx, cancel := context.WithCancel(server.Context()) defer cancel() configPath := req.GetConfigPath() if configPath == "" { @@ -2822,6 +2823,8 @@ func (s *GrpcServer) WatchGlobalConfig(req *pdpb.WatchGlobalConfigRequest, serve select { case <-ctx.Done(): return nil + case <-s.Context().Done(): + return nil case res := <-watchChan: if res.Err() != nil { var resp pdpb.WatchGlobalConfigResponse @@ -2903,7 +2906,7 @@ func (s *GrpcServer) ReportMinResolvedTS(ctx context.Context, request *pdpb.Repo }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).ReportMinResolvedTS(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -2943,7 +2946,7 @@ func (s *GrpcServer) SetExternalTimestamp(ctx context.Context, request *pdpb.Set }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).SetExternalTimestamp(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { @@ -2981,7 +2984,7 @@ func (s *GrpcServer) GetExternalTimestamp(ctx context.Context, request *pdpb.Get }, nil } } - fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + fn := func(ctx context.Context, client *grpc.ClientConn) (any, error) { return pdpb.NewPDClient(client).GetExternalTimestamp(ctx, request) } if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { diff --git a/server/handler.go b/server/handler.go index b91c8e368f9..f5d9b9035b2 100644 --- a/server/handler.go +++ b/server/handler.go @@ -369,15 +369,6 @@ func (h *Handler) SetLabelStoresLimit(ratePerMin float64, limitType storelimit.T return nil } -// GetAllStoresLimit is used to get limit of all stores. -func (h *Handler) GetAllStoresLimit(limitType storelimit.Type) (map[uint64]sc.StoreLimitConfig, error) { - c, err := h.GetRaftCluster() - if err != nil { - return nil, err - } - return c.GetAllStoresLimit(), nil -} - // SetStoreLimit is used to set the limit of a store. 
func (h *Handler) SetStoreLimit(storeID uint64, ratePerMin float64, limitType storelimit.Type) error { c, err := h.GetRaftCluster() @@ -495,7 +486,7 @@ func (h *Handler) GetAddr() string { // SetStoreLimitTTL set storeLimit with ttl func (h *Handler) SetStoreLimitTTL(data string, value float64, ttl time.Duration) error { - return h.s.SaveTTLConfig(map[string]interface{}{ + return h.s.SaveTTLConfig(map[string]any{ data: value, }, ttl) } @@ -568,7 +559,7 @@ func (h *Handler) GetHistoryHotRegionIter( // RedirectSchedulerUpdate update scheduler config. Export this func to help handle damaged store. func (h *Handler) redirectSchedulerUpdate(name string, storeID float64) error { - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = name input["store_id"] = storeID updateURL, err := url.JoinPath(h.GetAddr(), "pd", SchedulerConfigHandlerPath, name, "config") @@ -583,10 +574,10 @@ func (h *Handler) redirectSchedulerUpdate(name string, storeID float64) error { } // AddEvictOrGrant add evict leader scheduler or grant leader scheduler. -func (h *Handler) AddEvictOrGrant(storeID float64, name string) error { - if exist, err := h.IsSchedulerExisted(name); !exist { +func (h *Handler) AddEvictOrGrant(storeID float64, name string) (exist bool, err error) { + if exist, err = h.IsSchedulerExisted(name); !exist { if err != nil && !errors.ErrorEqual(err, errs.ErrSchedulerNotFound.FastGenByArgs()) { - return err + return exist, err } switch name { case schedulers.EvictLeaderName: @@ -595,13 +586,14 @@ func (h *Handler) AddEvictOrGrant(storeID float64, name string) error { err = h.AddGrantLeaderScheduler(uint64(storeID)) } if err != nil { - return err + return exist, err } } else { if err := h.redirectSchedulerUpdate(name, storeID); err != nil { - return err + return exist, err } log.Info("update scheduler", zap.String("scheduler-name", name), zap.Uint64("store-id", uint64(storeID))) + return exist, nil } - return nil + return exist, nil } diff --git a/server/join/join.go b/server/join/join.go index d1711063313..1319dc08d07 100644 --- a/server/join/join.go +++ b/server/join/join.go @@ -136,7 +136,11 @@ func PrepareJoinCluster(cfg *config.Config) error { existed := false for _, m := range listResp.Members { if len(m.Name) == 0 { - return errors.New("there is a member that has not joined successfully") + log.Error("there is an abnormal joined member in the current member list", + zap.Uint64("id", m.ID), + zap.Strings("peer-urls", m.PeerURLs), + zap.Strings("client-urls", m.ClientURLs)) + return errors.Errorf("there is a member %d that has not joined successfully", m.ID) } if m.Name == cfg.Name { existed = true @@ -184,7 +188,11 @@ func PrepareJoinCluster(cfg *config.Config) error { listSucc = true } if len(n) == 0 { - return errors.New("there is a member that has not joined successfully") + log.Error("there is an abnormal joined member in the current member list", + zap.Uint64("id", memb.ID), + zap.Strings("peer-urls", memb.PeerURLs), + zap.Strings("client-urls", memb.ClientURLs)) + return errors.Errorf("there is a member %d that has not joined successfully", memb.ID) } for _, m := range memb.PeerURLs { pds = append(pds, fmt.Sprintf("%s=%s", n, m)) diff --git a/server/keyspace_service.go b/server/keyspace_service.go index 09b935c2f84..44973a83061 100644 --- a/server/keyspace_service.go +++ b/server/keyspace_service.go @@ -86,7 +86,7 @@ func (s *KeyspaceServer) WatchKeyspaces(request *keyspacepb.WatchKeyspacesReques keyspaces = append(keyspaces, meta) return nil } - deleteFn := 
func(kv *mvccpb.KeyValue) error { + deleteFn := func(*mvccpb.KeyValue) error { return nil } postEventsFn := func([]*clientv3.Event) error { diff --git a/server/server.go b/server/server.go index be886a56712..1d38a5ee495 100644 --- a/server/server.go +++ b/server/server.go @@ -44,6 +44,7 @@ import ( "github.com/pingcap/sysutil" "github.com/tikv/pd/pkg/audit" bs "github.com/tikv/pd/pkg/basicserver" + "github.com/tikv/pd/pkg/cgroup" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/encryption" "github.com/tikv/pd/pkg/errs" @@ -126,7 +127,6 @@ var ( ) // Server is the pd server. It implements bs.Server -// nolint type Server struct { diagnosticspb.DiagnosticsServer @@ -157,8 +157,9 @@ type Server struct { electionClient *clientv3.Client // http client httpClient *http.Client - clusterID uint64 // pd cluster id. - rootPath string + // PD cluster ID. + clusterID atomic.Uint64 + rootPath string // Server services. // for id allocator, we can use one allocator for @@ -234,6 +235,9 @@ type Server struct { servicePrimaryMap sync.Map /* Store as map[string]string */ tsoPrimaryWatcher *etcdutil.LoopWatcher schedulingPrimaryWatcher *etcdutil.LoopWatcher + + // Cgroup Monitor + cgMonitor cgroup.Monitor } // HandlerBuilder builds a server HTTP handler. @@ -383,12 +387,12 @@ func (s *Server) startClient() error { } /* Starting two different etcd clients here is to avoid the throttling. */ // This etcd client will be used to access the etcd cluster to read and write all kinds of meta data. - s.client, err = etcdutil.CreateEtcdClient(tlsConfig, etcdCfg.ACUrls) + s.client, err = etcdutil.CreateEtcdClient(tlsConfig, etcdCfg.AdvertiseClientUrls, "server-etcd-client") if err != nil { return errs.ErrNewEtcdClient.Wrap(err).GenWithStackByCause() } // This etcd client will only be used to read and write the election-related data, such as leader key. - s.electionClient, err = etcdutil.CreateEtcdClient(tlsConfig, etcdCfg.ACUrls) + s.electionClient, err = etcdutil.CreateEtcdClient(tlsConfig, etcdCfg.AdvertiseClientUrls, "election-etcd-client") if err != nil { return errs.ErrNewEtcdClient.Wrap(err).GenWithStackByCause() } @@ -425,17 +429,18 @@ func (s *Server) AddStartCallback(callbacks ...func()) { } func (s *Server) startServer(ctx context.Context) error { - var err error - if s.clusterID, err = etcdutil.InitClusterID(s.client, pdClusterIDPath); err != nil { + clusterID, err := etcdutil.InitClusterID(s.client, pdClusterIDPath) + if err != nil { log.Error("failed to init cluster id", errs.ZapError(err)) return err } - log.Info("init cluster id", zap.Uint64("cluster-id", s.clusterID)) + s.clusterID.Store(clusterID) + log.Info("init cluster id", zap.Uint64("cluster-id", clusterID)) // It may lose accuracy if use float64 to store uint64. So we store the cluster id in label. 
- metadataGauge.WithLabelValues(fmt.Sprintf("cluster%d", s.clusterID)).Set(0) + metadataGauge.WithLabelValues(fmt.Sprintf("cluster%d", clusterID)).Set(0) bs.ServerInfoGauge.WithLabelValues(versioninfo.PDReleaseVersion, versioninfo.PDGitHash).Set(float64(time.Now().Unix())) - s.rootPath = endpoint.PDRootPath(s.clusterID) + s.rootPath = endpoint.PDRootPath(clusterID) s.member.InitMemberInfo(s.cfg.AdvertiseClientUrls, s.cfg.AdvertisePeerUrls, s.Name(), s.rootPath) s.member.SetMemberDeployPath(s.member.ID()) s.member.SetMemberBinaryVersion(s.member.ID(), versioninfo.PDReleaseVersion) @@ -447,11 +452,16 @@ func (s *Server) startServer(ctx context.Context) error { Label: idAllocLabel, Member: s.member.MemberValue(), }) - regionStorage, err := storage.NewStorageWithLevelDBBackend(ctx, filepath.Join(s.cfg.DataDir, "region-meta"), s.encryptionKeyManager) + // Initialize an etcd storage as the default storage. + defaultStorage := storage.NewStorageWithEtcdBackend(s.client, s.rootPath) + // Initialize a specialized LevelDB storage to store the region-related meta info independently. + regionStorage, err := storage.NewRegionStorageWithLevelDBBackend( + ctx, + filepath.Join(s.cfg.DataDir, "region-meta"), + s.encryptionKeyManager) if err != nil { return err } - defaultStorage := storage.NewStorageWithEtcdBackend(s.client, s.rootPath) s.storage = storage.NewCoreStorage(defaultStorage, regionStorage) s.tsoDispatcher = tsoutil.NewTSODispatcher(tsoProxyHandleDuration, tsoProxyBatchSize) s.tsoProtoFactory = &tsoutil.TSOProtoFactory{} @@ -478,7 +488,7 @@ func (s *Server) startServer(ctx context.Context) error { s.gcSafePointManager = gc.NewSafePointManager(s.storage, s.cfg.PDServerCfg) s.basicCluster = core.NewBasicCluster() - s.cluster = cluster.NewRaftCluster(ctx, s.clusterID, s.GetBasicCluster(), s.GetStorage(), syncer.NewRegionSyncer(s), s.client, s.httpClient) + s.cluster = cluster.NewRaftCluster(ctx, clusterID, s.GetBasicCluster(), s.GetStorage(), syncer.NewRegionSyncer(s), s.client, s.httpClient) keyspaceIDAllocator := id.NewAllocator(&id.AllocatorParams{ Client: s.client, RootPath: s.rootPath, @@ -488,11 +498,11 @@ func (s *Server) startServer(ctx context.Context) error { Step: keyspace.AllocStep, }) if s.IsAPIServiceMode() { - s.keyspaceGroupManager = keyspace.NewKeyspaceGroupManager(s.ctx, s.storage, s.client, s.clusterID) + s.keyspaceGroupManager = keyspace.NewKeyspaceGroupManager(s.ctx, s.storage, s.client, clusterID) } s.keyspaceManager = keyspace.NewKeyspaceManager(s.ctx, s.storage, s.cluster, keyspaceIDAllocator, &s.cfg.Keyspace, s.keyspaceGroupManager) s.safePointV2Manager = gc.NewSafePointManagerV2(s.ctx, s.storage, s.storage, s.storage) - s.hbStreams = hbstream.NewHeartbeatStreams(ctx, s.clusterID, "", s.cluster) + s.hbStreams = hbstream.NewHeartbeatStreams(ctx, clusterID, "", s.cluster) // initial hot_region_storage in here. 
s.hotRegionStorage, err = storage.NewHotRegionsStorage( @@ -535,6 +545,8 @@ func (s *Server) Close() { log.Info("closing server") + s.cgMonitor.StopMonitor() + s.stopServerLoop() if s.IsAPIServiceMode() { s.keyspaceGroupManager.Close() @@ -580,6 +592,14 @@ func (s *Server) Close() { cb() } + s.clientConns.Range(func(_, value any) bool { + conn := value.(*grpc.ClientConn) + if err := conn.Close(); err != nil { + log.Error("failed to close the gRPC client connection", errs.ZapError(err)) + } + return true + }) + log.Info("close server") } @@ -602,6 +622,8 @@ func (s *Server) Run() error { return err } + s.cgMonitor.StartMonitor(s.ctx) + failpoint.Inject("delayStartServerLoop", func() { time.Sleep(2 * time.Second) }) @@ -685,7 +707,7 @@ func (s *Server) collectEtcdStateMetrics() { } func (s *Server) bootstrapCluster(req *pdpb.BootstrapRequest) (*pdpb.BootstrapResponse, error) { - clusterID := s.clusterID + clusterID := s.ClusterID() log.Info("try to bootstrap raft cluster", zap.Uint64("cluster-id", clusterID), @@ -916,7 +938,7 @@ func (s *Server) Name() string { // ClusterID returns the cluster ID of this server. func (s *Server) ClusterID() uint64 { - return s.clusterID + return s.clusterID.Load() } // StartTimestamp returns the start timestamp of this server @@ -1020,6 +1042,7 @@ func (s *Server) GetScheduleConfig() *sc.ScheduleConfig { } // SetScheduleConfig sets the balance config information. +// This function is exported to be used by the API. func (s *Server) SetScheduleConfig(cfg sc.ScheduleConfig) error { if err := cfg.Validate(); err != nil { return err @@ -1038,6 +1061,8 @@ func (s *Server) SetScheduleConfig(cfg sc.ScheduleConfig) error { errs.ZapError(err)) return err } + // Update the scheduling halt status at the same time. + s.persistOptions.SetSchedulingAllowanceStatus(cfg.HaltScheduling, "manually") log.Info("schedule config is updated", zap.Reflect("new", cfg), zap.Reflect("old", old)) return nil } @@ -1174,7 +1199,7 @@ func (s *Server) UpdateRateLimitConfig(key, label string, value ratelimit.Dimens } // UpdateRateLimit is used to update rate-limit config which will overwrite limiter-config -func (s *Server) UpdateRateLimit(cfg *config.RateLimitConfig, key string, value interface{}) error { +func (s *Server) UpdateRateLimit(cfg *config.RateLimitConfig, key string, value any) error { updated, found, err := jsonutil.AddKeyValue(cfg, key, value) if err != nil { return err @@ -1223,7 +1248,7 @@ func (s *Server) UpdateGRPCRateLimitConfig(key, label string, value ratelimit.Di } // UpdateGRPCRateLimit is used to update gRPC rate-limit config which will overwrite limiter-config -func (s *Server) UpdateGRPCRateLimit(cfg *config.GRPCRateLimitConfig, key string, value interface{}) error { +func (s *Server) UpdateGRPCRateLimit(cfg *config.GRPCRateLimitConfig, key string, value any) error { updated, found, err := jsonutil.AddKeyValue(cfg, key, value) if err != nil { return err @@ -1409,7 +1434,7 @@ func (s *Server) DirectlyGetRaftCluster() *cluster.RaftCluster { // GetCluster gets cluster. func (s *Server) GetCluster() *metapb.Cluster { return &metapb.Cluster{ - Id: s.clusterID, + Id: s.ClusterID(), MaxPeerCount: uint32(s.persistOptions.GetMaxReplicas()), } } @@ -1530,8 +1555,6 @@ func (s *Server) UpdateGRPCServiceRateLimiter(serviceLabel string, opts ...ratel // GetClusterStatus gets cluster status. 
func (s *Server) GetClusterStatus() (*cluster.Status, error) { - s.cluster.Lock() - defer s.cluster.Unlock() return s.cluster.LoadClusterStatus() } @@ -1777,7 +1800,7 @@ func (s *Server) campaignLeader() { member.ServiceMemberGauge.WithLabelValues(s.mode).Set(0) }) - CheckPDVersion(s.persistOptions) + CheckPDVersionWithClusterVersion(s.persistOptions) log.Info(fmt.Sprintf("%s leader is ready to serve", s.mode), zap.String("leader-name", s.Name())) leaderTicker := time.NewTicker(mcs.LeaderTickInterval) @@ -1922,7 +1945,7 @@ func (s *Server) PersistFile(name string, data []byte) error { } // SaveTTLConfig save ttl config -func (s *Server) SaveTTLConfig(data map[string]interface{}, ttl time.Duration) error { +func (s *Server) SaveTTLConfig(data map[string]any, ttl time.Duration) error { for k := range data { if !config.IsSupportedTTLConfig(k) { return fmt.Errorf("unsupported ttl config %s", k) @@ -2010,7 +2033,7 @@ func (s *Server) SetServicePrimaryAddr(serviceName, addr string) { func (s *Server) initTSOPrimaryWatcher() { serviceName := mcs.TSOServiceName - tsoRootPath := endpoint.TSOSvcRootPath(s.clusterID) + tsoRootPath := endpoint.TSOSvcRootPath(s.ClusterID()) tsoServicePrimaryKey := endpoint.KeyspaceGroupPrimaryPath(tsoRootPath, mcs.DefaultKeyspaceGroupID) s.tsoPrimaryWatcher = s.initServicePrimaryWatcher(serviceName, tsoServicePrimaryKey) s.tsoPrimaryWatcher.StartWatchLoop() @@ -2018,7 +2041,7 @@ func (s *Server) initTSOPrimaryWatcher() { func (s *Server) initSchedulingPrimaryWatcher() { serviceName := mcs.SchedulingServiceName - primaryKey := endpoint.SchedulingPrimaryPath(s.clusterID) + primaryKey := endpoint.SchedulingPrimaryPath(s.ClusterID()) s.schedulingPrimaryWatcher = s.initServicePrimaryWatcher(serviceName, primaryKey) s.schedulingPrimaryWatcher.StartWatchLoop() } @@ -2037,7 +2060,7 @@ func (s *Server) initServicePrimaryWatcher(serviceName string, primaryKey string } return nil } - deleteFn := func(kv *mvccpb.KeyValue) error { + deleteFn := func(*mvccpb.KeyValue) error { var oldPrimary string v, ok := s.servicePrimaryMap.Load(serviceName) if ok { @@ -2063,7 +2086,7 @@ func (s *Server) initServicePrimaryWatcher(serviceName string, primaryKey string } // RecoverAllocID recover alloc id. set current base id to input id -func (s *Server) RecoverAllocID(ctx context.Context, id uint64) error { +func (s *Server) RecoverAllocID(_ context.Context, id uint64) error { return s.idAllocator.SetBase(id) } diff --git a/server/server_test.go b/server/server_test.go index 32f5d0646bc..b2b15962fdc 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -88,7 +88,7 @@ func (suite *leaderServerTestSuite) TearDownSuite() { } } -func (suite *leaderServerTestSuite) newTestServersWithCfgs( +func newTestServersWithCfgs( ctx context.Context, cfgs []*config.Config, re *require.Assertions, @@ -135,52 +135,6 @@ func (suite *leaderServerTestSuite) newTestServersWithCfgs( return svrs, cleanup } -func (suite *leaderServerTestSuite) TestCheckClusterID() { - re := suite.Require() - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cfgs := NewTestMultiConfig(assertutil.CheckerWithNilAssert(re), 2) - for i, cfg := range cfgs { - cfg.DataDir = fmt.Sprintf("/tmp/test_pd_check_clusterID_%d", i) - // Clean up before testing. - testutil.CleanServer(cfg.DataDir) - } - originInitial := cfgs[0].InitialCluster - for _, cfg := range cfgs { - cfg.InitialCluster = fmt.Sprintf("%s=%s", cfg.Name, cfg.PeerUrls) - } - - cfgA, cfgB := cfgs[0], cfgs[1] - // Start a standalone cluster. 
- svrsA, cleanA := suite.newTestServersWithCfgs(ctx, []*config.Config{cfgA}, re) - defer cleanA() - // Close it. - for _, svr := range svrsA { - svr.Close() - } - - // Start another cluster. - _, cleanB := suite.newTestServersWithCfgs(ctx, []*config.Config{cfgB}, re) - defer cleanB() - - // Start previous cluster, expect an error. - cfgA.InitialCluster = originInitial - mockHandler := CreateMockHandler(re, "127.0.0.1") - svr, err := CreateServer(ctx, cfgA, nil, mockHandler) - re.NoError(err) - - etcd, err := embed.StartEtcd(svr.etcdCfg) - re.NoError(err) - urlsMap, err := types.NewURLsMap(svr.cfg.InitialCluster) - re.NoError(err) - tlsConfig, err := svr.cfg.Security.ToTLSConfig() - re.NoError(err) - err = etcdutil.CheckClusterID(etcd.Server.Cluster().ID(), urlsMap, tlsConfig) - re.Error(err) - etcd.Close() - testutil.CleanServer(cfgA.DataDir) -} - func (suite *leaderServerTestSuite) TestRegisterServerHandler() { re := suite.Require() cfg := NewTestSingleConfig(assertutil.CheckerWithNilAssert(re)) @@ -330,3 +284,49 @@ func TestIsPathInDirectory(t *testing.T) { path = filepath.Join(directory, fileName) re.False(isPathInDirectory(path, directory)) } + +func TestCheckClusterID(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + cfgs := NewTestMultiConfig(assertutil.CheckerWithNilAssert(re), 2) + for i, cfg := range cfgs { + cfg.DataDir = fmt.Sprintf("/tmp/test_pd_check_clusterID_%d", i) + // Clean up before testing. + testutil.CleanServer(cfg.DataDir) + } + originInitial := cfgs[0].InitialCluster + for _, cfg := range cfgs { + cfg.InitialCluster = fmt.Sprintf("%s=%s", cfg.Name, cfg.PeerUrls) + } + + cfgA, cfgB := cfgs[0], cfgs[1] + // Start a standalone cluster. + svrsA, cleanA := newTestServersWithCfgs(ctx, []*config.Config{cfgA}, re) + defer cleanA() + // Close it. + for _, svr := range svrsA { + svr.Close() + } + + // Start another cluster. + _, cleanB := newTestServersWithCfgs(ctx, []*config.Config{cfgB}, re) + defer cleanB() + + // Start previous cluster, expect an error. + cfgA.InitialCluster = originInitial + mockHandler := CreateMockHandler(re, "127.0.0.1") + svr, err := CreateServer(ctx, cfgA, nil, mockHandler) + re.NoError(err) + + etcd, err := embed.StartEtcd(svr.etcdCfg) + re.NoError(err) + urlsMap, err := types.NewURLsMap(svr.cfg.InitialCluster) + re.NoError(err) + tlsConfig, err := svr.cfg.Security.ToTLSConfig() + re.NoError(err) + err = etcdutil.CheckClusterID(etcd.Server.Cluster().ID(), urlsMap, tlsConfig) + re.Error(err) + etcd.Close() + testutil.CleanServer(cfgA.DataDir) +} diff --git a/server/testutil.go b/server/testutil.go index cc1a380bfb8..5f817d47016 100644 --- a/server/testutil.go +++ b/server/testutil.go @@ -138,7 +138,7 @@ func MustWaitLeader(re *require.Assertions, svrs []*Server) *Server { // CreateMockHandler creates a mock handler for test. 
func CreateMockHandler(re *require.Assertions, ip string) HandlerBuilder { - return func(ctx context.Context, s *Server) (http.Handler, apiutil.APIServiceGroup, error) { + return func(context.Context, *Server) (http.Handler, apiutil.APIServiceGroup, error) { mux := http.NewServeMux() mux.HandleFunc("/pd/apis/mock/v1/hello", func(w http.ResponseWriter, r *http.Request) { fmt.Fprintln(w, "Hello World") diff --git a/server/util.go b/server/util.go index f88d0146a7f..83455e2a6fe 100644 --- a/server/util.go +++ b/server/util.go @@ -21,6 +21,7 @@ import ( "path/filepath" "strings" + "github.com/coreos/go-semver/semver" "github.com/gorilla/mux" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/pdpb" @@ -33,14 +34,21 @@ import ( "go.uber.org/zap" ) -// CheckPDVersion checks if PD needs to be upgraded. -func CheckPDVersion(opt *config.PersistOptions) { +// CheckAndGetPDVersion checks and returns the PD version. +func CheckAndGetPDVersion() *semver.Version { pdVersion := versioninfo.MinSupportedVersion(versioninfo.Base) if versioninfo.PDReleaseVersion != "None" { pdVersion = versioninfo.MustParseVersion(versioninfo.PDReleaseVersion) } + return pdVersion +} + +// CheckPDVersionWithClusterVersion checks if PD needs to be upgraded by comparing the PD version with the cluster version. +func CheckPDVersionWithClusterVersion(opt *config.PersistOptions) { + pdVersion := CheckAndGetPDVersion() clusterVersion := *opt.GetClusterVersion() - log.Info("load cluster version", zap.Stringer("cluster-version", clusterVersion)) + log.Info("load pd and cluster version", + zap.Stringer("pd-version", pdVersion), zap.Stringer("cluster-version", clusterVersion)) if pdVersion.LessThan(clusterVersion) { log.Warn( "PD version less than cluster version, please upgrade PD", diff --git a/tests/cluster.go b/tests/cluster.go index 41efc2b045d..c7368fe3c3a 100644 --- a/tests/cluster.go +++ b/tests/cluster.go @@ -88,6 +88,8 @@ func NewTestAPIServer(ctx context.Context, cfg *config.Config) (*TestServer, err } func createTestServer(ctx context.Context, cfg *config.Config, services []string) (*TestServer, error) { + // disable the heartbeat async runner in test + cfg.Schedule.EnableHeartbeatConcurrentRunner = false err := logutil.SetupLogger(cfg.Log, &cfg.Logger, &cfg.LogProps, cfg.Security.RedactInfoLog) if err != nil { return nil, err @@ -551,7 +553,7 @@ func restartTestCluster( } wg.Wait() - errorMap.Range(func(key, value interface{}) bool { + errorMap.Range(func(key, value any) bool { if value != nil { err = value.(error) return false @@ -570,17 +572,17 @@ func restartTestCluster( } // RunServer starts to run TestServer. -func (c *TestCluster) RunServer(server *TestServer) <-chan error { +func RunServer(server *TestServer) <-chan error { resC := make(chan error) go func() { resC <- server.Run() }() return resC } // RunServers starts to run multiple TestServer. -func (c *TestCluster) RunServers(servers []*TestServer) error { +func RunServers(servers []*TestServer) error { res := make([]<-chan error, len(servers)) for i, s := range servers { - res[i] = c.RunServer(s) + res[i] = RunServer(s) } for _, c := range res { if err := <-c; err != nil { @@ -596,7 +598,7 @@ func (c *TestCluster) RunInitialServers() error { for _, conf := range c.config.InitialServers { servers = append(servers, c.GetServer(conf.Name)) } - return c.RunServers(servers) + return RunServers(servers) } // StopAll is used to stop all servers. 
@@ -852,8 +854,8 @@ func (c *TestCluster) CheckClusterDCLocation() { wg := sync.WaitGroup{} for _, server := range c.GetServers() { wg.Add(1) - go func(ser *TestServer) { - ser.GetTSOAllocatorManager().ClusterDCLocationChecker() + go func(s *TestServer) { + s.GetTSOAllocatorManager().ClusterDCLocationChecker() wg.Done() }(server) } diff --git a/tests/dashboard/service_test.go b/tests/dashboard/service_test.go index 47857629328..aa4d0f337f9 100644 --- a/tests/dashboard/service_test.go +++ b/tests/dashboard/service_test.go @@ -52,7 +52,7 @@ func (suite *dashboardTestSuite) SetupSuite() { dashboard.SetCheckInterval(10 * time.Millisecond) suite.ctx, suite.cancel = context.WithCancel(context.Background()) suite.httpClient = &http.Client{ - CheckRedirect: func(req *http.Request, via []*http.Request) error { + CheckRedirect: func(*http.Request, []*http.Request) error { // ErrUseLastResponse can be returned by Client.CheckRedirect hooks to // control how redirects are processed. If returned, the next request // is not sent and the most recent response is returned with its body @@ -123,7 +123,7 @@ func (suite *dashboardTestSuite) checkServiceIsStopped(re *require.Assertions, s } func (suite *dashboardTestSuite) testDashboard(re *require.Assertions, internalProxy bool) { - cluster, err := tests.NewTestCluster(suite.ctx, 3, func(conf *config.Config, serverName string) { + cluster, err := tests.NewTestCluster(suite.ctx, 3, func(conf *config.Config, _ string) { conf.Dashboard.InternalProxy = internalProxy }) re.NoError(err) @@ -148,7 +148,7 @@ func (suite *dashboardTestSuite) testDashboard(re *require.Assertions, internalP } } - input := map[string]interface{}{ + input := map[string]any{ "dashboard-address": dashboardAddress2, } data, err := json.Marshal(input) @@ -161,7 +161,7 @@ func (suite *dashboardTestSuite) testDashboard(re *require.Assertions, internalP re.Equal(dashboardAddress2, leader.GetServer().GetPersistOptions().GetDashboardAddress()) // pd-ctl set stop - input = map[string]interface{}{ + input = map[string]any{ "dashboard-address": "none", } data, err = json.Marshal(input) diff --git a/tests/integrations/Makefile b/tests/integrations/Makefile index ee0be1f4239..658df855146 100644 --- a/tests/integrations/Makefile +++ b/tests/integrations/Makefile @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -ROOT_PATH := ../.. +ROOT_PATH := $(shell pwd)/../.. GO_TOOLS_BIN_PATH := $(ROOT_PATH)/.tools/bin PATH := $(GO_TOOLS_BIN_PATH):$(PATH) SHELL := env PATH='$(PATH)' GOBIN='$(GO_TOOLS_BIN_PATH)' $(shell which bash) @@ -22,8 +22,6 @@ static: install-tools @ gofmt -s -l -d . 2>&1 | awk '{ print } END { if (NR > 0) { exit 1 } }' @ echo "golangci-lint ..." @ golangci-lint run -c $(ROOT_PATH)/.golangci.yml --verbose ./... --allow-parallel-runners - @ echo "revive ..." - @ revive -formatter friendly -config $(ROOT_PATH)/revive.toml ./... tidy: @ go mod tidy @@ -35,15 +33,14 @@ test: failpoint-enable $(MAKE) failpoint-disable ci-test-job: - CGO_ENABLED=1 go test ./$(value test_name)/... -v -tags deadlock -race -covermode=atomic -coverprofile=./$(value test_name)/covprofile -coverpkg=$(ROOT_PATH)/... + if [ -f ./$(value test_name)/covprofile ]; then rm ./$(value test_name)/covprofile; fi + CGO_ENABLED=1 go test ./$(value test_name)/... -v -tags deadlock -race -cover -covermode=atomic -coverprofile=./$(value test_name)/covprofile -coverpkg=../../... 
install-tools: cd $(ROOT_PATH) && $(MAKE) install-tools failpoint-enable: cd $(ROOT_PATH) && $(MAKE) failpoint-enable - go mod tidy failpoint-disable: cd $(ROOT_PATH) && $(MAKE) failpoint-disable - go mod tidy diff --git a/tests/integrations/client/client_test.go b/tests/integrations/client/client_test.go index cfc37f42628..65acd897726 100644 --- a/tests/integrations/client/client_test.go +++ b/tests/integrations/client/client_test.go @@ -24,11 +24,15 @@ import ( "reflect" "sort" "strconv" + "strings" "sync" + "sync/atomic" "testing" "time" "github.com/docker/go-units" + "github.com/opentracing/basictracer-go" + "github.com/opentracing/opentracing-go" "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/meta_storagepb" "github.com/pingcap/kvproto/pkg/metapb" @@ -36,9 +40,11 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" pd "github.com/tikv/pd/client" + clierrs "github.com/tikv/pd/client/errs" "github.com/tikv/pd/client/retry" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/mock/mockid" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/tso" @@ -49,6 +55,7 @@ import ( "github.com/tikv/pd/server" "github.com/tikv/pd/server/config" "github.com/tikv/pd/tests" + "github.com/tikv/pd/tests/integrations/mcs" "go.etcd.io/etcd/clientv3" "go.uber.org/goleak" ) @@ -104,7 +111,13 @@ func TestClientLeaderChange(t *testing.T) { defer cluster.Destroy() endpoints := runServer(re, cluster) - cli := setupCli(re, ctx, endpoints) + endpointsWithWrongURL := append([]string{}, endpoints...) + // inject wrong http scheme + for i := range endpointsWithWrongURL { + endpointsWithWrongURL[i] = "https://" + strings.TrimPrefix(endpointsWithWrongURL[i], "http://") + } + cli := setupCli(ctx, re, endpointsWithWrongURL) + defer cli.Close() innerCli, ok := cli.(interface{ GetServiceDiscovery() pd.ServiceDiscovery }) re.True(ok) @@ -121,14 +134,14 @@ func TestClientLeaderChange(t *testing.T) { re.True(cluster.CheckTSOUnique(ts1)) leader := cluster.GetLeader() - waitLeader(re, innerCli.GetServiceDiscovery(), cluster.GetServer(leader).GetConfig().ClientUrls) + waitLeader(re, innerCli.GetServiceDiscovery(), cluster.GetServer(leader)) err = cluster.GetServer(leader).Stop() re.NoError(err) leader = cluster.WaitLeader() re.NotEmpty(leader) - waitLeader(re, innerCli.GetServiceDiscovery(), cluster.GetServer(leader).GetConfig().ClientUrls) + waitLeader(re, innerCli.GetServiceDiscovery(), cluster.GetServer(leader)) // Check TS won't fall back after leader changed. 
testutil.Eventually(re, func() bool { @@ -151,11 +164,11 @@ func TestClientLeaderChange(t *testing.T) { re.Equal(endpoints, urls) } -func TestLeaderTransfer(t *testing.T) { +func TestLeaderTransferAndMoveCluster(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - cluster, err := tests.NewTestCluster(ctx, 2) + cluster, err := tests.NewTestCluster(ctx, 3) re.NoError(err) defer cluster.Destroy() re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/member/skipCampaignLeaderCheck", "return(true)")) @@ -164,7 +177,8 @@ func TestLeaderTransfer(t *testing.T) { }() endpoints := runServer(re, cluster) - cli := setupCli(re, ctx, endpoints) + cli := setupCli(ctx, re, endpoints) + defer cli.Close() var lastTS uint64 testutil.Eventually(re, func() bool { @@ -210,10 +224,66 @@ func TestLeaderTransfer(t *testing.T) { newLeaderName := cluster.WaitLeader() re.NotEqual(oldLeaderName, newLeaderName) } + + // ABC->ABCDEF + oldServers := cluster.GetServers() + oldLeaderName := cluster.WaitLeader() + for i := 0; i < 3; i++ { + newPD, err := cluster.Join(ctx) + re.NoError(err) + re.NoError(newPD.Run()) + oldLeaderName = cluster.WaitLeader() + time.Sleep(5 * time.Second) + } + + // ABCDEF->DEF + oldNames := make([]string, 0) + for _, s := range oldServers { + oldNames = append(oldNames, s.GetServer().GetMemberInfo().GetName()) + s.Stop() + } + newLeaderName := cluster.WaitLeader() + re.NotEqual(oldLeaderName, newLeaderName) + re.NotContains(oldNames, newLeaderName) + close(quit) wg.Wait() } +func TestGetTSAfterTransferLeader(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + cluster, err := tests.NewTestCluster(ctx, 2) + re.NoError(err) + endpoints := runServer(re, cluster) + leader := cluster.WaitLeader() + re.NotEmpty(leader) + defer cluster.Destroy() + + cli := setupCli(ctx, re, endpoints, pd.WithCustomTimeoutOption(10*time.Second)) + defer cli.Close() + + var leaderSwitched atomic.Bool + cli.GetServiceDiscovery().AddServingURLSwitchedCallback(func() { + leaderSwitched.Store(true) + }) + err = cluster.GetServer(leader).ResignLeader() + re.NoError(err) + newLeader := cluster.WaitLeader() + re.NotEmpty(newLeader) + re.NotEqual(leader, newLeader) + leader = cluster.WaitLeader() + re.NotEmpty(leader) + err = cli.GetServiceDiscovery().CheckMemberChanged() + re.NoError(err) + + testutil.Eventually(re, leaderSwitched.Load) + // The leader stream must be updated after the leader switch is sensed by the client. 
+ _, _, err = cli.GetTS(context.TODO()) + re.NoError(err) +} + func TestTSOAllocatorLeader(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) @@ -253,7 +323,8 @@ func TestTSOAllocatorLeader(t *testing.T) { }) allocatorLeaderMap[dcLocation] = pdName } - cli := setupCli(re, ctx, endpoints) + cli := setupCli(ctx, re, endpoints) + defer cli.Close() innerCli, ok := cli.(interface{ GetServiceDiscovery() pd.ServiceDiscovery }) re.True(ok) @@ -286,8 +357,10 @@ func TestTSOFollowerProxy(t *testing.T) { defer cluster.Destroy() endpoints := runServer(re, cluster) - cli1 := setupCli(re, ctx, endpoints) - cli2 := setupCli(re, ctx, endpoints) + cli1 := setupCli(ctx, re, endpoints) + defer cli1.Close() + cli2 := setupCli(ctx, re, endpoints) + defer cli2.Close() cli2.UpdateOption(pd.EnableTSOFollowerProxy, true) var wg sync.WaitGroup @@ -314,6 +387,30 @@ func TestTSOFollowerProxy(t *testing.T) { wg.Wait() } +func TestTSOFollowerProxyWithTSOService(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + cluster, err := tests.NewTestAPICluster(ctx, 1) + re.NoError(err) + defer cluster.Destroy() + err = cluster.RunInitialServers() + re.NoError(err) + leaderName := cluster.WaitLeader() + pdLeaderServer := cluster.GetServer(leaderName) + re.NoError(pdLeaderServer.BootstrapCluster()) + backendEndpoints := pdLeaderServer.GetAddr() + tsoCluster, err := tests.NewTestTSOCluster(ctx, 2, backendEndpoints) + re.NoError(err) + defer tsoCluster.Destroy() + cli := mcs.SetupClientWithKeyspaceID(ctx, re, utils.DefaultKeyspaceID, strings.Split(backendEndpoints, ",")) + re.NotNil(cli) + defer cli.Close() + // TSO service does not support the follower proxy, so enabling it should fail. + err = cli.UpdateOption(pd.EnableTSOFollowerProxy, true) + re.Error(err) +} + // TestUnavailableTimeAfterLeaderIsReady is used to test https://github.com/tikv/pd/issues/5207 func TestUnavailableTimeAfterLeaderIsReady(t *testing.T) { re := require.New(t) @@ -324,7 +421,8 @@ func TestUnavailableTimeAfterLeaderIsReady(t *testing.T) { defer cluster.Destroy() endpoints := runServer(re, cluster) - cli := setupCli(re, ctx, endpoints) + cli := setupCli(ctx, re, endpoints) + defer cli.Close() var wg sync.WaitGroup var maxUnavailableTime, leaderReadyTime time.Time @@ -355,7 +453,7 @@ func TestUnavailableTimeAfterLeaderIsReady(t *testing.T) { leader.Stop() re.NotEmpty(cluster.WaitLeader()) leaderReadyTime = time.Now() - cluster.RunServers([]*tests.TestServer{leader}) + tests.RunServers([]*tests.TestServer{leader}) }() wg.Wait() re.Less(maxUnavailableTime.UnixMilli(), leaderReadyTime.Add(1*time.Second).UnixMilli()) @@ -396,13 +494,14 @@ func TestGlobalAndLocalTSO(t *testing.T) { defer cluster.Destroy() endpoints := runServer(re, cluster) - cli := setupCli(re, ctx, endpoints) + cli := setupCli(ctx, re, endpoints) + defer cli.Close() // Wait for all nodes becoming healthy. 
time.Sleep(time.Second * 5) // Join a new dc-location - pd4, err := cluster.Join(ctx, func(conf *config.Config, serverName string) { + pd4, err := cluster.Join(ctx, func(conf *config.Config, _ string) { conf.EnableLocalTSO = true conf.Labels[config.ZoneLabel] = "dc-4" }) @@ -430,11 +529,27 @@ func TestGlobalAndLocalTSO(t *testing.T) { re.NotEmpty(cluster.WaitLeader()) _, _, err = cli.GetTS(ctx) re.Error(err) - re.True(pd.IsLeaderChange(err)) + re.True(clierrs.IsLeaderChange(err)) _, _, err = cli.GetTS(ctx) re.NoError(err) re.NoError(failpoint.Disable("github.com/tikv/pd/client/skipUpdateMember")) + recorder := basictracer.NewInMemoryRecorder() + tracer := basictracer.New(recorder) + span := tracer.StartSpan("trace") + ctx = opentracing.ContextWithSpan(ctx, span) + future := cli.GetLocalTSAsync(ctx, "error-dc") + spans := recorder.GetSpans() + re.Len(spans, 1) + _, _, err = future.Wait() + re.Error(err) + spans = recorder.GetSpans() + re.Len(spans, 1) + _, _, err = cli.GetTS(ctx) + re.NoError(err) + spans = recorder.GetSpans() + re.Len(spans, 3) + // Test the TSO follower proxy while enabling the Local TSO. cli.UpdateOption(pd.EnableTSOFollowerProxy, true) // Sleep a while here to prevent from canceling the ongoing TSO request. @@ -491,7 +606,7 @@ type TSOAllocatorsGetter interface{ GetTSOAllocators() *sync.Map } func getTSOAllocatorServingEndpointURLs(c TSOAllocatorsGetter) map[string]string { allocatorLeaders := make(map[string]string) - c.GetTSOAllocators().Range(func(dcLocation, url interface{}) bool { + c.GetTSOAllocators().Range(func(dcLocation, url any) bool { allocatorLeaders[dcLocation.(string)] = url.(string) return true }) @@ -507,7 +622,8 @@ func TestCustomTimeout(t *testing.T) { defer cluster.Destroy() endpoints := runServer(re, cluster) - cli := setupCli(re, ctx, endpoints, pd.WithCustomTimeoutOption(time.Second)) + cli := setupCli(ctx, re, endpoints, pd.WithCustomTimeoutOption(time.Second)) + defer cli.Close() start := time.Now() re.NoError(failpoint.Enable("github.com/tikv/pd/server/customTimeout", "return(true)")) @@ -567,8 +683,7 @@ func (suite *followerForwardAndHandleTestSuite) SetupSuite() { }) } -func (suite *followerForwardAndHandleTestSuite) TearDownTest() { -} +func (*followerForwardAndHandleTestSuite) TearDownTest() {} func (suite *followerForwardAndHandleTestSuite) TearDownSuite() { suite.cluster.Destroy() @@ -580,7 +695,8 @@ func (suite *followerForwardAndHandleTestSuite) TestGetRegionByFollowerForwardin ctx, cancel := context.WithCancel(suite.ctx) defer cancel() - cli := setupCli(re, ctx, suite.endpoints, pd.WithForwardingOption(true)) + cli := setupCli(ctx, re, suite.endpoints, pd.WithForwardingOption(true)) + defer cli.Close() re.NoError(failpoint.Enable("github.com/tikv/pd/client/unreachableNetwork1", "return(true)")) time.Sleep(200 * time.Millisecond) r, err := cli.GetRegion(context.Background(), []byte("a")) @@ -599,7 +715,8 @@ func (suite *followerForwardAndHandleTestSuite) TestGetTsoByFollowerForwarding1( re := suite.Require() ctx, cancel := context.WithCancel(suite.ctx) defer cancel() - cli := setupCli(re, ctx, suite.endpoints, pd.WithForwardingOption(true)) + cli := setupCli(ctx, re, suite.endpoints, pd.WithForwardingOption(true)) + defer cli.Close() re.NoError(failpoint.Enable("github.com/tikv/pd/client/unreachableNetwork", "return(true)")) var lastTS uint64 @@ -633,7 +750,8 @@ func (suite *followerForwardAndHandleTestSuite) TestGetTsoByFollowerForwarding2( re := suite.Require() ctx, cancel := context.WithCancel(suite.ctx) defer cancel() - cli := 
setupCli(re, ctx, suite.endpoints, pd.WithForwardingOption(true)) + cli := setupCli(ctx, re, suite.endpoints, pd.WithForwardingOption(true)) + defer cli.Close() re.NoError(failpoint.Enable("github.com/tikv/pd/client/unreachableNetwork", "return(true)")) var lastTS uint64 @@ -669,7 +787,8 @@ func (suite *followerForwardAndHandleTestSuite) TestGetTsoAndRegionByFollowerFor follower := cluster.GetServer(cluster.GetFollower()) re.NoError(failpoint.Enable("github.com/tikv/pd/client/grpcutil/unreachableNetwork2", fmt.Sprintf("return(\"%s\")", follower.GetAddr()))) - cli := setupCli(re, ctx, suite.endpoints, pd.WithForwardingOption(true)) + cli := setupCli(ctx, re, suite.endpoints, pd.WithForwardingOption(true)) + defer cli.Close() var lastTS uint64 testutil.Eventually(re, func() bool { physical, logical, err := cli.GetTS(context.TODO()) @@ -725,13 +844,53 @@ func (suite *followerForwardAndHandleTestSuite) TestGetTsoAndRegionByFollowerFor }) } +func (suite *followerForwardAndHandleTestSuite) TestGetRegionFromLeaderWhenNetworkErr() { + re := suite.Require() + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() + + cluster := suite.cluster + re.NotEmpty(cluster.WaitLeader()) + leader := cluster.GetLeaderServer() + + follower := cluster.GetServer(cluster.GetFollower()) + re.NoError(failpoint.Enable("github.com/tikv/pd/client/grpcutil/unreachableNetwork2", fmt.Sprintf("return(\"%s\")", follower.GetAddr()))) + + cli := setupCli(ctx, re, suite.endpoints) + defer cli.Close() + + cluster.GetLeaderServer().GetServer().GetMember().ResignEtcdLeader(ctx, leader.GetServer().Name(), follower.GetServer().Name()) + re.NotEmpty(cluster.WaitLeader()) + + // This is just to trigger the leader change. + cli.GetRegion(context.Background(), []byte("a")) + + testutil.Eventually(re, func() bool { + return cli.GetLeaderURL() == follower.GetAddr() + }) + r, err := cli.GetRegion(context.Background(), []byte("a")) + re.Error(err) + re.Nil(r) + + re.NoError(failpoint.Disable("github.com/tikv/pd/client/grpcutil/unreachableNetwork2")) + cli.GetServiceDiscovery().CheckMemberChanged() + testutil.Eventually(re, func() bool { + r, err = cli.GetRegion(context.Background(), []byte("a")) + if err == nil && r != nil { + return true + } + return false + }) +} + func (suite *followerForwardAndHandleTestSuite) TestGetRegionFromFollower() { re := suite.Require() ctx, cancel := context.WithCancel(suite.ctx) defer cancel() cluster := suite.cluster - cli := setupCli(re, ctx, suite.endpoints) + cli := setupCli(ctx, re, suite.endpoints) + defer cli.Close() cli.UpdateOption(pd.EnableFollowerHandle, true) re.NotEmpty(cluster.WaitLeader()) leader := cluster.GetLeaderServer() @@ -818,6 +977,51 @@ func (suite *followerForwardAndHandleTestSuite) TestGetRegionFromFollower() { re.NoError(failpoint.Disable("github.com/tikv/pd/client/fastCheckAvailable")) } +func (suite *followerForwardAndHandleTestSuite) TestGetTSFuture() { + re := suite.Require() + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() + + re.NoError(failpoint.Enable("github.com/tikv/pd/client/shortDispatcherChannel", "return(true)")) + + cli := setupCli(ctx, re, suite.endpoints) + + ctxs := make([]context.Context, 20) + cancels := make([]context.CancelFunc, 20) + for i := 0; i < 20; i++ { + ctxs[i], cancels[i] = context.WithCancel(ctx) + } + start := time.Now() + wg1 := sync.WaitGroup{} + wg2 := sync.WaitGroup{} + wg3 := sync.WaitGroup{} + wg1.Add(1) + go func() { + <-time.After(time.Second) + for i := 0; i < 20; i++ { + cancels[i]() + } + wg1.Done() + }() + 
wg2.Add(1) + go func() { + cli.Close() + wg2.Done() + }() + wg3.Add(1) + go func() { + for i := 0; i < 20; i++ { + cli.GetTSAsync(ctxs[i]) + } + wg3.Done() + }() + wg1.Wait() + wg2.Wait() + wg3.Wait() + re.Less(time.Since(start), time.Second*2) + re.NoError(failpoint.Disable("github.com/tikv/pd/client/shortDispatcherChannel")) +} + func checkTS(re *require.Assertions, cli pd.Client, lastTS uint64) uint64 { for i := 0; i < tsoRequestRound; i++ { physical, logical, err := cli.GetTS(context.TODO()) @@ -846,16 +1050,16 @@ func runServer(re *require.Assertions, cluster *tests.TestCluster) []string { return endpoints } -func setupCli(re *require.Assertions, ctx context.Context, endpoints []string, opts ...pd.ClientOption) pd.Client { +func setupCli(ctx context.Context, re *require.Assertions, endpoints []string, opts ...pd.ClientOption) pd.Client { cli, err := pd.NewClientWithContext(ctx, endpoints, pd.SecurityOption{}, opts...) re.NoError(err) return cli } -func waitLeader(re *require.Assertions, cli pd.ServiceDiscovery, leader string) { +func waitLeader(re *require.Assertions, cli pd.ServiceDiscovery, leader *tests.TestServer) { testutil.Eventually(re, func() bool { cli.ScheduleCheckMemberChanged() - return cli.GetServingAddr() == leader + return cli.GetServingURL() == leader.GetConfig().ClientUrls && leader.GetAddr() == cli.GetServingURL() }) } @@ -871,7 +1075,7 @@ func TestConfigTTLAfterTransferLeader(t *testing.T) { leader := cluster.GetServer(cluster.WaitLeader()) re.NoError(leader.BootstrapCluster()) addr := fmt.Sprintf("%s/pd/api/v1/config?ttlSecond=5", leader.GetAddr()) - postData, err := json.Marshal(map[string]interface{}{ + postData, err := json.Marshal(map[string]any{ "schedule.max-snapshot-count": 999, "schedule.enable-location-replacement": false, "schedule.max-merge-region-size": 999, @@ -914,10 +1118,24 @@ func TestCloseClient(t *testing.T) { re.NoError(err) defer cluster.Destroy() endpoints := runServer(re, cluster) - cli := setupCli(re, ctx, endpoints) - cli.GetTSAsync(context.TODO()) + cli := setupCli(ctx, re, endpoints) + ts := cli.GetTSAsync(context.TODO()) time.Sleep(time.Second) cli.Close() + physical, logical, err := ts.Wait() + if err == nil { + re.Greater(physical, int64(0)) + re.Greater(logical, int64(0)) + } else { + re.ErrorIs(err, context.Canceled) + re.Zero(physical) + re.Zero(logical) + } + ts = cli.GetTSAsync(context.TODO()) + physical, logical, err = ts.Wait() + re.ErrorIs(err, context.Canceled) + re.Zero(physical) + re.Zero(logical) } type idAllocator struct { @@ -988,10 +1206,10 @@ func (suite *clientTestSuite) SetupSuite() { suite.grpcSvr = &server.GrpcServer{Server: suite.srv} server.MustWaitLeader(re, []*server.Server{suite.srv}) - suite.bootstrapServer(re, newHeader(suite.srv), suite.grpcPDClient) + bootstrapServer(re, newHeader(suite.srv), suite.grpcPDClient) suite.ctx, suite.clean = context.WithCancel(context.Background()) - suite.client = setupCli(re, suite.ctx, suite.srv.GetEndpoints()) + suite.client = setupCli(suite.ctx, re, suite.srv.GetEndpoints()) suite.regionHeartbeat, err = suite.grpcPDClient.RegionHeartbeat(suite.ctx) re.NoError(err) @@ -1033,7 +1251,7 @@ func newHeader(srv *server.Server) *pdpb.RequestHeader { } } -func (suite *clientTestSuite) bootstrapServer(re *require.Assertions, header *pdpb.RequestHeader, client pdpb.PDClient) { +func bootstrapServer(re *require.Assertions, header *pdpb.RequestHeader, client pdpb.PDClient) { regionID := regionIDAllocator.alloc() region := &metapb.Region{ Id: regionID, @@ -1598,7 +1816,7 @@ func 
TestWatch(t *testing.T) { re.NoError(err) defer cluster.Destroy() endpoints := runServer(re, cluster) - client := setupCli(re, ctx, endpoints) + client := setupCli(ctx, re, endpoints) defer client.Close() key := "test" @@ -1641,7 +1859,7 @@ func TestPutGet(t *testing.T) { re.NoError(err) defer cluster.Destroy() endpoints := runServer(re, cluster) - client := setupCli(re, ctx, endpoints) + client := setupCli(ctx, re, endpoints) defer client.Close() key := []byte("test") @@ -1676,7 +1894,7 @@ func TestClientWatchWithRevision(t *testing.T) { re.NoError(err) defer cluster.Destroy() endpoints := runServer(re, cluster) - client := setupCli(re, ctx, endpoints) + client := setupCli(ctx, re, endpoints) defer client.Close() s := cluster.GetLeaderServer() watchPrefix := "watch_test" @@ -1744,13 +1962,13 @@ func (suite *clientTestSuite) TestMemberUpdateBackOff() { defer cluster.Destroy() endpoints := runServer(re, cluster) - cli := setupCli(re, ctx, endpoints) + cli := setupCli(ctx, re, endpoints) defer cli.Close() innerCli, ok := cli.(interface{ GetServiceDiscovery() pd.ServiceDiscovery }) re.True(ok) leader := cluster.GetLeader() - waitLeader(re, innerCli.GetServiceDiscovery(), cluster.GetServer(leader).GetConfig().ClientUrls) + waitLeader(re, innerCli.GetServiceDiscovery(), cluster.GetServer(leader)) memberID := cluster.GetServer(leader).GetLeader().GetMemberId() re.NoError(failpoint.Enable("github.com/tikv/pd/server/leaderLoopCheckAgain", fmt.Sprintf("return(\"%d\")", memberID))) diff --git a/tests/integrations/client/client_tls_test.go b/tests/integrations/client/client_tls_test.go index b46895f4f8c..a5f0f5b200d 100644 --- a/tests/integrations/client/client_tls_test.go +++ b/tests/integrations/client/client_tls_test.go @@ -120,18 +120,18 @@ func TestTLSReloadAtomicReplace(t *testing.T) { err = os.Rename(certsDirExp, certsDir) re.NoError(err) } - testTLSReload(re, ctx, cloneFunc, replaceFunc, revertFunc) + testTLSReload(ctx, re, cloneFunc, replaceFunc, revertFunc) } func testTLSReload( - re *require.Assertions, ctx context.Context, + re *require.Assertions, cloneFunc func() transport.TLSInfo, replaceFunc func(), revertFunc func()) { tlsInfo := cloneFunc() // 1. 
start cluster with valid certs - clus, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, serverName string) { + clus, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, _ string) { conf.Security.TLSConfig = grpcutil.TLSConfig{ KeyPath: tlsInfo.KeyFile, CertPath: tlsInfo.CertFile, @@ -178,8 +178,8 @@ func testTLSReload( dcancel() return } - dcancel() cli.Close() + dcancel() } }() @@ -212,12 +212,13 @@ func testTLSReload( caData, certData, keyData := loadTLSContent(re, testClientTLSInfo.TrustedCAFile, testClientTLSInfo.CertFile, testClientTLSInfo.KeyFile) ctx1, cancel1 := context.WithTimeout(ctx, 2*time.Second) - _, err = pd.NewClientWithContext(ctx1, endpoints, pd.SecurityOption{ + cli, err = pd.NewClientWithContext(ctx1, endpoints, pd.SecurityOption{ SSLCABytes: caData, SSLCertBytes: certData, SSLKEYBytes: keyData, }, pd.WithGRPCDialOptions(grpc.WithBlock())) re.NoError(err) + defer cli.Close() cancel1() } diff --git a/tests/integrations/client/gc_client_test.go b/tests/integrations/client/gc_client_test.go index a2c3c3263f7..0913579f47e 100644 --- a/tests/integrations/client/gc_client_test.go +++ b/tests/integrations/client/gc_client_test.go @@ -89,6 +89,7 @@ func (suite *gcClientTestSuite) TearDownSuite() { re := suite.Require() re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/gc/checkKeyspace")) suite.cleanup() + suite.client.Close() } func (suite *gcClientTestSuite) TearDownTest() { @@ -134,6 +135,7 @@ func (suite *gcClientTestSuite) TestClientWatchWithRevision() { suite.testClientWatchWithRevision(true) } +// nolint:revive func (suite *gcClientTestSuite) testClientWatchWithRevision(fromNewRevision bool) { re := suite.Require() testKeyspaceID := uint32(100) diff --git a/tests/integrations/client/global_config_test.go b/tests/integrations/client/global_config_test.go index 349b16579bd..d813ec99676 100644 --- a/tests/integrations/client/global_config_test.go +++ b/tests/integrations/client/global_config_test.go @@ -15,6 +15,7 @@ package client_test import ( + "context" "path" "strconv" "testing" @@ -37,7 +38,8 @@ import ( const globalConfigPath = "/global/config/" type testReceiver struct { - re *require.Assertions + re *require.Assertions + ctx context.Context grpc.ServerStream } @@ -49,6 +51,10 @@ func (s testReceiver) Send(m *pdpb.WatchGlobalConfigResponse) error { return nil } +func (s testReceiver) Context() context.Context { + return s.ctx +} + type globalConfigTestSuite struct { suite.Suite server *server.GrpcServer @@ -80,9 +86,10 @@ func (suite *globalConfigTestSuite) SetupSuite() { func (suite *globalConfigTestSuite) TearDownSuite() { suite.client.Close() suite.cleanup() + suite.client.Close() } -func (suite *globalConfigTestSuite) GetEtcdPath(configPath string) string { +func getEtcdPath(configPath string) string { return globalConfigPath + configPath } @@ -90,10 +97,10 @@ func (suite *globalConfigTestSuite) TestLoadWithoutNames() { re := suite.Require() defer func() { // clean up - _, err := suite.server.GetClient().Delete(suite.server.Context(), suite.GetEtcdPath("test")) + _, err := suite.server.GetClient().Delete(suite.server.Context(), getEtcdPath("test")) re.NoError(err) }() - r, err := suite.server.GetClient().Put(suite.server.Context(), suite.GetEtcdPath("test"), "test") + r, err := suite.server.GetClient().Put(suite.server.Context(), getEtcdPath("test"), "test") re.NoError(err) res, err := suite.server.LoadGlobalConfig(suite.server.Context(), &pdpb.LoadGlobalConfigRequest{ ConfigPath: globalConfigPath, @@ -108,10 +115,10 @@ func (suite 
*globalConfigTestSuite) TestLoadWithoutConfigPath() { re := suite.Require() defer func() { // clean up - _, err := suite.server.GetClient().Delete(suite.server.Context(), suite.GetEtcdPath("source_id")) + _, err := suite.server.GetClient().Delete(suite.server.Context(), getEtcdPath("source_id")) re.NoError(err) }() - _, err := suite.server.GetClient().Put(suite.server.Context(), suite.GetEtcdPath("source_id"), "1") + _, err := suite.server.GetClient().Put(suite.server.Context(), getEtcdPath("source_id"), "1") re.NoError(err) res, err := suite.server.LoadGlobalConfig(suite.server.Context(), &pdpb.LoadGlobalConfigRequest{ Names: []string{"source_id"}, @@ -125,7 +132,7 @@ func (suite *globalConfigTestSuite) TestLoadOtherConfigPath() { re := suite.Require() defer func() { for i := 0; i < 3; i++ { - _, err := suite.server.GetClient().Delete(suite.server.Context(), suite.GetEtcdPath(strconv.Itoa(i))) + _, err := suite.server.GetClient().Delete(suite.server.Context(), getEtcdPath(strconv.Itoa(i))) re.NoError(err) } }() @@ -148,7 +155,7 @@ func (suite *globalConfigTestSuite) TestLoadAndStore() { re := suite.Require() defer func() { for i := 0; i < 3; i++ { - _, err := suite.server.GetClient().Delete(suite.server.Context(), suite.GetEtcdPath("test")) + _, err := suite.server.GetClient().Delete(suite.server.Context(), getEtcdPath("test")) re.NoError(err) } }() @@ -164,7 +171,7 @@ func (suite *globalConfigTestSuite) TestLoadAndStore() { re.Len(res.Items, 3) re.NoError(err) for i, item := range res.Items { - re.Equal(&pdpb.GlobalConfigItem{Kind: pdpb.EventType_PUT, Name: suite.GetEtcdPath(strconv.Itoa(i)), Payload: []byte(strconv.Itoa(i))}, item) + re.Equal(&pdpb.GlobalConfigItem{Kind: pdpb.EventType_PUT, Name: getEtcdPath(strconv.Itoa(i)), Payload: []byte(strconv.Itoa(i))}, item) } } @@ -172,7 +179,7 @@ func (suite *globalConfigTestSuite) TestStore() { re := suite.Require() defer func() { for i := 0; i < 3; i++ { - _, err := suite.server.GetClient().Delete(suite.server.Context(), suite.GetEtcdPath("test")) + _, err := suite.server.GetClient().Delete(suite.server.Context(), getEtcdPath("test")) re.NoError(err) } }() @@ -183,9 +190,9 @@ func (suite *globalConfigTestSuite) TestStore() { }) re.NoError(err) for i := 0; i < 3; i++ { - res, err := suite.server.GetClient().Get(suite.server.Context(), suite.GetEtcdPath(strconv.Itoa(i))) + res, err := suite.server.GetClient().Get(suite.server.Context(), getEtcdPath(strconv.Itoa(i))) re.NoError(err) - re.Equal(suite.GetEtcdPath(string(res.Kvs[0].Value)), string(res.Kvs[0].Key)) + re.Equal(getEtcdPath(string(res.Kvs[0].Value)), string(res.Kvs[0].Key)) } } @@ -194,21 +201,23 @@ func (suite *globalConfigTestSuite) TestWatch() { defer func() { for i := 0; i < 3; i++ { // clean up - _, err := suite.server.GetClient().Delete(suite.server.Context(), suite.GetEtcdPath(strconv.Itoa(i))) + _, err := suite.server.GetClient().Delete(suite.server.Context(), getEtcdPath(strconv.Itoa(i))) re.NoError(err) } }() - server := testReceiver{re: suite.Require()} + ctx, cancel := context.WithCancel(suite.server.Context()) + defer cancel() + server := testReceiver{re: suite.Require(), ctx: ctx} go suite.server.WatchGlobalConfig(&pdpb.WatchGlobalConfigRequest{ ConfigPath: globalConfigPath, Revision: 0, }, server) for i := 0; i < 6; i++ { - _, err := suite.server.GetClient().Put(suite.server.Context(), suite.GetEtcdPath(strconv.Itoa(i)), strconv.Itoa(i)) + _, err := suite.server.GetClient().Put(suite.server.Context(), getEtcdPath(strconv.Itoa(i)), strconv.Itoa(i)) re.NoError(err) } for 
i := 3; i < 6; i++ { - _, err := suite.server.GetClient().Delete(suite.server.Context(), suite.GetEtcdPath(strconv.Itoa(i))) + _, err := suite.server.GetClient().Delete(suite.server.Context(), getEtcdPath(strconv.Itoa(i))) re.NoError(err) } res, err := suite.server.LoadGlobalConfig(suite.server.Context(), &pdpb.LoadGlobalConfigRequest{ @@ -222,29 +231,29 @@ func (suite *globalConfigTestSuite) TestClientLoadWithoutNames() { re := suite.Require() defer func() { for i := 0; i < 3; i++ { - _, err := suite.server.GetClient().Delete(suite.server.Context(), suite.GetEtcdPath(strconv.Itoa(i))) + _, err := suite.server.GetClient().Delete(suite.server.Context(), getEtcdPath(strconv.Itoa(i))) re.NoError(err) } }() for i := 0; i < 3; i++ { - _, err := suite.server.GetClient().Put(suite.server.Context(), suite.GetEtcdPath(strconv.Itoa(i)), strconv.Itoa(i)) + _, err := suite.server.GetClient().Put(suite.server.Context(), getEtcdPath(strconv.Itoa(i)), strconv.Itoa(i)) re.NoError(err) } res, _, err := suite.client.LoadGlobalConfig(suite.server.Context(), nil, globalConfigPath) re.NoError(err) re.Len(res, 3) for i, item := range res { - re.Equal(pd.GlobalConfigItem{EventType: pdpb.EventType_PUT, Name: suite.GetEtcdPath(strconv.Itoa(i)), PayLoad: []byte(strconv.Itoa(i)), Value: strconv.Itoa(i)}, item) + re.Equal(pd.GlobalConfigItem{EventType: pdpb.EventType_PUT, Name: getEtcdPath(strconv.Itoa(i)), PayLoad: []byte(strconv.Itoa(i)), Value: strconv.Itoa(i)}, item) } } func (suite *globalConfigTestSuite) TestClientLoadWithoutConfigPath() { re := suite.Require() defer func() { - _, err := suite.server.GetClient().Delete(suite.server.Context(), suite.GetEtcdPath("source_id")) + _, err := suite.server.GetClient().Delete(suite.server.Context(), getEtcdPath("source_id")) re.NoError(err) }() - _, err := suite.server.GetClient().Put(suite.server.Context(), suite.GetEtcdPath("source_id"), "1") + _, err := suite.server.GetClient().Put(suite.server.Context(), getEtcdPath("source_id"), "1") re.NoError(err) res, _, err := suite.client.LoadGlobalConfig(suite.server.Context(), []string{"source_id"}, "") re.NoError(err) @@ -256,7 +265,7 @@ func (suite *globalConfigTestSuite) TestClientLoadOtherConfigPath() { re := suite.Require() defer func() { for i := 0; i < 3; i++ { - _, err := suite.server.GetClient().Delete(suite.server.Context(), suite.GetEtcdPath(strconv.Itoa(i))) + _, err := suite.server.GetClient().Delete(suite.server.Context(), getEtcdPath(strconv.Itoa(i))) re.NoError(err) } }() @@ -276,7 +285,7 @@ func (suite *globalConfigTestSuite) TestClientStore() { re := suite.Require() defer func() { for i := 0; i < 3; i++ { - _, err := suite.server.GetClient().Delete(suite.server.Context(), suite.GetEtcdPath(strconv.Itoa(i))) + _, err := suite.server.GetClient().Delete(suite.server.Context(), getEtcdPath(strconv.Itoa(i))) re.NoError(err) } }() @@ -284,9 +293,9 @@ func (suite *globalConfigTestSuite) TestClientStore() { []pd.GlobalConfigItem{{Name: "0", Value: "0"}, {Name: "1", Value: "1"}, {Name: "2", Value: "2"}}) re.NoError(err) for i := 0; i < 3; i++ { - res, err := suite.server.GetClient().Get(suite.server.Context(), suite.GetEtcdPath(strconv.Itoa(i))) + res, err := suite.server.GetClient().Get(suite.server.Context(), getEtcdPath(strconv.Itoa(i))) re.NoError(err) - re.Equal(suite.GetEtcdPath(string(res.Kvs[0].Value)), string(res.Kvs[0].Key)) + re.Equal(getEtcdPath(string(res.Kvs[0].Value)), string(res.Kvs[0].Key)) } } @@ -294,25 +303,25 @@ func (suite *globalConfigTestSuite) TestClientWatchWithRevision() { re := 
suite.Require() ctx := suite.server.Context() defer func() { - _, err := suite.server.GetClient().Delete(ctx, suite.GetEtcdPath("test")) + _, err := suite.server.GetClient().Delete(ctx, getEtcdPath("test")) re.NoError(err) for i := 3; i < 9; i++ { - _, err := suite.server.GetClient().Delete(ctx, suite.GetEtcdPath(strconv.Itoa(i))) + _, err := suite.server.GetClient().Delete(ctx, getEtcdPath(strconv.Itoa(i))) re.NoError(err) } }() // Mock get revision by loading - r, err := suite.server.GetClient().Put(ctx, suite.GetEtcdPath("test"), "test") + r, err := suite.server.GetClient().Put(ctx, getEtcdPath("test"), "test") re.NoError(err) res, revision, err := suite.client.LoadGlobalConfig(ctx, nil, globalConfigPath) re.NoError(err) re.Len(res, 1) suite.LessOrEqual(r.Header.GetRevision(), revision) - re.Equal(pd.GlobalConfigItem{EventType: pdpb.EventType_PUT, Name: suite.GetEtcdPath("test"), PayLoad: []byte("test"), Value: "test"}, res[0]) + re.Equal(pd.GlobalConfigItem{EventType: pdpb.EventType_PUT, Name: getEtcdPath("test"), PayLoad: []byte("test"), Value: "test"}, res[0]) // Mock when start watcher there are existed some keys, will load firstly for i := 0; i < 6; i++ { - _, err = suite.server.GetClient().Put(suite.server.Context(), suite.GetEtcdPath(strconv.Itoa(i)), strconv.Itoa(i)) + _, err = suite.server.GetClient().Put(suite.server.Context(), getEtcdPath(strconv.Itoa(i)), strconv.Itoa(i)) re.NoError(err) } // Start watcher at next revision @@ -320,12 +329,12 @@ func (suite *globalConfigTestSuite) TestClientWatchWithRevision() { re.NoError(err) // Mock delete for i := 0; i < 3; i++ { - _, err = suite.server.GetClient().Delete(suite.server.Context(), suite.GetEtcdPath(strconv.Itoa(i))) + _, err = suite.server.GetClient().Delete(suite.server.Context(), getEtcdPath(strconv.Itoa(i))) re.NoError(err) } // Mock put for i := 6; i < 9; i++ { - _, err = suite.server.GetClient().Put(suite.server.Context(), suite.GetEtcdPath(strconv.Itoa(i)), strconv.Itoa(i)) + _, err = suite.server.GetClient().Put(suite.server.Context(), getEtcdPath(strconv.Itoa(i)), strconv.Itoa(i)) re.NoError(err) } timer := time.NewTimer(time.Second) @@ -338,7 +347,7 @@ func (suite *globalConfigTestSuite) TestClientWatchWithRevision() { return case res := <-configChan: for _, r := range res { - re.Equal(suite.GetEtcdPath(r.Value), r.Name) + re.Equal(getEtcdPath(r.Value), r.Name) } runTest = true } diff --git a/tests/integrations/client/http_client_test.go b/tests/integrations/client/http_client_test.go index 5ff5fef0222..f4a48dcd63e 100644 --- a/tests/integrations/client/http_client_test.go +++ b/tests/integrations/client/http_client_test.go @@ -18,12 +18,15 @@ import ( "context" "math" "net/http" + "net/url" "sort" "strings" + "sync" "testing" "time" "github.com/pingcap/errors" + "github.com/pingcap/kvproto/pkg/metapb" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/stretchr/testify/require" @@ -40,178 +43,184 @@ import ( "github.com/tikv/pd/tests" ) -type mode int - -// We have two ways to create HTTP client. -// 1. using `NewClient` which created `DefaultPDServiceDiscovery` -// 2. using `NewClientWithServiceDiscovery` which pass a `PDServiceDiscovery` as parameter -// test cases should be run in both modes. -const ( - defaultServiceDiscovery mode = iota - specificServiceDiscovery -) - type httpClientTestSuite struct { suite.Suite - env map[mode]*httpClientTestEnv + // 1. Using `NewClient` will create a `DefaultPDServiceDiscovery` internal. + // 2. 
Using `NewClientWithServiceDiscovery` will need a `PDServiceDiscovery` to be passed in. + withServiceDiscovery bool + ctx context.Context + cancelFunc context.CancelFunc + cluster *tests.TestCluster + endpoints []string + client pd.Client } -type httpClientTestEnv struct { - ctx context.Context - cancelFunc context.CancelFunc - cluster *tests.TestCluster - endpoints []string +func TestHTTPClientTestSuite(t *testing.T) { + suite.Run(t, &httpClientTestSuite{ + withServiceDiscovery: false, + }) } -func TestHTTPClientTestSuite(t *testing.T) { - suite.Run(t, new(httpClientTestSuite)) +func TestHTTPClientTestSuiteWithServiceDiscovery(t *testing.T) { + suite.Run(t, &httpClientTestSuite{ + withServiceDiscovery: true, + }) } func (suite *httpClientTestSuite) SetupSuite() { - suite.env = make(map[mode]*httpClientTestEnv) re := suite.Require() + suite.ctx, suite.cancelFunc = context.WithCancel(context.Background()) - for _, mode := range []mode{defaultServiceDiscovery, specificServiceDiscovery} { - env := &httpClientTestEnv{} - env.ctx, env.cancelFunc = context.WithCancel(context.Background()) + cluster, err := tests.NewTestCluster(suite.ctx, 2) + re.NoError(err) - cluster, err := tests.NewTestCluster(env.ctx, 2) - re.NoError(err) + err = cluster.RunInitialServers() + re.NoError(err) + leader := cluster.WaitLeader() + re.NotEmpty(leader) + leaderServer := cluster.GetLeaderServer() - err = cluster.RunInitialServers() + err = leaderServer.BootstrapCluster() + // Add 2 more stores to the cluster. + for i := 2; i <= 4; i++ { + tests.MustPutStore(re, cluster, &metapb.Store{ + Id: uint64(i), + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + LastHeartbeat: time.Now().UnixNano(), + }) + } + re.NoError(err) + for _, region := range []*core.RegionInfo{ + core.NewTestRegionInfo(10, 1, []byte("a1"), []byte("a2")), + core.NewTestRegionInfo(11, 1, []byte("a2"), []byte("a3")), + } { + err := leaderServer.GetRaftCluster().HandleRegionHeartbeat(region) re.NoError(err) - leader := cluster.WaitLeader() - re.NotEmpty(leader) - leaderServer := cluster.GetLeaderServer() - err = leaderServer.BootstrapCluster() + } + var ( + testServers = cluster.GetServers() + endpoints = make([]string, 0, len(testServers)) + ) + for _, s := range testServers { + addr := s.GetConfig().AdvertiseClientUrls + url, err := url.Parse(addr) re.NoError(err) - for _, region := range []*core.RegionInfo{ - core.NewTestRegionInfo(10, 1, []byte("a1"), []byte("a2")), - core.NewTestRegionInfo(11, 1, []byte("a2"), []byte("a3")), - } { - err := leaderServer.GetRaftCluster().HandleRegionHeartbeat(region) - re.NoError(err) - } - var ( - testServers = cluster.GetServers() - endpoints = make([]string, 0, len(testServers)) - ) - for _, s := range testServers { - endpoints = append(endpoints, s.GetConfig().AdvertiseClientUrls) - } - env.endpoints = endpoints - env.cluster = cluster - - suite.env[mode] = env + endpoints = append(endpoints, url.Host) } -} - -func (suite *httpClientTestSuite) TearDownSuite() { - for _, env := range suite.env { - env.cancelFunc() - env.cluster.Destroy() + suite.endpoints = endpoints + suite.cluster = cluster + + if suite.withServiceDiscovery { + // Run test with specific service discovery. + cli := setupCli(suite.ctx, re, suite.endpoints) + sd := cli.GetServiceDiscovery() + suite.client = pd.NewClientWithServiceDiscovery("pd-http-client-it-grpc", sd) + } else { + // Run test with default service discovery. 
+ suite.client = pd.NewClient("pd-http-client-it-http", suite.endpoints) } } -// RunTestInTwoModes is to run test in two modes. -func (suite *httpClientTestSuite) RunTestInTwoModes(test func(mode mode, client pd.Client)) { - // Run test with specific service discovery. - cli := setupCli(suite.Require(), suite.env[specificServiceDiscovery].ctx, suite.env[specificServiceDiscovery].endpoints) - sd := cli.GetServiceDiscovery() - client := pd.NewClientWithServiceDiscovery("pd-http-client-it-grpc", sd) - test(specificServiceDiscovery, client) - client.Close() - - // Run test with default service discovery. - client = pd.NewClient("pd-http-client-it-http", suite.env[defaultServiceDiscovery].endpoints) - test(defaultServiceDiscovery, client) - client.Close() +func (suite *httpClientTestSuite) TearDownSuite() { + suite.cancelFunc() + suite.client.Close() + suite.cluster.Destroy() } func (suite *httpClientTestSuite) TestMeta() { - suite.RunTestInTwoModes(suite.checkMeta) -} - -func (suite *httpClientTestSuite) checkMeta(mode mode, client pd.Client) { re := suite.Require() - env := suite.env[mode] - replicateConfig, err := client.GetReplicateConfig(env.ctx) + client := suite.client + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() + replicateConfig, err := client.GetReplicateConfig(ctx) re.NoError(err) re.Equal(3.0, replicateConfig["max-replicas"]) - region, err := client.GetRegionByID(env.ctx, 10) + region, err := client.GetRegionByID(ctx, 10) re.NoError(err) re.Equal(int64(10), region.ID) re.Equal(core.HexRegionKeyStr([]byte("a1")), region.StartKey) re.Equal(core.HexRegionKeyStr([]byte("a2")), region.EndKey) - region, err = client.GetRegionByKey(env.ctx, []byte("a2")) + region, err = client.GetRegionByKey(ctx, []byte("a2")) re.NoError(err) re.Equal(int64(11), region.ID) re.Equal(core.HexRegionKeyStr([]byte("a2")), region.StartKey) re.Equal(core.HexRegionKeyStr([]byte("a3")), region.EndKey) - regions, err := client.GetRegions(env.ctx) + regions, err := client.GetRegions(ctx) re.NoError(err) re.Equal(int64(2), regions.Count) re.Len(regions.Regions, 2) - regions, err = client.GetRegionsByKeyRange(env.ctx, pd.NewKeyRange([]byte("a1"), []byte("a3")), -1) + regions, err = client.GetRegionsByKeyRange(ctx, pd.NewKeyRange([]byte("a1"), []byte("a3")), -1) re.NoError(err) re.Equal(int64(2), regions.Count) re.Len(regions.Regions, 2) - regions, err = client.GetRegionsByStoreID(env.ctx, 1) + regions, err = client.GetRegionsByStoreID(ctx, 1) re.NoError(err) re.Equal(int64(2), regions.Count) re.Len(regions.Regions, 2) - regions, err = client.GetEmptyRegions(env.ctx) + regions, err = client.GetEmptyRegions(ctx) re.NoError(err) re.Equal(int64(2), regions.Count) re.Len(regions.Regions, 2) - state, err := client.GetRegionsReplicatedStateByKeyRange(env.ctx, pd.NewKeyRange([]byte("a1"), []byte("a3"))) + state, err := client.GetRegionsReplicatedStateByKeyRange(ctx, pd.NewKeyRange([]byte("a1"), []byte("a3"))) re.NoError(err) re.Equal("INPROGRESS", state) - regionStats, err := client.GetRegionStatusByKeyRange(env.ctx, pd.NewKeyRange([]byte("a1"), []byte("a3")), false) + regionStats, err := client.GetRegionStatusByKeyRange(ctx, pd.NewKeyRange([]byte("a1"), []byte("a3")), false) re.NoError(err) - re.Greater(regionStats.Count, 0) + re.Positive(regionStats.Count) re.NotEmpty(regionStats.StoreLeaderCount) - regionStats, err = client.GetRegionStatusByKeyRange(env.ctx, pd.NewKeyRange([]byte("a1"), []byte("a3")), true) + regionStats, err = client.GetRegionStatusByKeyRange(ctx, pd.NewKeyRange([]byte("a1"), 
[]byte("a3")), true) re.NoError(err) - re.Greater(regionStats.Count, 0) + re.Positive(regionStats.Count) re.Empty(regionStats.StoreLeaderCount) - hotReadRegions, err := client.GetHotReadRegions(env.ctx) + hotReadRegions, err := client.GetHotReadRegions(ctx) re.NoError(err) - re.Len(hotReadRegions.AsPeer, 1) - re.Len(hotReadRegions.AsLeader, 1) - hotWriteRegions, err := client.GetHotWriteRegions(env.ctx) + re.Len(hotReadRegions.AsPeer, 4) + re.Len(hotReadRegions.AsLeader, 4) + hotWriteRegions, err := client.GetHotWriteRegions(ctx) re.NoError(err) - re.Len(hotWriteRegions.AsPeer, 1) - re.Len(hotWriteRegions.AsLeader, 1) - historyHorRegions, err := client.GetHistoryHotRegions(env.ctx, &pd.HistoryHotRegionsRequest{ + re.Len(hotWriteRegions.AsPeer, 4) + re.Len(hotWriteRegions.AsLeader, 4) + historyHorRegions, err := client.GetHistoryHotRegions(ctx, &pd.HistoryHotRegionsRequest{ StartTime: 0, EndTime: time.Now().AddDate(0, 0, 1).UnixNano() / int64(time.Millisecond), }) re.NoError(err) re.Empty(historyHorRegions.HistoryHotRegion) - store, err := client.GetStores(env.ctx) + stores, err := client.GetStores(ctx) re.NoError(err) - re.Equal(1, store.Count) - re.Len(store.Stores, 1) - storeID := uint64(store.Stores[0].Store.ID) // TODO: why type is different? - store2, err := client.GetStore(env.ctx, storeID) + re.Equal(4, stores.Count) + re.Len(stores.Stores, 4) + storeID := uint64(stores.Stores[0].Store.ID) // TODO: why type is different? + store2, err := client.GetStore(ctx, storeID) re.NoError(err) re.EqualValues(storeID, store2.Store.ID) - version, err := client.GetClusterVersion(env.ctx) + version, err := client.GetClusterVersion(ctx) re.NoError(err) - re.Equal("0.0.0", version) + re.Equal("1.0.0", version) + rgs, _ := client.GetRegionsByKeyRange(ctx, pd.NewKeyRange([]byte("a"), []byte("a1")), 100) + re.Equal(int64(0), rgs.Count) + rgs, _ = client.GetRegionsByKeyRange(ctx, pd.NewKeyRange([]byte("a1"), []byte("a3")), 100) + re.Equal(int64(2), rgs.Count) + rgs, _ = client.GetRegionsByKeyRange(ctx, pd.NewKeyRange([]byte("a2"), []byte("b")), 100) + re.Equal(int64(1), rgs.Count) + rgs, _ = client.GetRegionsByKeyRange(ctx, pd.NewKeyRange([]byte(""), []byte("")), 100) + re.Equal(int64(2), rgs.Count) + // store 2 origin status:offline + err = client.DeleteStore(ctx, 2) + re.NoError(err) + store2, err = client.GetStore(ctx, 2) + re.NoError(err) + re.Equal(int64(metapb.StoreState_Offline), store2.Store.State) } func (suite *httpClientTestSuite) TestGetMinResolvedTSByStoresIDs() { - suite.RunTestInTwoModes(suite.checkGetMinResolvedTSByStoresIDs) -} - -func (suite *httpClientTestSuite) checkGetMinResolvedTSByStoresIDs(mode mode, client pd.Client) { re := suite.Require() - env := suite.env[mode] - + client := suite.client + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() testMinResolvedTS := tsoutil.TimeToTS(time.Now()) - raftCluster := env.cluster.GetLeaderServer().GetRaftCluster() + raftCluster := suite.cluster.GetLeaderServer().GetRaftCluster() err := raftCluster.SetMinResolvedTS(1, testMinResolvedTS) re.NoError(err) // Make sure the min resolved TS is updated. @@ -220,18 +229,18 @@ func (suite *httpClientTestSuite) checkGetMinResolvedTSByStoresIDs(mode mode, cl return minResolvedTS == testMinResolvedTS }) // Wait for the cluster-level min resolved TS to be initialized. 
- minResolvedTS, storeMinResolvedTSMap, err := client.GetMinResolvedTSByStoresIDs(env.ctx, nil) + minResolvedTS, storeMinResolvedTSMap, err := client.GetMinResolvedTSByStoresIDs(ctx, nil) re.NoError(err) re.Equal(testMinResolvedTS, minResolvedTS) re.Empty(storeMinResolvedTSMap) // Get the store-level min resolved TS. - minResolvedTS, storeMinResolvedTSMap, err = client.GetMinResolvedTSByStoresIDs(env.ctx, []uint64{1}) + minResolvedTS, storeMinResolvedTSMap, err = client.GetMinResolvedTSByStoresIDs(ctx, []uint64{1}) re.NoError(err) re.Equal(testMinResolvedTS, minResolvedTS) re.Len(storeMinResolvedTSMap, 1) re.Equal(minResolvedTS, storeMinResolvedTSMap[1]) // Get the store-level min resolved TS with an invalid store ID. - minResolvedTS, storeMinResolvedTSMap, err = client.GetMinResolvedTSByStoresIDs(env.ctx, []uint64{1, 2}) + minResolvedTS, storeMinResolvedTSMap, err = client.GetMinResolvedTSByStoresIDs(ctx, []uint64{1, 2}) re.NoError(err) re.Equal(testMinResolvedTS, minResolvedTS) re.Len(storeMinResolvedTSMap, 2) @@ -240,22 +249,19 @@ func (suite *httpClientTestSuite) checkGetMinResolvedTSByStoresIDs(mode mode, cl } func (suite *httpClientTestSuite) TestRule() { - suite.RunTestInTwoModes(suite.checkRule) -} - -func (suite *httpClientTestSuite) checkRule(mode mode, client pd.Client) { re := suite.Require() - env := suite.env[mode] - - bundles, err := client.GetAllPlacementRuleBundles(env.ctx) + client := suite.client + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() + bundles, err := client.GetAllPlacementRuleBundles(ctx) re.NoError(err) re.Len(bundles, 1) re.Equal(placement.DefaultGroupID, bundles[0].ID) - bundle, err := client.GetPlacementRuleBundleByGroup(env.ctx, placement.DefaultGroupID) + bundle, err := client.GetPlacementRuleBundleByGroup(ctx, placement.DefaultGroupID) re.NoError(err) re.Equal(bundles[0], bundle) // Check if we have the default rule. - suite.checkRuleResult(re, env, client, &pd.Rule{ + suite.checkRuleResult(ctx, re, &pd.Rule{ GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, Role: pd.Voter, @@ -264,7 +270,7 @@ func (suite *httpClientTestSuite) checkRule(mode mode, client pd.Client) { EndKey: []byte{}, }, 1, true) // Should be the same as the rules in the bundle. 
- suite.checkRuleResult(re, env, client, bundle.Rules[0], 1, true) + suite.checkRuleResult(ctx, re, bundle.Rules[0], 1, true) testRule := &pd.Rule{ GroupID: placement.DefaultGroupID, ID: "test", @@ -273,39 +279,39 @@ func (suite *httpClientTestSuite) checkRule(mode mode, client pd.Client) { StartKey: []byte{}, EndKey: []byte{}, } - err = client.SetPlacementRule(env.ctx, testRule) + err = client.SetPlacementRule(ctx, testRule) re.NoError(err) - suite.checkRuleResult(re, env, client, testRule, 2, true) - err = client.DeletePlacementRule(env.ctx, placement.DefaultGroupID, "test") + suite.checkRuleResult(ctx, re, testRule, 2, true) + err = client.DeletePlacementRule(ctx, placement.DefaultGroupID, "test") re.NoError(err) - suite.checkRuleResult(re, env, client, testRule, 1, false) + suite.checkRuleResult(ctx, re, testRule, 1, false) testRuleOp := &pd.RuleOp{ Rule: testRule, Action: pd.RuleOpAdd, } - err = client.SetPlacementRuleInBatch(env.ctx, []*pd.RuleOp{testRuleOp}) + err = client.SetPlacementRuleInBatch(ctx, []*pd.RuleOp{testRuleOp}) re.NoError(err) - suite.checkRuleResult(re, env, client, testRule, 2, true) + suite.checkRuleResult(ctx, re, testRule, 2, true) testRuleOp = &pd.RuleOp{ Rule: testRule, Action: pd.RuleOpDel, } - err = client.SetPlacementRuleInBatch(env.ctx, []*pd.RuleOp{testRuleOp}) + err = client.SetPlacementRuleInBatch(ctx, []*pd.RuleOp{testRuleOp}) re.NoError(err) - suite.checkRuleResult(re, env, client, testRule, 1, false) - err = client.SetPlacementRuleBundles(env.ctx, []*pd.GroupBundle{ + suite.checkRuleResult(ctx, re, testRule, 1, false) + err = client.SetPlacementRuleBundles(ctx, []*pd.GroupBundle{ { ID: placement.DefaultGroupID, Rules: []*pd.Rule{testRule}, }, }, true) re.NoError(err) - suite.checkRuleResult(re, env, client, testRule, 1, true) - ruleGroups, err := client.GetAllPlacementRuleGroups(env.ctx) + suite.checkRuleResult(ctx, re, testRule, 1, true) + ruleGroups, err := client.GetAllPlacementRuleGroups(ctx) re.NoError(err) re.Len(ruleGroups, 1) re.Equal(placement.DefaultGroupID, ruleGroups[0].ID) - ruleGroup, err := client.GetPlacementRuleGroupByID(env.ctx, placement.DefaultGroupID) + ruleGroup, err := client.GetPlacementRuleGroupByID(ctx, placement.DefaultGroupID) re.NoError(err) re.Equal(ruleGroups[0], ruleGroup) testRuleGroup := &pd.RuleGroup{ @@ -313,14 +319,14 @@ func (suite *httpClientTestSuite) checkRule(mode mode, client pd.Client) { Index: 1, Override: true, } - err = client.SetPlacementRuleGroup(env.ctx, testRuleGroup) + err = client.SetPlacementRuleGroup(ctx, testRuleGroup) re.NoError(err) - ruleGroup, err = client.GetPlacementRuleGroupByID(env.ctx, testRuleGroup.ID) + ruleGroup, err = client.GetPlacementRuleGroupByID(ctx, testRuleGroup.ID) re.NoError(err) re.Equal(testRuleGroup, ruleGroup) - err = client.DeletePlacementRuleGroupByID(env.ctx, testRuleGroup.ID) + err = client.DeletePlacementRuleGroupByID(ctx, testRuleGroup.ID) re.NoError(err) - ruleGroup, err = client.GetPlacementRuleGroupByID(env.ctx, testRuleGroup.ID) + ruleGroup, err = client.GetPlacementRuleGroupByID(ctx, testRuleGroup.ID) re.ErrorContains(err, http.StatusText(http.StatusNotFound)) re.Empty(ruleGroup) // Test the start key and end key. 
@@ -332,34 +338,33 @@ func (suite *httpClientTestSuite) checkRule(mode mode, client pd.Client) { StartKey: []byte("a1"), EndKey: []byte(""), } - err = client.SetPlacementRule(env.ctx, testRule) + err = client.SetPlacementRule(ctx, testRule) re.NoError(err) - suite.checkRuleResult(re, env, client, testRule, 1, true) + suite.checkRuleResult(ctx, re, testRule, 1, true) } func (suite *httpClientTestSuite) checkRuleResult( - re *require.Assertions, - env *httpClientTestEnv, - client pd.Client, + ctx context.Context, re *require.Assertions, rule *pd.Rule, totalRuleCount int, exist bool, ) { + client := suite.client if exist { - got, err := client.GetPlacementRule(env.ctx, rule.GroupID, rule.ID) + got, err := client.GetPlacementRule(ctx, rule.GroupID, rule.ID) re.NoError(err) // skip comparison of the generated field got.StartKeyHex = rule.StartKeyHex got.EndKeyHex = rule.EndKeyHex re.Equal(rule, got) } else { - _, err := client.GetPlacementRule(env.ctx, rule.GroupID, rule.ID) + _, err := client.GetPlacementRule(ctx, rule.GroupID, rule.ID) re.ErrorContains(err, http.StatusText(http.StatusNotFound)) } // Check through the `GetPlacementRulesByGroup` API. - rules, err := client.GetPlacementRulesByGroup(env.ctx, rule.GroupID) + rules, err := client.GetPlacementRulesByGroup(ctx, rule.GroupID) re.NoError(err) checkRuleFunc(re, rules, rule, totalRuleCount, exist) // Check through the `GetPlacementRuleBundleByGroup` API. - bundle, err := client.GetPlacementRuleBundleByGroup(env.ctx, rule.GroupID) + bundle, err := client.GetPlacementRuleBundleByGroup(ctx, rule.GroupID) re.NoError(err) checkRuleFunc(re, bundle.Rules, rule, totalRuleCount, exist) } @@ -387,14 +392,11 @@ func checkRuleFunc( } func (suite *httpClientTestSuite) TestRegionLabel() { - suite.RunTestInTwoModes(suite.checkRegionLabel) -} - -func (suite *httpClientTestSuite) checkRegionLabel(mode mode, client pd.Client) { re := suite.Require() - env := suite.env[mode] - - labelRules, err := client.GetAllRegionLabelRules(env.ctx) + client := suite.client + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() + labelRules, err := client.GetAllRegionLabelRules(ctx) re.NoError(err) re.Len(labelRules, 1) re.Equal("keyspaces/0", labelRules[0].ID) @@ -405,9 +407,9 @@ func (suite *httpClientTestSuite) checkRegionLabel(mode mode, client pd.Client) RuleType: "key-range", Data: labeler.MakeKeyRanges("1234", "5678"), } - err = client.SetRegionLabelRule(env.ctx, labelRule) + err = client.SetRegionLabelRule(ctx, labelRule) re.NoError(err) - labelRules, err = client.GetAllRegionLabelRules(env.ctx) + labelRules, err = client.GetAllRegionLabelRules(ctx) re.NoError(err) re.Len(labelRules, 2) sort.Slice(labelRules, func(i, j int) bool { @@ -427,9 +429,9 @@ func (suite *httpClientTestSuite) checkRegionLabel(mode mode, client pd.Client) SetRules: []*pd.LabelRule{labelRule}, DeleteRules: []string{"rule1"}, } - err = client.PatchRegionLabelRules(env.ctx, patch) + err = client.PatchRegionLabelRules(ctx, patch) re.NoError(err) - allLabelRules, err := client.GetAllRegionLabelRules(env.ctx) + allLabelRules, err := client.GetAllRegionLabelRules(ctx) re.NoError(err) re.Len(labelRules, 2) sort.Slice(allLabelRules, func(i, j int) bool { @@ -438,7 +440,7 @@ func (suite *httpClientTestSuite) checkRegionLabel(mode mode, client pd.Client) re.Equal(labelRule.ID, allLabelRules[1].ID) re.Equal(labelRule.Labels, allLabelRules[1].Labels) re.Equal(labelRule.RuleType, allLabelRules[1].RuleType) - labelRules, err = client.GetRegionLabelRulesByIDs(env.ctx, []string{"keyspaces/0", 
"rule2"}) + labelRules, err = client.GetRegionLabelRulesByIDs(ctx, []string{"keyspaces/0", "rule2"}) re.NoError(err) sort.Slice(labelRules, func(i, j int) bool { return labelRules[i].ID < labelRules[j].ID @@ -447,24 +449,21 @@ func (suite *httpClientTestSuite) checkRegionLabel(mode mode, client pd.Client) } func (suite *httpClientTestSuite) TestAccelerateSchedule() { - suite.RunTestInTwoModes(suite.checkAccelerateSchedule) -} - -func (suite *httpClientTestSuite) checkAccelerateSchedule(mode mode, client pd.Client) { re := suite.Require() - env := suite.env[mode] - - raftCluster := env.cluster.GetLeaderServer().GetRaftCluster() + client := suite.client + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() + raftCluster := suite.cluster.GetLeaderServer().GetRaftCluster() suspectRegions := raftCluster.GetSuspectRegions() re.Empty(suspectRegions) - err := client.AccelerateSchedule(env.ctx, pd.NewKeyRange([]byte("a1"), []byte("a2"))) + err := client.AccelerateSchedule(ctx, pd.NewKeyRange([]byte("a1"), []byte("a2"))) re.NoError(err) suspectRegions = raftCluster.GetSuspectRegions() re.Len(suspectRegions, 1) raftCluster.ClearSuspectRegions() suspectRegions = raftCluster.GetSuspectRegions() re.Empty(suspectRegions) - err = client.AccelerateScheduleInBatch(env.ctx, []*pd.KeyRange{ + err = client.AccelerateScheduleInBatch(ctx, []*pd.KeyRange{ pd.NewKeyRange([]byte("a1"), []byte("a2")), pd.NewKeyRange([]byte("a2"), []byte("a3")), }) @@ -474,101 +473,111 @@ func (suite *httpClientTestSuite) checkAccelerateSchedule(mode mode, client pd.C } func (suite *httpClientTestSuite) TestConfig() { - suite.RunTestInTwoModes(suite.checkConfig) -} - -func (suite *httpClientTestSuite) checkConfig(mode mode, client pd.Client) { re := suite.Require() - env := suite.env[mode] - - config, err := client.GetConfig(env.ctx) + client := suite.client + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() + config, err := client.GetConfig(ctx) re.NoError(err) - re.Equal(float64(4), config["schedule"].(map[string]interface{})["leader-schedule-limit"]) + re.Equal(float64(4), config["schedule"].(map[string]any)["leader-schedule-limit"]) - newConfig := map[string]interface{}{ + newConfig := map[string]any{ "schedule.leader-schedule-limit": float64(8), } - err = client.SetConfig(env.ctx, newConfig) + err = client.SetConfig(ctx, newConfig) re.NoError(err) - config, err = client.GetConfig(env.ctx) + config, err = client.GetConfig(ctx) re.NoError(err) - re.Equal(float64(8), config["schedule"].(map[string]interface{})["leader-schedule-limit"]) + re.Equal(float64(8), config["schedule"].(map[string]any)["leader-schedule-limit"]) // Test the config with TTL. - newConfig = map[string]interface{}{ + newConfig = map[string]any{ "schedule.leader-schedule-limit": float64(16), } - err = client.SetConfig(env.ctx, newConfig, 5) + err = client.SetConfig(ctx, newConfig, 5) re.NoError(err) - resp, err := env.cluster.GetEtcdClient().Get(env.ctx, sc.TTLConfigPrefix+"/schedule.leader-schedule-limit") + resp, err := suite.cluster.GetEtcdClient().Get(ctx, sc.TTLConfigPrefix+"/schedule.leader-schedule-limit") re.NoError(err) re.Equal([]byte("16"), resp.Kvs[0].Value) -} + // delete the config with TTL. 
+ err = client.SetConfig(ctx, newConfig, 0) + re.NoError(err) + resp, err = suite.cluster.GetEtcdClient().Get(ctx, sc.TTLConfigPrefix+"/schedule.leader-schedule-limit") + re.NoError(err) + re.Empty(resp.Kvs) -func (suite *httpClientTestSuite) TestScheduleConfig() { - suite.RunTestInTwoModes(suite.checkScheduleConfig) + // Test the config with TTL for storing float64 as uint64. + newConfig = map[string]any{ + "schedule.max-pending-peer-count": uint64(math.MaxInt32), + } + err = client.SetConfig(ctx, newConfig, 4) + re.NoError(err) + c := suite.cluster.GetLeaderServer().GetRaftCluster().GetOpts().GetMaxPendingPeerCount() + re.Equal(uint64(math.MaxInt32), c) + + err = client.SetConfig(ctx, newConfig, 0) + re.NoError(err) + resp, err = suite.cluster.GetEtcdClient().Get(ctx, sc.TTLConfigPrefix+"/schedule.max-pending-peer-count") + re.NoError(err) + re.Empty(resp.Kvs) } -func (suite *httpClientTestSuite) checkScheduleConfig(mode mode, client pd.Client) { +func (suite *httpClientTestSuite) TestScheduleConfig() { re := suite.Require() - env := suite.env[mode] - - config, err := client.GetScheduleConfig(env.ctx) + client := suite.client + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() + config, err := client.GetScheduleConfig(ctx) re.NoError(err) - re.Equal(float64(4), config["leader-schedule-limit"]) + re.Equal(float64(4), config["hot-region-schedule-limit"]) re.Equal(float64(2048), config["region-schedule-limit"]) - config["leader-schedule-limit"] = float64(8) - err = client.SetScheduleConfig(env.ctx, config) + config["hot-region-schedule-limit"] = float64(8) + err = client.SetScheduleConfig(ctx, config) re.NoError(err) - config, err = client.GetScheduleConfig(env.ctx) + config, err = client.GetScheduleConfig(ctx) re.NoError(err) - re.Equal(float64(8), config["leader-schedule-limit"]) + re.Equal(float64(8), config["hot-region-schedule-limit"]) re.Equal(float64(2048), config["region-schedule-limit"]) } func (suite *httpClientTestSuite) TestSchedulers() { - suite.RunTestInTwoModes(suite.checkSchedulers) -} - -func (suite *httpClientTestSuite) checkSchedulers(mode mode, client pd.Client) { re := suite.Require() - env := suite.env[mode] - - schedulers, err := client.GetSchedulers(env.ctx) + client := suite.client + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() + schedulers, err := client.GetSchedulers(ctx) re.NoError(err) - re.Empty(schedulers) + const schedulerName = "evict-leader-scheduler" + re.NotContains(schedulers, schedulerName) - err = client.CreateScheduler(env.ctx, "evict-leader-scheduler", 1) + err = client.CreateScheduler(ctx, schedulerName, 1) re.NoError(err) - schedulers, err = client.GetSchedulers(env.ctx) + schedulers, err = client.GetSchedulers(ctx) re.NoError(err) - re.Len(schedulers, 1) - err = client.SetSchedulerDelay(env.ctx, "evict-leader-scheduler", 100) + re.Contains(schedulers, schedulerName) + err = client.SetSchedulerDelay(ctx, schedulerName, 100) re.NoError(err) - err = client.SetSchedulerDelay(env.ctx, "not-exist", 100) + err = client.SetSchedulerDelay(ctx, "not-exist", 100) re.ErrorContains(err, "500 Internal Server Error") // TODO: should return friendly error message } func (suite *httpClientTestSuite) TestSetStoreLabels() { - suite.RunTestInTwoModes(suite.checkSetStoreLabels) -} - -func (suite *httpClientTestSuite) checkSetStoreLabels(mode mode, client pd.Client) { re := suite.Require() - env := suite.env[mode] - - resp, err := client.GetStores(env.ctx) + client := suite.client + ctx, cancel := context.WithCancel(suite.ctx) + defer 
cancel() + resp, err := client.GetStores(ctx) re.NoError(err) setStore := resp.Stores[0] re.Empty(setStore.Store.Labels, nil) storeLabels := map[string]string{ "zone": "zone1", } - err = client.SetStoreLabels(env.ctx, 1, storeLabels) + err = client.SetStoreLabels(ctx, 1, storeLabels) re.NoError(err) - resp, err = client.GetStores(env.ctx) + resp, err = client.GetStores(ctx) re.NoError(err) for _, store := range resp.Stores { if store.Store.ID == setStore.Store.ID { @@ -580,101 +589,96 @@ func (suite *httpClientTestSuite) checkSetStoreLabels(mode mode, client pd.Clien } func (suite *httpClientTestSuite) TestTransferLeader() { - suite.RunTestInTwoModes(suite.checkTransferLeader) -} - -func (suite *httpClientTestSuite) checkTransferLeader(mode mode, client pd.Client) { re := suite.Require() - env := suite.env[mode] - - members, err := client.GetMembers(env.ctx) + client := suite.client + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() + members, err := client.GetMembers(ctx) re.NoError(err) re.Len(members.Members, 2) - leader, err := client.GetLeader(env.ctx) + leader, err := client.GetLeader(ctx) re.NoError(err) // Transfer leader to another pd for _, member := range members.Members { if member.GetName() != leader.GetName() { - err = client.TransferLeader(env.ctx, member.GetName()) + err = client.TransferLeader(ctx, member.GetName()) re.NoError(err) break } } - newLeader := env.cluster.WaitLeader() + newLeader := suite.cluster.WaitLeader() re.NotEmpty(newLeader) re.NoError(err) re.NotEqual(leader.GetName(), newLeader) // Force to update the members info. testutil.Eventually(re, func() bool { - leader, err = client.GetLeader(env.ctx) + leader, err = client.GetLeader(ctx) re.NoError(err) return newLeader == leader.GetName() }) - members, err = client.GetMembers(env.ctx) + members, err = client.GetMembers(ctx) re.NoError(err) re.Len(members.Members, 2) re.Equal(leader.GetName(), members.Leader.GetName()) } func (suite *httpClientTestSuite) TestVersion() { - suite.RunTestInTwoModes(suite.checkVersion) -} - -func (suite *httpClientTestSuite) checkVersion(mode mode, client pd.Client) { re := suite.Require() - env := suite.env[mode] - - ver, err := client.GetPDVersion(env.ctx) + ver, err := suite.client.GetPDVersion(suite.ctx) re.NoError(err) re.Equal(versioninfo.PDReleaseVersion, ver) } -func (suite *httpClientTestSuite) TestAdmin() { - suite.RunTestInTwoModes(suite.checkAdmin) +func (suite *httpClientTestSuite) TestStatus() { + re := suite.Require() + status, err := suite.client.GetStatus(suite.ctx) + re.NoError(err) + re.Equal(versioninfo.PDReleaseVersion, status.Version) + re.Equal(versioninfo.PDGitHash, status.GitHash) + re.Equal(versioninfo.PDBuildTS, status.BuildTS) + re.GreaterOrEqual(time.Now().Unix(), status.StartTimestamp) } -func (suite *httpClientTestSuite) checkAdmin(mode mode, client pd.Client) { +func (suite *httpClientTestSuite) TestAdmin() { re := suite.Require() - env := suite.env[mode] - - err := client.SetSnapshotRecoveringMark(env.ctx) + client := suite.client + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() + err := client.SetSnapshotRecoveringMark(ctx) re.NoError(err) - err = client.ResetTS(env.ctx, 123, true) + err = client.ResetTS(ctx, 123, true) re.NoError(err) - err = client.ResetBaseAllocID(env.ctx, 456) + err = client.ResetBaseAllocID(ctx, 456) re.NoError(err) - err = client.DeleteSnapshotRecoveringMark(env.ctx) + err = client.DeleteSnapshotRecoveringMark(ctx) re.NoError(err) } func (suite *httpClientTestSuite) TestWithBackoffer() { - 
suite.RunTestInTwoModes(suite.checkWithBackoffer) -} - -func (suite *httpClientTestSuite) checkWithBackoffer(mode mode, client pd.Client) { re := suite.Require() - env := suite.env[mode] - + client := suite.client + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() // Should return with 404 error without backoffer. - rule, err := client.GetPlacementRule(env.ctx, "non-exist-group", "non-exist-rule") + rule, err := client.GetPlacementRule(ctx, "non-exist-group", "non-exist-rule") re.ErrorContains(err, http.StatusText(http.StatusNotFound)) re.Nil(rule) // Should return with 404 error even with an infinite backoffer. rule, err = client. WithBackoffer(retry.InitialBackoffer(100*time.Millisecond, time.Second, 0)). - GetPlacementRule(env.ctx, "non-exist-group", "non-exist-rule") + GetPlacementRule(ctx, "non-exist-group", "non-exist-rule") re.ErrorContains(err, http.StatusText(http.StatusNotFound)) re.Nil(rule) } func (suite *httpClientTestSuite) TestRedirectWithMetrics() { re := suite.Require() - env := suite.env[defaultServiceDiscovery] - cli := setupCli(suite.Require(), env.ctx, env.endpoints) + cli := setupCli(suite.ctx, re, suite.endpoints) + defer cli.Close() sd := cli.GetServiceDiscovery() metricCnt := prometheus.NewCounterVec( @@ -697,7 +701,7 @@ func (suite *httpClientTestSuite) TestRedirectWithMetrics() { re.Equal(float64(2), out.Counter.GetValue()) c.Close() - leader := sd.GetServingAddr() + leader := sd.GetServingURL() httpClient = pd.NewHTTPClientWithRequestChecker(func(req *http.Request) error { // mock leader success. if !strings.Contains(leader, req.Host) { @@ -732,3 +736,82 @@ func (suite *httpClientTestSuite) TestRedirectWithMetrics() { re.Equal(float64(3), out.Counter.GetValue()) c.Close() } + +func (suite *httpClientTestSuite) TestUpdateKeyspaceGCManagementType() { + re := suite.Require() + client := suite.client + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() + + keyspaceName := "DEFAULT" + expectGCManagementType := "keyspace_level_gc" + + keyspaceSafePointVersionConfig := pd.KeyspaceGCManagementTypeConfig{ + Config: pd.KeyspaceGCManagementType{ + GCManagementType: expectGCManagementType, + }, + } + err := client.UpdateKeyspaceGCManagementType(ctx, keyspaceName, &keyspaceSafePointVersionConfig) + re.NoError(err) + + keyspaceMetaRes, err := client.GetKeyspaceMetaByName(ctx, keyspaceName) + re.NoError(err) + val, ok := keyspaceMetaRes.Config["gc_management_type"] + + // Check it can get expect key and value in keyspace meta config. 
+ re.True(ok) + re.Equal(expectGCManagementType, val) +} + +func (suite *httpClientTestSuite) TestGetHealthStatus() { + re := suite.Require() + healths, err := suite.client.GetHealthStatus(suite.ctx) + re.NoError(err) + re.Len(healths, 2) + sort.Slice(healths, func(i, j int) bool { + return healths[i].Name < healths[j].Name + }) + re.Equal("pd1", healths[0].Name) + re.Equal("pd2", healths[1].Name) + re.True(healths[0].Health && healths[1].Health) +} + +func (suite *httpClientTestSuite) TestRetryOnLeaderChange() { + re := suite.Require() + ctx, cancel := context.WithCancel(suite.ctx) + defer cancel() + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + bo := retry.InitialBackoffer(100*time.Millisecond, time.Second, 0) + client := suite.client.WithBackoffer(bo) + for { + healths, err := client.GetHealthStatus(ctx) + if err != nil && strings.Contains(err.Error(), "context canceled") { + return + } + re.NoError(err) + re.Len(healths, 2) + select { + case <-ctx.Done(): + return + default: + } + } + }() + + leader := suite.cluster.GetLeaderServer() + re.NotNil(leader) + for i := 0; i < 3; i++ { + leader.ResignLeader() + re.NotEmpty(suite.cluster.WaitLeader()) + leader = suite.cluster.GetLeaderServer() + re.NotNil(leader) + } + + // Cancel the context to stop the goroutine. + cancel() + wg.Wait() +} diff --git a/tests/integrations/go.mod b/tests/integrations/go.mod index 31d43cb86f6..7d07b668c80 100644 --- a/tests/integrations/go.mod +++ b/tests/integrations/go.mod @@ -8,26 +8,23 @@ replace ( github.com/tikv/pd/tests/integrations/mcs => ./mcs ) -// reset grpc and protobuf deps in order to import client and server at the same time -replace google.golang.org/grpc v1.59.0 => google.golang.org/grpc v1.26.0 - require ( github.com/DATA-DOG/go-sqlmock v1.5.0 github.com/docker/go-units v0.5.0 github.com/go-sql-driver/mysql v1.7.0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20220801062533-2eaa32854a6c - github.com/pingcap/kvproto v0.0.0-20231226064240-4f28b82c7860 + github.com/pingcap/kvproto v0.0.0-20240403065636-c699538f7aa1 github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 - github.com/prometheus/client_golang v1.18.0 - github.com/prometheus/client_model v0.5.0 + github.com/prometheus/client_golang v1.19.0 + github.com/prometheus/client_model v0.6.0 github.com/stretchr/testify v1.8.4 github.com/tikv/pd v0.0.0-00010101000000-000000000000 github.com/tikv/pd/client v0.0.0-00010101000000-000000000000 - go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793 + go.etcd.io/etcd v0.5.0-alpha.5.0.20240320135013-950cd5fbe6ca go.uber.org/goleak v1.3.0 - go.uber.org/zap v1.26.0 - google.golang.org/grpc v1.59.0 + go.uber.org/zap v1.27.0 + google.golang.org/grpc v1.62.1 gorm.io/driver/mysql v1.4.5 gorm.io/gorm v1.24.3 moul.io/zapgorm2 v1.1.0 @@ -64,9 +61,9 @@ require ( github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect github.com/cloudfoundry/gosigar v1.3.6 // indirect - github.com/coreos/go-semver v0.3.0 // indirect - github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect - github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect + github.com/coreos/go-semver v0.3.1 // indirect + github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf // indirect + github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dustin/go-humanize v1.0.1 // indirect 
github.com/elliotchance/pie/v2 v2.1.0 // indirect @@ -89,30 +86,29 @@ require ( github.com/goccy/go-graphviz v0.0.9 // indirect github.com/goccy/go-json v0.10.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang-jwt/jwt v3.2.1+incompatible // indirect + github.com/golang-jwt/jwt v3.2.2+incompatible // indirect github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect - github.com/golang/protobuf v1.5.3 // indirect + github.com/golang/protobuf v1.5.4 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/btree v1.1.2 // indirect github.com/google/pprof v0.0.0-20211122183932-1daafda22083 // indirect - github.com/google/uuid v1.3.0 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/gorilla/mux v1.7.4 // indirect - github.com/gorilla/websocket v1.4.2 // indirect - github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect + github.com/gorilla/websocket v1.5.1 // indirect + github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect github.com/gtank/cryptopasta v0.0.0-20170601214702-1f550f6f2f69 // indirect github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d // indirect - github.com/inconshreveable/mousetrap v1.0.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect github.com/joho/godotenv v1.4.0 // indirect - github.com/jonboulle/clockwork v0.2.2 // indirect + github.com/jonboulle/clockwork v0.4.0 // indirect github.com/joomcode/errorx v1.0.1 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/cpuid/v2 v2.2.4 // indirect - github.com/konsorten/go-windows-terminal-sequences v1.0.3 // indirect github.com/leodido/go-urn v1.2.4 // indirect github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a // indirect github.com/mailru/easyjson v0.7.6 // indirect @@ -122,29 +118,30 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/oleiade/reflections v1.0.1 // indirect - github.com/opentracing/opentracing-go v1.2.0 // indirect + github.com/opentracing/basictracer-go v1.1.0 + github.com/opentracing/opentracing-go v1.2.0 github.com/pelletier/go-toml/v2 v2.0.8 // indirect github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36 // indirect github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d // indirect github.com/pingcap/errcode v0.3.0 // indirect github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 // indirect - github.com/pingcap/tidb-dashboard v0.0.0-20240111062855-41f7c8011953 // indirect + github.com/pingcap/tidb-dashboard v0.0.0-20240326110213-9768844ff5d7 // indirect github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b // indirect - github.com/prometheus/common v0.46.0 // indirect - github.com/prometheus/procfs v0.12.0 // indirect + github.com/prometheus/common v0.51.1 // indirect + github.com/prometheus/procfs v0.13.0 // indirect github.com/rs/cors v1.7.0 // indirect github.com/samber/lo v1.37.0 // indirect github.com/sasha-s/go-deadlock v0.2.0 // indirect 
github.com/shirou/gopsutil/v3 v3.23.3 // indirect github.com/shoenig/go-m1cpu v0.1.5 // indirect github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0 // indirect - github.com/sirupsen/logrus v1.6.0 // indirect - github.com/smallnest/chanx v0.0.0-20221229104322-eb4c998d2072 // indirect - github.com/soheilhy/cmux v0.1.4 // indirect - github.com/spf13/cobra v1.0.0 // indirect + github.com/sirupsen/logrus v1.9.3 // indirect + github.com/smallnest/chanx v1.2.1-0.20240521153536-01121e21ff99 // indirect + github.com/soheilhy/cmux v0.1.5 // indirect + github.com/spf13/cobra v1.8.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/objx v0.5.0 // indirect github.com/swaggo/files v0.0.0-20210815190702-a29dd2bc99b2 // indirect @@ -153,40 +150,40 @@ require ( github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965 // indirect github.com/tklauser/go-sysconf v0.3.11 // indirect github.com/tklauser/numcpus v0.6.0 // indirect - github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 // indirect + github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.11 // indirect github.com/unrolled/render v1.0.1 // indirect github.com/urfave/negroni v0.3.0 // indirect github.com/vmihailenco/msgpack/v5 v5.3.5 // indirect github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect - github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect + github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect - go.etcd.io/bbolt v1.3.6 // indirect + go.etcd.io/bbolt v1.3.9 // indirect go.uber.org/atomic v1.10.0 // indirect go.uber.org/dig v1.9.0 // indirect go.uber.org/fx v1.12.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/arch v0.3.0 // indirect - golang.org/x/crypto v0.18.0 // indirect + golang.org/x/crypto v0.21.0 // indirect golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4 // indirect golang.org/x/image v0.10.0 // indirect - golang.org/x/net v0.20.0 // indirect - golang.org/x/oauth2 v0.16.0 // indirect - golang.org/x/sync v0.4.0 // indirect - golang.org/x/sys v0.16.0 // indirect + golang.org/x/net v0.23.0 // indirect + golang.org/x/oauth2 v0.18.0 // indirect + golang.org/x/sync v0.6.0 // indirect + golang.org/x/sys v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect - golang.org/x/time v0.3.0 // indirect + golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.14.0 // indirect - google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20231030173426-d783a09b4405 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 // indirect - google.golang.org/protobuf v1.32.0 // indirect + google.golang.org/appengine v1.6.8 // indirect + google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda // indirect + google.golang.org/protobuf v1.33.0 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect gorm.io/datatypes v1.1.0 // indirect gorm.io/driver/sqlite v1.4.3 // indirect - sigs.k8s.io/yaml v1.2.0 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect ) 
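
The go.mod hunk above also removes the directive that pinned grpc back to v1.26.0 (its removed comment reads "reset grpc and protobuf deps in order to import client and server at the same time"). For readers less familiar with the mechanism, here is a minimal, purely illustrative go.mod — the module path and versions are placeholders, not taken from this repository — showing what such a pin does and what deleting it restores:

    module example.com/integrations

    go 1.21

    require google.golang.org/grpc v1.62.1

    // A replace directive like the one deleted in this patch overrides the
    // required version for every build of this module, for example:
    //
    //   replace google.golang.org/grpc v1.62.1 => google.golang.org/grpc v1.26.0
    //
    // Dropping it lets the build resolve grpc to the version in the require
    // block, which is presumably what the removed comment was working around:
    // the tests, client, and server can now share one grpc/protobuf stack.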
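
Because the http_client_test.go hunks earlier in this patch are hard to follow in flattened form, here is a compact, self-contained sketch of the suite shape the refactor converges on: a single testify suite carrying a withServiceDiscovery flag and registered twice, instead of a per-mode env map driven by a RunTestInTwoModes helper. All identifiers below (clientTestSuite, fakeClient, TestClientSuite, and so on) are illustrative stand-ins, not PD types:

    package client_test

    import (
    	"context"
    	"testing"

    	"github.com/stretchr/testify/suite"
    )

    // fakeClient stands in for the real client; it only records which
    // construction path was chosen in SetupSuite.
    type fakeClient struct{ viaServiceDiscovery bool }

    func (c *fakeClient) Close() {}

    type clientTestSuite struct {
    	suite.Suite
    	withServiceDiscovery bool // selects the construction path once per run

    	ctx    context.Context
    	cancel context.CancelFunc
    	client *fakeClient
    }

    // Each entry point runs the same suite body under a different mode,
    // replacing the old helper that looped over modes inside every test.
    func TestClientSuite(t *testing.T) {
    	suite.Run(t, &clientTestSuite{withServiceDiscovery: false})
    }

    func TestClientSuiteWithServiceDiscovery(t *testing.T) {
    	suite.Run(t, &clientTestSuite{withServiceDiscovery: true})
    }

    func (s *clientTestSuite) SetupSuite() {
    	s.ctx, s.cancel = context.WithCancel(context.Background())
    	// Build the shared client once; the flag decides how.
    	s.client = &fakeClient{viaServiceDiscovery: s.withServiceDiscovery}
    }

    func (s *clientTestSuite) TearDownSuite() {
    	s.cancel()
    	s.client.Close()
    }

    func (s *clientTestSuite) TestMode() {
    	// Test bodies read the shared fields directly instead of taking
    	// (mode, client) parameters as the old check* helpers did.
    	s.Equal(s.withServiceDiscovery, s.client.viaServiceDiscovery)
    }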
diff --git a/tests/integrations/go.sum b/tests/integrations/go.sum index 556932a2448..0701b42aea7 100644 --- a/tests/integrations/go.sum +++ b/tests/integrations/go.sum @@ -8,7 +8,6 @@ github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= -github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI= github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= @@ -20,13 +19,10 @@ github.com/VividCortex/mysqlerr v1.0.0/go.mod h1:xERx8E4tBhLvpjzdUyQiSfUxeMcATEQ github.com/Xeoncross/go-aesctr-with-hmac v0.0.0-20200623134604-12b17a7ff502 h1:L8IbaI/W6h5Cwgh0n4zGeZpVK78r/jBf9ASurHo9+/o= github.com/Xeoncross/go-aesctr-with-hmac v0.0.0-20200623134604-12b17a7ff502/go.mod h1:pmnBM9bxWSiHvC/gSWunUIyDvGn33EkP2CUjxFKtTTM= github.com/agiledragon/gomonkey/v2 v2.3.1/go.mod h1:ap1AmDzcVOAz1YpeJ3TCzIgstoaWLA6jbbgxfB4w2iY= -github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alvaroloes/enumer v1.1.2/go.mod h1:FxrjvuXoDAx9isTJrv4c+T410zFi0DtXIT0m65DJ+Wo= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/appleboy/gofight/v2 v2.1.2 h1:VOy3jow4vIK8BRQJoC/I9muxyYlJ2yb9ht2hZoS3rf4= github.com/appleboy/gofight/v2 v2.1.2/go.mod h1:frW+U1QZEdDgixycTj4CygQ48yLTUhplt43+Wczp3rw= -github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/aws/aws-sdk-go-v2 v1.17.7 h1:CLSjnhJSTSogvqUGhIC6LqFKATMRexcxLZ0i/Nzk9Eg= github.com/aws/aws-sdk-go-v2 v1.17.7/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= github.com/aws/aws-sdk-go-v2/config v1.18.19 h1:AqFK6zFNtq4i1EYu+eC7lcKHYnZagMn6SW171la0bGw= @@ -54,11 +50,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.18.7/go.mod h1:JuTnSoeePXmMVe9G8Ncjj github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8= github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= -github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= -github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bitly/go-simplejson v0.5.0 h1:6IH+V8/tVMab511d5bn4M7EwGXZf9Hj6i2xSwkNEM+Y= github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= @@ -75,7 +68,6 @@ github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5/go.mod 
h1:jtAfV github.com/cenkalti/backoff/v4 v4.0.2 h1:JIufpQLbh4DkbQoii76ItQIUFzevQSqOLZca4eamEDs= github.com/cenkalti/backoff/v4 v4.0.2/go.mod h1:eEew/i+1Q6OrCDZh3WiXYv3+nJwBASZ8Bog/87DQnVg= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= @@ -90,32 +82,22 @@ github.com/cloudfoundry/gosigar v1.3.6/go.mod h1:lNWstu5g5gw59O09Y+wsMNFzBSnU8a0 github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa h1:OaNxuTZr7kxeODyLWsRMC+OD03aFUH+mW6r2d+MWa5Y= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= -github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= -github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= -github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM= -github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f h1:JOrtw2xFKzlg+cbHpyrpLDmnN1HqhBfnX7WDiW7eG2c= -github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= -github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg= -github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4= +github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03VsM8rvUec= +github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU= +github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb h1:GIzvVQ9UkUlOhSDlqmrQAAAUd6R3E+caIisNEyWXvNE= +github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/corona10/goimagehash v1.0.2 h1:pUfB0LnsJASMPGEZLj7tGY251vF+qLGqOgEP4rUs6kA= github.com/corona10/goimagehash v1.0.2/go.mod h1:/l9umBhvcHQXVtQO1V6Gp1yD20STawkhRnnX0D1bvVI= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty 
v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= -github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385 h1:clC1lXBpe2kTj2VHdaIu9ajZQe4kcEY9j0NsnDDBZ3o= @@ -126,7 +108,6 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fogleman/gg v1.3.0 h1:/7zJX8F6AaYQc57WQCyN9cAIz+4bCJGO9B+dyW29am8= github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= @@ -149,9 +130,8 @@ github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwv github.com/gin-gonic/gin v1.8.1/go.mod h1:ji8BvRH1azfM+SYow9zQ6SZMvR8qOMZHmsCuWR9tTTk= github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= -github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= -github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= +github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= @@ -191,12 +171,12 @@ github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/gogo/protobuf v0.0.0-20171007142547-342cbe0a0415/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v0.0.0-20180717141946-636bf0302bc9/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= 
-github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= +github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang-jwt/jwt v3.2.1+incompatible h1:73Z+4BJcrTC+KczS6WvTPvRGOp1WmfEP4Q1lOd9Z/+c= github.com/golang-jwt/jwt v3.2.1+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= +github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY= +github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA= github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A= @@ -204,25 +184,21 @@ github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EO github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo= -github.com/golang/glog v1.1.2/go.mod h1:zR+okUeTbrL6EL3xHUDxZuEtGv04p5shwip1+mL/rLQ= -github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef h1:veQD95Isof8w9/WXiA+pa3tz3fJXkt5B7QaRBrM62gk= -github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v0.0.0-20180814211427-aa810b61a9c7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 
h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU= github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -230,39 +206,35 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20211122183932-1daafda22083 h1:c8EUapQFi+kjzedr4c6WqbwMdmB95+oDBWZ5XFHFYxY= github.com/google/pprof v0.0.0-20211122183932-1daafda22083/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= -github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gorilla/mux v1.7.4 h1:VuZ8uybHlWmqV03+zRzdwKL4tUnIp1MAQtp1mIFE1bc= github.com/gorilla/mux v1.7.4/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= -github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= -github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 h1:z53tR0945TRRQO/fLEVPI6SMv7ZflF0TEaTAoU7tOzg= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= +github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/QY= +github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= +github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDaL56wXCB/5+wF6uHfaI= +github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod 
h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/gtank/cryptopasta v0.0.0-20170601214702-1f550f6f2f69 h1:7xsUJsB2NrdcttQPa7JLEaGzvdbk7KvfrjgHZXOQRo0= github.com/gtank/cryptopasta v0.0.0-20170601214702-1f550f6f2f69/go.mod h1:YLEMZOtU+AZ7dhN9T/IpGhXVGly2bvkJQ+zxj3WeVQo= -github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d h1:uGg2frlt3IcT7kbV6LEp5ONv4vmoO2FW4qSO+my/aoM= github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= -github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8= github.com/jackc/chunkreader/v2 v2.0.1/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk= github.com/jackc/pgconn v1.13.0 h1:3L1XMNV2Zvca/8BYhzcRFS70Lr0WlDg16Di6SFGAbys= @@ -292,9 +264,8 @@ github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHW github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/joho/godotenv v1.4.0 h1:3l4+N6zfMWnkbPEXKng2o2/MR5mSwTrBih4ZEkkz1lg= github.com/joho/godotenv v1.4.0/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= -github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= -github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9qUBdQ= -github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8= +github.com/jonboulle/clockwork v0.4.0 h1:p4Cf1aMWXnXAUh8lVfewRBx1zaTSYKrKMF2g3ST4RZ4= +github.com/jonboulle/clockwork v0.4.0/go.mod h1:xgRqUGwRcjKCO1vbZUEtSLrqKoPSsUpK7fnezOII0kc= github.com/joomcode/errorx v1.0.1 h1:CalpDWz14ZHd68fIqluJasJosAewpz2TFaJALrUxjrk= github.com/joomcode/errorx v1.0.1/go.mod h1:kgco15ekB6cs+4Xjzo7SPeXzx38PbJzBwbnu9qfVNHQ= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -302,23 +273,17 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.5/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= -github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= +github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= -github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= @@ -335,27 +300,21 @@ github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNa github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a h1:N9zuLhTvBSRt0gWSiJswwQ2HqDmtX/ZCDJURnKUt1Ik= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a/go.mod h1:JKx41uQRwqlTZabZc+kILPrO/3jlKnQ2Z8b7YiVw5cE= -github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA= github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= -github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI= github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= -github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/microsoft/go-mssqldb v0.17.0 
h1:Fto83dMZPnYv1Zwx5vHHxpNraeEaUlQ/hhHLgZiaenE= github.com/microsoft/go-mssqldb v0.17.0/go.mod h1:OkoNGhGEs8EZqchVTtochlXruEhEOaO4S0d2sB5aeGQ= github.com/minio/sio v0.3.0 h1:syEFBewzOMOYVzSTFpp1MqpSZk8rUNbz8VIIc+PNzus= github.com/minio/sio v0.3.0/go.mod h1:8b0yPp2avGThviy/+OCJBI6OMpvxoUuiLvE6F1lebhw= -github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -363,7 +322,6 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lN github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5 h1:BvoENQQU+fZ9uukda/RzCAL/191HHwJA5b13R6diVlY= @@ -371,10 +329,8 @@ github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5/go.mod h1:jpp1/29i3P1S github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= -github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/oleiade/reflections v1.0.1 h1:D1XO3LVEYroYskEsoSiGItp9RUxG6jWnCVvrqH0HHQM= github.com/oleiade/reflections v1.0.1/go.mod h1:rdFxbxq4QXVZWj0F+e9jqjDkc7dbp97vkRixKo2JR60= -github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= @@ -382,6 +338,9 @@ github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042 github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.20.1 h1:PA/3qinGoukvymdIDV8pii6tiZgC8kbmJO6Z5+b002Q= github.com/onsi/gomega v1.20.1/go.mod h1:DtrZpjmvpn2mPm4YWQa0/ALMDj9v4YxLgojwPeREyVo= +github.com/opentracing/basictracer-go v1.1.0 h1:Oa1fTSBvAl8pa3U+IJYqrKm0NALwH9OsgwOqDv4xJW0= +github.com/opentracing/basictracer-go v1.1.0/go.mod h1:V2HZueSJEp879yv285Aap1BS69fQMD+MNP1mRs6mBQc= +github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= 
github.com/otiai10/copy v1.7.0/go.mod h1:rmRl6QPdJj6EiUqXQ/4Nn2lLXoNQjFCQbbNrxgc/t3U= @@ -390,7 +349,6 @@ github.com/otiai10/curr v1.0.0/go.mod h1:LskTG5wDwr8Rs+nNQ+1LlxRjAtTZZjtJW4rMXl6 github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT91xUo= github.com/otiai10/mint v1.3.3/go.mod h1:/yxELlJQ0ufhjUwhshSj+wFjZ78CnZ48/1wtmBH1OTc= github.com/pascaldekloe/name v0.0.0-20180628100202-0fd16699aae1/go.mod h1:eD5JxqMiuNYyFNmyY9rkJ/slN8y59oEu4Ei7F8OoKWQ= -github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= @@ -410,19 +368,18 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ue github.com/pingcap/failpoint v0.0.0-20220801062533-2eaa32854a6c h1:CgbKAHto5CQgWM9fSBIvaxsJHuGP0uM74HXtv3MyyGQ= github.com/pingcap/failpoint v0.0.0-20220801062533-2eaa32854a6c/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= -github.com/pingcap/kvproto v0.0.0-20231226064240-4f28b82c7860 h1:yv9mYJJCKv2mKcW2nEYUgfRkfeyapRWB3GktKEE4sv8= -github.com/pingcap/kvproto v0.0.0-20231226064240-4f28b82c7860/go.mod h1:rXxWk2UnwfUhLXha1jxRWPADw9eMZGWEWCg92Tgmb/8= +github.com/pingcap/kvproto v0.0.0-20240403065636-c699538f7aa1 h1:vDWWJKU6ztczn24XixahtLwcnJ15DOtSRIRM3jVtZNU= +github.com/pingcap/kvproto v0.0.0-20240403065636-c699538f7aa1/go.mod h1:rXxWk2UnwfUhLXha1jxRWPADw9eMZGWEWCg92Tgmb/8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8IDP+SZrdhV1Kibl9KrHxJ9eciw= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 h1:QV6jqlfOkh8hqvEAgwBZa+4bSgO0EeKC7s5c6Luam2I= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21/go.mod h1:QYnjfA95ZaMefyl1NO8oPtKeb8pYUdnDVhQgf+qdpjM= -github.com/pingcap/tidb-dashboard v0.0.0-20240111062855-41f7c8011953 h1:vY/bY5vkSvvuXB1030AUmy0LFhuEA53ryVdF/bTbFXU= -github.com/pingcap/tidb-dashboard v0.0.0-20240111062855-41f7c8011953/go.mod h1:ucZBRz52icb23T/5Z4CsuUHmarYiin7p2MeiVBe+o8c= +github.com/pingcap/tidb-dashboard v0.0.0-20240326110213-9768844ff5d7 h1:eFu98FbfJB7PKWOtkaV6YNXXJWqDhczQX56j/iucgU4= +github.com/pingcap/tidb-dashboard v0.0.0-20240326110213-9768844ff5d7/go.mod h1:ucZBRz52icb23T/5Z4CsuUHmarYiin7p2MeiVBe+o8c= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e h1:FBaTXU8C3xgt/drM58VHxojHo/QoG1oPsgWTGvaSpO4= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -431,28 +388,15 @@ github.com/pmezard/go-difflib v1.0.0/go.mod 
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b h1:0LFwY6Q3gMACTjAbMZBjXAqTOzOwFaj2Ld6cjeQ7Rig= github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= -github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= -github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk= -github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA= -github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= +github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= -github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= -github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= -github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.46.0 h1:doXzt5ybi1HBKpsZOL0sSkaNHJJqkyfEWZGGqqScV0Y= -github.com/prometheus/common v0.46.0/go.mod h1:Tp0qkxpb9Jsg54QMe+EAmqXkSV7Evdy1BTn+g2pa/hQ= -github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= -github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= -github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= -github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= +github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos= +github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8= +github.com/prometheus/common v0.51.1 h1:eIjN50Bwglz6a/c3hAgSMcofL3nD+nFQkV6Dd4DsQCw= +github.com/prometheus/common v0.51.1/go.mod h1:lrWtQx+iDfn2mbH5GUzlH9TSHyfZpHkSiG1W7y3sF2Q= +github.com/prometheus/procfs v0.13.0 h1:GqzLlQyfsPbaEHaQkO7tbDlriv/4o5Hudv6OXHGKX7o= +github.com/prometheus/procfs v0.13.0/go.mod h1:cd4PFCR54QLnGKPaKGA6l+cfuNXtht43ZKY6tow0Y1g= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod 
h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= @@ -462,6 +406,7 @@ github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncj github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik= github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/samber/lo v1.37.0 h1:XjVcB8g6tgUp8rsPsJ2CvhClfImrpL04YpQHXeHPhRw= github.com/samber/lo v1.37.0/go.mod h1:9vaz2O4o8oOnK23pd2TrXufcbdbJIa3b6cstBWKpopA= github.com/sasha-s/go-deadlock v0.2.0 h1:lMqc+fUb7RrFS3gQLtoQsJ7/6TV/pAIFvBsqX73DK8Y= @@ -478,28 +423,20 @@ github.com/shoenig/test v0.6.3/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnj github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0 h1:mj/nMDAwTBiaCqMEs4cYCqF7pO6Np7vhy1D1wcQGz+E= github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0/go.mod h1:919LwcH0M7/W4fcZ0/jy0qGght1GIhqyS/EgWGH2j5Q= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= -github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I= -github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= -github.com/smallnest/chanx v0.0.0-20221229104322-eb4c998d2072 h1:Txo4SXVJq/OgEjwgkWoxkMoTjGlcrgsQE/XSghjmu0w= -github.com/smallnest/chanx v0.0.0-20221229104322-eb4c998d2072/go.mod h1:+4nWMF0+CqEcU74SnX2NxaGqZ8zX4pcQ8Jcs77DbX5A= +github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/smallnest/chanx v1.2.1-0.20240521153536-01121e21ff99 h1:fmanhZtn5RKRljCjX46H+Q9/PECsHbflXm0RdrnK9e4= +github.com/smallnest/chanx v1.2.1-0.20240521153536-01121e21ff99/go.mod h1:+4nWMF0+CqEcU74SnX2NxaGqZ8zX4pcQ8Jcs77DbX5A= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E= -github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= -github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= -github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= -github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/cobra v1.0.0 h1:6m/oheQuQ13N9ks4hubMG6BnvwOeaJrqSPLahSnczz8= -github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE= -github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= -github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/spf13/pflag v1.0.3/go.mod 
h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= +github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= @@ -540,12 +477,10 @@ github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7Am github.com/tklauser/numcpus v0.3.0/go.mod h1:yFGUr7TUHQRAhyqBcEg0Ge34zDBAsIvJJcyE6boqnA8= github.com/tklauser/numcpus v0.6.0 h1:kebhY2Qt+3U6RNK7UqpYNA+tJ23IBEGKkB7JQBfDYms= github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4= -github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 h1:j6JEOq5QWFker+d7mFQYOhjTZonQ7YkLTHm56dbn+yM= -github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 h1:6fotK7otjonDflCTK0BCfls4SPy3NcCVb5dqqmbRknE= +github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75/go.mod h1:KO6IkyS8Y3j8OdNO85qEYBsRPuteD+YciPomcXdrMnk= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= -github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= github.com/ugorji/go/codec v0.0.0-20181022190402-e5e69e061d4f/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= @@ -555,7 +490,6 @@ github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4d github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/unrolled/render v1.0.1 h1:VDDnQQVfBMsOsp3VaCJszSO0nkBIVEYoPWeRThk9spY= github.com/unrolled/render v1.0.1/go.mod h1:gN9T0NhL4Bfbwu8ann7Ry/TGHYfosul+J0obPf6NBdM= -github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI= github.com/urfave/negroni v0.3.0 h1:PaXOb61mWeZJxc1Ji2xJjpVg9QfPo0rrB+lHyBxGNSU= github.com/urfave/negroni v0.3.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4= @@ -563,22 +497,19 @@ github.com/vmihailenco/msgpack/v5 v5.3.5 h1:5gO0H1iULLWGhs2H5tbAHIZTV8/cYafcFOr9 github.com/vmihailenco/msgpack/v5 v5.3.5/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc= github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= 
-github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= -github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= -github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 h1:S2dVYn90KE98chqDkyE9Z4N61UnQd+KOfgp5Iu53llk= +github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= -go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= -go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= -go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= -go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793 h1:fqmtdYQlwZ/vKWSz5amW+a4cnjg23ojz5iL7rjf08Wg= -go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793/go.mod h1:eBhtbxXP1qpW0F6+WxoJ64DM1Mrfx46PHtVxEdkLe0I= +go.etcd.io/bbolt v1.3.9 h1:8x7aARPEXiXbHmtUwAIv7eV2fQFHrLLavdiJ3uzJXoI= +go.etcd.io/bbolt v1.3.9/go.mod h1:zaO32+Ti0PK1ivdPtgMESzuzL2VPoIG1PCQNvOdo/dE= +go.etcd.io/etcd v0.5.0-alpha.5.0.20240320135013-950cd5fbe6ca h1:LCc0GAhfJ+qDqnUbE7ybQ0mTz1dNRn2iiM6e183p/5E= +go.etcd.io/etcd v0.5.0-alpha.5.0.20240320135013-950cd5fbe6ca/go.mod h1:1AyK+XVcIwjbjw5EYrhT+IiMYSgRZTohGb2ceZ0/US8= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= @@ -603,16 +534,15 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.12.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= +go.uber.org/zap v1.18.1/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= -go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= -go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/arch 
v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= -golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190513172903-22d7a77e9e5f/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -621,9 +551,8 @@ golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc= -golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= +golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= +golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4 h1:QLureRX3moex6NVu/Lr4MGakp9FdA7sBHGBmvRW7NaM= golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= @@ -634,6 +563,7 @@ golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTk golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5 h1:2M3HP5CCK1Si9FQhwnzYhXdG6DXeebvUHFpre8QvbyI= golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= @@ -649,32 +579,31 @@ golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181005035420-146acd28ed58/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod 
h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM= golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211123203042-d83791d6bcd9/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.16.0 h1:aDkGMBSYxElaoP81NpoUoz2oo2R2wHdZpGToUxfyQrQ= -golang.org/x/oauth2 v0.16.0/go.mod h1:hqZ+0LWXsiVoZpeld6jVt06P3adbS2Uu911W1SsJv2o= +golang.org/x/oauth2 v0.18.0 h1:09qnuIAgzdx1XplqJvW6CQqMCtGZykZWcXzPMPUusvI= +golang.org/x/oauth2 v0.18.0/go.mod h1:Wf7knwG0MPoWIMMBgFlEaSUDaKskp0dCfrlJRJXbBi8= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -684,27 +613,25 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.4.0 
h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ= -golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181228144115-9a3f9b0469bb/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -712,15 +639,16 @@ golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210816074244-15123e1e1f71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys 
v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -729,16 +657,15 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= -golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= @@ -768,41 +695,42 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod 
h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= +google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20181004005441-af9cb2a35e7f/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20231030173426-d783a09b4405 h1:I6WNifs6pF9tNdSob2W24JtyxIYjzFB9qDlpUC76q+U= -google.golang.org/genproto v0.0.0-20231030173426-d783a09b4405/go.mod h1:3WDQMjmJk36UQhjQ89emUzb1mdaHcPeeAh4SCBKznB4= -google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b h1:CIC2YMXmIhYw6evmhPxBKJ4fmLbOFtXQN/GV3XOZR8k= -google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b/go.mod h1:IBQ646DjkDkvUIsVq/cc03FUFQ9wbZu7yE396YcL870= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 h1:Jyp0Hsi0bmHXG6k9eATXoYtjd6e2UzZ1SCn/wIupY14= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17/go.mod h1:oQ5rr10WTTMvP4A36n8JpR1OrO1BEiV4f78CneXZxkA= +google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda h1:wu/KJm9KJwpfHWhkkZGohVC6KRrc1oJNr4jwtQMOQXw= +google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda/go.mod h1:g2LLCvCeCSir/JJSWosk19BR4NVxGqHUC6rxIRsd7Aw= +google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda h1:b6F6WIV4xHHD0FA4oIyzU6mHWg2WI2X1RBehwa5QN38= +google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda/go.mod h1:AHcE/gZH76Bk/ROZhQphlRoWo5xKDEtz3eVEO1LfA8c= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda h1:LI5DOvAxUPMv/50agcLLoo+AdWc1irS9Rzz4vPuD1V4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.24.0/go.mod h1:XDChyiUovWa60DnaeDeZmSW86xtLtjtZbwvSiRnRtcA= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.26.0 h1:2dTRdpdFEEhJYQD8EMLB61nnrzSCTbG38PhqdhvOltg= -google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= +google.golang.org/grpc v1.62.1 h1:B4n+nfKzOICUXMgyrNd19h/I9oH0L1pizfk1d4zSgTk= +google.golang.org/grpc 
v1.62.1/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE= +google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9 h1:ATnmU8nL2NfIyTSiBvJVDIDIr3qBmeW+c7z7XU21eWs= +google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9/go.mod h1:j5uROIAAgi3YmtiETMt1LW0d/lHqQ7wwrIY4uGRXLQ4= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= -google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= -gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE= @@ -810,10 +738,8 @@ gopkg.in/go-playground/validator.v8 v8.18.2/go.mod h1:RX2a/7Ha8BgOhfk7j780h4/u/R gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= -gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= @@ -847,6 +773,5 @@ honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt moul.io/zapgorm2 v1.1.0 h1:qwAlMBYf+qJkJ7PAzJl4oCe6eS6QGiKAXUPeis0+RBE= moul.io/zapgorm2 v1.1.0/go.mod h1:emRfKjNqSzVj5lcgasBdovIXY1jSOwFz2GQZn1Rddks= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= -sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= -sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q= -sigs.k8s.io/yaml v1.2.0/go.mod 
h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc=
+sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
+sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
diff --git a/tests/integrations/mcs/discovery/register_test.go b/tests/integrations/mcs/discovery/register_test.go
index 1b61a264232..69d53463818 100644
--- a/tests/integrations/mcs/discovery/register_test.go
+++ b/tests/integrations/mcs/discovery/register_test.go
@@ -124,9 +124,15 @@ func (suite *serverRegisterTestSuite) checkServerPrimaryChange(serviceName strin
    re.Empty(primary)
    serverMap := make(map[string]bs.Server)
+   var cleanups []func()
+   defer func() {
+       for _, cleanup := range cleanups {
+           cleanup()
+       }
+   }()
    for i := 0; i < serverNum; i++ {
        s, cleanup := suite.addServer(serviceName)
-       defer cleanup()
+       cleanups = append(cleanups, cleanup)
        serverMap[s.GetAddr()] = s
    }
diff --git a/tests/integrations/mcs/keyspace/tso_keyspace_group_test.go b/tests/integrations/mcs/keyspace/tso_keyspace_group_test.go
index ff9769cd1a5..7c95b99bcc7 100644
--- a/tests/integrations/mcs/keyspace/tso_keyspace_group_test.go
+++ b/tests/integrations/mcs/keyspace/tso_keyspace_group_test.go
@@ -47,7 +47,6 @@ type keyspaceGroupTestSuite struct {
    cluster *tests.TestCluster
    server *tests.TestServer
    backendEndpoints string
-   dialClient *http.Client
 }
 func TestKeyspaceGroupTestSuite(t *testing.T) {
@@ -67,11 +66,6 @@ func (suite *keyspaceGroupTestSuite) SetupTest() {
    suite.server = cluster.GetLeaderServer()
    re.NoError(suite.server.BootstrapCluster())
    suite.backendEndpoints = suite.server.GetAddr()
-   suite.dialClient = &http.Client{
-       Transport: &http.Transport{
-           DisableKeepAlives: true,
-       },
-   }
    suite.cleanupFunc = func() {
        cancel()
    }
@@ -88,9 +82,15 @@ func (suite *keyspaceGroupTestSuite) TestAllocNodesUpdate() {
    re := suite.Require()
    // add three nodes.
nodes := make(map[string]bs.Server) + var cleanups []func() + defer func() { + for _, cleanup := range cleanups { + cleanup() + } + }() for i := 0; i < utils.DefaultKeyspaceGroupReplicaCount+1; i++ { s, cleanup := tests.StartSingleTSOTestServer(suite.ctx, re, suite.backendEndpoints, tempurl.Alloc()) - defer cleanup() + cleanups = append(cleanups, cleanup) nodes[s.GetAddr()] = s } tests.WaitForPrimaryServing(re, nodes) @@ -138,9 +138,15 @@ func (suite *keyspaceGroupTestSuite) TestAllocNodesUpdate() { func (suite *keyspaceGroupTestSuite) TestAllocReplica() { re := suite.Require() nodes := make(map[string]bs.Server) + var cleanups []func() + defer func() { + for _, cleanup := range cleanups { + cleanup() + } + }() for i := 0; i < utils.DefaultKeyspaceGroupReplicaCount; i++ { s, cleanup := tests.StartSingleTSOTestServer(suite.ctx, re, suite.backendEndpoints, tempurl.Alloc()) - defer cleanup() + cleanups = append(cleanups, cleanup) nodes[s.GetAddr()] = s } tests.WaitForPrimaryServing(re, nodes) @@ -232,9 +238,15 @@ func (suite *keyspaceGroupTestSuite) TestSetNodes() { re := suite.Require() nodes := make(map[string]bs.Server) nodesList := []string{} + var cleanups []func() + defer func() { + for _, cleanup := range cleanups { + cleanup() + } + }() for i := 0; i < utils.DefaultKeyspaceGroupReplicaCount; i++ { s, cleanup := tests.StartSingleTSOTestServer(suite.ctx, re, suite.backendEndpoints, tempurl.Alloc()) - defer cleanup() + cleanups = append(cleanups, cleanup) nodes[s.GetAddr()] = s nodesList = append(nodesList, s.GetAddr()) } @@ -279,7 +291,7 @@ func (suite *keyspaceGroupTestSuite) TestSetNodes() { Nodes: []string{nodesList[0]}, } _, code = suite.trySetNodesForKeyspaceGroup(re, id, params) - re.Equal(http.StatusBadRequest, code) + re.Equal(http.StatusOK, code) // the keyspace group is not exist. id = 2 @@ -293,9 +305,15 @@ func (suite *keyspaceGroupTestSuite) TestSetNodes() { func (suite *keyspaceGroupTestSuite) TestDefaultKeyspaceGroup() { re := suite.Require() nodes := make(map[string]bs.Server) + var cleanups []func() + defer func() { + for _, cleanup := range cleanups { + cleanup() + } + }() for i := 0; i < utils.DefaultKeyspaceGroupReplicaCount; i++ { s, cleanup := tests.StartSingleTSOTestServer(suite.ctx, re, suite.backendEndpoints, tempurl.Alloc()) - defer cleanup() + cleanups = append(cleanups, cleanup) nodes[s.GetAddr()] = s } tests.WaitForPrimaryServing(re, nodes) @@ -317,12 +335,108 @@ func (suite *keyspaceGroupTestSuite) TestDefaultKeyspaceGroup() { } } +func (suite *keyspaceGroupTestSuite) TestAllocNodes() { + re := suite.Require() + // add three nodes. + nodes := make(map[string]bs.Server) + var cleanups []func() + defer func() { + for _, cleanup := range cleanups { + cleanup() + } + }() + for i := 0; i < utils.DefaultKeyspaceGroupReplicaCount+1; i++ { + s, cleanup := tests.StartSingleTSOTestServer(suite.ctx, re, suite.backendEndpoints, tempurl.Alloc()) + cleanups = append(cleanups, cleanup) + nodes[s.GetAddr()] = s + } + tests.WaitForPrimaryServing(re, nodes) + + // create a keyspace group. 
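The new TestAllocNodes and TestAllocOneNode cases added below wait for the keyspace-group member list to converge with testutil.Eventually. A simplified stand-in showing the shape of such a poller (not the pd implementation; the timings here are arbitrary):

```go
package example

import (
	"testing"
	"time"
)

// eventually re-evaluates cond until it returns true or the wait budget is
// exhausted. It only mimics how testutil.Eventually is used in the tests
// below; the real helper takes *require.Assertions plus option arguments.
func eventually(t *testing.T, cond func() bool, waitFor, tick time.Duration) {
	t.Helper()
	deadline := time.Now().Add(waitFor)
	for time.Now().Before(deadline) {
		if cond() {
			return
		}
		time.Sleep(tick)
	}
	t.Fatalf("condition not met within %v", waitFor)
}
```

The added tests poll this way until the stopped node's address disappears from the group's member list and the expected replica count is reached.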
+ kgs := &handlers.CreateKeyspaceGroupParams{KeyspaceGroups: []*endpoint.KeyspaceGroup{ + { + ID: uint32(1), + UserKind: endpoint.Standard.String(), + }, + }} + code := suite.tryCreateKeyspaceGroup(re, kgs) + re.Equal(http.StatusOK, code) + + // alloc nodes for the keyspace group + var kg *endpoint.KeyspaceGroup + testutil.Eventually(re, func() bool { + kg, code = suite.tryGetKeyspaceGroup(re, utils.DefaultKeyspaceGroupID) + return code == http.StatusOK && kg != nil && len(kg.Members) == utils.DefaultKeyspaceGroupReplicaCount + }) + stopNode := kg.Members[0].Address + // close one of members + nodes[stopNode].Close() + + // the member list will be updated + testutil.Eventually(re, func() bool { + kg, code = suite.tryGetKeyspaceGroup(re, utils.DefaultKeyspaceGroupID) + for _, member := range kg.Members { + if member.Address == stopNode { + return false + } + } + return code == http.StatusOK && kg != nil && len(kg.Members) == utils.DefaultKeyspaceGroupReplicaCount + }) +} + +func (suite *keyspaceGroupTestSuite) TestAllocOneNode() { + re := suite.Require() + // add one tso server + nodes := make(map[string]bs.Server) + oldTSOServer, cleanupOldTSOserver := tests.StartSingleTSOTestServer(suite.ctx, re, suite.backendEndpoints, tempurl.Alloc()) + defer cleanupOldTSOserver() + nodes[oldTSOServer.GetAddr()] = oldTSOServer + + tests.WaitForPrimaryServing(re, nodes) + + // create a keyspace group. + kgs := &handlers.CreateKeyspaceGroupParams{KeyspaceGroups: []*endpoint.KeyspaceGroup{ + { + ID: uint32(1), + UserKind: endpoint.Standard.String(), + }, + }} + code := suite.tryCreateKeyspaceGroup(re, kgs) + re.Equal(http.StatusOK, code) + + // alloc nodes for the keyspace group + var kg *endpoint.KeyspaceGroup + testutil.Eventually(re, func() bool { + kg, code = suite.tryGetKeyspaceGroup(re, utils.DefaultKeyspaceGroupID) + return code == http.StatusOK && kg != nil && len(kg.Members) == 1 + }) + stopNode := kg.Members[0].Address + // close old tso server + nodes[stopNode].Close() + + // create a new tso server + newTSOServer, cleanupNewTSOServer := tests.StartSingleTSOTestServer(suite.ctx, re, suite.backendEndpoints, tempurl.Alloc()) + defer cleanupNewTSOServer() + nodes[newTSOServer.GetAddr()] = newTSOServer + + tests.WaitForPrimaryServing(re, nodes) + + // the member list will be updated + testutil.Eventually(re, func() bool { + kg, code = suite.tryGetKeyspaceGroup(re, utils.DefaultKeyspaceGroupID) + if len(kg.Members) != 0 && kg.Members[0].Address == stopNode { + return false + } + return code == http.StatusOK && kg != nil && len(kg.Members) == 1 + }) +} + func (suite *keyspaceGroupTestSuite) tryAllocNodesForKeyspaceGroup(re *require.Assertions, id int, request *handlers.AllocNodesForKeyspaceGroupParams) ([]endpoint.KeyspaceGroupMember, int) { data, err := json.Marshal(request) re.NoError(err) httpReq, err := http.NewRequest(http.MethodPost, suite.server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d/alloc", id), bytes.NewBuffer(data)) re.NoError(err) - resp, err := suite.dialClient.Do(httpReq) + resp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer resp.Body.Close() nodes := make([]endpoint.KeyspaceGroupMember, 0) @@ -339,7 +453,7 @@ func (suite *keyspaceGroupTestSuite) tryCreateKeyspaceGroup(re *require.Assertio re.NoError(err) httpReq, err := http.NewRequest(http.MethodPost, suite.server.GetAddr()+keyspaceGroupsPrefix, bytes.NewBuffer(data)) re.NoError(err) - resp, err := suite.dialClient.Do(httpReq) + resp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer 
resp.Body.Close() return resp.StatusCode @@ -348,7 +462,7 @@ func (suite *keyspaceGroupTestSuite) tryCreateKeyspaceGroup(re *require.Assertio func (suite *keyspaceGroupTestSuite) tryGetKeyspaceGroup(re *require.Assertions, id uint32) (*endpoint.KeyspaceGroup, int) { httpReq, err := http.NewRequest(http.MethodGet, suite.server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d", id), http.NoBody) re.NoError(err) - resp, err := suite.dialClient.Do(httpReq) + resp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer resp.Body.Close() kg := &endpoint.KeyspaceGroup{} @@ -365,7 +479,7 @@ func (suite *keyspaceGroupTestSuite) trySetNodesForKeyspaceGroup(re *require.Ass re.NoError(err) httpReq, err := http.NewRequest(http.MethodPatch, suite.server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d", id), bytes.NewBuffer(data)) re.NoError(err) - resp, err := suite.dialClient.Do(httpReq) + resp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer resp.Body.Close() if resp.StatusCode != http.StatusOK { diff --git a/tests/integrations/mcs/members/member_test.go b/tests/integrations/mcs/members/member_test.go index 41cd30bb5b7..d650d1ded4f 100644 --- a/tests/integrations/mcs/members/member_test.go +++ b/tests/integrations/mcs/members/member_test.go @@ -34,7 +34,7 @@ type memberTestSuite struct { cluster *tests.TestCluster server *tests.TestServer backendEndpoints string - dialClient pdClient.Client + pdClient pdClient.Client } func TestMemberTestSuite(t *testing.T) { @@ -53,7 +53,7 @@ func (suite *memberTestSuite) SetupTest() { suite.server = cluster.GetLeaderServer() re.NoError(suite.server.BootstrapCluster()) suite.backendEndpoints = suite.server.GetAddr() - suite.dialClient = pdClient.NewClient("mcs-member-test", []string{suite.server.GetAddr()}) + suite.pdClient = pdClient.NewClient("mcs-member-test", []string{suite.server.GetAddr()}) // TSO nodes := make(map[string]bs.Server) @@ -86,17 +86,30 @@ func (suite *memberTestSuite) TearDownTest() { for _, cleanup := range suite.cleanupFunc { cleanup() } - suite.dialClient.Close() + if suite.pdClient != nil { + suite.pdClient.Close() + } suite.cluster.Destroy() } func (suite *memberTestSuite) TestMembers() { re := suite.Require() - members, err := suite.dialClient.GetMicroServiceMembers(suite.ctx, "tso") + members, err := suite.pdClient.GetMicroServiceMembers(suite.ctx, "tso") re.NoError(err) re.Len(members, utils.DefaultKeyspaceGroupReplicaCount) - members, err = suite.dialClient.GetMicroServiceMembers(suite.ctx, "scheduling") + members, err = suite.pdClient.GetMicroServiceMembers(suite.ctx, "scheduling") re.NoError(err) re.Len(members, 3) } + +func (suite *memberTestSuite) TestPrimary() { + re := suite.Require() + primary, err := suite.pdClient.GetMicroServicePrimary(suite.ctx, "tso") + re.NoError(err) + re.NotEmpty(primary) + + primary, err = suite.pdClient.GetMicroServicePrimary(suite.ctx, "scheduling") + re.NoError(err) + re.NotEmpty(primary) +} diff --git a/tests/integrations/mcs/resourcemanager/resource_manager_test.go b/tests/integrations/mcs/resourcemanager/resource_manager_test.go index e83e9db23cd..ab7cd5321ad 100644 --- a/tests/integrations/mcs/resourcemanager/resource_manager_test.go +++ b/tests/integrations/mcs/resourcemanager/resource_manager_test.go @@ -34,6 +34,7 @@ import ( "github.com/tikv/pd/client/resource_group/controller" "github.com/tikv/pd/pkg/mcs/resourcemanager/server" "github.com/tikv/pd/pkg/utils/testutil" + "github.com/tikv/pd/pkg/utils/typeutil" "github.com/tikv/pd/tests" "go.uber.org/goleak" @@ -77,7 +78,7 @@ 
func (suite *resourceManagerClientTestSuite) SetupSuite() { suite.client, err = pd.NewClientWithContext(suite.ctx, suite.cluster.GetConfig().GetClientURLs(), pd.SecurityOption{}) re.NoError(err) leader := suite.cluster.GetServer(suite.cluster.WaitLeader()) - suite.waitLeader(re, suite.client, leader.GetAddr()) + waitLeader(re, suite.client, leader.GetAddr()) suite.initGroups = []*rmpb.ResourceGroup{ { @@ -134,13 +135,13 @@ func (suite *resourceManagerClientTestSuite) SetupSuite() { } } -func (suite *resourceManagerClientTestSuite) waitLeader(re *require.Assertions, cli pd.Client, leaderAddr string) { +func waitLeader(re *require.Assertions, cli pd.Client, leaderAddr string) { innerCli, ok := cli.(interface{ GetServiceDiscovery() pd.ServiceDiscovery }) re.True(ok) re.NotNil(innerCli) testutil.Eventually(re, func() bool { innerCli.GetServiceDiscovery().ScheduleCheckMemberChanged() - return innerCli.GetServiceDiscovery().GetServingAddr() == leaderAddr + return innerCli.GetServiceDiscovery().GetServingURL() == leaderAddr }) } @@ -176,7 +177,7 @@ func (suite *resourceManagerClientTestSuite) resignAndWaitLeader(re *require.Ass re.NoError(suite.cluster.ResignLeader()) newLeader := suite.cluster.GetServer(suite.cluster.WaitLeader()) re.NotNil(newLeader) - suite.waitLeader(re, suite.client, newLeader.GetAddr()) + waitLeader(re, suite.client, newLeader.GetAddr()) } func (suite *resourceManagerClientTestSuite) TestWatchResourceGroup() { @@ -348,7 +349,7 @@ type tokenConsumptionPerSecond struct { waitDuration time.Duration } -func (t tokenConsumptionPerSecond) makeReadRequest() *controller.TestRequestInfo { +func (tokenConsumptionPerSecond) makeReadRequest() *controller.TestRequestInfo { return controller.NewTestRequestInfo(false, 0, 0) } @@ -364,7 +365,7 @@ func (t tokenConsumptionPerSecond) makeReadResponse() *controller.TestResponseIn ) } -func (t tokenConsumptionPerSecond) makeWriteResponse() *controller.TestResponseInfo { +func (tokenConsumptionPerSecond) makeWriteResponse() *controller.TestResponseInfo { return controller.NewTestResponseInfo( 0, time.Duration(0), @@ -679,7 +680,7 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { _, penalty, _, _, err = c.OnRequestWait(suite.ctx, resourceGroupName, req2) re.NoError(err) re.Equal(60.0, penalty.WriteBytes) - re.InEpsilon(penalty.TotalCpuTimeMs, 10.0/1000.0/1000.0, 1e-6) + re.InEpsilon(10.0/1000.0/1000.0, penalty.TotalCpuTimeMs, 1e-6) _, err = c.OnResponse(resourceGroupName, req2, resp2) re.NoError(err) @@ -705,7 +706,6 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { c.Stop() } -// nolint:gosec func (suite *resourceManagerClientTestSuite) TestAcquireTokenBucket() { re := suite.Require() cli := suite.client @@ -957,9 +957,9 @@ func (suite *resourceManagerClientTestSuite) TestBasicResourceGroupCURD() { } createJSON, err := json.Marshal(group) re.NoError(err) - resp, err := http.Post(getAddr(i)+"/resource-manager/api/v1/config/group", "application/json", strings.NewReader(string(createJSON))) + resp, err := tests.TestDialClient.Post(getAddr(i)+"/resource-manager/api/v1/config/group", "application/json", strings.NewReader(string(createJSON))) re.NoError(err) - defer resp.Body.Close() + resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) if tcase.isNewGroup { finalNum++ @@ -974,7 +974,7 @@ func (suite *resourceManagerClientTestSuite) TestBasicResourceGroupCURD() { req.Header.Set("Content-Type", "application/json") resp, err = http.DefaultClient.Do(req) re.NoError(err) - defer resp.Body.Close() + 
resp.Body.Close() if tcase.modifySuccess { re.Equal(http.StatusOK, resp.StatusCode) } else { @@ -982,11 +982,11 @@ func (suite *resourceManagerClientTestSuite) TestBasicResourceGroupCURD() { } // Get Resource Group - resp, err = http.Get(getAddr(i) + "/resource-manager/api/v1/config/group/" + tcase.name) + resp, err = tests.TestDialClient.Get(getAddr(i) + "/resource-manager/api/v1/config/group/" + tcase.name) re.NoError(err) - defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) respString, err := io.ReadAll(resp.Body) + resp.Body.Close() re.NoError(err) re.Contains(string(respString), tcase.name) if tcase.modifySuccess { @@ -995,11 +995,11 @@ func (suite *resourceManagerClientTestSuite) TestBasicResourceGroupCURD() { // Last one, Check list and delete all resource groups if i == len(testCasesSet1)-1 { - resp, err := http.Get(getAddr(i) + "/resource-manager/api/v1/config/groups") + resp, err := tests.TestDialClient.Get(getAddr(i) + "/resource-manager/api/v1/config/groups") re.NoError(err) - defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) respString, err := io.ReadAll(resp.Body) + resp.Body.Close() re.NoError(err) groups := make([]*server.ResourceGroup, 0) json.Unmarshal(respString, &groups) @@ -1011,8 +1011,8 @@ func (suite *resourceManagerClientTestSuite) TestBasicResourceGroupCURD() { re.NoError(err) resp, err := http.DefaultClient.Do(req) re.NoError(err) - defer resp.Body.Close() respString, err := io.ReadAll(resp.Body) + resp.Body.Close() re.NoError(err) if g.Name == "default" { re.Contains(string(respString), "cannot delete reserved group") @@ -1023,11 +1023,11 @@ func (suite *resourceManagerClientTestSuite) TestBasicResourceGroupCURD() { } // verify again - resp1, err := http.Get(getAddr(i) + "/resource-manager/api/v1/config/groups") + resp1, err := tests.TestDialClient.Get(getAddr(i) + "/resource-manager/api/v1/config/groups") re.NoError(err) - defer resp1.Body.Close() re.Equal(http.StatusOK, resp1.StatusCode) respString1, err := io.ReadAll(resp1.Body) + resp1.Body.Close() re.NoError(err) groups1 := make([]server.ResourceGroup, 0) json.Unmarshal(respString1, &groups1) @@ -1045,7 +1045,7 @@ func (suite *resourceManagerClientTestSuite) TestBasicResourceGroupCURD() { for _, s := range servers { serverList = append(serverList, s) } - re.NoError(suite.cluster.RunServers(serverList)) + re.NoError(tests.RunServers(serverList)) suite.cluster.WaitLeader() // re-connect client as well suite.client, err = pd.NewClientWithContext(suite.ctx, suite.cluster.GetConfig().GetClientURLs(), pd.SecurityOption{}) @@ -1108,7 +1108,7 @@ func (suite *resourceManagerClientTestSuite) TestResourceGroupRUConsumption() { re.NoError(err) re.Equal(g.RUStats, testConsumption) - // update resoruce group, ru stats not change + // update resource group, ru stats not change g.RUSettings.RU.Settings.FillRate = 12345 _, err = cli.ModifyResourceGroup(suite.ctx, g) re.NoError(err) @@ -1313,9 +1313,8 @@ func (suite *resourceManagerClientTestSuite) TestCheckBackgroundJobs() { enableBackgroundGroup := func(enable bool) string { if enable { return "background_enable" - } else { - return "background_unable" } + return "background_unable" } // Mock add resource group. 
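Several hunks above and below move resp.Body.Close() from a defer to a direct call once the body has been read. Inside a loop, a deferred close only runs when the whole test function returns, so every response stays open until then; closing per iteration releases the connection immediately. A small sketch of the same idea with a hypothetical fetchAll helper:

```go
package example

import (
	"io"
	"net/http"
)

// fetchAll reads each endpoint and closes the body inside the loop.
// Deferring the Close here would keep every response body open until
// fetchAll returns, which is what the hunks above avoid.
func fetchAll(client *http.Client, urls []string) ([][]byte, error) {
	var bodies [][]byte
	for _, u := range urls {
		resp, err := client.Get(u)
		if err != nil {
			return nil, err
		}
		data, err := io.ReadAll(resp.Body)
		resp.Body.Close() // close per iteration, not via defer
		if err != nil {
			return nil, err
		}
		bodies = append(bodies, data)
	}
	return bodies, nil
}
```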
group := &rmpb.ResourceGroup{ @@ -1343,6 +1342,7 @@ func (suite *resourceManagerClientTestSuite) TestCheckBackgroundJobs() { re.False(c.IsBackgroundRequest(suite.ctx, resourceGroupName, "internal_lightning")) re.False(c.IsBackgroundRequest(suite.ctx, resourceGroupName, "internal_ddl")) re.False(c.IsBackgroundRequest(suite.ctx, resourceGroupName, "")) + re.False(c.IsBackgroundRequest(suite.ctx, "none", "none")) resourceGroupName = enableBackgroundGroup(true) re.True(c.IsBackgroundRequest(suite.ctx, resourceGroupName, "internal_br")) @@ -1435,19 +1435,25 @@ func (suite *resourceManagerClientTestSuite) TestResourceGroupControllerConfigCh waitDuration := 10 * time.Second readBaseCost := 1.5 defaultCfg := controller.DefaultConfig() - // failpoint enableDegradedMode will setup and set it be 1s. - defaultCfg.DegradedModeWaitDuration.Duration = time.Second + expectCfg := server.ControllerConfig{ + // failpoint enableDegradedMode will setup and set it be 1s. + DegradedModeWaitDuration: typeutil.NewDuration(time.Second), + LTBMaxWaitDuration: typeutil.Duration(defaultCfg.LTBMaxWaitDuration), + RequestUnit: server.RequestUnitConfig(defaultCfg.RequestUnit), + EnableControllerTraceLog: defaultCfg.EnableControllerTraceLog, + } expectRUCfg := controller.GenerateRUConfig(defaultCfg) + expectRUCfg.DegradedModeWaitDuration = time.Second // initial config verification respString := sendRequest("GET", getAddr()+configURL, nil) - defaultString, err := json.Marshal(defaultCfg) + expectStr, err := json.Marshal(expectCfg) re.NoError(err) - re.JSONEq(string(respString), string(defaultString)) + re.JSONEq(string(respString), string(expectStr)) re.EqualValues(expectRUCfg, c1.GetConfig()) testCases := []struct { configJSON string - value interface{} + value any expected func(ruConfig *controller.RUConfig) }{ { diff --git a/tests/integrations/mcs/resourcemanager/server_test.go b/tests/integrations/mcs/resourcemanager/server_test.go index 4e1fb018d56..24de29db3a6 100644 --- a/tests/integrations/mcs/resourcemanager/server_test.go +++ b/tests/integrations/mcs/resourcemanager/server_test.go @@ -63,7 +63,7 @@ func TestResourceManagerServer(t *testing.T) { // Test registered REST HTTP Handler url := addr + "/resource-manager/api/v1/config" { - resp, err := http.Get(url + "/groups") + resp, err := tests.TestDialClient.Get(url + "/groups") re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) @@ -78,13 +78,13 @@ func TestResourceManagerServer(t *testing.T) { } createJSON, err := json.Marshal(group) re.NoError(err) - resp, err := http.Post(url+"/group", "application/json", strings.NewReader(string(createJSON))) + resp, err := tests.TestDialClient.Post(url+"/group", "application/json", strings.NewReader(string(createJSON))) re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) } { - resp, err := http.Get(url + "/group/pingcap") + resp, err := tests.TestDialClient.Get(url + "/group/pingcap") re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) @@ -95,7 +95,7 @@ func TestResourceManagerServer(t *testing.T) { // Test metrics handler { - resp, err := http.Get(addr + "/metrics") + resp, err := tests.TestDialClient.Get(addr + "/metrics") re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) @@ -106,7 +106,7 @@ func TestResourceManagerServer(t *testing.T) { // Test status handler { - resp, err := http.Get(addr + "/status") + resp, err := tests.TestDialClient.Get(addr + "/status") re.NoError(err) defer resp.Body.Close() 
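server_test.go and the other integration tests switch from http.Get / http.Post (which go through http.DefaultClient) to a shared tests.TestDialClient. Judging from the per-suite clients removed elsewhere in this patch, that client disables keep-alives so connections are not reused against test servers that are started and stopped repeatedly. A sketch of that shape; the actual definition lives in the tests package and may differ:

```go
package tests

import "net/http"

// TestDialClient sketches the shared client used by the tests above. With
// keep-alives disabled, each request opens a fresh connection, so tearing a
// test server down does not leave reusable idle connections behind.
var TestDialClient = &http.Client{
	Transport: &http.Transport{
		DisableKeepAlives: true,
	},
}
```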
re.Equal(http.StatusOK, resp.StatusCode) diff --git a/tests/integrations/mcs/scheduling/api_test.go b/tests/integrations/mcs/scheduling/api_test.go index 4c53f3fabb2..365ab1ca493 100644 --- a/tests/integrations/mcs/scheduling/api_test.go +++ b/tests/integrations/mcs/scheduling/api_test.go @@ -10,6 +10,8 @@ import ( "testing" "time" + "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/core" _ "github.com/tikv/pd/pkg/mcs/scheduling/server/apis/v1" @@ -27,12 +29,6 @@ import ( "github.com/tikv/pd/tests" ) -var testDialClient = &http.Client{ - Transport: &http.Transport{ - DisableKeepAlives: true, - }, -} - type apiTestSuite struct { suite.Suite env *tests.SchedulingTestEnvironment @@ -43,11 +39,17 @@ func TestAPI(t *testing.T) { } func (suite *apiTestSuite) SetupSuite() { + re := suite.Require() + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/changeCoordinatorTicker", `return(true)`)) + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/mcs/scheduling/server/changeRunCollectWaitTime", `return(true)`)) suite.env = tests.NewSchedulingTestEnvironment(suite.T()) } func (suite *apiTestSuite) TearDownSuite() { suite.env.Cleanup() + re := suite.Require() + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/changeCoordinatorTicker")) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/mcs/scheduling/server/changeRunCollectWaitTime")) } func (suite *apiTestSuite) TestGetCheckerByName() { @@ -74,23 +76,23 @@ func (suite *apiTestSuite) checkGetCheckerByName(cluster *tests.TestCluster) { for _, testCase := range testCases { name := testCase.name // normal run - resp := make(map[string]interface{}) - err := testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) + resp := make(map[string]any) + err := testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) re.NoError(err) re.False(resp["paused"].(bool)) // paused err = co.PauseOrResumeChecker(name, 30) re.NoError(err) - resp = make(map[string]interface{}) - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) + resp = make(map[string]any) + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) re.NoError(err) re.True(resp["paused"].(bool)) // resumed err = co.PauseOrResumeChecker(name, 1) re.NoError(err) time.Sleep(time.Second) - resp = make(map[string]interface{}) - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) + resp = make(map[string]any) + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) re.NoError(err) re.False(resp["paused"].(bool)) } @@ -106,46 +108,46 @@ func (suite *apiTestSuite) checkAPIForward(cluster *tests.TestCluster) { leader := cluster.GetLeaderServer().GetServer() urlPrefix := fmt.Sprintf("%s/pd/api/v1", leader.GetAddr()) var respSlice []string - var resp map[string]interface{} + var resp map[string]any testutil.Eventually(re, func() bool { return leader.GetRaftCluster().IsServiceIndependent(utils.SchedulingServiceName) }) // Test operators - err := testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators"), &respSlice, + err := testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators"), &respSlice, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) re.Empty(respSlice) - err = 
testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators"), []byte(``), + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators"), []byte(``), testutil.StatusNotOK(re), testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators/2"), nil, + err = testutil.CheckGetJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators/2"), nil, testutil.StatusNotOK(re), testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.CheckDelete(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators/2"), + err = testutil.CheckDelete(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators/2"), testutil.StatusNotOK(re), testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators/records"), nil, + err = testutil.CheckGetJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators/records"), nil, testutil.StatusNotOK(re), testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) // Test checker - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "checker/merge"), &resp, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "checker/merge"), &resp, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) re.False(resp["paused"].(bool)) // Test pause postChecker := func(delay int) { - input := make(map[string]interface{}) + input := make(map[string]any) input["delay"] = delay pauseArgs, err := json.Marshal(input) re.NoError(err) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "checker/merge"), pauseArgs, + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "checker/merge"), pauseArgs, testutil.StatusOK(re), testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) } @@ -164,126 +166,124 @@ func (suite *apiTestSuite) checkAPIForward(cluster *tests.TestCluster) { // "/schedulers", http.MethodPost // "/schedulers/{name}", http.MethodDelete testutil.Eventually(re, func() bool { - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers"), &respSlice, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers"), &respSlice, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) return slice.Contains(respSlice, "balance-leader-scheduler") }) postScheduler := func(delay int) { - input := make(map[string]interface{}) + input := make(map[string]any) input["delay"] = delay pauseArgs, err := json.Marshal(input) re.NoError(err) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers/balance-leader-scheduler"), pauseArgs, + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers/balance-leader-scheduler"), pauseArgs, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) } postScheduler(30) postScheduler(0) - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers/diagnostic/balance-leader-scheduler"), &resp, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", 
urlPrefix, "schedulers/diagnostic/balance-leader-scheduler"), &resp, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "scheduler-config"), &resp, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "scheduler-config"), &resp, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) re.Contains(resp, "balance-leader-scheduler") - re.Contains(resp, "balance-witness-scheduler") re.Contains(resp, "balance-hot-region-scheduler") schedulers := []string{ "balance-leader-scheduler", - "balance-witness-scheduler", "balance-hot-region-scheduler", } for _, schedulerName := range schedulers { - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s/%s/%s", urlPrefix, "scheduler-config", schedulerName, "list"), &resp, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s/%s/%s", urlPrefix, "scheduler-config", schedulerName, "list"), &resp, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) } - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers"), nil, + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers"), nil, testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader)) re.NoError(err) - err = testutil.CheckDelete(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers/balance-leader-scheduler"), + err = testutil.CheckDelete(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers/balance-leader-scheduler"), testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader)) re.NoError(err) - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = "balance-leader-scheduler" b, err := json.Marshal(input) re.NoError(err) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers"), b, + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers"), b, testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader)) re.NoError(err) // Test hotspot var hotRegions statistics.StoreHotPeersInfos - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "hotspot/regions/write"), &hotRegions, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "hotspot/regions/write"), &hotRegions, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "hotspot/regions/read"), &hotRegions, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "hotspot/regions/read"), &hotRegions, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) var stores handler.HotStoreStats - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "hotspot/stores"), &stores, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "hotspot/stores"), &stores, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) var buckets handler.HotBucketsResponse - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "hotspot/buckets"), &buckets, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "hotspot/buckets"), &buckets, 
testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) var history storage.HistoryHotRegions - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "hotspot/regions/history"), &history, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "hotspot/regions/history"), &history, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) // Test region label var labelRules []*labeler.LabelRule - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/region-label/rules"), &labelRules, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/region-label/rules"), &labelRules, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.ReadGetJSONWithBody(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/region-label/rules/ids"), []byte(`["rule1", "rule3"]`), + err = testutil.ReadGetJSONWithBody(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/region-label/rules/ids"), []byte(`["rule1", "rule3"]`), &labelRules, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/region-label/rule/rule1"), nil, + err = testutil.CheckGetJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/region-label/rule/rule1"), nil, testutil.StatusNotOK(re), testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "region/id/1"), nil, + err = testutil.CheckGetJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "region/id/1"), nil, testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader)) re.NoError(err) - err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "region/id/1/label/key"), nil, + err = testutil.CheckGetJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "region/id/1/label/key"), nil, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "region/id/1/labels"), nil, + err = testutil.CheckGetJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "region/id/1/labels"), nil, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) // Test Region body := fmt.Sprintf(`{"start_key":"%s", "end_key": "%s"}`, hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a3"))) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "regions/accelerate-schedule"), []byte(body), + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "regions/accelerate-schedule"), []byte(body), testutil.StatusOK(re), testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) body = fmt.Sprintf(`[{"start_key":"%s", "end_key": "%s"}, {"start_key":"%s", "end_key": "%s"}]`, hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a3")), hex.EncodeToString([]byte("a4")), hex.EncodeToString([]byte("a6"))) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "regions/accelerate-schedule/batch"), []byte(body), + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "regions/accelerate-schedule/batch"), []byte(body), 
testutil.StatusOK(re), testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) body = fmt.Sprintf(`{"start_key":"%s", "end_key": "%s"}`, hex.EncodeToString([]byte("b1")), hex.EncodeToString([]byte("b3"))) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "regions/scatter"), []byte(body), + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "regions/scatter"), []byte(body), testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) body = fmt.Sprintf(`{"retry_limit":%v, "split_keys": ["%s","%s","%s"]}`, 3, hex.EncodeToString([]byte("bbb")), hex.EncodeToString([]byte("ccc")), hex.EncodeToString([]byte("ddd"))) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "regions/split"), []byte(body), + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "regions/split"), []byte(body), testutil.StatusOK(re), testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, urlPrefix, hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a2"))), nil, + err = testutil.CheckGetJSON(tests.TestDialClient, fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, urlPrefix, hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a2"))), nil, testutil.StatusOK(re), testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) // Test rules: only forward `GET` request @@ -301,73 +301,73 @@ func (suite *apiTestSuite) checkAPIForward(cluster *tests.TestCluster) { rulesArgs, err := json.Marshal(rules) re.NoError(err) - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules"), &rules, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules"), &rules, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules"), rulesArgs, + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules"), rulesArgs, testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader)) re.NoError(err) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules/batch"), rulesArgs, + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules/batch"), rulesArgs, testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader)) re.NoError(err) - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules/group/pd"), &rules, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules/group/pd"), &rules, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules/region/2"), &rules, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules/region/2"), &rules, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) var fit placement.RegionFit - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules/region/2/detail"), &fit, + err = testutil.ReadGetJSON(re, tests.TestDialClient, 
fmt.Sprintf("%s/%s", urlPrefix, "config/rules/region/2/detail"), &fit, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules/key/0000000000000001"), &rules, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules/key/0000000000000001"), &rules, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule/pd/2"), nil, + err = testutil.CheckGetJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule/pd/2"), nil, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.CheckDelete(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule/pd/2"), + err = testutil.CheckDelete(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule/pd/2"), testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader)) re.NoError(err) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule"), rulesArgs, + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule"), rulesArgs, testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader)) re.NoError(err) - err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule_group/pd"), nil, + err = testutil.CheckGetJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule_group/pd"), nil, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.CheckDelete(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule_group/pd"), + err = testutil.CheckDelete(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule_group/pd"), testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader)) re.NoError(err) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule_group"), rulesArgs, + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule_group"), rulesArgs, testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader)) re.NoError(err) - err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule_groups"), nil, + err = testutil.CheckGetJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule_groups"), nil, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule"), nil, + err = testutil.CheckGetJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule"), nil, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule"), rulesArgs, + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule"), rulesArgs, testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader)) re.NoError(err) - err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule/pd"), nil, + err = testutil.CheckGetJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule/pd"), nil, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, 
"true")) re.NoError(err) - err = testutil.CheckDelete(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule/pd"), + err = testutil.CheckDelete(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule/pd"), testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader)) re.NoError(err) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule/pd"), rulesArgs, + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule/pd"), rulesArgs, testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader)) re.NoError(err) // test redirect is disabled - err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule/pd"), nil, + err = testutil.CheckGetJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule/pd"), nil, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule/pd"), http.NoBody) re.NoError(err) req.Header.Set(apiutil.XForbiddenForwardToMicroServiceHeader, "true") - httpResp, err := testDialClient.Do(req) + httpResp, err := tests.TestDialClient.Do(req) re.NoError(err) re.Equal(http.StatusOK, httpResp.StatusCode) defer httpResp.Body.Close() @@ -388,7 +388,7 @@ func (suite *apiTestSuite) checkConfig(cluster *tests.TestCluster) { urlPrefix := fmt.Sprintf("%s/scheduling/api/v1/config", addr) var cfg config.Config - testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + testutil.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &cfg) re.Equal(cfg.GetListenAddr(), s.GetConfig().GetListenAddr()) re.Equal(cfg.Schedule.LeaderScheduleLimit, s.GetConfig().Schedule.LeaderScheduleLimit) re.Equal(cfg.Schedule.EnableCrossTableMerge, s.GetConfig().Schedule.EnableCrossTableMerge) @@ -397,13 +397,11 @@ func (suite *apiTestSuite) checkConfig(cluster *tests.TestCluster) { re.Equal(cfg.DataDir, s.GetConfig().DataDir) testutil.Eventually(re, func() bool { // wait for all schedulers to be loaded in scheduling server. 
- return len(cfg.Schedule.SchedulersPayload) == 6 + return len(cfg.Schedule.SchedulersPayload) == 4 }) re.Contains(cfg.Schedule.SchedulersPayload, "balance-leader-scheduler") re.Contains(cfg.Schedule.SchedulersPayload, "balance-region-scheduler") re.Contains(cfg.Schedule.SchedulersPayload, "balance-hot-region-scheduler") - re.Contains(cfg.Schedule.SchedulersPayload, "balance-witness-scheduler") - re.Contains(cfg.Schedule.SchedulersPayload, "transfer-witness-leader-scheduler") re.Contains(cfg.Schedule.SchedulersPayload, "evict-slow-store-scheduler") } @@ -415,33 +413,33 @@ func (suite *apiTestSuite) checkConfigForward(cluster *tests.TestCluster) { re := suite.Require() sche := cluster.GetSchedulingPrimaryServer() opts := sche.GetPersistConfig() - var cfg map[string]interface{} + var cfg map[string]any addr := cluster.GetLeaderServer().GetAddr() urlPrefix := fmt.Sprintf("%s/pd/api/v1/config", addr) // Test config forward // Expect to get same config in scheduling server and api server testutil.Eventually(re, func() bool { - testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - re.Equal(cfg["schedule"].(map[string]interface{})["leader-schedule-limit"], + testutil.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &cfg) + re.Equal(cfg["schedule"].(map[string]any)["leader-schedule-limit"], float64(opts.GetLeaderScheduleLimit())) - re.Equal(cfg["replication"].(map[string]interface{})["max-replicas"], + re.Equal(cfg["replication"].(map[string]any)["max-replicas"], float64(opts.GetReplicationConfig().MaxReplicas)) - schedulers := cfg["schedule"].(map[string]interface{})["schedulers-payload"].(map[string]interface{}) - return len(schedulers) == 6 + schedulers := cfg["schedule"].(map[string]any)["schedulers-payload"].(map[string]any) + return len(schedulers) == 4 }) // Test to change config in api server // Expect to get new config in scheduling server and api server - reqData, err := json.Marshal(map[string]interface{}{ + reqData, err := json.Marshal(map[string]any{ "max-replicas": 4, }) re.NoError(err) - err = testutil.CheckPostJSON(testDialClient, urlPrefix, reqData, testutil.StatusOK(re)) + err = testutil.CheckPostJSON(tests.TestDialClient, urlPrefix, reqData, testutil.StatusOK(re)) re.NoError(err) testutil.Eventually(re, func() bool { - testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - return cfg["replication"].(map[string]interface{})["max-replicas"] == 4. && + testutil.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &cfg) + return cfg["replication"].(map[string]any)["max-replicas"] == 4. && opts.GetReplicationConfig().MaxReplicas == 4. 
}) @@ -449,12 +447,12 @@ func (suite *apiTestSuite) checkConfigForward(cluster *tests.TestCluster) { // Expect to get new config in scheduling server but not old config in api server opts.GetScheduleConfig().LeaderScheduleLimit = 100 re.Equal(100, int(opts.GetLeaderScheduleLimit())) - testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - re.Equal(100., cfg["schedule"].(map[string]interface{})["leader-schedule-limit"]) + testutil.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &cfg) + re.Equal(100., cfg["schedule"].(map[string]any)["leader-schedule-limit"]) opts.GetReplicationConfig().MaxReplicas = 5 re.Equal(5, int(opts.GetReplicationConfig().MaxReplicas)) - testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - re.Equal(5., cfg["replication"].(map[string]interface{})["max-replicas"]) + testutil.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &cfg) + re.Equal(5., cfg["replication"].(map[string]any)["max-replicas"]) } func (suite *apiTestSuite) TestAdminRegionCache() { @@ -475,11 +473,11 @@ func (suite *apiTestSuite) checkAdminRegionCache(cluster *tests.TestCluster) { addr := schedulingServer.GetAddr() urlPrefix := fmt.Sprintf("%s/scheduling/api/v1/admin/cache/regions", addr) - err := testutil.CheckDelete(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "30"), testutil.StatusOK(re)) + err := testutil.CheckDelete(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "30"), testutil.StatusOK(re)) re.NoError(err) re.Equal(2, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) - err = testutil.CheckDelete(testDialClient, urlPrefix, testutil.StatusOK(re)) + err = testutil.CheckDelete(tests.TestDialClient, urlPrefix, testutil.StatusOK(re)) re.NoError(err) re.Equal(0, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) } @@ -500,19 +498,19 @@ func (suite *apiTestSuite) checkAdminRegionCacheForward(cluster *tests.TestClust apiServer := cluster.GetLeaderServer().GetServer() schedulingServer := cluster.GetSchedulingPrimaryServer() re.Equal(3, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) - re.Equal(3, apiServer.GetRaftCluster().GetRegionCount([]byte{}, []byte{}).Count) + re.Equal(3, apiServer.GetRaftCluster().GetRegionCount([]byte{}, []byte{})) addr := cluster.GetLeaderServer().GetAddr() urlPrefix := fmt.Sprintf("%s/pd/api/v1/admin/cache/region", addr) - err := testutil.CheckDelete(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "30"), testutil.StatusOK(re)) + err := testutil.CheckDelete(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "30"), testutil.StatusOK(re)) re.NoError(err) re.Equal(2, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) - re.Equal(2, apiServer.GetRaftCluster().GetRegionCount([]byte{}, []byte{}).Count) + re.Equal(2, apiServer.GetRaftCluster().GetRegionCount([]byte{}, []byte{})) - err = testutil.CheckDelete(testDialClient, urlPrefix+"s", testutil.StatusOK(re)) + err = testutil.CheckDelete(tests.TestDialClient, urlPrefix+"s", testutil.StatusOK(re)) re.NoError(err) re.Equal(0, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) - re.Equal(0, apiServer.GetRaftCluster().GetRegionCount([]byte{}, []byte{}).Count) + re.Equal(0, apiServer.GetRaftCluster().GetRegionCount([]byte{}, []byte{})) } func (suite *apiTestSuite) TestFollowerForward() { @@ -539,14 +537,14 @@ func (suite *apiTestSuite) checkFollowerForward(cluster *tests.TestCluster) { if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { // follower will forward to scheduling server directly 
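The forwarding checks above drive the same PD endpoints with and without the forward-to-microservice header, via testutil.WithHeader and a raw request that sets the forbidden-forward header. Underneath the wrappers this is plain net/http; a sketch that uses a placeholder header name rather than the real apiutil constant:

```go
package example

import "net/http"

// forwardHeader stands in for apiutil.XForwardedToMicroServiceHeader used in
// the hunks above; the literal header name is not asserted here.
const forwardHeader = "X-Example-Forwarded-To-Micro-Service"

// getWithForward issues a GET that asks the API server to forward the call
// to the owning microservice, mirroring the testutil.WithHeader(...) option
// used by the tests above.
func getWithForward(client *http.Client, url string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, url, http.NoBody)
	if err != nil {
		return nil, err
	}
	req.Header.Set(forwardHeader, "true")
	return client.Do(req)
}
```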
re.NotEqual(cluster.GetLeaderServer().GetAddr(), followerAddr) - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules"), &rules, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules"), &rules, testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true"), ) re.NoError(err) } else { // follower will forward to leader server re.NotEqual(cluster.GetLeaderServer().GetAddr(), followerAddr) - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules"), &rules, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules"), &rules, testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader), ) re.NoError(err) @@ -554,8 +552,8 @@ func (suite *apiTestSuite) checkFollowerForward(cluster *tests.TestCluster) { // follower will forward to leader server re.NotEqual(cluster.GetLeaderServer().GetAddr(), followerAddr) - results := make(map[string]interface{}) - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config"), &results, + results := make(map[string]any) + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config"), &results, testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader), ) re.NoError(err) @@ -571,7 +569,7 @@ func (suite *apiTestSuite) checkMetrics(cluster *tests.TestCluster) { testutil.Eventually(re, func() bool { return s.IsServing() }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) - resp, err := http.Get(s.GetConfig().GetAdvertiseListenAddr() + "/metrics") + resp, err := tests.TestDialClient.Get(s.GetConfig().GetAdvertiseListenAddr() + "/metrics") re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) @@ -590,7 +588,7 @@ func (suite *apiTestSuite) checkStatus(cluster *tests.TestCluster) { testutil.Eventually(re, func() bool { return s.IsServing() }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) - resp, err := http.Get(s.GetConfig().GetAdvertiseListenAddr() + "/status") + resp, err := tests.TestDialClient.Get(s.GetConfig().GetAdvertiseListenAddr() + "/status") re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) @@ -602,3 +600,123 @@ func (suite *apiTestSuite) checkStatus(cluster *tests.TestCluster) { re.Equal(versioninfo.PDGitHash, status.GitHash) re.Equal(versioninfo.PDReleaseVersion, status.Version) } + +func (suite *apiTestSuite) TestStores() { + suite.env.RunTestInAPIMode(suite.checkStores) +} + +func (suite *apiTestSuite) checkStores(cluster *tests.TestCluster) { + re := suite.Require() + stores := []*metapb.Store{ + { + // metapb.StoreState_Up == 0 + Id: 1, + Address: "tikv1", + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + Version: "2.0.0", + }, + { + Id: 4, + Address: "tikv4", + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + Version: "2.0.0", + }, + { + // metapb.StoreState_Offline == 1 + Id: 6, + Address: "tikv6", + State: metapb.StoreState_Offline, + NodeState: metapb.NodeState_Removing, + Version: "2.0.0", + }, + { + // metapb.StoreState_Tombstone == 2 + Id: 7, + Address: "tikv7", + State: metapb.StoreState_Tombstone, + NodeState: metapb.NodeState_Removed, + Version: "2.0.0", + }, + } + // prevent the offline store from changing to tombstone + tests.MustPutRegion(re, cluster, 3, 6, []byte("a"), []byte("b")) + for _, store := range stores { + 
tests.MustPutStore(re, cluster, store) + if store.GetId() == 6 { + cluster.GetLeaderServer().GetRaftCluster().GetBasicCluster().UpdateStoreStatus(6) + } + } + // Test /stores + apiServerAddr := cluster.GetLeaderServer().GetAddr() + urlPrefix := fmt.Sprintf("%s/pd/api/v1/stores", apiServerAddr) + var resp map[string]any + err := testutil.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &resp) + re.NoError(err) + re.Equal(3, int(resp["count"].(float64))) + re.Len(resp["stores"].([]any), 3) + scheServerAddr := cluster.GetSchedulingPrimaryServer().GetAddr() + urlPrefix = fmt.Sprintf("%s/scheduling/api/v1/stores", scheServerAddr) + err = testutil.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &resp) + re.NoError(err) + re.Equal(3, int(resp["count"].(float64))) + re.Len(resp["stores"].([]any), 3) + // Test /stores/{id} + urlPrefix = fmt.Sprintf("%s/scheduling/api/v1/stores/1", scheServerAddr) + err = testutil.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &resp) + re.NoError(err) + re.Equal("tikv1", resp["store"].(map[string]any)["address"]) + re.Equal("Up", resp["store"].(map[string]any)["state_name"]) + urlPrefix = fmt.Sprintf("%s/scheduling/api/v1/stores/6", scheServerAddr) + err = testutil.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &resp) + re.NoError(err) + re.Equal("tikv6", resp["store"].(map[string]any)["address"]) + re.Equal("Offline", resp["store"].(map[string]any)["state_name"]) + urlPrefix = fmt.Sprintf("%s/scheduling/api/v1/stores/7", scheServerAddr) + err = testutil.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &resp) + re.NoError(err) + re.Equal("tikv7", resp["store"].(map[string]any)["address"]) + re.Equal("Tombstone", resp["store"].(map[string]any)["state_name"]) + urlPrefix = fmt.Sprintf("%s/scheduling/api/v1/stores/233", scheServerAddr) + testutil.CheckGetJSON(tests.TestDialClient, urlPrefix, nil, + testutil.Status(re, http.StatusNotFound), testutil.StringContain(re, "not found")) +} + +func (suite *apiTestSuite) TestRegions() { + suite.env.RunTestInAPIMode(suite.checkRegions) +} + +func (suite *apiTestSuite) checkRegions(cluster *tests.TestCluster) { + re := suite.Require() + tests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b")) + tests.MustPutRegion(re, cluster, 2, 2, []byte("c"), []byte("d")) + tests.MustPutRegion(re, cluster, 3, 1, []byte("e"), []byte("f")) + // Test /regions + apiServerAddr := cluster.GetLeaderServer().GetAddr() + urlPrefix := fmt.Sprintf("%s/pd/api/v1/regions", apiServerAddr) + var resp map[string]any + err := testutil.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &resp) + re.NoError(err) + re.Equal(3, int(resp["count"].(float64))) + re.Len(resp["regions"].([]any), 3) + scheServerAddr := cluster.GetSchedulingPrimaryServer().GetAddr() + urlPrefix = fmt.Sprintf("%s/scheduling/api/v1/regions", scheServerAddr) + err = testutil.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &resp) + re.NoError(err) + re.Equal(3, int(resp["count"].(float64))) + re.Len(resp["regions"].([]any), 3) + // Test /regions/{id} and /regions/count + urlPrefix = fmt.Sprintf("%s/scheduling/api/v1/regions/1", scheServerAddr) + err = testutil.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &resp) + re.NoError(err) + key := fmt.Sprintf("%x", "a") + re.Equal(key, resp["start_key"]) + urlPrefix = fmt.Sprintf("%s/scheduling/api/v1/regions/count", scheServerAddr) + err = testutil.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &resp) + re.NoError(err) + re.Equal(3., resp["count"]) + urlPrefix = fmt.Sprintf("%s/scheduling/api/v1/regions/233", scheServerAddr) + 
testutil.CheckGetJSON(tests.TestDialClient, urlPrefix, nil, + testutil.Status(re, http.StatusNotFound), testutil.StringContain(re, "not found")) +} diff --git a/tests/integrations/mcs/scheduling/config_test.go b/tests/integrations/mcs/scheduling/config_test.go index 69d77bb24ac..54622d5c515 100644 --- a/tests/integrations/mcs/scheduling/config_test.go +++ b/tests/integrations/mcs/scheduling/config_test.go @@ -164,7 +164,7 @@ func (suite *configTestSuite) TestSchedulerConfigWatch() { }) re.Equal(namesFromAPIServer, namesFromSchedulingServer) // Add a new scheduler. - api.MustAddScheduler(re, suite.pdLeaderServer.GetAddr(), schedulers.EvictLeaderName, map[string]interface{}{ + api.MustAddScheduler(re, suite.pdLeaderServer.GetAddr(), schedulers.EvictLeaderName, map[string]any{ "store_id": 1, }) // Check the new scheduler's config. @@ -175,7 +175,7 @@ func (suite *configTestSuite) TestSchedulerConfigWatch() { }) assertEvictLeaderStoreIDs(re, storage, []uint64{1}) // Update the scheduler by adding a store. - err = suite.pdLeaderServer.GetServer().GetRaftCluster().PutStore( + err = suite.pdLeaderServer.GetServer().GetRaftCluster().PutMetaStore( &metapb.Store{ Id: 2, Address: "mock://2", @@ -186,7 +186,7 @@ func (suite *configTestSuite) TestSchedulerConfigWatch() { }, ) re.NoError(err) - api.MustAddScheduler(re, suite.pdLeaderServer.GetAddr(), schedulers.EvictLeaderName, map[string]interface{}{ + api.MustAddScheduler(re, suite.pdLeaderServer.GetAddr(), schedulers.EvictLeaderName, map[string]any{ "store_id": 2, }) assertEvictLeaderStoreIDs(re, storage, []uint64{1, 2}) diff --git a/tests/integrations/mcs/scheduling/meta_test.go b/tests/integrations/mcs/scheduling/meta_test.go index abc1efd9021..11782590ab9 100644 --- a/tests/integrations/mcs/scheduling/meta_test.go +++ b/tests/integrations/mcs/scheduling/meta_test.go @@ -79,7 +79,7 @@ func (suite *metaTestSuite) TestStoreWatch() { ) re.NoError(err) for i := uint64(1); i <= 4; i++ { - suite.pdLeaderServer.GetServer().GetRaftCluster().PutStore( + suite.pdLeaderServer.GetServer().GetRaftCluster().PutMetaStore( &metapb.Store{Id: i, Address: fmt.Sprintf("mock-%d", i), State: metapb.StoreState_Up, NodeState: metapb.NodeState_Serving, LastHeartbeat: time.Now().UnixNano()}, ) } @@ -102,7 +102,7 @@ func (suite *metaTestSuite) TestStoreWatch() { }) // test synchronized store labels - suite.pdLeaderServer.GetServer().GetRaftCluster().PutStore( + suite.pdLeaderServer.GetServer().GetRaftCluster().PutMetaStore( &metapb.Store{Id: 5, Address: "mock-5", State: metapb.StoreState_Up, NodeState: metapb.NodeState_Serving, LastHeartbeat: time.Now().UnixNano(), Labels: []*metapb.StoreLabel{{Key: "zone", Value: "z1"}}}, ) testutil.Eventually(re, func() bool { diff --git a/tests/integrations/mcs/scheduling/server_test.go b/tests/integrations/mcs/scheduling/server_test.go index 1e943735516..82da47d18f3 100644 --- a/tests/integrations/mcs/scheduling/server_test.go +++ b/tests/integrations/mcs/scheduling/server_test.go @@ -59,6 +59,8 @@ func TestServerTestSuite(t *testing.T) { func (suite *serverTestSuite) SetupSuite() { var err error re := suite.Require() + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/changeCoordinatorTicker", `return(true)`)) + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/mcs/scheduling/server/changeRunCollectWaitTime", `return(true)`)) re.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs", `return(true)`)) suite.ctx, suite.cancel = context.WithCancel(context.Background()) suite.cluster, err = 
tests.NewTestAPICluster(suite.ctx, 1) @@ -78,6 +80,8 @@ func (suite *serverTestSuite) TearDownSuite() { suite.cluster.Destroy() suite.cancel() re.NoError(failpoint.Disable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs")) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/changeCoordinatorTicker")) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/mcs/scheduling/server/changeRunCollectWaitTime")) } func (suite *serverTestSuite) TestAllocID() { @@ -137,7 +141,7 @@ func (suite *serverTestSuite) TestPrimaryChange() { testutil.Eventually(re, func() bool { watchedAddr, ok := suite.pdLeader.GetServicePrimaryAddr(suite.ctx, mcs.SchedulingServiceName) return ok && oldPrimaryAddr == watchedAddr && - len(primary.GetCluster().GetCoordinator().GetSchedulersController().GetSchedulerNames()) == 6 + len(primary.GetCluster().GetCoordinator().GetSchedulersController().GetSchedulerNames()) == 4 }) // change primary primary.Close() @@ -148,7 +152,7 @@ func (suite *serverTestSuite) TestPrimaryChange() { testutil.Eventually(re, func() bool { watchedAddr, ok := suite.pdLeader.GetServicePrimaryAddr(suite.ctx, mcs.SchedulingServiceName) return ok && newPrimaryAddr == watchedAddr && - len(primary.GetCluster().GetCoordinator().GetSchedulersController().GetSchedulerNames()) == 6 + len(primary.GetCluster().GetCoordinator().GetSchedulersController().GetSchedulerNames()) == 4 }) } @@ -299,14 +303,14 @@ func (suite *serverTestSuite) TestSchedulerSync() { schedulersController := tc.GetPrimaryServer().GetCluster().GetCoordinator().GetSchedulersController() checkEvictLeaderSchedulerExist(re, schedulersController, false) // Add a new evict-leader-scheduler through the API server. - api.MustAddScheduler(re, suite.backendEndpoints, schedulers.EvictLeaderName, map[string]interface{}{ + api.MustAddScheduler(re, suite.backendEndpoints, schedulers.EvictLeaderName, map[string]any{ "store_id": 1, }) // Check if the evict-leader-scheduler is added. checkEvictLeaderSchedulerExist(re, schedulersController, true) checkEvictLeaderStoreIDs(re, schedulersController, []uint64{1}) // Add a store_id to the evict-leader-scheduler through the API server. - err = suite.pdLeader.GetServer().GetRaftCluster().PutStore( + err = suite.pdLeader.GetServer().GetRaftCluster().PutMetaStore( &metapb.Store{ Id: 2, Address: "mock://2", @@ -317,7 +321,7 @@ func (suite *serverTestSuite) TestSchedulerSync() { }, ) re.NoError(err) - api.MustAddScheduler(re, suite.backendEndpoints, schedulers.EvictLeaderName, map[string]interface{}{ + api.MustAddScheduler(re, suite.backendEndpoints, schedulers.EvictLeaderName, map[string]any{ "store_id": 2, }) checkEvictLeaderSchedulerExist(re, schedulersController, true) @@ -327,7 +331,7 @@ func (suite *serverTestSuite) TestSchedulerSync() { checkEvictLeaderSchedulerExist(re, schedulersController, true) checkEvictLeaderStoreIDs(re, schedulersController, []uint64{2}) // Add a store_id to the evict-leader-scheduler through the API server by the scheduler handler. 
- api.MustCallSchedulerConfigAPI(re, http.MethodPost, suite.backendEndpoints, schedulers.EvictLeaderName, []string{"config"}, map[string]interface{}{ + api.MustCallSchedulerConfigAPI(re, http.MethodPost, suite.backendEndpoints, schedulers.EvictLeaderName, []string{"config"}, map[string]any{ "name": schedulers.EvictLeaderName, "store_id": 1, }) @@ -343,7 +347,7 @@ func (suite *serverTestSuite) TestSchedulerSync() { checkEvictLeaderSchedulerExist(re, schedulersController, false) // Delete the evict-leader-scheduler through the API server by removing the last store_id. - api.MustAddScheduler(re, suite.backendEndpoints, schedulers.EvictLeaderName, map[string]interface{}{ + api.MustAddScheduler(re, suite.backendEndpoints, schedulers.EvictLeaderName, map[string]any{ "store_id": 1, }) checkEvictLeaderSchedulerExist(re, schedulersController, true) @@ -352,7 +356,7 @@ func (suite *serverTestSuite) TestSchedulerSync() { checkEvictLeaderSchedulerExist(re, schedulersController, false) // Delete the evict-leader-scheduler through the API server. - api.MustAddScheduler(re, suite.backendEndpoints, schedulers.EvictLeaderName, map[string]interface{}{ + api.MustAddScheduler(re, suite.backendEndpoints, schedulers.EvictLeaderName, map[string]any{ "store_id": 1, }) checkEvictLeaderSchedulerExist(re, schedulersController, true) @@ -364,9 +368,7 @@ func (suite *serverTestSuite) TestSchedulerSync() { defaultSchedulerNames := []string{ schedulers.BalanceLeaderName, schedulers.BalanceRegionName, - schedulers.BalanceWitnessName, schedulers.HotRegionName, - schedulers.TransferWitnessLeaderName, } checkDisabled := func(name string, shouldDisabled bool) { re.NotNil(schedulersController.GetScheduler(name), name) diff --git a/tests/integrations/mcs/tso/api_test.go b/tests/integrations/mcs/tso/api_test.go index dd683c88fd7..4d6f9b33e3b 100644 --- a/tests/integrations/mcs/tso/api_test.go +++ b/tests/integrations/mcs/tso/api_test.go @@ -42,13 +42,6 @@ const ( tsoKeyspaceGroupsPrefix = "/tso/api/v1/keyspace-groups" ) -// dialClient used to dial http request. 
-var dialClient = &http.Client{ - Transport: &http.Transport{ - DisableKeepAlives: true, - }, -} - type tsoAPITestSuite struct { suite.Suite ctx context.Context @@ -110,13 +103,13 @@ func (suite *tsoAPITestSuite) TestForwardResetTS() { // Test reset ts input := []byte(`{"tso":"121312", "force-use-larger":true}`) - err := testutil.CheckPostJSON(dialClient, url, input, + err := testutil.CheckPostJSON(tests.TestDialClient, url, input, testutil.StatusOK(re), testutil.StringContain(re, "Reset ts successfully"), testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) // Test reset ts with invalid tso input = []byte(`{}`) - err = testutil.CheckPostJSON(dialClient, url, input, + err = testutil.CheckPostJSON(tests.TestDialClient, url, input, testutil.StatusNotOK(re), testutil.StringContain(re, "invalid tso value"), testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) } @@ -124,7 +117,7 @@ func (suite *tsoAPITestSuite) TestForwardResetTS() { func mustGetKeyspaceGroupMembers(re *require.Assertions, server *tso.Server) map[uint32]*apis.KeyspaceGroupMember { httpReq, err := http.NewRequest(http.MethodGet, server.GetAddr()+tsoKeyspaceGroupsPrefix+"/members", http.NoBody) re.NoError(err) - httpResp, err := dialClient.Do(httpReq) + httpResp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer httpResp.Body.Close() data, err := io.ReadAll(httpResp.Body) @@ -141,7 +134,7 @@ func TestTSOServerStartFirst(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - apiCluster, err := tests.NewTestAPICluster(ctx, 1, func(conf *config.Config, serverName string) { + apiCluster, err := tests.NewTestAPICluster(ctx, 1, func(conf *config.Config, _ string) { conf.Keyspace.PreAlloc = []string{"k1", "k2"} }) defer apiCluster.Destroy() @@ -170,21 +163,21 @@ func TestTSOServerStartFirst(t *testing.T) { <-ch time.Sleep(time.Second * 1) - input := make(map[string]interface{}) + input := make(map[string]any) input["new-id"] = 1 input["keyspaces"] = []uint32{2} jsonBody, err := json.Marshal(input) re.NoError(err) httpReq, err := http.NewRequest(http.MethodPost, addr+"/pd/api/v2/tso/keyspace-groups/0/split", bytes.NewBuffer(jsonBody)) re.NoError(err) - httpResp, err := dialClient.Do(httpReq) + httpResp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer httpResp.Body.Close() re.Equal(http.StatusOK, httpResp.StatusCode) httpReq, err = http.NewRequest(http.MethodGet, addr+"/pd/api/v2/tso/keyspace-groups/0", http.NoBody) re.NoError(err) - httpResp, err = dialClient.Do(httpReq) + httpResp, err = tests.TestDialClient.Do(httpReq) re.NoError(err) data, err := io.ReadAll(httpResp.Body) re.NoError(err) @@ -219,20 +212,20 @@ func TestForwardOnlyTSONoScheduling(t *testing.T) { // Test /operators, it should not forward when there is no scheduling server. var slice []string - err = testutil.ReadGetJSON(re, dialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators"), &slice, + err = testutil.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators"), &slice, testutil.WithoutHeader(re, apiutil.XForwardedToMicroServiceHeader)) re.NoError(err) re.Empty(slice) // Test admin/reset-ts, it should forward to tso server. 
input := []byte(`{"tso":"121312", "force-use-larger":true}`) - err = testutil.CheckPostJSON(dialClient, fmt.Sprintf("%s/%s", urlPrefix, "admin/reset-ts"), input, + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "admin/reset-ts"), input, testutil.StatusOK(re), testutil.StringContain(re, "Reset ts successfully"), testutil.WithHeader(re, apiutil.XForwardedToMicroServiceHeader, "true")) re.NoError(err) // If close tso server, it should try forward to tso server, but return error in api mode. ttc.Destroy() - err = testutil.CheckPostJSON(dialClient, fmt.Sprintf("%s/%s", urlPrefix, "admin/reset-ts"), input, + err = testutil.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, "admin/reset-ts"), input, testutil.Status(re, http.StatusInternalServerError), testutil.StringContain(re, "[PD:apiutil:ErrRedirect]redirect failed")) re.NoError(err) } @@ -241,7 +234,7 @@ func (suite *tsoAPITestSuite) TestMetrics() { re := suite.Require() primary := suite.tsoCluster.WaitForDefaultPrimaryServing(re) - resp, err := http.Get(primary.GetConfig().GetAdvertiseListenAddr() + "/metrics") + resp, err := tests.TestDialClient.Get(primary.GetConfig().GetAdvertiseListenAddr() + "/metrics") re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) @@ -254,7 +247,7 @@ func (suite *tsoAPITestSuite) TestStatus() { re := suite.Require() primary := suite.tsoCluster.WaitForDefaultPrimaryServing(re) - resp, err := http.Get(primary.GetConfig().GetAdvertiseListenAddr() + "/status") + resp, err := tests.TestDialClient.Get(primary.GetConfig().GetAdvertiseListenAddr() + "/status") re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) @@ -266,3 +259,22 @@ func (suite *tsoAPITestSuite) TestStatus() { re.Equal(versioninfo.PDGitHash, s.GitHash) re.Equal(versioninfo.PDReleaseVersion, s.Version) } + +func (suite *tsoAPITestSuite) TestConfig() { + re := suite.Require() + + primary := suite.tsoCluster.WaitForDefaultPrimaryServing(re) + resp, err := tests.TestDialClient.Get(primary.GetConfig().GetAdvertiseListenAddr() + "/tso/api/v1/config") + re.NoError(err) + defer resp.Body.Close() + re.Equal(http.StatusOK, resp.StatusCode) + respBytes, err := io.ReadAll(resp.Body) + re.NoError(err) + var cfg tso.Config + re.NoError(json.Unmarshal(respBytes, &cfg)) + re.Equal(cfg.GetListenAddr(), primary.GetConfig().GetListenAddr()) + re.Equal(cfg.GetTSOSaveInterval(), primary.GetConfig().GetTSOSaveInterval()) + re.Equal(cfg.IsLocalTSOEnabled(), primary.GetConfig().IsLocalTSOEnabled()) + re.Equal(cfg.GetTSOUpdatePhysicalInterval(), primary.GetConfig().GetTSOUpdatePhysicalInterval()) + re.Equal(cfg.GetMaxResetTSGap(), primary.GetConfig().GetMaxResetTSGap()) +} diff --git a/tests/integrations/mcs/tso/keyspace_group_manager_test.go b/tests/integrations/mcs/tso/keyspace_group_manager_test.go index 52248086249..6d861962d9b 100644 --- a/tests/integrations/mcs/tso/keyspace_group_manager_test.go +++ b/tests/integrations/mcs/tso/keyspace_group_manager_test.go @@ -28,10 +28,12 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" pd "github.com/tikv/pd/client" + clierrs "github.com/tikv/pd/client/errs" "github.com/tikv/pd/pkg/election" "github.com/tikv/pd/pkg/errs" mcsutils "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/member" + "github.com/tikv/pd/pkg/mock/mockid" "github.com/tikv/pd/pkg/storage/endpoint" tsopkg "github.com/tikv/pd/pkg/tso" "github.com/tikv/pd/pkg/utils/etcdutil" @@ -56,6 +58,13 @@ type 
tsoKeyspaceGroupManagerTestSuite struct { pdLeaderServer *tests.TestServer // tsoCluster is the TSO service cluster. tsoCluster *tests.TestTSOCluster + + allocator *mockid.IDAllocator +} + +func (suite *tsoKeyspaceGroupManagerTestSuite) allocID() uint32 { + id, _ := suite.allocator.Alloc() + return uint32(id) } func TestTSOKeyspaceGroupManager(t *testing.T) { @@ -77,6 +86,8 @@ func (suite *tsoKeyspaceGroupManagerTestSuite) SetupSuite() { re.NoError(suite.pdLeaderServer.BootstrapCluster()) suite.tsoCluster, err = tests.NewTestTSOCluster(suite.ctx, 2, suite.pdLeaderServer.GetAddr()) re.NoError(err) + suite.allocator = mockid.NewIDAllocator() + suite.allocator.SetBase(uint64(time.Now().Second())) } func (suite *tsoKeyspaceGroupManagerTestSuite) TearDownSuite() { @@ -151,6 +162,9 @@ func (suite *tsoKeyspaceGroupManagerTestSuite) TestKeyspacesServedByDefaultKeysp mcs.CheckMultiKeyspacesTSO(suite.ctx, re, clients, func() { time.Sleep(3 * time.Second) }) + for _, client := range clients { + client.Close() + } } func (suite *tsoKeyspaceGroupManagerTestSuite) TestKeyspacesServedByNonDefaultKeyspaceGroups() { @@ -163,9 +177,9 @@ func (suite *tsoKeyspaceGroupManagerTestSuite) TestKeyspacesServedByNonDefaultKe keyspaceGroupID uint32 keyspaceIDs []uint32 }{ - {0, []uint32{0, 10}}, - {1, []uint32{1, 11}}, - {2, []uint32{2, 12}}, + {suite.allocID(), []uint32{0, 10}}, + {suite.allocID(), []uint32{1, 11}}, + {suite.allocID(), []uint32{2, 12}}, } for _, param := range params { @@ -232,54 +246,62 @@ func (suite *tsoKeyspaceGroupManagerTestSuite) TestKeyspacesServedByNonDefaultKe mcs.CheckMultiKeyspacesTSO(suite.ctx, re, clients, func() { time.Sleep(3 * time.Second) }) + for _, client := range clients { + client.Close() + } } func (suite *tsoKeyspaceGroupManagerTestSuite) TestTSOKeyspaceGroupSplit() { re := suite.Require() - // Create the keyspace group 1 with keyspaces [111, 222, 333]. + // Create the keyspace group `oldID` with keyspaces [111, 222, 333]. + oldID := suite.allocID() handlersutil.MustCreateKeyspaceGroup(re, suite.pdLeaderServer, &handlers.CreateKeyspaceGroupParams{ KeyspaceGroups: []*endpoint.KeyspaceGroup{ { - ID: 1, + ID: oldID, UserKind: endpoint.Standard.String(), Members: suite.tsoCluster.GetKeyspaceGroupMember(), Keyspaces: []uint32{111, 222, 333}, }, }, }) - kg1 := handlersutil.MustLoadKeyspaceGroupByID(re, suite.pdLeaderServer, 1) - re.Equal(uint32(1), kg1.ID) + kg1 := handlersutil.MustLoadKeyspaceGroupByID(re, suite.pdLeaderServer, oldID) + re.Equal(oldID, kg1.ID) re.Equal([]uint32{111, 222, 333}, kg1.Keyspaces) re.False(kg1.IsSplitting()) - // Get a TSO from the keyspace group 1. + // Get a TSO from the keyspace group `oldID`. var ( ts pdpb.Timestamp err error ) testutil.Eventually(re, func() bool { - ts, err = suite.requestTSO(re, 222, 1) + ts, err = suite.requestTSO(re, 222, oldID) return err == nil && tsoutil.CompareTimestamp(&ts, &pdpb.Timestamp{}) > 0 }) ts.Physical += time.Hour.Milliseconds() - // Set the TSO of the keyspace group 1 to a large value. - err = suite.tsoCluster.GetPrimaryServer(222, 1).ResetTS(tsoutil.GenerateTS(&ts), false, true, 1) + // Set the TSO of the keyspace group `oldID` to a large value. + err = suite.tsoCluster.GetPrimaryServer(222, oldID).ResetTS(tsoutil.GenerateTS(&ts), false, true, oldID) re.NoError(err) - // Split the keyspace group 1 to 2. - handlersutil.MustSplitKeyspaceGroup(re, suite.pdLeaderServer, 1, &handlers.SplitKeyspaceGroupByIDParams{ - NewID: 2, + // Split the keyspace group `oldID` to `newID`. 
+ newID := suite.allocID() + handlersutil.MustSplitKeyspaceGroup(re, suite.pdLeaderServer, oldID, &handlers.SplitKeyspaceGroupByIDParams{ + NewID: newID, Keyspaces: []uint32{222, 333}, }) // Wait for the split to complete automatically even there is no TSO request from the outside. testutil.Eventually(re, func() bool { - kg2 := handlersutil.MustLoadKeyspaceGroupByID(re, suite.pdLeaderServer, 2) - re.Equal(uint32(2), kg2.ID) + kg2, code := handlersutil.TryLoadKeyspaceGroupByID(re, suite.pdLeaderServer, newID) + if code != http.StatusOK { + return false + } + re.Equal(newID, kg2.ID) re.Equal([]uint32{222, 333}, kg2.Keyspaces) return !kg2.IsSplitting() }) - // Check the split TSO from keyspace group 2 now. - splitTS, err := suite.requestTSO(re, 222, 2) + // Check the split TSO from keyspace group `newID` now. + splitTS, err := suite.requestTSO(re, 222, newID) re.NoError(err) - re.Greater(tsoutil.CompareTimestamp(&splitTS, &ts), 0) + re.Positive(tsoutil.CompareTimestamp(&splitTS, &ts)) } func (suite *tsoKeyspaceGroupManagerTestSuite) requestTSO( @@ -295,60 +317,62 @@ func (suite *tsoKeyspaceGroupManagerTestSuite) requestTSO( func (suite *tsoKeyspaceGroupManagerTestSuite) TestTSOKeyspaceGroupSplitElection() { re := suite.Require() - // Create the keyspace group 1 with keyspaces [111, 222, 333]. + // Create the keyspace group `oldID` with keyspaces [111, 222, 333]. + oldID := suite.allocID() handlersutil.MustCreateKeyspaceGroup(re, suite.pdLeaderServer, &handlers.CreateKeyspaceGroupParams{ KeyspaceGroups: []*endpoint.KeyspaceGroup{ { - ID: 1, + ID: oldID, UserKind: endpoint.Standard.String(), Members: suite.tsoCluster.GetKeyspaceGroupMember(), Keyspaces: []uint32{111, 222, 333}, }, }, }) - kg1 := handlersutil.MustLoadKeyspaceGroupByID(re, suite.pdLeaderServer, 1) - re.Equal(uint32(1), kg1.ID) + kg1 := handlersutil.MustLoadKeyspaceGroupByID(re, suite.pdLeaderServer, oldID) + re.Equal(oldID, kg1.ID) re.Equal([]uint32{111, 222, 333}, kg1.Keyspaces) re.False(kg1.IsSplitting()) - // Split the keyspace group 1 to 2. - handlersutil.MustSplitKeyspaceGroup(re, suite.pdLeaderServer, 1, &handlers.SplitKeyspaceGroupByIDParams{ - NewID: 2, + // Split the keyspace group `oldID` to `newID`. + newID := suite.allocID() + handlersutil.MustSplitKeyspaceGroup(re, suite.pdLeaderServer, oldID, &handlers.SplitKeyspaceGroupByIDParams{ + NewID: newID, Keyspaces: []uint32{222, 333}, }) - kg2 := handlersutil.MustLoadKeyspaceGroupByID(re, suite.pdLeaderServer, 2) - re.Equal(uint32(2), kg2.ID) + kg2 := handlersutil.MustLoadKeyspaceGroupByID(re, suite.pdLeaderServer, newID) + re.Equal(newID, kg2.ID) re.Equal([]uint32{222, 333}, kg2.Keyspaces) re.True(kg2.IsSplitTarget()) // Check the leadership. - member1, err := suite.tsoCluster.WaitForPrimaryServing(re, 111, 1).GetMember(111, 1) + member1, err := suite.tsoCluster.WaitForPrimaryServing(re, 111, oldID).GetMember(111, oldID) re.NoError(err) re.NotNil(member1) - member2, err := suite.tsoCluster.WaitForPrimaryServing(re, 222, 2).GetMember(222, 2) + member2, err := suite.tsoCluster.WaitForPrimaryServing(re, 222, newID).GetMember(222, newID) re.NoError(err) re.NotNil(member2) - // Wait for the leader of the keyspace group 1 and 2 to be elected. + // Wait for the leader of the keyspace group `oldID` and `newID` to be elected. testutil.Eventually(re, func() bool { return len(member1.GetLeaderListenUrls()) > 0 && len(member2.GetLeaderListenUrls()) > 0 }) - // Check if the leader of the keyspace group 1 and 2 are the same. 
+ // Check if the leader of the keyspace group `oldID` and `newID` are the same. re.Equal(member1.GetLeaderListenUrls(), member2.GetLeaderListenUrls()) - // Resign and block the leader of the keyspace group 1 from being elected. + // Resign and block the leader of the keyspace group `oldID` from being elected. member1.(*member.Participant).SetCampaignChecker(func(*election.Leadership) bool { return false }) member1.ResetLeader() - // The leader of the keyspace group 2 should be resigned also. + // The leader of the keyspace group `newID` should be resigned also. testutil.Eventually(re, func() bool { return member2.IsLeader() == false }) - // Check if the leader of the keyspace group 1 and 2 are the same again. + // Check if the leader of the keyspace group `oldID` and `newID` are the same again. member1.(*member.Participant).SetCampaignChecker(nil) testutil.Eventually(re, func() bool { return len(member1.GetLeaderListenUrls()) > 0 && len(member2.GetLeaderListenUrls()) > 0 }) re.Equal(member1.GetLeaderListenUrls(), member2.GetLeaderListenUrls()) // Wait for the keyspace groups to finish the split. - waitFinishSplit(re, suite.pdLeaderServer, 1, 2, []uint32{111}, []uint32{222, 333}) + waitFinishSplit(re, suite.pdLeaderServer, oldID, newID, []uint32{111}, []uint32{222, 333}) } func waitFinishSplit( @@ -381,30 +405,32 @@ func (suite *tsoKeyspaceGroupManagerTestSuite) TestTSOKeyspaceGroupSplitClient() re := suite.Require() // Enable the failpoint to slow down the system time to test whether the TSO is monotonic. re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/tso/systemTimeSlow", `return(true)`)) - // Create the keyspace group 1 with keyspaces [444, 555, 666]. + // Create the keyspace group `oldID` with keyspaces [444, 555, 666]. + oldID := suite.allocID() handlersutil.MustCreateKeyspaceGroup(re, suite.pdLeaderServer, &handlers.CreateKeyspaceGroupParams{ KeyspaceGroups: []*endpoint.KeyspaceGroup{ { - ID: 1, + ID: oldID, UserKind: endpoint.Standard.String(), Members: suite.tsoCluster.GetKeyspaceGroupMember(), Keyspaces: []uint32{444, 555, 666}, }, }, }) - kg1 := handlersutil.MustLoadKeyspaceGroupByID(re, suite.pdLeaderServer, 1) - re.Equal(uint32(1), kg1.ID) + kg1 := handlersutil.MustLoadKeyspaceGroupByID(re, suite.pdLeaderServer, oldID) + re.Equal(oldID, kg1.ID) re.Equal([]uint32{444, 555, 666}, kg1.Keyspaces) re.False(kg1.IsSplitting()) // Request the TSO for keyspace 555 concurrently via client. - cancel := suite.dispatchClient(re, 555, 1) - // Split the keyspace group 1 to 2. - handlersutil.MustSplitKeyspaceGroup(re, suite.pdLeaderServer, 1, &handlers.SplitKeyspaceGroupByIDParams{ - NewID: 2, + cancel := suite.dispatchClient(re, 555, oldID) + // Split the keyspace group `oldID` to `newID`. + newID := suite.allocID() + handlersutil.MustSplitKeyspaceGroup(re, suite.pdLeaderServer, oldID, &handlers.SplitKeyspaceGroupByIDParams{ + NewID: newID, Keyspaces: []uint32{555, 666}, }) // Wait for the keyspace groups to finish the split. - waitFinishSplit(re, suite.pdLeaderServer, 1, 2, []uint32{444}, []uint32{555, 666}) + waitFinishSplit(re, suite.pdLeaderServer, oldID, newID, []uint32{444}, []uint32{555, 666}) // Stop the client. cancel() re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/tso/systemTimeSlow")) @@ -442,8 +468,8 @@ func (suite *tsoKeyspaceGroupManagerTestSuite) dispatchClient( errMsg := err.Error() // Ignore the errors caused by the split and context cancellation. 
if strings.Contains(errMsg, "context canceled") || - strings.Contains(errMsg, "not leader") || - strings.Contains(errMsg, "not served") || + strings.Contains(errMsg, clierrs.NotLeaderErr) || + strings.Contains(errMsg, clierrs.NotServedErr) || strings.Contains(errMsg, "ErrKeyspaceNotAssigned") || strings.Contains(errMsg, "ErrKeyspaceGroupIsMerging") { continue @@ -560,48 +586,49 @@ func TestTwiceSplitKeyspaceGroup(t *testing.T) { func (suite *tsoKeyspaceGroupManagerTestSuite) TestTSOKeyspaceGroupMerge() { re := suite.Require() - // Create the keyspace group 1 and 2 with keyspaces [111, 222] and [333]. + // Create the keyspace group `firstID` and `secondID` with keyspaces [111, 222] and [333]. + firstID, secondID := suite.allocID(), suite.allocID() handlersutil.MustCreateKeyspaceGroup(re, suite.pdLeaderServer, &handlers.CreateKeyspaceGroupParams{ KeyspaceGroups: []*endpoint.KeyspaceGroup{ { - ID: 1, + ID: firstID, UserKind: endpoint.Standard.String(), Members: suite.tsoCluster.GetKeyspaceGroupMember(), Keyspaces: []uint32{111, 222}, }, { - ID: 2, + ID: secondID, UserKind: endpoint.Standard.String(), Members: suite.tsoCluster.GetKeyspaceGroupMember(), Keyspaces: []uint32{333}, }, }, }) - // Get a TSO from the keyspace group 1. + // Get a TSO from the keyspace group `firstID`. var ( ts pdpb.Timestamp err error ) testutil.Eventually(re, func() bool { - ts, err = suite.requestTSO(re, 222, 1) + ts, err = suite.requestTSO(re, 222, firstID) return err == nil && tsoutil.CompareTimestamp(&ts, &pdpb.Timestamp{}) > 0 }) ts.Physical += time.Hour.Milliseconds() - // Set the TSO of the keyspace group 1 to a large value. - err = suite.tsoCluster.GetPrimaryServer(222, 1).ResetTS(tsoutil.GenerateTS(&ts), false, true, 1) + // Set the TSO of the keyspace group `firstID` to a large value. + err = suite.tsoCluster.GetPrimaryServer(222, firstID).ResetTS(tsoutil.GenerateTS(&ts), false, true, firstID) re.NoError(err) - // Merge the keyspace group 1 and 2 to the default keyspace group. + // Merge the keyspace group `firstID` and `secondID` to the default keyspace group. handlersutil.MustMergeKeyspaceGroup(re, suite.pdLeaderServer, mcsutils.DefaultKeyspaceGroupID, &handlers.MergeKeyspaceGroupsParams{ - MergeList: []uint32{1, 2}, + MergeList: []uint32{firstID, secondID}, }) - // Check the keyspace group 1 and 2 are merged to the default keyspace group. + // Check the keyspace group `firstID` and `secondID` are merged to the default keyspace group. kg := handlersutil.MustLoadKeyspaceGroupByID(re, suite.pdLeaderServer, mcsutils.DefaultKeyspaceGroupID) re.Equal(mcsutils.DefaultKeyspaceGroupID, kg.ID) for _, keyspaceID := range []uint32{111, 222, 333} { re.Contains(kg.Keyspaces, keyspaceID) } re.True(kg.IsMergeTarget()) - // Check the merged TSO from the default keyspace group is greater than the TSO from the keyspace group 1. + // Check the merged TSO from the default keyspace group is greater than the TSO from the keyspace group `firstID`.
var mergedTS pdpb.Timestamp testutil.Eventually(re, func() bool { mergedTS, err = suite.requestTSO(re, 333, mcsutils.DefaultKeyspaceGroupID) @@ -610,31 +637,32 @@ func (suite *tsoKeyspaceGroupManagerTestSuite) TestTSOKeyspaceGroupMerge() { } return err == nil && tsoutil.CompareTimestamp(&mergedTS, &pdpb.Timestamp{}) > 0 }, testutil.WithTickInterval(5*time.Second), testutil.WithWaitFor(time.Minute)) - re.Greater(tsoutil.CompareTimestamp(&mergedTS, &ts), 0) + re.Positive(tsoutil.CompareTimestamp(&mergedTS, &ts)) } func (suite *tsoKeyspaceGroupManagerTestSuite) TestTSOKeyspaceGroupMergeClient() { re := suite.Require() - // Create the keyspace group 1 with keyspaces [111, 222, 333]. + // Create the keyspace group `id` with keyspaces [111, 222, 333]. + id := suite.allocID() handlersutil.MustCreateKeyspaceGroup(re, suite.pdLeaderServer, &handlers.CreateKeyspaceGroupParams{ KeyspaceGroups: []*endpoint.KeyspaceGroup{ { - ID: 1, + ID: id, UserKind: endpoint.Standard.String(), Members: suite.tsoCluster.GetKeyspaceGroupMember(), Keyspaces: []uint32{111, 222, 333}, }, }, }) - kg1 := handlersutil.MustLoadKeyspaceGroupByID(re, suite.pdLeaderServer, 1) - re.Equal(uint32(1), kg1.ID) + kg1 := handlersutil.MustLoadKeyspaceGroupByID(re, suite.pdLeaderServer, id) + re.Equal(id, kg1.ID) re.Equal([]uint32{111, 222, 333}, kg1.Keyspaces) re.False(kg1.IsMerging()) // Request the TSO for keyspace 222 concurrently via client. - cancel := suite.dispatchClient(re, 222, 1) + cancel := suite.dispatchClient(re, 222, id) // Merge the keyspace group 1 to the default keyspace group. handlersutil.MustMergeKeyspaceGroup(re, suite.pdLeaderServer, mcsutils.DefaultKeyspaceGroupID, &handlers.MergeKeyspaceGroupsParams{ - MergeList: []uint32{1}, + MergeList: []uint32{id}, }) // Wait for the default keyspace group to finish the merge. waitFinishMerge(re, suite.pdLeaderServer, mcsutils.DefaultKeyspaceGroupID, []uint32{111, 222, 333}) @@ -662,24 +690,25 @@ func waitFinishMerge( func (suite *tsoKeyspaceGroupManagerTestSuite) TestTSOKeyspaceGroupMergeBeforeInitTSO() { re := suite.Require() - // Make sure the TSO of keyspace group 1 won't be initialized before it's merged. + // Make sure the TSO of keyspace group `id` won't be initialized before it's merged. re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/tso/failedToSaveTimestamp", `return(true)`)) // Request the TSO for the default keyspace concurrently via client. + id := suite.allocID() cancel := suite.dispatchClient(re, mcsutils.DefaultKeyspaceID, mcsutils.DefaultKeyspaceGroupID) // Create the keyspace group 1 with keyspaces [111, 222, 333]. handlersutil.MustCreateKeyspaceGroup(re, suite.pdLeaderServer, &handlers.CreateKeyspaceGroupParams{ KeyspaceGroups: []*endpoint.KeyspaceGroup{ { - ID: 1, + ID: id, UserKind: endpoint.Standard.String(), Members: suite.tsoCluster.GetKeyspaceGroupMember(), Keyspaces: []uint32{111, 222, 333}, }, }, }) - // Merge the keyspace group 1 to the default keyspace group. + // Merge the keyspace group `id` to the default keyspace group. handlersutil.MustMergeKeyspaceGroup(re, suite.pdLeaderServer, mcsutils.DefaultKeyspaceGroupID, &handlers.MergeKeyspaceGroupsParams{ - MergeList: []uint32{1}, + MergeList: []uint32{id}, }) // Wait for the default keyspace group to finish the merge. 
waitFinishMerge(re, suite.pdLeaderServer, mcsutils.DefaultKeyspaceGroupID, []uint32{111, 222, 333}) @@ -766,19 +795,19 @@ func (suite *tsoKeyspaceGroupManagerTestSuite) TestKeyspaceGroupMergeIntoDefault keyspaces = make([]uint32, 0, keyspaceGroupNum) ) for i := 1; i <= keyspaceGroupNum; i++ { + id := suite.allocID() keyspaceGroups = append(keyspaceGroups, &endpoint.KeyspaceGroup{ - ID: uint32(i), + ID: id, UserKind: endpoint.UserKind(rand.Intn(int(endpoint.UserKindCount))).String(), - Keyspaces: []uint32{uint32(i)}, + Keyspaces: []uint32{id}, }) - keyspaces = append(keyspaces, uint32(i)) - if len(keyspaceGroups) < etcdutil.MaxEtcdTxnOps/2 && i != keyspaceGroupNum { + keyspaces = append(keyspaces, id) + if i != keyspaceGroupNum { continue } handlersutil.MustCreateKeyspaceGroup(re, suite.pdLeaderServer, &handlers.CreateKeyspaceGroupParams{ KeyspaceGroups: keyspaceGroups, }) - keyspaceGroups = keyspaceGroups[:0] } // Check if all the keyspace groups are created. groups := handlersutil.MustLoadKeyspaceGroups(re, suite.pdLeaderServer, "0", "0") @@ -787,9 +816,11 @@ func (suite *tsoKeyspaceGroupManagerTestSuite) TestKeyspaceGroupMergeIntoDefault // Check if the first keyspace group is served. svr := suite.tsoCluster.WaitForDefaultPrimaryServing(re) re.NotNil(svr) - // Check if the last keyspace group is served. - svr = suite.tsoCluster.WaitForPrimaryServing(re, uint32(keyspaceGroupNum), uint32(keyspaceGroupNum)) - re.NotNil(svr) + for i := 1; i < keyspaceGroupNum; i++ { + // Check if the keyspace group is served. + svr = suite.tsoCluster.WaitForPrimaryServing(re, keyspaceGroups[i].ID, keyspaceGroups[i].ID) + re.NotNil(svr) + } // Merge all the keyspace groups into the default keyspace group. handlersutil.MustMergeKeyspaceGroup(re, suite.pdLeaderServer, mcsutils.DefaultKeyspaceGroupID, &handlers.MergeKeyspaceGroupsParams{ MergeAllIntoDefault: true, diff --git a/tests/integrations/mcs/tso/proxy_test.go b/tests/integrations/mcs/tso/proxy_test.go index a19776ffc48..43877f262e2 100644 --- a/tests/integrations/mcs/tso/proxy_test.go +++ b/tests/integrations/mcs/tso/proxy_test.go @@ -34,6 +34,7 @@ import ( "github.com/tikv/pd/tests" "go.uber.org/zap" "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" ) type tsoProxyTestSuite struct { @@ -83,7 +84,7 @@ func (s *tsoProxyTestSuite) SetupSuite() { } func (s *tsoProxyTestSuite) TearDownSuite() { - s.cleanupGRPCStreams(s.cleanupFuncs) + cleanupGRPCStreams(s.cleanupFuncs) s.tsoCluster.Destroy() s.apiCluster.Destroy() s.cancel() @@ -111,7 +112,7 @@ func (s *tsoProxyTestSuite) TestTSOProxyWorksWithCancellation() { for j := 0; j < 10; j++ { s.verifyTSOProxy(s.ctx, streams, cleanupFuncs, 10, true) } - s.cleanupGRPCStreams(cleanupFuncs) + cleanupGRPCStreams(cleanupFuncs) } }() for i := 0; i < 10; i++ { @@ -124,7 +125,7 @@ func (s *tsoProxyTestSuite) TestTSOProxyWorksWithCancellation() { // TestTSOProxyStress tests the TSO Proxy can work correctly under the stress. gPRC and TSO failures are allowed, // but the TSO Proxy should not panic, blocked or deadlocked, and if it returns a timestamp, it should be a valid // timestamp monotonic increasing. After the stress, the TSO Proxy should still work correctly. -func TestTSOProxyStress(t *testing.T) { +func TestTSOProxyStress(_ *testing.T) { s := new(tsoProxyTestSuite) s.SetT(&testing.T{}) s.SetupSuite() @@ -153,7 +154,7 @@ func TestTSOProxyStress(t *testing.T) { cleanupFuncs = append(cleanupFuncs, cleanupFuncsTemp...) 
s.verifyTSOProxy(ctxTimeout, streams, cleanupFuncs, 50, false) } - s.cleanupGRPCStreams(cleanupFuncs) + cleanupGRPCStreams(cleanupFuncs) log.Info("the stress test completed.") // Verify the TSO Proxy can still work correctly after the stress. @@ -177,7 +178,7 @@ func (s *tsoProxyTestSuite) TestTSOProxyClientsWithSameContext() { defer cancel() for i := 0; i < clientCount; i++ { - conn, err := grpc.Dial(strings.TrimPrefix(s.backendEndpoints, "http://"), grpc.WithInsecure()) + conn, err := grpc.Dial(strings.TrimPrefix(s.backendEndpoints, "http://"), grpc.WithTransportCredentials(insecure.NewCredentials())) re.NoError(err) grpcPDClient := pdpb.NewPDClient(conn) stream, err := grpcPDClient.Tso(ctx) @@ -191,7 +192,7 @@ func (s *tsoProxyTestSuite) TestTSOProxyClientsWithSameContext() { } s.verifyTSOProxy(ctx, streams, cleanupFuncs, 100, true) - s.cleanupGRPCStreams(cleanupFuncs) + cleanupGRPCStreams(cleanupFuncs) } // TestTSOProxyRecvFromClientTimeout tests the TSO Proxy can properly close the grpc stream on the server side @@ -206,7 +207,7 @@ func (s *tsoProxyTestSuite) TestTSOProxyRecvFromClientTimeout() { time.Sleep(2 * time.Second) err := streams[0].Send(s.defaultReq) re.Error(err) - s.cleanupGRPCStreams(cleanupFuncs) + cleanupGRPCStreams(cleanupFuncs) re.NoError(failpoint.Disable("github.com/tikv/pd/server/tsoProxyRecvFromClientTimeout")) // Verify the streams with no fault injection can work correctly. @@ -225,7 +226,7 @@ func (s *tsoProxyTestSuite) TestTSOProxyFailToSendToClient() { re.NoError(err) _, err = streams[0].Recv() re.Error(err) - s.cleanupGRPCStreams(cleanupFuncs) + cleanupGRPCStreams(cleanupFuncs) re.NoError(failpoint.Disable("github.com/tikv/pd/server/tsoProxyFailToSendToClient")) s.verifyTSOProxy(s.ctx, s.streams, s.cleanupFuncs, 1, true) @@ -243,7 +244,7 @@ func (s *tsoProxyTestSuite) TestTSOProxySendToTSOTimeout() { re.NoError(err) _, err = streams[0].Recv() re.Error(err) - s.cleanupGRPCStreams(cleanupFuncs) + cleanupGRPCStreams(cleanupFuncs) re.NoError(failpoint.Disable("github.com/tikv/pd/server/tsoProxySendToTSOTimeout")) s.verifyTSOProxy(s.ctx, s.streams, s.cleanupFuncs, 1, true) @@ -261,13 +262,13 @@ func (s *tsoProxyTestSuite) TestTSOProxyRecvFromTSOTimeout() { re.NoError(err) _, err = streams[0].Recv() re.Error(err) - s.cleanupGRPCStreams(cleanupFuncs) + cleanupGRPCStreams(cleanupFuncs) re.NoError(failpoint.Disable("github.com/tikv/pd/server/tsoProxyRecvFromTSOTimeout")) s.verifyTSOProxy(s.ctx, s.streams, s.cleanupFuncs, 1, true) } -func (s *tsoProxyTestSuite) cleanupGRPCStreams(cleanupFuncs []testutil.CleanupFunc) { +func cleanupGRPCStreams(cleanupFuncs []testutil.CleanupFunc) { for i := 0; i < len(cleanupFuncs); i++ { if cleanupFuncs[i] != nil { cleanupFuncs[i]() @@ -276,7 +277,7 @@ func (s *tsoProxyTestSuite) cleanupGRPCStreams(cleanupFuncs []testutil.CleanupFu } } -func (s *tsoProxyTestSuite) cleanupGRPCStream( +func cleanupGRPCStream( streams []pdpb.PD_TsoClient, cleanupFuncs []testutil.CleanupFunc, index int, ) { if cleanupFuncs[index] != nil { @@ -317,7 +318,7 @@ func (s *tsoProxyTestSuite) verifyTSOProxy( for j := 0; j < requestsPerClient; j++ { select { case <-ctx.Done(): - s.cleanupGRPCStream(streams, cleanupFuncs, i) + cleanupGRPCStream(streams, cleanupFuncs, i) return default: } @@ -326,14 +327,14 @@ func (s *tsoProxyTestSuite) verifyTSOProxy( err := streams[i].Send(req) if err != nil && !mustReliable { respErr.Store(err) - s.cleanupGRPCStream(streams, cleanupFuncs, i) + cleanupGRPCStream(streams, cleanupFuncs, i) return } re.NoError(err) resp, err := 
streams[i].Recv() if err != nil && !mustReliable { respErr.Store(err) - s.cleanupGRPCStream(streams, cleanupFuncs, i) + cleanupGRPCStream(streams, cleanupFuncs, i) return } re.NoError(err) @@ -375,7 +376,7 @@ func createTSOStreams( streams := make([]pdpb.PD_TsoClient, clientCount) for i := 0; i < clientCount; i++ { - conn, err := grpc.Dial(strings.TrimPrefix(backendEndpoints, "http://"), grpc.WithInsecure()) + conn, err := grpc.Dial(strings.TrimPrefix(backendEndpoints, "http://"), grpc.WithTransportCredentials(insecure.NewCredentials())) re.NoError(err) grpcPDClient := pdpb.NewPDClient(conn) cctx, cancel := context.WithCancel(ctx) @@ -494,7 +495,7 @@ func benchmarkTSOProxyNClients(clientCount int, b *testing.B) { } b.StopTimer() - suite.cleanupGRPCStreams(cleanupFuncs) + cleanupGRPCStreams(cleanupFuncs) suite.TearDownSuite() } diff --git a/tests/integrations/mcs/tso/server_test.go b/tests/integrations/mcs/tso/server_test.go index a6a2c42acc9..260395e4209 100644 --- a/tests/integrations/mcs/tso/server_test.go +++ b/tests/integrations/mcs/tso/server_test.go @@ -45,6 +45,7 @@ import ( "go.etcd.io/etcd/clientv3" "go.uber.org/goleak" "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" ) func TestMain(m *testing.M) { @@ -103,20 +104,20 @@ func (suite *tsoServerTestSuite) TestTSOServerStartAndStopNormally() { }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) // Test registered GRPC Service - cc, err := grpc.DialContext(suite.ctx, s.GetAddr(), grpc.WithInsecure()) + cc, err := grpc.DialContext(suite.ctx, s.GetAddr(), grpc.WithTransportCredentials(insecure.NewCredentials())) re.NoError(err) cc.Close() url := s.GetAddr() + tsoapi.APIPathPrefix + "/admin/reset-ts" // Test reset ts input := []byte(`{"tso":"121312", "force-use-larger":true}`) - err = testutil.CheckPostJSON(dialClient, url, input, + err = testutil.CheckPostJSON(tests.TestDialClient, url, input, testutil.StatusOK(re), testutil.StringContain(re, "Reset ts successfully")) re.NoError(err) // Test reset ts with invalid tso input = []byte(`{}`) - err = testutil.CheckPostJSON(dialClient, suite.backendEndpoints+"/pd/api/v1/admin/reset-ts", input, + err = testutil.CheckPostJSON(tests.TestDialClient, suite.backendEndpoints+"/pd/api/v1/admin/reset-ts", input, testutil.StatusNotOK(re), testutil.StringContain(re, "invalid tso value")) re.NoError(err) } @@ -186,6 +187,7 @@ func checkTSOPath(re *require.Assertions, isAPIServiceMode bool) { defer cleanup() cli := mcs.SetupClientWithAPIContext(ctx, re, pd.NewAPIContextV2(""), []string{backendEndpoints}) + defer cli.Close() physical, logical, err := cli.GetTS(ctx) re.NoError(err) ts := tsoutil.ComposeTS(physical, logical) @@ -581,7 +583,7 @@ func (suite *CommonTestSuite) TestBootstrapDefaultKeyspaceGroup() { // check the default keyspace group check := func() { - resp, err := http.Get(suite.pdLeader.GetServer().GetConfig().AdvertiseClientUrls + "/pd/api/v2/tso/keyspace-groups") + resp, err := tests.TestDialClient.Get(suite.pdLeader.GetServer().GetConfig().AdvertiseClientUrls + "/pd/api/v2/tso/keyspace-groups") re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) diff --git a/tests/integrations/realcluster/Makefile b/tests/integrations/realcluster/Makefile index 4817b94b5da..e161d52a86e 100644 --- a/tests/integrations/realcluster/Makefile +++ b/tests/integrations/realcluster/Makefile @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-ROOT_PATH := ../../.. +ROOT_PATH := $(shell pwd)/../../.. GO_TOOLS_BIN_PATH := $(ROOT_PATH)/.tools/bin PATH := $(GO_TOOLS_BIN_PATH):$(PATH) SHELL := env PATH='$(PATH)' GOBIN='$(GO_TOOLS_BIN_PATH)' $(shell which bash) @@ -22,8 +22,6 @@ static: install-tools @ gofmt -s -l -d . 2>&1 | awk '{ print } END { if (NR > 0) { exit 1 } }' @ echo "golangci-lint ..." @ golangci-lint run -c $(ROOT_PATH)/.golangci.yml --verbose ./... --allow-parallel-runners - @ echo "revive ..." - @ revive -formatter friendly -config $(ROOT_PATH)/revive.toml ./... tidy: @ go mod tidy diff --git a/tests/integrations/realcluster/deploy.sh b/tests/integrations/realcluster/deploy.sh index d6cd0b27f72..8cce60e8ee6 100755 --- a/tests/integrations/realcluster/deploy.sh +++ b/tests/integrations/realcluster/deploy.sh @@ -15,10 +15,12 @@ curl --proto '=https' --tlsv1.2 -sSf https://tiup-mirrors.pingcap.com/install.sh $TIUP_BIN_DIR update playground cd ../../.. -if [ ! -d "bin" ] || [ ! -e "bin/tikv-server" ] && [ ! -e "bin/tidb-server" ] && [ ! -e "bin/pd-server" ] && [ ! -e "bin/tiflash" ]; then +if [ ! -d "bin" ] || [ ! -e "bin/tikv-server" ] && [ ! -e "bin/tidb-server" ] && [ ! -e "bin/tiflash" ]; then color-green "downloading binaries..." color-green "this may take a few minutes, you can also download them manually and put them in the bin directory." + make pd-server WITH_RACE=1 $TIUP_BIN_DIR playground nightly --kv 3 --tiflash 1 --db 1 --pd 3 --without-monitor --tag pd_test \ + --pd.binpath ./bin/pd-server \ > $CUR_PATH/playground.log 2>&1 & else color-green "using existing binaries..." diff --git a/tests/integrations/realcluster/mock_db.go b/tests/integrations/realcluster/mock_db.go index 255ff6c0057..2a636b9b86b 100644 --- a/tests/integrations/realcluster/mock_db.go +++ b/tests/integrations/realcluster/mock_db.go @@ -85,7 +85,7 @@ func (db *TestDB) Gorm() *gorm.DB { } // MustExec executes a query -func (db *TestDB) MustExec(sql string, values ...interface{}) { +func (db *TestDB) MustExec(sql string, values ...any) { err := db.inner.Exec(sql, values...).Error db.require.NoError(err) } diff --git a/tests/integrations/tso/client_test.go b/tests/integrations/tso/client_test.go index c8e8f5b2f52..d4f484087cf 100644 --- a/tests/integrations/tso/client_test.go +++ b/tests/integrations/tso/client_test.go @@ -21,6 +21,7 @@ import ( "math/rand" "strings" "sync" + "sync/atomic" "testing" "time" @@ -66,6 +67,10 @@ type tsoClientTestSuite struct { clients []pd.Client } +func (suite *tsoClientTestSuite) getBackendEndpoints() []string { + return strings.Split(suite.backendEndpoints, ",") +} + func TestLegacyTSOClient(t *testing.T) { suite.Run(t, &tsoClientTestSuite{ legacy: true, @@ -98,7 +103,7 @@ func (suite *tsoClientTestSuite) SetupSuite() { suite.keyspaceIDs = make([]uint32, 0) if suite.legacy { - client, err := pd.NewClientWithContext(suite.ctx, strings.Split(suite.backendEndpoints, ","), pd.SecurityOption{}, pd.WithForwardingOption(true)) + client, err := pd.NewClientWithContext(suite.ctx, suite.getBackendEndpoints(), pd.SecurityOption{}, pd.WithForwardingOption(true)) re.NoError(err) innerClient, ok := client.(interface{ GetServiceDiscovery() pd.ServiceDiscovery }) re.True(ok) @@ -173,7 +178,7 @@ func (suite *tsoClientTestSuite) waitForAllKeyspaceGroupsInServing(re *require.A // Create clients and make sure they all have discovered the tso service. 
suite.clients = mcs.WaitForMultiKeyspacesTSOAvailable( - suite.ctx, re, suite.keyspaceIDs, strings.Split(suite.backendEndpoints, ",")) + suite.ctx, re, suite.keyspaceIDs, suite.getBackendEndpoints()) re.Equal(len(suite.keyspaceIDs), len(suite.clients)) } @@ -183,6 +188,9 @@ func (suite *tsoClientTestSuite) TearDownSuite() { suite.tsoCluster.Destroy() } suite.cluster.Destroy() + for _, client := range suite.clients { + client.Close() + } } func (suite *tsoClientTestSuite) TestGetTS() { @@ -251,7 +259,8 @@ func (suite *tsoClientTestSuite) TestDiscoverTSOServiceWithLegacyPath() { ctx, cancel := context.WithCancel(suite.ctx) defer cancel() client := mcs.SetupClientWithKeyspaceID( - ctx, re, keyspaceID, strings.Split(suite.backendEndpoints, ",")) + ctx, re, keyspaceID, suite.getBackendEndpoints()) + defer client.Close() var lastTS uint64 for j := 0; j < tsoRequestRound; j++ { physical, logical, err := client.GetTS(ctx) @@ -265,10 +274,6 @@ func (suite *tsoClientTestSuite) TestDiscoverTSOServiceWithLegacyPath() { // TestGetMinTS tests the correctness of GetMinTS. func (suite *tsoClientTestSuite) TestGetMinTS() { re := suite.Require() - if !suite.legacy { - suite.waitForAllKeyspaceGroupsInServing(re) - } - var wg sync.WaitGroup wg.Add(tsoRequestConcurrencyNumber * len(suite.clients)) for i := 0; i < tsoRequestConcurrencyNumber; i++ { @@ -420,6 +425,52 @@ func (suite *tsoClientTestSuite) TestRandomShutdown() { re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/tso/fastUpdatePhysicalInterval")) } +func (suite *tsoClientTestSuite) TestGetTSWhileResettingTSOClient() { + re := suite.Require() + re.NoError(failpoint.Enable("github.com/tikv/pd/client/delayDispatchTSORequest", "return(true)")) + var ( + clients []pd.Client + stopSignal atomic.Bool + wg sync.WaitGroup + ) + // Create independent clients to prevent interfering with other tests. + if suite.legacy { + client, err := pd.NewClientWithContext(suite.ctx, suite.getBackendEndpoints(), pd.SecurityOption{}, pd.WithForwardingOption(true)) + re.NoError(err) + clients = []pd.Client{client} + } else { + clients = mcs.WaitForMultiKeyspacesTSOAvailable(suite.ctx, re, suite.keyspaceIDs, suite.getBackendEndpoints()) + } + wg.Add(tsoRequestConcurrencyNumber * len(clients)) + for i := 0; i < tsoRequestConcurrencyNumber; i++ { + for _, client := range clients { + go func(client pd.Client) { + defer wg.Done() + var lastTS uint64 + for !stopSignal.Load() { + physical, logical, err := client.GetTS(suite.ctx) + if err != nil { + re.ErrorContains(err, context.Canceled.Error()) + } else { + ts := tsoutil.ComposeTS(physical, logical) + re.Less(lastTS, ts) + lastTS = ts + } + } + }(client) + } + } + // Reset the TSO clients while requesting TSO concurrently. + for i := 0; i < tsoRequestConcurrencyNumber; i++ { + for _, client := range clients { + client.(interface{ ResetTSOClient() }).ResetTSOClient() + } + } + stopSignal.Store(true) + wg.Wait() + re.NoError(failpoint.Disable("github.com/tikv/pd/client/delayDispatchTSORequest")) +} + // When we upgrade the PD cluster, there may be a period of time that the old and new PDs are running at the same time. 
func TestMixedTSODeployment(t *testing.T) { re := require.New(t) @@ -491,6 +542,7 @@ func TestUpgradingAPIandTSOClusters(t *testing.T) { pdClient, err := pd.NewClientWithContext(context.Background(), []string{backendEndpoints}, pd.SecurityOption{}, pd.WithMaxErrorRetry(1)) re.NoError(err) + defer pdClient.Close() // Create a TSO cluster which has 2 servers tsoCluster, err := tests.NewTestTSOCluster(ctx, 2, backendEndpoints) diff --git a/tests/integrations/tso/server_test.go b/tests/integrations/tso/server_test.go index ac3d914aa80..5590ba68d37 100644 --- a/tests/integrations/tso/server_test.go +++ b/tests/integrations/tso/server_test.go @@ -152,12 +152,12 @@ func (suite *tsoServerTestSuite) TestConcurrentlyReset() { for i := 0; i < 2; i++ { go func() { defer wg.Done() - for j := 0; j <= 100; j++ { + for j := 0; j <= 50; j++ { // Get a copy of now then call base.add, because now is shared by all goroutines // and now.add() will add to itself which isn't atomic and multi-goroutine safe. base := now - physical := base.Add(time.Duration(2*j)*time.Minute).UnixNano() / int64(time.Millisecond) - ts := uint64(physical << 18) + physical := base.Add(time.Duration(j)*time.Minute).UnixNano() / int64(time.Millisecond) + ts := uint64(physical) << 18 suite.resetTS(ts, false, false) } }() diff --git a/tests/registry/registry_test.go b/tests/registry/registry_test.go index d2661cda616..416a7420d2e 100644 --- a/tests/registry/registry_test.go +++ b/tests/registry/registry_test.go @@ -30,8 +30,8 @@ import ( "github.com/tikv/pd/tests" "go.uber.org/goleak" "google.golang.org/grpc" - - "google.golang.org/grpc/test/grpc_testing" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/interop/grpc_testing" ) func TestMain(m *testing.M) { @@ -41,18 +41,18 @@ func TestMain(m *testing.M) { type testServiceRegistry struct { } -func (t *testServiceRegistry) RegisterGRPCService(g *grpc.Server) { +func (*testServiceRegistry) RegisterGRPCService(g *grpc.Server) { grpc_testing.RegisterTestServiceServer(g, &grpc_testing.UnimplementedTestServiceServer{}) } -func (t *testServiceRegistry) RegisterRESTHandler(userDefineHandlers map[string]http.Handler) { +func (*testServiceRegistry) RegisterRESTHandler(userDefineHandlers map[string]http.Handler) { group := apiutil.APIServiceGroup{ Name: "my-http-service", Version: "v1alpha1", IsCore: false, PathPrefix: "/my-service", } - handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + handler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) w.Write([]byte("Hello World!")) }) @@ -80,7 +80,7 @@ func TestRegistryService(t *testing.T) { leader := cluster.GetLeaderServer() // Test registered GRPC Service - cc, err := grpc.DialContext(ctx, strings.TrimPrefix(leader.GetAddr(), "http://"), grpc.WithInsecure()) + cc, err := grpc.DialContext(ctx, strings.TrimPrefix(leader.GetAddr(), "http://"), grpc.WithTransportCredentials(insecure.NewCredentials())) re.NoError(err) defer cc.Close() grpcClient := grpc_testing.NewTestServiceClient(cc) diff --git a/tests/scheduling_cluster.go b/tests/scheduling_cluster.go index 1768c4128cc..434a6bd9a48 100644 --- a/tests/scheduling_cluster.go +++ b/tests/scheduling_cluster.go @@ -113,7 +113,7 @@ func (tc *TestSchedulingCluster) WaitForPrimaryServing(re *require.Assertions) * } } return false - }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) + }, testutil.WithWaitFor(10*time.Second), testutil.WithTickInterval(50*time.Millisecond)) return 
primary } diff --git a/tests/server/api/api_test.go b/tests/server/api/api_test.go index c80048b141f..f59e85651f5 100644 --- a/tests/server/api/api_test.go +++ b/tests/server/api/api_test.go @@ -51,7 +51,7 @@ func TestReconnect(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - cluster, err := tests.NewTestCluster(ctx, 3, func(conf *config.Config, serverName string) { + cluster, err := tests.NewTestCluster(ctx, 3, func(conf *config.Config, _ string) { conf.TickInterval = typeutil.Duration{Duration: 50 * time.Millisecond} conf.ElectionInterval = typeutil.Duration{Duration: 250 * time.Millisecond} }) @@ -66,7 +66,7 @@ func TestReconnect(t *testing.T) { re.NotEmpty(leader) for name, s := range cluster.GetServers() { if name != leader { - res, err := http.Get(s.GetConfig().AdvertiseClientUrls + "/pd/api/v1/version") + res, err := tests.TestDialClient.Get(s.GetConfig().AdvertiseClientUrls + "/pd/api/v1/version") re.NoError(err) res.Body.Close() re.Equal(http.StatusOK, res.StatusCode) @@ -83,7 +83,7 @@ func TestReconnect(t *testing.T) { for name, s := range cluster.GetServers() { if name != leader { testutil.Eventually(re, func() bool { - res, err := http.Get(s.GetConfig().AdvertiseClientUrls + "/pd/api/v1/version") + res, err := tests.TestDialClient.Get(s.GetConfig().AdvertiseClientUrls + "/pd/api/v1/version") re.NoError(err) defer res.Body.Close() return res.StatusCode == http.StatusOK @@ -98,7 +98,7 @@ func TestReconnect(t *testing.T) { for name, s := range cluster.GetServers() { if name != leader && name != newLeader { testutil.Eventually(re, func() bool { - res, err := http.Get(s.GetConfig().AdvertiseClientUrls + "/pd/api/v1/version") + res, err := tests.TestDialClient.Get(s.GetConfig().AdvertiseClientUrls + "/pd/api/v1/version") re.NoError(err) defer res.Body.Close() return res.StatusCode == http.StatusServiceUnavailable @@ -142,21 +142,21 @@ func (suite *middlewareTestSuite) TestRequestInfoMiddleware() { leader := suite.cluster.GetLeaderServer() re.NotNil(leader) - input := map[string]interface{}{ + input := map[string]any{ "enable-audit": "true", } data, err := json.Marshal(input) re.NoError(err) req, _ := http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) - resp, err := dialClient.Do(req) + resp, err := tests.TestDialClient.Do(req) re.NoError(err) resp.Body.Close() re.True(leader.GetServer().GetServiceMiddlewarePersistOptions().IsAuditEnabled()) - labels := make(map[string]interface{}) + labels := make(map[string]any) labels["testkey"] = "testvalue" data, _ = json.Marshal(labels) - resp, err = dialClient.Post(leader.GetAddr()+"/pd/api/v1/debug/pprof/profile?force=true", "application/json", bytes.NewBuffer(data)) + resp, err = tests.TestDialClient.Post(leader.GetAddr()+"/pd/api/v1/debug/pprof/profile?seconds=1", "application/json", bytes.NewBuffer(data)) re.NoError(err) _, err = io.ReadAll(resp.Body) resp.Body.Close() @@ -164,25 +164,25 @@ func (suite *middlewareTestSuite) TestRequestInfoMiddleware() { re.Equal(http.StatusOK, resp.StatusCode) re.Equal("Profile", resp.Header.Get("service-label")) - re.Equal("{\"force\":[\"true\"]}", resp.Header.Get("url-param")) + re.Equal("{\"seconds\":[\"1\"]}", resp.Header.Get("url-param")) re.Equal("{\"testkey\":\"testvalue\"}", resp.Header.Get("body-param")) re.Equal("HTTP/1.1/POST:/pd/api/v1/debug/pprof/profile", resp.Header.Get("method")) re.Equal("anonymous", resp.Header.Get("caller-id")) re.Equal("127.0.0.1", 
resp.Header.Get("ip")) - input = map[string]interface{}{ + input = map[string]any{ "enable-audit": "false", } data, err = json.Marshal(input) re.NoError(err) req, _ = http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) resp.Body.Close() re.False(leader.GetServer().GetServiceMiddlewarePersistOptions().IsAuditEnabled()) header := mustRequestSuccess(re, leader.GetServer()) - re.Equal("", header.Get("service-label")) + re.Equal("GetVersion", header.Get("service-label")) re.NoError(failpoint.Disable("github.com/tikv/pd/server/api/addRequestInfoMiddleware")) } @@ -194,12 +194,12 @@ func BenchmarkDoRequestWithServiceMiddleware(b *testing.B) { cluster.RunInitialServers() cluster.WaitLeader() leader := cluster.GetLeaderServer() - input := map[string]interface{}{ + input := map[string]any{ "enable-audit": "true", } data, _ := json.Marshal(input) req, _ := http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) - resp, _ := dialClient.Do(req) + resp, _ := tests.TestDialClient.Do(req) resp.Body.Close() b.StartTimer() for i := 0; i < b.N; i++ { @@ -213,26 +213,26 @@ func (suite *middlewareTestSuite) TestRateLimitMiddleware() { re := suite.Require() leader := suite.cluster.GetLeaderServer() re.NotNil(leader) - input := map[string]interface{}{ + input := map[string]any{ "enable-rate-limit": "true", } data, err := json.Marshal(input) re.NoError(err) req, _ := http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) - resp, err := dialClient.Do(req) + resp, err := tests.TestDialClient.Do(req) re.NoError(err) resp.Body.Close() re.True(leader.GetServer().GetServiceMiddlewarePersistOptions().IsRateLimitEnabled()) // returns StatusOK when no rate-limit config req, _ = http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/admin/log", strings.NewReader("\"info\"")) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) _, err = io.ReadAll(resp.Body) resp.Body.Close() re.NoError(err) re.Equal(http.StatusOK, resp.StatusCode) - input = make(map[string]interface{}) + input = make(map[string]any) input["type"] = "label" input["label"] = "SetLogLevel" input["qps"] = 0.5 @@ -240,7 +240,7 @@ func (suite *middlewareTestSuite) TestRateLimitMiddleware() { jsonBody, err := json.Marshal(input) re.NoError(err) req, _ = http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config/rate-limit", bytes.NewBuffer(jsonBody)) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) _, err = io.ReadAll(resp.Body) resp.Body.Close() @@ -249,7 +249,7 @@ func (suite *middlewareTestSuite) TestRateLimitMiddleware() { for i := 0; i < 3; i++ { req, _ = http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/admin/log", strings.NewReader("\"info\"")) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) data, err := io.ReadAll(resp.Body) resp.Body.Close() @@ -266,7 +266,7 @@ func (suite *middlewareTestSuite) TestRateLimitMiddleware() { time.Sleep(time.Second * 2) for i := 0; i < 2; i++ { req, _ = http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/admin/log", strings.NewReader("\"info\"")) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) data, err := io.ReadAll(resp.Body) 
resp.Body.Close() @@ -283,7 +283,7 @@ func (suite *middlewareTestSuite) TestRateLimitMiddleware() { time.Sleep(time.Second) for i := 0; i < 2; i++ { req, _ = http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/admin/log", strings.NewReader("\"info\"")) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) data, err := io.ReadAll(resp.Body) resp.Body.Close() @@ -310,7 +310,7 @@ func (suite *middlewareTestSuite) TestRateLimitMiddleware() { for i := 0; i < 3; i++ { req, _ = http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/admin/log", strings.NewReader("\"info\"")) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) data, err := io.ReadAll(resp.Body) resp.Body.Close() @@ -327,7 +327,7 @@ func (suite *middlewareTestSuite) TestRateLimitMiddleware() { time.Sleep(time.Second * 2) for i := 0; i < 2; i++ { req, _ = http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/admin/log", strings.NewReader("\"info\"")) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) data, err := io.ReadAll(resp.Body) resp.Body.Close() @@ -344,7 +344,7 @@ func (suite *middlewareTestSuite) TestRateLimitMiddleware() { time.Sleep(time.Second) for i := 0; i < 2; i++ { req, _ = http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/admin/log", strings.NewReader("\"info\"")) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) data, err := io.ReadAll(resp.Body) resp.Body.Close() @@ -353,26 +353,38 @@ func (suite *middlewareTestSuite) TestRateLimitMiddleware() { re.Equal(string(data), fmt.Sprintf("%s\n", http.StatusText(http.StatusTooManyRequests))) } - input = map[string]interface{}{ + input = map[string]any{ "enable-rate-limit": "false", } data, err = json.Marshal(input) re.NoError(err) req, _ = http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) resp.Body.Close() re.False(leader.GetServer().GetServiceMiddlewarePersistOptions().IsRateLimitEnabled()) for i := 0; i < 3; i++ { req, _ = http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/admin/log", strings.NewReader("\"info\"")) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) _, err = io.ReadAll(resp.Body) resp.Body.Close() re.NoError(err) re.Equal(http.StatusOK, resp.StatusCode) } + + // reset rate limit + input = map[string]any{ + "enable-rate-limit": "true", + } + data, err = json.Marshal(input) + re.NoError(err) + req, _ = http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) + resp, err = tests.TestDialClient.Do(req) + re.NoError(err) + resp.Body.Close() + re.True(leader.GetServer().GetServiceMiddlewarePersistOptions().IsRateLimitEnabled()) } func (suite *middlewareTestSuite) TestSwaggerUrl() { @@ -380,7 +392,7 @@ func (suite *middlewareTestSuite) TestSwaggerUrl() { leader := suite.cluster.GetLeaderServer() re.NotNil(leader) req, _ := http.NewRequest(http.MethodGet, leader.GetAddr()+"/swagger/ui/index", http.NoBody) - resp, err := dialClient.Do(req) + resp, err := tests.TestDialClient.Do(req) re.NoError(err) re.Equal(http.StatusNotFound, resp.StatusCode) resp.Body.Close() @@ -390,26 +402,26 @@ func (suite *middlewareTestSuite) TestAuditPrometheusBackend() { re := suite.Require() leader := suite.cluster.GetLeaderServer() 
re.NotNil(leader) - input := map[string]interface{}{ + input := map[string]any{ "enable-audit": "true", } data, err := json.Marshal(input) re.NoError(err) req, _ := http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) - resp, err := dialClient.Do(req) + resp, err := tests.TestDialClient.Do(req) re.NoError(err) resp.Body.Close() re.True(leader.GetServer().GetServiceMiddlewarePersistOptions().IsAuditEnabled()) timeUnix := time.Now().Unix() - 20 req, _ = http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/api/v1/trend?from=%d", leader.GetAddr(), timeUnix), http.NoBody) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) _, err = io.ReadAll(resp.Body) resp.Body.Close() re.NoError(err) req, _ = http.NewRequest(http.MethodGet, leader.GetAddr()+"/metrics", http.NoBody) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) defer resp.Body.Close() content, _ := io.ReadAll(resp.Body) @@ -428,27 +440,27 @@ func (suite *middlewareTestSuite) TestAuditPrometheusBackend() { timeUnix = time.Now().Unix() - 20 req, _ = http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/api/v1/trend?from=%d", leader.GetAddr(), timeUnix), http.NoBody) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) _, err = io.ReadAll(resp.Body) resp.Body.Close() re.NoError(err) req, _ = http.NewRequest(http.MethodGet, leader.GetAddr()+"/metrics", http.NoBody) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) defer resp.Body.Close() content, _ = io.ReadAll(resp.Body) output = string(content) re.Contains(output, "pd_service_audit_handling_seconds_count{caller_id=\"anonymous\",ip=\"127.0.0.1\",method=\"HTTP\",service=\"GetTrend\"} 2") - input = map[string]interface{}{ + input = map[string]any{ "enable-audit": "false", } data, err = json.Marshal(input) re.NoError(err) req, _ = http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) resp.Body.Close() re.False(leader.GetServer().GetServiceMiddlewarePersistOptions().IsAuditEnabled()) @@ -460,19 +472,19 @@ func (suite *middlewareTestSuite) TestAuditLocalLogBackend() { defer os.RemoveAll(fname) leader := suite.cluster.GetLeaderServer() re.NotNil(leader) - input := map[string]interface{}{ + input := map[string]any{ "enable-audit": "true", } data, err := json.Marshal(input) re.NoError(err) req, _ := http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) - resp, err := dialClient.Do(req) + resp, err := tests.TestDialClient.Do(req) re.NoError(err) resp.Body.Close() re.True(leader.GetServer().GetServiceMiddlewarePersistOptions().IsAuditEnabled()) req, _ = http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/admin/log", strings.NewReader("\"info\"")) - resp, err = dialClient.Do(req) + resp, err = tests.TestDialClient.Do(req) re.NoError(err) _, err = io.ReadAll(resp.Body) resp.Body.Close() @@ -489,12 +501,12 @@ func BenchmarkDoRequestWithLocalLogAudit(b *testing.B) { cluster.RunInitialServers() cluster.WaitLeader() leader := cluster.GetLeaderServer() - input := map[string]interface{}{ + input := map[string]any{ "enable-audit": "true", } data, _ := json.Marshal(input) req, _ := http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", 
bytes.NewBuffer(data)) - resp, _ := dialClient.Do(req) + resp, _ := tests.TestDialClient.Do(req) resp.Body.Close() b.StartTimer() for i := 0; i < b.N; i++ { @@ -511,12 +523,12 @@ func BenchmarkDoRequestWithPrometheusAudit(b *testing.B) { cluster.RunInitialServers() cluster.WaitLeader() leader := cluster.GetLeaderServer() - input := map[string]interface{}{ + input := map[string]any{ "enable-audit": "true", } data, _ := json.Marshal(input) req, _ := http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) - resp, _ := dialClient.Do(req) + resp, _ := tests.TestDialClient.Do(req) resp.Body.Close() b.StartTimer() for i := 0; i < b.N; i++ { @@ -533,12 +545,12 @@ func BenchmarkDoRequestWithoutServiceMiddleware(b *testing.B) { cluster.RunInitialServers() cluster.WaitLeader() leader := cluster.GetLeaderServer() - input := map[string]interface{}{ + input := map[string]any{ "enable-audit": "false", } data, _ := json.Marshal(input) req, _ := http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) - resp, _ := dialClient.Do(req) + resp, _ := tests.TestDialClient.Do(req) resp.Body.Close() b.StartTimer() for i := 0; i < b.N; i++ { @@ -551,7 +563,7 @@ func BenchmarkDoRequestWithoutServiceMiddleware(b *testing.B) { func doTestRequestWithLogAudit(srv *tests.TestServer) { req, _ := http.NewRequest(http.MethodDelete, fmt.Sprintf("%s/pd/api/v1/admin/cache/regions", srv.GetAddr()), http.NoBody) req.Header.Set(apiutil.XCallerIDHeader, "test") - resp, _ := dialClient.Do(req) + resp, _ := tests.TestDialClient.Do(req) resp.Body.Close() } @@ -559,7 +571,7 @@ func doTestRequestWithPrometheus(srv *tests.TestServer) { timeUnix := time.Now().Unix() - 20 req, _ := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/api/v1/trend?from=%d", srv.GetAddr(), timeUnix), http.NoBody) req.Header.Set(apiutil.XCallerIDHeader, "test") - resp, _ := dialClient.Do(req) + resp, _ := tests.TestDialClient.Do(req) resp.Body.Close() } @@ -577,7 +589,7 @@ func (suite *redirectorTestSuite) SetupSuite() { re := suite.Require() ctx, cancel := context.WithCancel(context.Background()) suite.cleanup = cancel - cluster, err := tests.NewTestCluster(ctx, 3, func(conf *config.Config, serverName string) { + cluster, err := tests.NewTestCluster(ctx, 3, func(conf *config.Config, _ string) { conf.TickInterval = typeutil.Duration{Duration: 50 * time.Millisecond} conf.ElectionInterval = typeutil.Duration{Duration: 250 * time.Millisecond} }) @@ -605,6 +617,24 @@ func (suite *redirectorTestSuite) TestRedirect() { re.Equal(h, header) } } + // Test redirect during leader election. + leader = suite.cluster.GetLeaderServer() + re.NotNil(leader) + err := leader.ResignLeader() + re.NoError(err) + for _, svr := range suite.cluster.GetServers() { + url := fmt.Sprintf("%s/pd/api/v1/version", svr.GetServer().GetAddr()) + testutil.Eventually(re, func() bool { + resp, err := tests.TestDialClient.Get(url) + re.NoError(err) + defer resp.Body.Close() + _, err = io.ReadAll(resp.Body) + re.NoError(err) + // Should not meet 503 since the retry logic ensure the request is sent to the new leader eventually. 
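The redirect-during-election check added above leans on testutil.Eventually to keep retrying until the request reaches the new leader, which is why a 503 should never surface. As a rough stand-in (illustrative only, not the actual pkg/utils/testutil API), an Eventually-style poller behaves like this:

package main

import (
	"fmt"
	"time"
)

// waitUntil re-runs cond until it returns true or the timeout expires; this is
// the shape of helper the tests rely on (name and signature are hypothetical).
func waitUntil(cond func() bool, timeout, interval time.Duration) bool {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if cond() {
			return true
		}
		time.Sleep(interval)
	}
	return false
}

func main() {
	start := time.Now()
	ok := waitUntil(func() bool {
		// In the test, this closure issues the HTTP request and returns
		// whether the status code is the expected one.
		return time.Since(start) > 150*time.Millisecond
	}, time.Second, 50*time.Millisecond)
	fmt.Println("condition met:", ok)
}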
+ re.NotEqual(http.StatusServiceUnavailable, resp.StatusCode) + return resp.StatusCode == http.StatusOK + }) + } } func (suite *redirectorTestSuite) TestAllowFollowerHandle() { @@ -623,7 +653,7 @@ func (suite *redirectorTestSuite) TestAllowFollowerHandle() { request, err := http.NewRequest(http.MethodGet, addr, http.NoBody) re.NoError(err) request.Header.Add(apiutil.PDAllowFollowerHandleHeader, "true") - resp, err := dialClient.Do(request) + resp, err := tests.TestDialClient.Do(request) re.NoError(err) re.Equal("", resp.Header.Get(apiutil.PDRedirectorHeader)) defer resp.Body.Close() @@ -648,7 +678,7 @@ func (suite *redirectorTestSuite) TestNotLeader() { // Request to follower without redirectorHeader is OK. request, err := http.NewRequest(http.MethodGet, addr, http.NoBody) re.NoError(err) - resp, err := dialClient.Do(request) + resp, err := tests.TestDialClient.Do(request) re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) @@ -658,7 +688,7 @@ func (suite *redirectorTestSuite) TestNotLeader() { // Request to follower with redirectorHeader will fail. request.RequestURI = "" request.Header.Set(apiutil.PDRedirectorHeader, "pd") - resp1, err := dialClient.Do(request) + resp1, err := tests.TestDialClient.Do(request) re.NoError(err) defer resp1.Body.Close() re.NotEqual(http.StatusOK, resp1.StatusCode) @@ -677,7 +707,7 @@ func (suite *redirectorTestSuite) TestXForwardedFor() { addr := follower.GetAddr() + "/pd/api/v1/regions" request, err := http.NewRequest(http.MethodGet, addr, http.NoBody) re.NoError(err) - resp, err := dialClient.Do(request) + resp, err := tests.TestDialClient.Do(request) re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) @@ -689,7 +719,7 @@ func (suite *redirectorTestSuite) TestXForwardedFor() { } func mustRequestSuccess(re *require.Assertions, s *server.Server) http.Header { - resp, err := dialClient.Get(s.GetAddr() + "/pd/api/v1/version") + resp, err := tests.TestDialClient.Get(s.GetAddr() + "/pd/api/v1/version") re.NoError(err) defer resp.Body.Close() _, err = io.ReadAll(resp.Body) @@ -703,7 +733,7 @@ func TestRemovingProgress(t *testing.T) { re.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs", `return(true)`)) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, serverName string) { + cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, _ string) { conf.Replication.MaxReplicas = 1 }) re.NoError(err) @@ -755,9 +785,9 @@ func TestRemovingProgress(t *testing.T) { // no store removing output := sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?action=removing", http.MethodGet, http.StatusNotFound) - re.Contains((string(output)), "no progress found for the action") + re.Contains(string(output), "no progress found for the action") output = sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?id=2", http.MethodGet, http.StatusNotFound) - re.Contains((string(output)), "no progress found for the given store ID") + re.Contains(string(output), "no progress found for the given store ID") // remove store 1 and store 2 _ = sendRequest(re, leader.GetAddr()+"/pd/api/v1/store/1", http.MethodDelete, http.StatusOK) @@ -776,32 +806,69 @@ func TestRemovingProgress(t *testing.T) { tests.MustPutRegion(re, cluster, 1000, 1, []byte("a"), []byte("b"), core.SetApproximateSize(20)) tests.MustPutRegion(re, cluster, 1001, 2, []byte("c"), []byte("d"), core.SetApproximateSize(10)) - 
// is not prepared - time.Sleep(2 * time.Second) - output = sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?action=removing", http.MethodGet, http.StatusOK) - re.NoError(json.Unmarshal(output, &p)) - re.Equal("removing", p.Action) - re.Equal(0.0, p.Progress) - re.Equal(0.0, p.CurrentSpeed) - re.Equal(math.MaxFloat64, p.LeftSeconds) + if !leader.GetRaftCluster().IsPrepared() { + testutil.Eventually(re, func() bool { + if leader.GetRaftCluster().IsPrepared() { + return true + } + url := leader.GetAddr() + "/pd/api/v1/stores/progress?action=removing" + req, _ := http.NewRequest(http.MethodGet, url, http.NoBody) + resp, err := tests.TestDialClient.Do(req) + re.NoError(err) + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return false + } + // is not prepared + re.NoError(json.Unmarshal(output, &p)) + re.Equal("removing", p.Action) + re.Equal(0.0, p.Progress) + re.Equal(0.0, p.CurrentSpeed) + re.Equal(math.MaxFloat64, p.LeftSeconds) + return true + }) + } - leader.GetRaftCluster().SetPrepared() - time.Sleep(2 * time.Second) - output = sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?action=removing", http.MethodGet, http.StatusOK) - re.NoError(json.Unmarshal(output, &p)) - re.Equal("removing", p.Action) - // store 1: (60-20)/(60+50) ~= 0.36 - // store 2: (30-10)/(30+40) ~= 0.28 - // average progress ~= (0.36+0.28)/2 = 0.32 - re.Equal("0.32", fmt.Sprintf("%.2f", p.Progress)) - // store 1: 40/10s = 4 - // store 2: 20/10s = 2 - // average speed = (2+4)/2 = 33 - re.Equal(3.0, p.CurrentSpeed) - // store 1: (20+50)/4 = 17.5s - // store 2: (10+40)/2 = 25s - // average time = (17.5+25)/2 = 21.25s - re.Equal(21.25, p.LeftSeconds) + testutil.Eventually(re, func() bool { + // wait for cluster prepare + if !leader.GetRaftCluster().IsPrepared() { + leader.GetRaftCluster().SetPrepared() + return false + } + url := leader.GetAddr() + "/pd/api/v1/stores/progress?action=removing" + req, _ := http.NewRequest(http.MethodGet, url, http.NoBody) + resp, err := tests.TestDialClient.Do(req) + re.NoError(err) + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return false + } + output, err := io.ReadAll(resp.Body) + re.NoError(err) + re.NoError(json.Unmarshal(output, &p)) + if p.Action != "removing" { + return false + } + // store 1: (60-20)/(60+50) ~= 0.36 + // store 2: (30-10)/(30+40) ~= 0.28 + // average progress ~= (0.36+0.28)/2 = 0.32 + if fmt.Sprintf("%.2f", p.Progress) != "0.32" { + return false + } + // store 1: 40/10s = 4 + // store 2: 20/10s = 2 + // average speed = (2+4)/2 = 33 + if p.CurrentSpeed != 3.0 { + return false + } + // store 1: (20+50)/4 = 17.5s + // store 2: (10+40)/2 = 25s + // average time = (17.5+25)/2 = 21.25s + if p.LeftSeconds != 21.25 { + return false + } + return true + }) output = sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?id=2", http.MethodGet, http.StatusOK) re.NoError(json.Unmarshal(output, &p)) @@ -820,7 +887,7 @@ func TestSendApiWhenRestartRaftCluster(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - cluster, err := tests.NewTestCluster(ctx, 3, func(conf *config.Config, serverName string) { + cluster, err := tests.NewTestCluster(ctx, 3, func(conf *config.Config, _ string) { conf.Replication.MaxReplicas = 1 }) re.NoError(err) @@ -862,7 +929,7 @@ func TestPreparingProgress(t *testing.T) { re.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs", `return(true)`)) ctx, cancel := context.WithCancel(context.Background()) 
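The constants asserted in the rewritten removing-progress poll above (progress formatted as "0.32", CurrentSpeed 3.0, LeftSeconds 21.25) follow directly from the per-store figures quoted in the comments; the "(2+4)/2 = 33" in the carried-over comment reads as a typo for 3. A quick re-derivation:

package main

import "fmt"

// Re-derive the removing-progress constants from the per-store figures in the
// comments above (60/20/50 for store 1, 30/10/40 for store 2, a 10s window).
func main() {
	store1Progress := (60.0 - 20.0) / (60.0 + 50.0) // ~0.36
	store2Progress := (30.0 - 10.0) / (30.0 + 40.0) // ~0.28
	fmt.Printf("progress:  %.2f\n", (store1Progress+store2Progress)/2) // 0.32

	store1Speed := 40.0 / 10.0 // 4 per second
	store2Speed := 20.0 / 10.0 // 2 per second
	fmt.Printf("speed:     %.1f\n", (store1Speed+store2Speed)/2) // 3.0

	store1Left := (20.0 + 50.0) / store1Speed // 17.5s
	store2Left := (10.0 + 40.0) / store2Speed // 25s
	fmt.Printf("left secs: %.2f\n", (store1Left+store2Left)/2) // 21.25
}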
defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, serverName string) { + cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, _ string) { conf.Replication.MaxReplicas = 1 }) re.NoError(err) @@ -920,56 +987,103 @@ func TestPreparingProgress(t *testing.T) { StartTimestamp: time.Now().UnixNano() - 100, }, } - - for _, store := range stores { + // store 4 and store 5 are preparing state while store 1, store 2 and store 3 are state serving state + for _, store := range stores[:2] { tests.MustPutStore(re, cluster, store) } - for i := 0; i < 100; i++ { + for i := 0; i < core.InitClusterRegionThreshold; i++ { tests.MustPutRegion(re, cluster, uint64(i+1), uint64(i)%3+1, []byte(fmt.Sprintf("%20d", i)), []byte(fmt.Sprintf("%20d", i+1)), core.SetApproximateSize(10)) } + testutil.Eventually(re, func() bool { + return leader.GetRaftCluster().GetTotalRegionCount() == core.InitClusterRegionThreshold + }) + // to avoid forcing the store to the `serving` state with too few regions + for _, store := range stores[2:] { + tests.MustPutStore(re, cluster, store) + } // no store preparing output := sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?action=preparing", http.MethodGet, http.StatusNotFound) - re.Contains((string(output)), "no progress found for the action") - output = sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?id=4", http.MethodGet, http.StatusNotFound) - re.Contains((string(output)), "no progress found for the given store ID") - - // is not prepared - time.Sleep(2 * time.Second) - output = sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?action=preparing", http.MethodGet, http.StatusNotFound) - re.Contains((string(output)), "no progress found for the action") + re.Contains(string(output), "no progress found for the action") output = sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?id=4", http.MethodGet, http.StatusNotFound) - re.Contains((string(output)), "no progress found for the given store ID") + re.Contains(string(output), "no progress found for the given store ID") + + if !leader.GetRaftCluster().IsPrepared() { + testutil.Eventually(re, func() bool { + if leader.GetRaftCluster().IsPrepared() { + return true + } + url := leader.GetAddr() + "/pd/api/v1/stores/progress?action=preparing" + req, _ := http.NewRequest(http.MethodGet, url, http.NoBody) + resp, err := tests.TestDialClient.Do(req) + re.NoError(err) + defer resp.Body.Close() + if resp.StatusCode != http.StatusNotFound { + return false + } + // is not prepared + output, err := io.ReadAll(resp.Body) + re.NoError(err) + re.Contains(string(output), "no progress found for the action") + output = sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?id=4", http.MethodGet, http.StatusNotFound) + re.Contains(string(output), "no progress found for the given store ID") + return true + }) + } - // size is not changed. 
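The preparing-progress assertions in the hunk that follows are built the same way; re-deriving the expected "0.13", 2.5 and 108.125 from the commented per-store figures (the meaning of the 210*0.9 denominator is taken as given from those comments):

package main

import "fmt"

// Re-derive the preparing-progress constants from the figures quoted in the
// comments of the next hunk (10 and 40 over 210*0.9, a 10s window, 179/149 left).
func main() {
	store4Progress := 10.0 / (210.0 * 0.9) // ~0.05
	store5Progress := 40.0 / (210.0 * 0.9) // ~0.21
	fmt.Printf("progress:  %.2f\n", (store4Progress+store5Progress)/2) // 0.13

	store4Speed := 10.0 / 10.0 // 1 per second
	store5Speed := 40.0 / 10.0 // 4 per second
	fmt.Printf("speed:     %.1f\n", (store4Speed+store5Speed)/2) // 2.5

	store4Left := 179.0 / store4Speed // 179s
	store5Left := 149.0 / store5Speed // 37.25s
	fmt.Printf("left secs: %.3f\n", (store4Left+store5Left)/2) // 108.125
}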
- leader.GetRaftCluster().SetPrepared() - time.Sleep(2 * time.Second) - output = sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?action=preparing", http.MethodGet, http.StatusOK) var p api.Progress - re.NoError(json.Unmarshal(output, &p)) - re.Equal("preparing", p.Action) - re.Equal(0.0, p.Progress) - re.Equal(0.0, p.CurrentSpeed) - re.Equal(math.MaxFloat64, p.LeftSeconds) + testutil.Eventually(re, func() bool { + // wait for cluster prepare + if !leader.GetRaftCluster().IsPrepared() { + leader.GetRaftCluster().SetPrepared() + return false + } + url := leader.GetAddr() + "/pd/api/v1/stores/progress?action=preparing" + req, _ := http.NewRequest(http.MethodGet, url, http.NoBody) + resp, err := tests.TestDialClient.Do(req) + re.NoError(err) + defer resp.Body.Close() + output, err := io.ReadAll(resp.Body) + re.NoError(err) + if resp.StatusCode != http.StatusOK { + return false + } + re.NoError(json.Unmarshal(output, &p)) + re.Equal("preparing", p.Action) + re.Equal(0.0, p.Progress) + re.Equal(0.0, p.CurrentSpeed) + re.Equal(math.MaxFloat64, p.LeftSeconds) + return true + }) // update size tests.MustPutRegion(re, cluster, 1000, 4, []byte(fmt.Sprintf("%20d", 1000)), []byte(fmt.Sprintf("%20d", 1001)), core.SetApproximateSize(10)) tests.MustPutRegion(re, cluster, 1001, 5, []byte(fmt.Sprintf("%20d", 1001)), []byte(fmt.Sprintf("%20d", 1002)), core.SetApproximateSize(40)) - time.Sleep(2 * time.Second) - output = sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?action=preparing", http.MethodGet, http.StatusOK) - re.NoError(json.Unmarshal(output, &p)) - re.Equal("preparing", p.Action) - // store 4: 10/(210*0.9) ~= 0.05 - // store 5: 40/(210*0.9) ~= 0.21 - // average progress ~= (0.05+0.21)/2 = 0.13 - re.Equal("0.13", fmt.Sprintf("%.2f", p.Progress)) - // store 4: 10/10s = 1 - // store 5: 40/10s = 4 - // average speed = (1+4)/2 = 2.5 - re.Equal(2.5, p.CurrentSpeed) - // store 4: 179/1 ~= 179 - // store 5: 149/4 ~= 37.25 - // average time ~= (179+37.25)/2 = 108.125 - re.Equal(108.125, p.LeftSeconds) + testutil.Eventually(re, func() bool { + output := sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?action=preparing", http.MethodGet, http.StatusOK) + re.NoError(json.Unmarshal(output, &p)) + if p.Action != "preparing" { + return false + } + // store 4: 10/(210*0.9) ~= 0.05 + // store 5: 40/(210*0.9) ~= 0.21 + // average progress ~= (0.05+0.21)/2 = 0.13 + if fmt.Sprintf("%.2f", p.Progress) != "0.13" { + return false + } + // store 4: 10/10s = 1 + // store 5: 40/10s = 4 + // average speed = (1+4)/2 = 2.5 + if p.CurrentSpeed != 2.5 { + return false + } + // store 4: 179/1 ~= 179 + // store 5: 149/4 ~= 37.25 + // average time ~= (179+37.25)/2 = 108.125 + if p.LeftSeconds != 108.125 { + return false + } + return true + }) output = sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?id=4", http.MethodGet, http.StatusOK) re.NoError(json.Unmarshal(output, &p)) @@ -982,7 +1096,7 @@ func TestPreparingProgress(t *testing.T) { func sendRequest(re *require.Assertions, url string, method string, statusCode int) []byte { req, _ := http.NewRequest(method, url, http.NoBody) - resp, err := dialClient.Do(req) + resp, err := tests.TestDialClient.Do(req) re.NoError(err) re.Equal(statusCode, resp.StatusCode) output, err := io.ReadAll(resp.Body) diff --git a/tests/server/api/checker_test.go b/tests/server/api/checker_test.go index 884772bba97..54298b405f1 100644 --- a/tests/server/api/checker_test.go +++ b/tests/server/api/checker_test.go @@ -49,7 +49,7 @@ func (suite 
*checkerTestSuite) TestAPI() { func (suite *checkerTestSuite) checkAPI(cluster *tests.TestCluster) { re := suite.Require() - suite.testErrCases(re, cluster) + testErrCases(re, cluster) testCases := []struct { name string @@ -62,25 +62,25 @@ func (suite *checkerTestSuite) checkAPI(cluster *tests.TestCluster) { {name: "joint-state"}, } for _, testCase := range testCases { - suite.testGetStatus(re, cluster, testCase.name) - suite.testPauseOrResume(re, cluster, testCase.name) + testGetStatus(re, cluster, testCase.name) + testPauseOrResume(re, cluster, testCase.name) } } -func (suite *checkerTestSuite) testErrCases(re *require.Assertions, cluster *tests.TestCluster) { +func testErrCases(re *require.Assertions, cluster *tests.TestCluster) { urlPrefix := fmt.Sprintf("%s/pd/api/v1/checker", cluster.GetLeaderServer().GetAddr()) // missing args - input := make(map[string]interface{}) + input := make(map[string]any) pauseArgs, err := json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/merge", pauseArgs, tu.StatusNotOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/merge", pauseArgs, tu.StatusNotOK(re)) re.NoError(err) // negative delay input["delay"] = -10 pauseArgs, err = json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/merge", pauseArgs, tu.StatusNotOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/merge", pauseArgs, tu.StatusNotOK(re)) re.NoError(err) // wrong name @@ -88,83 +88,83 @@ func (suite *checkerTestSuite) testErrCases(re *require.Assertions, cluster *tes input["delay"] = 30 pauseArgs, err = json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusNotOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusNotOK(re)) re.NoError(err) input["delay"] = 0 pauseArgs, err = json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusNotOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusNotOK(re)) re.NoError(err) } -func (suite *checkerTestSuite) testGetStatus(re *require.Assertions, cluster *tests.TestCluster, name string) { - input := make(map[string]interface{}) +func testGetStatus(re *require.Assertions, cluster *tests.TestCluster, name string) { + input := make(map[string]any) urlPrefix := fmt.Sprintf("%s/pd/api/v1/checker", cluster.GetLeaderServer().GetAddr()) // normal run - resp := make(map[string]interface{}) - err := tu.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) + resp := make(map[string]any) + err := tu.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) re.NoError(err) re.False(resp["paused"].(bool)) // paused input["delay"] = 30 pauseArgs, err := json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusOK(re)) re.NoError(err) - resp = make(map[string]interface{}) - err = tu.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) + resp = make(map[string]any) + err = tu.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) re.NoError(err) re.True(resp["paused"].(bool)) // resumed input["delay"] = 0 pauseArgs, err = json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/"+name, 
pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusOK(re)) re.NoError(err) time.Sleep(time.Second) - resp = make(map[string]interface{}) - err = tu.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) + resp = make(map[string]any) + err = tu.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) re.NoError(err) re.False(resp["paused"].(bool)) } -func (suite *checkerTestSuite) testPauseOrResume(re *require.Assertions, cluster *tests.TestCluster, name string) { - input := make(map[string]interface{}) +func testPauseOrResume(re *require.Assertions, cluster *tests.TestCluster, name string) { + input := make(map[string]any) urlPrefix := fmt.Sprintf("%s/pd/api/v1/checker", cluster.GetLeaderServer().GetAddr()) - resp := make(map[string]interface{}) + resp := make(map[string]any) // test pause. input["delay"] = 30 pauseArgs, err := json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusOK(re)) re.NoError(err) - err = tu.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) + err = tu.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) re.NoError(err) re.True(resp["paused"].(bool)) input["delay"] = 1 pauseArgs, err = json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusOK(re)) re.NoError(err) time.Sleep(time.Second) - err = tu.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) + err = tu.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) re.NoError(err) re.False(resp["paused"].(bool)) // test resume. - input = make(map[string]interface{}) + input = make(map[string]any) input["delay"] = 30 pauseArgs, err = json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusOK(re)) re.NoError(err) input["delay"] = 0 pauseArgs, err = json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/"+name, pauseArgs, tu.StatusOK(re)) re.NoError(err) - err = tu.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) + err = tu.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s/%s", urlPrefix, name), &resp) re.NoError(err) re.False(resp["paused"].(bool)) } diff --git a/tests/server/api/operator_test.go b/tests/server/api/operator_test.go index cd3f2ac34dc..c3b86f9fde0 100644 --- a/tests/server/api/operator_test.go +++ b/tests/server/api/operator_test.go @@ -18,7 +18,6 @@ import ( "encoding/json" "errors" "fmt" - "net/http" "sort" "strconv" "strings" @@ -26,7 +25,6 @@ import ( "time" "github.com/pingcap/kvproto/pkg/metapb" - "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/schedule/operator" @@ -36,15 +34,6 @@ import ( "github.com/tikv/pd/tests" ) -var ( - // testDialClient used to dial http request. only used for test. 
- testDialClient = &http.Client{ - Transport: &http.Transport{ - DisableKeepAlives: true, - }, - } -) - type operatorTestSuite struct { suite.Suite env *tests.SchedulingTestEnvironment @@ -56,7 +45,7 @@ func TestOperatorTestSuite(t *testing.T) { func (suite *operatorTestSuite) SetupSuite() { suite.env = tests.NewSchedulingTestEnvironment(suite.T(), - func(conf *config.Config, serverName string) { + func(conf *config.Config, _ string) { conf.Replication.MaxReplicas = 1 }) } @@ -71,7 +60,7 @@ func (suite *operatorTestSuite) TestAddRemovePeer() { func (suite *operatorTestSuite) checkAddRemovePeer(cluster *tests.TestCluster) { re := suite.Require() - suite.pauseRuleChecker(re, cluster) + pauseAllCheckers(re, cluster) stores := []*metapb.Store{ { Id: 1, @@ -113,35 +102,35 @@ func (suite *operatorTestSuite) checkAddRemovePeer(cluster *tests.TestCluster) { urlPrefix := fmt.Sprintf("%s/pd/api/v1", cluster.GetLeaderServer().GetAddr()) regionURL := fmt.Sprintf("%s/operators/%d", urlPrefix, region.GetId()) - err := tu.CheckGetJSON(testDialClient, regionURL, nil, + err := tu.CheckGetJSON(tests.TestDialClient, regionURL, nil, tu.StatusNotOK(re), tu.StringContain(re, "operator not found")) re.NoError(err) recordURL := fmt.Sprintf("%s/operators/records?from=%s", urlPrefix, strconv.FormatInt(time.Now().Unix(), 10)) - err = tu.CheckGetJSON(testDialClient, recordURL, nil, + err = tu.CheckGetJSON(tests.TestDialClient, recordURL, nil, tu.StatusNotOK(re), tu.StringContain(re, "operator not found")) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"add-peer", "region_id": 1, "store_id": 3}`), tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"add-peer", "region_id": 1, "store_id": 3}`), tu.StatusOK(re)) re.NoError(err) - err = tu.CheckGetJSON(testDialClient, regionURL, nil, + err = tu.CheckGetJSON(tests.TestDialClient, regionURL, nil, tu.StatusOK(re), tu.StringContain(re, "add learner peer 1 on store 3"), tu.StringContain(re, "RUNNING")) re.NoError(err) - err = tu.CheckDelete(testDialClient, regionURL, tu.StatusOK(re)) + err = tu.CheckDelete(tests.TestDialClient, regionURL, tu.StatusOK(re)) re.NoError(err) - err = tu.CheckGetJSON(testDialClient, recordURL, nil, + err = tu.CheckGetJSON(tests.TestDialClient, recordURL, nil, tu.StatusOK(re), tu.StringContain(re, "admin-add-peer {add peer: store [3]}")) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"remove-peer", "region_id": 1, "store_id": 2}`), tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"remove-peer", "region_id": 1, "store_id": 2}`), tu.StatusOK(re)) re.NoError(err) - err = tu.CheckGetJSON(testDialClient, regionURL, nil, + err = tu.CheckGetJSON(tests.TestDialClient, regionURL, nil, tu.StatusOK(re), tu.StringContain(re, "remove peer on store 2"), tu.StringContain(re, "RUNNING")) re.NoError(err) - err = tu.CheckDelete(testDialClient, regionURL, tu.StatusOK(re)) + err = tu.CheckDelete(tests.TestDialClient, regionURL, tu.StatusOK(re)) re.NoError(err) - err = tu.CheckGetJSON(testDialClient, recordURL, nil, + err = tu.CheckGetJSON(tests.TestDialClient, recordURL, nil, tu.StatusOK(re), tu.StringContain(re, "admin-remove-peer {rm peer: store [2]}")) re.NoError(err) @@ -151,26 +140,26 @@ func (suite *operatorTestSuite) checkAddRemovePeer(cluster *tests.TestCluster) { NodeState: 
metapb.NodeState_Serving, LastHeartbeat: time.Now().UnixNano(), }) - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"add-learner", "region_id": 1, "store_id": 4}`), tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"add-learner", "region_id": 1, "store_id": 4}`), tu.StatusOK(re)) re.NoError(err) - err = tu.CheckGetJSON(testDialClient, regionURL, nil, + err = tu.CheckGetJSON(tests.TestDialClient, regionURL, nil, tu.StatusOK(re), tu.StringContain(re, "add learner peer 2 on store 4")) re.NoError(err) // Fail to add peer to tombstone store. err = cluster.GetLeaderServer().GetRaftCluster().RemoveStore(3, true) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"add-peer", "region_id": 1, "store_id": 3}`), tu.StatusNotOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"add-peer", "region_id": 1, "store_id": 3}`), tu.StatusNotOK(re)) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"transfer-peer", "region_id": 1, "from_store_id": 1, "to_store_id": 3}`), tu.StatusNotOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"transfer-peer", "region_id": 1, "from_store_id": 1, "to_store_id": 3}`), tu.StatusNotOK(re)) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [1, 2, 3]}`), tu.StatusNotOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [1, 2, 3]}`), tu.StatusNotOK(re)) re.NoError(err) // Fail to get operator if from is latest. 
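The operator checks above all go through the tu.CheckPostJSON / CheckGetJSON / CheckDelete helpers; presumably these wrap plain HTTP calls along the lines of the sketch below (the PD address is hypothetical, while the paths and payloads are the ones used in the test):

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	client := &http.Client{Transport: &http.Transport{DisableKeepAlives: true}}
	prefix := "http://127.0.0.1:2379/pd/api/v1" // hypothetical PD endpoint

	// Create an operator with the same payload the test posts.
	body := []byte(`{"name":"add-peer", "region_id": 1, "store_id": 3}`)
	resp, err := client.Post(prefix+"/operators", "application/json", bytes.NewBuffer(body))
	if err != nil {
		panic(err)
	}
	resp.Body.Close()

	// Inspect the running operator for region 1 ...
	resp, err = client.Get(prefix + "/operators/1")
	if err != nil {
		panic(err)
	}
	out, _ := io.ReadAll(resp.Body)
	resp.Body.Close()
	fmt.Println(string(out))

	// ... and cancel it, which is what tu.CheckDelete exercises.
	req, _ := http.NewRequest(http.MethodDelete, prefix+"/operators/1", http.NoBody)
	resp, err = client.Do(req)
	if err != nil {
		panic(err)
	}
	resp.Body.Close()
}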
time.Sleep(time.Second) url := fmt.Sprintf("%s/operators/records?from=%s", urlPrefix, strconv.FormatInt(time.Now().Unix(), 10)) - err = tu.CheckGetJSON(testDialClient, url, nil, + err = tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.StatusNotOK(re), tu.StringContain(re, "operator not found")) re.NoError(err) } @@ -206,7 +195,7 @@ func (suite *operatorTestSuite) checkMergeRegionOperator(cluster *tests.TestClus tests.MustPutStore(re, cluster, store) } - suite.pauseRuleChecker(re, cluster) + pauseAllCheckers(re, cluster) r1 := core.NewTestRegionInfo(10, 1, []byte(""), []byte("b"), core.SetWrittenBytes(1000), core.SetReadBytes(1000), core.SetRegionConfVer(1), core.SetRegionVersion(1)) tests.MustPutRegionInfo(re, cluster, r1) r2 := core.NewTestRegionInfo(20, 1, []byte("b"), []byte("c"), core.SetWrittenBytes(2000), core.SetReadBytes(0), core.SetRegionConfVer(2), core.SetRegionVersion(3)) @@ -215,17 +204,17 @@ func (suite *operatorTestSuite) checkMergeRegionOperator(cluster *tests.TestClus tests.MustPutRegionInfo(re, cluster, r3) urlPrefix := fmt.Sprintf("%s/pd/api/v1", cluster.GetLeaderServer().GetAddr()) - err := tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"merge-region", "source_region_id": 10, "target_region_id": 20}`), tu.StatusOK(re)) + err := tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"merge-region", "source_region_id": 10, "target_region_id": 20}`), tu.StatusOK(re)) re.NoError(err) - tu.CheckDelete(testDialClient, fmt.Sprintf("%s/operators/%d", urlPrefix, 10), tu.StatusOK(re)) - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"merge-region", "source_region_id": 20, "target_region_id": 10}`), tu.StatusOK(re)) + tu.CheckDelete(tests.TestDialClient, fmt.Sprintf("%s/operators/%d", urlPrefix, 10), tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"merge-region", "source_region_id": 20, "target_region_id": 10}`), tu.StatusOK(re)) re.NoError(err) - tu.CheckDelete(testDialClient, fmt.Sprintf("%s/operators/%d", urlPrefix, 10), tu.StatusOK(re)) - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"merge-region", "source_region_id": 10, "target_region_id": 30}`), + tu.CheckDelete(tests.TestDialClient, fmt.Sprintf("%s/operators/%d", urlPrefix, 10), tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"merge-region", "source_region_id": 10, "target_region_id": 30}`), tu.StatusNotOK(re), tu.StringContain(re, "not adjacent")) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"merge-region", "source_region_id": 30, "target_region_id": 10}`), + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"merge-region", "source_region_id": 30, "target_region_id": 10}`), tu.StatusNotOK(re), tu.StringContain(re, "not adjacent")) re.NoError(err) } @@ -233,7 +222,7 @@ func (suite *operatorTestSuite) checkMergeRegionOperator(cluster *tests.TestClus func (suite *operatorTestSuite) TestTransferRegionWithPlacementRule() { // use a new environment to avoid affecting other tests env := tests.NewSchedulingTestEnvironment(suite.T(), - func(conf *config.Config, serverName string) { + func(conf *config.Config, _ string) { conf.Replication.MaxReplicas = 3 }) 
env.RunTestInTwoModes(suite.checkTransferRegionWithPlacementRule) @@ -242,7 +231,7 @@ func (suite *operatorTestSuite) TestTransferRegionWithPlacementRule() { func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *tests.TestCluster) { re := suite.Require() - suite.pauseRuleChecker(re, cluster) + pauseAllCheckers(re, cluster) stores := []*metapb.Store{ { Id: 1, @@ -288,7 +277,7 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te urlPrefix := fmt.Sprintf("%s/pd/api/v1", cluster.GetLeaderServer().GetAddr()) regionURL := fmt.Sprintf("%s/operators/%d", urlPrefix, region.GetId()) - err := tu.CheckGetJSON(testDialClient, regionURL, nil, + err := tu.CheckGetJSON(tests.TestDialClient, regionURL, nil, tu.StatusNotOK(re), tu.StringContain(re, "operator not found")) re.NoError(err) convertStepsToStr := func(steps []string) string { @@ -455,7 +444,7 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te url := fmt.Sprintf("%s/pd/api/v1/config", svr.GetAddr()) for _, testCase := range testCases { suite.T().Log(testCase.name) - data := make(map[string]interface{}) + data := make(map[string]any) if testCase.placementRuleEnable { data["enable-placement-rules"] = "true" } else { @@ -463,7 +452,7 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te } reqData, e := json.Marshal(data) re.NoError(e) - err := tu.CheckPostJSON(testDialClient, url, reqData, tu.StatusOK(re)) + err := tu.CheckPostJSON(tests.TestDialClient, url, reqData, tu.StatusOK(re)) re.NoError(err) if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { // wait for the scheduling server to update the config @@ -492,19 +481,19 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te re.NoError(err) } if testCase.expectedError == nil { - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), testCase.input, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), testCase.input, tu.StatusOK(re)) } else { - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), testCase.input, + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), testCase.input, tu.StatusNotOK(re), tu.StringContain(re, testCase.expectedError.Error())) } re.NoError(err) if len(testCase.expectSteps) > 0 { - err = tu.CheckGetJSON(testDialClient, regionURL, nil, + err = tu.CheckGetJSON(tests.TestDialClient, regionURL, nil, tu.StatusOK(re), tu.StringContain(re, testCase.expectSteps)) re.NoError(err) - err = tu.CheckDelete(testDialClient, regionURL, tu.StatusOK(re)) + err = tu.CheckDelete(tests.TestDialClient, regionURL, tu.StatusOK(re)) } else { - err = tu.CheckDelete(testDialClient, regionURL, tu.StatusNotOK(re)) + err = tu.CheckDelete(tests.TestDialClient, regionURL, tu.StatusNotOK(re)) } re.NoError(err) } @@ -513,7 +502,7 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te func (suite *operatorTestSuite) TestGetOperatorsAsObject() { // use a new environment to avoid being affected by other tests env := tests.NewSchedulingTestEnvironment(suite.T(), - func(conf *config.Config, serverName string) { + func(conf *config.Config, _ string) { conf.Replication.MaxReplicas = 1 }) env.RunTestInTwoModes(suite.checkGetOperatorsAsObject) @@ -522,7 +511,7 @@ func (suite *operatorTestSuite) TestGetOperatorsAsObject() { func (suite *operatorTestSuite) checkGetOperatorsAsObject(cluster 
*tests.TestCluster) { re := suite.Require() - suite.pauseRuleChecker(re, cluster) + pauseAllCheckers(re, cluster) stores := []*metapb.Store{ { Id: 1, @@ -553,7 +542,7 @@ func (suite *operatorTestSuite) checkGetOperatorsAsObject(cluster *tests.TestClu resp := make([]operator.OpObject, 0) // No operator. - err := tu.ReadGetJSON(re, testDialClient, objURL, &resp) + err := tu.ReadGetJSON(re, tests.TestDialClient, objURL, &resp) re.NoError(err) re.Empty(resp) @@ -565,9 +554,9 @@ func (suite *operatorTestSuite) checkGetOperatorsAsObject(cluster *tests.TestClu r3 := core.NewTestRegionInfo(30, 1, []byte("c"), []byte("d"), core.SetWrittenBytes(500), core.SetReadBytes(800), core.SetRegionConfVer(3), core.SetRegionVersion(2)) tests.MustPutRegionInfo(re, cluster, r3) - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"merge-region", "source_region_id": 10, "target_region_id": 20}`), tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"merge-region", "source_region_id": 10, "target_region_id": 20}`), tu.StatusOK(re)) re.NoError(err) - err = tu.ReadGetJSON(re, testDialClient, objURL, &resp) + err = tu.ReadGetJSON(re, tests.TestDialClient, objURL, &resp) re.NoError(err) re.Len(resp, 2) less := func(i, j int) bool { @@ -602,9 +591,9 @@ func (suite *operatorTestSuite) checkGetOperatorsAsObject(cluster *tests.TestClu } regionInfo := core.NewRegionInfo(region, peer1) tests.MustPutRegionInfo(re, cluster, regionInfo) - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"add-peer", "region_id": 40, "store_id": 3}`), tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"add-peer", "region_id": 40, "store_id": 3}`), tu.StatusOK(re)) re.NoError(err) - err = tu.ReadGetJSON(re, testDialClient, objURL, &resp) + err = tu.ReadGetJSON(re, tests.TestDialClient, objURL, &resp) re.NoError(err) re.Len(resp, 3) sort.Slice(resp, less) @@ -612,15 +601,55 @@ func (suite *operatorTestSuite) checkGetOperatorsAsObject(cluster *tests.TestClu re.Equal("admin-add-peer", resp[2].Desc) } -// pauseRuleChecker will pause rule checker to avoid unexpected operator. 
-func (suite *operatorTestSuite) pauseRuleChecker(re *require.Assertions, cluster *tests.TestCluster) { - checkerName := "rule" - addr := cluster.GetLeaderServer().GetAddr() - resp := make(map[string]interface{}) - url := fmt.Sprintf("%s/pd/api/v1/checker/%s", addr, checkerName) - err := tu.CheckPostJSON(testDialClient, url, []byte(`{"delay":1000}`), tu.StatusOK(re)) +func (suite *operatorTestSuite) TestRemoveOperators() { + suite.env.RunTestInTwoModes(suite.checkRemoveOperators) +} + +func (suite *operatorTestSuite) checkRemoveOperators(cluster *tests.TestCluster) { + re := suite.Require() + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 4, + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + LastHeartbeat: time.Now().UnixNano(), + }, + } + + for _, store := range stores { + tests.MustPutStore(re, cluster, store) + } + + pauseAllCheckers(re, cluster) + r1 := core.NewTestRegionInfo(10, 1, []byte(""), []byte("b"), core.SetWrittenBytes(1000), core.SetReadBytes(1000), core.SetRegionConfVer(1), core.SetRegionVersion(1)) + tests.MustPutRegionInfo(re, cluster, r1) + r2 := core.NewTestRegionInfo(20, 1, []byte("b"), []byte("c"), core.SetWrittenBytes(2000), core.SetReadBytes(0), core.SetRegionConfVer(2), core.SetRegionVersion(3)) + tests.MustPutRegionInfo(re, cluster, r2) + r3 := core.NewTestRegionInfo(30, 1, []byte("c"), []byte(""), core.SetWrittenBytes(500), core.SetReadBytes(800), core.SetRegionConfVer(3), core.SetRegionVersion(2)) + tests.MustPutRegionInfo(re, cluster, r3) + + urlPrefix := fmt.Sprintf("%s/pd/api/v1", cluster.GetLeaderServer().GetAddr()) + err := tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"merge-region", "source_region_id": 10, "target_region_id": 20}`), tu.StatusOK(re)) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/operators", urlPrefix), []byte(`{"name":"add-peer", "region_id": 30, "store_id": 4}`), tu.StatusOK(re)) + re.NoError(err) + url := fmt.Sprintf("%s/operators", urlPrefix) + err = tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.StatusOK(re), tu.StringContain(re, "merge: region 10 to 20"), tu.StringContain(re, "add peer: store [4]")) + re.NoError(err) + err = tu.CheckDelete(tests.TestDialClient, url, tu.StatusOK(re)) re.NoError(err) - err = tu.ReadGetJSON(re, testDialClient, url, &resp) + err = tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.StatusOK(re), tu.StringNotContain(re, "merge: region 10 to 20"), tu.StringNotContain(re, "add peer: store [4]")) re.NoError(err) - re.True(resp["paused"].(bool)) } diff --git a/tests/server/api/region_test.go b/tests/server/api/region_test.go index 68325fb6c71..23ebceaefd6 100644 --- a/tests/server/api/region_test.go +++ b/tests/server/api/region_test.go @@ -57,7 +57,7 @@ func (suite *regionTestSuite) TearDownTest() { pdAddr := cluster.GetConfig().GetClientURL() for _, region := range leader.GetRegions() { url := fmt.Sprintf("%s/pd/api/v1/admin/cache/region/%d", pdAddr, region.GetID()) - err := tu.CheckDelete(testDialClient, url, tu.StatusOK(re)) + err := tu.CheckDelete(tests.TestDialClient, url, tu.StatusOK(re)) re.NoError(err) } re.Empty(leader.GetRegions()) @@ -71,16 +71,25 @@ func (suite *regionTestSuite) TearDownTest() { data, err := json.Marshal([]placement.GroupBundle{def}) 
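The removed per-suite pauseRuleChecker above and the shared pauseAllCheckers that replaces it both drive the same checker API: POST {"delay":N} to /pd/api/v1/checker/<name>, then GET the same URL and inspect "paused". The delay appears to be in seconds (a delay of 1 expires after the one-second sleep in testGetStatus). A standalone sketch with a plain client and a hypothetical PD address:

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// pauseChecker mirrors what the test helpers do for a single checker name.
func pauseChecker(client *http.Client, addr, name string, delay int) (bool, error) {
	url := fmt.Sprintf("%s/pd/api/v1/checker/%s", addr, name)

	body := []byte(fmt.Sprintf(`{"delay":%d}`, delay))
	resp, err := client.Post(url, "application/json", bytes.NewBuffer(body))
	if err != nil {
		return false, err
	}
	resp.Body.Close()

	resp, err = client.Get(url)
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()
	status := make(map[string]any)
	if err := json.NewDecoder(resp.Body).Decode(&status); err != nil {
		return false, err
	}
	paused, _ := status["paused"].(bool)
	return paused, nil
}

func main() {
	client := &http.Client{}
	// Same checker names pauseAllCheckers iterates over.
	for _, name := range []string{"learner", "replica", "rule", "split", "merge", "joint-state"} {
		paused, err := pauseChecker(client, "http://127.0.0.1:2379", name, 1000)
		fmt.Println(name, paused, err)
	}
}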
re.NoError(err) urlPrefix := cluster.GetLeaderServer().GetAddr() - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/pd/api/v1/config/placement-rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/pd/api/v1/config/placement-rule", data, tu.StatusOK(re)) re.NoError(err) // clean stores - // TODO: cannot sync to scheduling server? for _, store := range leader.GetStores() { re.NoError(cluster.GetLeaderServer().GetRaftCluster().RemoveStore(store.GetId(), true)) re.NoError(cluster.GetLeaderServer().GetRaftCluster().BuryStore(store.GetId(), true)) } re.NoError(cluster.GetLeaderServer().GetRaftCluster().RemoveTombStoneRecords()) re.Empty(leader.GetStores()) + tu.Eventually(re, func() bool { + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + for _, s := range sche.GetBasicCluster().GetStores() { + if s.GetState() != metapb.StoreState_Tombstone { + return false + } + } + } + return true + }) } suite.env.RunFuncInTwoModes(cleanFunc) } @@ -105,14 +114,14 @@ func (suite *regionTestSuite) checkSplitRegions(cluster *tests.TestCluster) { r1 := core.NewTestRegionInfo(601, 13, []byte("aaa"), []byte("ggg")) r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 5, StoreId: 14}, &metapb.Peer{Id: 6, StoreId: 15}) tests.MustPutRegionInfo(re, cluster, r1) - suite.checkRegionCount(re, cluster, 1) + checkRegionCount(re, cluster, 1) newRegionID := uint64(11) body := fmt.Sprintf(`{"retry_limit":%v, "split_keys": ["%s","%s","%s"]}`, 3, hex.EncodeToString([]byte("bbb")), hex.EncodeToString([]byte("ccc")), hex.EncodeToString([]byte("ddd"))) - checkOpt := func(res []byte, code int, _ http.Header) { + checkOpt := func(res []byte, _ int, _ http.Header) { s := &struct { ProcessedPercentage int `json:"processed-percentage"` NewRegionsID []uint64 `json:"regions-id"` @@ -123,7 +132,7 @@ func (suite *regionTestSuite) checkSplitRegions(cluster *tests.TestCluster) { re.Equal([]uint64{newRegionID}, s.NewRegionsID) } re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/handler/splitResponses", fmt.Sprintf("return(%v)", newRegionID))) - err := tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/split", urlPrefix), []byte(body), checkOpt) + err := tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/regions/split", urlPrefix), []byte(body), checkOpt) re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/handler/splitResponses")) re.NoError(err) } @@ -150,10 +159,10 @@ func (suite *regionTestSuite) checkAccelerateRegionsScheduleInRange(cluster *tes r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 100 + i, StoreId: (i + 1) % regionCount}, &metapb.Peer{Id: 200 + i, StoreId: (i + 2) % regionCount}) tests.MustPutRegionInfo(re, cluster, r1) } - suite.checkRegionCount(re, cluster, regionCount) + checkRegionCount(re, cluster, regionCount) body := fmt.Sprintf(`{"start_key":"%s", "end_key": "%s"}`, hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a3"))) - err := tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/accelerate-schedule", urlPrefix), []byte(body), + err := tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/regions/accelerate-schedule", urlPrefix), []byte(body), tu.StatusOK(re)) re.NoError(err) idList := leader.GetRaftCluster().GetSuspectRegions() @@ -185,11 +194,11 @@ func (suite *regionTestSuite) checkAccelerateRegionsScheduleInRanges(cluster *te r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 100 + i, StoreId: (i + 1) % regionCount}, &metapb.Peer{Id: 200 + i, StoreId: (i + 2) % 
regionCount}) tests.MustPutRegionInfo(re, cluster, r1) } - suite.checkRegionCount(re, cluster, regionCount) + checkRegionCount(re, cluster, regionCount) body := fmt.Sprintf(`[{"start_key":"%s", "end_key": "%s"}, {"start_key":"%s", "end_key": "%s"}]`, hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a3")), hex.EncodeToString([]byte("a4")), hex.EncodeToString([]byte("a6"))) - err := tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/accelerate-schedule/batch", urlPrefix), []byte(body), + err := tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/regions/accelerate-schedule/batch", urlPrefix), []byte(body), tu.StatusOK(re)) re.NoError(err) idList := leader.GetRaftCluster().GetSuspectRegions() @@ -227,10 +236,10 @@ func (suite *regionTestSuite) checkScatterRegions(cluster *tests.TestCluster) { tests.MustPutRegionInfo(re, cluster, r1) tests.MustPutRegionInfo(re, cluster, r2) tests.MustPutRegionInfo(re, cluster, r3) - suite.checkRegionCount(re, cluster, 3) + checkRegionCount(re, cluster, 3) body := fmt.Sprintf(`{"start_key":"%s", "end_key": "%s"}`, hex.EncodeToString([]byte("b1")), hex.EncodeToString([]byte("b3"))) - err := tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/scatter", urlPrefix), []byte(body), tu.StatusOK(re)) + err := tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/regions/scatter", urlPrefix), []byte(body), tu.StatusOK(re)) re.NoError(err) oc := leader.GetRaftCluster().GetOperatorController() if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { @@ -244,7 +253,7 @@ func (suite *regionTestSuite) checkScatterRegions(cluster *tests.TestCluster) { re.True(op1 != nil || op2 != nil || op3 != nil) body = `{"regions_id": [701, 702, 703]}` - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/scatter", urlPrefix), []byte(body), tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/regions/scatter", urlPrefix), []byte(body), tu.StatusOK(re)) re.NoError(err) } @@ -254,7 +263,7 @@ func (suite *regionTestSuite) TestCheckRegionsReplicated() { func (suite *regionTestSuite) checkRegionsReplicated(cluster *tests.TestCluster) { re := suite.Require() - suite.pauseRuleChecker(re, cluster) + pauseAllCheckers(re, cluster) leader := cluster.GetLeaderServer() urlPrefix := leader.GetAddr() + "/pd/api/v1" @@ -267,7 +276,7 @@ func (suite *regionTestSuite) checkRegionsReplicated(cluster *tests.TestCluster) tests.MustPutStore(re, cluster, s1) r1 := core.NewTestRegionInfo(2, 1, []byte("a"), []byte("b")) tests.MustPutRegionInfo(re, cluster, r1) - suite.checkRegionCount(re, cluster, 1) + checkRegionCount(re, cluster, 1) // set the bundle bundle := []placement.GroupBundle{ @@ -286,40 +295,40 @@ func (suite *regionTestSuite) checkRegionsReplicated(cluster *tests.TestCluster) // invalid url url := fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, urlPrefix, "_", "t") - err := tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, http.StatusBadRequest)) + err := tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.Status(re, http.StatusBadRequest)) re.NoError(err) url = fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, urlPrefix, hex.EncodeToString(r1.GetStartKey()), "_") - err = tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, http.StatusBadRequest)) + err = tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.Status(re, http.StatusBadRequest)) re.NoError(err) // correct test url = fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, urlPrefix, hex.EncodeToString(r1.GetStartKey()), 
hex.EncodeToString(r1.GetEndKey())) - err = tu.CheckGetJSON(testDialClient, url, nil, tu.StatusOK(re)) + err = tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.StatusOK(re)) re.NoError(err) // test one rule data, err := json.Marshal(bundle) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) re.NoError(err) tu.Eventually(re, func() bool { respBundle := make([]placement.GroupBundle, 0) - err = tu.CheckGetJSON(testDialClient, urlPrefix+"/config/placement-rule", nil, + err = tu.CheckGetJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", nil, tu.StatusOK(re), tu.ExtractJSON(re, &respBundle)) re.NoError(err) return len(respBundle) == 1 && respBundle[0].ID == "5" }) tu.Eventually(re, func() bool { - err = tu.ReadGetJSON(re, testDialClient, url, &status) + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &status) re.NoError(err) return status == "REPLICATED" }) re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/handler/mockPending", "return(true)")) - err = tu.ReadGetJSON(re, testDialClient, url, &status) + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &status) re.NoError(err) re.Equal("PENDING", status) re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/handler/mockPending")) @@ -333,19 +342,19 @@ func (suite *regionTestSuite) checkRegionsReplicated(cluster *tests.TestCluster) }) data, err = json.Marshal(bundle) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) re.NoError(err) tu.Eventually(re, func() bool { respBundle := make([]placement.GroupBundle, 0) - err = tu.CheckGetJSON(testDialClient, urlPrefix+"/config/placement-rule", nil, + err = tu.CheckGetJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", nil, tu.StatusOK(re), tu.ExtractJSON(re, &respBundle)) re.NoError(err) return len(respBundle) == 1 && len(respBundle[0].Rules) == 2 }) tu.Eventually(re, func() bool { - err = tu.ReadGetJSON(re, testDialClient, url, &status) + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &status) re.NoError(err) return status == "REPLICATED" }) @@ -362,12 +371,12 @@ func (suite *regionTestSuite) checkRegionsReplicated(cluster *tests.TestCluster) }) data, err = json.Marshal(bundle) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) re.NoError(err) tu.Eventually(re, func() bool { respBundle := make([]placement.GroupBundle, 0) - err = tu.CheckGetJSON(testDialClient, urlPrefix+"/config/placement-rule", nil, + err = tu.CheckGetJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", nil, tu.StatusOK(re), tu.ExtractJSON(re, &respBundle)) re.NoError(err) if len(respBundle) != 2 { @@ -379,7 +388,7 @@ func (suite *regionTestSuite) checkRegionsReplicated(cluster *tests.TestCluster) }) tu.Eventually(re, func() bool { - err = tu.ReadGetJSON(re, testDialClient, url, &status) + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &status) re.NoError(err) return status == "INPROGRESS" }) @@ -389,16 +398,16 @@ func (suite *regionTestSuite) checkRegionsReplicated(cluster *tests.TestCluster) tests.MustPutRegionInfo(re, cluster, r1) tu.Eventually(re, func() bool { 
- err = tu.ReadGetJSON(re, testDialClient, url, &status) + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &status) re.NoError(err) return status == "REPLICATED" }) } -func (suite *regionTestSuite) checkRegionCount(re *require.Assertions, cluster *tests.TestCluster, count uint64) { +func checkRegionCount(re *require.Assertions, cluster *tests.TestCluster, count uint64) { leader := cluster.GetLeaderServer() tu.Eventually(re, func() bool { - return leader.GetRaftCluster().GetRegionCount([]byte{}, []byte{}).Count == int(count) + return leader.GetRaftCluster().GetRegionCount([]byte{}, []byte{}) == int(count) }) if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { tu.Eventually(re, func() bool { @@ -407,15 +416,16 @@ func (suite *regionTestSuite) checkRegionCount(re *require.Assertions, cluster * } } -// pauseRuleChecker will pause rule checker to avoid unexpected operator. -func (suite *regionTestSuite) pauseRuleChecker(re *require.Assertions, cluster *tests.TestCluster) { - checkerName := "rule" +func pauseAllCheckers(re *require.Assertions, cluster *tests.TestCluster) { + checkerNames := []string{"learner", "replica", "rule", "split", "merge", "joint-state"} addr := cluster.GetLeaderServer().GetAddr() - resp := make(map[string]interface{}) - url := fmt.Sprintf("%s/pd/api/v1/checker/%s", addr, checkerName) - err := tu.CheckPostJSON(testDialClient, url, []byte(`{"delay":1000}`), tu.StatusOK(re)) - re.NoError(err) - err = tu.ReadGetJSON(re, testDialClient, url, &resp) - re.NoError(err) - re.True(resp["paused"].(bool)) + for _, checkerName := range checkerNames { + resp := make(map[string]any) + url := fmt.Sprintf("%s/pd/api/v1/checker/%s", addr, checkerName) + err := tu.CheckPostJSON(tests.TestDialClient, url, []byte(`{"delay":1000}`), tu.StatusOK(re)) + re.NoError(err) + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &resp) + re.NoError(err) + re.True(resp["paused"].(bool)) + } } diff --git a/tests/server/api/rule_test.go b/tests/server/api/rule_test.go index a845d2f3e05..16077a308f6 100644 --- a/tests/server/api/rule_test.go +++ b/tests/server/api/rule_test.go @@ -49,7 +49,7 @@ func TestRuleTestSuite(t *testing.T) { } func (suite *ruleTestSuite) SetupSuite() { - suite.env = tests.NewSchedulingTestEnvironment(suite.T(), func(conf *config.Config, serverName string) { + suite.env = tests.NewSchedulingTestEnvironment(suite.T(), func(conf *config.Config, _ string) { conf.PDServerCfg.KeyType = "raw" conf.Replication.EnablePlacementRules = true }) @@ -71,7 +71,7 @@ func (suite *ruleTestSuite) TearDownTest() { data, err := json.Marshal([]placement.GroupBundle{def}) re.NoError(err) urlPrefix := cluster.GetLeaderServer().GetAddr() - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/pd/api/v1/config/placement-rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/pd/api/v1/config/placement-rule", data, tu.StatusOK(re)) re.NoError(err) } suite.env.RunFuncInTwoModes(cleanFunc) @@ -171,7 +171,7 @@ func (suite *ruleTestSuite) checkSet(cluster *tests.TestCluster) { // clear suspect keyRanges to prevent test case from others leaderServer.GetRaftCluster().ClearSuspectKeyRanges() if testCase.success { - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", testCase.rawData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", testCase.rawData, tu.StatusOK(re)) popKeyRangeMap := map[string]struct{}{} for i := 0; i < len(testCase.popKeyRange)/2; i++ { v, got := leaderServer.GetRaftCluster().PopOneSuspectKeyRange() @@ -185,7 
+185,7 @@ func (suite *ruleTestSuite) checkSet(cluster *tests.TestCluster) { re.True(ok) } } else { - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", testCase.rawData, + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", testCase.rawData, tu.StatusNotOK(re), tu.StringEqual(re, testCase.response)) } @@ -206,7 +206,7 @@ func (suite *ruleTestSuite) checkGet(cluster *tests.TestCluster) { rule := placement.Rule{GroupID: "a", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) re.NoError(err) testCases := []struct { @@ -234,11 +234,11 @@ func (suite *ruleTestSuite) checkGet(cluster *tests.TestCluster) { url := fmt.Sprintf("%s/rule/%s/%s", urlPrefix, testCase.rule.GroupID, testCase.rule.ID) if testCase.found { tu.Eventually(re, func() bool { - err = tu.ReadGetJSON(re, testDialClient, url, &resp) - return suite.compareRule(&resp, &testCase.rule) + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &resp) + return compareRule(&resp, &testCase.rule) }) } else { - err = tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, testCase.code)) + err = tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.Status(re, testCase.code)) } re.NoError(err) } @@ -257,11 +257,11 @@ func (suite *ruleTestSuite) checkGetAll(cluster *tests.TestCluster) { rule := placement.Rule{GroupID: "b", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) re.NoError(err) var resp2 []*placement.Rule - err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/rules", &resp2) + err = tu.ReadGetJSON(re, tests.TestDialClient, urlPrefix+"/rules", &resp2) re.NoError(err) re.NotEmpty(resp2) } @@ -369,13 +369,13 @@ func (suite *ruleTestSuite) checkSetAll(cluster *tests.TestCluster) { for _, testCase := range testCases { suite.T().Log(testCase.name) if testCase.success { - err := tu.CheckPostJSON(testDialClient, urlPrefix+"/rules", testCase.rawData, tu.StatusOK(re)) + err := tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rules", testCase.rawData, tu.StatusOK(re)) re.NoError(err) if testCase.isDefaultRule { re.Equal(int(leaderServer.GetPersistOptions().GetReplicationConfig().MaxReplicas), testCase.count) } } else { - err := tu.CheckPostJSON(testDialClient, urlPrefix+"/rules", testCase.rawData, + err := tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rules", testCase.rawData, tu.StringEqual(re, testCase.response)) re.NoError(err) } @@ -395,13 +395,13 @@ func (suite *ruleTestSuite) checkGetAllByGroup(cluster *tests.TestCluster) { rule := placement.Rule{GroupID: "c", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) re.NoError(err) rule1 := placement.Rule{GroupID: "c", ID: "30", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err = json.Marshal(rule1) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) + 
err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) re.NoError(err) testCases := []struct { @@ -426,13 +426,13 @@ func (suite *ruleTestSuite) checkGetAllByGroup(cluster *tests.TestCluster) { var resp []*placement.Rule url := fmt.Sprintf("%s/rules/group/%s", urlPrefix, testCase.groupID) tu.Eventually(re, func() bool { - err = tu.ReadGetJSON(re, testDialClient, url, &resp) + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &resp) re.NoError(err) if len(resp) != testCase.count { return false } if testCase.count == 2 { - return suite.compareRule(resp[0], &rule) && suite.compareRule(resp[1], &rule1) + return compareRule(resp[0], &rule) && compareRule(resp[1], &rule1) } return true }) @@ -452,7 +452,7 @@ func (suite *ruleTestSuite) checkGetAllByRegion(cluster *tests.TestCluster) { rule := placement.Rule{GroupID: "e", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) re.NoError(err) r := core.NewTestRegionInfo(4, 1, []byte{0x22, 0x22}, []byte{0x33, 0x33}) @@ -489,16 +489,16 @@ func (suite *ruleTestSuite) checkGetAllByRegion(cluster *tests.TestCluster) { if testCase.success { tu.Eventually(re, func() bool { - err = tu.ReadGetJSON(re, testDialClient, url, &resp) + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &resp) for _, r := range resp { if r.GroupID == "e" { - return suite.compareRule(r, &rule) + return compareRule(r, &rule) } } return true }) } else { - err = tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, testCase.code)) + err = tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.Status(re, testCase.code)) } re.NoError(err) } @@ -517,7 +517,7 @@ func (suite *ruleTestSuite) checkGetAllByKey(cluster *tests.TestCluster) { rule := placement.Rule{GroupID: "f", ID: "40", StartKeyHex: "8888", EndKeyHex: "9111", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) re.NoError(err) testCases := []struct { @@ -553,11 +553,11 @@ func (suite *ruleTestSuite) checkGetAllByKey(cluster *tests.TestCluster) { url := fmt.Sprintf("%s/rules/key/%s", urlPrefix, testCase.key) if testCase.success { tu.Eventually(re, func() bool { - err = tu.ReadGetJSON(re, testDialClient, url, &resp) + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &resp) return len(resp) == testCase.respSize }) } else { - err = tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, testCase.code)) + err = tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.Status(re, testCase.code)) } re.NoError(err) } @@ -576,7 +576,7 @@ func (suite *ruleTestSuite) checkDelete(cluster *tests.TestCluster) { rule := placement.Rule{GroupID: "g", ID: "10", StartKeyHex: "8888", EndKeyHex: "9111", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) re.NoError(err) oldStartKey, err := hex.DecodeString(rule.StartKeyHex) re.NoError(err) @@ -610,7 +610,7 @@ func (suite *ruleTestSuite) checkDelete(cluster *tests.TestCluster) { url := fmt.Sprintf("%s/rule/%s/%s", urlPrefix, 
testCase.groupID, testCase.id) // clear suspect keyRanges to prevent test case from others leaderServer.GetRaftCluster().ClearSuspectKeyRanges() - err = tu.CheckDelete(testDialClient, url, tu.StatusOK(re)) + err = tu.CheckDelete(tests.TestDialClient, url, tu.StatusOK(re)) re.NoError(err) if len(testCase.popKeyRange) > 0 { popKeyRangeMap := map[string]struct{}{} @@ -747,10 +747,10 @@ func (suite *ruleTestSuite) checkBatch(cluster *tests.TestCluster) { for _, testCase := range testCases { suite.T().Log(testCase.name) if testCase.success { - err := tu.CheckPostJSON(testDialClient, urlPrefix+"/rules/batch", testCase.rawData, tu.StatusOK(re)) + err := tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rules/batch", testCase.rawData, tu.StatusOK(re)) re.NoError(err) } else { - err := tu.CheckPostJSON(testDialClient, urlPrefix+"/rules/batch", testCase.rawData, + err := tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rules/batch", testCase.rawData, tu.StatusNotOK(re), tu.StringEqual(re, testCase.response)) re.NoError(err) @@ -780,11 +780,7 @@ func (suite *ruleTestSuite) checkBundle(cluster *tests.TestCluster) { }, }, } - var bundles []placement.GroupBundle - err := tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule", &bundles) - re.NoError(err) - re.Len(bundles, 1) - suite.assertBundleEqual(re, bundles[0], b1) + assertBundlesEqual(re, urlPrefix+"/placement-rule", []placement.GroupBundle{b1}, 1) // Set b2 := placement.GroupBundle{ @@ -797,31 +793,21 @@ func (suite *ruleTestSuite) checkBundle(cluster *tests.TestCluster) { } data, err := json.Marshal(b2) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/placement-rule/foo", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/placement-rule/foo", data, tu.StatusOK(re)) re.NoError(err) // Get - var bundle placement.GroupBundle - err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule/foo", &bundle) - re.NoError(err) - suite.assertBundleEqual(re, bundle, b2) + assertBundleEqual(re, urlPrefix+"/placement-rule/foo", b2) // GetAll again - err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule", &bundles) - re.NoError(err) - re.Len(bundles, 2) - suite.assertBundleEqual(re, bundles[0], b1) - suite.assertBundleEqual(re, bundles[1], b2) + assertBundlesEqual(re, urlPrefix+"/placement-rule", []placement.GroupBundle{b1, b2}, 2) // Delete - err = tu.CheckDelete(testDialClient, urlPrefix+"/placement-rule/pd", tu.StatusOK(re)) + err = tu.CheckDelete(tests.TestDialClient, urlPrefix+"/placement-rule/pd", tu.StatusOK(re)) re.NoError(err) // GetAll again - err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule", &bundles) - re.NoError(err) - re.Len(bundles, 1) - suite.assertBundleEqual(re, bundles[0], b2) + assertBundlesEqual(re, urlPrefix+"/placement-rule", []placement.GroupBundle{b2}, 1) // SetAll b2.Rules = append(b2.Rules, &placement.Rule{GroupID: "foo", ID: "baz", Index: 2, Role: placement.Follower, Count: 1}) @@ -829,26 +815,18 @@ func (suite *ruleTestSuite) checkBundle(cluster *tests.TestCluster) { b3 := placement.GroupBundle{ID: "foobar", Index: 100} data, err = json.Marshal([]placement.GroupBundle{b1, b2, b3}) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/placement-rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/placement-rule", data, tu.StatusOK(re)) re.NoError(err) // GetAll again - err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule", &bundles) - re.NoError(err) - re.Len(bundles, 
3) - suite.assertBundleEqual(re, bundles[0], b2) - suite.assertBundleEqual(re, bundles[1], b1) - suite.assertBundleEqual(re, bundles[2], b3) + assertBundlesEqual(re, urlPrefix+"/placement-rule", []placement.GroupBundle{b1, b2, b3}, 3) // Delete using regexp - err = tu.CheckDelete(testDialClient, urlPrefix+"/placement-rule/"+url.PathEscape("foo.*")+"?regexp", tu.StatusOK(re)) + err = tu.CheckDelete(tests.TestDialClient, urlPrefix+"/placement-rule/"+url.PathEscape("foo.*")+"?regexp", tu.StatusOK(re)) re.NoError(err) // GetAll again - err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule", &bundles) - re.NoError(err) - re.Len(bundles, 1) - suite.assertBundleEqual(re, bundles[0], b1) + assertBundlesEqual(re, urlPrefix+"/placement-rule", []placement.GroupBundle{b1}, 1) // Set id := "rule-without-group-id" @@ -860,23 +838,16 @@ func (suite *ruleTestSuite) checkBundle(cluster *tests.TestCluster) { } data, err = json.Marshal(b4) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/placement-rule/"+id, data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/placement-rule/"+id, data, tu.StatusOK(re)) re.NoError(err) b4.ID = id b4.Rules[0].GroupID = b4.ID - // Get - err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule/"+id, &bundle) - re.NoError(err) - suite.assertBundleEqual(re, bundle, b4) + assertBundleEqual(re, urlPrefix+"/placement-rule/"+id, b4) // GetAll again - err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule", &bundles) - re.NoError(err) - re.Len(bundles, 2) - suite.assertBundleEqual(re, bundles[0], b1) - suite.assertBundleEqual(re, bundles[1], b4) + assertBundlesEqual(re, urlPrefix+"/placement-rule", []placement.GroupBundle{b1, b4}, 2) // SetAll b5 := placement.GroupBundle{ @@ -888,18 +859,13 @@ func (suite *ruleTestSuite) checkBundle(cluster *tests.TestCluster) { } data, err = json.Marshal([]placement.GroupBundle{b1, b4, b5}) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/placement-rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/placement-rule", data, tu.StatusOK(re)) re.NoError(err) b5.Rules[0].GroupID = b5.ID // GetAll again - err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule", &bundles) - re.NoError(err) - re.Len(bundles, 3) - suite.assertBundleEqual(re, bundles[0], b1) - suite.assertBundleEqual(re, bundles[1], b4) - suite.assertBundleEqual(re, bundles[2], b5) + assertBundlesEqual(re, urlPrefix+"/placement-rule", []placement.GroupBundle{b1, b4, b5}, 3) } func (suite *ruleTestSuite) TestBundleBadRequest() { @@ -925,7 +891,7 @@ func (suite *ruleTestSuite) checkBundleBadRequest(cluster *tests.TestCluster) { {"/placement-rule", `[{"group_id":"foo", "rules": [{"group_id":"bar", "id":"baz", "role":"voter", "count":1}]}]`, false}, } for _, testCase := range testCases { - err := tu.CheckPostJSON(testDialClient, urlPrefix+testCase.uri, []byte(testCase.data), + err := tu.CheckPostJSON(tests.TestDialClient, urlPrefix+testCase.uri, []byte(testCase.data), func(_ []byte, code int, _ http.Header) { re.Equal(testCase.ok, code == http.StatusOK) }) @@ -1010,12 +976,12 @@ func (suite *ruleTestSuite) checkLeaderAndVoter(cluster *tests.TestCluster) { for _, bundle := range bundles { data, err := json.Marshal(bundle) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) 
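Every request in these hunks now goes through the shared tests.TestDialClient instead of a per-package testDialClient. A minimal sketch of such a shared client, assuming it matches the per-package dialClient removed from tests/server/api/testutil.go later in this diff (keep-alives disabled so requests do not reuse connections across test servers):

package tests

import "net/http"

// TestDialClient is a sketch of the shared HTTP client these tests switch to.
// Assumption: like the removed per-package dialClient, it only disables
// keep-alives and otherwise uses default transport settings.
var TestDialClient = &http.Client{
	Transport: &http.Transport{
		DisableKeepAlives: true,
	},
}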
re.NoError(err) tu.Eventually(re, func() bool { respBundle := make([]placement.GroupBundle, 0) - err := tu.CheckGetJSON(testDialClient, urlPrefix+"/config/placement-rule", nil, + err := tu.CheckGetJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", nil, tu.StatusOK(re), tu.ExtractJSON(re, &respBundle)) re.NoError(err) re.Len(respBundle, 1) @@ -1178,7 +1144,7 @@ func (suite *ruleTestSuite) checkConcurrencyWith(cluster *tests.TestCluster, re.NoError(err) for j := 0; j < 10; j++ { expectResult.Lock() - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) re.NoError(err) expectResult.val = i expectResult.Unlock() @@ -1192,7 +1158,7 @@ func (suite *ruleTestSuite) checkConcurrencyWith(cluster *tests.TestCluster, re.NotZero(expectResult.val) tu.Eventually(re, func() bool { respBundle := make([]placement.GroupBundle, 0) - err := tu.CheckGetJSON(testDialClient, urlPrefix+"/config/placement-rule", nil, + err := tu.CheckGetJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", nil, tu.StatusOK(re), tu.ExtractJSON(re, &respBundle)) re.NoError(err) re.Len(respBundle, 1) @@ -1228,27 +1194,53 @@ func (suite *ruleTestSuite) checkLargeRules(cluster *tests.TestCluster) { suite.postAndCheckRuleBundle(urlPrefix, genBundlesWithRulesNum(etcdutil.MaxEtcdTxnOps*2)) } -func (suite *ruleTestSuite) assertBundleEqual(re *require.Assertions, b1, b2 placement.GroupBundle) { +func assertBundleEqual(re *require.Assertions, url string, expectedBundle placement.GroupBundle) { + var bundle placement.GroupBundle + tu.Eventually(re, func() bool { + err := tu.ReadGetJSON(re, tests.TestDialClient, url, &bundle) + if err != nil { + return false + } + return compareBundle(bundle, expectedBundle) + }) +} + +func assertBundlesEqual(re *require.Assertions, url string, expectedBundles []placement.GroupBundle, expectedLen int) { + var bundles []placement.GroupBundle tu.Eventually(re, func() bool { - return suite.compareBundle(b1, b2) + err := tu.ReadGetJSON(re, tests.TestDialClient, url, &bundles) + if err != nil { + return false + } + if len(bundles) != expectedLen { + return false + } + sort.Slice(bundles, func(i, j int) bool { return bundles[i].ID < bundles[j].ID }) + sort.Slice(expectedBundles, func(i, j int) bool { return expectedBundles[i].ID < expectedBundles[j].ID }) + for i := range bundles { + if !compareBundle(bundles[i], expectedBundles[i]) { + return false + } + } + return true }) } -func (suite *ruleTestSuite) compareBundle(b1, b2 placement.GroupBundle) bool { +func compareBundle(b1, b2 placement.GroupBundle) bool { if b2.ID != b1.ID || b2.Index != b1.Index || b2.Override != b1.Override || len(b2.Rules) != len(b1.Rules) { return false } sort.Slice(b1.Rules, func(i, j int) bool { return b1.Rules[i].ID < b1.Rules[j].ID }) sort.Slice(b2.Rules, func(i, j int) bool { return b2.Rules[i].ID < b2.Rules[j].ID }) for i := range b1.Rules { - if !suite.compareRule(b1.Rules[i], b2.Rules[i]) { + if !compareRule(b1.Rules[i], b2.Rules[i]) { return false } } return true } -func (suite *ruleTestSuite) compareRule(r1 *placement.Rule, r2 *placement.Rule) bool { +func compareRule(r1 *placement.Rule, r2 *placement.Rule) bool { return r2.GroupID == r1.GroupID && r2.ID == r1.ID && r2.StartKeyHex == r1.StartKeyHex && @@ -1261,12 +1253,12 @@ func (suite *ruleTestSuite) postAndCheckRuleBundle(urlPrefix string, bundle []pl re := suite.Require() data, err := json.Marshal(bundle) 
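The assertBundleEqual/assertBundlesEqual helpers defined here poll with tu.Eventually and sort both slices by ID before comparing field by field, so the check neither races the scheduling service nor depends on response order. A self-contained sketch of that shape, with Bundle standing in for placement.GroupBundle and a plain retry loop standing in for the testify-based tu.Eventually:

package main

import (
	"fmt"
	"sort"
	"time"
)

// Bundle is a stand-in for placement.GroupBundle, reduced to the fields compared here.
type Bundle struct {
	ID    string
	Index int
}

// bundlesEqual sorts both slices by ID and compares them element-wise,
// mirroring the sort.Slice + compareBundle loop in assertBundlesEqual.
func bundlesEqual(got, want []Bundle) bool {
	if len(got) != len(want) {
		return false
	}
	sort.Slice(got, func(i, j int) bool { return got[i].ID < got[j].ID })
	sort.Slice(want, func(i, j int) bool { return want[i].ID < want[j].ID })
	for i := range got {
		if got[i] != want[i] {
			return false
		}
	}
	return true
}

// eventually retries cond until it succeeds or the deadline passes,
// a rough stand-in for tu.Eventually.
func eventually(timeout, interval time.Duration, cond func() bool) bool {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if cond() {
			return true
		}
		time.Sleep(interval)
	}
	return false
}

func main() {
	want := []Bundle{{ID: "foo", Index: 42}, {ID: "pd"}}
	// In the real test the closure re-reads /config/placement-rule on each attempt.
	got := func() []Bundle { return []Bundle{{ID: "pd"}, {ID: "foo", Index: 42}} }
	ok := eventually(2*time.Second, 100*time.Millisecond, func() bool {
		return bundlesEqual(got(), want)
	})
	fmt.Println(ok) // true: response order does not matter
}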
re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) re.NoError(err) tu.Eventually(re, func() bool { respBundle := make([]placement.GroupBundle, 0) - err = tu.CheckGetJSON(testDialClient, urlPrefix+"/config/placement-rule", nil, + err = tu.CheckGetJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", nil, tu.StatusOK(re), tu.ExtractJSON(re, &respBundle)) re.NoError(err) if len(respBundle) != len(bundle) { @@ -1275,7 +1267,7 @@ func (suite *ruleTestSuite) postAndCheckRuleBundle(urlPrefix string, bundle []pl sort.Slice(respBundle, func(i, j int) bool { return respBundle[i].ID < respBundle[j].ID }) sort.Slice(bundle, func(i, j int) bool { return bundle[i].ID < bundle[j].ID }) for i := range respBundle { - if !suite.compareBundle(respBundle[i], bundle[i]) { + if !compareBundle(respBundle[i], bundle[i]) { return false } } @@ -1293,7 +1285,7 @@ func TestRegionRuleTestSuite(t *testing.T) { } func (suite *regionRuleTestSuite) SetupSuite() { - suite.env = tests.NewSchedulingTestEnvironment(suite.T(), func(conf *config.Config, serverName string) { + suite.env = tests.NewSchedulingTestEnvironment(suite.T(), func(conf *config.Config, _ string) { conf.Replication.EnablePlacementRules = true conf.Replication.MaxReplicas = 1 }) @@ -1372,19 +1364,19 @@ func (suite *regionRuleTestSuite) checkRegionPlacementRule(cluster *tests.TestCl fit := &placement.RegionFit{} u := fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 1) - err := tu.ReadGetJSON(re, testDialClient, u, fit) + err := tu.ReadGetJSON(re, tests.TestDialClient, u, fit) re.NoError(err) re.Len(fit.RuleFits, 1) re.Len(fit.OrphanPeers, 1) u = fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 2) fit = &placement.RegionFit{} - err = tu.ReadGetJSON(re, testDialClient, u, fit) + err = tu.ReadGetJSON(re, tests.TestDialClient, u, fit) re.NoError(err) re.Len(fit.RuleFits, 2) re.Empty(fit.OrphanPeers) u = fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 3) fit = &placement.RegionFit{} - err = tu.ReadGetJSON(re, testDialClient, u, fit) + err = tu.ReadGetJSON(re, tests.TestDialClient, u, fit) re.NoError(err) re.Empty(fit.RuleFits) re.Len(fit.OrphanPeers, 2) @@ -1392,26 +1384,26 @@ func (suite *regionRuleTestSuite) checkRegionPlacementRule(cluster *tests.TestCl var label labeler.LabelRule escapedID := url.PathEscape("keyspaces/0") u = fmt.Sprintf("%s/config/region-label/rule/%s", urlPrefix, escapedID) - err = tu.ReadGetJSON(re, testDialClient, u, &label) + err = tu.ReadGetJSON(re, tests.TestDialClient, u, &label) re.NoError(err) re.Equal("keyspaces/0", label.ID) var labels []labeler.LabelRule u = fmt.Sprintf("%s/config/region-label/rules", urlPrefix) - err = tu.ReadGetJSON(re, testDialClient, u, &labels) + err = tu.ReadGetJSON(re, tests.TestDialClient, u, &labels) re.NoError(err) re.Len(labels, 1) re.Equal("keyspaces/0", labels[0].ID) u = fmt.Sprintf("%s/config/region-label/rules/ids", urlPrefix) - err = tu.CheckGetJSON(testDialClient, u, []byte(`["rule1", "rule3"]`), func(resp []byte, statusCode int, _ http.Header) { + err = tu.CheckGetJSON(tests.TestDialClient, u, []byte(`["rule1", "rule3"]`), func(resp []byte, _ int, _ http.Header) { err := json.Unmarshal(resp, &labels) re.NoError(err) re.Empty(labels) }) re.NoError(err) - err = tu.CheckGetJSON(testDialClient, u, []byte(`["keyspaces/0"]`), func(resp []byte, statusCode int, _ http.Header) { + err = 
tu.CheckGetJSON(tests.TestDialClient, u, []byte(`["keyspaces/0"]`), func(resp []byte, _ int, _ http.Header) { err := json.Unmarshal(resp, &labels) re.NoError(err) re.Len(labels, 1) @@ -1420,21 +1412,21 @@ func (suite *regionRuleTestSuite) checkRegionPlacementRule(cluster *tests.TestCl re.NoError(err) u = fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 4) - err = tu.CheckGetJSON(testDialClient, u, nil, tu.Status(re, http.StatusNotFound), tu.StringContain( + err = tu.CheckGetJSON(tests.TestDialClient, u, nil, tu.Status(re, http.StatusNotFound), tu.StringContain( re, "region 4 not found")) re.NoError(err) u = fmt.Sprintf("%s/config/rules/region/%s/detail", urlPrefix, "id") - err = tu.CheckGetJSON(testDialClient, u, nil, tu.Status(re, http.StatusBadRequest), tu.StringContain( + err = tu.CheckGetJSON(tests.TestDialClient, u, nil, tu.Status(re, http.StatusBadRequest), tu.StringContain( re, errs.ErrRegionInvalidID.Error())) re.NoError(err) - data := make(map[string]interface{}) + data := make(map[string]any) data["enable-placement-rules"] = "false" reqData, e := json.Marshal(data) re.NoError(e) u = fmt.Sprintf("%s/config", urlPrefix) - err = tu.CheckPostJSON(testDialClient, u, reqData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, u, reqData, tu.StatusOK(re)) re.NoError(err) if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { // wait for the scheduling server to update the config @@ -1443,7 +1435,7 @@ func (suite *regionRuleTestSuite) checkRegionPlacementRule(cluster *tests.TestCl }) } u = fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 1) - err = tu.CheckGetJSON(testDialClient, u, nil, tu.Status(re, http.StatusPreconditionFailed), tu.StringContain( + err = tu.CheckGetJSON(tests.TestDialClient, u, nil, tu.Status(re, http.StatusPreconditionFailed), tu.StringContain( re, "placement rules feature is disabled")) re.NoError(err) } diff --git a/tests/server/api/scheduler_test.go b/tests/server/api/scheduler_test.go index e029df5bf97..10631dab158 100644 --- a/tests/server/api/scheduler_test.go +++ b/tests/server/api/scheduler_test.go @@ -79,53 +79,53 @@ func (suite *scheduleTestSuite) checkOriginAPI(cluster *tests.TestCluster) { tests.MustPutStore(re, cluster, store) } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = "evict-leader-scheduler" input["store_id"] = 1 body, err := json.Marshal(input) re.NoError(err) - re.NoError(tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusOK(re))) + re.NoError(tu.CheckPostJSON(tests.TestDialClient, urlPrefix, body, tu.StatusOK(re))) suite.assertSchedulerExists(urlPrefix, "evict-leader-scheduler") - resp := make(map[string]interface{}) + resp := make(map[string]any) listURL := fmt.Sprintf("%s%s%s/%s/list", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, "evict-leader-scheduler") - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) re.Len(resp["store-id-ranges"], 1) - input1 := make(map[string]interface{}) + input1 := make(map[string]any) input1["name"] = "evict-leader-scheduler" input1["store_id"] = 2 body, err = json.Marshal(input1) re.NoError(err) re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/schedulers/persistFail", "return(true)")) - re.NoError(tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusNotOK(re))) + re.NoError(tu.CheckPostJSON(tests.TestDialClient, urlPrefix, body, tu.StatusNotOK(re))) suite.assertSchedulerExists(urlPrefix, 
"evict-leader-scheduler") - resp = make(map[string]interface{}) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) + resp = make(map[string]any) + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) re.Len(resp["store-id-ranges"], 1) re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/schedulers/persistFail")) - re.NoError(tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusOK(re))) + re.NoError(tu.CheckPostJSON(tests.TestDialClient, urlPrefix, body, tu.StatusOK(re))) suite.assertSchedulerExists(urlPrefix, "evict-leader-scheduler") - resp = make(map[string]interface{}) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) + resp = make(map[string]any) + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) re.Len(resp["store-id-ranges"], 2) deleteURL := fmt.Sprintf("%s/%s", urlPrefix, "evict-leader-scheduler-1") - err = tu.CheckDelete(testDialClient, deleteURL, tu.StatusOK(re)) + err = tu.CheckDelete(tests.TestDialClient, deleteURL, tu.StatusOK(re)) re.NoError(err) suite.assertSchedulerExists(urlPrefix, "evict-leader-scheduler") - resp1 := make(map[string]interface{}) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp1)) + resp1 := make(map[string]any) + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp1)) re.Len(resp1["store-id-ranges"], 1) deleteURL = fmt.Sprintf("%s/%s", urlPrefix, "evict-leader-scheduler-2") re.NoError(failpoint.Enable("github.com/tikv/pd/server/config/persistFail", "return(true)")) - err = tu.CheckDelete(testDialClient, deleteURL, tu.Status(re, http.StatusInternalServerError)) + err = tu.CheckDelete(tests.TestDialClient, deleteURL, tu.Status(re, http.StatusInternalServerError)) re.NoError(err) suite.assertSchedulerExists(urlPrefix, "evict-leader-scheduler") re.NoError(failpoint.Disable("github.com/tikv/pd/server/config/persistFail")) - err = tu.CheckDelete(testDialClient, deleteURL, tu.StatusOK(re)) + err = tu.CheckDelete(tests.TestDialClient, deleteURL, tu.StatusOK(re)) re.NoError(err) - suite.assertNoScheduler(re, urlPrefix, "evict-leader-scheduler") - re.NoError(tu.CheckGetJSON(testDialClient, listURL, nil, tu.Status(re, http.StatusNotFound))) - err = tu.CheckDelete(testDialClient, deleteURL, tu.Status(re, http.StatusNotFound)) + assertNoScheduler(re, urlPrefix, "evict-leader-scheduler") + re.NoError(tu.CheckGetJSON(tests.TestDialClient, listURL, nil, tu.Status(re, http.StatusNotFound))) + err = tu.CheckDelete(tests.TestDialClient, deleteURL, tu.Status(re, http.StatusNotFound)) re.NoError(err) } @@ -149,7 +149,7 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { type arg struct { opt string - value interface{} + value any } testCases := []struct { name string @@ -161,50 +161,54 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { name: "balance-leader-scheduler", createdName: "balance-leader-scheduler", extraTestFunc: func(name string) { - resp := make(map[string]interface{}) listURL := fmt.Sprintf("%s%s%s/%s/list", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - re.Equal(4.0, resp["batch"]) - dataMap := make(map[string]interface{}) + resp := make(map[string]any) + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return resp["batch"] == 4.0 + }) + dataMap := make(map[string]any) dataMap["batch"] = 3 updateURL := fmt.Sprintf("%s%s%s/%s/config", leaderAddr, apiPrefix, 
server.SchedulerConfigHandlerPath, name) body, err := json.Marshal(dataMap) re.NoError(err) - re.NoError(tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re))) + re.NoError(tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.StatusOK(re))) + resp = make(map[string]any) tu.Eventually(re, func() bool { // wait for scheduling server to be synced. - resp = make(map[string]interface{}) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) return resp["batch"] == 3.0 }) // update again - err = tu.CheckPostJSON(testDialClient, updateURL, body, + err = tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.StatusOK(re), tu.StringEqual(re, "\"Config is the same with origin, so do nothing.\"\n")) re.NoError(err) // update invalidate batch - dataMap = map[string]interface{}{} + dataMap = map[string]any{} dataMap["batch"] = 100 body, err = json.Marshal(dataMap) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, updateURL, body, + err = tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.Status(re, http.StatusBadRequest), tu.StringEqual(re, "\"invalid batch size which should be an integer between 1 and 10\"\n")) re.NoError(err) - resp = make(map[string]interface{}) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - re.Equal(3.0, resp["batch"]) + resp = make(map[string]any) + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return resp["batch"] == 3.0 + }) // empty body - err = tu.CheckPostJSON(testDialClient, updateURL, nil, + err = tu.CheckPostJSON(tests.TestDialClient, updateURL, nil, tu.Status(re, http.StatusInternalServerError), tu.StringEqual(re, "\"unexpected end of JSON input\"\n")) re.NoError(err) // config item not found - dataMap = map[string]interface{}{} + dataMap = map[string]any{} dataMap["error"] = 3 body, err = json.Marshal(dataMap) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, updateURL, body, + err = tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.Status(re, http.StatusBadRequest), tu.StringEqual(re, "\"Config item is not found.\"\n")) re.NoError(err) @@ -214,10 +218,9 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { name: "balance-hot-region-scheduler", createdName: "balance-hot-region-scheduler", extraTestFunc: func(name string) { - resp := make(map[string]interface{}) + resp := make(map[string]any) listURL := fmt.Sprintf("%s%s%s/%s/list", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - expectMap := map[string]interface{}{ + expectMap := map[string]any{ "min-hot-byte-rate": 100.0, "min-hot-key-rate": 10.0, "min-hot-query-rate": 10.0, @@ -233,41 +236,53 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { "dst-tolerance-ratio": 1.05, "split-thresholds": 0.2, "rank-formula-version": "v2", - "read-priorities": []interface{}{"byte", "key"}, - "write-leader-priorities": []interface{}{"key", "byte"}, - "write-peer-priorities": []interface{}{"byte", "key"}, + "read-priorities": []any{"byte", "key"}, + "write-leader-priorities": []any{"key", "byte"}, + "write-peer-priorities": []any{"byte", "key"}, "enable-for-tiflash": "true", "strict-picking-store": "true", + "history-sample-duration": "5m0s", + "history-sample-interval": "30s", } - re.Equal(len(expectMap), len(resp), "expect %v, got %v", expectMap, resp) - for key := range expectMap { - 
re.Equal(expectMap[key], resp[key]) - } - dataMap := make(map[string]interface{}) + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + re.Equal(len(expectMap), len(resp), "expect %v, got %v", expectMap, resp) + for key := range expectMap { + if !reflect.DeepEqual(resp[key], expectMap[key]) { + return false + } + } + return true + }) + dataMap := make(map[string]any) dataMap["max-zombie-rounds"] = 5.0 expectMap["max-zombie-rounds"] = 5.0 updateURL := fmt.Sprintf("%s%s%s/%s/config", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name) body, err := json.Marshal(dataMap) re.NoError(err) - re.NoError(tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re))) - resp = make(map[string]interface{}) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - - for key := range expectMap { - re.Equal(expectMap[key], resp[key], "key %s", key) - } + re.NoError(tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.StatusOK(re))) + resp = make(map[string]any) + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + for key := range expectMap { + if !reflect.DeepEqual(resp[key], expectMap[key]) { + return false + } + } + return true + }) // update again - err = tu.CheckPostJSON(testDialClient, updateURL, body, + err = tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.StatusOK(re), tu.StringEqual(re, "Config is the same with origin, so do nothing.")) re.NoError(err) // config item not found - dataMap = map[string]interface{}{} + dataMap = map[string]any{} dataMap["error"] = 3 body, err = json.Marshal(dataMap) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, updateURL, body, + err = tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.Status(re, http.StatusBadRequest), tu.StringEqual(re, "Config item is not found.")) re.NoError(err) @@ -277,36 +292,39 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { name: "split-bucket-scheduler", createdName: "split-bucket-scheduler", extraTestFunc: func(name string) { - resp := make(map[string]interface{}) listURL := fmt.Sprintf("%s%s%s/%s/list", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - re.Equal(3.0, resp["degree"]) - re.Equal(0.0, resp["split-limit"]) - dataMap := make(map[string]interface{}) + resp := make(map[string]any) + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return resp["degree"] == 3.0 && resp["split-limit"] == 0.0 + }) + dataMap := make(map[string]any) dataMap["degree"] = 4 updateURL := fmt.Sprintf("%s%s%s/%s/config", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name) body, err := json.Marshal(dataMap) re.NoError(err) - re.NoError(tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re))) - resp = make(map[string]interface{}) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - re.Equal(4.0, resp["degree"]) + re.NoError(tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.StatusOK(re))) + resp = make(map[string]any) + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return resp["degree"] == 4.0 + }) // update again - err = tu.CheckPostJSON(testDialClient, updateURL, body, + err = tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.StatusOK(re), tu.StringEqual(re, "Config is the same with origin, so do nothing.")) 
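The batch value is asserted as 3.0 (and the hot-region expectations as 100.0, 10.0, and so on) because decoding into map[string]any turns every JSON number into a float64, so comparing against an untyped integer would never match. A small standalone illustration, not PD code:

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	var resp map[string]any
	// Decoding into map[string]any makes every JSON number a float64,
	// which is why the assertions above compare against 3.0, 100.0, etc.
	if err := json.Unmarshal([]byte(`{"batch": 3, "rank-formula-version": "v2"}`), &resp); err != nil {
		panic(err)
	}
	fmt.Printf("%T\n", resp["batch"]) // float64
	fmt.Println(resp["batch"] == 3.0) // true
	fmt.Println(resp["batch"] == 3)   // false: dynamic type is float64, not int
}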
re.NoError(err) // empty body - err = tu.CheckPostJSON(testDialClient, updateURL, nil, + err = tu.CheckPostJSON(tests.TestDialClient, updateURL, nil, tu.Status(re, http.StatusInternalServerError), tu.StringEqual(re, "\"unexpected end of JSON input\"\n")) re.NoError(err) // config item not found - dataMap = map[string]interface{}{} + dataMap = map[string]any{} dataMap["error"] = 3 body, err = json.Marshal(dataMap) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, updateURL, body, + err = tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.Status(re, http.StatusBadRequest), tu.StringEqual(re, "Config item is not found.")) re.NoError(err) @@ -332,47 +350,53 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { name: "balance-witness-scheduler", createdName: "balance-witness-scheduler", extraTestFunc: func(name string) { - resp := make(map[string]interface{}) + resp := make(map[string]any) listURL := fmt.Sprintf("%s%s%s/%s/list", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - re.Equal(4.0, resp["batch"]) - dataMap := make(map[string]interface{}) + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return resp["batch"] == 4.0 + }) + dataMap := make(map[string]any) dataMap["batch"] = 3 updateURL := fmt.Sprintf("%s%s%s/%s/config", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name) body, err := json.Marshal(dataMap) re.NoError(err) - re.NoError(tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re))) - resp = make(map[string]interface{}) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - re.Equal(3.0, resp["batch"]) + re.NoError(tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.StatusOK(re))) + resp = make(map[string]any) + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return resp["batch"] == 3.0 + }) // update again - err = tu.CheckPostJSON(testDialClient, updateURL, body, + err = tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.StatusOK(re), tu.StringEqual(re, "\"Config is the same with origin, so do nothing.\"\n")) re.NoError(err) // update invalidate batch - dataMap = map[string]interface{}{} + dataMap = map[string]any{} dataMap["batch"] = 100 body, err = json.Marshal(dataMap) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, updateURL, body, + err = tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.Status(re, http.StatusBadRequest), tu.StringEqual(re, "\"invalid batch size which should be an integer between 1 and 10\"\n")) re.NoError(err) - resp = make(map[string]interface{}) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - re.Equal(3.0, resp["batch"]) + resp = make(map[string]any) + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return resp["batch"] == 3.0 + }) // empty body - err = tu.CheckPostJSON(testDialClient, updateURL, nil, + err = tu.CheckPostJSON(tests.TestDialClient, updateURL, nil, tu.Status(re, http.StatusInternalServerError), tu.StringEqual(re, "\"unexpected end of JSON input\"\n")) re.NoError(err) // config item not found - dataMap = map[string]interface{}{} + dataMap = map[string]any{} dataMap["error"] = 3 body, err = json.Marshal(dataMap) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, updateURL, body, + err = tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.Status(re, 
http.StatusBadRequest), tu.StringEqual(re, "\"Config item is not found.\"\n")) re.NoError(err) @@ -383,35 +407,41 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { createdName: "grant-leader-scheduler", args: []arg{{"store_id", 1}}, extraTestFunc: func(name string) { - resp := make(map[string]interface{}) + resp := make(map[string]any) listURL := fmt.Sprintf("%s%s%s/%s/list", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - exceptMap := make(map[string]interface{}) - exceptMap["1"] = []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}} - re.Equal(exceptMap, resp["store-id-ranges"]) + expectedMap := make(map[string]any) + expectedMap["1"] = []any{map[string]any{"end-key": "", "start-key": ""}} + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return reflect.DeepEqual(expectedMap, resp["store-id-ranges"]) + }) // using /pd/v1/schedule-config/grant-leader-scheduler/config to add new store to grant-leader-scheduler - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = "grant-leader-scheduler" input["store_id"] = 2 updateURL := fmt.Sprintf("%s%s%s/%s/config", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name) body, err := json.Marshal(input) re.NoError(err) - re.NoError(tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re))) - resp = make(map[string]interface{}) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - exceptMap["2"] = []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}} - re.Equal(exceptMap, resp["store-id-ranges"]) + re.NoError(tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.StatusOK(re))) + expectedMap["2"] = []any{map[string]any{"end-key": "", "start-key": ""}} + resp = make(map[string]any) + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return reflect.DeepEqual(expectedMap, resp["store-id-ranges"]) + }) // using /pd/v1/schedule-config/grant-leader-scheduler/config to delete exists store from grant-leader-scheduler deleteURL := fmt.Sprintf("%s%s%s/%s/delete/%s", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name, "2") - err = tu.CheckDelete(testDialClient, deleteURL, tu.StatusOK(re)) + err = tu.CheckDelete(tests.TestDialClient, deleteURL, tu.StatusOK(re)) re.NoError(err) - resp = make(map[string]interface{}) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - delete(exceptMap, "2") - re.Equal(exceptMap, resp["store-id-ranges"]) - err = tu.CheckDelete(testDialClient, deleteURL, tu.Status(re, http.StatusNotFound)) + delete(expectedMap, "2") + resp = make(map[string]any) + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return reflect.DeepEqual(expectedMap, resp["store-id-ranges"]) + }) + err = tu.CheckDelete(tests.TestDialClient, deleteURL, tu.Status(re, http.StatusNotFound)) re.NoError(err) }, }, @@ -421,23 +451,23 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { args: []arg{{"start_key", ""}, {"end_key", ""}, {"range_name", "test"}}, // Test the scheduler config handler. 
extraTestFunc: func(name string) { - resp := make(map[string]interface{}) + resp := make(map[string]any) listURL := fmt.Sprintf("%s%s%s/%s/list", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - re.Equal("", resp["start-key"]) - re.Equal("", resp["end-key"]) - re.Equal("test", resp["range-name"]) + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return resp["start-key"] == "" && resp["end-key"] == "" && resp["range-name"] == "test" + }) resp["start-key"] = "a_00" resp["end-key"] = "a_99" updateURL := fmt.Sprintf("%s%s%s/%s/config", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name) body, err := json.Marshal(resp) re.NoError(err) - re.NoError(tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re))) - resp = make(map[string]interface{}) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - re.Equal("a_00", resp["start-key"]) - re.Equal("a_99", resp["end-key"]) - re.Equal("test", resp["range-name"]) + re.NoError(tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.StatusOK(re))) + resp = make(map[string]any) + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return resp["start-key"] == "a_00" && resp["end-key"] == "a_99" && resp["range-name"] == "test" + }) }, }, { @@ -446,39 +476,41 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { args: []arg{{"store_id", 3}}, // Test the scheduler config handler. extraTestFunc: func(name string) { - resp := make(map[string]interface{}) + resp := make(map[string]any) listURL := fmt.Sprintf("%s%s%s/%s/list", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name) - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - exceptMap := make(map[string]interface{}) - exceptMap["3"] = []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}} - re.Equal(exceptMap, resp["store-id-ranges"]) + expectedMap := make(map[string]any) + expectedMap["3"] = []any{map[string]any{"end-key": "", "start-key": ""}} + tu.Eventually(re, func() bool { + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return reflect.DeepEqual(expectedMap, resp["store-id-ranges"]) + }) // using /pd/v1/schedule-config/evict-leader-scheduler/config to add new store to evict-leader-scheduler - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = "evict-leader-scheduler" input["store_id"] = 4 updateURL := fmt.Sprintf("%s%s%s/%s/config", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name) body, err := json.Marshal(input) re.NoError(err) - re.NoError(tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re))) - resp = make(map[string]interface{}) + re.NoError(tu.CheckPostJSON(tests.TestDialClient, updateURL, body, tu.StatusOK(re))) + expectedMap["4"] = []any{map[string]any{"end-key": "", "start-key": ""}} + resp = make(map[string]any) tu.Eventually(re, func() bool { - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - exceptMap["4"] = []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}} - return reflect.DeepEqual(exceptMap, resp["store-id-ranges"]) + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return reflect.DeepEqual(expectedMap, resp["store-id-ranges"]) }) // using /pd/v1/schedule-config/evict-leader-scheduler/config to delete exist store from evict-leader-scheduler deleteURL := 
fmt.Sprintf("%s%s%s/%s/delete/%s", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name, "4") - err = tu.CheckDelete(testDialClient, deleteURL, tu.StatusOK(re)) + err = tu.CheckDelete(tests.TestDialClient, deleteURL, tu.StatusOK(re)) re.NoError(err) - resp = make(map[string]interface{}) + delete(expectedMap, "4") + resp = make(map[string]any) tu.Eventually(re, func() bool { - re.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - delete(exceptMap, "4") - return reflect.DeepEqual(exceptMap, resp["store-id-ranges"]) + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, listURL, &resp)) + return reflect.DeepEqual(expectedMap, resp["store-id-ranges"]) }) - err = tu.CheckDelete(testDialClient, deleteURL, tu.Status(re, http.StatusNotFound)) + err = tu.CheckDelete(tests.TestDialClient, deleteURL, tu.Status(re, http.StatusNotFound)) re.NoError(err) }, }, @@ -488,7 +520,7 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { }, } for _, testCase := range testCases { - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = testCase.name for _, a := range testCase.args { input[a.opt] = a.value @@ -499,22 +531,22 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { if testCase.extraTestFunc != nil { testCase.extraTestFunc(testCase.createdName) } - suite.deleteScheduler(re, urlPrefix, testCase.createdName) - suite.assertNoScheduler(re, urlPrefix, testCase.createdName) + deleteScheduler(re, urlPrefix, testCase.createdName) + assertNoScheduler(re, urlPrefix, testCase.createdName) } // test pause and resume all schedulers. // add schedulers. for _, testCase := range testCases { - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = testCase.name for _, a := range testCase.args { input[a.opt] = a.value } body, err := json.Marshal(input) re.NoError(err) - suite.addScheduler(re, urlPrefix, body) + addScheduler(re, urlPrefix, body) suite.assertSchedulerExists(urlPrefix, testCase.createdName) // wait for scheduler to be synced. if testCase.extraTestFunc != nil { testCase.extraTestFunc(testCase.createdName) @@ -522,11 +554,11 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { } // test pause all schedulers. 
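These pause/resume cases drive the schedulers API purely over HTTP: a positive {"delay": N} pauses a scheduler (or /all for every scheduler) for N seconds, and a delay of 0 resumes it. A hedged sketch of that call shape; the exact /pd/api/v1/schedulers path is an assumption based on the prefixes used elsewhere in these tests:

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// pauseScheduler posts {"delay": seconds} to the schedulers endpoint, the same
// payload these tests use: a positive delay pauses, 0 resumes. The URL shape
// is assumed, not taken verbatim from PD.
func pauseScheduler(client *http.Client, addr, name string, delaySeconds int) error {
	body, err := json.Marshal(map[string]any{"delay": delaySeconds})
	if err != nil {
		return err
	}
	url := fmt.Sprintf("%s/pd/api/v1/schedulers/%s", addr, name)
	resp, err := client.Post(url, "application/json", bytes.NewReader(body))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status: %s", resp.Status)
	}
	return nil
}

func main() {
	client := &http.Client{Transport: &http.Transport{DisableKeepAlives: true}}
	// Hypothetical PD address; pause balance-leader-scheduler for 30s, then resume.
	_ = pauseScheduler(client, "http://127.0.0.1:2379", "balance-leader-scheduler", 30)
	_ = pauseScheduler(client, "http://127.0.0.1:2379", "balance-leader-scheduler", 0)
}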
- input := make(map[string]interface{}) + input := make(map[string]any) input["delay"] = 30 pauseArgs, err := json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/all", pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/all", pauseArgs, tu.StatusOK(re)) re.NoError(err) for _, testCase := range testCases { @@ -534,13 +566,13 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { if createdName == "" { createdName = testCase.name } - isPaused := suite.isSchedulerPaused(re, urlPrefix, createdName) + isPaused := isSchedulerPaused(re, urlPrefix, createdName) re.True(isPaused) } input["delay"] = 1 pauseArgs, err = json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/all", pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/all", pauseArgs, tu.StatusOK(re)) re.NoError(err) time.Sleep(time.Second) for _, testCase := range testCases { @@ -548,7 +580,7 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { if createdName == "" { createdName = testCase.name } - isPaused := suite.isSchedulerPaused(re, urlPrefix, createdName) + isPaused := isSchedulerPaused(re, urlPrefix, createdName) re.False(isPaused) } @@ -556,19 +588,19 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { input["delay"] = 30 pauseArgs, err = json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/all", pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/all", pauseArgs, tu.StatusOK(re)) re.NoError(err) input["delay"] = 0 pauseArgs, err = json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/all", pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/all", pauseArgs, tu.StatusOK(re)) re.NoError(err) for _, testCase := range testCases { createdName := testCase.createdName if createdName == "" { createdName = testCase.name } - isPaused := suite.isSchedulerPaused(re, urlPrefix, createdName) + isPaused := isSchedulerPaused(re, urlPrefix, createdName) re.False(isPaused) } @@ -578,8 +610,8 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { if createdName == "" { createdName = testCase.name } - suite.deleteScheduler(re, urlPrefix, createdName) - suite.assertNoScheduler(re, urlPrefix, createdName) + deleteScheduler(re, urlPrefix, createdName) + assertNoScheduler(re, urlPrefix, createdName) } } @@ -602,46 +634,46 @@ func (suite *scheduleTestSuite) checkDisable(cluster *tests.TestCluster) { } name := "shuffle-leader-scheduler" - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = name body, err := json.Marshal(input) re.NoError(err) - suite.addScheduler(re, urlPrefix, body) + addScheduler(re, urlPrefix, body) u := fmt.Sprintf("%s%s/api/v1/config/schedule", leaderAddr, apiPrefix) var scheduleConfig sc.ScheduleConfig - err = tu.ReadGetJSON(re, testDialClient, u, &scheduleConfig) + err = tu.ReadGetJSON(re, tests.TestDialClient, u, &scheduleConfig) re.NoError(err) originSchedulers := scheduleConfig.Schedulers scheduleConfig.Schedulers = sc.SchedulerConfigs{sc.SchedulerConfig{Type: "shuffle-leader", Disable: true}} body, err = json.Marshal(scheduleConfig) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, u, body, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, u, body, tu.StatusOK(re)) re.NoError(err) - suite.assertNoScheduler(re, 
urlPrefix, name) + assertNoScheduler(re, urlPrefix, name) suite.assertSchedulerExists(fmt.Sprintf("%s?status=disabled", urlPrefix), name) // reset schedule config scheduleConfig.Schedulers = originSchedulers body, err = json.Marshal(scheduleConfig) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, u, body, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, u, body, tu.StatusOK(re)) re.NoError(err) - suite.deleteScheduler(re, urlPrefix, name) - suite.assertNoScheduler(re, urlPrefix, name) + deleteScheduler(re, urlPrefix, name) + assertNoScheduler(re, urlPrefix, name) } -func (suite *scheduleTestSuite) addScheduler(re *require.Assertions, urlPrefix string, body []byte) { - err := tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusOK(re)) +func addScheduler(re *require.Assertions, urlPrefix string, body []byte) { + err := tu.CheckPostJSON(tests.TestDialClient, urlPrefix, body, tu.StatusOK(re)) re.NoError(err) } -func (suite *scheduleTestSuite) deleteScheduler(re *require.Assertions, urlPrefix string, createdName string) { +func deleteScheduler(re *require.Assertions, urlPrefix string, createdName string) { deleteURL := fmt.Sprintf("%s/%s", urlPrefix, createdName) - err := tu.CheckDelete(testDialClient, deleteURL, tu.StatusOK(re)) + err := tu.CheckDelete(tests.TestDialClient, deleteURL, tu.StatusOK(re)) re.NoError(err) } @@ -650,44 +682,44 @@ func (suite *scheduleTestSuite) testPauseOrResume(re *require.Assertions, urlPre createdName = name } var schedulers []string - tu.ReadGetJSON(re, testDialClient, urlPrefix, &schedulers) + tu.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &schedulers) if !slice.Contains(schedulers, createdName) { - err := tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusOK(re)) + err := tu.CheckPostJSON(tests.TestDialClient, urlPrefix, body, tu.StatusOK(re)) re.NoError(err) } suite.assertSchedulerExists(urlPrefix, createdName) // wait for scheduler to be synced. // test pause. - input := make(map[string]interface{}) + input := make(map[string]any) input["delay"] = 30 pauseArgs, err := json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/"+createdName, pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/"+createdName, pauseArgs, tu.StatusOK(re)) re.NoError(err) - isPaused := suite.isSchedulerPaused(re, urlPrefix, createdName) + isPaused := isSchedulerPaused(re, urlPrefix, createdName) re.True(isPaused) input["delay"] = 1 pauseArgs, err = json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/"+createdName, pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/"+createdName, pauseArgs, tu.StatusOK(re)) re.NoError(err) time.Sleep(time.Second * 2) - isPaused = suite.isSchedulerPaused(re, urlPrefix, createdName) + isPaused = isSchedulerPaused(re, urlPrefix, createdName) re.False(isPaused) // test resume. 
- input = make(map[string]interface{}) + input = make(map[string]any) input["delay"] = 30 pauseArgs, err = json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/"+createdName, pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/"+createdName, pauseArgs, tu.StatusOK(re)) re.NoError(err) input["delay"] = 0 pauseArgs, err = json.Marshal(input) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, urlPrefix+"/"+createdName, pauseArgs, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/"+createdName, pauseArgs, tu.StatusOK(re)) re.NoError(err) - isPaused = suite.isSchedulerPaused(re, urlPrefix, createdName) + isPaused = isSchedulerPaused(re, urlPrefix, createdName) re.False(isPaused) } @@ -710,20 +742,20 @@ func (suite *scheduleTestSuite) checkEmptySchedulers(cluster *tests.TestCluster) } for _, query := range []string{"", "?status=paused", "?status=disabled"} { schedulers := make([]string, 0) - re.NoError(tu.ReadGetJSON(re, testDialClient, urlPrefix+query, &schedulers)) + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, urlPrefix+query, &schedulers)) for _, scheduler := range schedulers { if strings.Contains(query, "disable") { - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = scheduler body, err := json.Marshal(input) re.NoError(err) - suite.addScheduler(re, urlPrefix, body) + addScheduler(re, urlPrefix, body) } else { - suite.deleteScheduler(re, urlPrefix, scheduler) + deleteScheduler(re, urlPrefix, scheduler) } } tu.Eventually(re, func() bool { - resp, err := apiutil.GetJSON(testDialClient, urlPrefix+query, nil) + resp, err := apiutil.GetJSON(tests.TestDialClient, urlPrefix+query, nil) re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) @@ -738,26 +770,26 @@ func (suite *scheduleTestSuite) assertSchedulerExists(urlPrefix string, schedule var schedulers []string re := suite.Require() tu.Eventually(re, func() bool { - err := tu.ReadGetJSON(re, testDialClient, urlPrefix, &schedulers, + err := tu.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &schedulers, tu.StatusOK(re)) re.NoError(err) return slice.Contains(schedulers, scheduler) }) } -func (suite *scheduleTestSuite) assertNoScheduler(re *require.Assertions, urlPrefix string, scheduler string) { +func assertNoScheduler(re *require.Assertions, urlPrefix string, scheduler string) { var schedulers []string tu.Eventually(re, func() bool { - err := tu.ReadGetJSON(re, testDialClient, urlPrefix, &schedulers, + err := tu.ReadGetJSON(re, tests.TestDialClient, urlPrefix, &schedulers, tu.StatusOK(re)) re.NoError(err) return !slice.Contains(schedulers, scheduler) }) } -func (suite *scheduleTestSuite) isSchedulerPaused(re *require.Assertions, urlPrefix, name string) bool { +func isSchedulerPaused(re *require.Assertions, urlPrefix, name string) bool { var schedulers []string - err := tu.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s?status=paused", urlPrefix), &schedulers, + err := tu.ReadGetJSON(re, tests.TestDialClient, fmt.Sprintf("%s?status=paused", urlPrefix), &schedulers, tu.StatusOK(re)) re.NoError(err) for _, scheduler := range schedulers { diff --git a/tests/server/api/testutil.go b/tests/server/api/testutil.go index 6fab82ea2e3..163a25c9bbb 100644 --- a/tests/server/api/testutil.go +++ b/tests/server/api/testutil.go @@ -23,6 +23,7 @@ import ( "path" "github.com/stretchr/testify/require" + "github.com/tikv/pd/tests" ) const ( @@ -30,19 +31,12 @@ const ( schedulerConfigPrefix = 
"/pd/api/v1/scheduler-config" ) -// dialClient used to dial http request. -var dialClient = &http.Client{ - Transport: &http.Transport{ - DisableKeepAlives: true, - }, -} - // MustAddScheduler adds a scheduler with HTTP API. func MustAddScheduler( re *require.Assertions, serverAddr string, - schedulerName string, args map[string]interface{}, + schedulerName string, args map[string]any, ) { - request := map[string]interface{}{ + request := map[string]any{ "name": schedulerName, } for arg, val := range args { @@ -53,7 +47,7 @@ func MustAddScheduler( httpReq, err := http.NewRequest(http.MethodPost, fmt.Sprintf("%s%s", serverAddr, schedulersPrefix), bytes.NewBuffer(data)) re.NoError(err) // Send request. - resp, err := dialClient.Do(httpReq) + resp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer resp.Body.Close() data, err = io.ReadAll(resp.Body) @@ -65,7 +59,7 @@ func MustAddScheduler( func MustDeleteScheduler(re *require.Assertions, serverAddr, schedulerName string) { httpReq, err := http.NewRequest(http.MethodDelete, fmt.Sprintf("%s%s/%s", serverAddr, schedulersPrefix, schedulerName), http.NoBody) re.NoError(err) - resp, err := dialClient.Do(httpReq) + resp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer resp.Body.Close() data, err := io.ReadAll(resp.Body) @@ -77,14 +71,14 @@ func MustDeleteScheduler(re *require.Assertions, serverAddr, schedulerName strin func MustCallSchedulerConfigAPI( re *require.Assertions, method, serverAddr, schedulerName string, args []string, - input map[string]interface{}, + input map[string]any, ) { data, err := json.Marshal(input) re.NoError(err) args = append([]string{schedulerConfigPrefix, schedulerName}, args...) httpReq, err := http.NewRequest(method, fmt.Sprintf("%s%s", serverAddr, path.Join(args...)), bytes.NewBuffer(data)) re.NoError(err) - resp, err := dialClient.Do(httpReq) + resp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer resp.Body.Close() data, err = io.ReadAll(resp.Body) diff --git a/tests/server/apiv2/handlers/testutil.go b/tests/server/apiv2/handlers/testutil.go index d26ce732714..1a40e8d1ac7 100644 --- a/tests/server/apiv2/handlers/testutil.go +++ b/tests/server/apiv2/handlers/testutil.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/kvproto/pkg/keyspacepb" "github.com/stretchr/testify/require" "github.com/tikv/pd/pkg/storage/endpoint" + "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server/apiv2/handlers" "github.com/tikv/pd/tests" ) @@ -33,13 +34,6 @@ const ( keyspaceGroupsPrefix = "/pd/api/v2/tso/keyspace-groups" ) -// dialClient used to dial http request. -var dialClient = &http.Client{ - Transport: &http.Transport{ - DisableKeepAlives: true, - }, -} - func sendLoadRangeRequest(re *require.Assertions, server *tests.TestServer, token, limit string) *handlers.LoadAllKeyspacesResponse { // Construct load range request. httpReq, err := http.NewRequest(http.MethodGet, server.GetAddr()+keyspacesPrefix, http.NoBody) @@ -49,7 +43,7 @@ func sendLoadRangeRequest(re *require.Assertions, server *tests.TestServer, toke query.Add("limit", limit) httpReq.URL.RawQuery = query.Encode() // Send request. 
- httpResp, err := dialClient.Do(httpReq) + httpResp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer httpResp.Body.Close() re.Equal(http.StatusOK, httpResp.StatusCode) @@ -66,7 +60,7 @@ func sendUpdateStateRequest(re *require.Assertions, server *tests.TestServer, na re.NoError(err) httpReq, err := http.NewRequest(http.MethodPut, server.GetAddr()+keyspacesPrefix+"/"+name+"/state", bytes.NewBuffer(data)) re.NoError(err) - httpResp, err := dialClient.Do(httpReq) + httpResp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer httpResp.Body.Close() if httpResp.StatusCode != http.StatusOK { @@ -85,7 +79,7 @@ func MustCreateKeyspace(re *require.Assertions, server *tests.TestServer, reques re.NoError(err) httpReq, err := http.NewRequest(http.MethodPost, server.GetAddr()+keyspacesPrefix, bytes.NewBuffer(data)) re.NoError(err) - resp, err := dialClient.Do(httpReq) + resp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) @@ -109,7 +103,7 @@ func mustUpdateKeyspaceConfig(re *require.Assertions, server *tests.TestServer, re.NoError(err) httpReq, err := http.NewRequest(http.MethodPatch, server.GetAddr()+keyspacesPrefix+"/"+name+"/config", bytes.NewBuffer(data)) re.NoError(err) - resp, err := dialClient.Do(httpReq) + resp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) @@ -121,7 +115,7 @@ func mustUpdateKeyspaceConfig(re *require.Assertions, server *tests.TestServer, } func mustLoadKeyspaces(re *require.Assertions, server *tests.TestServer, name string) *keyspacepb.KeyspaceMeta { - resp, err := dialClient.Get(server.GetAddr() + keyspacesPrefix + "/" + name) + resp, err := tests.TestDialClient.Get(server.GetAddr() + keyspacesPrefix + "/" + name) re.NoError(err) defer resp.Body.Close() re.Equal(http.StatusOK, resp.StatusCode) @@ -142,7 +136,7 @@ func MustLoadKeyspaceGroups(re *require.Assertions, server *tests.TestServer, to query.Add("limit", limit) httpReq.URL.RawQuery = query.Encode() // Send request. - httpResp, err := dialClient.Do(httpReq) + httpResp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer httpResp.Body.Close() data, err := io.ReadAll(httpResp.Body) @@ -158,7 +152,7 @@ func tryCreateKeyspaceGroup(re *require.Assertions, server *tests.TestServer, re re.NoError(err) httpReq, err := http.NewRequest(http.MethodPost, server.GetAddr()+keyspaceGroupsPrefix, bytes.NewBuffer(data)) re.NoError(err) - resp, err := dialClient.Do(httpReq) + resp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer resp.Body.Close() data, err = io.ReadAll(resp.Body) @@ -168,8 +162,14 @@ func tryCreateKeyspaceGroup(re *require.Assertions, server *tests.TestServer, re // MustLoadKeyspaceGroupByID loads the keyspace group by ID with HTTP API. 
func MustLoadKeyspaceGroupByID(re *require.Assertions, server *tests.TestServer, id uint32) *endpoint.KeyspaceGroup { - kg, code := TryLoadKeyspaceGroupByID(re, server, id) - re.Equal(http.StatusOK, code) + var ( + kg *endpoint.KeyspaceGroup + code int + ) + testutil.Eventually(re, func() bool { + kg, code = TryLoadKeyspaceGroupByID(re, server, id) + return code == http.StatusOK + }) return kg } @@ -177,7 +177,7 @@ func MustLoadKeyspaceGroupByID(re *require.Assertions, server *tests.TestServer, func TryLoadKeyspaceGroupByID(re *require.Assertions, server *tests.TestServer, id uint32) (*endpoint.KeyspaceGroup, int) { httpReq, err := http.NewRequest(http.MethodGet, server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d", id), http.NoBody) re.NoError(err) - resp, err := dialClient.Do(httpReq) + resp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer resp.Body.Close() data, err := io.ReadAll(resp.Body) @@ -207,7 +207,7 @@ func FailCreateKeyspaceGroupWithCode(re *require.Assertions, server *tests.TestS func MustDeleteKeyspaceGroup(re *require.Assertions, server *tests.TestServer, id uint32) { httpReq, err := http.NewRequest(http.MethodDelete, server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d", id), http.NoBody) re.NoError(err) - resp, err := dialClient.Do(httpReq) + resp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer resp.Body.Close() data, err := io.ReadAll(resp.Body) @@ -222,7 +222,7 @@ func MustSplitKeyspaceGroup(re *require.Assertions, server *tests.TestServer, id httpReq, err := http.NewRequest(http.MethodPost, server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d/split", id), bytes.NewBuffer(data)) re.NoError(err) // Send request. - resp, err := dialClient.Do(httpReq) + resp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer resp.Body.Close() data, err = io.ReadAll(resp.Body) @@ -232,15 +232,28 @@ func MustSplitKeyspaceGroup(re *require.Assertions, server *tests.TestServer, id // MustFinishSplitKeyspaceGroup finishes a keyspace group split with HTTP API. func MustFinishSplitKeyspaceGroup(re *require.Assertions, server *tests.TestServer, id uint32) { - httpReq, err := http.NewRequest(http.MethodDelete, server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d/split", id), http.NoBody) - re.NoError(err) - // Send request. - resp, err := dialClient.Do(httpReq) - re.NoError(err) - defer resp.Body.Close() - data, err := io.ReadAll(resp.Body) - re.NoError(err) - re.Equal(http.StatusOK, resp.StatusCode, string(data)) + testutil.Eventually(re, func() bool { + httpReq, err := http.NewRequest(http.MethodDelete, server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d/split", id), http.NoBody) + if err != nil { + return false + } + // Send request. + resp, err := tests.TestDialClient.Do(httpReq) + if err != nil { + return false + } + defer resp.Body.Close() + data, err := io.ReadAll(resp.Body) + if err != nil { + return false + } + if resp.StatusCode == http.StatusServiceUnavailable || + resp.StatusCode == http.StatusInternalServerError { + return false + } + re.Equal(http.StatusOK, resp.StatusCode, string(data)) + return true + }) } // MustMergeKeyspaceGroup merges keyspace groups with HTTP API. @@ -250,7 +263,7 @@ func MustMergeKeyspaceGroup(re *require.Assertions, server *tests.TestServer, id httpReq, err := http.NewRequest(http.MethodPost, server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d/merge", id), bytes.NewBuffer(data)) re.NoError(err) // Send request. 
- resp, err := dialClient.Do(httpReq) + resp, err := tests.TestDialClient.Do(httpReq) re.NoError(err) defer resp.Body.Close() data, err = io.ReadAll(resp.Body) diff --git a/tests/server/cluster/cluster_test.go b/tests/server/cluster/cluster_test.go index 67c798d7f69..e03ef2fe318 100644 --- a/tests/server/cluster/cluster_test.go +++ b/tests/server/cluster/cluster_test.go @@ -38,6 +38,7 @@ import ( sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/schedulers" + "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/storage" "github.com/tikv/pd/pkg/syncer" "github.com/tikv/pd/pkg/tso" @@ -181,6 +182,99 @@ func TestDamagedRegion(t *testing.T) { re.Equal(uint64(1), rc.GetOperatorController().OperatorCount(operator.OpAdmin)) } +func TestRegionStatistics(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + tc, err := tests.NewTestCluster(ctx, 2) + defer tc.Destroy() + re.NoError(err) + + err = tc.RunInitialServers() + re.NoError(err) + + leaderName := tc.WaitLeader() + leaderServer := tc.GetLeaderServer() + grpcPDClient := testutil.MustNewGrpcClient(re, leaderServer.GetAddr()) + clusterID := leaderServer.GetClusterID() + bootstrapCluster(re, clusterID, grpcPDClient) + rc := leaderServer.GetRaftCluster() + + region := &metapb.Region{ + Id: 10, + StartKey: []byte("abc"), + EndKey: []byte("xyz"), + Peers: []*metapb.Peer{ + {Id: 101, StoreId: 1}, + {Id: 102, StoreId: 2}, + {Id: 103, StoreId: 3}, + {Id: 104, StoreId: 4, Role: metapb.PeerRole_Learner}, + }, + } + + // To put region. + regionInfo := core.NewRegionInfo(region, region.Peers[0], core.SetApproximateSize(0)) + err = tc.HandleRegionHeartbeat(regionInfo) + re.NoError(err) + regions := rc.GetRegionStatsByType(statistics.LearnerPeer) + re.Len(regions, 1) + + // wait for sync region + time.Sleep(1000 * time.Millisecond) + + leaderServer.ResignLeader() + newLeaderName := tc.WaitLeader() + re.NotEqual(newLeaderName, leaderName) + leaderServer = tc.GetLeaderServer() + rc = leaderServer.GetRaftCluster() + r := rc.GetRegion(region.Id) + re.NotNil(r) + re.True(r.LoadedFromSync()) + regions = rc.GetRegionStatsByType(statistics.LearnerPeer) + re.Empty(regions) + err = tc.HandleRegionHeartbeat(regionInfo) + re.NoError(err) + regions = rc.GetRegionStatsByType(statistics.LearnerPeer) + re.Len(regions, 1) + + leaderServer.ResignLeader() + newLeaderName = tc.WaitLeader() + re.Equal(newLeaderName, leaderName) + leaderServer = tc.GetLeaderServer() + rc = leaderServer.GetRaftCluster() + re.NotNil(r) + re.True(r.LoadedFromStorage() || r.LoadedFromSync()) + regions = rc.GetRegionStatsByType(statistics.LearnerPeer) + re.Empty(regions) + regionInfo = regionInfo.Clone(core.SetSource(core.Heartbeat), core.SetApproximateSize(30)) + err = tc.HandleRegionHeartbeat(regionInfo) + re.NoError(err) + rc = leaderServer.GetRaftCluster() + r = rc.GetRegion(region.Id) + re.NotNil(r) + re.False(r.LoadedFromStorage() && r.LoadedFromSync()) + + leaderServer.ResignLeader() + newLeaderName = tc.WaitLeader() + re.NotEqual(newLeaderName, leaderName) + leaderServer.ResignLeader() + newLeaderName = tc.WaitLeader() + re.Equal(newLeaderName, leaderName) + leaderServer = tc.GetLeaderServer() + rc = leaderServer.GetRaftCluster() + r = rc.GetRegion(region.Id) + re.NotNil(r) + re.False(r.LoadedFromStorage() && r.LoadedFromSync()) + regions = rc.GetRegionStatsByType(statistics.LearnerPeer) + re.Empty(regions) + + regionInfo = 
regionInfo.Clone(core.SetSource(core.Heartbeat), core.SetApproximateSize(30)) + err = tc.HandleRegionHeartbeat(regionInfo) + re.NoError(err) + regions = rc.GetRegionStatsByType(statistics.LearnerPeer) + re.Len(regions, 1) +} + func TestStaleRegion(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) @@ -507,7 +601,7 @@ func TestRaftClusterMultipleRestart(t *testing.T) { store := newMetaStore(storeID, "127.0.0.1:4", "2.1.0", metapb.StoreState_Offline, getTestDeployPath(storeID)) rc := leaderServer.GetRaftCluster() re.NotNil(rc) - err = rc.PutStore(store) + err = rc.PutMetaStore(store) re.NoError(err) re.NotNil(tc) rc.Stop() @@ -568,7 +662,7 @@ func TestNotLeader(t *testing.T) { grpcStatus, ok := status.FromError(err) re.True(ok) re.Equal(codes.Unavailable, grpcStatus.Code()) - re.Equal("not leader", grpcStatus.Message()) + re.ErrorContains(server.ErrNotLeader, grpcStatus.Message()) } func TestStoreVersionChange(t *testing.T) { @@ -659,20 +753,19 @@ func TestConcurrentHandleRegion(t *testing.T) { re.NoError(err) peerID, err := id.Alloc() re.NoError(err) - regionID, err := id.Alloc() - re.NoError(err) peer := &metapb.Peer{Id: peerID, StoreId: store.GetId()} regionReq := &pdpb.RegionHeartbeatRequest{ Header: testutil.NewRequestHeader(clusterID), Region: &metapb.Region{ - Id: regionID, + // mock error msg to trigger stream.Recv() + Id: 0, Peers: []*metapb.Peer{peer}, }, Leader: peer, } err = stream.Send(regionReq) re.NoError(err) - // make sure the first store can receive one response + // make sure the first store can receive one response(error msg) if i == 0 { wg.Add(1) } @@ -730,7 +823,7 @@ func TestSetScheduleOpt(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() // TODO: enable placementrules - tc, err := tests.NewTestCluster(ctx, 1, func(cfg *config.Config, svr string) { cfg.Replication.EnablePlacementRules = false }) + tc, err := tests.NewTestCluster(ctx, 1, func(cfg *config.Config, _ string) { cfg.Replication.EnablePlacementRules = false }) defer tc.Destroy() re.NoError(err) @@ -891,7 +984,7 @@ func TestTiFlashWithPlacementRules(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - tc, err := tests.NewTestCluster(ctx, 1, func(cfg *config.Config, name string) { cfg.Replication.EnablePlacementRules = false }) + tc, err := tests.NewTestCluster(ctx, 1, func(cfg *config.Config, _ string) { cfg.Replication.EnablePlacementRules = false }) defer tc.Destroy() re.NoError(err) err = tc.RunInitialServers() @@ -941,7 +1034,7 @@ func TestReplicationModeStatus(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - tc, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, serverName string) { + tc, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, _ string) { conf.ReplicationMode.ReplicationMode = "dr-auto-sync" }) @@ -1240,22 +1333,22 @@ func TestStaleTermHeartbeat(t *testing.T) { Term: 5, ApproximateSize: 10, } - - region := core.RegionFromHeartbeat(regionReq) + flowRoundDivisor := leaderServer.GetConfig().PDServerCfg.FlowRoundByDigit + region := core.RegionFromHeartbeat(regionReq, flowRoundDivisor) err = rc.HandleRegionHeartbeat(region) re.NoError(err) // Transfer leader regionReq.Term = 6 regionReq.Leader = peers[1] - region = core.RegionFromHeartbeat(regionReq) + region = core.RegionFromHeartbeat(regionReq, flowRoundDivisor) err = rc.HandleRegionHeartbeat(region) re.NoError(err) // issue #3379 
regionReq.KeysWritten = uint64(18446744073709551615) // -1 regionReq.BytesWritten = uint64(18446744073709550602) // -1024 - region = core.RegionFromHeartbeat(regionReq) + region = core.RegionFromHeartbeat(regionReq, flowRoundDivisor) re.Equal(uint64(0), region.GetKeysWritten()) re.Equal(uint64(0), region.GetBytesWritten()) err = rc.HandleRegionHeartbeat(region) @@ -1264,14 +1357,14 @@ func TestStaleTermHeartbeat(t *testing.T) { // Stale heartbeat, update check should fail regionReq.Term = 5 regionReq.Leader = peers[0] - region = core.RegionFromHeartbeat(regionReq) + region = core.RegionFromHeartbeat(regionReq, flowRoundDivisor) err = rc.HandleRegionHeartbeat(region) re.Error(err) // Allow regions that are created by unsafe recover to send a heartbeat, even though they // are considered "stale" because their conf ver and version are both equal to 1. regionReq.Region.RegionEpoch.ConfVer = 1 - region = core.RegionFromHeartbeat(regionReq) + region = core.RegionFromHeartbeat(regionReq, flowRoundDivisor) err = rc.HandleRegionHeartbeat(region) re.NoError(err) } @@ -1312,10 +1405,10 @@ func TestTransferLeaderForScheduler(t *testing.T) { re.True(leaderServer.GetRaftCluster().IsPrepared()) schedsNum := len(rc.GetCoordinator().GetSchedulersController().GetSchedulerNames()) // Add evict leader scheduler - api.MustAddScheduler(re, leaderServer.GetAddr(), schedulers.EvictLeaderName, map[string]interface{}{ + api.MustAddScheduler(re, leaderServer.GetAddr(), schedulers.EvictLeaderName, map[string]any{ "store_id": 1, }) - api.MustAddScheduler(re, leaderServer.GetAddr(), schedulers.EvictLeaderName, map[string]interface{}{ + api.MustAddScheduler(re, leaderServer.GetAddr(), schedulers.EvictLeaderName, map[string]any{ "store_id": 2, }) // Check scheduler updated. diff --git a/tests/server/cluster/cluster_work_test.go b/tests/server/cluster/cluster_work_test.go index f503563dbb1..530e15f3f8c 100644 --- a/tests/server/cluster/cluster_work_test.go +++ b/tests/server/cluster/cluster_work_test.go @@ -109,12 +109,12 @@ func TestAskSplit(t *testing.T) { SplitCount: 10, } - re.NoError(leaderServer.GetServer().SaveTTLConfig(map[string]interface{}{"schedule.enable-tikv-split-region": 0}, time.Minute)) + re.NoError(leaderServer.GetServer().SaveTTLConfig(map[string]any{"schedule.enable-tikv-split-region": 0}, time.Minute)) _, err = rc.HandleAskSplit(req) re.ErrorIs(err, errs.ErrSchedulerTiKVSplitDisabled) _, err = rc.HandleAskBatchSplit(req1) re.ErrorIs(err, errs.ErrSchedulerTiKVSplitDisabled) - re.NoError(leaderServer.GetServer().SaveTTLConfig(map[string]interface{}{"schedule.enable-tikv-split-region": 0}, 0)) + re.NoError(leaderServer.GetServer().SaveTTLConfig(map[string]any{"schedule.enable-tikv-split-region": 0}, 0)) // wait ttl config takes effect time.Sleep(time.Second) diff --git a/tests/server/config/config_test.go b/tests/server/config/config_test.go index 98754127e55..67d7478caa0 100644 --- a/tests/server/config/config_test.go +++ b/tests/server/config/config_test.go @@ -26,23 +26,16 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + cfg "github.com/tikv/pd/pkg/mcs/scheduling/server/config" "github.com/tikv/pd/pkg/ratelimit" sc "github.com/tikv/pd/pkg/schedule/config" tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/pkg/utils/typeutil" "github.com/tikv/pd/pkg/versioninfo" - "github.com/tikv/pd/server" "github.com/tikv/pd/server/config" "github.com/tikv/pd/tests" ) -// testDialClient used to dial http request. 
-var testDialClient = &http.Client{ - Transport: &http.Transport{ - DisableKeepAlives: true, - }, -} - func TestRateLimitConfigReload(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) @@ -58,14 +51,14 @@ func TestRateLimitConfigReload(t *testing.T) { limitCfg := make(map[string]ratelimit.DimensionConfig) limitCfg["GetRegions"] = ratelimit.DimensionConfig{QPS: 1} - input := map[string]interface{}{ + input := map[string]any{ "enable-rate-limit": "true", "limiter-config": limitCfg, } data, err := json.Marshal(input) re.NoError(err) req, _ := http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) - resp, err := testDialClient.Do(req) + resp, err := tests.TestDialClient.Do(req) re.NoError(err) resp.Body.Close() re.True(leader.GetServer().GetServiceMiddlewarePersistOptions().IsRateLimitEnabled()) @@ -73,11 +66,7 @@ func TestRateLimitConfigReload(t *testing.T) { oldLeaderName := leader.GetServer().Name() leader.GetServer().GetMember().ResignEtcdLeader(leader.GetServer().Context(), oldLeaderName, "") - var servers []*server.Server - for _, s := range cluster.GetServers() { - servers = append(servers, s.GetServer()) - } - server.MustWaitLeader(re, servers) + re.NotEmpty(cluster.WaitLeader()) leader = cluster.GetLeaderServer() re.NotNil(leader) re.True(leader.GetServer().GetServiceMiddlewarePersistOptions().IsRateLimitEnabled()) @@ -100,6 +89,7 @@ func (suite *configTestSuite) SetupSuite() { func (suite *configTestSuite) TearDownSuite() { suite.env.Cleanup() } + func (suite *configTestSuite) TestConfigAll() { suite.env.RunTestInTwoModes(suite.checkConfigAll) } @@ -112,7 +102,7 @@ func (suite *configTestSuite) checkConfigAll(cluster *tests.TestCluster) { addr := fmt.Sprintf("%s/pd/api/v1/config", urlPrefix) cfg := &config.Config{} tu.Eventually(re, func() bool { - err := tu.ReadGetJSON(re, testDialClient, addr, cfg) + err := tu.ReadGetJSON(re, tests.TestDialClient, addr, cfg) re.NoError(err) return cfg.PDServerCfg.DashboardAddress != "auto" }) @@ -121,36 +111,37 @@ func (suite *configTestSuite) checkConfigAll(cluster *tests.TestCluster) { r := map[string]int{"max-replicas": 5} postData, err := json.Marshal(r) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusOK(re)) re.NoError(err) - l := map[string]interface{}{ + l := map[string]any{ "location-labels": "zone,rack", "region-schedule-limit": 10, } postData, err = json.Marshal(l) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusOK(re)) re.NoError(err) - l = map[string]interface{}{ + l = map[string]any{ "metric-storage": "http://127.0.0.1:9090", } postData, err = json.Marshal(l) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - re.NoError(err) - - newCfg := &config.Config{} - err = tu.ReadGetJSON(re, testDialClient, addr, newCfg) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusOK(re)) re.NoError(err) cfg.Replication.MaxReplicas = 5 cfg.Replication.LocationLabels = []string{"zone", "rack"} cfg.Schedule.RegionScheduleLimit = 10 cfg.PDServerCfg.MetricStorage = "http://127.0.0.1:9090" - re.Equal(newCfg, cfg) + tu.Eventually(re, func() bool { + newCfg := &config.Config{} + err = tu.ReadGetJSON(re, tests.TestDialClient, addr, newCfg) + re.NoError(err) + return 
suite.Equal(newCfg, cfg) + }) // the new way - l = map[string]interface{}{ + l = map[string]any{ "schedule.tolerant-size-ratio": 2.5, "schedule.enable-tikv-split-region": "false", "replication.location-labels": "idc,host", @@ -162,10 +153,7 @@ func (suite *configTestSuite) checkConfigAll(cluster *tests.TestCluster) { } postData, err = json.Marshal(l) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - re.NoError(err) - newCfg1 := &config.Config{} - err = tu.ReadGetJSON(re, testDialClient, addr, newCfg1) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusOK(re)) re.NoError(err) cfg.Schedule.EnableTiKVSplitRegion = false cfg.Schedule.TolerantSizeRatio = 2.5 @@ -177,44 +165,49 @@ func (suite *configTestSuite) checkConfigAll(cluster *tests.TestCluster) { v, err := versioninfo.ParseVersion("v4.0.0-beta") re.NoError(err) cfg.ClusterVersion = *v - re.Equal(cfg, newCfg1) + tu.Eventually(re, func() bool { + newCfg1 := &config.Config{} + err = tu.ReadGetJSON(re, tests.TestDialClient, addr, newCfg1) + re.NoError(err) + return suite.Equal(cfg, newCfg1) + }) // revert this to avoid it affects TestConfigTTL l["schedule.enable-tikv-split-region"] = "true" postData, err = json.Marshal(l) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusOK(re)) re.NoError(err) // illegal prefix - l = map[string]interface{}{ + l = map[string]any{ "replicate.max-replicas": 1, } postData, err = json.Marshal(l) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusNotOK(re), tu.StringContain(re, "not found")) re.NoError(err) // update prefix directly - l = map[string]interface{}{ + l = map[string]any{ "replication-mode": nil, } postData, err = json.Marshal(l) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusNotOK(re), tu.StringContain(re, "cannot update config prefix")) re.NoError(err) // config item not found - l = map[string]interface{}{ + l = map[string]any{ "schedule.region-limit": 10, } postData, err = json.Marshal(l) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringContain(re, "not found")) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusNotOK(re), tu.StringContain(re, "not found")) re.NoError(err) } @@ -230,16 +223,16 @@ func (suite *configTestSuite) checkConfigSchedule(cluster *tests.TestCluster) { addr := fmt.Sprintf("%s/pd/api/v1/config/schedule", urlPrefix) scheduleConfig := &sc.ScheduleConfig{} - re.NoError(tu.ReadGetJSON(re, testDialClient, addr, scheduleConfig)) + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, addr, scheduleConfig)) scheduleConfig.MaxStoreDownTime.Duration = time.Second postData, err := json.Marshal(scheduleConfig) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusOK(re)) re.NoError(err) tu.Eventually(re, func() bool { scheduleConfig1 := &sc.ScheduleConfig{} - re.NoError(tu.ReadGetJSON(re, testDialClient, addr, scheduleConfig1)) + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, addr, scheduleConfig1)) return reflect.DeepEqual(*scheduleConfig1, *scheduleConfig) }) } @@ -255,33 +248,33 @@ func (suite *configTestSuite) checkConfigReplication(cluster 
*tests.TestCluster) addr := fmt.Sprintf("%s/pd/api/v1/config/replicate", urlPrefix) rc := &sc.ReplicationConfig{} - err := tu.ReadGetJSON(re, testDialClient, addr, rc) + err := tu.ReadGetJSON(re, tests.TestDialClient, addr, rc) re.NoError(err) rc.MaxReplicas = 5 rc1 := map[string]int{"max-replicas": 5} postData, err := json.Marshal(rc1) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusOK(re)) re.NoError(err) rc.LocationLabels = []string{"zone", "rack"} rc2 := map[string]string{"location-labels": "zone,rack"} postData, err = json.Marshal(rc2) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusOK(re)) re.NoError(err) rc.IsolationLevel = "zone" rc3 := map[string]string{"isolation-level": "zone"} postData, err = json.Marshal(rc3) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusOK(re)) re.NoError(err) rc4 := &sc.ReplicationConfig{} tu.Eventually(re, func() bool { - err = tu.ReadGetJSON(re, testDialClient, addr, rc4) + err = tu.ReadGetJSON(re, tests.TestDialClient, addr, rc4) re.NoError(err) return reflect.DeepEqual(*rc4, *rc) }) @@ -299,7 +292,7 @@ func (suite *configTestSuite) checkConfigLabelProperty(cluster *tests.TestCluste addr := urlPrefix + "/pd/api/v1/config/label-property" loadProperties := func() config.LabelPropertyConfig { var cfg config.LabelPropertyConfig - err := tu.ReadGetJSON(re, testDialClient, addr, &cfg) + err := tu.ReadGetJSON(re, tests.TestDialClient, addr, &cfg) re.NoError(err) return cfg } @@ -313,7 +306,7 @@ func (suite *configTestSuite) checkConfigLabelProperty(cluster *tests.TestCluste `{"type": "bar", "action": "set", "label-key": "host", "label-value": "h1"}`, } for _, cmd := range cmds { - err := tu.CheckPostJSON(testDialClient, addr, []byte(cmd), tu.StatusOK(re)) + err := tu.CheckPostJSON(tests.TestDialClient, addr, []byte(cmd), tu.StatusOK(re)) re.NoError(err) } @@ -330,7 +323,7 @@ func (suite *configTestSuite) checkConfigLabelProperty(cluster *tests.TestCluste `{"type": "bar", "action": "delete", "label-key": "host", "label-value": "h1"}`, } for _, cmd := range cmds { - err := tu.CheckPostJSON(testDialClient, addr, []byte(cmd), tu.StatusOK(re)) + err := tu.CheckPostJSON(tests.TestDialClient, addr, []byte(cmd), tu.StatusOK(re)) re.NoError(err) } @@ -353,28 +346,28 @@ func (suite *configTestSuite) checkConfigDefault(cluster *tests.TestCluster) { r := map[string]int{"max-replicas": 5} postData, err := json.Marshal(r) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusOK(re)) re.NoError(err) - l := map[string]interface{}{ + l := map[string]any{ "location-labels": "zone,rack", "region-schedule-limit": 10, } postData, err = json.Marshal(l) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusOK(re)) re.NoError(err) - l = map[string]interface{}{ + l = map[string]any{ "metric-storage": "http://127.0.0.1:9090", } postData, err = json.Marshal(l) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusOK(re)) 
re.NoError(err) addr = fmt.Sprintf("%s/pd/api/v1/config/default", urlPrefix) defaultCfg := &config.Config{} - err = tu.ReadGetJSON(re, testDialClient, addr, defaultCfg) + err = tu.ReadGetJSON(re, tests.TestDialClient, addr, defaultCfg) re.NoError(err) re.Equal(uint64(3), defaultCfg.Replication.MaxReplicas) @@ -393,15 +386,15 @@ func (suite *configTestSuite) checkConfigPDServer(cluster *tests.TestCluster) { urlPrefix := leaderServer.GetAddr() addrPost := urlPrefix + "/pd/api/v1/config" - ms := map[string]interface{}{ + ms := map[string]any{ "metric-storage": "", } postData, err := json.Marshal(ms) re.NoError(err) - re.NoError(tu.CheckPostJSON(testDialClient, addrPost, postData, tu.StatusOK(re))) + re.NoError(tu.CheckPostJSON(tests.TestDialClient, addrPost, postData, tu.StatusOK(re))) addrGet := fmt.Sprintf("%s/pd/api/v1/config/pd-server", urlPrefix) sc := &config.PDServerConfig{} - re.NoError(tu.ReadGetJSON(re, testDialClient, addrGet, sc)) + re.NoError(tu.ReadGetJSON(re, tests.TestDialClient, addrGet, sc)) re.Equal(bool(true), sc.UseRegionStorage) re.Equal("table", sc.KeyType) re.Equal(typeutil.StringSlice([]string{}), sc.RuntimeServices) @@ -414,7 +407,7 @@ func (suite *configTestSuite) checkConfigPDServer(cluster *tests.TestCluster) { re.Equal(24*time.Hour, sc.MaxResetTSGap.Duration) } -var ttlConfig = map[string]interface{}{ +var ttlConfig = map[string]any{ "schedule.max-snapshot-count": 999, "schedule.enable-location-replacement": false, "schedule.max-merge-region-size": 999, @@ -428,7 +421,7 @@ var ttlConfig = map[string]interface{}{ "schedule.enable-tikv-split-region": false, } -var invalidTTLConfig = map[string]interface{}{ +var invalidTTLConfig = map[string]any{ "schedule.invalid-ttl-config": 0, } @@ -446,7 +439,7 @@ type ttlConfigInterface interface { IsTikvRegionSplitEnabled() bool } -func (suite *configTestSuite) assertTTLConfig( +func assertTTLConfig( re *require.Assertions, cluster *tests.TestCluster, expectedEqual bool, @@ -470,9 +463,10 @@ func (suite *configTestSuite) assertTTLConfig( } checkFunc(cluster.GetLeaderServer().GetServer().GetPersistOptions()) if cluster.GetSchedulingPrimaryServer() != nil { - // wait for the scheduling primary server to be synced - options := cluster.GetSchedulingPrimaryServer().GetPersistConfig() + var options *cfg.PersistConfig tu.Eventually(re, func() bool { + // wait for the scheduling primary server to be synced + options = cluster.GetSchedulingPrimaryServer().GetPersistConfig() if expectedEqual { return uint64(999) == options.GetMaxSnapshotCount() } @@ -482,11 +476,11 @@ func (suite *configTestSuite) assertTTLConfig( } } -func (suite *configTestSuite) assertTTLConfigItemEqual( +func assertTTLConfigItemEqual( re *require.Assertions, cluster *tests.TestCluster, item string, - expectedValue interface{}, + expectedValue any, ) { checkFunc := func(options ttlConfigInterface) bool { switch item { @@ -524,53 +518,53 @@ func (suite *configTestSuite) checkConfigTTL(cluster *tests.TestCluster) { re.NoError(err) // test no config and cleaning up - err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 0), postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, createTTLUrl(urlPrefix, 0), postData, tu.StatusOK(re)) re.NoError(err) - suite.assertTTLConfig(re, cluster, false) + assertTTLConfig(re, cluster, false) // test time goes by - err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 5), postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, createTTLUrl(urlPrefix, 5), postData, tu.StatusOK(re)) 
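For readability, a sketch of what createTTLUrl is presumed to build: config items posted with a ttlSecond query parameter apply only for that many seconds and then revert, which is why assertTTLConfig reports true right after this POST and false again after the 5-second sleep below, and why ttlSecond=0 is used for cleanup. The exact URL layout is an assumption, since createTTLUrl's definition is not shown in this diff; the helper below is not part of the patch.

```go
package example

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// postTTLConfig posts config items that expire after ttlSeconds; ttlSeconds=0
// clears any TTL values set earlier. The /pd/api/v1/config?ttlSecond=N layout
// is assumed from how the tests call createTTLUrl.
func postTTLConfig(client *http.Client, urlPrefix string, ttlSeconds int, items map[string]any) error {
	body, err := json.Marshal(items)
	if err != nil {
		return err
	}
	url := fmt.Sprintf("%s/pd/api/v1/config?ttlSecond=%d", urlPrefix, ttlSeconds)
	resp, err := client.Post(url, "application/json", bytes.NewBuffer(body))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("post ttl config: unexpected status %s", resp.Status)
	}
	return nil
}
```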
re.NoError(err) - suite.assertTTLConfig(re, cluster, true) + assertTTLConfig(re, cluster, true) time.Sleep(5 * time.Second) - suite.assertTTLConfig(re, cluster, false) + assertTTLConfig(re, cluster, false) // test cleaning up - err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 5), postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, createTTLUrl(urlPrefix, 5), postData, tu.StatusOK(re)) re.NoError(err) - suite.assertTTLConfig(re, cluster, true) - err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 0), postData, tu.StatusOK(re)) + assertTTLConfig(re, cluster, true) + err = tu.CheckPostJSON(tests.TestDialClient, createTTLUrl(urlPrefix, 0), postData, tu.StatusOK(re)) re.NoError(err) - suite.assertTTLConfig(re, cluster, false) + assertTTLConfig(re, cluster, false) postData, err = json.Marshal(invalidTTLConfig) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 1), postData, + err = tu.CheckPostJSON(tests.TestDialClient, createTTLUrl(urlPrefix, 1), postData, tu.StatusNotOK(re), tu.StringEqual(re, "\"unsupported ttl config schedule.invalid-ttl-config\"\n")) re.NoError(err) // only set max-merge-region-size - mergeConfig := map[string]interface{}{ + mergeConfig := map[string]any{ "schedule.max-merge-region-size": 999, } postData, err = json.Marshal(mergeConfig) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 1), postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, createTTLUrl(urlPrefix, 1), postData, tu.StatusOK(re)) re.NoError(err) - suite.assertTTLConfigItemEqual(re, cluster, "max-merge-region-size", uint64(999)) + assertTTLConfigItemEqual(re, cluster, "max-merge-region-size", uint64(999)) // max-merge-region-keys should keep consistence with max-merge-region-size. 
- suite.assertTTLConfigItemEqual(re, cluster, "max-merge-region-keys", uint64(999*10000)) + assertTTLConfigItemEqual(re, cluster, "max-merge-region-keys", uint64(999*10000)) // on invalid value, we use default config - mergeConfig = map[string]interface{}{ + mergeConfig = map[string]any{ "schedule.enable-tikv-split-region": "invalid", } postData, err = json.Marshal(mergeConfig) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 10), postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, createTTLUrl(urlPrefix, 10), postData, tu.StatusOK(re)) re.NoError(err) - suite.assertTTLConfigItemEqual(re, cluster, "enable-tikv-split-region", true) + assertTTLConfigItemEqual(re, cluster, "enable-tikv-split-region", true) } func (suite *configTestSuite) TestTTLConflict() { @@ -584,24 +578,24 @@ func (suite *configTestSuite) checkTTLConflict(cluster *tests.TestCluster) { addr := createTTLUrl(urlPrefix, 1) postData, err := json.Marshal(ttlConfig) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusOK(re)) re.NoError(err) - suite.assertTTLConfig(re, cluster, true) + assertTTLConfig(re, cluster, true) - cfg := map[string]interface{}{"max-snapshot-count": 30} + cfg := map[string]any{"max-snapshot-count": 30} postData, err = json.Marshal(cfg) re.NoError(err) addr = fmt.Sprintf("%s/pd/api/v1/config", urlPrefix) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringEqual(re, "\"need to clean up TTL first for schedule.max-snapshot-count\"\n")) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusNotOK(re), tu.StringEqual(re, "\"need to clean up TTL first for schedule.max-snapshot-count\"\n")) re.NoError(err) addr = fmt.Sprintf("%s/pd/api/v1/config/schedule", urlPrefix) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringEqual(re, "\"need to clean up TTL first for schedule.max-snapshot-count\"\n")) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusNotOK(re), tu.StringEqual(re, "\"need to clean up TTL first for schedule.max-snapshot-count\"\n")) re.NoError(err) - cfg = map[string]interface{}{"schedule.max-snapshot-count": 30} + cfg = map[string]any{"schedule.max-snapshot-count": 30} postData, err = json.Marshal(cfg) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 0), postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, createTTLUrl(urlPrefix, 0), postData, tu.StatusOK(re)) re.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + err = tu.CheckPostJSON(tests.TestDialClient, addr, postData, tu.StatusOK(re)) re.NoError(err) } diff --git a/tests/server/join/join_test.go b/tests/server/join/join_test.go index 5cdcbc090b8..32e66c27589 100644 --- a/tests/server/join/join_test.go +++ b/tests/server/join/join_test.go @@ -105,7 +105,7 @@ func TestFailedAndDeletedPDJoinsPreviousCluster(t *testing.T) { re.NoError(err) // The server should not successfully start. - res := cluster.RunServer(pd3) + res := tests.RunServer(pd3) re.Error(<-res) members, err := etcdutil.ListEtcdMembers(ctx, client) @@ -138,7 +138,7 @@ func TestDeletedPDJoinsPreviousCluster(t *testing.T) { re.NoError(err) // The server should not successfully start. 
- res := cluster.RunServer(pd3) + res := tests.RunServer(pd3) re.Error(<-res) members, err := etcdutil.ListEtcdMembers(ctx, client) diff --git a/tests/server/keyspace/keyspace_test.go b/tests/server/keyspace/keyspace_test.go index aa2e89296bb..d6e188359ce 100644 --- a/tests/server/keyspace/keyspace_test.go +++ b/tests/server/keyspace/keyspace_test.go @@ -53,7 +53,7 @@ func (suite *keyspaceTestSuite) SetupTest() { re := suite.Require() ctx, cancel := context.WithCancel(context.Background()) suite.cancel = cancel - cluster, err := tests.NewTestCluster(ctx, 3, func(conf *config.Config, serverName string) { + cluster, err := tests.NewTestCluster(ctx, 3, func(conf *config.Config, _ string) { conf.Keyspace.PreAlloc = preAllocKeyspace }) suite.cluster = cluster diff --git a/tests/server/member/member_test.go b/tests/server/member/member_test.go index d87e8a1a5c0..edff14a3b98 100644 --- a/tests/server/member/member_test.go +++ b/tests/server/member/member_test.go @@ -32,6 +32,7 @@ import ( "github.com/tikv/pd/pkg/utils/assertutil" "github.com/tikv/pd/pkg/utils/etcdutil" "github.com/tikv/pd/pkg/utils/testutil" + "github.com/tikv/pd/pkg/utils/typeutil" "github.com/tikv/pd/server" "github.com/tikv/pd/server/config" "github.com/tikv/pd/tests" @@ -83,14 +84,13 @@ func TestMemberDelete(t *testing.T) { {path: fmt.Sprintf("id/%d", members[1].GetServerID()), members: []*config.Config{leader.GetConfig()}}, } - httpClient := &http.Client{Timeout: 15 * time.Second} for _, table := range tables { t.Log(time.Now(), "try to delete:", table.path) testutil.Eventually(re, func() bool { addr := leader.GetConfig().ClientUrls + "/pd/api/v1/members/" + table.path req, err := http.NewRequest(http.MethodDelete, addr, http.NoBody) re.NoError(err) - res, err := httpClient.Do(req) + res, err := tests.TestDialClient.Do(req) re.NoError(err) defer res.Body.Close() // Check by status. @@ -121,9 +121,8 @@ func TestMemberDelete(t *testing.T) { } func checkMemberList(re *require.Assertions, clientURL string, configs []*config.Config) error { - httpClient := &http.Client{Timeout: 15 * time.Second} addr := clientURL + "/pd/api/v1/members" - res, err := httpClient.Get(addr) + res, err := tests.TestDialClient.Get(addr) re.NoError(err) defer res.Body.Close() buf, err := io.ReadAll(res.Body) @@ -151,7 +150,9 @@ func TestLeaderPriority(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - cluster, err := tests.NewTestCluster(ctx, 3) + cluster, err := tests.NewTestCluster(ctx, 3, func(conf *config.Config, _ string) { + conf.LeaderPriorityCheckInterval = typeutil.NewDuration(time.Second) + }) defer cluster.Destroy() re.NoError(err) @@ -180,7 +181,7 @@ func TestLeaderPriority(t *testing.T) { func post(t *testing.T, re *require.Assertions, url string, body string) { testutil.Eventually(re, func() bool { - res, err := http.Post(url, "", bytes.NewBufferString(body)) // #nosec + res, err := tests.TestDialClient.Post(url, "", bytes.NewBufferString(body)) // #nosec re.NoError(err) b, err := io.ReadAll(res.Body) res.Body.Close() @@ -327,20 +328,26 @@ func TestCampaignLeaderFrequently(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - cluster, err := tests.NewTestCluster(ctx, 5) + cluster, err := tests.NewTestCluster(ctx, 3) defer cluster.Destroy() re.NoError(err) err = cluster.RunInitialServers() re.NoError(err) + // the 1st time campaign leader. 
cluster.WaitLeader() leader := cluster.GetLeader() re.NotEmpty(cluster.GetLeader()) - for i := 0; i < 3; i++ { + // need to prevent 3 times(including the above 1st time) campaign leader in 5 min. + for i := 0; i < 2; i++ { cluster.GetLeaderServer().ResetPDLeader() cluster.WaitLeader() + re.Equal(leader, cluster.GetLeader()) } + // check for the 4th time. + cluster.GetLeaderServer().ResetPDLeader() + cluster.WaitLeader() // PD leader should be different from before because etcd leader changed. re.NotEmpty(cluster.GetLeader()) re.NotEqual(leader, cluster.GetLeader()) diff --git a/tests/server/region_syncer/region_syncer_test.go b/tests/server/region_syncer/region_syncer_test.go index a91bbaf6b40..f82346571ef 100644 --- a/tests/server/region_syncer/region_syncer_test.go +++ b/tests/server/region_syncer/region_syncer_test.go @@ -36,11 +36,11 @@ func TestMain(m *testing.M) { func TestRegionSyncer(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) - re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/storage/regionStorageFastFlush", `return(true)`)) + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/storage/levelDBStorageFastFlush", `return(true)`)) re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/syncer/noFastExitSync", `return(true)`)) re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/syncer/disableClientStreaming", `return(true)`)) - cluster, err := tests.NewTestCluster(ctx, 3, func(conf *config.Config, serverName string) { conf.PDServerCfg.UseRegionStorage = true }) + cluster, err := tests.NewTestCluster(ctx, 3, func(conf *config.Config, _ string) { conf.PDServerCfg.UseRegionStorage = true }) defer func() { cluster.Destroy() cancel() @@ -73,7 +73,7 @@ func TestRegionSyncer(t *testing.T) { } // merge case // region2 -> region1 -> region0 - // merge A to B will increases version to max(versionA, versionB)+1, but does not increase conver + // merge A to B will increases version to max(versionA, versionB)+1, but does not increase conversion // region0 version is max(1, max(1, 1)+1)+1=3 regions[0] = regions[0].Clone(core.WithEndKey(regions[2].GetEndKey()), core.WithIncVersion(), core.WithIncVersion()) err = rc.HandleRegionHeartbeat(regions[0]) @@ -81,7 +81,7 @@ func TestRegionSyncer(t *testing.T) { // merge case // region3 -> region4 - // merge A to B will increases version to max(versionA, versionB)+1, but does not increase conver + // merge A to B will increases version to max(versionA, versionB)+1, but does not increase conversion // region4 version is max(1, 1)+1=2 regions[4] = regions[3].Clone(core.WithEndKey(regions[4].GetEndKey()), core.WithIncVersion()) err = rc.HandleRegionHeartbeat(regions[4]) @@ -89,7 +89,7 @@ func TestRegionSyncer(t *testing.T) { // merge case // region0 -> region4 - // merge A to B will increases version to max(versionA, versionB)+1, but does not increase conver + // merge A to B will increases version to max(versionA, versionB)+1, but does not increase conversion // region4 version is max(3, 2)+1=4 regions[4] = regions[0].Clone(core.WithEndKey(regions[4].GetEndKey()), core.WithIncVersion()) err = rc.HandleRegionHeartbeat(regions[4]) @@ -156,14 +156,14 @@ func TestRegionSyncer(t *testing.T) { re.Equal(region.GetBuckets(), r.GetBuckets()) } re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/syncer/noFastExitSync")) - re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/storage/regionStorageFastFlush")) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/storage/levelDBStorageFastFlush")) } func 
TestFullSyncWithAddMember(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, serverName string) { conf.PDServerCfg.UseRegionStorage = true }) + cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, _ string) { conf.PDServerCfg.UseRegionStorage = true }) defer cluster.Destroy() re.NoError(err) @@ -207,7 +207,7 @@ func TestPrepareChecker(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/changeCoordinatorTicker", `return(true)`)) - cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, serverName string) { conf.PDServerCfg.UseRegionStorage = true }) + cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, _ string) { conf.PDServerCfg.UseRegionStorage = true }) defer cluster.Destroy() re.NoError(err) @@ -256,7 +256,7 @@ func TestPrepareCheckerWithTransferLeader(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/changeCoordinatorTicker", `return(true)`)) - cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, serverName string) { conf.PDServerCfg.UseRegionStorage = true }) + cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, _ string) { conf.PDServerCfg.UseRegionStorage = true }) defer cluster.Destroy() re.NoError(err) diff --git a/tests/server/server_test.go b/tests/server/server_test.go index 3b85cd3cf0d..adf7202454b 100644 --- a/tests/server/server_test.go +++ b/tests/server/server_test.go @@ -98,7 +98,7 @@ func TestClusterID(t *testing.T) { re.Equal(clusterID, s.GetClusterID()) } - cluster2, err := tests.NewTestCluster(ctx, 3, func(conf *config.Config, serverName string) { conf.InitialClusterToken = "foobar" }) + cluster2, err := tests.NewTestCluster(ctx, 3, func(conf *config.Config, _ string) { conf.InitialClusterToken = "foobar" }) defer cluster2.Destroy() re.NoError(err) err = cluster2.RunInitialServers() diff --git a/tests/server/storage/hot_region_storage_test.go b/tests/server/storage/hot_region_storage_test.go index 12110be0249..b63b533bc0f 100644 --- a/tests/server/storage/hot_region_storage_test.go +++ b/tests/server/storage/hot_region_storage_test.go @@ -37,7 +37,7 @@ func TestHotRegionStorage(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() cluster, err := tests.NewTestCluster(ctx, 1, - func(cfg *config.Config, serverName string) { + func(cfg *config.Config, _ string) { cfg.Schedule.HotRegionCacheHitsThreshold = 0 cfg.Schedule.HotRegionsWriteInterval.Duration = 1000 * time.Millisecond cfg.Schedule.HotRegionsReservedDays = 1 @@ -145,7 +145,7 @@ func TestHotRegionStorageReservedDayConfigChange(t *testing.T) { interval := 100 * time.Millisecond defer cancel() cluster, err := tests.NewTestCluster(ctx, 1, - func(cfg *config.Config, serverName string) { + func(cfg *config.Config, _ string) { cfg.Schedule.HotRegionCacheHitsThreshold = 0 cfg.Schedule.HotRegionsWriteInterval.Duration = interval cfg.Schedule.HotRegionsReservedDays = 1 @@ -237,7 +237,7 @@ func TestHotRegionStorageWriteIntervalConfigChange(t *testing.T) { interval := 100 * time.Millisecond defer cancel() cluster, err := tests.NewTestCluster(ctx, 1, - func(cfg *config.Config, serverName string) { + func(cfg *config.Config, _ string) { cfg.Schedule.HotRegionCacheHitsThreshold = 0 
cfg.Schedule.HotRegionsWriteInterval.Duration = interval cfg.Schedule.HotRegionsReservedDays = 1 diff --git a/tests/server/tso/allocator_test.go b/tests/server/tso/allocator_test.go index 41f544729c2..692aec490eb 100644 --- a/tests/server/tso/allocator_test.go +++ b/tests/server/tso/allocator_test.go @@ -132,7 +132,7 @@ func TestPriorityAndDifferentLocalTSO(t *testing.T) { time.Sleep(time.Second * 5) // Join a new dc-location - pd4, err := cluster.Join(ctx, func(conf *config.Config, serverName string) { + pd4, err := cluster.Join(ctx, func(conf *config.Config, _ string) { conf.EnableLocalTSO = true conf.Labels[config.ZoneLabel] = "dc-4" }) @@ -162,10 +162,10 @@ func TestPriorityAndDifferentLocalTSO(t *testing.T) { wg := sync.WaitGroup{} wg.Add(len(dcLocationConfig)) for serverName, dcLocation := range dcLocationConfig { - go func(serName, dc string) { + go func(name, dc string) { defer wg.Done() testutil.Eventually(re, func() bool { - return cluster.WaitAllocatorLeader(dc) == serName + return cluster.WaitAllocatorLeader(dc) == name }, testutil.WithWaitFor(90*time.Second), testutil.WithTickInterval(time.Second)) }(serverName, dcLocation) } @@ -188,8 +188,8 @@ func waitAllocatorPriorityCheck(cluster *tests.TestCluster) { wg := sync.WaitGroup{} for _, server := range cluster.GetServers() { wg.Add(1) - go func(ser *tests.TestServer) { - ser.GetTSOAllocatorManager().PriorityChecker() + go func(s *tests.TestServer) { + s.GetTSOAllocatorManager().PriorityChecker() wg.Done() }(server) } diff --git a/tests/server/tso/consistency_test.go b/tests/server/tso/consistency_test.go index d1c45df7f17..1bf20cce20d 100644 --- a/tests/server/tso/consistency_test.go +++ b/tests/server/tso/consistency_test.go @@ -275,7 +275,7 @@ func (suite *tsoConsistencyTestSuite) TestLocalTSOAfterMemberChanged() { re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/tso/systemTimeSlow", `return(true)`)) // Join a new dc-location - pd4, err := cluster.Join(suite.ctx, func(conf *config.Config, serverName string) { + pd4, err := cluster.Join(suite.ctx, func(conf *config.Config, _ string) { conf.EnableLocalTSO = true conf.Labels[config.ZoneLabel] = "dc-4" }) diff --git a/tests/server/tso/global_tso_test.go b/tests/server/tso/global_tso_test.go index 5ae2e6e0f67..8dd98b1d628 100644 --- a/tests/server/tso/global_tso_test.go +++ b/tests/server/tso/global_tso_test.go @@ -137,7 +137,7 @@ func TestLogicalOverflow(t *testing.T) { runCase := func(updateInterval time.Duration) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, serverName string) { + cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, _ string) { conf.TSOUpdatePhysicalInterval = typeutil.Duration{Duration: updateInterval} }) defer cluster.Destroy() @@ -165,7 +165,7 @@ func TestLogicalOverflow(t *testing.T) { re.NoError(err) if i == 1 { // the 2nd request may (but not must) overflow, as max logical interval is 262144 - re.Less(time.Since(begin), updateInterval+20*time.Millisecond) // additional 20ms for gRPC latency + re.Less(time.Since(begin), updateInterval+50*time.Millisecond) // additional 50ms for gRPC latency } } // the 3rd request must overflow diff --git a/tests/server/watch/leader_watch_test.go b/tests/server/watch/leader_watch_test.go index f7765297023..84e16398677 100644 --- a/tests/server/watch/leader_watch_test.go +++ b/tests/server/watch/leader_watch_test.go @@ -35,7 +35,7 @@ func TestWatcher(t *testing.T) { re := require.New(t) ctx, cancel := 
		context.WithCancel(context.Background())
	defer cancel()
-	cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, serverName string) { conf.AutoCompactionRetention = "1s" })
+	cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, _ string) { conf.AutoCompactionRetention = "1s" })
	defer cluster.Destroy()
	re.NoError(err)
@@ -73,7 +73,7 @@ func TestWatcherCompacted(t *testing.T) {
	re := require.New(t)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
-	cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, serverName string) { conf.AutoCompactionRetention = "1s" })
+	cluster, err := tests.NewTestCluster(ctx, 1, func(conf *config.Config, _ string) { conf.AutoCompactionRetention = "1s" })
	defer cluster.Destroy()
	re.NoError(err)
diff --git a/tests/testutil.go b/tests/testutil.go
index 106cddc9dfb..ea52bce310e 100644
--- a/tests/testutil.go
+++ b/tests/testutil.go
@@ -17,8 +17,12 @@ package tests
 import (
	"context"
	"fmt"
+	"math/rand"
+	"net"
+	"net/http"
	"os"
	"runtime"
+	"strconv"
	"strings"
	"sync"
	"testing"
@@ -45,6 +49,45 @@ import (
	"go.uber.org/zap"
 )

+var (
+	TestDialClient = &http.Client{
+		Transport: &http.Transport{
+			DisableKeepAlives: true,
+		},
+	}
+
+	testPortMutex sync.Mutex
+	testPortMap = make(map[string]struct{})
+)
+
+// SetRangePort sets the range of ports for test.
+func SetRangePort(start, end int) {
+	portRange := []int{start, end}
+	dialContext := func(ctx context.Context, network, addr string) (net.Conn, error) {
+		dialer := &net.Dialer{}
+		randomPort := strconv.Itoa(rand.Intn(portRange[1]-portRange[0]) + portRange[0])
+		testPortMutex.Lock()
+		for i := 0; i < 10; i++ {
+			if _, ok := testPortMap[randomPort]; !ok {
+				break
+			}
+			randomPort = strconv.Itoa(rand.Intn(portRange[1]-portRange[0]) + portRange[0])
+		}
+		testPortMutex.Unlock()
+		localAddr, err := net.ResolveTCPAddr(network, "0.0.0.0:"+randomPort)
+		if err != nil {
+			return nil, err
+		}
+		dialer.LocalAddr = localAddr
+		return dialer.DialContext(ctx, network, addr)
+	}
+
+	TestDialClient.Transport = &http.Transport{
+		DisableKeepAlives: true,
+		DialContext: dialContext,
+	}
+}
+
 var once sync.Once

 // InitLogger initializes the logger for test.
@@ -157,7 +200,7 @@ func WaitForPrimaryServing(re *require.Assertions, serverMap map[string]bs.Serve
			}
		}
		return false
-	}, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond))
+	}, testutil.WithWaitFor(10*time.Second), testutil.WithTickInterval(50*time.Millisecond))
	return primary
}
@@ -273,14 +316,14 @@ func (s *SchedulingTestEnvironment) RunTestInTwoModes(test func(*TestCluster)) {

 // RunTestInPDMode is to run test in pd mode.
 func (s *SchedulingTestEnvironment) RunTestInPDMode(test func(*TestCluster)) {
-	s.t.Logf("start test %s in pd mode", s.getTestName())
+	s.t.Logf("start test %s in pd mode", getTestName())
	if _, ok := s.clusters[pdMode]; !ok {
		s.startCluster(pdMode)
	}
	test(s.clusters[pdMode])
}

-func (s *SchedulingTestEnvironment) getTestName() string {
+func getTestName() string {
	pc, _, _, _ := runtime.Caller(2)
	caller := runtime.FuncForPC(pc)
	if caller == nil || strings.Contains(caller.Name(), "RunTestInTwoModes") {
@@ -303,7 +346,7 @@ func (s *SchedulingTestEnvironment) RunTestInAPIMode(test func(*TestCluster)) {
		re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/mcs/scheduling/server/fastUpdateMember"))
		re.NoError(failpoint.Disable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs"))
	}()
-	s.t.Logf("start test %s in api mode", s.getTestName())
+	s.t.Logf("start test %s in api mode", getTestName())
	if _, ok := s.clusters[apiMode]; !ok {
		s.startCluster(apiMode)
	}
diff --git a/tests/tso_cluster.go b/tests/tso_cluster.go
index 2f80f7ff970..e1fdb6d69ca 100644
--- a/tests/tso_cluster.go
+++ b/tests/tso_cluster.go
@@ -76,7 +76,7 @@ func RestartTestTSOCluster(
			defer wg.Done()
			clean()
			serverCfg := cluster.servers[addr].GetConfig()
-			newServer, newCleanup, err := NewTSOTestServer(newCluster.ctx, serverCfg)
+			newServer, newCleanup, err := NewTSOTestServer(ctx, serverCfg)
			serverMap.Store(addr, newServer)
			cleanupMap.Store(addr, newCleanup)
			errorMap.Store(addr, err)
@@ -84,7 +84,7 @@
	}
	wg.Wait()

-	errorMap.Range(func(key, value interface{}) bool {
+	errorMap.Range(func(key, value any) bool {
		if value != nil {
			err = value.(error)
			return false
diff --git a/tools.go b/tools.go
index 909f42ab9b5..e5298de2827 100644
--- a/tools.go
+++ b/tools.go
@@ -20,7 +20,6 @@ package tools
 import (
	_ "github.com/AlekSi/gocov-xml"
	_ "github.com/axw/gocov/gocov"
-	_ "github.com/mgechev/revive"
	_ "github.com/pingcap/errors/errdoc-gen"
	_ "github.com/pingcap/failpoint/failpoint-ctl"
	_ "github.com/swaggo/swag/cmd/swag"
diff --git a/tools/Makefile b/tools/Makefile
index d40fca2f8f4..4195160aff6 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-ROOT_PATH := ..
+ROOT_PATH := $(shell pwd)/..
 GO_TOOLS_BIN_PATH := $(ROOT_PATH)/.tools/bin
 PATH := $(GO_TOOLS_BIN_PATH):$(PATH)
 SHELL := env PATH='$(PATH)' GOBIN='$(GO_TOOLS_BIN_PATH)' $(shell which bash)
@@ -25,10 +25,22 @@ static: install-tools
	@ gofmt -s -l -d . 2>&1 | awk '{ print } END { if (NR > 0) { exit 1 } }'
	@ echo "golangci-lint ..."
	@ golangci-lint run -c $(ROOT_PATH)/.golangci.yml --verbose ./... --allow-parallel-runners
-	@ echo "revive ..."
-	@ revive -formatter friendly -config $(ROOT_PATH)/revive.toml ./...

 tidy:
	@ go mod tidy
	git diff go.mod go.sum | cat
	git diff --quiet go.mod go.sum
+
+test: failpoint-enable
+	CGO_ENABLED=1 go test ./... -v -tags deadlock -race -cover || { $(MAKE) failpoint-disable && exit 1; }
+	$(MAKE) failpoint-disable
+
+ci-test-job:
+	if [ -f covprofile ]; then rm covprofile; fi
+	CGO_ENABLED=1 go test ./... -v -tags deadlock -race -cover -covermode=atomic -coverprofile=covprofile -coverpkg=../...
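For a sense of how the TestDialClient/SetRangePort helpers added to tests/testutil.go above are meant to be used, here is a minimal sketch of a test that pins the client's local port range and then issues a request through the shared client; the test name, the port range, and the httptest stand-in server are illustrative assumptions, not code from this patch:

package tests_test

import (
	"net/http"
	"net/http/httptest"
	"testing"

	"github.com/stretchr/testify/require"
	"github.com/tikv/pd/tests"
)

// Illustrative sketch: constrain the local ports used by the shared test
// client, then exercise it against a throwaway HTTP server.
func TestDialClientWithPinnedPorts(t *testing.T) {
	re := require.New(t)

	// Hypothetical range; any free block of local ports works.
	tests.SetRangePort(20000, 20100)

	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
	}))
	defer srv.Close()

	resp, err := tests.TestDialClient.Get(srv.URL)
	re.NoError(err)
	defer resp.Body.Close()
	re.Equal(http.StatusOK, resp.StatusCode)
}

Since SetRangePort swaps the transport on the package-level TestDialClient, it is best called once per process (for example from a TestMain) rather than concurrently from several tests.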
+
+failpoint-enable:
+	cd $(ROOT_PATH) && $(MAKE) failpoint-enable
+
+failpoint-disable:
+	cd $(ROOT_PATH) && $(MAKE) failpoint-disable
diff --git a/tools/go.mod b/tools/go.mod
index 767ada3c8cc..eb2c279e7fa 100644
--- a/tools/go.mod
+++ b/tools/go.mod
@@ -7,13 +7,11 @@ replace (
	github.com/tikv/pd/client => ../client
 )

-// reset grpc and protobuf deps in order to import client and server at the same time
-replace google.golang.org/grpc v1.59.0 => google.golang.org/grpc v1.26.0
-
 require (
	github.com/BurntSushi/toml v0.3.1
+	github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5
	github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e
-	github.com/coreos/go-semver v0.3.0
+	github.com/coreos/go-semver v0.3.1
	github.com/docker/go-units v0.4.0
	github.com/gin-contrib/cors v1.4.0
	github.com/gin-contrib/gzip v0.0.1
@@ -24,20 +22,22 @@ require (
	github.com/mattn/go-shellwords v1.0.12
	github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c
	github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00
-	github.com/pingcap/kvproto v0.0.0-20231222062942-c0c73f41d0b2
+	github.com/pingcap/kvproto v0.0.0-20240403065636-c699538f7aa1
	github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3
	github.com/pkg/errors v0.9.1
-	github.com/prometheus/client_golang v1.18.0
-	github.com/spf13/cobra v1.0.0
+	github.com/prometheus/client_golang v1.19.0
+	github.com/spf13/cobra v1.8.0
	github.com/spf13/pflag v1.0.5
	github.com/stretchr/testify v1.8.4
	github.com/tikv/pd v0.0.0-00010101000000-000000000000
	github.com/tikv/pd/client v0.0.0-00010101000000-000000000000
-	go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793
-	go.uber.org/goleak v1.2.0
-	go.uber.org/zap v1.26.0
+	go.etcd.io/etcd v0.5.0-alpha.5.0.20240320135013-950cd5fbe6ca
+	go.uber.org/automaxprocs v1.5.3
+	go.uber.org/goleak v1.3.0
+	go.uber.org/zap v1.27.0
	golang.org/x/text v0.14.0
-	google.golang.org/grpc v1.59.0
+	golang.org/x/tools v0.14.0
+	google.golang.org/grpc v1.62.1
 )

 require (
@@ -65,12 +65,11 @@ require (
	github.com/bitly/go-simplejson v0.5.0 // indirect
	github.com/breeswish/gin-jwt/v2 v2.6.4-jwt-patch // indirect
	github.com/bytedance/sonic v1.9.1 // indirect
-	github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 // indirect
	github.com/cenkalti/backoff/v4 v4.0.2 // indirect
	github.com/cespare/xxhash/v2 v2.2.0 // indirect
	github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
-	github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect
-	github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect
+	github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf // indirect
+	github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb // indirect
	github.com/davecgh/go-spew v1.1.1 // indirect
	github.com/dustin/go-humanize v1.0.1 // indirect
	github.com/elliotchance/pie/v2 v2.1.0 // indirect
@@ -90,30 +89,29 @@ require (
	github.com/goccy/go-graphviz v0.0.9 // indirect
	github.com/goccy/go-json v0.10.2 // indirect
	github.com/gogo/protobuf v1.3.2 // indirect
-	github.com/golang-jwt/jwt v3.2.1+incompatible // indirect
+	github.com/golang-jwt/jwt v3.2.2+incompatible // indirect
	github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
-	github.com/golang/protobuf v1.5.3 // indirect
+	github.com/golang/protobuf v1.5.4 // indirect
	github.com/golang/snappy v0.0.4 // indirect
	github.com/google/btree v1.1.2 // indirect
	github.com/google/pprof v0.0.0-20211122183932-1daafda22083 // indirect
-	github.com/google/uuid v1.3.0 // indirect
+	github.com/google/uuid
v1.6.0 // indirect github.com/gorilla/mux v1.7.4 // indirect - github.com/gorilla/websocket v1.4.2 // indirect - github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect + github.com/gorilla/websocket v1.5.1 // indirect + github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect github.com/gtank/cryptopasta v0.0.0-20170601214702-1f550f6f2f69 // indirect github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d // indirect - github.com/inconshreveable/mousetrap v1.0.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect github.com/joho/godotenv v1.4.0 // indirect - github.com/jonboulle/clockwork v0.2.2 // indirect + github.com/jonboulle/clockwork v0.4.0 // indirect github.com/joomcode/errorx v1.0.1 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/cpuid/v2 v2.2.4 // indirect - github.com/konsorten/go-windows-terminal-sequences v1.0.3 // indirect github.com/leodido/go-urn v1.2.4 // indirect github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a // indirect github.com/mailru/easyjson v0.7.6 // indirect @@ -129,22 +127,22 @@ require ( github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d // indirect github.com/pingcap/errcode v0.3.0 // indirect github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 // indirect - github.com/pingcap/tidb-dashboard v0.0.0-20240111062855-41f7c8011953 // indirect + github.com/pingcap/tidb-dashboard v0.0.0-20240326110213-9768844ff5d7 // indirect github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b // indirect - github.com/prometheus/client_model v0.5.0 // indirect - github.com/prometheus/common v0.46.0 // indirect - github.com/prometheus/procfs v0.12.0 // indirect + github.com/prometheus/client_model v0.6.0 // indirect + github.com/prometheus/common v0.51.1 // indirect + github.com/prometheus/procfs v0.13.0 // indirect github.com/rs/cors v1.7.0 // indirect github.com/samber/lo v1.37.0 // indirect github.com/sasha-s/go-deadlock v0.2.0 // indirect github.com/shirou/gopsutil/v3 v3.23.3 // indirect github.com/shoenig/go-m1cpu v0.1.5 // indirect github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0 // indirect - github.com/sirupsen/logrus v1.6.0 // indirect - github.com/smallnest/chanx v0.0.0-20221229104322-eb4c998d2072 // indirect - github.com/soheilhy/cmux v0.1.4 // indirect + github.com/sirupsen/logrus v1.9.3 // indirect + github.com/smallnest/chanx v1.2.1-0.20240521153536-01121e21ff99 // indirect + github.com/soheilhy/cmux v0.1.5 // indirect github.com/stretchr/objx v0.5.0 // indirect github.com/swaggo/files v0.0.0-20210815190702-a29dd2bc99b2 // indirect github.com/swaggo/http-swagger v1.2.6 // indirect @@ -152,35 +150,34 @@ require ( github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965 // indirect github.com/tklauser/go-sysconf v0.3.11 // indirect github.com/tklauser/numcpus v0.6.0 // indirect - github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 // indirect + github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect 
github.com/ugorji/go/codec v1.2.11 // indirect github.com/unrolled/render v1.0.1 // indirect github.com/urfave/negroni v0.3.0 // indirect github.com/vmihailenco/msgpack/v5 v5.3.5 // indirect github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect - github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect + github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect - go.etcd.io/bbolt v1.3.6 // indirect + go.etcd.io/bbolt v1.3.9 // indirect go.uber.org/atomic v1.10.0 // indirect go.uber.org/dig v1.9.0 // indirect go.uber.org/fx v1.12.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/arch v0.3.0 // indirect - golang.org/x/crypto v0.18.0 // indirect + golang.org/x/crypto v0.21.0 // indirect golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4 // indirect golang.org/x/image v0.10.0 // indirect - golang.org/x/net v0.20.0 // indirect - golang.org/x/oauth2 v0.16.0 // indirect - golang.org/x/sync v0.4.0 // indirect - golang.org/x/sys v0.16.0 // indirect - golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.14.0 // indirect - google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20231030173426-d783a09b4405 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 // indirect - google.golang.org/protobuf v1.32.0 // indirect + golang.org/x/net v0.23.0 // indirect + golang.org/x/oauth2 v0.18.0 // indirect + golang.org/x/sync v0.6.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/time v0.5.0 // indirect + google.golang.org/appengine v1.6.8 // indirect + google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda // indirect + google.golang.org/protobuf v1.33.0 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect @@ -189,5 +186,5 @@ require ( gorm.io/driver/sqlite v1.4.3 // indirect gorm.io/gorm v1.24.3 // indirect moul.io/zapgorm2 v1.1.0 // indirect - sigs.k8s.io/yaml v1.2.0 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect ) diff --git a/tools/go.sum b/tools/go.sum index 54acc216ec0..535ea668b97 100644 --- a/tools/go.sum +++ b/tools/go.sum @@ -6,7 +6,6 @@ github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= -github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI= github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= @@ -18,13 +17,10 @@ github.com/VividCortex/mysqlerr v1.0.0/go.mod h1:xERx8E4tBhLvpjzdUyQiSfUxeMcATEQ github.com/Xeoncross/go-aesctr-with-hmac v0.0.0-20200623134604-12b17a7ff502 h1:L8IbaI/W6h5Cwgh0n4zGeZpVK78r/jBf9ASurHo9+/o= github.com/Xeoncross/go-aesctr-with-hmac v0.0.0-20200623134604-12b17a7ff502/go.mod 
h1:pmnBM9bxWSiHvC/gSWunUIyDvGn33EkP2CUjxFKtTTM= github.com/agiledragon/gomonkey/v2 v2.3.1/go.mod h1:ap1AmDzcVOAz1YpeJ3TCzIgstoaWLA6jbbgxfB4w2iY= -github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alvaroloes/enumer v1.1.2/go.mod h1:FxrjvuXoDAx9isTJrv4c+T410zFi0DtXIT0m65DJ+Wo= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/appleboy/gofight/v2 v2.1.2 h1:VOy3jow4vIK8BRQJoC/I9muxyYlJ2yb9ht2hZoS3rf4= github.com/appleboy/gofight/v2 v2.1.2/go.mod h1:frW+U1QZEdDgixycTj4CygQ48yLTUhplt43+Wczp3rw= -github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/aws/aws-sdk-go-v2 v1.17.7 h1:CLSjnhJSTSogvqUGhIC6LqFKATMRexcxLZ0i/Nzk9Eg= github.com/aws/aws-sdk-go-v2 v1.17.7/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= github.com/aws/aws-sdk-go-v2/config v1.18.19 h1:AqFK6zFNtq4i1EYu+eC7lcKHYnZagMn6SW171la0bGw= @@ -52,11 +48,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.18.7/go.mod h1:JuTnSoeePXmMVe9G8Ncjj github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8= github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= -github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= -github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bitly/go-simplejson v0.5.0 h1:6IH+V8/tVMab511d5bn4M7EwGXZf9Hj6i2xSwkNEM+Y= github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= @@ -73,7 +66,6 @@ github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5/go.mod h1:jtAfV github.com/cenkalti/backoff/v4 v4.0.2 h1:JIufpQLbh4DkbQoii76ItQIUFzevQSqOLZca4eamEDs= github.com/cenkalti/backoff/v4 v4.0.2/go.mod h1:eEew/i+1Q6OrCDZh3WiXYv3+nJwBASZ8Bog/87DQnVg= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= @@ -89,32 +81,22 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa h1:OaNxuTZr7kxeODyLWsRMC+OD03aFUH+mW6r2d+MWa5Y= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= -github.com/coreos/bbolt v1.3.2/go.mod 
h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= -github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= -github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM= -github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f h1:JOrtw2xFKzlg+cbHpyrpLDmnN1HqhBfnX7WDiW7eG2c= -github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= -github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg= -github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4= +github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03VsM8rvUec= +github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU= +github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb h1:GIzvVQ9UkUlOhSDlqmrQAAAUd6R3E+caIisNEyWXvNE= +github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/corona10/goimagehash v1.0.2 h1:pUfB0LnsJASMPGEZLj7tGY251vF+qLGqOgEP4rUs6kA= github.com/corona10/goimagehash v1.0.2/go.mod h1:/l9umBhvcHQXVtQO1V6Gp1yD20STawkhRnnX0D1bvVI= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= -github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385 
h1:clC1lXBpe2kTj2VHdaIu9ajZQe4kcEY9j0NsnDDBZ3o= @@ -125,7 +107,6 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fogleman/gg v1.3.0 h1:/7zJX8F6AaYQc57WQCyN9cAIz+4bCJGO9B+dyW29am8= github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= @@ -148,9 +129,8 @@ github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= github.com/go-echarts/go-echarts v1.0.0 h1:n181E4iXwj4zrU9VYmdM2m8dyhERt2w9k9YhHqdp6A8= github.com/go-echarts/go-echarts v1.0.0/go.mod h1:qbmyAb/Rl1f2w7wKba1D4LoNq4U164yO4/wedFbcWyo= -github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= -github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= +github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= @@ -190,12 +170,11 @@ github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/gogo/protobuf v0.0.0-20171007142547-342cbe0a0415/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v0.0.0-20180717141946-636bf0302bc9/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang-jwt/jwt v3.2.1+incompatible h1:73Z+4BJcrTC+KczS6WvTPvRGOp1WmfEP4Q1lOd9Z/+c= github.com/golang-jwt/jwt v3.2.1+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= +github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY= +github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA= github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A= @@ -203,25 +182,21 @@ github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EO github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 
h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo= -github.com/golang/glog v1.1.2/go.mod h1:zR+okUeTbrL6EL3xHUDxZuEtGv04p5shwip1+mL/rLQ= -github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef h1:veQD95Isof8w9/WXiA+pa3tz3fJXkt5B7QaRBrM62gk= -github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v0.0.0-20180814211427-aa810b61a9c7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU= github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -229,40 +204,36 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod 
h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20211122183932-1daafda22083 h1:c8EUapQFi+kjzedr4c6WqbwMdmB95+oDBWZ5XFHFYxY= github.com/google/pprof v0.0.0-20211122183932-1daafda22083/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= -github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gorilla/mux v1.7.4 h1:VuZ8uybHlWmqV03+zRzdwKL4tUnIp1MAQtp1mIFE1bc= github.com/gorilla/mux v1.7.4/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= -github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= -github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 h1:z53tR0945TRRQO/fLEVPI6SMv7ZflF0TEaTAoU7tOzg= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= +github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/QY= +github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= +github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDaL56wXCB/5+wF6uHfaI= +github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/gtank/cryptopasta v0.0.0-20170601214702-1f550f6f2f69 h1:7xsUJsB2NrdcttQPa7JLEaGzvdbk7KvfrjgHZXOQRo0= github.com/gtank/cryptopasta v0.0.0-20170601214702-1f550f6f2f69/go.mod h1:YLEMZOtU+AZ7dhN9T/IpGhXVGly2bvkJQ+zxj3WeVQo= -github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d h1:uGg2frlt3IcT7kbV6LEp5ONv4vmoO2FW4qSO+my/aoM= github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= -github.com/inconshreveable/mousetrap v1.0.0 
h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/influxdata/tdigest v0.0.1 h1:XpFptwYmnEKUqmkcDjrzffswZ3nvNeevbUSLPP/ZzIY= github.com/influxdata/tdigest v0.0.1/go.mod h1:Z0kXnxzbTC2qrx4NaIzYkE1k66+6oEDQTvL95hQFh5Y= github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8= @@ -294,9 +265,8 @@ github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHW github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/joho/godotenv v1.4.0 h1:3l4+N6zfMWnkbPEXKng2o2/MR5mSwTrBih4ZEkkz1lg= github.com/joho/godotenv v1.4.0/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= -github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= -github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9qUBdQ= -github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8= +github.com/jonboulle/clockwork v0.4.0 h1:p4Cf1aMWXnXAUh8lVfewRBx1zaTSYKrKMF2g3ST4RZ4= +github.com/jonboulle/clockwork v0.4.0/go.mod h1:xgRqUGwRcjKCO1vbZUEtSLrqKoPSsUpK7fnezOII0kc= github.com/joomcode/errorx v1.0.1 h1:CalpDWz14ZHd68fIqluJasJosAewpz2TFaJALrUxjrk= github.com/joomcode/errorx v1.0.1/go.mod h1:kgco15ekB6cs+4Xjzo7SPeXzx38PbJzBwbnu9qfVNHQ= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -304,23 +274,16 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.5/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= -github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= -github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod 
h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= @@ -337,29 +300,23 @@ github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNa github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a h1:N9zuLhTvBSRt0gWSiJswwQ2HqDmtX/ZCDJURnKUt1Ik= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a/go.mod h1:JKx41uQRwqlTZabZc+kILPrO/3jlKnQ2Z8b7YiVw5cE= -github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA= github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= -github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-shellwords v1.0.12 h1:M2zGm7EW6UQJvDeQxo4T51eKPurbeFbe8WtebGE2xrk= github.com/mattn/go-shellwords v1.0.12/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y= github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI= github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= -github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/microsoft/go-mssqldb v0.17.0 h1:Fto83dMZPnYv1Zwx5vHHxpNraeEaUlQ/hhHLgZiaenE= github.com/microsoft/go-mssqldb v0.17.0/go.mod h1:OkoNGhGEs8EZqchVTtochlXruEhEOaO4S0d2sB5aeGQ= github.com/minio/sio v0.3.0 h1:syEFBewzOMOYVzSTFpp1MqpSZk8rUNbz8VIIc+PNzus= github.com/minio/sio v0.3.0/go.mod h1:8b0yPp2avGThviy/+OCJBI6OMpvxoUuiLvE6F1lebhw= -github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -367,22 +324,20 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lN github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= 
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5 h1:BvoENQQU+fZ9uukda/RzCAL/191HHwJA5b13R6diVlY= github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/oleiade/reflections v1.0.1 h1:D1XO3LVEYroYskEsoSiGItp9RUxG6jWnCVvrqH0HHQM= github.com/oleiade/reflections v1.0.1/go.mod h1:rdFxbxq4QXVZWj0F+e9jqjDkc7dbp97vkRixKo2JR60= -github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.20.1 h1:PA/3qinGoukvymdIDV8pii6tiZgC8kbmJO6Z5+b002Q= github.com/onsi/gomega v1.20.1/go.mod h1:DtrZpjmvpn2mPm4YWQa0/ALMDj9v4YxLgojwPeREyVo= +github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= github.com/otiai10/copy v1.7.0/go.mod h1:rmRl6QPdJj6EiUqXQ/4Nn2lLXoNQjFCQbbNrxgc/t3U= @@ -391,7 +346,6 @@ github.com/otiai10/curr v1.0.0/go.mod h1:LskTG5wDwr8Rs+nNQ+1LlxRjAtTZZjtJW4rMXl6 github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT91xUo= github.com/otiai10/mint v1.3.3/go.mod h1:/yxELlJQ0ufhjUwhshSj+wFjZ78CnZ48/1wtmBH1OTc= github.com/pascaldekloe/name v0.0.0-20180628100202-0fd16699aae1/go.mod h1:eD5JxqMiuNYyFNmyY9rkJ/slN8y59oEu4Ei7F8OoKWQ= -github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= @@ -411,19 +365,18 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ue github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZFh4N3vQ5HEtld3S+Y+StULhWVvumU0= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= -github.com/pingcap/kvproto v0.0.0-20231222062942-c0c73f41d0b2 h1:364A6VCS+l0oHBKZKotX9LzmfEtIO/NTccTIQcPp3Ug= -github.com/pingcap/kvproto v0.0.0-20231222062942-c0c73f41d0b2/go.mod 
h1:rXxWk2UnwfUhLXha1jxRWPADw9eMZGWEWCg92Tgmb/8= +github.com/pingcap/kvproto v0.0.0-20240403065636-c699538f7aa1 h1:vDWWJKU6ztczn24XixahtLwcnJ15DOtSRIRM3jVtZNU= +github.com/pingcap/kvproto v0.0.0-20240403065636-c699538f7aa1/go.mod h1:rXxWk2UnwfUhLXha1jxRWPADw9eMZGWEWCg92Tgmb/8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8IDP+SZrdhV1Kibl9KrHxJ9eciw= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 h1:QV6jqlfOkh8hqvEAgwBZa+4bSgO0EeKC7s5c6Luam2I= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21/go.mod h1:QYnjfA95ZaMefyl1NO8oPtKeb8pYUdnDVhQgf+qdpjM= -github.com/pingcap/tidb-dashboard v0.0.0-20240111062855-41f7c8011953 h1:vY/bY5vkSvvuXB1030AUmy0LFhuEA53ryVdF/bTbFXU= -github.com/pingcap/tidb-dashboard v0.0.0-20240111062855-41f7c8011953/go.mod h1:ucZBRz52icb23T/5Z4CsuUHmarYiin7p2MeiVBe+o8c= +github.com/pingcap/tidb-dashboard v0.0.0-20240326110213-9768844ff5d7 h1:eFu98FbfJB7PKWOtkaV6YNXXJWqDhczQX56j/iucgU4= +github.com/pingcap/tidb-dashboard v0.0.0-20240326110213-9768844ff5d7/go.mod h1:ucZBRz52icb23T/5Z4CsuUHmarYiin7p2MeiVBe+o8c= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e h1:FBaTXU8C3xgt/drM58VHxojHo/QoG1oPsgWTGvaSpO4= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -432,28 +385,17 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b h1:0LFwY6Q3gMACTjAbMZBjXAqTOzOwFaj2Ld6cjeQ7Rig= github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= -github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= -github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk= -github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA= -github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= +github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= +github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= +github.com/prometheus/client_golang v1.19.0/go.mod 
h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= -github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= -github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= -github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.46.0 h1:doXzt5ybi1HBKpsZOL0sSkaNHJJqkyfEWZGGqqScV0Y= -github.com/prometheus/common v0.46.0/go.mod h1:Tp0qkxpb9Jsg54QMe+EAmqXkSV7Evdy1BTn+g2pa/hQ= -github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= -github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= -github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= -github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= +github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos= +github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8= +github.com/prometheus/common v0.51.1 h1:eIjN50Bwglz6a/c3hAgSMcofL3nD+nFQkV6Dd4DsQCw= +github.com/prometheus/common v0.51.1/go.mod h1:lrWtQx+iDfn2mbH5GUzlH9TSHyfZpHkSiG1W7y3sF2Q= +github.com/prometheus/procfs v0.13.0 h1:GqzLlQyfsPbaEHaQkO7tbDlriv/4o5Hudv6OXHGKX7o= +github.com/prometheus/procfs v0.13.0/go.mod h1:cd4PFCR54QLnGKPaKGA6l+cfuNXtht43ZKY6tow0Y1g= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= @@ -463,6 +405,7 @@ github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncj github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik= github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/samber/lo v1.37.0 h1:XjVcB8g6tgUp8rsPsJ2CvhClfImrpL04YpQHXeHPhRw= github.com/samber/lo v1.37.0/go.mod h1:9vaz2O4o8oOnK23pd2TrXufcbdbJIa3b6cstBWKpopA= github.com/sasha-s/go-deadlock v0.2.0 h1:lMqc+fUb7RrFS3gQLtoQsJ7/6TV/pAIFvBsqX73DK8Y= @@ -479,28 +422,20 @@ github.com/shoenig/test v0.6.3/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnj github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0 h1:mj/nMDAwTBiaCqMEs4cYCqF7pO6Np7vhy1D1wcQGz+E= github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0/go.mod h1:919LwcH0M7/W4fcZ0/jy0qGght1GIhqyS/EgWGH2j5Q= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= 
-github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I= -github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= -github.com/smallnest/chanx v0.0.0-20221229104322-eb4c998d2072 h1:Txo4SXVJq/OgEjwgkWoxkMoTjGlcrgsQE/XSghjmu0w= -github.com/smallnest/chanx v0.0.0-20221229104322-eb4c998d2072/go.mod h1:+4nWMF0+CqEcU74SnX2NxaGqZ8zX4pcQ8Jcs77DbX5A= +github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/smallnest/chanx v1.2.1-0.20240521153536-01121e21ff99 h1:fmanhZtn5RKRljCjX46H+Q9/PECsHbflXm0RdrnK9e4= +github.com/smallnest/chanx v1.2.1-0.20240521153536-01121e21ff99/go.mod h1:+4nWMF0+CqEcU74SnX2NxaGqZ8zX4pcQ8Jcs77DbX5A= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E= -github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= -github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= -github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= -github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/cobra v1.0.0 h1:6m/oheQuQ13N9ks4hubMG6BnvwOeaJrqSPLahSnczz8= -github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE= -github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= -github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= +github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= @@ -542,12 +477,10 @@ github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7Am github.com/tklauser/numcpus v0.3.0/go.mod h1:yFGUr7TUHQRAhyqBcEg0Ge34zDBAsIvJJcyE6boqnA8= github.com/tklauser/numcpus v0.6.0 h1:kebhY2Qt+3U6RNK7UqpYNA+tJ23IBEGKkB7JQBfDYms= github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4= -github.com/tmc/grpc-websocket-proxy 
v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 h1:j6JEOq5QWFker+d7mFQYOhjTZonQ7YkLTHm56dbn+yM= -github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 h1:6fotK7otjonDflCTK0BCfls4SPy3NcCVb5dqqmbRknE= +github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75/go.mod h1:KO6IkyS8Y3j8OdNO85qEYBsRPuteD+YciPomcXdrMnk= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= -github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= github.com/ugorji/go/codec v0.0.0-20181022190402-e5e69e061d4f/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= @@ -557,7 +490,6 @@ github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4d github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/unrolled/render v1.0.1 h1:VDDnQQVfBMsOsp3VaCJszSO0nkBIVEYoPWeRThk9spY= github.com/unrolled/render v1.0.1/go.mod h1:gN9T0NhL4Bfbwu8ann7Ry/TGHYfosul+J0obPf6NBdM= -github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI= github.com/urfave/negroni v0.3.0 h1:PaXOb61mWeZJxc1Ji2xJjpVg9QfPo0rrB+lHyBxGNSU= github.com/urfave/negroni v0.3.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4= @@ -565,36 +497,35 @@ github.com/vmihailenco/msgpack/v5 v5.3.5 h1:5gO0H1iULLWGhs2H5tbAHIZTV8/cYafcFOr9 github.com/vmihailenco/msgpack/v5 v5.3.5/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc= github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= -github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= -github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= -github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 h1:S2dVYn90KE98chqDkyE9Z4N61UnQd+KOfgp5Iu53llk= +github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= -go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= -go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= -go.etcd.io/bbolt v1.3.6/go.mod 
h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= -go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793 h1:fqmtdYQlwZ/vKWSz5amW+a4cnjg23ojz5iL7rjf08Wg= -go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793/go.mod h1:eBhtbxXP1qpW0F6+WxoJ64DM1Mrfx46PHtVxEdkLe0I= +go.etcd.io/bbolt v1.3.9 h1:8x7aARPEXiXbHmtUwAIv7eV2fQFHrLLavdiJ3uzJXoI= +go.etcd.io/bbolt v1.3.9/go.mod h1:zaO32+Ti0PK1ivdPtgMESzuzL2VPoIG1PCQNvOdo/dE= +go.etcd.io/etcd v0.5.0-alpha.5.0.20240320135013-950cd5fbe6ca h1:LCc0GAhfJ+qDqnUbE7ybQ0mTz1dNRn2iiM6e183p/5E= +go.etcd.io/etcd v0.5.0-alpha.5.0.20240320135013-950cd5fbe6ca/go.mod h1:1AyK+XVcIwjbjw5EYrhT+IiMYSgRZTohGb2ceZ0/US8= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.10.0 h1:9qC72Qh0+3MqyJbAn8YU5xVq1frD8bn3JtD2oXtafVQ= go.uber.org/atomic v1.10.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= +go.uber.org/automaxprocs v1.5.3 h1:kWazyxZUrS3Gs4qUpbwo5kEIMGe/DAvi5Z4tl2NW4j8= +go.uber.org/automaxprocs v1.5.3/go.mod h1:eRbA25aqJrxAbsLO0xy5jVwPt7FQnRgjW+efnwa1WM0= go.uber.org/dig v1.9.0 h1:pJTDXKEhRqBI8W7rU7kwT5EgyRZuSMVSFcZolOvKK9U= go.uber.org/dig v1.9.0/go.mod h1:X34SnWGr8Fyla9zQNO2GSO2D+TIuqB14OS8JhYocIyw= go.uber.org/fx v1.12.0 h1:+1+3Cz9M0dFMPy9SW9XUIUHye8bnPUm7q7DroNGWYG4= go.uber.org/fx v1.12.0/go.mod h1:egT3Kyg1JFYQkvKLZ3EsykxkNrZxgXS+gKoKo7abERY= go.uber.org/goleak v0.10.0/go.mod h1:VCZuO8V8mFPlL0F5J5GK1rtHV3DrFcQ1R8ryq7FK0aI= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= -go.uber.org/goleak v1.2.0 h1:xqgm/S+aQvhWFTtR0XK3Jvg7z8kGV8P4X14IzwN3Eqk= -go.uber.org/goleak v1.2.0/go.mod h1:XJYK+MuIchqpmGmUSAzotztawfKvYLUIgg7guXrwVUo= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.4.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= @@ -605,16 +536,15 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.12.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= +go.uber.org/zap v1.18.1/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= -go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= -go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod 
h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= -golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190513172903-22d7a77e9e5f/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -623,9 +553,8 @@ golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc= -golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= +golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= +golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4 h1:QLureRX3moex6NVu/Lr4MGakp9FdA7sBHGBmvRW7NaM= @@ -652,32 +581,30 @@ golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181005035420-146acd28ed58/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net 
v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM= golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211123203042-d83791d6bcd9/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.16.0 h1:aDkGMBSYxElaoP81NpoUoz2oo2R2wHdZpGToUxfyQrQ= -golang.org/x/oauth2 v0.16.0/go.mod h1:hqZ+0LWXsiVoZpeld6jVt06P3adbS2Uu911W1SsJv2o= +golang.org/x/oauth2 v0.18.0 h1:09qnuIAgzdx1XplqJvW6CQqMCtGZykZWcXzPMPUusvI= +golang.org/x/oauth2 v0.18.0/go.mod h1:Wf7knwG0MPoWIMMBgFlEaSUDaKskp0dCfrlJRJXbBi8= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -687,27 +614,25 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ= -golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181228144115-9a3f9b0469bb/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -715,15 +640,16 @@ golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210816074244-15123e1e1f71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 
+golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -732,15 +658,13 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= -golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -775,41 +699,42 @@ gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca/go.mod h1:Y+Yx5eoAFn32cQvJ gonum.org/v1/netlib v0.0.0-20181029234149-ec6d1f5cefe6/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= +google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20181004005441-af9cb2a35e7f/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20231030173426-d783a09b4405 
h1:I6WNifs6pF9tNdSob2W24JtyxIYjzFB9qDlpUC76q+U= -google.golang.org/genproto v0.0.0-20231030173426-d783a09b4405/go.mod h1:3WDQMjmJk36UQhjQ89emUzb1mdaHcPeeAh4SCBKznB4= -google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b h1:CIC2YMXmIhYw6evmhPxBKJ4fmLbOFtXQN/GV3XOZR8k= -google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b/go.mod h1:IBQ646DjkDkvUIsVq/cc03FUFQ9wbZu7yE396YcL870= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 h1:Jyp0Hsi0bmHXG6k9eATXoYtjd6e2UzZ1SCn/wIupY14= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17/go.mod h1:oQ5rr10WTTMvP4A36n8JpR1OrO1BEiV4f78CneXZxkA= +google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda h1:wu/KJm9KJwpfHWhkkZGohVC6KRrc1oJNr4jwtQMOQXw= +google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda/go.mod h1:g2LLCvCeCSir/JJSWosk19BR4NVxGqHUC6rxIRsd7Aw= +google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda h1:b6F6WIV4xHHD0FA4oIyzU6mHWg2WI2X1RBehwa5QN38= +google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda/go.mod h1:AHcE/gZH76Bk/ROZhQphlRoWo5xKDEtz3eVEO1LfA8c= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda h1:LI5DOvAxUPMv/50agcLLoo+AdWc1irS9Rzz4vPuD1V4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.24.0/go.mod h1:XDChyiUovWa60DnaeDeZmSW86xtLtjtZbwvSiRnRtcA= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.26.0 h1:2dTRdpdFEEhJYQD8EMLB61nnrzSCTbG38PhqdhvOltg= -google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= +google.golang.org/grpc v1.62.1 h1:B4n+nfKzOICUXMgyrNd19h/I9oH0L1pizfk1d4zSgTk= +google.golang.org/grpc v1.62.1/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE= +google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9 h1:ATnmU8nL2NfIyTSiBvJVDIDIr3qBmeW+c7z7XU21eWs= +google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9/go.mod h1:j5uROIAAgi3YmtiETMt1LW0d/lHqQ7wwrIY4uGRXLQ4= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= -google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= -gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 
v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= @@ -818,10 +743,8 @@ gopkg.in/go-playground/validator.v8 v8.18.2/go.mod h1:RX2a/7Ha8BgOhfk7j780h4/u/R gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= -gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= @@ -855,6 +778,5 @@ honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt moul.io/zapgorm2 v1.1.0 h1:qwAlMBYf+qJkJ7PAzJl4oCe6eS6QGiKAXUPeis0+RBE= moul.io/zapgorm2 v1.1.0/go.mod h1:emRfKjNqSzVj5lcgasBdovIXY1jSOwFz2GQZn1Rddks= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= -sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= -sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q= -sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/tools/pd-analysis/analysis/parse_log.go b/tools/pd-analysis/analysis/parse_log.go index 44ae617284f..f096e3fe380 100644 --- a/tools/pd-analysis/analysis/parse_log.go +++ b/tools/pd-analysis/analysis/parse_log.go @@ -42,7 +42,7 @@ type Interpreter interface { } // CompileRegex is to provide regexp for transfer counter. 
-func (c *TransferCounter) CompileRegex(operator string) (*regexp.Regexp, error) { +func (*TransferCounter) CompileRegex(operator string) (*regexp.Regexp, error) { var r *regexp.Regexp var err error @@ -64,7 +64,7 @@ func (c *TransferCounter) CompileRegex(operator string) (*regexp.Regexp, error) return r, err } -func (c *TransferCounter) parseLine(content string, r *regexp.Regexp) ([]uint64, error) { +func parseLine(content string, r *regexp.Regexp) ([]uint64, error) { results := make([]uint64, 0, 4) subStrings := r.FindStringSubmatch(content) if len(subStrings) == 0 { @@ -78,9 +78,8 @@ func (c *TransferCounter) parseLine(content string, r *regexp.Regexp) ([]uint64, results = append(results, uint64(num)) } return results, nil - } else { - return results, errors.New("Can't parse Log, with " + content) } + return results, errors.New("Can't parse Log, with " + content) } func forEachLine(filename string, solve func(string) error) error { @@ -116,7 +115,7 @@ func forEachLine(filename string, solve func(string) error) error { func isExpectTime(expect, layout string, isBeforeThanExpect bool) func(time.Time) bool { expectTime, err := time.Parse(layout, expect) if err != nil { - return func(current time.Time) bool { + return func(_ time.Time) bool { return true } } @@ -142,14 +141,13 @@ func currentTime(layout string) func(content string) (time.Time, error) { return time.Parse(layout, result[1]) } else if len(result) == 0 { return time.Time{}, nil - } else { - return time.Time{}, errors.New("There is no valid time in log with " + content) } + return time.Time{}, errors.New("There is no valid time in log with " + content) } } // ParseLog is to parse log for transfer counter. -func (c *TransferCounter) ParseLog(filename, start, end, layout string, r *regexp.Regexp) error { +func (*TransferCounter) ParseLog(filename, start, end, layout string, r *regexp.Regexp) error { afterStart := isExpectTime(start, layout, false) beforeEnd := isExpectTime(end, layout, true) getCurrent := currentTime(layout) @@ -161,7 +159,7 @@ func (c *TransferCounter) ParseLog(filename, start, end, layout string, r *regex } // if current line time between start and end if afterStart(current) && beforeEnd(current) { - results, err := c.parseLine(content, r) + results, err := parseLine(content, r) if err != nil { return err } diff --git a/tools/pd-analysis/analysis/parse_log_test.go b/tools/pd-analysis/analysis/parse_log_test.go index ffdcb2137c0..345f70959f8 100644 --- a/tools/pd-analysis/analysis/parse_log_test.go +++ b/tools/pd-analysis/analysis/parse_log_test.go @@ -23,7 +23,7 @@ import ( func transferCounterParseLog(operator, content string, expect []uint64) bool { r, _ := GetTransferCounter().CompileRegex(operator) - results, _ := GetTransferCounter().parseLine(content, r) + results, _ := parseLine(content, r) if len(results) != len(expect) { return false } diff --git a/tools/pd-api-bench/cases/cases.go b/tools/pd-api-bench/cases/cases.go index 59ea6337115..72986df5ed8 100644 --- a/tools/pd-api-bench/cases/cases.go +++ b/tools/pd-api-bench/cases/cases.go @@ -17,11 +17,15 @@ package cases import ( "context" "fmt" - "log" "math/rand" + "strconv" + "time" + "github.com/pingcap/log" pd "github.com/tikv/pd/client" pdHttp "github.com/tikv/pd/client/http" + "go.etcd.io/etcd/clientv3" + "go.uber.org/zap" ) var ( @@ -33,6 +37,8 @@ var ( storesID []uint64 ) +const defaultKeyLen = 56 + // InitCluster initializes the cluster. 
func InitCluster(ctx context.Context, cli pd.Client, httpCli pdHttp.Client) error { statsResp, err := httpCli.GetRegionStatusByKeyRange(ctx, pdHttp.NewKeyRange([]byte(""), []byte("")), false) @@ -50,7 +56,7 @@ func InitCluster(ctx context.Context, cli pd.Client, httpCli pdHttp.Client) erro for _, store := range stores { storesID = append(storesID, store.GetId()) } - log.Printf("This cluster has region %d, and store %d[%v]", totalRegion, totalStore, storesID) + log.Info("init cluster info", zap.Int("total-region", totalRegion), zap.Int("total-store", totalStore), zap.Any("store-ids", storesID)) return nil } @@ -111,46 +117,60 @@ func (c *baseCase) GetConfig() *Config { return c.cfg.Clone() } +// ETCDCase is the interface for all etcd api cases. +type ETCDCase interface { + Case + Init(context.Context, *clientv3.Client) error + Unary(context.Context, *clientv3.Client) error +} + +// ETCDCreateFn is function type to create ETCDCase. +type ETCDCreateFn func() ETCDCase + +// ETCDCaseFnMap is the map for all ETCD case creation function. +var ETCDCaseFnMap = map[string]ETCDCreateFn{ + "Get": newGetKV(), + "Put": newPutKV(), + "Delete": newDeleteKV(), + "Txn": newTxnKV(), +} + // GRPCCase is the interface for all gRPC cases. type GRPCCase interface { Case Unary(context.Context, pd.Client) error } -// GRPCCraeteFn is function type to create GRPCCase. -type GRPCCraeteFn func() GRPCCase +// GRPCCreateFn is function type to create GRPCCase. +type GRPCCreateFn func() GRPCCase // GRPCCaseFnMap is the map for all gRPC case creation function. -var GRPCCaseFnMap = map[string]GRPCCraeteFn{ - "GetRegion": newGetRegion(), - "GetRegionEnableFollower": newGetRegionEnableFollower(), - "GetStore": newGetStore(), - "GetStores": newGetStores(), - "ScanRegions": newScanRegions(), - "Tso": newTso(), +var GRPCCaseFnMap = map[string]GRPCCreateFn{ + "GetRegion": newGetRegion(), + "GetRegionEnableFollower": newGetRegionEnableFollower(), + "GetStore": newGetStore(), + "GetStores": newGetStores(), + "ScanRegions": newScanRegions(), + "Tso": newTso(), + "UpdateGCSafePoint": newUpdateGCSafePoint(), + "UpdateServiceGCSafePoint": newUpdateServiceGCSafePoint(), } -// GRPCCaseMap is the map for all gRPC case creation function. -var GRPCCaseMap = map[string]GRPCCase{} - // HTTPCase is the interface for all HTTP cases. type HTTPCase interface { Case Do(context.Context, pdHttp.Client) error } -// HTTPCraeteFn is function type to create HTTPCase. -type HTTPCraeteFn func() HTTPCase +// HTTPCreateFn is function type to create HTTPCase. +type HTTPCreateFn func() HTTPCase // HTTPCaseFnMap is the map for all HTTP case creation function. -var HTTPCaseFnMap = map[string]HTTPCraeteFn{ +var HTTPCaseFnMap = map[string]HTTPCreateFn{ "GetRegionStatus": newRegionStats(), "GetMinResolvedTS": newMinResolvedTS(), } -// HTTPCaseMap is the map for all HTTP cases. 
-var HTTPCaseMap = map[string]HTTPCase{} - type minResolvedTS struct { *baseCase } @@ -169,7 +189,7 @@ func newMinResolvedTS() func() HTTPCase { func (c *minResolvedTS) Do(ctx context.Context, cli pdHttp.Client) error { minResolvedTS, storesMinResolvedTS, err := cli.GetMinResolvedTSByStoresIDs(ctx, storesID) if Debug { - log.Printf("Do %s: minResolvedTS: %d storesMinResolvedTS: %v err: %v", c.name, minResolvedTS, storesMinResolvedTS, err) + log.Info("do HTTP case", zap.String("case", c.name), zap.Uint64("min-resolved-ts", minResolvedTS), zap.Any("store-min-resolved-ts", storesMinResolvedTS), zap.Error(err)) } if err != nil { return err @@ -203,10 +223,59 @@ func (c *regionsStats) Do(ctx context.Context, cli pdHttp.Client) error { startID := c.regionSample*random*4 + 1 endID := c.regionSample*(random+1)*4 + 1 regionStats, err := cli.GetRegionStatusByKeyRange(ctx, - pdHttp.NewKeyRange(generateKeyForSimulator(startID, 56), generateKeyForSimulator(endID, 56)), false) + pdHttp.NewKeyRange(generateKeyForSimulator(startID), generateKeyForSimulator(endID)), false) if Debug { - log.Printf("Do %s: regionStats: %v err: %v", c.name, regionStats, err) + log.Info("do HTTP case", zap.String("case", c.name), zap.Any("region-stats", regionStats), zap.Error(err)) + } + if err != nil { + return err + } + return nil +} + +type updateGCSafePoint struct { + *baseCase +} + +func newUpdateGCSafePoint() func() GRPCCase { + return func() GRPCCase { + return &updateGCSafePoint{ + baseCase: &baseCase{ + name: "UpdateGCSafePoint", + cfg: newConfig(), + }, + } + } +} + +func (*updateGCSafePoint) Unary(ctx context.Context, cli pd.Client) error { + s := time.Now().Unix() + _, err := cli.UpdateGCSafePoint(ctx, uint64(s)) + if err != nil { + return err } + return nil +} + +type updateServiceGCSafePoint struct { + *baseCase +} + +func newUpdateServiceGCSafePoint() func() GRPCCase { + return func() GRPCCase { + return &updateServiceGCSafePoint{ + baseCase: &baseCase{ + name: "UpdateServiceGCSafePoint", + cfg: newConfig(), + }, + } + } +} + +func (*updateServiceGCSafePoint) Unary(ctx context.Context, cli pd.Client) error { + s := time.Now().Unix() + id := rand.Int63n(100) + 1 + _, err := cli.UpdateServiceGCSafePoint(ctx, strconv.FormatInt(id, 10), id, uint64(s)) if err != nil { return err } @@ -228,9 +297,9 @@ func newGetRegion() func() GRPCCase { } } -func (c *getRegion) Unary(ctx context.Context, cli pd.Client) error { +func (*getRegion) Unary(ctx context.Context, cli pd.Client) error { id := rand.Intn(totalRegion)*4 + 1 - _, err := cli.GetRegion(ctx, generateKeyForSimulator(id, 56)) + _, err := cli.GetRegion(ctx, generateKeyForSimulator(id)) if err != nil { return err } @@ -252,9 +321,9 @@ func newGetRegionEnableFollower() func() GRPCCase { } } -func (c *getRegionEnableFollower) Unary(ctx context.Context, cli pd.Client) error { +func (*getRegionEnableFollower) Unary(ctx context.Context, cli pd.Client) error { id := rand.Intn(totalRegion)*4 + 1 - _, err := cli.GetRegion(ctx, generateKeyForSimulator(id, 56), pd.WithAllowFollowerHandle()) + _, err := cli.GetRegion(ctx, generateKeyForSimulator(id), pd.WithAllowFollowerHandle()) if err != nil { return err } @@ -283,7 +352,7 @@ func (c *scanRegions) Unary(ctx context.Context, cli pd.Client) error { random := rand.Intn(upperBound) startID := c.regionSample*random*4 + 1 endID := c.regionSample*(random+1)*4 + 1 - _, err := cli.ScanRegions(ctx, generateKeyForSimulator(startID, 56), generateKeyForSimulator(endID, 56), c.regionSample) + _, err := cli.ScanRegions(ctx, 
generateKeyForSimulator(startID), generateKeyForSimulator(endID), c.regionSample) if err != nil { return err } @@ -305,7 +374,7 @@ func newTso() func() GRPCCase { } } -func (c *tso) Unary(ctx context.Context, cli pd.Client) error { +func (*tso) Unary(ctx context.Context, cli pd.Client) error { _, _, err := cli.GetTS(ctx) if err != nil { return err @@ -328,7 +397,7 @@ func newGetStore() func() GRPCCase { } } -func (c *getStore) Unary(ctx context.Context, cli pd.Client) error { +func (*getStore) Unary(ctx context.Context, cli pd.Client) error { storeIdx := rand.Intn(totalStore) _, err := cli.GetStore(ctx, storesID[storeIdx]) if err != nil { @@ -352,7 +421,7 @@ func newGetStores() func() GRPCCase { } } -func (c *getStores) Unary(ctx context.Context, cli pd.Client) error { +func (*getStores) Unary(ctx context.Context, cli pd.Client) error { _, err := cli.GetAllStores(ctx) if err != nil { return err @@ -360,9 +429,107 @@ func (c *getStores) Unary(ctx context.Context, cli pd.Client) error { return nil } -// nolint -func generateKeyForSimulator(id int, keyLen int) []byte { - k := make([]byte, keyLen) +func generateKeyForSimulator(id int) []byte { + k := make([]byte, defaultKeyLen) copy(k, fmt.Sprintf("%010d", id)) return k } + +type getKV struct { + *baseCase +} + +func newGetKV() func() ETCDCase { + return func() ETCDCase { + return &getKV{ + baseCase: &baseCase{ + name: "Get", + cfg: newConfig(), + }, + } + } +} + +func (*getKV) Init(ctx context.Context, cli *clientv3.Client) error { + for i := 0; i < 100; i++ { + _, err := cli.Put(ctx, fmt.Sprintf("/test/0001/%4d", i), fmt.Sprintf("%4d", i)) + if err != nil { + return err + } + } + return nil +} + +func (*getKV) Unary(ctx context.Context, cli *clientv3.Client) error { + _, err := cli.Get(ctx, "/test/0001", clientv3.WithPrefix()) + return err +} + +type putKV struct { + *baseCase +} + +func newPutKV() func() ETCDCase { + return func() ETCDCase { + return &putKV{ + baseCase: &baseCase{ + name: "Put", + cfg: newConfig(), + }, + } + } +} + +func (*putKV) Init(context.Context, *clientv3.Client) error { return nil } + +func (*putKV) Unary(ctx context.Context, cli *clientv3.Client) error { + _, err := cli.Put(ctx, "/test/0001/0000", "test") + return err +} + +type deleteKV struct { + *baseCase +} + +func newDeleteKV() func() ETCDCase { + return func() ETCDCase { + return &deleteKV{ + baseCase: &baseCase{ + name: "Put", + cfg: newConfig(), + }, + } + } +} + +func (*deleteKV) Init(context.Context, *clientv3.Client) error { return nil } + +func (*deleteKV) Unary(ctx context.Context, cli *clientv3.Client) error { + _, err := cli.Delete(ctx, "/test/0001/0000") + return err +} + +type txnKV struct { + *baseCase +} + +func newTxnKV() func() ETCDCase { + return func() ETCDCase { + return &txnKV{ + baseCase: &baseCase{ + name: "Put", + cfg: newConfig(), + }, + } + } +} + +func (*txnKV) Init(context.Context, *clientv3.Client) error { return nil } + +func (*txnKV) Unary(ctx context.Context, cli *clientv3.Client) error { + txn := cli.Txn(ctx) + txn = txn.If(clientv3.Compare(clientv3.Value("/test/0001/0000"), "=", "test")) + txn = txn.Then(clientv3.OpPut("/test/0001/0000", "test2")) + _, err := txn.Commit() + return err +} diff --git a/tools/pd-api-bench/cases/controller.go b/tools/pd-api-bench/cases/controller.go index 2a4561a3d2a..42eeafe4597 100644 --- a/tools/pd-api-bench/cases/controller.go +++ b/tools/pd-api-bench/cases/controller.go @@ -23,6 +23,7 @@ import ( "github.com/pingcap/log" pd "github.com/tikv/pd/client" pdHttp "github.com/tikv/pd/client/http" + 
"go.etcd.io/etcd/clientv3" "go.uber.org/zap" ) @@ -34,21 +35,25 @@ type Coordinator struct { httpClients []pdHttp.Client gRPCClients []pd.Client + etcdClients []*clientv3.Client http map[string]*httpController grpc map[string]*gRPCController + etcd map[string]*etcdController mu sync.RWMutex } // NewCoordinator returns a new coordinator. -func NewCoordinator(ctx context.Context, httpClients []pdHttp.Client, gRPCClients []pd.Client) *Coordinator { +func NewCoordinator(ctx context.Context, httpClients []pdHttp.Client, gRPCClients []pd.Client, etcdClients []*clientv3.Client) *Coordinator { return &Coordinator{ ctx: ctx, httpClients: httpClients, gRPCClients: gRPCClients, + etcdClients: etcdClients, http: make(map[string]*httpController), grpc: make(map[string]*gRPCController), + etcd: make(map[string]*etcdController), } } @@ -59,7 +64,7 @@ func (c *Coordinator) GetHTTPCase(name string) (*Config, error) { if controller, ok := c.http[name]; ok { return controller.GetConfig(), nil } - return nil, errors.Errorf("case %v does not exist.", name) + return nil, errors.Errorf("case %v does not exist", name) } // GetGRPCCase returns the gRPC case config. @@ -69,7 +74,17 @@ func (c *Coordinator) GetGRPCCase(name string) (*Config, error) { if controller, ok := c.grpc[name]; ok { return controller.GetConfig(), nil } - return nil, errors.Errorf("case %v does not exist.", name) + return nil, errors.Errorf("case %v does not exist", name) +} + +// GetETCDCase returns the etcd case config. +func (c *Coordinator) GetETCDCase(name string) (*Config, error) { + c.mu.RLock() + defer c.mu.RUnlock() + if controller, ok := c.etcd[name]; ok { + return controller.GetConfig(), nil + } + return nil, errors.Errorf("case %v does not exist", name) } // GetAllHTTPCases returns the all HTTP case configs. @@ -94,6 +109,17 @@ func (c *Coordinator) GetAllGRPCCases() map[string]*Config { return ret } +// GetAllETCDCases returns the all etcd case configs. +func (c *Coordinator) GetAllETCDCases() map[string]*Config { + c.mu.RLock() + defer c.mu.RUnlock() + ret := make(map[string]*Config) + for name, c := range c.etcd { + ret[name] = c.GetConfig() + } + return ret +} + // SetHTTPCase sets the config for the specific case. func (c *Coordinator) SetHTTPCase(name string, cfg *Config) error { c.mu.Lock() @@ -133,7 +159,29 @@ func (c *Coordinator) SetGRPCCase(name string, cfg *Config) error { } controller.run() } else { - return errors.Errorf("HTTP case %s not implemented", name) + return errors.Errorf("gRPC case %s not implemented", name) + } + return nil +} + +// SetETCDCase sets the config for the specific case. 
+func (c *Coordinator) SetETCDCase(name string, cfg *Config) error { + c.mu.Lock() + defer c.mu.Unlock() + if fn, ok := ETCDCaseFnMap[name]; ok { + var controller *etcdController + if controller, ok = c.etcd[name]; !ok { + controller = newEtcdController(c.ctx, c.etcdClients, fn) + c.etcd[name] = controller + } + controller.stop() + controller.SetQPS(cfg.QPS) + if cfg.Burst > 0 { + controller.SetBurst(cfg.Burst) + } + controller.run() + } else { + return errors.Errorf("etcd case %s not implemented", name) } return nil } @@ -148,7 +196,7 @@ type httpController struct { wg sync.WaitGroup } -func newHTTPController(ctx context.Context, clis []pdHttp.Client, fn HTTPCraeteFn) *httpController { +func newHTTPController(ctx context.Context, clis []pdHttp.Client, fn HTTPCreateFn) *httpController { c := &httpController{ pctx: ctx, clients: clis, @@ -166,27 +214,31 @@ func (c *httpController) run() { qps := c.GetQPS() burst := c.GetBurst() cliNum := int64(len(c.clients)) - tt := time.Duration(base/qps*burst*cliNum) * time.Microsecond + tt := time.Duration(base*burst*cliNum/qps) * time.Microsecond log.Info("begin to run http case", zap.String("case", c.Name()), zap.Int64("qps", qps), zap.Int64("burst", burst), zap.Duration("interval", tt)) for _, hCli := range c.clients { c.wg.Add(1) go func(hCli pdHttp.Client) { defer c.wg.Done() - var ticker = time.NewTicker(tt) - defer ticker.Stop() - for { - select { - case <-ticker.C: - for i := int64(0); i < burst; i++ { - err := c.Do(c.ctx, hCli) - if err != nil { - log.Error("meet erorr when doing HTTP request", zap.String("case", c.Name()), zap.Error(err)) + c.wg.Add(int(burst)) + for i := int64(0); i < burst; i++ { + go func() { + defer c.wg.Done() + var ticker = time.NewTicker(tt) + defer ticker.Stop() + for { + select { + case <-ticker.C: + err := c.Do(c.ctx, hCli) + if err != nil { + log.Error("meet erorr when doing HTTP request", zap.String("case", c.Name()), zap.Error(err)) + } + case <-c.ctx.Done(): + log.Info("Got signal to exit running HTTP case") + return } } - case <-c.ctx.Done(): - log.Info("Got signal to exit running HTTP case") - return - } + }() } }(hCli) } @@ -213,7 +265,7 @@ type gRPCController struct { wg sync.WaitGroup } -func newGRPCController(ctx context.Context, clis []pd.Client, fn GRPCCraeteFn) *gRPCController { +func newGRPCController(ctx context.Context, clis []pd.Client, fn GRPCCreateFn) *gRPCController { c := &gRPCController{ pctx: ctx, clients: clis, @@ -231,27 +283,31 @@ func (c *gRPCController) run() { qps := c.GetQPS() burst := c.GetBurst() cliNum := int64(len(c.clients)) - tt := time.Duration(base/qps*burst*cliNum) * time.Microsecond + tt := time.Duration(base*burst*cliNum/qps) * time.Microsecond log.Info("begin to run gRPC case", zap.String("case", c.Name()), zap.Int64("qps", qps), zap.Int64("burst", burst), zap.Duration("interval", tt)) for _, cli := range c.clients { c.wg.Add(1) go func(cli pd.Client) { defer c.wg.Done() - var ticker = time.NewTicker(tt) - defer ticker.Stop() - for { - select { - case <-ticker.C: - for i := int64(0); i < burst; i++ { - err := c.Unary(c.ctx, cli) - if err != nil { - log.Error("meet erorr when doing gRPC request", zap.String("case", c.Name()), zap.Error(err)) + c.wg.Add(int(burst)) + for i := int64(0); i < burst; i++ { + go func() { + defer c.wg.Done() + var ticker = time.NewTicker(tt) + defer ticker.Stop() + for { + select { + case <-ticker.C: + err := c.Unary(c.ctx, cli) + if err != nil { + log.Error("meet erorr when doing gRPC request", zap.String("case", c.Name()), zap.Error(err)) + } + 
case <-c.ctx.Done(): + log.Info("Got signal to exit running gRPC case") + return } } - case <-c.ctx.Done(): - log.Info("Got signal to exit running gRPC case") - return - } + }() } }(cli) } @@ -266,3 +322,77 @@ func (c *gRPCController) stop() { c.cancel = nil c.wg.Wait() } + +type etcdController struct { + ETCDCase + clients []*clientv3.Client + pctx context.Context + + ctx context.Context + cancel context.CancelFunc + + wg sync.WaitGroup +} + +func newEtcdController(ctx context.Context, clis []*clientv3.Client, fn ETCDCreateFn) *etcdController { + c := &etcdController{ + pctx: ctx, + clients: clis, + ETCDCase: fn(), + } + return c +} + +// run tries to run the gRPC api bench. +func (c *etcdController) run() { + if c.GetQPS() <= 0 || c.cancel != nil { + return + } + c.ctx, c.cancel = context.WithCancel(c.pctx) + qps := c.GetQPS() + burst := c.GetBurst() + cliNum := int64(len(c.clients)) + tt := time.Duration(base*burst*cliNum/qps) * time.Microsecond + log.Info("begin to run etcd case", zap.String("case", c.Name()), zap.Int64("qps", qps), zap.Int64("burst", burst), zap.Duration("interval", tt)) + err := c.Init(c.ctx, c.clients[0]) + if err != nil { + log.Error("init error", zap.String("case", c.Name()), zap.Error(err)) + return + } + for _, cli := range c.clients { + c.wg.Add(1) + go func(cli *clientv3.Client) { + defer c.wg.Done() + c.wg.Add(int(burst)) + for i := int64(0); i < burst; i++ { + go func() { + defer c.wg.Done() + var ticker = time.NewTicker(tt) + defer ticker.Stop() + for { + select { + case <-ticker.C: + err := c.Unary(c.ctx, cli) + if err != nil { + log.Error("meet erorr when doing etcd request", zap.String("case", c.Name()), zap.Error(err)) + } + case <-c.ctx.Done(): + log.Info("Got signal to exit running etcd case") + return + } + } + }() + } + }(cli) + } +} + +// stop stops the etcd api bench. +func (c *etcdController) stop() { + if c.cancel == nil { + return + } + c.cancel() + c.cancel = nil + c.wg.Wait() +} diff --git a/tools/pd-api-bench/config/config.go b/tools/pd-api-bench/config/config.go index 8898c0e3083..d1048c0da72 100644 --- a/tools/pd-api-bench/config/config.go +++ b/tools/pd-api-bench/config/config.go @@ -15,7 +15,6 @@ package config import ( - "github.com/BurntSushi/toml" "github.com/pingcap/log" "github.com/pkg/errors" flag "github.com/spf13/pflag" @@ -45,6 +44,7 @@ type Config struct { // only for init HTTP map[string]cases.Config `toml:"http" json:"http"` GRPC map[string]cases.Config `toml:"grpc" json:"grpc"` + ETCD map[string]cases.Config `toml:"etcd" json:"etcd"` } // NewConfig return a set of settings. @@ -72,14 +72,13 @@ func (c *Config) Parse(arguments []string) error { } // Load config file if specified. - var meta *toml.MetaData if c.configFile != "" { - meta, err = configutil.ConfigFromFile(c, c.configFile) + _, err = configutil.ConfigFromFile(c, c.configFile) if err != nil { return err } } - c.Adjust(meta) + c.Adjust() // Parse again to replace with command line options. 
err = c.flagSet.Parse(arguments) @@ -108,10 +107,16 @@ func (c *Config) InitCoordinator(co *cases.Coordinator) { log.Error("create gRPC case failed", zap.Error(err)) } } + for name, cfg := range c.ETCD { + err := co.SetETCDCase(name, &cfg) + if err != nil { + log.Error("create etcd case failed", zap.Error(err)) + } + } } // Adjust is used to adjust configurations -func (c *Config) Adjust(meta *toml.MetaData) { +func (c *Config) Adjust() { if len(c.Log.Format) == 0 { c.Log.Format = "text" } diff --git a/tools/pd-api-bench/config/simconfig.toml b/tools/pd-api-bench/config/simconfig.toml index 9a05001973b..48e5a2595ba 100644 --- a/tools/pd-api-bench/config/simconfig.toml +++ b/tools/pd-api-bench/config/simconfig.toml @@ -9,4 +9,8 @@ pd = "127.0.0.1:2379" burst = 1 [grpc.GetStores] qps = 1000 + burst = 1 +[etcd] + [etcd.Get] + qps = 1 burst = 1 \ No newline at end of file diff --git a/tools/pd-api-bench/main.go b/tools/pd-api-bench/main.go index 681c3579012..f9feeeea580 100644 --- a/tools/pd-api-bench/main.go +++ b/tools/pd-api-bench/main.go @@ -40,6 +40,7 @@ import ( "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/tools/pd-api-bench/cases" "github.com/tikv/pd/tools/pd-api-bench/config" + "go.etcd.io/etcd/clientv3" "go.uber.org/zap" "google.golang.org/grpc" "google.golang.org/grpc/keepalive" @@ -119,6 +120,10 @@ func main() { pdClis[i] = newPDClient(ctx, cfg) pdClis[i].UpdateOption(pd.EnableFollowerHandle, true) } + etcdClis := make([]*clientv3.Client, cfg.Client) + for i := int64(0); i < cfg.Client; i++ { + etcdClis[i] = newEtcdClient(cfg) + } httpClis := make([]pdHttp.Client, cfg.Client) for i := int64(0); i < cfg.Client; i++ { sd := pdClis[i].GetServiceDiscovery() @@ -129,7 +134,7 @@ func main() { log.Fatal("InitCluster error", zap.Error(err)) } - coordinator := cases.NewCoordinator(ctx, httpClis, pdClis) + coordinator := cases.NewCoordinator(ctx, httpClis, pdClis, etcdClis) hcaseStr := strings.Split(httpCases, ",") for _, str := range hcaseStr { @@ -158,6 +163,9 @@ func main() { for _, cli := range httpClis { cli.Close() } + for _, cli := range etcdClis { + cli.Close() + } log.Info("Exit") switch sig { case syscall.SIGTERM: @@ -276,6 +284,23 @@ func runHTTPServer(cfg *config.Config, co *cases.Coordinator) { co.SetGRPCCase(name, cfg) c.String(http.StatusOK, "") }) + engine.POST("config/etcd/all", func(c *gin.Context) { + var input map[string]cases.Config + if err := c.ShouldBindJSON(&input); err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + for name, cfg := range input { + co.SetETCDCase(name, &cfg) + } + c.String(http.StatusOK, "") + }) + engine.POST("config/etcd/:name", func(c *gin.Context) { + name := c.Param("name") + cfg := getCfg(c) + co.SetETCDCase(name, cfg) + c.String(http.StatusOK, "") + }) engine.GET("config/http/all", func(c *gin.Context) { all := co.GetAllHTTPCases() @@ -303,24 +328,55 @@ func runHTTPServer(cfg *config.Config, co *cases.Coordinator) { } c.IndentedJSON(http.StatusOK, cfg) }) - // nolint + engine.GET("config/etcd/all", func(c *gin.Context) { + all := co.GetAllETCDCases() + c.IndentedJSON(http.StatusOK, all) + }) + engine.GET("config/etcd/:name", func(c *gin.Context) { + name := c.Param("name") + cfg, err := co.GetETCDCase(name) + if err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + c.IndentedJSON(http.StatusOK, cfg) + }) engine.Run(cfg.StatusAddr) } -func trimHTTPPrefix(str string) string { - str = strings.TrimPrefix(str, "http://") - str = strings.TrimPrefix(str, "https://") - return str +const ( + 
keepaliveTime = 10 * time.Second + keepaliveTimeout = 3 * time.Second +) + +func newEtcdClient(cfg *config.Config) *clientv3.Client { + lgc := zap.NewProductionConfig() + lgc.Encoding = log.ZapEncodingName + tlsCfg, err := tlsutil.TLSConfig{ + CAPath: cfg.CaPath, + CertPath: cfg.CertPath, + KeyPath: cfg.KeyPath, + }.ToTLSConfig() + if err != nil { + log.Fatal("fail to create etcd client", zap.Error(err)) + return nil + } + clientConfig := clientv3.Config{ + Endpoints: []string{cfg.PDAddr}, + DialTimeout: keepaliveTimeout, + TLS: tlsCfg, + LogConfig: &lgc, + } + client, err := clientv3.New(clientConfig) + if err != nil { + log.Fatal("fail to create pd client", zap.Error(err)) + } + return client } // newPDClient returns a pd client. func newPDClient(ctx context.Context, cfg *config.Config) pd.Client { - const ( - keepaliveTime = 10 * time.Second - keepaliveTimeout = 3 * time.Second - ) - - addrs := []string{trimHTTPPrefix(cfg.PDAddr)} + addrs := []string{cfg.PDAddr} pdCli, err := pd.NewClientWithContext(ctx, addrs, pd.SecurityOption{ CAPath: cfg.CaPath, CertPath: cfg.CertPath, diff --git a/tools/pd-backup/pdbackup/backup_test.go b/tools/pd-backup/pdbackup/backup_test.go index b35bf1e8a70..0ab9116ddbe 100644 --- a/tools/pd-backup/pdbackup/backup_test.go +++ b/tools/pd-backup/pdbackup/backup_test.go @@ -83,7 +83,7 @@ func setupServer() (*httptest.Server, *config.Config) { }, } - server := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, req *http.Request) { + server := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, _ *http.Request) { b, err := json.Marshal(serverConfig) if err != nil { res.WriteHeader(http.StatusInternalServerError) @@ -98,7 +98,7 @@ func setupServer() (*httptest.Server, *config.Config) { return server, serverConfig } -func (s *backupTestSuite) BeforeTest(suiteName, testName string) { +func (s *backupTestSuite) BeforeTest(string, string) { re := s.Require() ctx, cancel := context.WithTimeout(context.Background(), time.Second*3) defer cancel() @@ -124,7 +124,7 @@ func (s *backupTestSuite) BeforeTest(suiteName, testName string) { re.NoError(err) } -func (s *backupTestSuite) AfterTest(suiteName, testName string) { +func (s *backupTestSuite) AfterTest(string, string) { s.etcd.Close() } diff --git a/tools/pd-ctl/pdctl/command/cluster_command.go b/tools/pd-ctl/pdctl/command/cluster_command.go index 4fd7b8be61a..0b3cda8a867 100644 --- a/tools/pd-ctl/pdctl/command/cluster_command.go +++ b/tools/pd-ctl/pdctl/command/cluster_command.go @@ -14,18 +14,15 @@ package command -import ( - "context" - - "github.com/spf13/cobra" -) +import "github.com/spf13/cobra" // NewClusterCommand return a cluster subcommand of rootCmd func NewClusterCommand() *cobra.Command { cmd := &cobra.Command{ - Use: "cluster", - Short: "show the cluster information", - Run: showClusterCommandFunc, + Use: "cluster", + Short: "show the cluster information", + PersistentPreRunE: requirePDClient, + Run: showClusterCommandFunc, } cmd.AddCommand(NewClusterStatusCommand()) return cmd @@ -42,7 +39,7 @@ func NewClusterStatusCommand() *cobra.Command { } func showClusterCommandFunc(cmd *cobra.Command, _ []string) { - info, err := PDCli.GetCluster(context.Background()) + info, err := PDCli.GetCluster(cmd.Context()) if err != nil { cmd.Printf("Failed to get the cluster information: %s\n", err) return @@ -51,7 +48,7 @@ func showClusterCommandFunc(cmd *cobra.Command, _ []string) { } func showClusterStatusCommandFunc(cmd *cobra.Command, _ []string) { - status, err := 
PDCli.GetClusterStatus(context.Background()) + status, err := PDCli.GetClusterStatus(cmd.Context()) if err != nil { cmd.Printf("Failed to get the cluster status: %s\n", err) return diff --git a/tools/pd-ctl/pdctl/command/config_command.go b/tools/pd-ctl/pdctl/command/config_command.go index 873ea222a4c..0c3851350cc 100644 --- a/tools/pd-ctl/pdctl/command/config_command.go +++ b/tools/pd-ctl/pdctl/command/config_command.go @@ -212,23 +212,23 @@ func NewDeleteLabelPropertyConfigCommand() *cobra.Command { return sc } -func showConfigCommandFunc(cmd *cobra.Command, args []string) { +func showConfigCommandFunc(cmd *cobra.Command, _ []string) { header := buildHeader(cmd) allR, err := doRequest(cmd, configPrefix, http.MethodGet, header) if err != nil { cmd.Printf("Failed to get config: %s\n", err) return } - allData := make(map[string]interface{}) + allData := make(map[string]any) err = json.Unmarshal([]byte(allR), &allData) if err != nil { cmd.Printf("Failed to unmarshal config: %s\n", err) return } - data := make(map[string]interface{}) + data := make(map[string]any) data["replication"] = allData["replication"] - scheduleConfig := make(map[string]interface{}) + scheduleConfig := make(map[string]any) scheduleConfigData, err := json.Marshal(allData["schedule"]) if err != nil { cmd.Printf("Failed to marshal schedule config: %s\n", err) @@ -268,7 +268,7 @@ var hideConfig = []string{ "scheduler-max-waiting-operator", } -func showScheduleConfigCommandFunc(cmd *cobra.Command, args []string) { +func showScheduleConfigCommandFunc(cmd *cobra.Command, _ []string) { header := buildHeader(cmd) r, err := doRequest(cmd, schedulePrefix, http.MethodGet, header) if err != nil { @@ -278,7 +278,7 @@ func showScheduleConfigCommandFunc(cmd *cobra.Command, args []string) { cmd.Println(r) } -func showReplicationConfigCommandFunc(cmd *cobra.Command, args []string) { +func showReplicationConfigCommandFunc(cmd *cobra.Command, _ []string) { header := buildHeader(cmd) r, err := doRequest(cmd, replicatePrefix, http.MethodGet, header) if err != nil { @@ -288,7 +288,7 @@ func showReplicationConfigCommandFunc(cmd *cobra.Command, args []string) { cmd.Println(r) } -func showLabelPropertyConfigCommandFunc(cmd *cobra.Command, args []string) { +func showLabelPropertyConfigCommandFunc(cmd *cobra.Command, _ []string) { r, err := doRequest(cmd, labelPropertyPrefix, http.MethodGet, http.Header{}) if err != nil { cmd.Printf("Failed to get config: %s\n", err) @@ -297,7 +297,7 @@ func showLabelPropertyConfigCommandFunc(cmd *cobra.Command, args []string) { cmd.Println(r) } -func showAllConfigCommandFunc(cmd *cobra.Command, args []string) { +func showAllConfigCommandFunc(cmd *cobra.Command, _ []string) { header := buildHeader(cmd) r, err := doRequest(cmd, configPrefix, http.MethodGet, header) if err != nil { @@ -307,7 +307,7 @@ func showAllConfigCommandFunc(cmd *cobra.Command, args []string) { cmd.Println(r) } -func showClusterVersionCommandFunc(cmd *cobra.Command, args []string) { +func showClusterVersionCommandFunc(cmd *cobra.Command, _ []string) { r, err := doRequest(cmd, clusterVersionPrefix, http.MethodGet, http.Header{}) if err != nil { cmd.Printf("Failed to get cluster version: %s\n", err) @@ -316,7 +316,7 @@ func showClusterVersionCommandFunc(cmd *cobra.Command, args []string) { cmd.Println(r) } -func showReplicationModeCommandFunc(cmd *cobra.Command, args []string) { +func showReplicationModeCommandFunc(cmd *cobra.Command, _ []string) { r, err := doRequest(cmd, replicationModePrefix, http.MethodGet, http.Header{}) if err != nil { 
cmd.Printf("Failed to get replication mode config: %s\n", err) @@ -325,7 +325,7 @@ func showReplicationModeCommandFunc(cmd *cobra.Command, args []string) { cmd.Println(r) } -func showServerCommandFunc(cmd *cobra.Command, args []string) { +func showServerCommandFunc(cmd *cobra.Command, _ []string) { r, err := doRequest(cmd, pdServerPrefix, http.MethodGet, http.Header{}) if err != nil { cmd.Printf("Failed to get server config: %s\n", err) @@ -335,8 +335,8 @@ func showServerCommandFunc(cmd *cobra.Command, args []string) { } func postConfigDataWithPath(cmd *cobra.Command, key, value, path string) error { - var val interface{} - data := make(map[string]interface{}) + var val any + data := make(map[string]any) val, err := strconv.ParseFloat(value, 64) if err != nil { val = value @@ -381,7 +381,7 @@ func postLabelProperty(cmd *cobra.Command, action string, args []string) { cmd.Println(cmd.UsageString()) return } - input := map[string]interface{}{ + input := map[string]any{ "type": args[0], "action": action, "label-key": args[1], @@ -396,7 +396,7 @@ func setClusterVersionCommandFunc(cmd *cobra.Command, args []string) { cmd.Println(cmd.UsageString()) return } - input := map[string]interface{}{ + input := map[string]any{ "cluster-version": args[0], } postJSON(cmd, clusterVersionPrefix, input) @@ -404,20 +404,20 @@ func setClusterVersionCommandFunc(cmd *cobra.Command, args []string) { func setReplicationModeCommandFunc(cmd *cobra.Command, args []string) { if len(args) == 1 { - postJSON(cmd, replicationModePrefix, map[string]interface{}{"replication-mode": args[0]}) + postJSON(cmd, replicationModePrefix, map[string]any{"replication-mode": args[0]}) } else if len(args) == 3 { t := reflectutil.FindFieldByJSONTag(reflect.TypeOf(config.ReplicationModeConfig{}), []string{args[0], args[1]}) if t != nil && t.Kind() == reflect.Int { - // convert to number for numberic fields. + // convert to number for numeric fields. 
arg2, err := strconv.ParseInt(args[2], 10, 64) if err != nil { cmd.Printf("value %v cannot covert to number: %v", args[2], err) return } - postJSON(cmd, replicationModePrefix, map[string]interface{}{args[0]: map[string]interface{}{args[1]: arg2}}) + postJSON(cmd, replicationModePrefix, map[string]any{args[0]: map[string]any{args[1]: arg2}}) return } - postJSON(cmd, replicationModePrefix, map[string]interface{}{args[0]: map[string]string{args[1]: args[2]}}) + postJSON(cmd, replicationModePrefix, map[string]any{args[0]: map[string]string{args[1]: args[2]}}) } else { cmd.Println(cmd.UsageString()) } @@ -529,7 +529,7 @@ func NewPlacementRulesCommand() *cobra.Command { return c } -func enablePlacementRulesFunc(cmd *cobra.Command, args []string) { +func enablePlacementRulesFunc(cmd *cobra.Command, _ []string) { err := postConfigDataWithPath(cmd, "enable-placement-rules", "true", configPrefix) if err != nil { cmd.Printf("Failed to set config: %s\n", err) @@ -538,7 +538,7 @@ func enablePlacementRulesFunc(cmd *cobra.Command, args []string) { cmd.Println("Success!") } -func disablePlacementRulesFunc(cmd *cobra.Command, args []string) { +func disablePlacementRulesFunc(cmd *cobra.Command, _ []string) { err := postConfigDataWithPath(cmd, "enable-placement-rules", "false", configPrefix) if err != nil { cmd.Printf("Failed to set config: %s\n", err) @@ -547,7 +547,7 @@ func disablePlacementRulesFunc(cmd *cobra.Command, args []string) { cmd.Println("Success!") } -func getPlacementRulesFunc(cmd *cobra.Command, args []string) { +func getPlacementRulesFunc(cmd *cobra.Command, _ []string) { getFlag := func(key string) string { if f := cmd.Flag(key); f != nil { return f.Value.String() @@ -598,7 +598,7 @@ func getPlacementRulesFunc(cmd *cobra.Command, args []string) { cmd.Println("rules saved to file " + file) } -func putPlacementRulesFunc(cmd *cobra.Command, args []string) { +func putPlacementRulesFunc(cmd *cobra.Command, _ []string) { var file string if f := cmd.Flag("in"); f != nil { file = f.Value.String() @@ -674,7 +674,7 @@ func updateRuleGroupFunc(cmd *cobra.Command, args []string) { cmd.Printf("override %s should be a boolean\n", args[2]) return } - postJSON(cmd, ruleGroupPrefix, map[string]interface{}{ + postJSON(cmd, ruleGroupPrefix, map[string]any{ "id": args[0], "index": index, "override": override, @@ -712,7 +712,7 @@ func getRuleBundle(cmd *cobra.Command, args []string) { cmd.Printf("rule group saved to file %s\n", file) } -func setRuleBundle(cmd *cobra.Command, args []string) { +func setRuleBundle(cmd *cobra.Command, _ []string) { var file string if f := cmd.Flag("in"); f != nil { file = f.Value.String() @@ -763,7 +763,7 @@ func delRuleBundle(cmd *cobra.Command, args []string) { cmd.Println(res) } -func loadRuleBundle(cmd *cobra.Command, args []string) { +func loadRuleBundle(cmd *cobra.Command, _ []string) { header := buildHeader(cmd) res, err := doRequest(cmd, ruleBundlePrefix, http.MethodGet, header) if err != nil { @@ -788,7 +788,7 @@ func loadRuleBundle(cmd *cobra.Command, args []string) { cmd.Printf("rule group saved to file %s\n", file) } -func saveRuleBundle(cmd *cobra.Command, args []string) { +func saveRuleBundle(cmd *cobra.Command, _ []string) { var file string if f := cmd.Flag("in"); f != nil { file = f.Value.String() diff --git a/tools/pd-ctl/pdctl/command/exit_command.go b/tools/pd-ctl/pdctl/command/exit_command.go index a3d38be97bd..3ead7e54e8e 100644 --- a/tools/pd-ctl/pdctl/command/exit_command.go +++ b/tools/pd-ctl/pdctl/command/exit_command.go @@ -30,6 +30,6 @@ func 
NewExitCommand() *cobra.Command { return conf } -func exitCommandFunc(cmd *cobra.Command, args []string) { +func exitCommandFunc(*cobra.Command, []string) { os.Exit(0) } diff --git a/tools/pd-ctl/pdctl/command/gc_safepoint_command.go b/tools/pd-ctl/pdctl/command/gc_safepoint_command.go index 80c6328e955..f4a6b6fcfd0 100644 --- a/tools/pd-ctl/pdctl/command/gc_safepoint_command.go +++ b/tools/pd-ctl/pdctl/command/gc_safepoint_command.go @@ -49,7 +49,7 @@ func NewDeleteServiceGCSafepointCommand() *cobra.Command { return l } -func showSSPs(cmd *cobra.Command, args []string) { +func showSSPs(cmd *cobra.Command, _ []string) { r, err := doRequest(cmd, serviceGCSafepointPrefix, http.MethodGet, http.Header{}) if err != nil { cmd.Printf("Failed to get service GC safepoint: %s\n", err) diff --git a/tools/pd-ctl/pdctl/command/global.go b/tools/pd-ctl/pdctl/command/global.go index 806ad4ecc53..7438345127e 100644 --- a/tools/pd-ctl/pdctl/command/global.go +++ b/tools/pd-ctl/pdctl/command/global.go @@ -16,6 +16,7 @@ package command import ( "bytes" + "crypto/tls" "encoding/json" "io" "net/http" @@ -24,6 +25,7 @@ import ( "strings" "github.com/pingcap/errors" + "github.com/pingcap/kvproto/pkg/metapb" "github.com/spf13/cobra" pd "github.com/tikv/pd/client/http" "github.com/tikv/pd/pkg/utils/apiutil" @@ -31,45 +33,112 @@ import ( ) const ( - pdControlCallerID = "pd-ctl" - pingPrefix = "pd/api/v1/ping" + PDControlCallerID = "pd-ctl" + clusterPrefix = "pd/api/v1/cluster" ) +func initTLSConfig(caPath, certPath, keyPath string) (*tls.Config, error) { + tlsInfo := transport.TLSInfo{ + CertFile: certPath, + KeyFile: keyPath, + TrustedCAFile: caPath, + } + tlsConfig, err := tlsInfo.ClientConfig() + if err != nil { + return nil, errors.WithStack(err) + } + return tlsConfig, nil +} + // PDCli is a pd HTTP client var PDCli pd.Client -// SetNewPDClient creates a PD HTTP client with the given PD addresses and options. -func SetNewPDClient(addrs []string, opts ...pd.ClientOption) { +func requirePDClient(cmd *cobra.Command, _ []string) error { + var ( + tlsConfig *tls.Config + err error + ) + tlsConfig, err = parseTLSConfig(cmd) + if err != nil { + return err + } + + return initNewPDClient(cmd, pd.WithTLSConfig(tlsConfig)) +} + +// shouldInitPDClient checks whether we should create a new PD client according to the cluster information. +func shouldInitPDClient(cmd *cobra.Command) (bool, error) { + // Get the cluster information the current command assigned to. + newClusterInfoJSON, err := doRequest(cmd, clusterPrefix, http.MethodGet, http.Header{}) + if err != nil { + return false, err + } + newClusterInfo := &metapb.Cluster{} + err = json.Unmarshal([]byte(newClusterInfoJSON), newClusterInfo) + if err != nil { + return false, err + } + // If the PD client is nil and we get the cluster information successfully, + // we should initialize the PD client directly. + if PDCli == nil { + return true, nil + } + // Get current cluster information that the PD client connects to. + currentClusterInfo, err := PDCli.GetCluster(cmd.Context()) + if err != nil { + return true, nil + } + // Compare the cluster ID to determine whether we should re-initialize the PD client. + return currentClusterInfo.GetId() == 0 || newClusterInfo.GetId() != currentClusterInfo.GetId(), nil +} + +func initNewPDClient(cmd *cobra.Command, opts ...pd.ClientOption) error { + if should, err := shouldInitPDClient(cmd); !should || err != nil { + return err + } if PDCli != nil { PDCli.Close() } - PDCli = pd.NewClient(pdControlCallerID, addrs, opts...) 
+ PDCli = pd.NewClient(PDControlCallerID, getEndpoints(cmd), opts...).WithCallerID(PDControlCallerID) + return nil } -// TODO: replace dialClient with PDCli +// TODO: replace dialClient with the PD HTTP client completely. var dialClient = &http.Client{ - Transport: apiutil.NewCallerIDRoundTripper(http.DefaultTransport, pdControlCallerID), + Transport: apiutil.NewCallerIDRoundTripper(http.DefaultTransport, PDControlCallerID), } -// InitHTTPSClient creates https client with ca file -func InitHTTPSClient(pdAddrs, caPath, certPath, keyPath string) error { - tlsInfo := transport.TLSInfo{ - CertFile: certPath, - KeyFile: keyPath, - TrustedCAFile: caPath, +func parseTLSConfig(cmd *cobra.Command) (*tls.Config, error) { + caPath, err := cmd.Flags().GetString("cacert") + if err != nil || len(caPath) == 0 { + return nil, err } - tlsConfig, err := tlsInfo.ClientConfig() + certPath, err := cmd.Flags().GetString("cert") if err != nil { - return errors.WithStack(err) + return nil, err } + keyPath, err := cmd.Flags().GetString("key") + if err != nil { + return nil, err + } + tlsConfig, err := initTLSConfig(caPath, certPath, keyPath) + if err != nil { + return nil, err + } + + return tlsConfig, nil +} +// RequireHTTPSClient creates a HTTPS client if the related flags are set +func RequireHTTPSClient(cmd *cobra.Command, _ []string) error { + tlsConfig, err := parseTLSConfig(cmd) + if err != nil || tlsConfig == nil { + return err + } dialClient = &http.Client{ Transport: apiutil.NewCallerIDRoundTripper( - &http.Transport{TLSClientConfig: tlsConfig}, pdControlCallerID), + &http.Transport{TLSClientConfig: tlsConfig}, PDControlCallerID), } - - SetNewPDClient(strings.Split(pdAddrs, ","), pd.WithTLSConfig(tlsConfig.Clone())) - return nil } @@ -186,7 +255,7 @@ func getEndpoints(cmd *cobra.Command) []string { return strings.Split(addrs, ",") } -func requestJSON(cmd *cobra.Command, method, prefix string, input map[string]interface{}) { +func requestJSON(cmd *cobra.Command, method, prefix string, input map[string]any) { data, err := json.Marshal(input) if err != nil { cmd.Println(err) @@ -225,17 +294,18 @@ func requestJSON(cmd *cobra.Command, method, prefix string, input map[string]int return nil }) if err != nil { - cmd.Printf("Failed! %s\n", err) + cmd.Printf("Failed! %s\n", strings.TrimSpace(err.Error())) return } - cmd.Printf("Success! %s\n", strings.Trim(string(msg), "\"")) + msg = bytes.Trim(bytes.TrimSpace(msg), "\"") + cmd.Printf("Success! %s\n", string(msg)) } -func postJSON(cmd *cobra.Command, prefix string, input map[string]interface{}) { +func postJSON(cmd *cobra.Command, prefix string, input map[string]any) { requestJSON(cmd, http.MethodPost, prefix, input) } -func patchJSON(cmd *cobra.Command, prefix string, input map[string]interface{}) { +func patchJSON(cmd *cobra.Command, prefix string, input map[string]any) { requestJSON(cmd, http.MethodPatch, prefix, input) } diff --git a/tools/pd-ctl/pdctl/command/global_test.go b/tools/pd-ctl/pdctl/command/global_test.go new file mode 100644 index 00000000000..86eb4366d04 --- /dev/null +++ b/tools/pd-ctl/pdctl/command/global_test.go @@ -0,0 +1,58 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package command
+
+import (
+	"os"
+	"os/exec"
+	"testing"
+
+	"github.com/spf13/cobra"
+	"github.com/stretchr/testify/require"
+)
+
+func TestParseTLSConfig(t *testing.T) {
+	re := require.New(t)
+
+	rootCmd := &cobra.Command{
+		Use:           "pd-ctl",
+		Short:         "Placement Driver control",
+		SilenceErrors: true,
+	}
+	certPath := "../../tests/cert"
+	rootCmd.Flags().String("cacert", certPath+"/ca.pem", "path of file that contains list of trusted SSL CAs")
+	rootCmd.Flags().String("cert", certPath+"/client.pem", "path of file that contains X509 certificate in PEM format")
+	rootCmd.Flags().String("key", certPath+"/client-key.pem", "path of file that contains X509 key in PEM format")
+
+	// generate certs
+	if err := os.Mkdir(certPath, 0755); err != nil {
+		t.Fatal(err)
+	}
+	certScript := "../../tests/cert_opt.sh"
+	if err := exec.Command(certScript, "generate", certPath).Run(); err != nil {
+		t.Fatal(err)
+	}
+	defer func() {
+		if err := exec.Command(certScript, "cleanup", certPath).Run(); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.RemoveAll(certPath); err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	tlsConfig, err := parseTLSConfig(rootCmd)
+	re.NoError(err)
+	re.NotNil(tlsConfig)
+}
diff --git a/tools/pd-ctl/pdctl/command/health_command.go b/tools/pd-ctl/pdctl/command/health_command.go
index 1bae871285d..a10ee118397 100644
--- a/tools/pd-ctl/pdctl/command/health_command.go
+++ b/tools/pd-ctl/pdctl/command/health_command.go
@@ -15,30 +15,25 @@ package command
 
 import (
-	"net/http"
-
 	"github.com/spf13/cobra"
 )
 
-var (
-	healthPrefix = "pd/api/v1/health"
-)
-
 // NewHealthCommand return a health subcommand of rootCmd
 func NewHealthCommand() *cobra.Command {
 	m := &cobra.Command{
-		Use:   "health",
-		Short: "show all node's health information of the pd cluster",
-		Run:   showHealthCommandFunc,
+		Use:               "health",
+		Short:             "show all nodes' health information of the PD cluster",
+		PersistentPreRunE: requirePDClient,
+		Run:               showHealthCommandFunc,
 	}
 	return m
 }
 
-func showHealthCommandFunc(cmd *cobra.Command, args []string) {
-	r, err := doRequest(cmd, healthPrefix, http.MethodGet, http.Header{})
+func showHealthCommandFunc(cmd *cobra.Command, _ []string) {
+	health, err := PDCli.GetHealthStatus(cmd.Context())
 	if err != nil {
 		cmd.Println(err)
 		return
 	}
-	cmd.Println(r)
+	jsonPrint(cmd, health)
 }
diff --git a/tools/pd-ctl/pdctl/command/hot_command.go b/tools/pd-ctl/pdctl/command/hot_command.go
index 09160d8f2b9..77c0ee4d7de 100644
--- a/tools/pd-ctl/pdctl/command/hot_command.go
+++ b/tools/pd-ctl/pdctl/command/hot_command.go
@@ -107,7 +107,7 @@ func NewHotStoreCommand() *cobra.Command {
 	return cmd
 }
 
-func showHotStoresCommandFunc(cmd *cobra.Command, args []string) {
+func showHotStoresCommandFunc(cmd *cobra.Command, _ []string) {
 	r, err := doRequest(cmd, hotStoresPrefix, http.MethodGet, http.Header{})
 	if err != nil {
 		cmd.Printf("Failed to get store hotspot: %s\n", err)
@@ -216,7 +216,7 @@ func parseOptionalArgs(prefix string, param string, args []string) (string, erro
 	return prefix, nil
 }
 
-func parseHotRegionsHistoryArgs(args []string) (map[string]interface{}, error) {
+func
parseHotRegionsHistoryArgs(args []string) (map[string]any, error) { startTime, err := strconv.ParseInt(args[0], 10, 64) if err != nil { return nil, errors.Errorf("start_time should be a number,but got %s", args[0]) @@ -225,7 +225,7 @@ func parseHotRegionsHistoryArgs(args []string) (map[string]interface{}, error) { if err != nil { return nil, errors.Errorf("end_time should be a number,but got %s", args[1]) } - input := map[string]interface{}{ + input := map[string]any{ "start_time": startTime, "end_time": endTime, } diff --git a/tools/pd-ctl/pdctl/command/keyspace_group_command.go b/tools/pd-ctl/pdctl/command/keyspace_group_command.go index 08d5c875a18..9c3a45f4744 100644 --- a/tools/pd-ctl/pdctl/command/keyspace_group_command.go +++ b/tools/pd-ctl/pdctl/command/keyspace_group_command.go @@ -192,7 +192,7 @@ func splitKeyspaceGroupCommandFunc(cmd *cobra.Command, args []string) { } keyspaces = append(keyspaces, uint32(id)) } - postJSON(cmd, fmt.Sprintf("%s/%s/split", keyspaceGroupsPrefix, args[0]), map[string]interface{}{ + postJSON(cmd, fmt.Sprintf("%s/%s/split", keyspaceGroupsPrefix, args[0]), map[string]any{ "new-id": uint32(newID), "keyspaces": keyspaces, }) @@ -223,7 +223,7 @@ func splitRangeKeyspaceGroupCommandFunc(cmd *cobra.Command, args []string) { cmd.Printf("Failed to parse the end keyspace ID: %s\n", err) return } - postJSON(cmd, fmt.Sprintf("%s/%s/split", keyspaceGroupsPrefix, args[0]), map[string]interface{}{ + postJSON(cmd, fmt.Sprintf("%s/%s/split", keyspaceGroupsPrefix, args[0]), map[string]any{ "new-id": uint32(newID), "start-keyspace-id": uint32(startKeyspaceID), "end-keyspace-id": uint32(endKeyspaceID), @@ -251,7 +251,7 @@ func finishSplitKeyspaceGroupCommandFunc(cmd *cobra.Command, args []string) { func mergeKeyspaceGroupCommandFunc(cmd *cobra.Command, args []string) { var ( targetGroupID uint32 - params = map[string]interface{}{} + params = map[string]any{} argNum = len(args) ) mergeAll, err := cmd.Flags().GetBool("all") @@ -334,7 +334,7 @@ func setNodesKeyspaceGroupCommandFunc(cmd *cobra.Command, args []string) { } nodes = append(nodes, arg) } - patchJSON(cmd, fmt.Sprintf("%s/%s", keyspaceGroupsPrefix, args[0]), map[string]interface{}{ + patchJSON(cmd, fmt.Sprintf("%s/%s", keyspaceGroupsPrefix, args[0]), map[string]any{ "Nodes": nodes, }) } @@ -369,7 +369,7 @@ func setPriorityKeyspaceGroupCommandFunc(cmd *cobra.Command, args []string) { return } - patchJSON(cmd, fmt.Sprintf("%s/%s/%s", keyspaceGroupsPrefix, args[0], node), map[string]interface{}{ + patchJSON(cmd, fmt.Sprintf("%s/%s/%s", keyspaceGroupsPrefix, args[0], node), map[string]any{ "Priority": priority, }) } diff --git a/tools/pd-ctl/pdctl/command/label_command.go b/tools/pd-ctl/pdctl/command/label_command.go index 60762383241..6d95465392f 100644 --- a/tools/pd-ctl/pdctl/command/label_command.go +++ b/tools/pd-ctl/pdctl/command/label_command.go @@ -21,9 +21,9 @@ import ( "github.com/spf13/cobra" "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/response" sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/statistics" - "github.com/tikv/pd/server/api" ) var ( @@ -53,7 +53,7 @@ func NewLabelListStoresCommand() *cobra.Command { return l } -func showLabelsCommandFunc(cmd *cobra.Command, args []string) { +func showLabelsCommandFunc(cmd *cobra.Command, _ []string) { r, err := doRequest(cmd, labelsPrefix, http.MethodGet, http.Header{}) if err != nil { cmd.Printf("Failed to get labels: %s\n", err) @@ -114,7 +114,7 @@ func getStores(cmd *cobra.Command, _ []string) ([]*core.StoreInfo, error) { if err 
!= nil { return nil, err } - var storesInfo api.StoresInfo + var storesInfo response.StoresInfo if err := json.Unmarshal([]byte(body), &storesInfo); err != nil { return nil, err } @@ -125,13 +125,13 @@ func getStores(cmd *cobra.Command, _ []string) ([]*core.StoreInfo, error) { return stores, nil } -func getRegions(cmd *cobra.Command, _ []string) ([]api.RegionInfo, error) { +func getRegions(cmd *cobra.Command, _ []string) ([]response.RegionInfo, error) { prefix := regionsPrefix body, err := doRequest(cmd, prefix, http.MethodGet, http.Header{}) if err != nil { return nil, err } - var RegionsInfo api.RegionsInfo + var RegionsInfo response.RegionsInfo if err := json.Unmarshal([]byte(body), &RegionsInfo); err != nil { return nil, err } diff --git a/tools/pd-ctl/pdctl/command/member_command.go b/tools/pd-ctl/pdctl/command/member_command.go index 677f7570131..b939935cfb9 100644 --- a/tools/pd-ctl/pdctl/command/member_command.go +++ b/tools/pd-ctl/pdctl/command/member_command.go @@ -89,7 +89,7 @@ func NewLeaderMemberCommand() *cobra.Command { return d } -func showMemberCommandFunc(cmd *cobra.Command, args []string) { +func showMemberCommandFunc(cmd *cobra.Command, _ []string) { r, err := doRequest(cmd, membersPrefix, http.MethodGet, http.Header{}) if err != nil { cmd.Printf("Failed to get pd members: %s\n", err) @@ -126,7 +126,7 @@ func deleteMemberByIDCommandFunc(cmd *cobra.Command, args []string) { cmd.Println("Success!") } -func getLeaderMemberCommandFunc(cmd *cobra.Command, args []string) { +func getLeaderMemberCommandFunc(cmd *cobra.Command, _ []string) { r, err := doRequest(cmd, leaderMemberPrefix, http.MethodGet, http.Header{}) if err != nil { cmd.Printf("Failed to get the leader of pd members: %s\n", err) @@ -135,7 +135,7 @@ func getLeaderMemberCommandFunc(cmd *cobra.Command, args []string) { cmd.Println(r) } -func resignLeaderCommandFunc(cmd *cobra.Command, args []string) { +func resignLeaderCommandFunc(cmd *cobra.Command, _ []string) { prefix := leaderMemberPrefix + "/resign" _, err := doRequest(cmd, prefix, http.MethodPost, http.Header{}) if err != nil { @@ -170,7 +170,7 @@ func setLeaderPriorityFunc(cmd *cobra.Command, args []string) { cmd.Printf("failed to parse priority: %v\n", err) return } - data := map[string]interface{}{"leader-priority": priority} + data := map[string]any{"leader-priority": priority} reqData, _ := json.Marshal(data) _, err = doRequest(cmd, prefix, http.MethodPost, http.Header{"Content-Type": {"application/json"}}, WithBody(bytes.NewBuffer(reqData))) if err != nil { diff --git a/tools/pd-ctl/pdctl/command/min_resolved_ts.go b/tools/pd-ctl/pdctl/command/min_resolved_ts.go index dbf0c47b2de..904f880d82d 100644 --- a/tools/pd-ctl/pdctl/command/min_resolved_ts.go +++ b/tools/pd-ctl/pdctl/command/min_resolved_ts.go @@ -35,7 +35,7 @@ func NewMinResolvedTSCommand() *cobra.Command { } // ShowMinResolvedTS show min resolved ts -func ShowMinResolvedTS(cmd *cobra.Command, args []string) { +func ShowMinResolvedTS(cmd *cobra.Command, _ []string) { r, err := doRequest(cmd, minResolvedTSPrefix, http.MethodGet, http.Header{}) if err != nil { cmd.Printf("Failed to get min resolved ts: %s\n", err) diff --git a/tools/pd-ctl/pdctl/command/operator.go b/tools/pd-ctl/pdctl/command/operator.go index 1db237b55d0..4e7771580de 100644 --- a/tools/pd-ctl/pdctl/command/operator.go +++ b/tools/pd-ctl/pdctl/command/operator.go @@ -158,7 +158,7 @@ func transferLeaderCommandFunc(cmd *cobra.Command, args []string) { return } - input := make(map[string]interface{}) + input := make(map[string]any) 
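+	// The operator request body is a plain JSON object; the fields set below name
+	// the operator and identify the region and the target store for the transfer.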
input["name"] = cmd.Name() input["region_id"] = ids[0] input["to_store_id"] = ids[1] @@ -192,7 +192,7 @@ func transferRegionCommandFunc(cmd *cobra.Command, args []string) { return } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = cmd.Name() input["region_id"] = ids[0] input["to_store_ids"] = ids[1:] @@ -224,7 +224,7 @@ func transferPeerCommandFunc(cmd *cobra.Command, args []string) { return } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = cmd.Name() input["region_id"] = ids[0] input["from_store_id"] = ids[1] @@ -254,7 +254,7 @@ func addPeerCommandFunc(cmd *cobra.Command, args []string) { return } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = cmd.Name() input["region_id"] = ids[0] input["store_id"] = ids[1] @@ -283,7 +283,7 @@ func addLearnerCommandFunc(cmd *cobra.Command, args []string) { return } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = cmd.Name() input["region_id"] = ids[0] input["store_id"] = ids[1] @@ -312,7 +312,7 @@ func mergeRegionCommandFunc(cmd *cobra.Command, args []string) { return } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = cmd.Name() input["source_region_id"] = ids[0] input["target_region_id"] = ids[1] @@ -341,7 +341,7 @@ func removePeerCommandFunc(cmd *cobra.Command, args []string) { return } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = cmd.Name() input["region_id"] = ids[0] input["store_id"] = ids[1] @@ -375,13 +375,12 @@ func splitRegionCommandFunc(cmd *cobra.Command, args []string) { policy := cmd.Flags().Lookup("policy").Value.String() switch policy { case "scan", "approximate", "usekey": - break default: cmd.Println("Error: unknown policy") return } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = cmd.Name() input["region_id"] = ids[0] input["policy"] = policy @@ -415,7 +414,7 @@ func scatterRegionCommandFunc(cmd *cobra.Command, args []string) { return } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = cmd.Name() input["region_id"] = ids[0] postJSON(cmd, operatorsPrefix, input) diff --git a/tools/pd-ctl/pdctl/command/ping_command.go b/tools/pd-ctl/pdctl/command/ping_command.go index 38e99e0516d..7efa46180d1 100644 --- a/tools/pd-ctl/pdctl/command/ping_command.go +++ b/tools/pd-ctl/pdctl/command/ping_command.go @@ -21,6 +21,8 @@ import ( "github.com/spf13/cobra" ) +const pingPrefix = "pd/api/v1/ping" + // NewPingCommand return a ping subcommand of rootCmd func NewPingCommand() *cobra.Command { m := &cobra.Command{ @@ -31,7 +33,7 @@ func NewPingCommand() *cobra.Command { return m } -func showPingCommandFunc(cmd *cobra.Command, args []string) { +func showPingCommandFunc(cmd *cobra.Command, _ []string) { start := time.Now() _, err := doRequest(cmd, pingPrefix, http.MethodGet, http.Header{}) if err != nil { diff --git a/tools/pd-ctl/pdctl/command/plugin_command.go b/tools/pd-ctl/pdctl/command/plugin_command.go index a713c8fd063..9716d1d7755 100644 --- a/tools/pd-ctl/pdctl/command/plugin_command.go +++ b/tools/pd-ctl/pdctl/command/plugin_command.go @@ -71,7 +71,7 @@ func sendPluginCommand(cmd *cobra.Command, action string, args []string) { cmd.Println(cmd.Usage()) return } - data := map[string]interface{}{ + data := map[string]any{ "plugin-path": args[0], } reqData, err := json.Marshal(data) diff --git 
a/tools/pd-ctl/pdctl/command/region_command.go b/tools/pd-ctl/pdctl/command/region_command.go index 33191bbe12b..3536b01a606 100644 --- a/tools/pd-ctl/pdctl/command/region_command.go +++ b/tools/pd-ctl/pdctl/command/region_command.go @@ -37,6 +37,8 @@ var ( regionsCheckPrefix = "pd/api/v1/regions/check" regionsWriteFlowPrefix = "pd/api/v1/regions/writeflow" regionsReadFlowPrefix = "pd/api/v1/regions/readflow" + regionsWriteQueryPrefix = "pd/api/v1/regions/writequery" + regionsReadQueryPrefix = "pd/api/v1/regions/readquery" regionsConfVerPrefix = "pd/api/v1/regions/confver" regionsVersionPrefix = "pd/api/v1/regions/version" regionsSizePrefix = "pd/api/v1/regions/size" @@ -66,17 +68,17 @@ func NewRegionCommand() *cobra.Command { r.AddCommand(NewRangesWithRangeHolesCommand()) topRead := &cobra.Command{ - Use: `topread [--jq=""]`, - Short: "show regions with top read flow", - Run: showRegionsTopCommand(regionsReadFlowPrefix), + Use: `topread [byte|query] [--jq=""]`, + Short: "show regions with top read flow or query", + Run: showTopReadRegions, } topRead.Flags().String("jq", "", "jq query") r.AddCommand(topRead) topWrite := &cobra.Command{ - Use: `topwrite [--jq=""]`, - Short: "show regions with top write flow", - Run: showRegionsTopCommand(regionsWriteFlowPrefix), + Use: `topwrite [byte|query] [--jq=""]`, + Short: "show regions with top write flow or query", + Run: showTopWriteRegions, } topWrite.Flags().String("jq", "", "jq query") r.AddCommand(topWrite) @@ -156,7 +158,7 @@ func showRegionCommandFunc(cmd *cobra.Command, args []string) { cmd.Println(r) } -func scanRegionCommandFunc(cmd *cobra.Command, args []string) { +func scanRegionCommandFunc(cmd *cobra.Command, _ []string) { const limit = 1024 var key []byte for { @@ -212,6 +214,9 @@ func showRegionsTopCommand(prefix string) run { return } prefix += "?limit=" + args[0] + } else if len(args) > 1 { + cmd.Println(cmd.UsageString()) + return } r, err := doRequest(cmd, prefix, http.MethodGet, http.Header{}) if err != nil { @@ -226,6 +231,40 @@ func showRegionsTopCommand(prefix string) run { } } +func showTopReadRegions(cmd *cobra.Command, args []string) { + // default to show top read flow + if len(args) == 0 { + showRegionsTopCommand(regionsReadFlowPrefix)(cmd, args) + return + } + // default to show top read flow with limit + switch args[0] { + case "query": + showRegionsTopCommand(regionsReadQueryPrefix)(cmd, args[1:]) + case "byte": + showRegionsTopCommand(regionsReadFlowPrefix)(cmd, args[1:]) + default: + showRegionsTopCommand(regionsReadFlowPrefix)(cmd, args) + } +} + +func showTopWriteRegions(cmd *cobra.Command, args []string) { + // default to show top write flow + if len(args) == 0 { + showRegionsTopCommand(regionsWriteFlowPrefix)(cmd, args) + return + } + // default to show top write flow with limit + switch args[0] { + case "query": + showRegionsTopCommand(regionsWriteQueryPrefix)(cmd, args[1:]) + case "byte": + showRegionsTopCommand(regionsWriteFlowPrefix)(cmd, args[1:]) + default: + showRegionsTopCommand(regionsWriteFlowPrefix)(cmd, args) + } +} + // NewRegionWithKeyCommand return a region with key subcommand of regionCmd func NewRegionWithKeyCommand() *cobra.Command { r := &cobra.Command{ @@ -447,6 +486,7 @@ func NewRegionWithStoreCommand() *cobra.Command { Short: "show the regions of a specific store", Run: showRegionWithStoreCommandFunc, } + r.Flags().String("type", "all", "the type of the regions, could be 'all', 'leader', 'learner' or 'pending'") return r } @@ -457,6 +497,8 @@ func showRegionWithStoreCommandFunc(cmd 
*cobra.Command, args []string) {
 	}
 	storeID := args[0]
 	prefix := regionsStorePrefix + "/" + storeID
+	flagType := cmd.Flag("type")
+	prefix += "?type=" + flagType.Value.String()
 	r, err := doRequest(cmd, prefix, http.MethodGet, http.Header{})
 	if err != nil {
 		cmd.Printf("Failed to get regions with the given storeID: %s\n", err)
@@ -533,7 +575,7 @@ func NewRangesWithRangeHolesCommand() *cobra.Command {
 	return r
 }
 
-func showRangesWithRangeHolesCommandFunc(cmd *cobra.Command, args []string) {
+func showRangesWithRangeHolesCommandFunc(cmd *cobra.Command, _ []string) {
 	r, err := doRequest(cmd, regionsRangeHolesPrefix, http.MethodGet, http.Header{})
 	if err != nil {
 		cmd.Printf("Failed to get range holes: %s\n", err)
diff --git a/tools/pd-ctl/pdctl/command/resource_manager_command.go b/tools/pd-ctl/pdctl/command/resource_manager_command.go
index 8bc5ea85977..fe7913b0de6 100644
--- a/tools/pd-ctl/pdctl/command/resource_manager_command.go
+++ b/tools/pd-ctl/pdctl/command/resource_manager_command.go
@@ -69,12 +69,12 @@ func newConfigControllerSetCommand() *cobra.Command {
 				return
 			}
 
-			var val interface{}
+			var val any
 			val, err := strconv.ParseFloat(args[1], 64)
 			if err != nil {
 				val = args[1]
 			}
-			data := map[string]interface{}{args[0]: val}
+			data := map[string]any{args[0]: val}
 			jsonData, err := json.Marshal(data)
 			if err != nil {
 				cmd.Println(err)
diff --git a/tools/pd-ctl/pdctl/command/scheduler.go b/tools/pd-ctl/pdctl/command/scheduler.go
index 695576edf84..c1db24cc176 100644
--- a/tools/pd-ctl/pdctl/command/scheduler.go
+++ b/tools/pd-ctl/pdctl/command/scheduler.go
@@ -72,11 +72,18 @@ func pauseSchedulerCommandFunc(cmd *cobra.Command, args []string) {
 		cmd.Usage()
 		return
 	}
-	path := schedulersPrefix + "/" + args[0]
-	input := map[string]interface{}{"delay": delay}
+	path := schedulersPrefix + "/" + getEscapedSchedulerName(args[0])
+	input := map[string]any{"delay": delay}
 	postJSON(cmd, path, input)
 }
 
+// Since certain schedulers' names are defined by the caller (for example, scatter-range),
+// the name may contain special characters such as "#" and "&".
+// Escape the scheduler name before attaching it to the URL.
+func getEscapedSchedulerName(schedulerName string) string {
+	return url.PathEscape(schedulerName)
+}
+
 // NewResumeSchedulerCommand returns a command to resume a scheduler.
func NewResumeSchedulerCommand() *cobra.Command { c := &cobra.Command{ @@ -92,8 +99,8 @@ func resumeSchedulerCommandFunc(cmd *cobra.Command, args []string) { cmd.Usage() return } - path := schedulersPrefix + "/" + args[0] - input := map[string]interface{}{"delay": 0} + path := schedulersPrefix + "/" + getEscapedSchedulerName(args[0]) + input := map[string]any{"delay": 0} postJSON(cmd, path, input) } @@ -217,7 +224,7 @@ func addSchedulerForStoreCommandFunc(cmd *cobra.Command, args []string) { return } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = cmd.Name() input["store_id"] = storeID postJSON(cmd, schedulersPrefix, input) @@ -268,7 +275,7 @@ func addSchedulerForShuffleHotRegionCommandFunc(cmd *cobra.Command, args []strin } limit = l } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = cmd.Name() input["limit"] = limit postJSON(cmd, schedulersPrefix, input) @@ -384,8 +391,8 @@ func NewSlowTrendEvictLeaderSchedulerCommand() *cobra.Command { return c } -func addSchedulerForSplitBucketCommandFunc(cmd *cobra.Command, args []string) { - input := make(map[string]interface{}) +func addSchedulerForSplitBucketCommandFunc(cmd *cobra.Command, _ []string) { + input := make(map[string]any) input["name"] = cmd.Name() postJSON(cmd, schedulersPrefix, input) } @@ -395,7 +402,7 @@ func addSchedulerForGrantHotRegionCommandFunc(cmd *cobra.Command, args []string) cmd.Println(cmd.UsageString()) return } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = cmd.Name() input["store-leader-id"] = args[0] input["store-id"] = args[1] @@ -407,7 +414,7 @@ func addSchedulerCommandFunc(cmd *cobra.Command, args []string) { cmd.Println(cmd.UsageString()) return } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = cmd.Name() postJSON(cmd, schedulersPrefix, input) } @@ -439,7 +446,7 @@ func addSchedulerForScatterRangeCommandFunc(cmd *cobra.Command, args []string) { return } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = cmd.Name() input["start_key"] = url.QueryEscape(startKey) input["end_key"] = url.QueryEscape(endKey) @@ -475,7 +482,7 @@ func removeSchedulerCommandFunc(cmd *cobra.Command, args []string) { case strings.HasPrefix(args[0], grantLeaderSchedulerName) && args[0] != grantLeaderSchedulerName: redirectRemoveSchedulerToDeleteConfig(cmd, grantLeaderSchedulerName, args) default: - path := schedulersPrefix + "/" + args[0] + path := schedulersPrefix + "/" + getEscapedSchedulerName(args[0]) _, err := doRequest(cmd, path, http.MethodDelete, http.Header{}) if err != nil { cmd.Println(err) @@ -635,13 +642,25 @@ func addStoreToSchedulerConfig(cmd *cobra.Command, schedulerName string, args [] cmd.Println(err) return } - input := make(map[string]interface{}) + input := make(map[string]any) input["name"] = schedulerName input["store_id"] = storeID postJSON(cmd, path.Join(schedulerConfigPrefix, schedulerName, "config"), input) } +var hiddenHotConfig = []string{ + "max-zombie-rounds", + "max-peer-number", + "byte-rate-rank-step-ratio", + "key-rate-rank-step-ratio", + "query-rate-rank-step-ratio", + "count-rank-step-ratio", + "great-dec-ratio", + "minor-dec-ratio", + "enable-for-tiflash", +} + func listSchedulerConfigCommandFunc(cmd *cobra.Command, args []string) { if len(args) != 0 { cmd.Println(cmd.UsageString()) @@ -660,6 +679,23 @@ func listSchedulerConfigCommandFunc(cmd *cobra.Command, args []string) { cmd.Println(err) return } + if p == 
"balance-hot-region-scheduler" { + schedulerConfig := make(map[string]any) + err := json.Unmarshal([]byte(r), &schedulerConfig) + if err != nil { + cmd.Println(err) + return + } + for _, config := range hiddenHotConfig { + delete(schedulerConfig, config) + } + b, err := json.MarshalIndent(schedulerConfig, "", " ") + if err != nil { + cmd.Println(err) + return + } + r = string(b) + } cmd.Println(r) } @@ -697,7 +733,7 @@ func setGrantHotRegionCommandFunc(cmd *cobra.Command, schedulerName string, args cmd.Println(cmd.UsageString()) return } - input := make(map[string]interface{}) + input := make(map[string]any) input["store-leader-id"] = args[0] input["store-id"] = args[1] postJSON(cmd, path.Join(schedulerConfigPrefix, schedulerName, "config"), input) @@ -708,8 +744,8 @@ func postSchedulerConfigCommandFunc(cmd *cobra.Command, schedulerName string, ar cmd.Println(cmd.UsageString()) return } - var val interface{} - input := make(map[string]interface{}) + var val any + input := make(map[string]any) key, value := args[0], args[1] val, err := strconv.ParseFloat(value, 64) if err != nil { diff --git a/tools/pd-ctl/pdctl/command/store_command.go b/tools/pd-ctl/pdctl/command/store_command.go index 357e527c2f4..bc024d5a2e6 100644 --- a/tools/pd-ctl/pdctl/command/store_command.go +++ b/tools/pd-ctl/pdctl/command/store_command.go @@ -25,7 +25,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/spf13/cobra" - "github.com/tikv/pd/server/api" + "github.com/tikv/pd/pkg/response" "golang.org/x/text/cases" "golang.org/x/text/language" ) @@ -275,12 +275,12 @@ func storeLimitSceneCommandFunc(cmd *cobra.Command, args []string) { if len(args) == 3 { prefix = path.Join(prefix, fmt.Sprintf("?type=%s", args[2])) } - postJSON(cmd, prefix, map[string]interface{}{scene: rate}) + postJSON(cmd, prefix, map[string]any{scene: rate}) } } func convertToStoreInfo(content string) string { - store := &api.StoreInfo{} + store := &response.StoreInfo{} err := json.Unmarshal([]byte(content), store) if err != nil { return content @@ -296,7 +296,7 @@ func convertToStoreInfo(content string) string { } func convertToStoresInfo(content string) string { - stores := &api.StoresInfo{} + stores := &response.StoresInfo{} err := json.Unmarshal([]byte(content), stores) if err != nil { return content @@ -531,7 +531,7 @@ func labelStoreCommandFunc(cmd *cobra.Command, args []string) { return } - labels := make(map[string]interface{}) + labels := make(map[string]any) // useEqual is used to compatible the old way // TODO: remove old way useEqual := true @@ -584,7 +584,7 @@ func setStoreWeightCommandFunc(cmd *cobra.Command, args []string) { return } prefix := fmt.Sprintf(path.Join(storePrefix, "weight"), args[0]) - postJSON(cmd, prefix, map[string]interface{}{ + postJSON(cmd, prefix, map[string]any{ "leader": leader, "region": region, }) @@ -620,7 +620,7 @@ func storeLimitCommandFunc(cmd *cobra.Command, args []string) { } else { prefix = fmt.Sprintf(path.Join(storePrefix, "limit"), args[0]) } - postInput := map[string]interface{}{ + postInput := map[string]any{ "rate": rate, } if argsCount == 3 { @@ -631,7 +631,7 @@ func storeLimitCommandFunc(cmd *cobra.Command, args []string) { if args[0] != "all" { cmd.Println("Labels are an option of set all stores limit.") } else { - postInput := map[string]interface{}{} + postInput := map[string]any{} prefix := storesLimitPrefix ratePos := argsCount - 1 if argsCount%2 == 1 { @@ -648,7 +648,7 @@ func storeLimitCommandFunc(cmd *cobra.Command, args []string) { return } postInput["rate"] = rate - 
labels := make(map[string]interface{}) + labels := make(map[string]any) for i := 1; i < ratePos; i += 2 { labels[args[i]] = args[i+1] } @@ -675,7 +675,7 @@ func storeCheckCommandFunc(cmd *cobra.Command, args []string) { cmd.Println(r) } -func showStoresCommandFunc(cmd *cobra.Command, args []string) { +func showStoresCommandFunc(cmd *cobra.Command, _ []string) { prefix := storesPrefix r, err := doRequest(cmd, prefix, http.MethodGet, http.Header{}) if err != nil { @@ -706,7 +706,7 @@ func showAllStoresLimitCommandFunc(cmd *cobra.Command, args []string) { cmd.Println(r) } -func removeTombStoneCommandFunc(cmd *cobra.Command, args []string) { +func removeTombStoneCommandFunc(cmd *cobra.Command, _ []string) { prefix := path.Join(storesPrefix, "remove-tombstone") _, err := doRequest(cmd, prefix, http.MethodDelete, http.Header{}) if err != nil { @@ -728,7 +728,7 @@ func setAllLimitCommandFunc(cmd *cobra.Command, args []string) { return } prefix := storesLimitPrefix - input := map[string]interface{}{ + input := map[string]any{ "rate": rate, } if len(args) == 2 { diff --git a/tools/pd-ctl/pdctl/command/unsafe_command.go b/tools/pd-ctl/pdctl/command/unsafe_command.go index 8494eca4b5d..04d272385e7 100644 --- a/tools/pd-ctl/pdctl/command/unsafe_command.go +++ b/tools/pd-ctl/pdctl/command/unsafe_command.go @@ -61,7 +61,7 @@ func NewRemoveFailedStoresShowCommand() *cobra.Command { func removeFailedStoresCommandFunc(cmd *cobra.Command, args []string) { prefix := fmt.Sprintf("%s/remove-failed-stores", unsafePrefix) - postInput := make(map[string]interface{}, 3) + postInput := make(map[string]any, 3) autoDetect, err := cmd.Flags().GetBool("auto-detect") if err != nil { @@ -106,7 +106,7 @@ func removeFailedStoresCommandFunc(cmd *cobra.Command, args []string) { postJSON(cmd, prefix, postInput) } -func removeFailedStoresShowCommandFunc(cmd *cobra.Command, args []string) { +func removeFailedStoresShowCommandFunc(cmd *cobra.Command, _ []string) { var resp string var err error prefix := fmt.Sprintf("%s/remove-failed-stores/show", unsafePrefix) diff --git a/tools/pd-ctl/pdctl/ctl.go b/tools/pd-ctl/pdctl/ctl.go index 773f28fb8bd..fbacd65dc53 100644 --- a/tools/pd-ctl/pdctl/ctl.go +++ b/tools/pd-ctl/pdctl/ctl.go @@ -30,20 +30,25 @@ import ( func init() { cobra.EnablePrefixMatching = true + cobra.EnableTraverseRunHooks = true } // GetRootCmd is exposed for integration tests. But it can be embedded into another suite, too. 
func GetRootCmd() *cobra.Command { rootCmd := &cobra.Command{ - Use: "pd-ctl", - Short: "Placement Driver control", + Use: "pd-ctl", + Short: "Placement Driver control", + PersistentPreRunE: command.RequireHTTPSClient, + SilenceErrors: true, } - rootCmd.PersistentFlags().StringP("pd", "u", "http://127.0.0.1:2379", "address of pd") + rootCmd.PersistentFlags().StringP("pd", "u", "http://127.0.0.1:2379", "address of PD") rootCmd.PersistentFlags().String("cacert", "", "path of file that contains list of trusted SSL CAs") rootCmd.PersistentFlags().String("cert", "", "path of file that contains X509 certificate in PEM format") rootCmd.PersistentFlags().String("key", "", "path of file that contains X509 key in PEM format") + rootCmd.Flags().ParseErrorsWhitelist.UnknownFlags = true + rootCmd.AddCommand( command.NewConfigCommand(), command.NewRegionCommand(), @@ -70,39 +75,6 @@ func GetRootCmd() *cobra.Command { command.NewResourceManagerCommand(), ) - rootCmd.Flags().ParseErrorsWhitelist.UnknownFlags = true - rootCmd.SilenceErrors = true - - rootCmd.PersistentPreRunE = func(cmd *cobra.Command, args []string) error { - addrs, err := cmd.Flags().GetString("pd") - if err != nil { - return err - } - - // TODO: refine code after replace dialClient with PDCli - CAPath, err := cmd.Flags().GetString("cacert") - if err == nil && len(CAPath) != 0 { - certPath, err := cmd.Flags().GetString("cert") - if err != nil { - return err - } - - keyPath, err := cmd.Flags().GetString("key") - if err != nil { - return err - } - - if err := command.InitHTTPSClient(addrs, CAPath, certPath, keyPath); err != nil { - rootCmd.Println(err) - return err - } - } else { - command.SetNewPDClient(strings.Split(addrs, ",")) - } - - return nil - } - return rootCmd } @@ -115,7 +87,7 @@ func MainStart(args []string) { // TODO: deprecated rootCmd.Flags().BoolP("detach", "d", true, "Run pdctl without readline.") - rootCmd.Run = func(cmd *cobra.Command, args []string) { + rootCmd.Run = func(cmd *cobra.Command, _ []string) { if v, err := cmd.Flags().GetBool("version"); err == nil && v { versioninfo.Print() return @@ -129,6 +101,7 @@ func MainStart(args []string) { rootCmd.SetArgs(args) rootCmd.ParseFlags(args) rootCmd.SetOut(os.Stdout) + rootCmd.SetErr(os.Stderr) if err := rootCmd.Execute(); err != nil { rootCmd.Println(err) diff --git a/tools/pd-ctl/tests/config/config_test.go b/tools/pd-ctl/tests/config/config_test.go index 4a585851227..c6430789cfc 100644 --- a/tools/pd-ctl/tests/config/config_test.go +++ b/tools/pd-ctl/tests/config/config_test.go @@ -49,8 +49,8 @@ var testDialClient = &http.Client{ type testCase struct { name string - value interface{} - read func(scheduleConfig *sc.ScheduleConfig) interface{} + value any + read func(scheduleConfig *sc.ScheduleConfig) any } func (t *testCase) judge(re *require.Assertions, scheduleConfigs ...*sc.ScheduleConfig) { @@ -70,7 +70,8 @@ func TestConfigTestSuite(t *testing.T) { suite.Run(t, new(configTestSuite)) } -func (suite *configTestSuite) SetupSuite() { +func (suite *configTestSuite) SetupTest() { + // use a new environment to avoid affecting other tests suite.env = pdTests.NewSchedulingTestEnvironment(suite.T()) } @@ -96,6 +97,7 @@ func (suite *configTestSuite) TearDownTest() { re.NoError(err) } suite.env.RunFuncInTwoModes(cleanFunc) + suite.env.Cleanup() } func (suite *configTestSuite) TestConfig() { @@ -282,20 +284,20 @@ func (suite *configTestSuite) checkConfig(cluster *pdTests.TestCluster) { // test config read and write testCases := []testCase{ - {"leader-schedule-limit", 
uint64(64), func(scheduleConfig *sc.ScheduleConfig) interface{} { + {"leader-schedule-limit", uint64(64), func(scheduleConfig *sc.ScheduleConfig) any { return scheduleConfig.LeaderScheduleLimit - }}, {"hot-region-schedule-limit", uint64(64), func(scheduleConfig *sc.ScheduleConfig) interface{} { + }}, {"hot-region-schedule-limit", uint64(64), func(scheduleConfig *sc.ScheduleConfig) any { return scheduleConfig.HotRegionScheduleLimit - }}, {"hot-region-cache-hits-threshold", uint64(5), func(scheduleConfig *sc.ScheduleConfig) interface{} { + }}, {"hot-region-cache-hits-threshold", uint64(5), func(scheduleConfig *sc.ScheduleConfig) any { return scheduleConfig.HotRegionCacheHitsThreshold - }}, {"enable-remove-down-replica", false, func(scheduleConfig *sc.ScheduleConfig) interface{} { + }}, {"enable-remove-down-replica", false, func(scheduleConfig *sc.ScheduleConfig) any { return scheduleConfig.EnableRemoveDownReplica }}, - {"enable-debug-metrics", true, func(scheduleConfig *sc.ScheduleConfig) interface{} { + {"enable-debug-metrics", true, func(scheduleConfig *sc.ScheduleConfig) any { return scheduleConfig.EnableDebugMetrics }}, // set again - {"enable-debug-metrics", true, func(scheduleConfig *sc.ScheduleConfig) interface{} { + {"enable-debug-metrics", true, func(scheduleConfig *sc.ScheduleConfig) any { return scheduleConfig.EnableDebugMetrics }}, } @@ -566,7 +568,7 @@ func (suite *configTestSuite) checkPlacementRules(cluster *pdTests.TestCluster) re.Contains(string(output), "Success!") // test show - suite.checkShowRuleKey(re, pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}) + checkShowRuleKey(re, pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}) f, _ := os.CreateTemp("/tmp", "pd_tests") fname := f.Name() @@ -574,7 +576,7 @@ func (suite *configTestSuite) checkPlacementRules(cluster *pdTests.TestCluster) defer os.RemoveAll(fname) // test load - rules := suite.checkLoadRule(re, pdAddr, fname, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}) + rules := checkLoadRule(re, pdAddr, fname, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}) // test save rules = append(rules, placement.Rule{ @@ -594,11 +596,11 @@ func (suite *configTestSuite) checkPlacementRules(cluster *pdTests.TestCluster) re.NoError(err) // test show group - suite.checkShowRuleKey(re, pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}, {placement.DefaultGroupID, "test1"}}, "--group=pd") + checkShowRuleKey(re, pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}, {placement.DefaultGroupID, "test1"}}, "--group=pd") // test rule region detail pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b")) - suite.checkShowRuleKey(re, pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}, "--region=1", "--detail") + checkShowRuleKey(re, pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}, "--region=1", "--detail") // test delete // need clear up args, so create new a cobra.Command. Otherwise gourp still exists. 
@@ -607,7 +609,7 @@ func (suite *configTestSuite) checkPlacementRules(cluster *pdTests.TestCluster) os.WriteFile(fname, b, 0600) _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "save", "--in="+fname) re.NoError(err) - suite.checkShowRuleKey(re, pdAddr, [][2]string{{placement.DefaultGroupID, "test1"}}, "--group=pd") + checkShowRuleKey(re, pdAddr, [][2]string{{placement.DefaultGroupID, "test1"}}, "--group=pd") } func (suite *configTestSuite) TestPlacementRuleGroups() { @@ -722,7 +724,7 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *pdTests.TestClus defer os.RemoveAll(fname) // test load - suite.checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ + checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ {ID: placement.DefaultGroupID, Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) @@ -734,7 +736,7 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *pdTests.TestClus re.NoError(os.WriteFile(fname, b, 0600)) _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "set", "--in="+fname) re.NoError(err) - suite.checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ + checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ {ID: placement.DefaultGroupID, Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) @@ -743,7 +745,7 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *pdTests.TestClus _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "delete", placement.DefaultGroupID) re.NoError(err) - suite.checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ + checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) @@ -755,7 +757,7 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *pdTests.TestClus re.NoError(os.WriteFile(fname, b, 0600)) _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "set", "--in="+fname) re.NoError(err) - suite.checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ + checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) @@ -766,7 +768,7 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *pdTests.TestClus bundles := []placement.GroupBundle{ {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, } - suite.checkLoadRuleBundle(re, pdAddr, fname, bundles) + checkLoadRuleBundle(re, pdAddr, fname, bundles) // test save bundle.Rules = []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}} @@ -776,7 +778,7 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster 
*pdTests.TestClus re.NoError(os.WriteFile(fname, b, 0600)) _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "save", "--in="+fname) re.NoError(err) - suite.checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ + checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) @@ -789,7 +791,7 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *pdTests.TestClus _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "save", "--in="+fname, "--partial") re.NoError(err) - suite.checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ + checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) @@ -808,12 +810,12 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *pdTests.TestClus _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "delete", "--regexp", ".*f") re.NoError(err) - suite.checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ + checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ {ID: "pd", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pd", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) } -func (suite *configTestSuite) checkLoadRuleBundle(re *require.Assertions, pdAddr string, fname string, expectValues []placement.GroupBundle) { +func checkLoadRuleBundle(re *require.Assertions, pdAddr string, fname string, expectValues []placement.GroupBundle) { var bundles []placement.GroupBundle cmd := ctl.GetRootCmd() testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server @@ -826,7 +828,7 @@ func (suite *configTestSuite) checkLoadRuleBundle(re *require.Assertions, pdAddr assertBundles(re, bundles, expectValues) } -func (suite *configTestSuite) checkLoadRule(re *require.Assertions, pdAddr string, fname string, expectValues [][2]string) []placement.Rule { +func checkLoadRule(re *require.Assertions, pdAddr string, fname string, expectValues [][2]string) []placement.Rule { var rules []placement.Rule cmd := ctl.GetRootCmd() testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server @@ -842,7 +844,7 @@ func (suite *configTestSuite) checkLoadRule(re *require.Assertions, pdAddr strin return rules } -func (suite *configTestSuite) checkShowRuleKey(re *require.Assertions, pdAddr string, expectValues [][2]string, opts ...string) { +func checkShowRuleKey(re *require.Assertions, pdAddr string, expectValues [][2]string, opts ...string) { var ( rules []placement.Rule fit placement.RegionFit @@ -877,6 +879,7 @@ func TestReplicationMode(t *testing.T) { defer cancel() cluster, err := pdTests.NewTestCluster(ctx, 1) re.NoError(err) + defer cluster.Destroy() err = cluster.RunInitialServers() re.NoError(err) cluster.WaitLeader() @@ -1141,6 +1144,34 @@ func (suite *configTestSuite) checkMicroServiceConfig(cluster *pdTests.TestClust re.False(svr.GetMicroServiceConfig().EnableSchedulingFallback) } +func (suite *configTestSuite) TestRegionRules() { + 
suite.env.RunTestInTwoModes(suite.checkRegionRules) +} + +func (suite *configTestSuite) checkRegionRules(cluster *pdTests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + cmd := ctl.GetRootCmd() + + storeID, regionID := uint64(1), uint64(2) + store := &metapb.Store{ + Id: storeID, + State: metapb.StoreState_Up, + } + pdTests.MustPutStore(re, cluster, store) + pdTests.MustPutRegion(re, cluster, regionID, storeID, []byte{}, []byte{}) + + args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--region=" + strconv.Itoa(int(regionID)), "--detail"} + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + fit := &placement.RegionFit{} + re.NoError(json.Unmarshal(output, fit)) + re.Len(fit.RuleFits, 1) + re.Equal(placement.DefaultGroupID, fit.RuleFits[0].Rule.GroupID) + re.Equal(placement.DefaultRuleID, fit.RuleFits[0].Rule.ID) +} + func assertBundles(re *require.Assertions, a, b []placement.GroupBundle) { re.Len(b, len(a)) for i := 0; i < len(a); i++ { diff --git a/tools/pd-ctl/tests/global_test.go b/tools/pd-ctl/tests/global_test.go index 14b7aafdccd..766e357088e 100644 --- a/tools/pd-ctl/tests/global_test.go +++ b/tools/pd-ctl/tests/global_test.go @@ -16,33 +16,39 @@ package tests import ( "context" + "encoding/json" "fmt" "net/http" "testing" - "github.com/pingcap/log" + "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/require" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/pkg/utils/assertutil" "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server" cmd "github.com/tikv/pd/tools/pd-ctl/pdctl" - "go.uber.org/zap" + "github.com/tikv/pd/tools/pd-ctl/pdctl/command" ) -const pdControlCallerID = "pd-ctl" - func TestSendAndGetComponent(t *testing.T) { re := require.New(t) - handler := func(ctx context.Context, s *server.Server) (http.Handler, apiutil.APIServiceGroup, error) { + handler := func(context.Context, *server.Server) (http.Handler, apiutil.APIServiceGroup, error) { mux := http.NewServeMux() - mux.HandleFunc("/pd/api/v1/health", func(w http.ResponseWriter, r *http.Request) { + // check pd http sdk api + mux.HandleFunc("/pd/api/v1/cluster", func(w http.ResponseWriter, r *http.Request) { + callerID := apiutil.GetCallerIDOnHTTP(r) + re.Equal(command.PDControlCallerID, callerID) + cluster := &metapb.Cluster{Id: 1} + clusterBytes, err := json.Marshal(cluster) + re.NoError(err) + w.Write(clusterBytes) + }) + // check http client api + // TODO: remove this comment after replacing dialClient with the PD HTTP client completely. + mux.HandleFunc("/pd/api/v1/stores", func(w http.ResponseWriter, r *http.Request) { callerID := apiutil.GetCallerIDOnHTTP(r) - for k := range r.Header { - log.Info("header", zap.String("key", k)) - } - log.Info("caller id", zap.String("caller-id", callerID)) - re.Equal(pdControlCallerID, callerID) + re.Equal(command.PDControlCallerID, callerID) fmt.Fprint(w, callerID) }) info := apiutil.APIServiceGroup{ @@ -64,8 +70,15 @@ func TestSendAndGetComponent(t *testing.T) { }() cmd := cmd.GetRootCmd() - args := []string{"-u", pdAddr, "health"} + args := []string{"-u", pdAddr, "cluster"} output, err := ExecuteCommand(cmd, args...) re.NoError(err) - re.Equal(fmt.Sprintf("%s\n", pdControlCallerID), string(output)) + re.Equal(fmt.Sprintf("%s\n", `{ + "id": 1 +}`), string(output)) + + args = []string{"-u", pdAddr, "store"} + output, err = ExecuteCommand(cmd, args...) 
+ re.NoError(err) + re.Equal(fmt.Sprintf("%s\n", command.PDControlCallerID), string(output)) } diff --git a/tools/pd-ctl/tests/health/health_test.go b/tools/pd-ctl/tests/health/health_test.go index 51bc47b5b17..f1d3c7cfbf1 100644 --- a/tools/pd-ctl/tests/health/health_test.go +++ b/tools/pd-ctl/tests/health/health_test.go @@ -17,14 +17,21 @@ package health_test import ( "context" "encoding/json" + "os" + "os/exec" + "path/filepath" + "strings" "testing" "github.com/stretchr/testify/require" + "github.com/tikv/pd/pkg/utils/grpcutil" "github.com/tikv/pd/server/api" "github.com/tikv/pd/server/cluster" + "github.com/tikv/pd/server/config" pdTests "github.com/tikv/pd/tests" ctl "github.com/tikv/pd/tools/pd-ctl/pdctl" "github.com/tikv/pd/tools/pd-ctl/tests" + "go.etcd.io/etcd/pkg/transport" ) func TestHealth(t *testing.T) { @@ -33,6 +40,7 @@ func TestHealth(t *testing.T) { defer cancel() tc, err := pdTests.NewTestCluster(ctx, 3) re.NoError(err) + defer tc.Destroy() err = tc.RunInitialServers() re.NoError(err) tc.WaitLeader() @@ -40,7 +48,6 @@ func TestHealth(t *testing.T) { re.NoError(leaderServer.BootstrapCluster()) pdAddr := tc.GetConfig().GetClientURL() cmd := ctl.GetRootCmd() - defer tc.Destroy() client := tc.GetEtcdClient() members, err := cluster.GetMembers(client) @@ -68,3 +75,80 @@ func TestHealth(t *testing.T) { re.NoError(json.Unmarshal(output, &h)) re.Equal(healths, h) } + +func TestHealthTLS(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + certPath := "../cert" + certScript := "../cert_opt.sh" + // generate certs + if err := os.Mkdir(certPath, 0755); err != nil { + t.Fatal(err) + } + if err := exec.Command(certScript, "generate", certPath).Run(); err != nil { + t.Fatal(err) + } + defer func() { + if err := exec.Command(certScript, "cleanup", certPath).Run(); err != nil { + t.Fatal(err) + } + if err := os.RemoveAll(certPath); err != nil { + t.Fatal(err) + } + }() + + tlsInfo := transport.TLSInfo{ + KeyFile: filepath.Join(certPath, "pd-server-key.pem"), + CertFile: filepath.Join(certPath, "pd-server.pem"), + TrustedCAFile: filepath.Join(certPath, "ca.pem"), + } + tc, err := pdTests.NewTestCluster(ctx, 1, func(conf *config.Config, _ string) { + conf.Security.TLSConfig = grpcutil.TLSConfig{ + KeyPath: tlsInfo.KeyFile, + CertPath: tlsInfo.CertFile, + CAPath: tlsInfo.TrustedCAFile, + } + conf.AdvertiseClientUrls = strings.ReplaceAll(conf.AdvertiseClientUrls, "http", "https") + conf.ClientUrls = strings.ReplaceAll(conf.ClientUrls, "http", "https") + conf.AdvertisePeerUrls = strings.ReplaceAll(conf.AdvertisePeerUrls, "http", "https") + conf.PeerUrls = strings.ReplaceAll(conf.PeerUrls, "http", "https") + conf.InitialCluster = strings.ReplaceAll(conf.InitialCluster, "http", "https") + }) + re.NoError(err) + defer tc.Destroy() + err = tc.RunInitialServers() + re.NoError(err) + tc.WaitLeader() + cmd := ctl.GetRootCmd() + + client := tc.GetEtcdClient() + members, err := cluster.GetMembers(client) + re.NoError(err) + healthMembers := cluster.CheckHealth(tc.GetHTTPClient(), members) + healths := []api.Health{} + for _, member := range members { + h := api.Health{ + Name: member.Name, + MemberID: member.MemberId, + ClientUrls: member.ClientUrls, + Health: false, + } + if _, ok := healthMembers[member.GetMemberId()]; ok { + h.Health = true + } + healths = append(healths, h) + } + + pdAddr := tc.GetConfig().GetClientURL() + pdAddr = strings.ReplaceAll(pdAddr, "http", "https") + args := []string{"-u", pdAddr, "health", + 
"--cacert=../cert/ca.pem", + "--cert=../cert/client.pem", + "--key=../cert/client-key.pem"} + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + h := make([]api.Health, len(healths)) + re.NoError(json.Unmarshal(output, &h)) + re.Equal(healths, h) +} diff --git a/tools/pd-ctl/tests/helper.go b/tools/pd-ctl/tests/helper.go index e72a290213b..bdacae48c22 100644 --- a/tools/pd-ctl/tests/helper.go +++ b/tools/pd-ctl/tests/helper.go @@ -21,27 +21,25 @@ import ( "github.com/spf13/cobra" "github.com/stretchr/testify/require" "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/response" "github.com/tikv/pd/pkg/utils/typeutil" - "github.com/tikv/pd/server/api" - "github.com/tikv/pd/tools/pd-ctl/pdctl/command" ) // ExecuteCommand is used for test purpose. func ExecuteCommand(root *cobra.Command, args ...string) (output []byte, err error) { buf := new(bytes.Buffer) root.SetOut(buf) + root.SetErr(buf) root.SetArgs(args) - command.SetNewPDClient([]string{args[1]}) - defer command.PDCli.Close() err = root.Execute() return buf.Bytes(), err } // CheckStoresInfo is used to check the test results. // CheckStoresInfo will not check Store.State because this field has been omitted pd-ctl output -func CheckStoresInfo(re *require.Assertions, stores []*api.StoreInfo, want []*api.StoreInfo) { +func CheckStoresInfo(re *require.Assertions, stores []*response.StoreInfo, want []*response.StoreInfo) { re.Len(stores, len(want)) - mapWant := make(map[uint64]*api.StoreInfo) + mapWant := make(map[uint64]*response.StoreInfo) for _, s := range want { if _, ok := mapWant[s.Store.Id]; !ok { mapWant[s.Store.Id] = s @@ -64,14 +62,14 @@ func CheckStoresInfo(re *require.Assertions, stores []*api.StoreInfo, want []*ap } // CheckRegionInfo is used to check the test results. -func CheckRegionInfo(re *require.Assertions, output *api.RegionInfo, expected *core.RegionInfo) { - region := api.NewAPIRegionInfo(expected) +func CheckRegionInfo(re *require.Assertions, output *response.RegionInfo, expected *core.RegionInfo) { + region := response.NewAPIRegionInfo(expected) output.Adjust() re.Equal(region, output) } // CheckRegionsInfo is used to check the test results. -func CheckRegionsInfo(re *require.Assertions, output *api.RegionsInfo, expected []*core.RegionInfo) { +func CheckRegionsInfo(re *require.Assertions, output *response.RegionsInfo, expected []*core.RegionInfo) { re.Len(expected, output.Count) got := output.Regions sort.Slice(got, func(i, j int) bool { @@ -84,3 +82,12 @@ func CheckRegionsInfo(re *require.Assertions, output *api.RegionsInfo, expected CheckRegionInfo(re, &got[i], region) } } + +// CheckRegionsInfoWithoutSort is used to check the test results without sort. 
+func CheckRegionsInfoWithoutSort(re *require.Assertions, output *response.RegionsInfo, expected []*core.RegionInfo) {
+    re.Len(expected, output.Count)
+    got := output.Regions
+    for i, region := range expected {
+        CheckRegionInfo(re, &got[i], region)
+    }
+}
diff --git a/tools/pd-ctl/tests/hot/hot_test.go b/tools/pd-ctl/tests/hot/hot_test.go
index 7ec8315b6fd..dea49a1ffdd 100644
--- a/tools/pd-ctl/tests/hot/hot_test.go
+++ b/tools/pd-ctl/tests/hot/hot_test.go
@@ -51,7 +51,7 @@ func TestHotTestSuite(t *testing.T) {
 func (suite *hotTestSuite) SetupSuite() {
     suite.env = pdTests.NewSchedulingTestEnvironment(suite.T(),
-        func(conf *config.Config, serverName string) {
+        func(conf *config.Config, _ string) {
             conf.Schedule.MaxStoreDownTime.Duration = time.Hour
             conf.Schedule.HotRegionCacheHitsThreshold = 0
         },
@@ -188,11 +188,16 @@ func (suite *hotTestSuite) checkHot(cluster *pdTests.TestCluster) {
             Id:      100 + regionIDCounter,
             StoreId: hotStoreID,
         }
-        peerInfo := core.NewPeerInfo(leader, loads, reportInterval)
         region := core.NewRegionInfo(&metapb.Region{
             Id: hotRegionID,
         }, leader)
-        hotStat.CheckReadAsync(statistics.NewCheckPeerTask(peerInfo, region))
+        checkReadPeerTask := func(cache *statistics.HotPeerCache) {
+            stats := cache.CheckPeerFlow(region, []*metapb.Peer{leader}, loads, reportInterval)
+            for _, stat := range stats {
+                cache.UpdateStat(stat)
+            }
+        }
+        hotStat.CheckReadAsync(checkReadPeerTask)
         testutil.Eventually(re, func() bool {
             hotPeerStat := getHotPeerStat(utils.Read, hotRegionID, hotStoreID)
             return hotPeerStat != nil
@@ -398,13 +403,14 @@ func TestHistoryHotRegions(t *testing.T) {
     ctx, cancel := context.WithCancel(context.Background())
     defer cancel()
     cluster, err := pdTests.NewTestCluster(ctx, 1,
-        func(cfg *config.Config, serverName string) {
+        func(cfg *config.Config, _ string) {
             cfg.Schedule.HotRegionCacheHitsThreshold = 0
             cfg.Schedule.HotRegionsWriteInterval.Duration = 1000 * time.Millisecond
             cfg.Schedule.HotRegionsReservedDays = 1
         },
     )
     re.NoError(err)
+    defer cluster.Destroy()
     err = cluster.RunInitialServers()
     re.NoError(err)
     cluster.WaitLeader()
@@ -519,8 +525,9 @@ func TestBuckets(t *testing.T) {
     statistics.Denoising = false
     ctx, cancel := context.WithCancel(context.Background())
     defer cancel()
-    cluster, err := pdTests.NewTestCluster(ctx, 1, func(cfg *config.Config, serverName string) { cfg.Schedule.HotRegionCacheHitsThreshold = 0 })
+    cluster, err := pdTests.NewTestCluster(ctx, 1, func(cfg *config.Config, _ string) { cfg.Schedule.HotRegionCacheHitsThreshold = 0 })
     re.NoError(err)
+    defer cluster.Destroy()
     err = cluster.RunInitialServers()
     re.NoError(err)
     cluster.WaitLeader()
diff --git a/tools/pd-ctl/tests/keyspace/keyspace_group_test.go b/tools/pd-ctl/tests/keyspace/keyspace_group_test.go
index b9ad04a3efb..1e3763d5d6e 100644
--- a/tools/pd-ctl/tests/keyspace/keyspace_group_test.go
+++ b/tools/pd-ctl/tests/keyspace/keyspace_group_test.go
@@ -41,6 +41,7 @@ func TestKeyspaceGroup(t *testing.T) {
     defer cancel()
     tc, err := pdTests.NewTestAPICluster(ctx, 1)
     re.NoError(err)
+    defer tc.Destroy()
     err = tc.RunInitialServers()
     re.NoError(err)
     tc.WaitLeader()
@@ -99,10 +100,11 @@ func TestSplitKeyspaceGroup(t *testing.T) {
     for i := 0; i < 129; i++ {
         keyspaces = append(keyspaces, fmt.Sprintf("keyspace_%d", i))
     }
-    tc, err := pdTests.NewTestAPICluster(ctx, 3, func(conf *config.Config, serverName string) {
+    tc, err := pdTests.NewTestAPICluster(ctx, 3, func(conf *config.Config, _ string) {
         conf.Keyspace.PreAlloc = keyspaces
     })
     re.NoError(err)
+    defer tc.Destroy()
     err = tc.RunInitialServers()
re.NoError(err) pdAddr := tc.GetConfig().GetClientURL() @@ -153,10 +155,11 @@ func TestExternalAllocNodeWhenStart(t *testing.T) { for i := 0; i < 10; i++ { keyspaces = append(keyspaces, fmt.Sprintf("keyspace_%d", i)) } - tc, err := pdTests.NewTestAPICluster(ctx, 1, func(conf *config.Config, serverName string) { + tc, err := pdTests.NewTestAPICluster(ctx, 1, func(conf *config.Config, _ string) { conf.Keyspace.PreAlloc = keyspaces }) re.NoError(err) + defer tc.Destroy() err = tc.RunInitialServers() re.NoError(err) pdAddr := tc.GetConfig().GetClientURL() @@ -192,10 +195,11 @@ func TestSetNodeAndPriorityKeyspaceGroup(t *testing.T) { for i := 0; i < 10; i++ { keyspaces = append(keyspaces, fmt.Sprintf("keyspace_%d", i)) } - tc, err := pdTests.NewTestAPICluster(ctx, 3, func(conf *config.Config, serverName string) { + tc, err := pdTests.NewTestAPICluster(ctx, 3, func(conf *config.Config, _ string) { conf.Keyspace.PreAlloc = keyspaces }) re.NoError(err) + defer tc.Destroy() err = tc.RunInitialServers() re.NoError(err) pdAddr := tc.GetConfig().GetClientURL() @@ -259,7 +263,7 @@ func TestSetNodeAndPriorityKeyspaceGroup(t *testing.T) { args := []string{"-u", pdAddr, "keyspace-group", "set-node", defaultKeyspaceGroupID, tsoAddrs[0]} output, err := tests.ExecuteCommand(cmd, args...) re.NoError(err) - re.Contains(string(output), "invalid num of nodes") + re.Contains(string(output), "Success!") args = []string{"-u", pdAddr, "keyspace-group", "set-node", defaultKeyspaceGroupID, "", ""} output, err = tests.ExecuteCommand(cmd, args...) re.NoError(err) @@ -295,10 +299,11 @@ func TestMergeKeyspaceGroup(t *testing.T) { for i := 0; i < 129; i++ { keyspaces = append(keyspaces, fmt.Sprintf("keyspace_%d", i)) } - tc, err := pdTests.NewTestAPICluster(ctx, 1, func(conf *config.Config, serverName string) { + tc, err := pdTests.NewTestAPICluster(ctx, 1, func(conf *config.Config, _ string) { conf.Keyspace.PreAlloc = keyspaces }) re.NoError(err) + defer tc.Destroy() err = tc.RunInitialServers() re.NoError(err) pdAddr := tc.GetConfig().GetClientURL() @@ -413,10 +418,11 @@ func TestKeyspaceGroupState(t *testing.T) { for i := 0; i < 10; i++ { keyspaces = append(keyspaces, fmt.Sprintf("keyspace_%d", i)) } - tc, err := pdTests.NewTestAPICluster(ctx, 1, func(conf *config.Config, serverName string) { + tc, err := pdTests.NewTestAPICluster(ctx, 1, func(conf *config.Config, _ string) { conf.Keyspace.PreAlloc = keyspaces }) re.NoError(err) + defer tc.Destroy() err = tc.RunInitialServers() re.NoError(err) pdAddr := tc.GetConfig().GetClientURL() @@ -503,10 +509,11 @@ func TestShowKeyspaceGroupPrimary(t *testing.T) { for i := 0; i < 10; i++ { keyspaces = append(keyspaces, fmt.Sprintf("keyspace_%d", i)) } - tc, err := pdTests.NewTestAPICluster(ctx, 1, func(conf *config.Config, serverName string) { + tc, err := pdTests.NewTestAPICluster(ctx, 1, func(conf *config.Config, _ string) { conf.Keyspace.PreAlloc = keyspaces }) re.NoError(err) + defer tc.Destroy() err = tc.RunInitialServers() re.NoError(err) pdAddr := tc.GetConfig().GetClientURL() @@ -594,6 +601,7 @@ func TestInPDMode(t *testing.T) { defer cancel() tc, err := pdTests.NewTestCluster(ctx, 1) re.NoError(err) + defer tc.Destroy() err = tc.RunInitialServers() re.NoError(err) pdAddr := tc.GetConfig().GetClientURL() diff --git a/tools/pd-ctl/tests/keyspace/keyspace_test.go b/tools/pd-ctl/tests/keyspace/keyspace_test.go index 62157ef968c..54c25fc2099 100644 --- a/tools/pd-ctl/tests/keyspace/keyspace_test.go +++ b/tools/pd-ctl/tests/keyspace/keyspace_test.go @@ -47,10 +47,11 @@ func 
TestKeyspace(t *testing.T) { for i := 1; i < 10; i++ { keyspaces = append(keyspaces, fmt.Sprintf("keyspace_%d", i)) } - tc, err := pdTests.NewTestAPICluster(ctx, 3, func(conf *config.Config, serverName string) { + tc, err := pdTests.NewTestAPICluster(ctx, 3, func(conf *config.Config, _ string) { conf.Keyspace.PreAlloc = keyspaces }) re.NoError(err) + defer tc.Destroy() err = tc.RunInitialServers() re.NoError(err) pdAddr := tc.GetConfig().GetClientURL() @@ -115,6 +116,7 @@ func TestKeyspaceGroupUninitialized(t *testing.T) { re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/keyspace/skipSplitRegion", "return(true)")) tc, err := pdTests.NewTestCluster(ctx, 1) re.NoError(err) + defer tc.Destroy() re.NoError(tc.RunInitialServers()) tc.WaitLeader() re.NoError(tc.GetLeaderServer().BootstrapCluster()) @@ -165,10 +167,14 @@ func (suite *keyspaceTestSuite) TearDownTest() { re := suite.Require() re.NoError(failpoint.Disable("github.com/tikv/pd/server/delayStartServerLoop")) re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/keyspace/skipSplitRegion")) +} + +func (suite *keyspaceTestSuite) TearDownSuite() { suite.cancel() + suite.cluster.Destroy() } -func (suite *keyspaceTestSuite) TestshowKeyspace() { +func (suite *keyspaceTestSuite) TestShowKeyspace() { re := suite.Require() keyspaceName := "DEFAULT" keyspaceID := uint32(0) diff --git a/tools/pd-ctl/tests/label/label_test.go b/tools/pd-ctl/tests/label/label_test.go index 185d6e8c1fe..f7370a71872 100644 --- a/tools/pd-ctl/tests/label/label_test.go +++ b/tools/pd-ctl/tests/label/label_test.go @@ -23,7 +23,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/require" - "github.com/tikv/pd/server/api" + "github.com/tikv/pd/pkg/response" "github.com/tikv/pd/server/config" pdTests "github.com/tikv/pd/tests" ctl "github.com/tikv/pd/tools/pd-ctl/pdctl" @@ -34,17 +34,18 @@ func TestLabel(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - cluster, err := pdTests.NewTestCluster(ctx, 1, func(cfg *config.Config, serverName string) { cfg.Replication.StrictlyMatchLabel = false }) + cluster, err := pdTests.NewTestCluster(ctx, 1, func(cfg *config.Config, _ string) { cfg.Replication.StrictlyMatchLabel = false }) re.NoError(err) + defer cluster.Destroy() err = cluster.RunInitialServers() re.NoError(err) cluster.WaitLeader() pdAddr := cluster.GetConfig().GetClientURL() cmd := ctl.GetRootCmd() - stores := []*api.StoreInfo{ + stores := []*response.StoreInfo{ { - Store: &api.MetaStore{ + Store: &response.MetaStore{ Store: &metapb.Store{ Id: 1, State: metapb.StoreState_Up, @@ -60,7 +61,7 @@ func TestLabel(t *testing.T) { }, }, { - Store: &api.MetaStore{ + Store: &response.MetaStore{ Store: &metapb.Store{ Id: 2, State: metapb.StoreState_Up, @@ -76,7 +77,7 @@ func TestLabel(t *testing.T) { }, }, { - Store: &api.MetaStore{ + Store: &response.MetaStore{ Store: &metapb.Store{ Id: 3, State: metapb.StoreState_Up, @@ -128,9 +129,9 @@ func TestLabel(t *testing.T) { args = []string{"-u", pdAddr, "label", "store", "zone", "us-west"} output, err = tests.ExecuteCommand(cmd, args...) 
re.NoError(err) - storesInfo := new(api.StoresInfo) + storesInfo := new(response.StoresInfo) re.NoError(json.Unmarshal(output, &storesInfo)) - sss := []*api.StoreInfo{stores[0], stores[2]} + sss := []*response.StoreInfo{stores[0], stores[2]} tests.CheckStoresInfo(re, storesInfo.Stores, sss) // label isolation [label] diff --git a/tools/pd-ctl/tests/member/member_test.go b/tools/pd-ctl/tests/member/member_test.go index d6888858a3b..7a2d5fc72c9 100644 --- a/tools/pd-ctl/tests/member/member_test.go +++ b/tools/pd-ctl/tests/member/member_test.go @@ -35,6 +35,7 @@ func TestMember(t *testing.T) { defer cancel() cluster, err := pdTests.NewTestCluster(ctx, 3) re.NoError(err) + defer cluster.Destroy() err = cluster.RunInitialServers() re.NoError(err) cluster.WaitLeader() diff --git a/tools/pd-ctl/tests/operator/operator_test.go b/tools/pd-ctl/tests/operator/operator_test.go index 335e6635948..7e5d390c4ce 100644 --- a/tools/pd-ctl/tests/operator/operator_test.go +++ b/tools/pd-ctl/tests/operator/operator_test.go @@ -43,7 +43,7 @@ func TestOperatorTestSuite(t *testing.T) { func (suite *operatorTestSuite) SetupSuite() { suite.env = pdTests.NewSchedulingTestEnvironment(suite.T(), - func(conf *config.Config, serverName string) { + func(conf *config.Config, _ string) { // TODO: enable placement rules conf.Replication.MaxReplicas = 2 conf.Replication.EnablePlacementRules = false @@ -232,25 +232,25 @@ func (suite *operatorTestSuite) checkOperator(cluster *pdTests.TestCluster) { }) } - output, err = tests.ExecuteCommand(cmd, "operator", "add", "transfer-region", "1", "2", "3") + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "operator", "add", "transfer-region", "1", "2", "3") re.NoError(err) re.Contains(string(output), "not supported") - output, err = tests.ExecuteCommand(cmd, "operator", "add", "transfer-region", "1", "2", "follower", "3") + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "operator", "add", "transfer-region", "1", "2", "follower", "3") re.NoError(err) re.Contains(string(output), "not match") - output, err = tests.ExecuteCommand(cmd, "operator", "add", "transfer-peer", "1", "2", "4") + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "operator", "add", "transfer-peer", "1", "2", "4") re.NoError(err) re.Contains(string(output), "is unhealthy") - output, err = tests.ExecuteCommand(cmd, "operator", "add", "transfer-region", "1", "2", "leader", "4", "follower") + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "operator", "add", "transfer-region", "1", "2", "leader", "4", "follower") re.NoError(err) re.Contains(string(output), "is unhealthy") - output, err = tests.ExecuteCommand(cmd, "operator", "add", "transfer-region", "1", "2", "follower", "leader", "3", "follower") + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "operator", "add", "transfer-region", "1", "2", "follower", "leader", "3", "follower") re.NoError(err) re.Contains(string(output), "invalid") - output, err = tests.ExecuteCommand(cmd, "operator", "add", "transfer-region", "1", "leader", "2", "follower", "3") + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "operator", "add", "transfer-region", "1", "leader", "2", "follower", "3") re.NoError(err) re.Contains(string(output), "invalid") - output, err = tests.ExecuteCommand(cmd, "operator", "add", "transfer-region", "1", "2", "leader", "3", "follower") + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "operator", "add", "transfer-region", "1", "2", "leader", "3", "follower") re.NoError(err) re.Contains(string(output), "Success!") output, err 
= tests.ExecuteCommand(cmd, "-u", pdAddr, "operator", "remove", "1") diff --git a/tools/pd-ctl/tests/region/region_test.go b/tools/pd-ctl/tests/region/region_test.go index 90a05ce6f91..2952e137f3b 100644 --- a/tools/pd-ctl/tests/region/region_test.go +++ b/tools/pd-ctl/tests/region/region_test.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/kvproto/pkg/pdpb" "github.com/stretchr/testify/require" "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/response" "github.com/tikv/pd/server/api" pdTests "github.com/tikv/pd/tests" ctl "github.com/tikv/pd/tools/pd-ctl/pdctl" @@ -35,6 +36,7 @@ func TestRegionKeyFormat(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() cluster, err := pdTests.NewTestCluster(ctx, 1) + defer cluster.Destroy() re.NoError(err) err = cluster.RunInitialServers() re.NoError(err) @@ -61,6 +63,7 @@ func TestRegion(t *testing.T) { defer cancel() cluster, err := pdTests.NewTestCluster(ctx, 1) re.NoError(err) + defer cluster.Destroy() err = cluster.RunInitialServers() re.NoError(err) cluster.WaitLeader() @@ -80,6 +83,7 @@ func TestRegion(t *testing.T) { r1 := pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetWrittenBytes(1000), core.SetReadBytes(1000), core.SetRegionConfVer(1), core.SetRegionVersion(1), core.SetApproximateSize(1), core.SetApproximateKeys(100), + core.SetReadQuery(100), core.SetWrittenQuery(100), core.SetPeers([]*metapb.Peer{ {Id: 1, StoreId: 1}, {Id: 5, StoreId: 2}, @@ -89,18 +93,26 @@ func TestRegion(t *testing.T) { r2 := pdTests.MustPutRegion(re, cluster, 2, 1, []byte("b"), []byte("c"), core.SetWrittenBytes(2000), core.SetReadBytes(0), core.SetRegionConfVer(2), core.SetRegionVersion(3), core.SetApproximateSize(144), core.SetApproximateKeys(14400), + core.SetReadQuery(200), core.SetWrittenQuery(200), ) r3 := pdTests.MustPutRegion(re, cluster, 3, 1, []byte("c"), []byte("d"), core.SetWrittenBytes(500), core.SetReadBytes(800), core.SetRegionConfVer(3), core.SetRegionVersion(2), core.SetApproximateSize(30), core.SetApproximateKeys(3000), + core.SetReadQuery(300), core.SetWrittenQuery(300), core.WithDownPeers([]*pdpb.PeerStats{{Peer: downPeer, DownSeconds: 3600}}), core.WithPendingPeers([]*metapb.Peer{downPeer}), core.WithLearners([]*metapb.Peer{{Id: 3, StoreId: 1}})) r4 := pdTests.MustPutRegion(re, cluster, 4, 1, []byte("d"), []byte("e"), - core.SetWrittenBytes(100), core.SetReadBytes(100), core.SetRegionConfVer(1), - core.SetRegionVersion(1), core.SetApproximateSize(10), core.SetApproximateKeys(1000), + core.SetWrittenBytes(100), core.SetReadBytes(100), core.SetRegionConfVer(4), + core.SetRegionVersion(4), core.SetApproximateSize(10), core.SetApproximateKeys(1000), + core.SetReadQuery(400), core.SetWrittenQuery(400), ) defer cluster.Destroy() + getRegionsByType := func(storeID uint64, regionType core.SubTreeRegionType) []*core.RegionInfo { + regions, _ := leaderServer.GetRaftCluster().GetStoreRegionsByTypeInSubTree(storeID, regionType) + return regions + } + var testRegionsCases = []struct { args []string expect []*core.RegionInfo @@ -111,27 +123,12 @@ func TestRegion(t *testing.T) { {[]string{"region", "sibling", "2"}, leaderServer.GetAdjacentRegions(leaderServer.GetRegionInfoByID(2))}, // region store command {[]string{"region", "store", "1"}, leaderServer.GetStoreRegions(1)}, - {[]string{"region", "store", "1"}, []*core.RegionInfo{r1, r2, r3, r4}}, - // region topread [limit] command - {[]string{"region", "topread", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool 
{ return a.GetBytesRead() < b.GetBytesRead() }, 2)}, - // region topwrite [limit] command - {[]string{"region", "topwrite", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { return a.GetBytesWritten() < b.GetBytesWritten() }, 2)}, - // region topconfver [limit] command - {[]string{"region", "topconfver", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { - return a.GetMeta().GetRegionEpoch().GetConfVer() < b.GetMeta().GetRegionEpoch().GetConfVer() - }, 2)}, - // region topversion [limit] command - {[]string{"region", "topversion", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { - return a.GetMeta().GetRegionEpoch().GetVersion() < b.GetMeta().GetRegionEpoch().GetVersion() - }, 2)}, - // region topsize [limit] command - {[]string{"region", "topsize", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { - return a.GetApproximateSize() < b.GetApproximateSize() - }, 2)}, - // region topkeys [limit] command - {[]string{"region", "topkeys", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { - return a.GetApproximateKeys() < b.GetApproximateKeys() - }, 2)}, + {[]string{"region", "store", "1", "--type=leader"}, getRegionsByType(1, core.LeaderInSubTree)}, + {[]string{"region", "store", "1", "--type=follower"}, getRegionsByType(1, core.FollowerInSubTree)}, + {[]string{"region", "store", "1", "--type=learner"}, getRegionsByType(1, core.LearnerInSubTree)}, + {[]string{"region", "store", "1", "--type=witness"}, getRegionsByType(1, core.WitnessInSubTree)}, + {[]string{"region", "store", "1", "--type=pending"}, getRegionsByType(1, core.PendingPeerInSubTree)}, + {[]string{"region", "store", "1", "--type=all"}, []*core.RegionInfo{r1, r2, r3, r4}}, // region check extra-peer command {[]string{"region", "check", "extra-peer"}, []*core.RegionInfo{r1}}, // region check miss-peer command @@ -168,11 +165,66 @@ func TestRegion(t *testing.T) { args := append([]string{"-u", pdAddr}, testCase.args...) output, err := tests.ExecuteCommand(cmd, args...) 
re.NoError(err) - regions := &api.RegionsInfo{} - re.NoError(json.Unmarshal(output, regions)) + regions := &response.RegionsInfo{} + re.NoError(json.Unmarshal(output, regions), string(output)) tests.CheckRegionsInfo(re, regions, testCase.expect) } + testRegionsCases = []struct { + args []string + expect []*core.RegionInfo + }{ + // region topread [limit] command + {[]string{"region", "topread"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { return a.GetBytesRead() < b.GetBytesRead() }, 4)}, + // region topwrite [limit] command + {[]string{"region", "topwrite"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { return a.GetBytesWritten() < b.GetBytesWritten() }, 4)}, + // region topread [limit] command + {[]string{"region", "topread", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { return a.GetBytesRead() < b.GetBytesRead() }, 2)}, + // region topwrite [limit] command + {[]string{"region", "topwrite", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { return a.GetBytesWritten() < b.GetBytesWritten() }, 2)}, + // region topread byte [limit] command + {[]string{"region", "topread", "byte"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { return a.GetBytesRead() < b.GetBytesRead() }, 4)}, + // region topwrite byte [limit] command + {[]string{"region", "topwrite", "byte"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { return a.GetBytesWritten() < b.GetBytesWritten() }, 4)}, + // region topread byte [limit] command + {[]string{"region", "topread", "query"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { return a.GetReadQueryNum() < b.GetReadQueryNum() }, 4)}, + // region topwrite byte [limit] command + {[]string{"region", "topwrite", "query"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { return a.GetWriteQueryNum() < b.GetWriteQueryNum() }, 4)}, + // region topread byte [limit] command + {[]string{"region", "topread", "byte", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { return a.GetBytesRead() < b.GetBytesRead() }, 2)}, + // region topwrite byte [limit] command + {[]string{"region", "topwrite", "byte", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { return a.GetBytesWritten() < b.GetBytesWritten() }, 2)}, + // region topread byte [limit] command + {[]string{"region", "topread", "query", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { return a.GetReadQueryNum() < b.GetReadQueryNum() }, 2)}, + // region topwrite byte [limit] command + {[]string{"region", "topwrite", "query", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { return a.GetWriteQueryNum() < b.GetWriteQueryNum() }, 2)}, + // region topconfver [limit] command + {[]string{"region", "topconfver", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { + return a.GetMeta().GetRegionEpoch().GetConfVer() < b.GetMeta().GetRegionEpoch().GetConfVer() + }, 2)}, + // region topversion [limit] command + {[]string{"region", "topversion", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { + return a.GetMeta().GetRegionEpoch().GetVersion() < b.GetMeta().GetRegionEpoch().GetVersion() + }, 2)}, + // region topsize [limit] command + {[]string{"region", "topsize", "2"}, 
api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { + return a.GetApproximateSize() < b.GetApproximateSize() + }, 2)}, + // region topkeys [limit] command + {[]string{"region", "topkeys", "2"}, api.TopNRegions(leaderServer.GetRegions(), func(a, b *core.RegionInfo) bool { + return a.GetApproximateKeys() < b.GetApproximateKeys() + }, 2)}, + } + + for _, testCase := range testRegionsCases { + args := append([]string{"-u", pdAddr}, testCase.args...) + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + regions := &response.RegionsInfo{} + re.NoError(json.Unmarshal(output, regions), string(output)) + tests.CheckRegionsInfoWithoutSort(re, regions, testCase.expect) + } + var testRegionCases = []struct { args []string expect *core.RegionInfo @@ -191,7 +243,7 @@ func TestRegion(t *testing.T) { args := append([]string{"-u", pdAddr}, testCase.args...) output, err := tests.ExecuteCommand(cmd, args...) re.NoError(err) - region := &api.RegionInfo{} + region := &response.RegionInfo{} re.NoError(json.Unmarshal(output, region)) tests.CheckRegionInfo(re, region, testCase.expect) } @@ -215,6 +267,7 @@ func TestRegionNoLeader(t *testing.T) { defer cancel() cluster, err := pdTests.NewTestCluster(ctx, 1) re.NoError(err) + defer cluster.Destroy() err = cluster.RunInitialServers() re.NoError(err) cluster.WaitLeader() diff --git a/tools/pd-ctl/tests/resourcemanager/resource_manager_command_test.go b/tools/pd-ctl/tests/resourcemanager/resource_manager_command_test.go index 5d397044811..d387a2b87ae 100644 --- a/tools/pd-ctl/tests/resourcemanager/resource_manager_command_test.go +++ b/tools/pd-ctl/tests/resourcemanager/resource_manager_command_test.go @@ -58,7 +58,7 @@ func (s *testResourceManagerSuite) TearDownSuite() { func (s *testResourceManagerSuite) TestConfigController() { re := s.Require() - expectCfg := server.ControllerConfig{} + expectCfg := server.Config{} expectCfg.Adjust(nil) // Show controller config checkShow := func() { @@ -68,8 +68,8 @@ func (s *testResourceManagerSuite) TestConfigController() { actualCfg := server.ControllerConfig{} err = json.Unmarshal(output, &actualCfg) - re.NoError(err) - re.Equal(expectCfg, actualCfg) + re.NoError(err, string(output)) + re.Equal(expectCfg.Controller, actualCfg) } // Check default config @@ -80,20 +80,20 @@ func (s *testResourceManagerSuite) TestConfigController() { output, err := tests.ExecuteCommand(ctl.GetRootCmd(), args...) re.NoError(err) re.Contains(string(output), "Success!") - expectCfg.LTBMaxWaitDuration = typeutil.Duration{Duration: 1 * time.Hour} + expectCfg.Controller.LTBMaxWaitDuration = typeutil.Duration{Duration: 1 * time.Hour} checkShow() args = []string{"-u", s.pdAddr, "resource-manager", "config", "controller", "set", "enable-controller-trace-log", "true"} output, err = tests.ExecuteCommand(ctl.GetRootCmd(), args...) re.NoError(err) re.Contains(string(output), "Success!") - expectCfg.EnableControllerTraceLog = true + expectCfg.Controller.EnableControllerTraceLog = true checkShow() args = []string{"-u", s.pdAddr, "resource-manager", "config", "controller", "set", "write-base-cost", "2"} output, err = tests.ExecuteCommand(ctl.GetRootCmd(), args...) 
re.NoError(err) re.Contains(string(output), "Success!") - expectCfg.RequestUnit.WriteBaseCost = 2 + expectCfg.Controller.RequestUnit.WriteBaseCost = 2 checkShow() } diff --git a/tools/pd-ctl/tests/scheduler/scheduler_test.go b/tools/pd-ctl/tests/scheduler/scheduler_test.go index ab96d430523..3ea81961015 100644 --- a/tools/pd-ctl/tests/scheduler/scheduler_test.go +++ b/tools/pd-ctl/tests/scheduler/scheduler_test.go @@ -50,17 +50,19 @@ func TestSchedulerTestSuite(t *testing.T) { func (suite *schedulerTestSuite) SetupSuite() { re := suite.Require() re.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/skipStoreConfigSync", `return(true)`)) - suite.env = pdTests.NewSchedulingTestEnvironment(suite.T()) suite.defaultSchedulers = []string{ "balance-leader-scheduler", "balance-region-scheduler", "balance-hot-region-scheduler", - "balance-witness-scheduler", - "transfer-witness-leader-scheduler", "evict-slow-store-scheduler", } } +func (suite *schedulerTestSuite) SetupTest() { + // use a new environment to avoid affecting other tests + suite.env = pdTests.NewSchedulingTestEnvironment(suite.T()) +} + func (suite *schedulerTestSuite) TearDownSuite() { re := suite.Require() suite.env.Cleanup() @@ -93,6 +95,7 @@ func (suite *schedulerTestSuite) TearDownTest() { } } suite.env.RunFuncInTwoModes(cleanFunc) + suite.env.Cleanup() } func (suite *schedulerTestSuite) TestScheduler() { @@ -133,29 +136,9 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *pdTests.TestCluster) { re.Contains(string(output), "Usage") } - checkSchedulerCommand := func(args []string, expected map[string]bool) { - if args != nil { - echo := mustExec(re, cmd, args, nil) - re.Contains(echo, "Success!") - } - testutil.Eventually(re, func() bool { - var schedulers []string - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, &schedulers) - if len(schedulers) != len(expected) { - return false - } - for _, scheduler := range schedulers { - if _, ok := expected[scheduler]; !ok { - return false - } - } - return true - }) - } - - checkSchedulerConfigCommand := func(expectedConfig map[string]interface{}, schedulerName string) { + checkSchedulerConfigCommand := func(expectedConfig map[string]any, schedulerName string) { testutil.Eventually(re, func() bool { - configInfo := make(map[string]interface{}) + configInfo := make(map[string]any) mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName}, &configInfo) return reflect.DeepEqual(expectedConfig, configInfo) }) @@ -171,25 +154,21 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *pdTests.TestCluster) { // scheduler show command expected := map[string]bool{ - "balance-region-scheduler": true, - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "transfer-witness-leader-scheduler": true, - "balance-witness-scheduler": true, - "evict-slow-store-scheduler": true, + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "evict-slow-store-scheduler": true, } - checkSchedulerCommand(nil, expected) + checkSchedulerCommand(re, cmd, pdAddr, nil, expected) // scheduler delete command args := []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "transfer-witness-leader-scheduler": true, - "balance-witness-scheduler": true, - "evict-slow-store-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + 
"evict-slow-store-scheduler": true, } - checkSchedulerCommand(args, expected) + checkSchedulerCommand(re, cmd, pdAddr, args, expected) // avoid the influence of the scheduler order schedulers := []string{"evict-leader-scheduler", "grant-leader-scheduler", "evict-leader-scheduler", "grant-leader-scheduler"} @@ -228,118 +207,261 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *pdTests.TestCluster) { // scheduler add command args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "2"} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - schedulers[idx]: true, - "transfer-witness-leader-scheduler": true, - "balance-witness-scheduler": true, - "evict-slow-store-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, } - checkSchedulerCommand(args, expected) + checkSchedulerCommand(re, cmd, pdAddr, args, expected) // scheduler config show command - expectedConfig := make(map[string]interface{}) - expectedConfig["store-id-ranges"] = map[string]interface{}{"2": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}} + expectedConfig := make(map[string]any) + expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}} checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) checkStorePause([]uint64{2}, schedulers[idx]) // scheduler config update command args = []string{"-u", pdAddr, "scheduler", "config", schedulers[idx], "add-store", "3"} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - schedulers[idx]: true, - "transfer-witness-leader-scheduler": true, - "balance-witness-scheduler": true, - "evict-slow-store-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, } // check update success - checkSchedulerCommand(args, expected) - expectedConfig["store-id-ranges"] = map[string]interface{}{"2": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}, "3": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}} + checkSchedulerCommand(re, cmd, pdAddr, args, expected) + expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}, "3": []any{map[string]any{"end-key": "", "start-key": ""}}} checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) checkStorePause([]uint64{2, 3}, schedulers[idx]) // scheduler delete command args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx]} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "transfer-witness-leader-scheduler": true, - "balance-witness-scheduler": true, - "evict-slow-store-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "evict-slow-store-scheduler": true, } - checkSchedulerCommand(args, expected) + checkSchedulerCommand(re, cmd, pdAddr, args, expected) checkStorePause([]uint64{}, schedulers[idx]) // scheduler add command args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "2"} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - schedulers[idx]: true, - "transfer-witness-leader-scheduler": true, - "balance-witness-scheduler": true, - "evict-slow-store-scheduler": true, + 
"balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, } - checkSchedulerCommand(args, expected) + checkSchedulerCommand(re, cmd, pdAddr, args, expected) checkStorePause([]uint64{2}, schedulers[idx]) // scheduler add command twice args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "4"} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - schedulers[idx]: true, - "transfer-witness-leader-scheduler": true, - "balance-witness-scheduler": true, - "evict-slow-store-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, } - checkSchedulerCommand(args, expected) + checkSchedulerCommand(re, cmd, pdAddr, args, expected) // check add success - expectedConfig["store-id-ranges"] = map[string]interface{}{"2": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}, "4": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}} + expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}, "4": []any{map[string]any{"end-key": "", "start-key": ""}}} checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) checkStorePause([]uint64{2, 4}, schedulers[idx]) // scheduler remove command [old] args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx] + "-4"} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - schedulers[idx]: true, - "transfer-witness-leader-scheduler": true, - "balance-witness-scheduler": true, - "evict-slow-store-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, } - checkSchedulerCommand(args, expected) + checkSchedulerCommand(re, cmd, pdAddr, args, expected) // check remove success - expectedConfig["store-id-ranges"] = map[string]interface{}{"2": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}} + expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}} checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) checkStorePause([]uint64{2}, schedulers[idx]) // scheduler remove command, when remove the last store, it should remove whole scheduler args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx] + "-2"} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "transfer-witness-leader-scheduler": true, - "balance-witness-scheduler": true, - "evict-slow-store-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "evict-slow-store-scheduler": true, } - checkSchedulerCommand(args, expected) + checkSchedulerCommand(re, cmd, pdAddr, args, expected) checkStorePause([]uint64{}, schedulers[idx]) } + // test remove and add scheduler + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-region-scheduler"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) + re.NotContains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", 
"balance-region-scheduler"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "1"}, nil) + re.Equal("Success! The scheduler is created.\n", echo) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "2"}, nil) + re.Equal("Success! The scheduler has been applied to the store.\n", echo) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-2"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil) + re.Contains(echo, "404") + testutil.Eventually(re, func() bool { // wait for removed scheduler to be synced to scheduling server. + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "evict-leader-scheduler"}, nil) + return strings.Contains(echo, "[404] scheduler not found") + }) + + // test remove and add + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-hot-region-scheduler"}, nil) + re.Contains(echo, "Success") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-hot-region-scheduler"}, nil) + re.Contains(echo, "Success") + + // test show scheduler with paused and disabled status. + checkSchedulerWithStatusCommand := func(status string, expected []string) { + testutil.Eventually(re, func() bool { + var schedulers []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show", "--status", status}, &schedulers) + return reflect.DeepEqual(expected, schedulers) + }) + } + + // test scatter range scheduler + for _, name := range []string{ + "test", "test#", "?test", + /* TODO: to handle case like "tes&t", we need to modify the server's JSON render to unescape the HTML characters */ + } { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "scatter-range", "--format=raw", "a", "b", name}, nil) + re.Contains(echo, "Success!") + schedulerName := fmt.Sprintf("scatter-range-%s", name) + // test show scheduler + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return strings.Contains(echo, schedulerName) + }) + // test remove scheduler + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", schedulerName}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return !strings.Contains(echo, schedulerName) + }) + } + + mustUsage([]string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler"}) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) + re.Contains(echo, "Success!") + checkSchedulerWithStatusCommand("paused", []string{ + "balance-leader-scheduler", + }) + result := make(map[string]any) + testutil.Eventually(re, func() bool { + mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "describe", "balance-leader-scheduler"}, &result) + return len(result) != 0 && result["status"] == "paused" && result["summary"] == "" + }, testutil.WithWaitFor(30*time.Second)) + + mustUsage([]string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler", "60"}) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler"}, 
nil) + re.Contains(echo, "Success!") + checkSchedulerWithStatusCommand("paused", []string{}) + + // set label scheduler to disabled manually. + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "label-scheduler"}, nil) + re.Contains(echo, "Success!") + cfg := leaderServer.GetServer().GetScheduleConfig() + origin := cfg.Schedulers + cfg.Schedulers = sc.SchedulerConfigs{{Type: "label", Disable: true}} + err := leaderServer.GetServer().SetScheduleConfig(*cfg) + re.NoError(err) + checkSchedulerWithStatusCommand("disabled", []string{"label-scheduler"}) + // reset Schedulers in ScheduleConfig + cfg.Schedulers = origin + err = leaderServer.GetServer().SetScheduleConfig(*cfg) + re.NoError(err) + checkSchedulerWithStatusCommand("disabled", []string{}) +} + +func (suite *schedulerTestSuite) TestSchedulerConfig() { + suite.env.RunTestInTwoModes(suite.checkSchedulerConfig) +} + +func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestCluster) { + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 3, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 4, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + for _, store := range stores { + pdTests.MustPutStore(re, cluster, store) + } + + // note: because pdqsort is an unstable sort algorithm, set ApproximateSize for this region. + pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10)) + + // test evict-slow-store && evict-slow-trend schedulers config + evictSlownessSchedulers := []string{"evict-slow-store-scheduler", "evict-slow-trend-scheduler"} + for _, schedulerName := range evictSlownessSchedulers { + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", schedulerName}, nil) + if strings.Contains(echo, "Success!") { + re.Contains(echo, "Success!") + } else { + re.Contains(echo, "scheduler existed") + } + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return strings.Contains(echo, schedulerName) + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "set", "recovery-duration", "100"}, nil) + re.Contains(echo, "Success! Config updated.") + conf := make(map[string]any) + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "show"}, &conf) + return conf["recovery-duration"] == 100. 
+ }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", schedulerName}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return !strings.Contains(echo, schedulerName) + }) + } // test shuffle region config - checkSchedulerCommand([]string{"-u", pdAddr, "scheduler", "add", "shuffle-region-scheduler"}, map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "shuffle-region-scheduler": true, - "transfer-witness-leader-scheduler": true, - "balance-witness-scheduler": true, - "evict-slow-store-scheduler": true, + checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "add", "shuffle-region-scheduler"}, map[string]bool{ + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "shuffle-region-scheduler": true, }) var roles []string mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "show-roles"}, &roles) @@ -353,19 +475,22 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *pdTests.TestCluster) { mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler"}, &roles) re.Equal([]string{"learner"}, roles) + checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "remove", "shuffle-region-scheduler"}, map[string]bool{ + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + }) + // test grant hot region scheduler config - checkSchedulerCommand([]string{"-u", pdAddr, "scheduler", "add", "grant-hot-region-scheduler", "1", "1,2,3"}, map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "shuffle-region-scheduler": true, - "grant-hot-region-scheduler": true, - "transfer-witness-leader-scheduler": true, - "balance-witness-scheduler": true, - "evict-slow-store-scheduler": true, + checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "add", "grant-hot-region-scheduler", "1", "1,2,3"}, map[string]bool{ + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "grant-hot-region-scheduler": true, }) - var conf3 map[string]interface{} - expected3 := map[string]interface{}{ - "store-id": []interface{}{float64(1), float64(2), float64(3)}, + var conf3 map[string]any + expected3 := map[string]any{ + "store-id": []any{float64(1), float64(2), float64(3)}, "store-leader-id": float64(1), } mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) @@ -378,75 +503,141 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *pdTests.TestCluster) { mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) return reflect.DeepEqual(expected3, conf3) }) + checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "remove", "grant-hot-region-scheduler"}, map[string]bool{ + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + }) - // test remove and add scheduler - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-region-scheduler"}, nil) + // test shuffle hot region scheduler + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "shuffle-hot-region-scheduler"}, nil) re.Contains(echo, "Success!") - echo = 
mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil)
+    testutil.Eventually(re, func() bool {
+        echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil)
+        return strings.Contains(echo, "shuffle-hot-region-scheduler")
+    })
+    echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-hot-region-scheduler", "set", "limit", "127"}, nil)
     re.Contains(echo, "Success!")
-    echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil)
-    re.NotContains(echo, "Success!")
-    echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-region-scheduler"}, nil)
+    conf := make(map[string]any)
+    testutil.Eventually(re, func() bool {
+        mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-hot-region-scheduler", "show"}, &conf)
+        return conf["limit"] == 127.
+    })
+    echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "shuffle-hot-region-scheduler"}, nil)
     re.Contains(echo, "Success!")
-    echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "1"}, nil)
-    re.Contains(echo, "Success! The scheduler is created.")
-    echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "2"}, nil)
-    re.Contains(echo, "Success! The scheduler has been applied to the store.")
-    echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil)
+    testutil.Eventually(re, func() bool {
+        echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil)
+        return !strings.Contains(echo, "shuffle-hot-region-scheduler")
+    })
+
+    // test balance leader config
+    conf = make(map[string]any)
+    conf1 := make(map[string]any)
+    mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "show"}, &conf)
+    re.Equal(4., conf["batch"])
+    echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "set", "batch", "3"}, nil)
     re.Contains(echo, "Success!")
-    echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-2"}, nil)
+    testutil.Eventually(re, func() bool {
+        mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler"}, &conf1)
+        return conf1["batch"] == 3.
+    })
+    echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-leader-scheduler"}, nil)
+    re.NotContains(echo, "Success!")
+    echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-leader-scheduler"}, nil)
     re.Contains(echo, "Success!")
-    echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil)
+    echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-leader-scheduler"}, nil)
     re.Contains(echo, "404")
-    testutil.Eventually(re, func() bool { // wait for removed scheduler to be synced to scheduling server.
-        echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "evict-leader-scheduler"}, nil)
-        return strings.Contains(echo, "[404] scheduler not found")
+    re.Contains(echo, "PD:scheduler:ErrSchedulerNotFound]scheduler not found")
+    // The scheduling service needs time to sync from PD.
+    testutil.Eventually(re, func() bool {
+        echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler"}, nil)
+        return strings.Contains(echo, "404") && strings.Contains(echo, "scheduler not found")
     })
+    echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-leader-scheduler"}, nil)
+    re.Contains(echo, "Success!")
+}
+
+func (suite *schedulerTestSuite) TestHotRegionSchedulerConfig() {
+    suite.env.RunTestInTwoModes(suite.checkHotRegionSchedulerConfig)
+}
+
+func (suite *schedulerTestSuite) checkHotRegionSchedulerConfig(cluster *pdTests.TestCluster) {
+    re := suite.Require()
+    pdAddr := cluster.GetConfig().GetClientURL()
+    cmd := ctl.GetRootCmd()
+    stores := []*metapb.Store{
+        {
+            Id:            1,
+            State:         metapb.StoreState_Up,
+            LastHeartbeat: time.Now().UnixNano(),
+        },
+        {
+            Id:            2,
+            State:         metapb.StoreState_Up,
+            LastHeartbeat: time.Now().UnixNano(),
+        },
+        {
+            Id:            3,
+            State:         metapb.StoreState_Up,
+            LastHeartbeat: time.Now().UnixNano(),
+        },
+        {
+            Id:            4,
+            State:         metapb.StoreState_Up,
+            LastHeartbeat: time.Now().UnixNano(),
+        },
+    }
+    for _, store := range stores {
+        pdTests.MustPutStore(re, cluster, store)
+    }
+    // note: because pdqsort is an unstable sort algorithm, set ApproximateSize for this region.
+    pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10))
+    leaderServer := cluster.GetLeaderServer()
     // test hot region config
-    expected1 := map[string]interface{}{
-        "min-hot-byte-rate": float64(100),
-        "min-hot-key-rate": float64(10),
-        "min-hot-query-rate": float64(10),
-        "max-zombie-rounds": float64(3),
-        "max-peer-number": float64(1000),
-        "byte-rate-rank-step-ratio": 0.05,
-        "key-rate-rank-step-ratio": 0.05,
-        "query-rate-rank-step-ratio": 0.05,
-        "count-rank-step-ratio": 0.01,
-        "great-dec-ratio": 0.95,
-        "minor-dec-ratio": 0.99,
-        "src-tolerance-ratio": 1.05,
-        "dst-tolerance-ratio": 1.05,
-        "read-priorities": []interface{}{"byte", "key"},
-        "write-leader-priorities": []interface{}{"key", "byte"},
-        "write-peer-priorities": []interface{}{"byte", "key"},
-        "strict-picking-store": "true",
-        "enable-for-tiflash": "true",
-        "rank-formula-version": "v2",
-        "split-thresholds": 0.2,
+    expected1 := map[string]any{
+        "min-hot-byte-rate": float64(100),
+        "min-hot-key-rate": float64(10),
+        "min-hot-query-rate": float64(10),
+        "src-tolerance-ratio": 1.05,
+        "dst-tolerance-ratio": 1.05,
+        "read-priorities": []any{"byte", "key"},
+        "write-leader-priorities": []any{"key", "byte"},
+        "write-peer-priorities": []any{"byte", "key"},
+        "strict-picking-store": "true",
+        "rank-formula-version": "v2",
+        "split-thresholds": 0.2,
+        "history-sample-duration": "5m0s",
+        "history-sample-interval": "30s",
     }
-    checkHotSchedulerConfig := func(expect map[string]interface{}) {
+    checkHotSchedulerConfig := func(expect map[string]any) {
         testutil.Eventually(re, func() bool {
-            var conf1 map[string]interface{}
+            var conf1 map[string]any
             mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1)
             return reflect.DeepEqual(expect, conf1)
         })
     }
-
-    var conf map[string]interface{}
+    // scheduler show command
+    expected := map[string]bool{
+        "balance-region-scheduler":     true,
+        "balance-leader-scheduler":     true,
+        "balance-hot-region-scheduler": true,
+        "evict-slow-store-scheduler":   true,
+    }
+    checkSchedulerCommand(re, cmd, pdAddr, nil, expected)
+    var conf map[string]any
     mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "list"}, &conf)
re.Equal(expected1, conf) mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "show"}, &conf) re.Equal(expected1, conf) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "src-tolerance-ratio", "1.02"}, nil) + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "src-tolerance-ratio", "1.02"}, nil) re.Contains(echo, "Success!") expected1["src-tolerance-ratio"] = 1.02 checkHotSchedulerConfig(expected1) echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,key"}, nil) re.Contains(echo, "Success!") - expected1["read-priorities"] = []interface{}{"byte", "key"} + expected1["read-priorities"] = []any{"byte", "key"} checkHotSchedulerConfig(expected1) echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key"}, nil) @@ -454,7 +645,7 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *pdTests.TestCluster) { checkHotSchedulerConfig(expected1) echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,byte"}, nil) re.Contains(echo, "Success!") - expected1["read-priorities"] = []interface{}{"key", "byte"} + expected1["read-priorities"] = []any{"key", "byte"} checkHotSchedulerConfig(expected1) echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "foo,bar"}, nil) re.Contains(echo, "Failed!") @@ -495,6 +686,26 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *pdTests.TestCluster) { re.Contains(echo, "Success!") checkHotSchedulerConfig(expected1) + expected1["history-sample-duration"] = "1m0s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-duration", "1m"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + expected1["history-sample-interval"] = "1s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-interval", "1s"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + expected1["history-sample-duration"] = "0s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-duration", "0s"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + expected1["history-sample-interval"] = "0s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-interval", "0s"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + // test compatibility re.Equal("2.0.0", leaderServer.GetClusterVersion().String()) for _, store := range stores { @@ -504,134 +715,13 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *pdTests.TestCluster) { pdTests.MustPutStore(re, cluster, store) } re.Equal("5.2.0", leaderServer.GetClusterVersion().String()) - // After upgrading, we should not use query. + // After upgrading, we can use query. 
+ expected1["write-leader-priorities"] = []any{"query", "byte"} checkHotSchedulerConfig(expected1) // cannot set qps as write-peer-priorities echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-peer-priorities", "query,byte"}, nil) re.Contains(echo, "query is not allowed to be set in priorities for write-peer-priorities") checkHotSchedulerConfig(expected1) - - // test remove and add - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-hot-region-scheduler"}, nil) - re.Contains(echo, "Success") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-hot-region-scheduler"}, nil) - re.Contains(echo, "Success") - - // test balance leader config - conf = make(map[string]interface{}) - conf1 := make(map[string]interface{}) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "show"}, &conf) - re.Equal(4., conf["batch"]) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "set", "batch", "3"}, nil) - re.Contains(echo, "Success!") - testutil.Eventually(re, func() bool { - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler"}, &conf1) - return conf1["batch"] == 3. - }) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-leader-scheduler"}, nil) - re.NotContains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-leader-scheduler"}, nil) - re.Contains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-leader-scheduler"}, nil) - re.Contains(echo, "404") - re.Contains(echo, "PD:scheduler:ErrSchedulerNotFound]scheduler not found") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler"}, nil) - re.Contains(echo, "404") - re.Contains(echo, "scheduler not found") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-leader-scheduler"}, nil) - re.Contains(echo, "Success!") - - // test evict-slow-store && evict-slow-trend schedulers config - evictSlownessSchedulers := []string{"evict-slow-store-scheduler", "evict-slow-trend-scheduler"} - for _, schedulerName := range evictSlownessSchedulers { - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", schedulerName}, nil) - if strings.Contains(echo, "Success!") { - re.Contains(echo, "Success!") - } else { - re.Contains(echo, "scheduler existed") - } - testutil.Eventually(re, func() bool { - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) - return strings.Contains(echo, schedulerName) - }) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "set", "recovery-duration", "100"}, nil) - re.Contains(echo, "Success! Config updated.") - conf = make(map[string]interface{}) - testutil.Eventually(re, func() bool { - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "show"}, &conf) - return conf["recovery-duration"] == 100. 
- }) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", schedulerName}, nil) - re.Contains(echo, "Success!") - testutil.Eventually(re, func() bool { - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) - return !strings.Contains(echo, schedulerName) - }) - } - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-slow-store-scheduler"}, nil) - re.Contains(echo, "Success!") - - // test shuffle hot region scheduler - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "shuffle-hot-region-scheduler"}, nil) - re.Contains(echo, "Success!") - testutil.Eventually(re, func() bool { - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) - return strings.Contains(echo, "shuffle-hot-region-scheduler") - }) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-hot-region-scheduler", "set", "limit", "127"}, nil) - re.Contains(echo, "Success!") - conf = make(map[string]interface{}) - testutil.Eventually(re, func() bool { - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-hot-region-scheduler", "show"}, &conf) - return conf["limit"] == 127. - }) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "shuffle-hot-region-scheduler"}, nil) - re.Contains(echo, "Success!") - testutil.Eventually(re, func() bool { - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) - return !strings.Contains(echo, "shuffle-hot-region-scheduler") - }) - - // test show scheduler with paused and disabled status. - checkSchedulerWithStatusCommand := func(status string, expected []string) { - testutil.Eventually(re, func() bool { - var schedulers []string - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show", "--status", status}, &schedulers) - return reflect.DeepEqual(expected, schedulers) - }) - } - - mustUsage([]string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler"}) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) - re.Contains(echo, "Success!") - checkSchedulerWithStatusCommand("paused", []string{ - "balance-leader-scheduler", - }) - result := make(map[string]interface{}) - testutil.Eventually(re, func() bool { - mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "describe", "balance-leader-scheduler"}, &result) - return len(result) != 0 && result["status"] == "paused" && result["summary"] == "" - }, testutil.WithWaitFor(30*time.Second)) - - mustUsage([]string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler", "60"}) - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler"}, nil) - re.Contains(echo, "Success!") - checkSchedulerWithStatusCommand("paused", []string{}) - - // set label scheduler to disabled manually. 
- echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "label-scheduler"}, nil) - re.Contains(echo, "Success!") - cfg := leaderServer.GetServer().GetScheduleConfig() - origin := cfg.Schedulers - cfg.Schedulers = sc.SchedulerConfigs{{Type: "label", Disable: true}} - err := leaderServer.GetServer().SetScheduleConfig(*cfg) - re.NoError(err) - checkSchedulerWithStatusCommand("disabled", []string{"label-scheduler"}) - // reset Schedulers in ScheduleConfig - cfg.Schedulers = origin - err = leaderServer.GetServer().SetScheduleConfig(*cfg) - re.NoError(err) - checkSchedulerWithStatusCommand("disabled", []string{}) } func (suite *schedulerTestSuite) TestSchedulerDiagnostic() { @@ -644,7 +734,7 @@ func (suite *schedulerTestSuite) checkSchedulerDiagnostic(cluster *pdTests.TestC cmd := ctl.GetRootCmd() checkSchedulerDescribeCommand := func(schedulerName, expectedStatus, expectedSummary string) { - result := make(map[string]interface{}) + result := make(map[string]any) testutil.Eventually(re, func() bool { mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "describe", schedulerName}, &result) return len(result) != 0 && expectedStatus == result["status"] && expectedSummary == result["summary"] @@ -696,7 +786,7 @@ func (suite *schedulerTestSuite) checkSchedulerDiagnostic(cluster *pdTests.TestC checkSchedulerDescribeCommand("balance-leader-scheduler", "normal", "") } -func mustExec(re *require.Assertions, cmd *cobra.Command, args []string, v interface{}) string { +func mustExec(re *require.Assertions, cmd *cobra.Command, args []string, v any) string { output, err := tests.ExecuteCommand(cmd, args...) re.NoError(err) if v == nil { @@ -706,7 +796,7 @@ func mustExec(re *require.Assertions, cmd *cobra.Command, args []string, v inter return "" } -func mightExec(re *require.Assertions, cmd *cobra.Command, args []string, v interface{}) { +func mightExec(re *require.Assertions, cmd *cobra.Command, args []string, v any) { output, err := tests.ExecuteCommand(cmd, args...) 
re.NoError(err) if v == nil { @@ -714,3 +804,23 @@ func mightExec(re *require.Assertions, cmd *cobra.Command, args []string, v inte } json.Unmarshal(output, v) } + +func checkSchedulerCommand(re *require.Assertions, cmd *cobra.Command, pdAddr string, args []string, expected map[string]bool) { + if args != nil { + echo := mustExec(re, cmd, args, nil) + re.Contains(echo, "Success!") + } + testutil.Eventually(re, func() bool { + var schedulers []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, &schedulers) + if len(schedulers) != len(expected) { + return false + } + for _, scheduler := range schedulers { + if _, ok := expected[scheduler]; !ok { + return false + } + } + return true + }) +} diff --git a/tools/pd-ctl/tests/store/store_test.go b/tools/pd-ctl/tests/store/store_test.go index c8103414e9c..afb97401168 100644 --- a/tools/pd-ctl/tests/store/store_test.go +++ b/tools/pd-ctl/tests/store/store_test.go @@ -28,9 +28,9 @@ import ( "github.com/stretchr/testify/require" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/core/storelimit" + "github.com/tikv/pd/pkg/response" "github.com/tikv/pd/pkg/statistics/utils" "github.com/tikv/pd/pkg/utils/grpcutil" - "github.com/tikv/pd/server/api" "github.com/tikv/pd/server/config" pdTests "github.com/tikv/pd/tests" ctl "github.com/tikv/pd/tools/pd-ctl/pdctl" @@ -44,15 +44,16 @@ func TestStore(t *testing.T) { defer cancel() cluster, err := pdTests.NewTestCluster(ctx, 1) re.NoError(err) + defer cluster.Destroy() err = cluster.RunInitialServers() re.NoError(err) cluster.WaitLeader() pdAddr := cluster.GetConfig().GetClientURL() cmd := ctl.GetRootCmd() - stores := []*api.StoreInfo{ + stores := []*response.StoreInfo{ { - Store: &api.MetaStore{ + Store: &response.MetaStore{ Store: &metapb.Store{ Id: 1, State: metapb.StoreState_Up, @@ -63,7 +64,7 @@ func TestStore(t *testing.T) { }, }, { - Store: &api.MetaStore{ + Store: &response.MetaStore{ Store: &metapb.Store{ Id: 3, State: metapb.StoreState_Up, @@ -74,7 +75,7 @@ func TestStore(t *testing.T) { }, }, { - Store: &api.MetaStore{ + Store: &response.MetaStore{ Store: &metapb.Store{ Id: 2, State: metapb.StoreState_Tombstone, @@ -98,7 +99,7 @@ func TestStore(t *testing.T) { args := []string{"-u", pdAddr, "store"} output, err := tests.ExecuteCommand(cmd, args...) re.NoError(err) - storesInfo := new(api.StoresInfo) + storesInfo := new(response.StoresInfo) re.NoError(json.Unmarshal(output, &storesInfo)) tests.CheckStoresInfo(re, storesInfo.Stores, stores[:2]) @@ -108,7 +109,7 @@ func TestStore(t *testing.T) { output, err = tests.ExecuteCommand(cmd, args...) re.NoError(err) re.NotContains(string(output), "\"state\":") - storesInfo = new(api.StoresInfo) + storesInfo = new(response.StoresInfo) re.NoError(json.Unmarshal(output, &storesInfo)) tests.CheckStoresInfo(re, storesInfo.Stores, stores) @@ -117,10 +118,10 @@ func TestStore(t *testing.T) { args = []string{"-u", pdAddr, "store", "1"} output, err = tests.ExecuteCommand(cmd, args...) re.NoError(err) - storeInfo := new(api.StoreInfo) + storeInfo := new(response.StoreInfo) re.NoError(json.Unmarshal(output, &storeInfo)) - tests.CheckStoresInfo(re, []*api.StoreInfo{storeInfo}, stores[:1]) + tests.CheckStoresInfo(re, []*response.StoreInfo{storeInfo}, stores[:1]) re.Nil(storeInfo.Store.Labels) // store label command @@ -169,7 +170,7 @@ func TestStore(t *testing.T) { args = testcase.newArgs } cmd := ctl.GetRootCmd() - storeInfo := new(api.StoreInfo) + storeInfo := new(response.StoreInfo) _, err = tests.ExecuteCommand(cmd, args...) 
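The checkSchedulerCommand helper added above also accepts a non-nil args slice: it first runs that pd-ctl command, asserts it prints "Success!", and only then polls `scheduler show` until the reported scheduler set matches. A rough usage sketch, assuming the suite's usual fixtures (re, cmd, pdAddr) and using shuffle-region-scheduler purely as an illustration:

// Sketch: add a scheduler, then wait until `scheduler show` reports exactly
// the default set plus the newly added one.
expected := map[string]bool{
	"balance-region-scheduler":     true,
	"balance-leader-scheduler":     true,
	"balance-hot-region-scheduler": true,
	"evict-slow-store-scheduler":   true,
	"shuffle-region-scheduler":     true, // illustrative addition
}
checkSchedulerCommand(re, cmd, pdAddr,
	[]string{"-u", pdAddr, "scheduler", "add", "shuffle-region-scheduler"},
	expected)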
re.NoError(err) args = []string{"-u", pdAddr, "store", "1"} @@ -274,7 +275,7 @@ func TestStore(t *testing.T) { output, err = tests.ExecuteCommand(cmd, args...) re.NoError(err) - allAddPeerLimit := make(map[string]map[string]interface{}) + allAddPeerLimit := make(map[string]map[string]any) json.Unmarshal(output, &allAddPeerLimit) re.Equal(float64(20), allAddPeerLimit["1"]["add-peer"].(float64)) re.Equal(float64(20), allAddPeerLimit["3"]["add-peer"].(float64)) @@ -285,7 +286,7 @@ func TestStore(t *testing.T) { output, err = tests.ExecuteCommand(cmd, args...) re.NoError(err) - allRemovePeerLimit := make(map[string]map[string]interface{}) + allRemovePeerLimit := make(map[string]map[string]any) json.Unmarshal(output, &allRemovePeerLimit) re.Equal(float64(20), allRemovePeerLimit["1"]["remove-peer"].(float64)) re.Equal(float64(25), allRemovePeerLimit["3"]["remove-peer"].(float64)) @@ -312,7 +313,7 @@ func TestStore(t *testing.T) { args = []string{"-u", pdAddr, "store", "1"} output, err = tests.ExecuteCommand(cmd, args...) re.NoError(err) - storeInfo = new(api.StoreInfo) + storeInfo = new(response.StoreInfo) re.NoError(json.Unmarshal(output, &storeInfo)) storeInfo.Store.State = metapb.StoreState(metapb.StoreState_value[storeInfo.Store.StateName]) @@ -366,7 +367,7 @@ func TestStore(t *testing.T) { args = []string{"-u", pdAddr, "store", "1"} output, err = tests.ExecuteCommand(cmd, args...) re.NoError(err) - storeInfo = new(api.StoreInfo) + storeInfo = new(response.StoreInfo) re.NoError(json.Unmarshal(output, &storeInfo)) re.Equal(metapb.StoreState_Up, storeInfo.Store.State) @@ -382,7 +383,7 @@ func TestStore(t *testing.T) { args = []string{"-u", pdAddr, "store", "3"} output, err = tests.ExecuteCommand(cmd, args...) re.NoError(err) - storeInfo = new(api.StoreInfo) + storeInfo = new(response.StoreInfo) re.NoError(json.Unmarshal(output, &storeInfo)) storeInfo.Store.State = metapb.StoreState(metapb.StoreState_value[storeInfo.Store.StateName]) @@ -398,7 +399,7 @@ func TestStore(t *testing.T) { args = []string{"-u", pdAddr, "store", "3"} output, err = tests.ExecuteCommand(cmd, args...) re.NoError(err) - storeInfo = new(api.StoreInfo) + storeInfo = new(response.StoreInfo) re.NoError(json.Unmarshal(output, &storeInfo)) re.Equal(metapb.StoreState_Up, storeInfo.Store.State) @@ -409,7 +410,7 @@ func TestStore(t *testing.T) { args = []string{"-u", pdAddr, "store", "check", "Tombstone"} output, err = tests.ExecuteCommand(cmd, args...) re.NoError(err) - storesInfo = new(api.StoresInfo) + storesInfo = new(response.StoresInfo) re.NoError(json.Unmarshal(output, &storesInfo)) re.Equal(1, storesInfo.Count) @@ -419,7 +420,7 @@ func TestStore(t *testing.T) { args = []string{"-u", pdAddr, "store", "check", "Tombstone"} output, err = tests.ExecuteCommand(cmd, args...) 
re.NoError(err) - storesInfo = new(api.StoresInfo) + storesInfo = new(response.StoresInfo) re.NoError(json.Unmarshal(output, &storesInfo)) re.Equal(0, storesInfo.Count) @@ -492,15 +493,16 @@ func TestTombstoneStore(t *testing.T) { defer cancel() cluster, err := pdTests.NewTestCluster(ctx, 1) re.NoError(err) + defer cluster.Destroy() err = cluster.RunInitialServers() re.NoError(err) cluster.WaitLeader() pdAddr := cluster.GetConfig().GetClientURL() cmd := ctl.GetRootCmd() - stores := []*api.StoreInfo{ + stores := []*response.StoreInfo{ { - Store: &api.MetaStore{ + Store: &response.MetaStore{ Store: &metapb.Store{ Id: 2, State: metapb.StoreState_Tombstone, @@ -511,7 +513,7 @@ func TestTombstoneStore(t *testing.T) { }, }, { - Store: &api.MetaStore{ + Store: &response.MetaStore{ Store: &metapb.Store{ Id: 3, State: metapb.StoreState_Tombstone, @@ -522,7 +524,7 @@ func TestTombstoneStore(t *testing.T) { }, }, { - Store: &api.MetaStore{ + Store: &response.MetaStore{ Store: &metapb.Store{ Id: 4, State: metapb.StoreState_Tombstone, @@ -579,7 +581,7 @@ func TestStoreTLS(t *testing.T) { CertFile: filepath.Join(certPath, "pd-server.pem"), TrustedCAFile: filepath.Join(certPath, "ca.pem"), } - cluster, err := pdTests.NewTestCluster(ctx, 1, func(conf *config.Config, serverName string) { + cluster, err := pdTests.NewTestCluster(ctx, 1, func(conf *config.Config, _ string) { conf.Security.TLSConfig = grpcutil.TLSConfig{ KeyPath: tlsInfo.KeyFile, CertPath: tlsInfo.CertFile, @@ -592,14 +594,15 @@ func TestStoreTLS(t *testing.T) { conf.InitialCluster = strings.ReplaceAll(conf.InitialCluster, "http", "https") }) re.NoError(err) + defer cluster.Destroy() err = cluster.RunInitialServers() re.NoError(err) cluster.WaitLeader() cmd := ctl.GetRootCmd() - stores := []*api.StoreInfo{ + stores := []*response.StoreInfo{ { - Store: &api.MetaStore{ + Store: &response.MetaStore{ Store: &metapb.Store{ Id: 1, State: metapb.StoreState_Up, @@ -610,7 +613,7 @@ func TestStoreTLS(t *testing.T) { }, }, { - Store: &api.MetaStore{ + Store: &response.MetaStore{ Store: &metapb.Store{ Id: 2, State: metapb.StoreState_Up, @@ -639,7 +642,7 @@ func TestStoreTLS(t *testing.T) { "--key=../cert/client-key.pem"} output, err := tests.ExecuteCommand(cmd, args...) re.NoError(err) - storesInfo := new(api.StoresInfo) + storesInfo := new(response.StoresInfo) re.NoError(json.Unmarshal(output, &storesInfo)) tests.CheckStoresInfo(re, storesInfo.Stores, stores) } diff --git a/tools/pd-ctl/tests/tso/tso_test.go b/tools/pd-ctl/tests/tso/tso_test.go index 77e1bc28e70..63816c40e7a 100644 --- a/tools/pd-ctl/tests/tso/tso_test.go +++ b/tools/pd-ctl/tests/tso/tso_test.go @@ -36,7 +36,7 @@ func TestTSO(t *testing.T) { // tso command ts := "395181938313123110" - args := []string{"-u", "127.0.0.1", "tso", ts} + args := []string{"tso", ts} output, err := tests.ExecuteCommand(cmd, args...) re.NoError(err) tsTime, err := strconv.ParseUint(ts, 10, 64) @@ -46,4 +46,16 @@ func TestTSO(t *testing.T) { physicalTime := time.Unix(int64(physical/1000), int64(physical%1000)*time.Millisecond.Nanoseconds()) str := fmt.Sprintln("system: ", physicalTime) + fmt.Sprintln("logic: ", logicalTime) re.Equal(string(output), str) + + // test with invalid address + args = []string{"-u", "127.0.0.1", "tso", ts} + output, err = tests.ExecuteCommand(cmd, args...) 
+ re.NoError(err) + tsTime, err = strconv.ParseUint(ts, 10, 64) + re.NoError(err) + logicalTime = tsTime & logicalBits + physical = tsTime >> physicalShiftBits + physicalTime = time.Unix(int64(physical/1000), int64(physical%1000)*time.Millisecond.Nanoseconds()) + str = fmt.Sprintln("system: ", physicalTime) + fmt.Sprintln("logic: ", logicalTime) + re.Equal(string(output), str) } diff --git a/tools/pd-ctl/tests/unsafe/unsafe_operation_test.go b/tools/pd-ctl/tests/unsafe/unsafe_operation_test.go index 3b52de3d66a..26fb4fc46e4 100644 --- a/tools/pd-ctl/tests/unsafe/unsafe_operation_test.go +++ b/tools/pd-ctl/tests/unsafe/unsafe_operation_test.go @@ -30,6 +30,7 @@ func TestRemoveFailedStores(t *testing.T) { defer cancel() cluster, err := pdTests.NewTestCluster(ctx, 1) re.NoError(err) + defer cluster.Destroy() err = cluster.RunInitialServers() re.NoError(err) cluster.WaitLeader() diff --git a/tools/pd-heartbeat-bench/config-template.toml b/tools/pd-heartbeat-bench/config-template.toml index d2a0fa844fe..73917c1425a 100644 --- a/tools/pd-heartbeat-bench/config-template.toml +++ b/tools/pd-heartbeat-bench/config-template.toml @@ -3,13 +3,12 @@ round = 0 store-count = 100 region-count = 2000000 -key-length = 56 replica = 3 leader-update-ratio = 0.06 -epoch-update-ratio = 0.04 -space-update-ratio = 0.15 -flow-update-ratio = 0.35 +epoch-update-ratio = 0.0 +space-update-ratio = 0.0 +flow-update-ratio = 0.0 no-update-ratio = 0.0 sample = false diff --git a/tools/pd-heartbeat-bench/config/config.go b/tools/pd-heartbeat-bench/config/config.go index 74c8159ced9..12455d78658 100644 --- a/tools/pd-heartbeat-bench/config/config.go +++ b/tools/pd-heartbeat-bench/config/config.go @@ -1,7 +1,6 @@ package config import ( - "math" "sync/atomic" "github.com/BurntSushi/toml" @@ -15,15 +14,16 @@ import ( const ( defaultStoreCount = 50 defaultRegionCount = 1000000 - defaultKeyLength = 56 + defaultHotStoreCount = 0 defaultReplica = 3 defaultLeaderUpdateRatio = 0.06 - defaultEpochUpdateRatio = 0.04 - defaultSpaceUpdateRatio = 0.15 - defaultFlowUpdateRatio = 0.35 - defaultNoUpdateRatio = 0 + defaultEpochUpdateRatio = 0.0 + defaultSpaceUpdateRatio = 0.0 + defaultFlowUpdateRatio = 0.0 + defaultReportRatio = 1 defaultRound = 0 defaultSample = false + defaultInitialVersion = 1 defaultLogFormat = "text" ) @@ -39,15 +39,18 @@ type Config struct { Logger *zap.Logger LogProps *log.ZapProperties + Security configutil.SecurityConfig `toml:"security" json:"security"` + + InitEpochVer uint64 `toml:"epoch-ver" json:"epoch-ver"` StoreCount int `toml:"store-count" json:"store-count"` + HotStoreCount int `toml:"hot-store-count" json:"hot-store-count"` RegionCount int `toml:"region-count" json:"region-count"` - KeyLength int `toml:"key-length" json:"key-length"` Replica int `toml:"replica" json:"replica"` LeaderUpdateRatio float64 `toml:"leader-update-ratio" json:"leader-update-ratio"` EpochUpdateRatio float64 `toml:"epoch-update-ratio" json:"epoch-update-ratio"` SpaceUpdateRatio float64 `toml:"space-update-ratio" json:"space-update-ratio"` FlowUpdateRatio float64 `toml:"flow-update-ratio" json:"flow-update-ratio"` - NoUpdateRatio float64 `toml:"no-update-ratio" json:"no-update-ratio"` + ReportRatio float64 `toml:"report-ratio" json:"report-ratio"` Sample bool `toml:"sample" json:"sample"` Round int `toml:"round" json:"round"` } @@ -62,6 +65,10 @@ func NewConfig() *Config { fs.StringVar(&cfg.PDAddr, "pd-endpoints", "127.0.0.1:2379", "pd address") fs.StringVar(&cfg.Log.File.Filename, "log-file", "", "log file path") 
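The new report-ratio option above controls what fraction of regions send a heartbeat each round, and the reworked Validate just below requires every per-field update ratio to stay within it (and hot-store-count to stay within store-count). A minimal sketch of a configuration that passes those checks, with illustrative values:

package main

import (
	"log"

	"github.com/tikv/pd/tools/pd-heartbeat-bench/config"
)

func main() {
	// Illustrative values only: each update ratio must not exceed report-ratio,
	// and hot-store-count must not exceed store-count.
	cfg := &config.Config{
		StoreCount:        100,
		HotStoreCount:     10,
		RegionCount:       2000000,
		Replica:           3,
		ReportRatio:       1.0,
		LeaderUpdateRatio: 0.06,
		EpochUpdateRatio:  0.0,
		SpaceUpdateRatio:  0.0,
		FlowUpdateRatio:   0.0,
	}
	if err := cfg.Validate(); err != nil {
		log.Fatalf("invalid heartbeat-bench config: %v", err)
	}
}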
fs.StringVar(&cfg.StatusAddr, "status-addr", "127.0.0.1:20180", "status address") + fs.StringVar(&cfg.Security.CAPath, "cacert", "", "path of file that contains list of trusted TLS CAs") + fs.StringVar(&cfg.Security.CertPath, "cert", "", "path of file that contains X509 certificate in PEM format") + fs.StringVar(&cfg.Security.KeyPath, "key", "", "path of file that contains X509 key in PEM format") + fs.Uint64Var(&cfg.InitEpochVer, "epoch-ver", 1, "the initial epoch version value") return cfg } @@ -113,10 +120,9 @@ func (c *Config) Adjust(meta *toml.MetaData) { configutil.AdjustInt(&c.RegionCount, defaultRegionCount) } - if !meta.IsDefined("key-length") { - configutil.AdjustInt(&c.KeyLength, defaultKeyLength) + if !meta.IsDefined("hot-store-count") { + configutil.AdjustInt(&c.HotStoreCount, defaultHotStoreCount) } - if !meta.IsDefined("replica") { configutil.AdjustInt(&c.Replica, defaultReplica) } @@ -133,34 +139,36 @@ func (c *Config) Adjust(meta *toml.MetaData) { if !meta.IsDefined("flow-update-ratio") { configutil.AdjustFloat64(&c.FlowUpdateRatio, defaultFlowUpdateRatio) } - if !meta.IsDefined("no-update-ratio") { - configutil.AdjustFloat64(&c.NoUpdateRatio, defaultNoUpdateRatio) + if !meta.IsDefined("report-ratio") { + configutil.AdjustFloat64(&c.ReportRatio, defaultReportRatio) } if !meta.IsDefined("sample") { c.Sample = defaultSample } + if !meta.IsDefined("epoch-ver") { + c.InitEpochVer = defaultInitialVersion + } } // Validate is used to validate configurations func (c *Config) Validate() error { - if c.LeaderUpdateRatio < 0 || c.LeaderUpdateRatio > 1 { - return errors.Errorf("leader-update-ratio must be in [0, 1]") + if c.HotStoreCount < 0 || c.HotStoreCount > c.StoreCount { + return errors.Errorf("hot-store-count must be in [0, store-count]") } - if c.EpochUpdateRatio < 0 || c.EpochUpdateRatio > 1 { - return errors.Errorf("epoch-update-ratio must be in [0, 1]") + if c.ReportRatio < 0 || c.ReportRatio > 1 { + return errors.Errorf("report-ratio must be in [0, 1]") } - if c.SpaceUpdateRatio < 0 || c.SpaceUpdateRatio > 1 { - return errors.Errorf("space-update-ratio must be in [0, 1]") + if c.LeaderUpdateRatio > c.ReportRatio || c.LeaderUpdateRatio < 0 { + return errors.Errorf("leader-update-ratio can not be negative or larger than report-ratio") } - if c.FlowUpdateRatio < 0 || c.FlowUpdateRatio > 1 { - return errors.Errorf("flow-update-ratio must be in [0, 1]") + if c.EpochUpdateRatio > c.ReportRatio || c.EpochUpdateRatio < 0 { + return errors.Errorf("epoch-update-ratio can not be negative or larger than report-ratio") } - if c.NoUpdateRatio < 0 || c.NoUpdateRatio > 1 { - return errors.Errorf("no-update-ratio must be in [0, 1]") + if c.SpaceUpdateRatio > c.ReportRatio || c.SpaceUpdateRatio < 0 { + return errors.Errorf("space-update-ratio can not be negative or larger than report-ratio") } - max := math.Max(c.LeaderUpdateRatio, math.Max(c.EpochUpdateRatio, math.Max(c.SpaceUpdateRatio, c.FlowUpdateRatio))) - if max+c.NoUpdateRatio > 1 { - return errors.Errorf("sum of update-ratio must be in [0, 1]") + if c.FlowUpdateRatio > c.ReportRatio || c.FlowUpdateRatio < 0 { + return errors.Errorf("flow-update-ratio can not be negative or larger than report-ratio") } return nil } @@ -174,24 +182,32 @@ func (c *Config) Clone() *Config { // Options is the option of the heartbeat-bench. 
type Options struct { + HotStoreCount atomic.Value + ReportRatio atomic.Value + LeaderUpdateRatio atomic.Value EpochUpdateRatio atomic.Value SpaceUpdateRatio atomic.Value FlowUpdateRatio atomic.Value - NoUpdateRatio atomic.Value } // NewOptions creates a new option. func NewOptions(cfg *Config) *Options { o := &Options{} + o.HotStoreCount.Store(cfg.HotStoreCount) o.LeaderUpdateRatio.Store(cfg.LeaderUpdateRatio) o.EpochUpdateRatio.Store(cfg.EpochUpdateRatio) o.SpaceUpdateRatio.Store(cfg.SpaceUpdateRatio) o.FlowUpdateRatio.Store(cfg.FlowUpdateRatio) - o.NoUpdateRatio.Store(cfg.NoUpdateRatio) + o.ReportRatio.Store(cfg.ReportRatio) return o } +// GetHotStoreCount returns the hot store count. +func (o *Options) GetHotStoreCount() int { + return o.HotStoreCount.Load().(int) +} + // GetLeaderUpdateRatio returns the leader update ratio. func (o *Options) GetLeaderUpdateRatio() float64 { return o.LeaderUpdateRatio.Load().(float64) @@ -212,16 +228,17 @@ func (o *Options) GetFlowUpdateRatio() float64 { return o.FlowUpdateRatio.Load().(float64) } -// GetNoUpdateRatio returns the no update ratio. -func (o *Options) GetNoUpdateRatio() float64 { - return o.NoUpdateRatio.Load().(float64) +// GetReportRatio returns the report ratio. +func (o *Options) GetReportRatio() float64 { + return o.ReportRatio.Load().(float64) } // SetOptions sets the option. func (o *Options) SetOptions(cfg *Config) { + o.HotStoreCount.Store(cfg.HotStoreCount) o.LeaderUpdateRatio.Store(cfg.LeaderUpdateRatio) o.EpochUpdateRatio.Store(cfg.EpochUpdateRatio) o.SpaceUpdateRatio.Store(cfg.SpaceUpdateRatio) o.FlowUpdateRatio.Store(cfg.FlowUpdateRatio) - o.NoUpdateRatio.Store(cfg.NoUpdateRatio) + o.ReportRatio.Store(cfg.ReportRatio) } diff --git a/tools/pd-heartbeat-bench/main.go b/tools/pd-heartbeat-bench/main.go index 0e1af0de9ca..ec5e2506e6b 100644 --- a/tools/pd-heartbeat-bench/main.go +++ b/tools/pd-heartbeat-bench/main.go @@ -16,13 +16,13 @@ package main import ( "context" + "crypto/tls" "fmt" "io" "math/rand" "net/http" "os" "os/signal" - "strings" "sync" "sync/atomic" "syscall" @@ -38,38 +38,45 @@ import ( "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" "github.com/spf13/pflag" + "github.com/tikv/pd/client/grpcutil" + pdHttp "github.com/tikv/pd/client/http" + "github.com/tikv/pd/client/tlsutil" + "github.com/tikv/pd/pkg/codec" "github.com/tikv/pd/pkg/mcs/utils" + "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/tools/pd-heartbeat-bench/config" "go.etcd.io/etcd/pkg/report" "go.uber.org/zap" - "google.golang.org/grpc" ) const ( - bytesUnit = 8 * units.MiB - keysUint = 8 * units.KiB - queryUnit = 1 * units.KiB + bytesUnit = 128 + keysUint = 8 + queryUnit = 8 + hotByteUnit = 16 * units.KiB + hotKeysUint = 256 + hotQueryUnit = 256 regionReportInterval = 60 // 60s storeReportInterval = 10 // 10s capacity = 4 * units.TiB ) -var clusterID uint64 - -func trimHTTPPrefix(str string) string { - str = strings.TrimPrefix(str, "http://") - str = strings.TrimPrefix(str, "https://") - return str -} +var ( + clusterID uint64 + maxVersion uint64 = 1 +) -func newClient(cfg *config.Config) pdpb.PDClient { - addr := trimHTTPPrefix(cfg.PDAddr) - cc, err := grpc.Dial(addr, grpc.WithInsecure()) +func newClient(ctx context.Context, cfg *config.Config) (pdpb.PDClient, error) { + tlsConfig, err := cfg.Security.ToTLSConfig() + if err != nil { + return nil, err + } + cc, err := grpcutil.GetClientConn(ctx, cfg.PDAddr, tlsConfig) if err != nil { - log.Fatal("failed to create gRPC connection", 
zap.Error(err)) + return nil, err } - return pdpb.NewPDClient(cc) + return pdpb.NewPDClient(cc), nil } func initClusterID(ctx context.Context, cli pdpb.PDClient) { @@ -172,18 +179,6 @@ func putStores(ctx context.Context, cfg *config.Config, cli pdpb.PDClient, store } } -func newStartKey(id uint64, keyLen int) []byte { - k := make([]byte, keyLen) - copy(k, fmt.Sprintf("%010d", id)) - return k -} - -func newEndKey(id uint64, keyLen int) []byte { - k := newStartKey(id, keyLen) - k[len(k)-1]++ - return k -} - // Regions simulates all regions to heartbeat. type Regions struct { regions []*pdpb.RegionHeartbeatRequest @@ -197,7 +192,7 @@ type Regions struct { updateFlow []int } -func (rs *Regions) init(cfg *config.Config, options *config.Options) []int { +func (rs *Regions) init(cfg *config.Config) { rs.regions = make([]*pdpb.RegionHeartbeatRequest, 0, cfg.RegionCount) rs.updateRound = 0 @@ -205,15 +200,14 @@ func (rs *Regions) init(cfg *config.Config, options *config.Options) []int { id := uint64(1) now := uint64(time.Now().Unix()) - keyLen := cfg.KeyLength for i := 0; i < cfg.RegionCount; i++ { region := &pdpb.RegionHeartbeatRequest{ Header: header(), Region: &metapb.Region{ Id: id, - StartKey: newStartKey(id, keyLen), - EndKey: newEndKey(id, keyLen), - RegionEpoch: &metapb.RegionEpoch{ConfVer: 2, Version: 1}, + StartKey: codec.GenerateTableKey(int64(i)), + EndKey: codec.GenerateTableKey(int64(i + 1)), + RegionEpoch: &metapb.RegionEpoch{ConfVer: 2, Version: maxVersion}, }, ApproximateSize: bytesUnit, Interval: &pdpb.TimeInterval{ @@ -242,75 +236,97 @@ func (rs *Regions) init(cfg *config.Config, options *config.Options) []int { region.Leader = peers[0] rs.regions = append(rs.regions, region) } +} + +func (rs *Regions) update(cfg *config.Config, options *config.Options) { + rs.updateRound += 1 // Generate sample index indexes := make([]int, cfg.RegionCount) for i := range indexes { indexes[i] = i } + reportRegions := pick(indexes, cfg.RegionCount, options.GetReportRatio()) - return indexes -} - -func (rs *Regions) update(cfg *config.Config, options *config.Options, indexes []int) { - rs.updateRound += 1 - - rs.updateLeader = pick(indexes, cfg, options.GetLeaderUpdateRatio()) - rs.updateEpoch = pick(indexes, cfg, options.GetEpochUpdateRatio()) - rs.updateSpace = pick(indexes, cfg, options.GetSpaceUpdateRatio()) - rs.updateFlow = pick(indexes, cfg, options.GetFlowUpdateRatio()) - updatedRegionsMap := make(map[int]*pdpb.RegionHeartbeatRequest) - var awakenRegions []*pdpb.RegionHeartbeatRequest + reportCount := len(reportRegions) + rs.updateFlow = pick(reportRegions, reportCount, options.GetFlowUpdateRatio()) + rs.updateLeader = randomPick(reportRegions, reportCount, options.GetLeaderUpdateRatio()) + rs.updateEpoch = randomPick(reportRegions, reportCount, options.GetEpochUpdateRatio()) + rs.updateSpace = randomPick(reportRegions, reportCount, options.GetSpaceUpdateRatio()) + var ( + updatedStatisticsMap = make(map[int]*pdpb.RegionHeartbeatRequest) + awakenRegions []*pdpb.RegionHeartbeatRequest + ) // update leader for _, i := range rs.updateLeader { region := rs.regions[i] region.Leader = region.Region.Peers[rs.updateRound%cfg.Replica] - updatedRegionsMap[i] = region } // update epoch for _, i := range rs.updateEpoch { region := rs.regions[i] region.Region.RegionEpoch.Version += 1 - updatedRegionsMap[i] = region + if region.Region.RegionEpoch.Version > maxVersion { + maxVersion = region.Region.RegionEpoch.Version + } } // update space for _, i := range rs.updateSpace { region := rs.regions[i] 
region.ApproximateSize = uint64(bytesUnit * rand.Float64()) region.ApproximateKeys = uint64(keysUint * rand.Float64()) - updatedRegionsMap[i] = region } // update flow for _, i := range rs.updateFlow { region := rs.regions[i] - region.BytesWritten = uint64(bytesUnit * rand.Float64()) - region.BytesRead = uint64(bytesUnit * rand.Float64()) - region.KeysWritten = uint64(keysUint * rand.Float64()) - region.KeysRead = uint64(keysUint * rand.Float64()) - region.QueryStats = &pdpb.QueryStats{ - Get: uint64(queryUnit * rand.Float64()), - Put: uint64(queryUnit * rand.Float64()), + if region.Leader.StoreId <= uint64(options.GetHotStoreCount()) { + region.BytesWritten = uint64(hotByteUnit * (1 + rand.Float64()) * 60) + region.BytesRead = uint64(hotByteUnit * (1 + rand.Float64()) * 10) + region.KeysWritten = uint64(hotKeysUint * (1 + rand.Float64()) * 60) + region.KeysRead = uint64(hotKeysUint * (1 + rand.Float64()) * 10) + region.QueryStats = &pdpb.QueryStats{ + Get: uint64(hotQueryUnit * (1 + rand.Float64()) * 10), + Put: uint64(hotQueryUnit * (1 + rand.Float64()) * 60), + } + } else { + region.BytesWritten = uint64(bytesUnit * rand.Float64()) + region.BytesRead = uint64(bytesUnit * rand.Float64()) + region.KeysWritten = uint64(keysUint * rand.Float64()) + region.KeysRead = uint64(keysUint * rand.Float64()) + region.QueryStats = &pdpb.QueryStats{ + Get: uint64(queryUnit * rand.Float64()), + Put: uint64(queryUnit * rand.Float64()), + } } - updatedRegionsMap[i] = region + updatedStatisticsMap[i] = region } // update interval for _, region := range rs.regions { region.Interval.StartTimestamp = region.Interval.EndTimestamp region.Interval.EndTimestamp = region.Interval.StartTimestamp + regionReportInterval } - for _, region := range updatedRegionsMap { + for _, i := range reportRegions { + region := rs.regions[i] + // reset the statistics of the region which is not updated + if _, exist := updatedStatisticsMap[i]; !exist { + region.BytesWritten = 0 + region.BytesRead = 0 + region.KeysWritten = 0 + region.KeysRead = 0 + region.QueryStats = &pdpb.QueryStats{} + } awakenRegions = append(awakenRegions, region) } - noUpdatedRegions := pickNoUpdatedRegions(indexes, cfg, options.GetNoUpdateRatio(), updatedRegionsMap) - for _, i := range noUpdatedRegions { - awakenRegions = append(awakenRegions, rs.regions[i]) - } + rs.awakenRegions.Store(awakenRegions) } -func createHeartbeatStream(ctx context.Context, cfg *config.Config) pdpb.PD_RegionHeartbeatClient { - cli := newClient(cfg) +func createHeartbeatStream(ctx context.Context, cfg *config.Config) (pdpb.PDClient, pdpb.PD_RegionHeartbeatClient) { + cli, err := newClient(ctx, cfg) + if err != nil { + log.Fatal("create client error", zap.Error(err)) + } stream, err := cli.RegionHeartbeat(ctx) if err != nil { log.Fatal("create stream error", zap.Error(err)) @@ -322,7 +338,7 @@ func createHeartbeatStream(ctx context.Context, cfg *config.Config) pdpb.PD_Regi stream.Recv() } }() - return stream + return cli, stream } func (rs *Regions) handleRegionHeartbeat(wg *sync.WaitGroup, stream pdpb.PD_RegionHeartbeatClient, storeID uint64, rep report.Report) { @@ -359,7 +375,7 @@ func (rs *Regions) handleRegionHeartbeat(wg *sync.WaitGroup, stream pdpb.PD_Regi return } } - log.Info("store finish one round region heartbeat", zap.Uint64("store-id", storeID), zap.Duration("cost-time", time.Since(start))) + log.Info("store finish one round region heartbeat", zap.Uint64("store-id", storeID), zap.Duration("cost-time", time.Since(start)), zap.Int("reported-region-count", len(regions))) 
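In the reworked update above, the reported subset is taken with pick (a plain prefix of the index slice, so the same regions report every round), while the leader/epoch/space updates use randomPick (shuffle first, then take a prefix); both helpers are defined further below in this file. A self-contained sketch of the difference, restating the two helpers so it runs on its own:

package main

import (
	"fmt"
	"math/rand"
)

// Copies of the two sampling helpers from pd-heartbeat-bench/main.go, restated
// here so the sketch is runnable on its own.
func pick(slice []int, total int, ratio float64) []int {
	return append(slice[:0:0], slice[0:int(float64(total)*ratio)]...)
}

func randomPick(slice []int, total int, ratio float64) []int {
	rand.Shuffle(total, func(i, j int) { slice[i], slice[j] = slice[j], slice[i] })
	return append(slice[:0:0], slice[0:int(float64(total)*ratio)]...)
}

func main() {
	indexes := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
	reported := pick(indexes, len(indexes), 0.5) // always the first half: [0 1 2 3 4]
	fmt.Println("reported:", reported)
	leaders := randomPick(reported, len(reported), 0.4) // a random 2 of the reported 5
	fmt.Println("leader updates:", leaders)
}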
} // Stores contains store stats with lock. @@ -395,7 +411,14 @@ func (s *Stores) update(rs *Regions) { }, } } - for _, region := range rs.regions { + var toUpdate []*pdpb.RegionHeartbeatRequest + updatedRegions := rs.awakenRegions.Load() + if updatedRegions == nil { + toUpdate = rs.regions + } else { + toUpdate = updatedRegions.([]*pdpb.RegionHeartbeatRequest) + } + for _, region := range toUpdate { for _, peer := range region.Region.Peers { store := stats[peer.StoreId] store.UsedSize += region.ApproximateSize @@ -425,32 +448,20 @@ func (s *Stores) update(rs *Regions) { } } -func pick(slice []int, cfg *config.Config, ratio float64) []int { - rand.Shuffle(cfg.RegionCount, func(i, j int) { +func randomPick(slice []int, total int, ratio float64) []int { + rand.Shuffle(total, func(i, j int) { slice[i], slice[j] = slice[j], slice[i] }) - return append(slice[:0:0], slice[0:int(float64(cfg.RegionCount)*ratio)]...) + return append(slice[:0:0], slice[0:int(float64(total)*ratio)]...) } -func pickNoUpdatedRegions(slice []int, cfg *config.Config, ratio float64, updatedMap map[int]*pdpb.RegionHeartbeatRequest) []int { - if ratio == 0 { - return nil - } - rand.Shuffle(cfg.RegionCount, func(i, j int) { - slice[i], slice[j] = slice[j], slice[i] - }) - NoUpdatedRegionsNum := int(float64(cfg.RegionCount) * ratio) - res := make([]int, 0, NoUpdatedRegionsNum) - for i := 0; len(res) < NoUpdatedRegionsNum; i++ { - if _, ok := updatedMap[slice[i]]; !ok { - res = append(res, slice[i]) - } - } - return res +func pick(slice []int, total int, ratio float64) []int { + return append(slice[:0:0], slice[0:int(float64(total)*ratio)]...) } func main() { rand.New(rand.NewSource(0)) // Ensure consistent behavior multiple times + statistics.Denoising = false cfg := config.NewConfig() err := cfg.Parse(os.Args[1:]) defer logutil.LogPanic() @@ -471,6 +482,7 @@ func main() { log.Fatal("initialize logger error", zap.Error(err)) } + maxVersion = cfg.InitEpochVer options := config.NewOptions(cfg) // let PD have enough time to start time.Sleep(5 * time.Second) @@ -487,23 +499,35 @@ func main() { sig = <-sc cancel() }() - cli := newClient(cfg) + cli, err := newClient(ctx, cfg) + if err != nil { + log.Fatal("create client error", zap.Error(err)) + } + initClusterID(ctx, cli) go runHTTPServer(cfg, options) regions := new(Regions) - indexes := regions.init(cfg, options) + regions.init(cfg) log.Info("finish init regions") stores := newStores(cfg.StoreCount) stores.update(regions) bootstrap(ctx, cli) putStores(ctx, cfg, cli, stores) log.Info("finish put stores") + clis := make(map[uint64]pdpb.PDClient, cfg.StoreCount) + httpCli := pdHttp.NewClient("tools-heartbeat-bench", []string{cfg.PDAddr}, pdHttp.WithTLSConfig(loadTLSConfig(cfg))) + go deleteOperators(ctx, httpCli) streams := make(map[uint64]pdpb.PD_RegionHeartbeatClient, cfg.StoreCount) for i := 1; i <= cfg.StoreCount; i++ { - streams[uint64(i)] = createHeartbeatStream(ctx, cfg) + clis[uint64(i)], streams[uint64(i)] = createHeartbeatStream(ctx, cfg) + } + header := &pdpb.RequestHeader{ + ClusterId: clusterID, } var heartbeatTicker = time.NewTicker(regionReportInterval * time.Second) defer heartbeatTicker.Stop() + var resolvedTSTicker = time.NewTicker(time.Second) + defer resolvedTSTicker.Stop() for { select { case <-heartbeatTicker.C: @@ -532,10 +556,31 @@ func main() { zap.String("average", fmt.Sprintf("%.4fs", stats.Average)), zap.String("stddev", fmt.Sprintf("%.4fs", stats.Stddev)), zap.String("rps", fmt.Sprintf("%.4f", stats.RPS)), + zap.Uint64("max-epoch-version", maxVersion), 
) log.Info("store heartbeat stats", zap.String("max", fmt.Sprintf("%.4fs", since))) - regions.update(cfg, options, indexes) + regions.update(cfg, options) go stores.update(regions) // update stores in background, unusually region heartbeat is slower than store update. + case <-resolvedTSTicker.C: + wg := &sync.WaitGroup{} + for i := 1; i <= cfg.StoreCount; i++ { + id := uint64(i) + wg.Add(1) + go func(wg *sync.WaitGroup, id uint64) { + defer wg.Done() + cli := clis[id] + _, err := cli.ReportMinResolvedTS(ctx, &pdpb.ReportMinResolvedTsRequest{ + Header: header, + StoreId: id, + MinResolvedTs: uint64(time.Now().Unix()), + }) + if err != nil { + log.Error("send resolved TS error", zap.Uint64("store-id", id), zap.Error(err)) + return + } + }(wg, id) + } + wg.Wait() case <-ctx.Done(): log.Info("got signal to exit") switch sig { @@ -552,6 +597,22 @@ func exit(code int) { os.Exit(code) } +func deleteOperators(ctx context.Context, httpCli pdHttp.Client) { + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + err := httpCli.DeleteOperators(ctx) + if err != nil { + log.Error("fail to delete operators", zap.Error(err)) + } + } + } +} + func newReport(cfg *config.Config) report.Report { p := "%4.4f" if cfg.Sample { @@ -600,11 +661,12 @@ func runHTTPServer(cfg *config.Config, options *config.Options) { pprof.Register(engine) engine.PUT("config", func(c *gin.Context) { newCfg := cfg.Clone() + newCfg.HotStoreCount = options.GetHotStoreCount() newCfg.FlowUpdateRatio = options.GetFlowUpdateRatio() newCfg.LeaderUpdateRatio = options.GetLeaderUpdateRatio() newCfg.EpochUpdateRatio = options.GetEpochUpdateRatio() newCfg.SpaceUpdateRatio = options.GetSpaceUpdateRatio() - newCfg.NoUpdateRatio = options.GetNoUpdateRatio() + newCfg.ReportRatio = options.GetReportRatio() if err := c.BindJSON(&newCfg); err != nil { c.String(http.StatusBadRequest, err.Error()) return @@ -618,13 +680,43 @@ func runHTTPServer(cfg *config.Config, options *config.Options) { }) engine.GET("config", func(c *gin.Context) { output := cfg.Clone() + output.HotStoreCount = options.GetHotStoreCount() output.FlowUpdateRatio = options.GetFlowUpdateRatio() output.LeaderUpdateRatio = options.GetLeaderUpdateRatio() output.EpochUpdateRatio = options.GetEpochUpdateRatio() output.SpaceUpdateRatio = options.GetSpaceUpdateRatio() - output.NoUpdateRatio = options.GetNoUpdateRatio() + output.ReportRatio = options.GetReportRatio() c.IndentedJSON(http.StatusOK, output) }) engine.Run(cfg.StatusAddr) } + +func loadTLSConfig(cfg *config.Config) *tls.Config { + if len(cfg.Security.CAPath) == 0 { + return nil + } + caData, err := os.ReadFile(cfg.Security.CAPath) + if err != nil { + log.Error("fail to read ca file", zap.Error(err)) + } + certData, err := os.ReadFile(cfg.Security.CertPath) + if err != nil { + log.Error("fail to read cert file", zap.Error(err)) + } + keyData, err := os.ReadFile(cfg.Security.KeyPath) + if err != nil { + log.Error("fail to read key file", zap.Error(err)) + } + + tlsConf, err := tlsutil.TLSConfig{ + SSLCABytes: caData, + SSLCertBytes: certData, + SSLKEYBytes: keyData, + }.ToTLSConfig() + if err != nil { + log.Fatal("failed to load tlc config", zap.Error(err)) + } + + return tlsConf +} diff --git a/tools/pd-recover/main.go b/tools/pd-recover/main.go index 375a9398a4f..9b5d08013db 100644 --- a/tools/pd-recover/main.go +++ b/tools/pd-recover/main.go @@ -62,7 +62,7 @@ func main() { fs.BoolVar(&v, "V", false, "print version information") 
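The gin status server registered in runHTTPServer above exposes the live options on PUT/GET /config, and the status address defaults to 127.0.0.1:20180 per the flag shown earlier. A hedged sketch of adjusting the ratios of a running bench over HTTP; the field names follow the Config JSON tags:

package main

import (
	"bytes"
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	// Illustrative request only: raise flow-update-ratio on a running
	// pd-heartbeat-bench without restarting it.
	body := bytes.NewBufferString(`{"report-ratio": 1.0, "flow-update-ratio": 0.5}`)
	req, err := http.NewRequest(http.MethodPut, "http://127.0.0.1:20180/config", body)
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(out))
}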
fs.BoolVar(&fromOldMember, "from-old-member", false, "recover from a member of an existing cluster") fs.StringVar(&endpoints, "endpoints", "http://127.0.0.1:2379", "endpoints urls") - fs.Uint64Var(&allocID, "alloc-id", 0, "please make sure alloced ID is safe") + fs.Uint64Var(&allocID, "alloc-id", 0, "please make sure allocated ID is safe") fs.Uint64Var(&clusterID, "cluster-id", 0, "please make cluster ID match with tikv") fs.StringVar(&caPath, "cacert", "", "path of file that contains list of trusted SSL CAs") fs.StringVar(&certPath, "cert", "", "path of file that contains list of trusted SSL CAs") diff --git a/tools/pd-simulator/main.go b/tools/pd-simulator/main.go index 5d781757b39..12254c1a947 100644 --- a/tools/pd-simulator/main.go +++ b/tools/pd-simulator/main.go @@ -17,8 +17,6 @@ package main import ( "context" "fmt" - "net/http" - "net/http/pprof" "os" "os/signal" "syscall" @@ -26,7 +24,6 @@ import ( "github.com/BurntSushi/toml" "github.com/pingcap/log" - "github.com/prometheus/client_golang/prometheus/promhttp" flag "github.com/spf13/pflag" "github.com/tikv/pd/pkg/schedule/schedulers" "github.com/tikv/pd/pkg/statistics" @@ -38,21 +35,19 @@ import ( "github.com/tikv/pd/tools/pd-analysis/analysis" "github.com/tikv/pd/tools/pd-simulator/simulator" "github.com/tikv/pd/tools/pd-simulator/simulator/cases" + sc "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" "go.uber.org/zap" ) var ( - pdAddr = flag.String("pd-endpoints", "", "pd address") - configFile = flag.String("config", "conf/simconfig.toml", "config file") - caseName = flag.String("case", "", "case name") - serverLogLevel = flag.String("serverLog", "info", "pd server log level") - simLogLevel = flag.String("simLog", "info", "simulator log level") - simLogFile = flag.String("log-file", "", "simulator log file") - regionNum = flag.Int("regionNum", 0, "regionNum of one store") - storeNum = flag.Int("storeNum", 0, "storeNum") - enableTransferRegionCounter = flag.Bool("enableTransferRegionCounter", false, "enableTransferRegionCounter") - statusAddress = flag.String("status-addr", "0.0.0.0:20180", "status address") + pdAddr = flag.String("pd-endpoints", "", "pd address") + configFile = flag.String("config", "conf/simconfig.toml", "config file") + caseName = flag.String("case", "", "case name") + serverLogLevel = flag.String("serverLog", "info", "pd server log level") + simLogLevel = flag.String("simLog", "info", "simulator log level") + simLogFile = flag.String("log-file", "", "simulator log file") + statusAddress = flag.String("status-addr", "0.0.0.0:20180", "status address") ) func main() { @@ -63,14 +58,12 @@ func main() { flag.Parse() simutil.InitLogger(*simLogLevel, *simLogFile) - simutil.InitCaseConfig(*storeNum, *regionNum, *enableTransferRegionCounter) statistics.Denoising = false - if simutil.CaseConfigure.EnableTransferRegionCounter { - analysis.GetTransferCounter().Init(simutil.CaseConfigure.StoreNum, simutil.CaseConfigure.RegionNum) - } - schedulers.Register() // register schedulers, which is needed by simConfig.Adjust - simConfig := simulator.NewSimConfig(*serverLogLevel) + simConfig := sc.NewSimConfig(*serverLogLevel) + if simConfig.EnableTransferRegionCounter { + analysis.GetTransferCounter().Init(simConfig.TotalStore, simConfig.TotalRegion) + } var meta toml.MetaData var err error if *configFile != "" { @@ -97,10 +90,9 @@ func main() { } } -func run(simCase string, simConfig *simulator.SimConfig) { +func run(simCase string, simConfig *sc.SimConfig) { if 
*pdAddr != "" { - go runHTTPServer() - simStart(*pdAddr, simCase, simConfig) + simStart(*pdAddr, *statusAddress, simCase, simConfig) } else { local, clean := NewSingleServer(context.Background(), simConfig) err := local.Run() @@ -113,27 +105,12 @@ func run(simCase string, simConfig *simulator.SimConfig) { } time.Sleep(100 * time.Millisecond) } - simStart(local.GetAddr(), simCase, simConfig, clean) + simStart(local.GetAddr(), "", simCase, simConfig, clean) } } -func runHTTPServer() { - http.Handle("/metrics", promhttp.Handler()) - // profile API - http.HandleFunc("/pprof/profile", pprof.Profile) - http.HandleFunc("/pprof/trace", pprof.Trace) - http.HandleFunc("/pprof/symbol", pprof.Symbol) - http.Handle("/pprof/heap", pprof.Handler("heap")) - http.Handle("/pprof/mutex", pprof.Handler("mutex")) - http.Handle("/pprof/allocs", pprof.Handler("allocs")) - http.Handle("/pprof/block", pprof.Handler("block")) - http.Handle("/pprof/goroutine", pprof.Handler("goroutine")) - // nolint - http.ListenAndServe(*statusAddress, nil) -} - // NewSingleServer creates a pd server for simulator. -func NewSingleServer(ctx context.Context, simConfig *simulator.SimConfig) (*server.Server, testutil.CleanupFunc) { +func NewSingleServer(ctx context.Context, simConfig *sc.SimConfig) (*server.Server, testutil.CleanupFunc) { err := logutil.SetupLogger(simConfig.ServerConfig.Log, &simConfig.ServerConfig.Logger, &simConfig.ServerConfig.LogProps) if err == nil { log.ReplaceGlobals(simConfig.ServerConfig.Logger, simConfig.ServerConfig.LogProps) @@ -158,9 +135,9 @@ func cleanServer(cfg *config.Config) { os.RemoveAll(cfg.DataDir) } -func simStart(pdAddr string, simCase string, simConfig *simulator.SimConfig, clean ...testutil.CleanupFunc) { +func simStart(pdAddr, statusAddress string, simCase string, simConfig *sc.SimConfig, clean ...testutil.CleanupFunc) { start := time.Now() - driver, err := simulator.NewDriver(pdAddr, simCase, simConfig) + driver, err := simulator.NewDriver(pdAddr, statusAddress, simCase, simConfig) if err != nil { simutil.Logger.Fatal("create driver error", zap.Error(err)) } @@ -174,6 +151,8 @@ func simStart(pdAddr string, simCase string, simConfig *simulator.SimConfig, cle tick := time.NewTicker(tickInterval) defer tick.Stop() sc := make(chan os.Signal, 1) + // halt scheduling + simulator.ChooseToHaltPDSchedule(true) signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, @@ -206,6 +185,9 @@ EXIT: analysis.GetTransferCounter().PrintResult() } + if simulator.PDHTTPClient != nil { + simulator.PDHTTPClient.Close() + } if simResult != "OK" { os.Exit(1) } diff --git a/tools/pd-simulator/simulator/cases/add_nodes.go b/tools/pd-simulator/simulator/cases/add_nodes.go deleted file mode 100644 index 833ead89f53..00000000000 --- a/tools/pd-simulator/simulator/cases/add_nodes.go +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2017 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package cases - -import ( - "math/rand" - - "github.com/docker/go-units" - "github.com/pingcap/kvproto/pkg/metapb" - "github.com/tikv/pd/pkg/core" - "github.com/tikv/pd/tools/pd-simulator/simulator/info" - "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" - "go.uber.org/zap" -) - -func newAddNodes() *Case { - var simCase Case - - storeNum, regionNum := getStoreNum(), getRegionNum() - noEmptyRatio := rand.Float64() // the ratio of noEmpty store to total store - noEmptyStoreNum := getNoEmptyStoreNum(storeNum, noEmptyRatio) - - for i := 1; i <= storeNum; i++ { - simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - }) - } - - for i := 0; i < regionNum*storeNum/3; i++ { - peers := []*metapb.Peer{ - {Id: IDAllocator.nextID(), StoreId: uint64(i)%noEmptyStoreNum + 1}, - {Id: IDAllocator.nextID(), StoreId: uint64(i+1)%noEmptyStoreNum + 1}, - {Id: IDAllocator.nextID(), StoreId: uint64(i+2)%noEmptyStoreNum + 1}, - } - simCase.Regions = append(simCase.Regions, Region{ - ID: IDAllocator.nextID(), - Peers: peers, - Leader: peers[0], - Size: 96 * units.MiB, - Keys: 960000, - }) - } - - threshold := 0.05 - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { - res := true - leaderCounts := make([]int, 0, storeNum) - regionCounts := make([]int, 0, storeNum) - for i := 1; i <= storeNum; i++ { - leaderCount := regions.GetStoreLeaderCount(uint64(i)) - regionCount := regions.GetStoreRegionCount(uint64(i)) - leaderCounts = append(leaderCounts, leaderCount) - regionCounts = append(regionCounts, regionCount) - res = res && leaderAndRegionIsUniform(leaderCount, regionCount, regionNum, threshold) - } - - simutil.Logger.Info("current counts", zap.Ints("leader", leaderCounts), zap.Ints("region", regionCounts)) - return res - } - return &simCase -} diff --git a/tools/pd-simulator/simulator/cases/add_nodes_dynamic.go b/tools/pd-simulator/simulator/cases/add_nodes_dynamic.go deleted file mode 100644 index 410d5e984c7..00000000000 --- a/tools/pd-simulator/simulator/cases/add_nodes_dynamic.go +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright 2018 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package cases - -import ( - "math/rand" - - "github.com/docker/go-units" - "github.com/pingcap/kvproto/pkg/metapb" - "github.com/tikv/pd/pkg/core" - "github.com/tikv/pd/tools/pd-simulator/simulator/info" - "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" - "go.uber.org/zap" -) - -func newAddNodesDynamic() *Case { - var simCase Case - - storeNum, regionNum := getStoreNum(), getRegionNum() - noEmptyRatio := rand.Float64() // the ratio of noEmpty store to total store - noEmptyStoreNum := getNoEmptyStoreNum(storeNum, noEmptyRatio) - - for i := 1; i <= int(noEmptyStoreNum); i++ { - simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - }) - } - - var ids []uint64 - for i := 1; i <= storeNum-int(noEmptyStoreNum); i++ { - ids = append(ids, IDAllocator.nextID()) - } - - for i := 0; i < regionNum*storeNum/3; i++ { - peers := []*metapb.Peer{ - {Id: IDAllocator.nextID(), StoreId: uint64(i)%noEmptyStoreNum + 1}, - {Id: IDAllocator.nextID(), StoreId: uint64(i+1)%noEmptyStoreNum + 1}, - {Id: IDAllocator.nextID(), StoreId: uint64(i+2)%noEmptyStoreNum + 1}, - } - simCase.Regions = append(simCase.Regions, Region{ - ID: IDAllocator.nextID(), - Peers: peers, - Leader: peers[0], - Size: 96 * units.MiB, - Keys: 960000, - }) - } - - numNodes := int(noEmptyStoreNum) - e := &AddNodesDescriptor{} - e.Step = func(tick int64) uint64 { - if tick%100 == 0 && numNodes < storeNum { - numNodes++ - nodeID := ids[0] - ids = append(ids[:0], ids[1:]...) - return nodeID - } - return 0 - } - simCase.Events = []EventDescriptor{e} - - threshold := 0.05 - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { - res := numNodes == storeNum - leaderCounts := make([]int, 0, numNodes) - regionCounts := make([]int, 0, numNodes) - for i := 1; i <= numNodes; i++ { - leaderCount := regions.GetStoreLeaderCount(uint64(i)) - regionCount := regions.GetStoreRegionCount(uint64(i)) - leaderCounts = append(leaderCounts, leaderCount) - regionCounts = append(regionCounts, regionCount) - res = res && leaderAndRegionIsUniform(leaderCount, regionCount, regionNum, threshold) - } - - simutil.Logger.Info("current counts", zap.Ints("leader", leaderCounts), zap.Ints("region", regionCounts)) - return res - } - return &simCase -} diff --git a/tools/pd-simulator/simulator/cases/balance_leader.go b/tools/pd-simulator/simulator/cases/balance_leader.go index 8f2b87e3180..fd9028bc91a 100644 --- a/tools/pd-simulator/simulator/cases/balance_leader.go +++ b/tools/pd-simulator/simulator/cases/balance_leader.go @@ -18,31 +18,39 @@ import ( "github.com/docker/go-units" "github.com/pingcap/kvproto/pkg/metapb" "github.com/tikv/pd/pkg/core" + sc "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/info" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" - "go.uber.org/zap" ) -func newBalanceLeader() *Case { +func newBalanceLeader(config *sc.SimConfig) *Case { var simCase Case - storeNum, regionNum := getStoreNum(), getRegionNum() - - for i := 1; i <= storeNum; i++ { + totalStore := config.TotalStore + totalRegion := config.TotalRegion + replica := int(config.ServerConfig.Replication.MaxReplicas) + for i := 0; i < totalStore; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), + ID: simutil.IDAllocator.NextID(), Status: metapb.StoreState_Up, }) } - for i := 0; i < storeNum*regionNum/3; i++ { - peers := []*metapb.Peer{ - {Id: IDAllocator.nextID(), StoreId: uint64(storeNum)}, - {Id: 
IDAllocator.nextID(), StoreId: uint64((i+1)%(storeNum-1)) + 1}, - {Id: IDAllocator.nextID(), StoreId: uint64((i+2)%(storeNum-1)) + 1}, + leaderStoreID := simCase.Stores[totalStore-1].ID + for i := 0; i < totalRegion; i++ { + peers := make([]*metapb.Peer, 0, replica) + peers = append(peers, &metapb.Peer{ + Id: simutil.IDAllocator.NextID(), + StoreId: leaderStoreID, + }) + for j := 1; j < replica; j++ { + peers = append(peers, &metapb.Peer{ + Id: simutil.IDAllocator.NextID(), + StoreId: uint64((i+j)%(totalStore-1) + 1), + }) } simCase.Regions = append(simCase.Regions, Region{ - ID: IDAllocator.nextID(), + ID: simutil.IDAllocator.NextID(), Peers: peers, Leader: peers[0], Size: 96 * units.MiB, @@ -50,17 +58,14 @@ func newBalanceLeader() *Case { }) } - threshold := 0.05 - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { - res := true - leaderCounts := make([]int, 0, storeNum) - for i := 1; i <= storeNum; i++ { + simCase.Checker = func(regions *core.RegionsInfo, _ []info.StoreStats) bool { + for i := 1; i <= totalStore; i++ { leaderCount := regions.GetStoreLeaderCount(uint64(i)) - leaderCounts = append(leaderCounts, leaderCount) - res = res && isUniform(leaderCount, regionNum/3, threshold) + if !isUniform(leaderCount, totalRegion/totalStore) { + return false + } } - simutil.Logger.Info("current counts", zap.Ints("leader", leaderCounts)) - return res + return true } return &simCase } diff --git a/tools/pd-simulator/simulator/cases/balance_region.go b/tools/pd-simulator/simulator/cases/balance_region.go index 0a013cf3876..82a7ac2d704 100644 --- a/tools/pd-simulator/simulator/cases/balance_region.go +++ b/tools/pd-simulator/simulator/cases/balance_region.go @@ -19,23 +19,21 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/tikv/pd/pkg/core" + sc "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/info" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" - "go.uber.org/zap" ) -func newRedundantBalanceRegion() *Case { +func newRedundantBalanceRegion(config *sc.SimConfig) *Case { var simCase Case - storeNum := simutil.CaseConfigure.StoreNum - regionNum := simutil.CaseConfigure.RegionNum - if storeNum == 0 || regionNum == 0 { - storeNum, regionNum = 6, 4000 - } + totalStore := config.TotalStore + totalRegion := config.TotalRegion + replica := int(config.ServerConfig.Replication.MaxReplicas) - for i := 0; i < storeNum; i++ { + for i := 0; i < totalStore; i++ { s := &Store{ - ID: IDAllocator.nextID(), + ID: simutil.IDAllocator.NextID(), Status: metapb.StoreState_Up, } if i%2 == 1 { @@ -44,43 +42,41 @@ func newRedundantBalanceRegion() *Case { simCase.Stores = append(simCase.Stores, s) } - for i := 0; i < regionNum; i++ { - peers := []*metapb.Peer{ - {Id: IDAllocator.nextID(), StoreId: uint64(i%storeNum + 1)}, - {Id: IDAllocator.nextID(), StoreId: uint64((i+1)%storeNum + 1)}, - {Id: IDAllocator.nextID(), StoreId: uint64((i+2)%storeNum + 1)}, + for i := 0; i < totalRegion; i++ { + peers := make([]*metapb.Peer, 0, replica) + for j := 0; j < replica; j++ { + peers = append(peers, &metapb.Peer{ + Id: simutil.IDAllocator.NextID(), + StoreId: uint64((i+j)%totalStore + 1), + }) } simCase.Regions = append(simCase.Regions, Region{ - ID: IDAllocator.nextID(), + ID: simutil.IDAllocator.NextID(), Peers: peers, Leader: peers[0], }) } - storesLastUpdateTime := make([]int64, storeNum+1) - storeLastAvailable := make([]uint64, storeNum+1) - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) 
bool { - res := true + storesLastUpdateTime := make([]int64, totalStore+1) + storeLastAvailable := make([]uint64, totalStore+1) + simCase.Checker = func(_ *core.RegionsInfo, stats []info.StoreStats) bool { curTime := time.Now().Unix() - storesAvailable := make([]uint64, 0, storeNum+1) - for i := 1; i <= storeNum; i++ { + for i := 1; i <= totalStore; i++ { available := stats[i].GetAvailable() - storesAvailable = append(storesAvailable, available) if curTime-storesLastUpdateTime[i] > 60 { if storeLastAvailable[i] != available { - res = false + return false } if stats[i].ToCompactionSize != 0 { - res = false + return false } storesLastUpdateTime[i] = curTime storeLastAvailable[i] = available } else { - res = false + return false } } - simutil.Logger.Info("current counts", zap.Uint64s("storesAvailable", storesAvailable)) - return res + return true } return &simCase } diff --git a/tools/pd-simulator/simulator/cases/cases.go b/tools/pd-simulator/simulator/cases/cases.go index 0a8967a8d86..c4e2f999978 100644 --- a/tools/pd-simulator/simulator/cases/cases.go +++ b/tools/pd-simulator/simulator/cases/cases.go @@ -16,11 +16,11 @@ package cases import ( "github.com/pingcap/kvproto/pkg/metapb" + pdHttp "github.com/tikv/pd/client/http" "github.com/tikv/pd/pkg/core" - "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/utils/typeutil" + "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/info" - "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" ) // Store is used to simulate tikv. @@ -57,7 +57,7 @@ type Case struct { TableNumber int Checker CheckerFunc // To check the schedule is finished. - Rules []*placement.Rule + Rules []*pdHttp.Rule Labels typeutil.StringSlice } @@ -86,12 +86,9 @@ func (a *idAllocator) GetID() uint64 { var IDAllocator idAllocator // CaseMap is a mapping of the cases to the their corresponding initialize functions. -var CaseMap = map[string]func() *Case{ +var CaseMap = map[string]func(*config.SimConfig) *Case{ "balance-leader": newBalanceLeader, "redundant-balance-region": newRedundantBalanceRegion, - "add-nodes": newAddNodes, - "add-nodes-dynamic": newAddNodesDynamic, - "delete-nodes": newDeleteNodes, "region-split": newRegionSplit, "region-merge": newRegionMerge, "hot-read": newHotRead, @@ -106,43 +103,16 @@ var CaseMap = map[string]func() *Case{ } // NewCase creates a new case. 
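For readers following the rewritten balance-leader case above: every region now starts with its leader pinned on the last store, followers are round-robined over the remaining stores, and the checker simply waits until each store's leader count falls inside the uniform band around totalRegion/totalStore. A small standalone sketch of that initial layout, using assumed toy numbers (5 stores, 20 regions, 3 replicas) rather than the real SimConfig:

package main

import "fmt"

func main() {
    // Assumed toy numbers; the simulator reads these from SimConfig.
    totalStore, totalRegion, replica := 5, 20, 3
    leaderStore := totalStore // the case pins every initial leader on the last store
    leaders := make([]int, totalStore+1)
    followers := make([]int, totalStore+1)
    for i := 0; i < totalRegion; i++ {
        leaders[leaderStore]++
        for j := 1; j < replica; j++ {
            // Followers use the same modulo layout as the rewritten case:
            // they cycle over stores 1..totalStore-1 and never land on the leader store.
            followers[(i+j)%(totalStore-1)+1]++
        }
    }
    fmt.Println("initial leaders per store:  ", leaders[1:])   // [0 0 0 0 20]
    fmt.Println("initial followers per store:", followers[1:]) // [10 10 10 10 0]
    fmt.Println("per-store leader target:    ", totalRegion/totalStore)
}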
-func NewCase(name string) *Case { +func NewCase(name string, simConfig *config.SimConfig) *Case { if f, ok := CaseMap[name]; ok { - return f() + return f(simConfig) } return nil } -func leaderAndRegionIsUniform(leaderCount, regionCount, regionNum int, threshold float64) bool { - return isUniform(leaderCount, regionNum/3, threshold) && isUniform(regionCount, regionNum, threshold) -} - -func isUniform(count, meanCount int, threshold float64) bool { +func isUniform(count, meanCount int) bool { + threshold := 0.05 maxCount := int((1.0 + threshold) * float64(meanCount)) minCount := int((1.0 - threshold) * float64(meanCount)) return minCount <= count && count <= maxCount } - -func getStoreNum() int { - storeNum := simutil.CaseConfigure.StoreNum - if storeNum < 3 { - simutil.Logger.Fatal("store num should be larger than or equal to 3") - } - return storeNum -} - -func getRegionNum() int { - regionNum := simutil.CaseConfigure.RegionNum - if regionNum <= 0 { - simutil.Logger.Fatal("region num should be larger than 0") - } - return regionNum -} - -func getNoEmptyStoreNum(storeNum int, noEmptyRatio float64) uint64 { - noEmptyStoreNum := uint64(float64(storeNum) * noEmptyRatio) - if noEmptyStoreNum < 3 || noEmptyStoreNum == uint64(storeNum) { - noEmptyStoreNum = 3 - } - return noEmptyStoreNum -} diff --git a/tools/pd-simulator/simulator/cases/delete_nodes.go b/tools/pd-simulator/simulator/cases/delete_nodes.go deleted file mode 100644 index 33f7ada14a0..00000000000 --- a/tools/pd-simulator/simulator/cases/delete_nodes.go +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2018 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
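The simplified isUniform helper above hard-codes a ±5% tolerance instead of taking a threshold argument, and callers such as newBalanceLeader now compare against totalRegion/totalStore rather than a per-case constant. A minimal, self-contained sketch of the same check (the package layout and example numbers are illustrative, not part of the patch):

package main

import "fmt"

// isUniform reports whether count lies within roughly ±5% of meanCount,
// mirroring the simplified helper in cases.go.
func isUniform(count, meanCount int) bool {
    threshold := 0.05
    maxCount := int((1.0 + threshold) * float64(meanCount))
    minCount := int((1.0 - threshold) * float64(meanCount))
    return minCount <= count && count <= maxCount
}

func main() {
    // With 1000 regions spread over 5 stores the mean is 200, so only counts
    // within about 5% of 200 pass the check.
    fmt.Println(isUniform(195, 200)) // true
    fmt.Println(isUniform(230, 200)) // false
}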
- -package cases - -import ( - "math/rand" - - "github.com/docker/go-units" - "github.com/pingcap/kvproto/pkg/metapb" - "github.com/tikv/pd/pkg/core" - "github.com/tikv/pd/tools/pd-simulator/simulator/info" - "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" - "go.uber.org/zap" -) - -func newDeleteNodes() *Case { - var simCase Case - - storeNum, regionNum := getStoreNum(), getRegionNum() - noEmptyStoreNum := storeNum - 1 - for i := 1; i <= storeNum; i++ { - simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - }) - } - - for i := 0; i < regionNum*storeNum/3; i++ { - peers := []*metapb.Peer{ - {Id: IDAllocator.nextID(), StoreId: uint64(i%storeNum) + 1}, - {Id: IDAllocator.nextID(), StoreId: uint64((i+1)%storeNum) + 1}, - {Id: IDAllocator.nextID(), StoreId: uint64((i+2)%storeNum) + 1}, - } - simCase.Regions = append(simCase.Regions, Region{ - ID: IDAllocator.nextID(), - Peers: peers, - Leader: peers[0], - Size: 96 * units.MiB, - Keys: 960000, - }) - } - - ids := make([]uint64, 0, len(simCase.Stores)) - for _, store := range simCase.Stores { - ids = append(ids, store.ID) - } - - numNodes := storeNum - e := &DeleteNodesDescriptor{} - e.Step = func(tick int64) uint64 { - if numNodes > noEmptyStoreNum && tick%100 == 0 { - idx := rand.Intn(numNodes) - numNodes-- - nodeID := ids[idx] - ids = append(ids[:idx], ids[idx+1:]...) - return nodeID - } - return 0 - } - simCase.Events = []EventDescriptor{e} - - threshold := 0.05 - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { - res := numNodes == noEmptyStoreNum - leaderCounts := make([]int, 0, numNodes) - regionCounts := make([]int, 0, numNodes) - for _, i := range ids { - leaderCount := regions.GetStoreLeaderCount(i) - regionCount := regions.GetStoreRegionCount(i) - leaderCounts = append(leaderCounts, leaderCount) - regionCounts = append(regionCounts, regionCount) - res = res && leaderAndRegionIsUniform(leaderCount, regionCount, regionNum*storeNum/noEmptyStoreNum, threshold) - } - - simutil.Logger.Info("current counts", zap.Ints("leader", leaderCounts), zap.Ints("region", regionCounts)) - return res - } - return &simCase -} diff --git a/tools/pd-simulator/simulator/cases/diagnose_label_isolation.go b/tools/pd-simulator/simulator/cases/diagnose_label_isolation.go index bd056bdf9c1..09037136608 100644 --- a/tools/pd-simulator/simulator/cases/diagnose_label_isolation.go +++ b/tools/pd-simulator/simulator/cases/diagnose_label_isolation.go @@ -21,12 +21,13 @@ import ( "github.com/docker/go-units" "github.com/pingcap/kvproto/pkg/metapb" "github.com/tikv/pd/pkg/core" + sc "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/info" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" "go.uber.org/zap" ) -func newLabelNotMatch1() *Case { +func newLabelNotMatch1(_ *sc.SimConfig) *Case { var simCase Case simCase.Labels = []string{"host"} @@ -62,7 +63,7 @@ func newLabelNotMatch1() *Case { storesLastUpdateTime := make([]int64, storeNum+1) storeLastAvailable := make([]uint64, storeNum+1) - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { + simCase.Checker = func(_ *core.RegionsInfo, stats []info.StoreStats) bool { res := true curTime := time.Now().Unix() storesAvailable := make([]uint64, 0, storeNum+1) @@ -88,7 +89,7 @@ func newLabelNotMatch1() *Case { return &simCase } -func newLabelIsolation1() *Case { +func newLabelIsolation1(_ *sc.SimConfig) *Case { var simCase Case 
simCase.Labels = []string{"host"} @@ -128,7 +129,7 @@ func newLabelIsolation1() *Case { storesLastUpdateTime := make([]int64, storeNum+1) storeLastAvailable := make([]uint64, storeNum+1) - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { + simCase.Checker = func(_ *core.RegionsInfo, stats []info.StoreStats) bool { res := true curTime := time.Now().Unix() storesAvailable := make([]uint64, 0, storeNum+1) @@ -154,7 +155,7 @@ func newLabelIsolation1() *Case { return &simCase } -func newLabelIsolation2() *Case { +func newLabelIsolation2(_ *sc.SimConfig) *Case { var simCase Case simCase.Labels = []string{"dc", "zone", "host"} @@ -189,7 +190,7 @@ func newLabelIsolation2() *Case { storesLastUpdateTime := make([]int64, storeNum+1) storeLastAvailable := make([]uint64, storeNum+1) - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { + simCase.Checker = func(_ *core.RegionsInfo, stats []info.StoreStats) bool { res := true curTime := time.Now().Unix() storesAvailable := make([]uint64, 0, storeNum+1) diff --git a/tools/pd-simulator/simulator/cases/diagnose_rule.go b/tools/pd-simulator/simulator/cases/diagnose_rule.go index 6cd76c854b7..2cd11b9624a 100644 --- a/tools/pd-simulator/simulator/cases/diagnose_rule.go +++ b/tools/pd-simulator/simulator/cases/diagnose_rule.go @@ -19,25 +19,27 @@ import ( "github.com/docker/go-units" "github.com/pingcap/kvproto/pkg/metapb" + pdHttp "github.com/tikv/pd/client/http" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/schedule/placement" + sc "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/info" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" "go.uber.org/zap" ) -func newRule1() *Case { +func newRule1(_ *sc.SimConfig) *Case { var simCase Case - simCase.Rules = make([]*placement.Rule, 0) - simCase.Rules = append(simCase.Rules, &placement.Rule{ + simCase.Rules = make([]*pdHttp.Rule, 0) + simCase.Rules = append(simCase.Rules, &pdHttp.Rule{ GroupID: "test1", ID: "test1", StartKeyHex: "", EndKeyHex: "", - Role: placement.Learner, + Role: pdHttp.Learner, Count: 1, - LabelConstraints: []placement.LabelConstraint{ + LabelConstraints: []pdHttp.LabelConstraint{ { Key: "region", Op: "in", @@ -45,14 +47,14 @@ func newRule1() *Case { }, }, LocationLabels: []string{"host"}, - }, &placement.Rule{ + }, &pdHttp.Rule{ GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, StartKeyHex: "", EndKeyHex: "", - Role: placement.Voter, + Role: pdHttp.Voter, Count: 5, - LabelConstraints: []placement.LabelConstraint{ + LabelConstraints: []pdHttp.LabelConstraint{ { Key: "region", Op: "in", @@ -100,7 +102,7 @@ func newRule1() *Case { storesLastUpdateTime := make([]int64, storeNum+1) storeLastAvailable := make([]uint64, storeNum+1) - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { + simCase.Checker = func(_ *core.RegionsInfo, stats []info.StoreStats) bool { res := true curTime := time.Now().Unix() storesAvailable := make([]uint64, 0, storeNum+1) @@ -126,19 +128,19 @@ func newRule1() *Case { return &simCase } -func newRule2() *Case { +func newRule2(_ *sc.SimConfig) *Case { var simCase Case - simCase.Rules = make([]*placement.Rule, 0) + simCase.Rules = make([]*pdHttp.Rule, 0) simCase.Rules = append(simCase.Rules, - &placement.Rule{ + &pdHttp.Rule{ GroupID: "test1", ID: "test1", StartKeyHex: "", EndKeyHex: "", - Role: placement.Leader, + Role: pdHttp.Leader, Count: 1, - LabelConstraints: []placement.LabelConstraint{ + 
LabelConstraints: []pdHttp.LabelConstraint{ { Key: "region", Op: "in", @@ -179,7 +181,7 @@ func newRule2() *Case { storesLastUpdateTime := make([]int64, storeNum+1) storeLastAvailable := make([]uint64, storeNum+1) - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { + simCase.Checker = func(_ *core.RegionsInfo, stats []info.StoreStats) bool { res := true curTime := time.Now().Unix() storesAvailable := make([]uint64, 0, storeNum+1) diff --git a/tools/pd-simulator/simulator/cases/event_inner.go b/tools/pd-simulator/simulator/cases/event_inner.go index 3edf26b72a5..72521584e88 100644 --- a/tools/pd-simulator/simulator/cases/event_inner.go +++ b/tools/pd-simulator/simulator/cases/event_inner.go @@ -25,7 +25,7 @@ type WriteFlowOnSpotDescriptor struct { } // Type implements the EventDescriptor interface. -func (w *WriteFlowOnSpotDescriptor) Type() string { +func (*WriteFlowOnSpotDescriptor) Type() string { return "write-flow-on-spot" } @@ -35,7 +35,7 @@ type WriteFlowOnRegionDescriptor struct { } // Type implements the EventDescriptor interface. -func (w *WriteFlowOnRegionDescriptor) Type() string { +func (*WriteFlowOnRegionDescriptor) Type() string { return "write-flow-on-region" } @@ -45,7 +45,7 @@ type ReadFlowOnRegionDescriptor struct { } // Type implements the EventDescriptor interface. -func (w *ReadFlowOnRegionDescriptor) Type() string { +func (*ReadFlowOnRegionDescriptor) Type() string { return "read-flow-on-region" } @@ -55,7 +55,7 @@ type AddNodesDescriptor struct { } // Type implements the EventDescriptor interface. -func (w *AddNodesDescriptor) Type() string { +func (*AddNodesDescriptor) Type() string { return "add-nodes" } @@ -65,6 +65,6 @@ type DeleteNodesDescriptor struct { } // Type implements the EventDescriptor interface. 
-func (w *DeleteNodesDescriptor) Type() string { +func (*DeleteNodesDescriptor) Type() string { return "delete-nodes" } diff --git a/tools/pd-simulator/simulator/cases/hot_read.go b/tools/pd-simulator/simulator/cases/hot_read.go index 9df4f8796e8..d154886b0a4 100644 --- a/tools/pd-simulator/simulator/cases/hot_read.go +++ b/tools/pd-simulator/simulator/cases/hot_read.go @@ -15,38 +15,38 @@ package cases import ( - "math/rand" - "github.com/docker/go-units" "github.com/pingcap/kvproto/pkg/metapb" "github.com/tikv/pd/pkg/core" + sc "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/info" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" - "go.uber.org/zap" ) -func newHotRead() *Case { +func newHotRead(config *sc.SimConfig) *Case { var simCase Case - - storeNum, regionNum := getStoreNum(), getRegionNum() + totalStore := config.TotalStore + totalRegion := config.TotalRegion + replica := int(config.ServerConfig.Replication.MaxReplicas) // Initialize the cluster - for i := 1; i <= storeNum; i++ { + for i := 0; i < totalStore; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), + ID: simutil.IDAllocator.NextID(), Status: metapb.StoreState_Up, }) } - for i := 0; i < storeNum*regionNum/3; i++ { - storeIDs := rand.Perm(storeNum) - peers := []*metapb.Peer{ - {Id: IDAllocator.nextID(), StoreId: uint64(storeIDs[0] + 1)}, - {Id: IDAllocator.nextID(), StoreId: uint64(storeIDs[1] + 1)}, - {Id: IDAllocator.nextID(), StoreId: uint64(storeIDs[2] + 1)}, + for i := 0; i < totalRegion; i++ { + peers := make([]*metapb.Peer, 0, replica) + for j := 0; j < replica; j++ { + peers = append(peers, &metapb.Peer{ + Id: simutil.IDAllocator.NextID(), + StoreId: uint64((i+j)%totalStore + 1), + }) } simCase.Regions = append(simCase.Regions, Region{ - ID: IDAllocator.nextID(), + ID: simutil.IDAllocator.NextID(), Peers: peers, Leader: peers[0], Size: 96 * units.MiB, @@ -56,7 +56,7 @@ func newHotRead() *Case { // Events description // select regions on store 1 as hot read regions. - selectRegionNum := 4 * storeNum + selectRegionNum := 4 * totalStore readFlow := make(map[uint64]int64, selectRegionNum) for _, r := range simCase.Regions { if r.Leader.GetStoreId() == 1 { @@ -67,18 +67,17 @@ func newHotRead() *Case { } } e := &ReadFlowOnRegionDescriptor{} - e.Step = func(tick int64) map[uint64]int64 { + e.Step = func(int64) map[uint64]int64 { return readFlow } simCase.Events = []EventDescriptor{e} // Checker description - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { - leaderCount := make([]int, storeNum) + simCase.Checker = func(regions *core.RegionsInfo, _ []info.StoreStats) bool { + leaderCount := make([]int, totalStore) for id := range readFlow { leaderStore := regions.GetRegion(id).GetLeader().GetStoreId() leaderCount[int(leaderStore-1)]++ } - simutil.Logger.Info("current hot region counts", zap.Reflect("hot-region", leaderCount)) // check count diff < 2. 
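The hot-read case above (and the other rewritten cases that follow) now places replicas deterministically with (i+j) % totalStore + 1 instead of rand.Perm, so every store receives the same number of peers up front. A small sketch of that placement pattern under an assumed 5-store, 3-replica setup:

package main

import "fmt"

func main() {
    totalStore, replica := 5, 3 // assumed values; the simulator reads them from SimConfig
    for i := 0; i < 4; i++ {    // first few regions only
        stores := make([]uint64, 0, replica)
        for j := 0; j < replica; j++ {
            // Same formula as the rewritten cases: the peers of region i land on
            // consecutive stores, wrapping around, with store IDs starting at 1.
            stores = append(stores, uint64((i+j)%totalStore+1))
        }
        fmt.Printf("region %d -> stores %v\n", i, stores)
    }
    // Output: region 0 -> [1 2 3], region 1 -> [2 3 4], region 2 -> [3 4 5], region 3 -> [4 5 1]
}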
var min, max int diff --git a/tools/pd-simulator/simulator/cases/hot_write.go b/tools/pd-simulator/simulator/cases/hot_write.go index 8efe32c5657..e73ca6f3ce3 100644 --- a/tools/pd-simulator/simulator/cases/hot_write.go +++ b/tools/pd-simulator/simulator/cases/hot_write.go @@ -15,37 +15,38 @@ package cases import ( - "math/rand" - "github.com/docker/go-units" "github.com/pingcap/kvproto/pkg/metapb" "github.com/tikv/pd/pkg/core" + sc "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/info" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" - "go.uber.org/zap" ) -func newHotWrite() *Case { +func newHotWrite(config *sc.SimConfig) *Case { var simCase Case + totalStore := config.TotalStore + totalRegion := config.TotalRegion + replica := int(config.ServerConfig.Replication.MaxReplicas) - storeNum, regionNum := getStoreNum(), getRegionNum() // Initialize the cluster - for i := 1; i <= storeNum; i++ { + for i := 0; i < totalStore; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), + ID: simutil.IDAllocator.NextID(), Status: metapb.StoreState_Up, }) } - for i := 0; i < storeNum*regionNum/3; i++ { - storeIDs := rand.Perm(storeNum) - peers := []*metapb.Peer{ - {Id: IDAllocator.nextID(), StoreId: uint64(storeIDs[0] + 1)}, - {Id: IDAllocator.nextID(), StoreId: uint64(storeIDs[1] + 1)}, - {Id: IDAllocator.nextID(), StoreId: uint64(storeIDs[2] + 1)}, + for i := 0; i < totalRegion; i++ { + peers := make([]*metapb.Peer, 0, replica) + for j := 0; j < replica; j++ { + peers = append(peers, &metapb.Peer{ + Id: simutil.IDAllocator.NextID(), + StoreId: uint64((i+j)%totalStore + 1), + }) } simCase.Regions = append(simCase.Regions, Region{ - ID: IDAllocator.nextID(), + ID: simutil.IDAllocator.NextID(), Peers: peers, Leader: peers[0], Size: 96 * units.MiB, @@ -55,7 +56,7 @@ func newHotWrite() *Case { // Events description // select regions on store 1 as hot write regions. - selectStoreNum := storeNum + selectStoreNum := totalStore writeFlow := make(map[uint64]int64, selectStoreNum) for _, r := range simCase.Regions { if r.Leader.GetStoreId() == 1 { @@ -66,16 +67,16 @@ func newHotWrite() *Case { } } e := &WriteFlowOnRegionDescriptor{} - e.Step = func(tick int64) map[uint64]int64 { + e.Step = func(int64) map[uint64]int64 { return writeFlow } simCase.Events = []EventDescriptor{e} // Checker description - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { - leaderCount := make([]int, storeNum) - peerCount := make([]int, storeNum) + simCase.Checker = func(regions *core.RegionsInfo, _ []info.StoreStats) bool { + leaderCount := make([]int, totalStore) + peerCount := make([]int, totalStore) for id := range writeFlow { region := regions.GetRegion(id) leaderCount[int(region.GetLeader().GetStoreId()-1)]++ @@ -83,7 +84,6 @@ func newHotWrite() *Case { peerCount[int(p.GetStoreId()-1)]++ } } - simutil.Logger.Info("current hot region counts", zap.Reflect("leader", leaderCount), zap.Reflect("peer", peerCount)) // check count diff <= 2. 
var minLeader, maxLeader, minPeer, maxPeer int diff --git a/tools/pd-simulator/simulator/cases/import_data.go b/tools/pd-simulator/simulator/cases/import_data.go index 0e7f7770a48..b9f448a6cf6 100644 --- a/tools/pd-simulator/simulator/cases/import_data.go +++ b/tools/pd-simulator/simulator/cases/import_data.go @@ -17,7 +17,6 @@ package cases import ( "bytes" "fmt" - "math/rand" "os" "github.com/docker/go-units" @@ -26,27 +25,33 @@ import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/codec" "github.com/tikv/pd/pkg/core" + sc "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/info" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" "go.uber.org/zap" ) -func newImportData() *Case { +func newImportData(config *sc.SimConfig) *Case { var simCase Case + totalStore := config.TotalStore + totalRegion := config.TotalRegion + replica := int(config.ServerConfig.Replication.MaxReplicas) + // Initialize the cluster - for i := 1; i <= 10; i++ { + for i := 0; i < totalStore; i++ { simCase.Stores = append(simCase.Stores, &Store{ ID: IDAllocator.nextID(), Status: metapb.StoreState_Up, }) } - for i := 0; i < getRegionNum(); i++ { - storeIDs := rand.Perm(10) - peers := []*metapb.Peer{ - {Id: IDAllocator.nextID(), StoreId: uint64(storeIDs[0] + 1)}, - {Id: IDAllocator.nextID(), StoreId: uint64(storeIDs[1] + 1)}, - {Id: IDAllocator.nextID(), StoreId: uint64(storeIDs[2] + 1)}, + for i := 0; i < totalRegion; i++ { + peers := make([]*metapb.Peer, 0, replica) + for j := 0; j < replica; j++ { + peers = append(peers, &metapb.Peer{ + Id: IDAllocator.nextID(), + StoreId: uint64((i+j)%totalStore + 1), + }) } simCase.Regions = append(simCase.Regions, Region{ ID: IDAllocator.nextID(), @@ -65,7 +70,7 @@ func newImportData() *Case { table12 := string(codec.EncodeBytes(codec.GenerateTableKey(12))) table13 := string(codec.EncodeBytes(codec.GenerateTableKey(13))) e.Step = func(tick int64) map[string]int64 { - if tick > int64(getRegionNum())/10 { + if tick > int64(totalRegion)/10 { return nil } return map[string]int64{ @@ -78,7 +83,7 @@ func newImportData() *Case { checkCount := uint64(0) var newRegionCount [][3]int var allRegionCount [][3]int - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { + simCase.Checker = func(regions *core.RegionsInfo, _ []info.StoreStats) bool { leaderDist := make(map[uint64]int) peerDist := make(map[uint64]int) leaderTotal := 0 @@ -141,14 +146,14 @@ func newImportData() *Case { if dev > 0.02 { simutil.Logger.Warn("Not balanced, change scheduler or store limit", zap.Float64("dev score", dev)) } - if checkCount > uint64(getRegionNum())/5 { + if checkCount > uint64(totalRegion)/5 { isEnd = true - } else if checkCount > uint64(getRegionNum())/10 { + } else if checkCount > uint64(totalRegion)/10 { isEnd = dev < 0.01 } if isEnd { - renderPlot("new_region.html", newRegionCount, int(checkCount), 0, getRegionNum()/10) - renderPlot("all_region.html", allRegionCount, int(checkCount), 28*getRegionNum()/100, getRegionNum()/3) + renderPlot("new_region.html", newRegionCount, int(checkCount), 0, totalRegion/10) + renderPlot("all_region.html", allRegionCount, int(checkCount), 28*totalRegion/100, totalRegion/3) } return isEnd } diff --git a/tools/pd-simulator/simulator/cases/makeup_down_replica.go b/tools/pd-simulator/simulator/cases/makeup_down_replica.go index 57eb2dd1f53..a5ee63e71a0 100644 --- a/tools/pd-simulator/simulator/cases/makeup_down_replica.go +++ b/tools/pd-simulator/simulator/cases/makeup_down_replica.go 
@@ -18,30 +18,35 @@ import ( "github.com/docker/go-units" "github.com/pingcap/kvproto/pkg/metapb" "github.com/tikv/pd/pkg/core" + sc "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/info" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" - "go.uber.org/zap" ) -func newMakeupDownReplicas() *Case { +func newMakeupDownReplicas(config *sc.SimConfig) *Case { var simCase Case - storeNum, regionNum := getStoreNum(), getRegionNum() - noEmptyStoreNum := storeNum - 1 - for i := 1; i <= storeNum; i++ { + totalStore := config.TotalStore + totalRegion := config.TotalRegion + replica := int(config.ServerConfig.Replication.MaxReplicas) + + noEmptyStoreNum := totalStore - 1 + for i := 0; i < totalStore; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), + ID: simutil.IDAllocator.NextID(), Status: metapb.StoreState_Up, }) } - for i := 0; i < storeNum*regionNum/3; i++ { - peers := []*metapb.Peer{ - {Id: IDAllocator.nextID(), StoreId: uint64((i)%storeNum) + 1}, - {Id: IDAllocator.nextID(), StoreId: uint64((i+1)%storeNum) + 1}, - {Id: IDAllocator.nextID(), StoreId: uint64((i+2)%storeNum) + 1}, + for i := 0; i < totalRegion; i++ { + peers := make([]*metapb.Peer, 0, replica) + for j := 0; j < replica; j++ { + peers = append(peers, &metapb.Peer{ + Id: simutil.IDAllocator.NextID(), + StoreId: uint64((i+j)%totalStore + 1), + }) } simCase.Regions = append(simCase.Regions, Region{ - ID: IDAllocator.nextID(), + ID: simutil.IDAllocator.NextID(), Peers: peers, Leader: peers[0], Size: 96 * units.MiB, @@ -49,7 +54,7 @@ func newMakeupDownReplicas() *Case { }) } - numNodes := storeNum + numNodes := totalStore down := false e := &DeleteNodesDescriptor{} e.Step = func(tick int64) uint64 { @@ -64,32 +69,17 @@ func newMakeupDownReplicas() *Case { } simCase.Events = []EventDescriptor{e} - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { - sum := 0 - regionCounts := make([]int, 0, storeNum) - for i := 1; i <= storeNum; i++ { - regionCount := regions.GetStoreRegionCount(uint64(i)) - regionCounts = append(regionCounts, regionCount) - sum += regionCount - } - simutil.Logger.Info("current region counts", zap.Ints("region", regionCounts)) - - if down && sum < storeNum*regionNum { - // only need to print once - down = false - simutil.Logger.Error("making up replicas don't start immediately") + simCase.Checker = func(regions *core.RegionsInfo, _ []info.StoreStats) bool { + if !down { return false } - - res := true - threshold := 0.05 - for index, regionCount := range regionCounts { - if index == 0 { // storeId == 1 - continue + for i := 1; i <= totalStore; i++ { + peerCount := regions.GetStoreRegionCount(uint64(i)) + if isUniform(peerCount, replica*totalRegion/noEmptyStoreNum) { + return false } - res = res && isUniform(regionCount, storeNum*regionNum/noEmptyStoreNum, threshold) } - return res + return true } return &simCase } diff --git a/tools/pd-simulator/simulator/cases/region_merge.go b/tools/pd-simulator/simulator/cases/region_merge.go index 501803d439e..8097565d1a7 100644 --- a/tools/pd-simulator/simulator/cases/region_merge.go +++ b/tools/pd-simulator/simulator/cases/region_merge.go @@ -15,36 +15,37 @@ package cases import ( - "math/rand" - "github.com/docker/go-units" "github.com/pingcap/kvproto/pkg/metapb" "github.com/tikv/pd/pkg/core" + sc "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/info" 
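The makeup-down-replicas checker above is intended to finish only after the delete-node event has fired and every surviving store holds a peer count inside the uniform band around replica*totalRegion/noEmptyStoreNum. A self-contained sketch of that finishing condition, abstracted to plain peer counts (note the !isUniform negation: a store outside the band means scheduling is still in progress):

package main

import "fmt"

// isUniform mirrors the ±5% helper in cases.go.
func isUniform(count, meanCount int) bool {
    threshold := 0.05
    maxCount := int((1.0 + threshold) * float64(meanCount))
    minCount := int((1.0 - threshold) * float64(meanCount))
    return minCount <= count && count <= maxCount
}

// finished reports whether the case should stop: the down event must have fired
// and every surviving store must hold a peer count inside the uniform band.
func finished(down bool, peerCounts []int, replica, totalRegion, noEmptyStoreNum int) bool {
    if !down {
        return false
    }
    expected := replica * totalRegion / noEmptyStoreNum
    for _, c := range peerCounts {
        if !isUniform(c, expected) { // any store outside the band -> not finished yet
            return false
        }
    }
    return true
}

func main() {
    // 3 replicas, 1000 regions, one of 4 stores taken down -> about 1000 peers per surviving store.
    fmt.Println(finished(true, []int{995, 1010, 990, 1005}, 3, 1000, 3)) // true
    fmt.Println(finished(true, []int{1200, 900, 950, 950}, 3, 1000, 3))  // false
}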
"github.com/tikv/pd/tools/pd-simulator/simulator/simutil" - "go.uber.org/zap" ) -func newRegionMerge() *Case { +func newRegionMerge(config *sc.SimConfig) *Case { var simCase Case - // Initialize the cluster - storeNum, regionNum := getStoreNum(), getRegionNum() - for i := 1; i <= storeNum; i++ { + totalStore := config.TotalStore + totalRegion := config.TotalRegion + replica := int(config.ServerConfig.Replication.MaxReplicas) + + for i := 0; i < totalStore; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), + ID: simutil.IDAllocator.NextID(), Status: metapb.StoreState_Up, }) } - for i := 0; i < storeNum*regionNum/3; i++ { - storeIDs := rand.Perm(storeNum) - peers := []*metapb.Peer{ - {Id: IDAllocator.nextID(), StoreId: uint64(storeIDs[0] + 1)}, - {Id: IDAllocator.nextID(), StoreId: uint64(storeIDs[1] + 1)}, - {Id: IDAllocator.nextID(), StoreId: uint64(storeIDs[2] + 1)}, + for i := 0; i < totalRegion; i++ { + peers := make([]*metapb.Peer, 0, replica) + for j := 0; j < replica; j++ { + peers = append(peers, &metapb.Peer{ + Id: simutil.IDAllocator.NextID(), + StoreId: uint64((i+j)%totalStore + 1), + }) } simCase.Regions = append(simCase.Regions, Region{ - ID: IDAllocator.nextID(), + ID: simutil.IDAllocator.NextID(), Peers: peers, Leader: peers[0], Size: 10 * units.MiB, @@ -52,18 +53,13 @@ func newRegionMerge() *Case { }) } // Checker description - threshold := 0.05 mergeRatio := 4 // when max-merge-region-size is 20, per region will reach 40MB - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { - sum := 0 - regionCounts := make([]int, 0, storeNum) - for i := 1; i <= storeNum; i++ { - regionCount := regions.GetStoreRegionCount(uint64(i)) - regionCounts = append(regionCounts, regionCount) - sum += regionCount + simCase.Checker = func(regions *core.RegionsInfo, _ []info.StoreStats) bool { + currentPeerCount := 0 + for i := 1; i <= totalStore; i++ { + currentPeerCount += regions.GetStoreRegionCount(uint64(i)) } - simutil.Logger.Info("current counts", zap.Ints("region", regionCounts), zap.Int64("average region size", regions.GetAverageRegionSize())) - return isUniform(sum, storeNum*regionNum/mergeRatio, threshold) + return isUniform(currentPeerCount, totalRegion*replica/mergeRatio) } return &simCase } diff --git a/tools/pd-simulator/simulator/cases/region_split.go b/tools/pd-simulator/simulator/cases/region_split.go index 6a69386cb6b..7b712f4dc48 100644 --- a/tools/pd-simulator/simulator/cases/region_split.go +++ b/tools/pd-simulator/simulator/cases/region_split.go @@ -18,16 +18,15 @@ import ( "github.com/docker/go-units" "github.com/pingcap/kvproto/pkg/metapb" "github.com/tikv/pd/pkg/core" + sc "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/info" - "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" - "go.uber.org/zap" ) -func newRegionSplit() *Case { +func newRegionSplit(config *sc.SimConfig) *Case { var simCase Case - // Initialize the cluster - storeNum := getStoreNum() - for i := 1; i <= storeNum; i++ { + totalStore := config.TotalStore + + for i := 0; i < totalStore; i++ { simCase.Stores = append(simCase.Stores, &Store{ ID: uint64(i), Status: metapb.StoreState_Up, @@ -48,7 +47,7 @@ func newRegionSplit() *Case { simCase.RegionSplitKeys = 10000 // Events description e := &WriteFlowOnSpotDescriptor{} - e.Step = func(tick int64) map[string]int64 { + e.Step = func(int64) map[string]int64 { return map[string]int64{ "foobar": 8 * units.MiB, } @@ -56,16 +55,14 @@ func 
newRegionSplit() *Case { simCase.Events = []EventDescriptor{e} // Checker description - simCase.Checker = func(regions *core.RegionsInfo, stats []info.StoreStats) bool { - res := true - regionCounts := make([]int, 0, storeNum) - for i := 1; i <= storeNum; i++ { - regionCount := regions.GetStoreRegionCount(uint64(i)) - regionCounts = append(regionCounts, regionCount) - res = res && regionCount > 5 + simCase.Checker = func(regions *core.RegionsInfo, _ []info.StoreStats) bool { + for i := 1; i <= totalStore; i++ { + peerCount := regions.GetStoreRegionCount(uint64(i)) + if peerCount < 5 { + return false + } } - simutil.Logger.Info("current counts", zap.Ints("region", regionCounts)) - return res + return true } return &simCase } diff --git a/tools/pd-simulator/simulator/client.go b/tools/pd-simulator/simulator/client.go index 8dd1ee1646e..f5bd379d17e 100644 --- a/tools/pd-simulator/simulator/client.go +++ b/tools/pd-simulator/simulator/client.go @@ -15,11 +15,9 @@ package simulator import ( - "bytes" "context" - "encoding/json" "fmt" - "net/http" + "strconv" "strings" "sync" "time" @@ -27,45 +25,56 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" + pd "github.com/tikv/pd/client" + pdHttp "github.com/tikv/pd/client/http" "github.com/tikv/pd/pkg/core" - "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/utils/typeutil" + sc "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" "go.uber.org/zap" "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" ) // Client is a PD (Placement Driver) client. // It should not be used after calling Close(). type Client interface { - GetClusterID(ctx context.Context) uint64 - AllocID(ctx context.Context) (uint64, error) - Bootstrap(ctx context.Context, store *metapb.Store, region *metapb.Region) error - PutStore(ctx context.Context, store *metapb.Store) error - StoreHeartbeat(ctx context.Context, stats *pdpb.StoreStats) error - RegionHeartbeat(ctx context.Context, region *core.RegionInfo) error - PutPDConfig(*PDConfig) error + AllocID(context.Context) (uint64, error) + PutStore(context.Context, *metapb.Store) error + StoreHeartbeat(context.Context, *pdpb.StoreStats) error + RegionHeartbeat(context.Context, *core.RegionInfo) error + HeartbeatStreamLoop() + ChangeConn(*grpc.ClientConn) error Close() } const ( pdTimeout = time.Second maxInitClusterRetries = 100 - httpPrefix = "pd/api/v1" + // retry to get leader URL + leaderChangedWaitTime = 100 * time.Millisecond + retryTimes = 10 ) var ( // errFailInitClusterID is returned when failed to load clusterID from all supplied PD addresses. errFailInitClusterID = errors.New("[pd] failed to get cluster id") + PDHTTPClient pdHttp.Client + sd pd.ServiceDiscovery + ClusterID uint64 ) +// requestHeader returns a header for fixed ClusterID. +func requestHeader() *pdpb.RequestHeader { + return &pdpb.RequestHeader{ + ClusterId: ClusterID, + } +} + type client struct { - url string tag string - clusterID uint64 clientConn *grpc.ClientConn - httpClient *http.Client reportRegionHeartbeatCh chan *core.RegionInfo receiveRegionHeartbeatCh chan *pdpb.RegionHeartbeatResponse @@ -76,30 +85,15 @@ type client struct { } // NewClient creates a PD client. 
-func NewClient(pdAddr string, tag string) (Client, <-chan *pdpb.RegionHeartbeatResponse, error) { - simutil.Logger.Info("create pd client with endpoints", zap.String("tag", tag), zap.String("pd-address", pdAddr)) +func NewClient(tag string) (Client, <-chan *pdpb.RegionHeartbeatResponse, error) { ctx, cancel := context.WithCancel(context.Background()) c := &client{ - url: pdAddr, reportRegionHeartbeatCh: make(chan *core.RegionInfo, 1), receiveRegionHeartbeatCh: make(chan *pdpb.RegionHeartbeatResponse, 1), ctx: ctx, cancel: cancel, tag: tag, - httpClient: &http.Client{}, - } - cc, err := c.createConn() - if err != nil { - return nil, nil, err } - c.clientConn = cc - if err := c.initClusterID(); err != nil { - return nil, nil, err - } - simutil.Logger.Info("init cluster id", zap.String("tag", c.tag), zap.Uint64("cluster-id", c.clusterID)) - c.wg.Add(1) - go c.heartbeatStreamLoop() - return c, c.receiveRegionHeartbeatCh, nil } @@ -107,39 +101,18 @@ func (c *client) pdClient() pdpb.PDClient { return pdpb.NewPDClient(c.clientConn) } -func (c *client) initClusterID() error { - ctx, cancel := context.WithCancel(c.ctx) - defer cancel() - for i := 0; i < maxInitClusterRetries; i++ { - members, err := c.getMembers(ctx) - if err != nil || members.GetHeader() == nil { - simutil.Logger.Error("failed to get cluster id", zap.String("tag", c.tag), zap.Error(err)) - continue - } - c.clusterID = members.GetHeader().GetClusterId() - return nil - } - - return errors.WithStack(errFailInitClusterID) -} - -func (c *client) getMembers(ctx context.Context) (*pdpb.GetMembersResponse, error) { - members, err := c.pdClient().GetMembers(ctx, &pdpb.GetMembersRequest{}) +func createConn(url string) (*grpc.ClientConn, error) { + cc, err := grpc.Dial(strings.TrimPrefix(url, "http://"), grpc.WithTransportCredentials(insecure.NewCredentials())) if err != nil { return nil, errors.WithStack(err) } - if members.GetHeader().GetError() != nil { - return nil, errors.WithStack(errors.New(members.GetHeader().GetError().String())) - } - return members, nil + return cc, nil } -func (c *client) createConn() (*grpc.ClientConn, error) { - cc, err := grpc.Dial(strings.TrimPrefix(c.url, "http://"), grpc.WithInsecure()) - if err != nil { - return nil, errors.WithStack(err) - } - return cc, nil +func (c *client) ChangeConn(cc *grpc.ClientConn) error { + c.clientConn = cc + simutil.Logger.Info("change pd client with endpoints", zap.String("tag", c.tag), zap.String("pd-address", cc.Target())) + return nil } func (c *client) createHeartbeatStream() (pdpb.PD_RegionHeartbeatClient, context.Context, context.CancelFunc) { @@ -169,7 +142,8 @@ func (c *client) createHeartbeatStream() (pdpb.PD_RegionHeartbeatClient, context return stream, ctx, cancel } -func (c *client) heartbeatStreamLoop() { +func (c *client) HeartbeatStreamLoop() { + c.wg.Add(1) defer c.wg.Done() for { stream, ctx, cancel := c.createHeartbeatStream() @@ -190,6 +164,23 @@ func (c *client) heartbeatStreamLoop() { return } wg.Wait() + + // update connection to recreate heartbeat stream + for i := 0; i < retryTimes; i++ { + sd.ScheduleCheckMemberChanged() + time.Sleep(leaderChangedWaitTime) + if client := sd.GetServiceClient(); client != nil { + _, conn, err := getLeaderURL(ctx, client.GetClientConn()) + if err != nil { + simutil.Logger.Error("[HeartbeatStreamLoop] failed to get leader URL", zap.Error(err)) + continue + } + if err = c.ChangeConn(conn); err == nil { + break + } + } + } + simutil.Logger.Info("recreate heartbeat stream", zap.String("tag", c.tag)) } } @@ -199,6 +190,7 @@ 
func (c *client) receiveRegionHeartbeat(ctx context.Context, stream pdpb.PD_Regi resp, err := stream.Recv() if err != nil { errCh <- err + simutil.Logger.Error("receive regionHeartbeat error", zap.String("tag", c.tag), zap.Error(err)) return } select { @@ -216,7 +208,7 @@ func (c *client) reportRegionHeartbeat(ctx context.Context, stream pdpb.PD_Regio case r := <-c.reportRegionHeartbeatCh: region := r.Clone() request := &pdpb.RegionHeartbeatRequest{ - Header: c.requestHeader(), + Header: requestHeader(), Region: region.GetMeta(), Leader: region.GetLeader(), DownPeers: region.GetDownPeers(), @@ -230,6 +222,7 @@ func (c *client) reportRegionHeartbeat(ctx context.Context, stream pdpb.PD_Regio if err != nil { errCh <- err simutil.Logger.Error("report regionHeartbeat error", zap.String("tag", c.tag), zap.Error(err)) + return } case <-ctx.Done(): return @@ -238,6 +231,11 @@ func (c *client) reportRegionHeartbeat(ctx context.Context, stream pdpb.PD_Regio } func (c *client) Close() { + if c.cancel == nil { + simutil.Logger.Info("pd client has been closed", zap.String("tag", c.tag)) + return + } + simutil.Logger.Info("closing pd client", zap.String("tag", c.tag)) c.cancel() c.wg.Wait() @@ -246,14 +244,10 @@ func (c *client) Close() { } } -func (c *client) GetClusterID(context.Context) uint64 { - return c.clusterID -} - func (c *client) AllocID(ctx context.Context) (uint64, error) { ctx, cancel := context.WithTimeout(ctx, pdTimeout) resp, err := c.pdClient().AllocID(ctx, &pdpb.AllocIDRequest{ - Header: c.requestHeader(), + Header: requestHeader(), }) cancel() if err != nil { @@ -265,127 +259,287 @@ func (c *client) AllocID(ctx context.Context) (uint64, error) { return resp.GetId(), nil } -func (c *client) Bootstrap(ctx context.Context, store *metapb.Store, region *metapb.Region) error { +func (c *client) PutStore(ctx context.Context, store *metapb.Store) error { ctx, cancel := context.WithTimeout(ctx, pdTimeout) - defer cancel() - req := &pdpb.IsBootstrappedRequest{ - Header: &pdpb.RequestHeader{ - ClusterId: c.clusterID, - }, - } - resp, err := c.pdClient().IsBootstrapped(ctx, req) - if resp.GetBootstrapped() { - simutil.Logger.Fatal("failed to bootstrap, server is not clean") - } - if err != nil { - return err - } newStore := typeutil.DeepClone(store, core.StoreFactory) - newRegion := typeutil.DeepClone(region, core.RegionFactory) - - res, err := c.pdClient().Bootstrap(ctx, &pdpb.BootstrapRequest{ - Header: c.requestHeader(), + resp, err := c.pdClient().PutStore(ctx, &pdpb.PutStoreRequest{ + Header: requestHeader(), Store: newStore, - Region: newRegion, }) + cancel() if err != nil { return err } - if res.GetHeader().GetError() != nil { - return errors.Errorf("bootstrap failed: %s", resp.GetHeader().GetError().String()) + if resp.Header.GetError() != nil { + simutil.Logger.Error("put store error", zap.Reflect("error", resp.Header.GetError())) + return nil } return nil } -func (c *client) PutStore(ctx context.Context, store *metapb.Store) error { +func (c *client) StoreHeartbeat(ctx context.Context, stats *pdpb.StoreStats) error { ctx, cancel := context.WithTimeout(ctx, pdTimeout) - newStore := typeutil.DeepClone(store, core.StoreFactory) - resp, err := c.pdClient().PutStore(ctx, &pdpb.PutStoreRequest{ - Header: c.requestHeader(), - Store: newStore, + newStats := typeutil.DeepClone(stats, core.StoreStatsFactory) + resp, err := c.pdClient().StoreHeartbeat(ctx, &pdpb.StoreHeartbeatRequest{ + Header: requestHeader(), + Stats: newStats, }) cancel() if err != nil { return err } if resp.Header.GetError() 
!= nil { - simutil.Logger.Error("put store error", zap.Reflect("error", resp.Header.GetError())) + simutil.Logger.Error("store heartbeat error", zap.Reflect("error", resp.Header.GetError())) return nil } return nil } -func (c *client) PutPDConfig(config *PDConfig) error { +func (c *client) RegionHeartbeat(_ context.Context, region *core.RegionInfo) error { + c.reportRegionHeartbeatCh <- region + return nil +} + +type RetryClient struct { + client Client + retryCount int +} + +func NewRetryClient(node *Node) *RetryClient { + // Init PD client and putting it into node. + tag := fmt.Sprintf("store %d", node.Store.Id) + var ( + client Client + receiveRegionHeartbeatCh <-chan *pdpb.RegionHeartbeatResponse + err error + ) + + // Client should wait if PD server is not ready. + for i := 0; i < maxInitClusterRetries; i++ { + client, receiveRegionHeartbeatCh, err = NewClient(tag) + if err == nil { + break + } + time.Sleep(time.Second) + } + + if err != nil { + simutil.Logger.Fatal("create client failed", zap.Error(err)) + } + node.client = client + + // Init RetryClient + retryClient := &RetryClient{ + client: client, + retryCount: retryTimes, + } + // check leader url firstly + retryClient.requestWithRetry(func() (any, error) { + return nil, errors.New("retry to create client") + }) + // start heartbeat stream + node.receiveRegionHeartbeatCh = receiveRegionHeartbeatCh + go client.HeartbeatStreamLoop() + + return retryClient +} + +func (rc *RetryClient) requestWithRetry(f func() (any, error)) (any, error) { + // execute the function directly + if res, err := f(); err == nil { + return res, nil + } + // retry to get leader URL + for i := 0; i < rc.retryCount; i++ { + sd.ScheduleCheckMemberChanged() + time.Sleep(100 * time.Millisecond) + if client := sd.GetServiceClient(); client != nil { + _, conn, err := getLeaderURL(context.Background(), client.GetClientConn()) + if err != nil { + simutil.Logger.Error("[retry] failed to get leader URL", zap.Error(err)) + return nil, err + } + if err = rc.client.ChangeConn(conn); err != nil { + simutil.Logger.Error("failed to change connection", zap.Error(err)) + return nil, err + } + return f() + } + } + return nil, errors.New("failed to retry") +} + +func getLeaderURL(ctx context.Context, conn *grpc.ClientConn) (string, *grpc.ClientConn, error) { + pdCli := pdpb.NewPDClient(conn) + members, err := pdCli.GetMembers(ctx, &pdpb.GetMembersRequest{}) + if err != nil { + return "", nil, err + } + if members.GetHeader().GetError() != nil { + return "", nil, errors.New(members.GetHeader().GetError().String()) + } + ClusterID = members.GetHeader().GetClusterId() + if ClusterID == 0 { + return "", nil, errors.New("cluster id is 0") + } + if members.GetLeader() == nil { + return "", nil, errors.New("leader is nil") + } + leaderURL := members.GetLeader().ClientUrls[0] + conn, err = createConn(leaderURL) + return leaderURL, conn, err +} + +func (rc *RetryClient) AllocID(ctx context.Context) (uint64, error) { + res, err := rc.requestWithRetry(func() (any, error) { + id, err := rc.client.AllocID(ctx) + return id, err + }) + if err != nil { + return 0, err + } + return res.(uint64), nil +} + +func (rc *RetryClient) PutStore(ctx context.Context, store *metapb.Store) error { + _, err := rc.requestWithRetry(func() (any, error) { + err := rc.client.PutStore(ctx, store) + return nil, err + }) + return err +} + +func (rc *RetryClient) StoreHeartbeat(ctx context.Context, stats *pdpb.StoreStats) error { + _, err := rc.requestWithRetry(func() (any, error) { + err := 
rc.client.StoreHeartbeat(ctx, stats) + return nil, err + }) + return err +} + +func (rc *RetryClient) RegionHeartbeat(ctx context.Context, region *core.RegionInfo) error { + _, err := rc.requestWithRetry(func() (any, error) { + err := rc.client.RegionHeartbeat(ctx, region) + return nil, err + }) + return err +} + +func (*RetryClient) ChangeConn(_ *grpc.ClientConn) error { + panic("unImplement") +} + +func (rc *RetryClient) HeartbeatStreamLoop() { + rc.client.HeartbeatStreamLoop() +} + +func (rc *RetryClient) Close() { + rc.client.Close() +} + +// Bootstrap bootstraps the cluster and using the given PD address firstly. +// because before bootstrapping the cluster, PDServiceDiscovery can not been started. +func Bootstrap(ctx context.Context, pdAddrs string, store *metapb.Store, region *metapb.Region) ( + leaderURL string, pdCli pdpb.PDClient, err error) { + urls := strings.Split(pdAddrs, ",") + if len(urls) == 0 { + return "", nil, errors.New("empty pd address") + } + +retry: + for i := 0; i < maxInitClusterRetries; i++ { + time.Sleep(100 * time.Millisecond) + for _, url := range urls { + conn, err := createConn(url) + if err != nil { + continue + } + leaderURL, conn, err = getLeaderURL(ctx, conn) + if err != nil { + continue + } + pdCli = pdpb.NewPDClient(conn) + break retry + } + } + if ClusterID == 0 { + return "", nil, errors.WithStack(errFailInitClusterID) + } + simutil.Logger.Info("get cluster id successfully", zap.Uint64("cluster-id", ClusterID)) + + // Check if the cluster is already bootstrapped. + ctx, cancel := context.WithTimeout(ctx, pdTimeout) + defer cancel() + req := &pdpb.IsBootstrappedRequest{ + Header: requestHeader(), + } + resp, err := pdCli.IsBootstrapped(ctx, req) + if resp.GetBootstrapped() { + simutil.Logger.Fatal("failed to bootstrap, server is not clean") + } + if err != nil { + return "", nil, err + } + // Bootstrap the cluster. + newStore := typeutil.DeepClone(store, core.StoreFactory) + newRegion := typeutil.DeepClone(region, core.RegionFactory) + var res *pdpb.BootstrapResponse + for i := 0; i < maxInitClusterRetries; i++ { + // Bootstrap the cluster. 
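Each typed RetryClient method above funnels through requestWithRetry, which on failure refreshes the PD leader connection via the service discovery handle and then retries. A stripped-down sketch of that retry shape (the helper names and fixed back-off are illustrative; the real wrapper goes through sd.ScheduleCheckMemberChanged, getLeaderURL and ChangeConn to swap the gRPC connection):

package main

import (
    "errors"
    "fmt"
    "time"
)

// retryOnLeaderChange mimics RetryClient.requestWithRetry: run f once, and on
// failure refresh the leader connection and try again a bounded number of times.
func retryOnLeaderChange(f func() (any, error), refreshLeader func() error, retries int) (any, error) {
    if res, err := f(); err == nil {
        return res, nil
    }
    for i := 0; i < retries; i++ {
        time.Sleep(100 * time.Millisecond) // give the new leader time to settle
        if err := refreshLeader(); err != nil {
            continue
        }
        if res, err := f(); err == nil {
            return res, nil
        }
    }
    return nil, errors.New("failed after retries")
}

func main() {
    calls := 0
    allocID := func() (any, error) {
        calls++
        if calls < 3 { // simulate two failures before the leader is reachable again
            return nil, errors.New("not leader")
        }
        return uint64(42), nil
    }
    res, err := retryOnLeaderChange(allocID, func() error { return nil }, 5)
    fmt.Println(res, err) // 42 <nil>
}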
+ res, err = pdCli.Bootstrap(ctx, &pdpb.BootstrapRequest{ + Header: requestHeader(), + Store: newStore, + Region: newRegion, + }) + if err != nil { + continue + } + if res.GetHeader().GetError() != nil { + continue + } + break + } + if err != nil { + return "", nil, err + } + if res.GetHeader().GetError() != nil { + return "", nil, errors.New(res.GetHeader().GetError().String()) + } + + return leaderURL, pdCli, nil +} + +/* PDHTTPClient is a client for PD HTTP API, these are the functions that are used in the simulator */ + +func PutPDConfig(config *sc.PDConfig) error { if len(config.PlacementRules) > 0 { - path := fmt.Sprintf("%s/%s/config/rules/batch", c.url, httpPrefix) - ruleOps := make([]*placement.RuleOp, 0) + ruleOps := make([]*pdHttp.RuleOp, 0) for _, rule := range config.PlacementRules { - ruleOps = append(ruleOps, &placement.RuleOp{ + ruleOps = append(ruleOps, &pdHttp.RuleOp{ Rule: rule, - Action: placement.RuleOpAdd, + Action: pdHttp.RuleOpAdd, }) } - content, _ := json.Marshal(ruleOps) - req, err := http.NewRequest(http.MethodPost, path, bytes.NewBuffer(content)) - req.Header.Add("Content-Type", "application/json") - if err != nil { - return err - } - res, err := c.httpClient.Do(req) + err := PDHTTPClient.SetPlacementRuleInBatch(context.Background(), ruleOps) if err != nil { return err } - defer res.Body.Close() - simutil.Logger.Info("add placement rule success", zap.String("rules", string(content))) + simutil.Logger.Info("add placement rule success", zap.Any("rules", config.PlacementRules)) } if len(config.LocationLabels) > 0 { - path := fmt.Sprintf("%s/%s/config", c.url, httpPrefix) - data := make(map[string]interface{}) + data := make(map[string]any) data["location-labels"] = config.LocationLabels - content, err := json.Marshal(data) - if err != nil { - return err - } - req, err := http.NewRequest(http.MethodPost, path, bytes.NewBuffer(content)) - req.Header.Add("Content-Type", "application/json") + err := PDHTTPClient.SetConfig(context.Background(), data) if err != nil { return err } - res, err := c.httpClient.Do(req) - if err != nil { - return err - } - defer res.Body.Close() - simutil.Logger.Info("add location labels success", zap.String("labels", string(content))) + simutil.Logger.Info("add location labels success", zap.Any("labels", config.LocationLabels)) } return nil } -func (c *client) StoreHeartbeat(ctx context.Context, stats *pdpb.StoreStats) error { - ctx, cancel := context.WithTimeout(ctx, pdTimeout) - newStats := typeutil.DeepClone(stats, core.StoreStatsFactory) - resp, err := c.pdClient().StoreHeartbeat(ctx, &pdpb.StoreHeartbeatRequest{ - Header: c.requestHeader(), - Stats: newStats, +func ChooseToHaltPDSchedule(halt bool) { + PDHTTPClient.SetConfig(context.Background(), map[string]any{ + "schedule.halt-scheduling": strconv.FormatBool(halt), }) - cancel() - if err != nil { - return err - } - if resp.Header.GetError() != nil { - simutil.Logger.Error("store heartbeat error", zap.Reflect("error", resp.Header.GetError())) - return nil - } - return nil -} - -func (c *client) RegionHeartbeat(ctx context.Context, region *core.RegionInfo) error { - c.reportRegionHeartbeatCh <- region - return nil -} - -func (c *client) requestHeader() *pdpb.RequestHeader { - return &pdpb.RequestHeader{ - ClusterId: c.clusterID, - } } diff --git a/tools/pd-simulator/simulator/config.go b/tools/pd-simulator/simulator/config/config.go similarity index 84% rename from tools/pd-simulator/simulator/config.go rename to tools/pd-simulator/simulator/config/config.go index 
4f197fb83c2..6598cf35c0f 100644 --- a/tools/pd-simulator/simulator/config.go +++ b/tools/pd-simulator/simulator/config/config.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package simulator +package config import ( "fmt" @@ -21,8 +21,8 @@ import ( "github.com/BurntSushi/toml" "github.com/docker/go-units" + pdHttp "github.com/tikv/pd/client/http" sc "github.com/tikv/pd/pkg/schedule/config" - "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/utils/configutil" "github.com/tikv/pd/pkg/utils/tempurl" "github.com/tikv/pd/pkg/utils/typeutil" @@ -31,8 +31,11 @@ import ( ) const ( - // tick - defaultSimTickInterval = 100 * time.Millisecond + // simulator + defaultSimTickInterval = 100 * time.Millisecond + defaultTotalStore = 3 + defaultTotalRegion = 1000 + defaultEnableTransferRegionCounter = false // store defaultStoreIOMBPerSecond = 40 defaultStoreHeartbeat = 10 * time.Second @@ -53,9 +56,12 @@ const ( // SimConfig is the simulator configuration. type SimConfig struct { - // tick - CaseName string `toml:"case-name"` - SimTickInterval typeutil.Duration `toml:"sim-tick-interval"` + // Simulator + CaseName string `toml:"case-name"` + TotalStore int `toml:"total-store"` + TotalRegion int `toml:"total-region"` + EnableTransferRegionCounter bool `toml:"enable-transfer-region-counter"` + SimTickInterval typeutil.Duration `toml:"sim-tick-interval"` // store StoreIOMBPerSecond int64 `toml:"store-io-per-second"` StoreVersion string `toml:"store-version"` @@ -99,6 +105,9 @@ func NewSimConfig(serverLogLevel string) *SimConfig { // Adjust is used to adjust configurations func (sc *SimConfig) Adjust(meta *toml.MetaData) error { configutil.AdjustDuration(&sc.SimTickInterval, defaultSimTickInterval) + configutil.AdjustInt(&sc.TotalStore, defaultTotalStore) + configutil.AdjustInt(&sc.TotalRegion, defaultTotalRegion) + configutil.AdjustBool(&sc.EnableTransferRegionCounter, defaultEnableTransferRegionCounter) configutil.AdjustInt64(&sc.StoreIOMBPerSecond, defaultStoreIOMBPerSecond) configutil.AdjustString(&sc.StoreVersion, versioninfo.PDReleaseVersion) configutil.AdjustDuration(&sc.RaftStore.RegionHeartBeatInterval, defaultRegionHeartbeat) @@ -118,12 +127,12 @@ func (sc *SimConfig) Adjust(meta *toml.MetaData) error { return sc.ServerConfig.Adjust(meta, false) } -func (sc *SimConfig) speed() uint64 { +func (sc *SimConfig) Speed() uint64 { return uint64(time.Second / sc.SimTickInterval.Duration) } // PDConfig saves some config which may be changed in PD. type PDConfig struct { - PlacementRules []*placement.Rule + PlacementRules []*pdHttp.Rule LocationLabels typeutil.StringSlice } diff --git a/tools/pd-simulator/simulator/conn.go b/tools/pd-simulator/simulator/conn.go index 588fec246d4..b1000c0f17b 100644 --- a/tools/pd-simulator/simulator/conn.go +++ b/tools/pd-simulator/simulator/conn.go @@ -17,23 +17,22 @@ package simulator import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/tikv/pd/tools/pd-simulator/simulator/cases" + "github.com/tikv/pd/tools/pd-simulator/simulator/config" ) // Connection records the information of connection among nodes. type Connection struct { - pdAddr string - Nodes map[uint64]*Node + Nodes map[uint64]*Node } // NewConnection creates nodes according to the configuration and returns the connection among nodes. 
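The renamed config package introduces total-store, total-region and enable-transfer-region-counter knobs (defaulting to 3, 1000 and false via Adjust). A minimal sketch of how such a case file decodes with the already-imported BurntSushi/toml package; the struct below is a cut-down stand-in for SimConfig, not the real type:

package main

import (
    "fmt"

    "github.com/BurntSushi/toml"
)

// caseConfig holds just the simulator-level knobs added in config/config.go.
type caseConfig struct {
    CaseName                    string `toml:"case-name"`
    TotalStore                  int    `toml:"total-store"`
    TotalRegion                 int    `toml:"total-region"`
    EnableTransferRegionCounter bool   `toml:"enable-transfer-region-counter"`
}

func main() {
    const data = `
case-name = "balance-leader"
total-store = 5
total-region = 2000
enable-transfer-region-counter = true
`
    var cfg caseConfig
    if _, err := toml.Decode(data, &cfg); err != nil {
        panic(err)
    }
    fmt.Printf("%+v\n", cfg)
}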
-func NewConnection(simCase *cases.Case, pdAddr string, storeConfig *SimConfig) (*Connection, error) { +func NewConnection(simCase *cases.Case, storeConfig *config.SimConfig) (*Connection, error) { conn := &Connection{ - pdAddr: pdAddr, - Nodes: make(map[uint64]*Node), + Nodes: make(map[uint64]*Node), } for _, store := range simCase.Stores { - node, err := NewNode(store, pdAddr, storeConfig) + node, err := NewNode(store, storeConfig) if err != nil { return nil, err } @@ -51,3 +50,13 @@ func (c *Connection) nodeHealth(storeID uint64) bool { return n.GetNodeState() == metapb.NodeState_Preparing || n.GetNodeState() == metapb.NodeState_Serving } + +func (c *Connection) getNodes() []*Node { + var nodes []*Node + for _, n := range c.Nodes { + if n.GetNodeState() != metapb.NodeState_Removed { + nodes = append(nodes, n) + } + } + return nodes +} diff --git a/tools/pd-simulator/simulator/drive.go b/tools/pd-simulator/simulator/drive.go index c7f64324c19..0296710b705 100644 --- a/tools/pd-simulator/simulator/drive.go +++ b/tools/pd-simulator/simulator/drive.go @@ -16,16 +16,24 @@ package simulator import ( "context" + "net/http" + "net/http/pprof" "path" "strconv" + "strings" "sync" "time" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/prometheus/client_golang/prometheus/promhttp" + pd "github.com/tikv/pd/client" + pdHttp "github.com/tikv/pd/client/http" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/utils/typeutil" "github.com/tikv/pd/tools/pd-simulator/simulator/cases" + "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/info" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" "go.etcd.io/etcd/clientv3" @@ -34,38 +42,39 @@ import ( // Driver promotes the cluster status change. type Driver struct { - wg sync.WaitGroup - pdAddr string - simCase *cases.Case - client Client - tickCount int64 - eventRunner *EventRunner - raftEngine *RaftEngine - conn *Connection - simConfig *SimConfig - pdConfig *PDConfig + wg sync.WaitGroup + pdAddr string + statusAddress string + simCase *cases.Case + tickCount int64 + eventRunner *EventRunner + raftEngine *RaftEngine + conn *Connection + simConfig *config.SimConfig + pdConfig *config.PDConfig } // NewDriver returns a driver. -func NewDriver(pdAddr string, caseName string, simConfig *SimConfig) (*Driver, error) { - simCase := cases.NewCase(caseName) +func NewDriver(pdAddr, statusAddress, caseName string, simConfig *config.SimConfig) (*Driver, error) { + simCase := cases.NewCase(caseName, simConfig) if simCase == nil { return nil, errors.Errorf("failed to create case %s", caseName) } - pdConfig := &PDConfig{} + pdConfig := &config.PDConfig{} pdConfig.PlacementRules = simCase.Rules pdConfig.LocationLabels = simCase.Labels return &Driver{ - pdAddr: pdAddr, - simCase: simCase, - simConfig: simConfig, - pdConfig: pdConfig, + pdAddr: pdAddr, + statusAddress: statusAddress, + simCase: simCase, + simConfig: simConfig, + pdConfig: pdConfig, }, nil } // Prepare initializes cluster information, bootstraps cluster and starts nodes. 
func (d *Driver) Prepare() error { - conn, err := NewConnection(d.simCase, d.pdAddr, d.simConfig) + conn, err := NewConnection(d.simCase, d.simConfig) if err != nil { return err } @@ -73,19 +82,27 @@ func (d *Driver) Prepare() error { d.raftEngine = NewRaftEngine(d.simCase, d.conn, d.simConfig) d.eventRunner = NewEventRunner(d.simCase.Events, d.raftEngine) - d.updateNodeAvailable() + if d.statusAddress != "" { + go d.runHTTPServer() + } + + if err = d.allocID(); err != nil { + return err + } + + return d.Start() +} + +func (d *Driver) allocID() error { // Bootstrap. store, region, err := d.GetBootstrapInfo(d.raftEngine) if err != nil { return err } - d.client = d.conn.Nodes[store.GetId()].client - ctx, cancel := context.WithTimeout(context.Background(), pdTimeout) - err = d.client.Bootstrap(ctx, store, region) - cancel() + leaderURL, pdCli, err := Bootstrap(context.Background(), d.pdAddr, store, region) if err != nil { simutil.Logger.Fatal("bootstrap error", zap.Error(err)) } else { @@ -94,19 +111,18 @@ func (d *Driver) Prepare() error { // Setup alloc id. // TODO: This is a hack way. Once we have reset alloc ID API, we need to replace it. - maxID := cases.IDAllocator.GetID() + maxID := simutil.IDAllocator.GetID() requestTimeout := 10 * time.Second etcdTimeout := 3 * time.Second etcdClient, err := clientv3.New(clientv3.Config{ - Endpoints: []string{d.pdAddr}, + Endpoints: []string{leaderURL}, DialTimeout: etcdTimeout, }) if err != nil { return err } - ctx, cancel = context.WithTimeout(context.Background(), requestTimeout) - clusterID := d.client.GetClusterID(ctx) - rootPath := path.Join("/pd", strconv.FormatUint(clusterID, 10)) + ctx, cancel := context.WithTimeout(context.Background(), requestTimeout) + rootPath := path.Join("/pd", strconv.FormatUint(ClusterID, 10)) allocIDPath := path.Join(rootPath, "alloc_id") _, err = etcdClient.Put(ctx, allocIDPath, string(typeutil.Uint64ToBytes(maxID+1000))) if err != nil { @@ -116,22 +132,34 @@ func (d *Driver) Prepare() error { cancel() for { - var id uint64 - id, err = d.client.AllocID(context.Background()) + var resp *pdpb.AllocIDResponse + resp, err = pdCli.AllocID(context.Background(), &pdpb.AllocIDRequest{ + Header: requestHeader(), + }) if err != nil { return errors.WithStack(err) } - if id > maxID { - cases.IDAllocator.ResetID() + if resp.Id > maxID { + simutil.IDAllocator.ResetID() break } } + return nil +} - err = d.Start() - if err != nil { +func (d *Driver) updateNodesClient() error { + urls := strings.Split(d.pdAddr, ",") + ctx, cancel := context.WithCancel(context.Background()) + sd = pd.NewDefaultPDServiceDiscovery(ctx, cancel, urls, nil) + if err := sd.Init(); err != nil { return err } + // Init PD HTTP client. + PDHTTPClient = pdHttp.NewClientWithServiceDiscovery("pd-simulator", sd) + for _, node := range d.conn.Nodes { + node.client = NewRetryClient(node) + } return nil } @@ -165,19 +193,18 @@ func (d *Driver) Check() bool { // Start starts all nodes. 
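+// Start first builds the PD clients for every node via service discovery, then launches each node and pushes the case's placement rules and location labels to PD.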
func (d *Driver) Start() error { + if err := d.updateNodesClient(); err != nil { + return err + } + for _, n := range d.conn.Nodes { err := n.Start() if err != nil { return err } } - d.ChangePDConfig() - return nil -} -// ChangePDConfig changes pd config -func (d *Driver) ChangePDConfig() error { - d.client.PutPDConfig(d.pdConfig) + PutPDConfig(d.pdConfig) return nil } @@ -225,3 +252,20 @@ func (d *Driver) updateNodeAvailable() { } } } + +func (d *Driver) runHTTPServer() { + http.Handle("/metrics", promhttp.Handler()) + // profile API + http.HandleFunc("/pprof/profile", pprof.Profile) + http.HandleFunc("/pprof/trace", pprof.Trace) + http.HandleFunc("/pprof/symbol", pprof.Symbol) + http.Handle("/pprof/heap", pprof.Handler("heap")) + http.Handle("/pprof/mutex", pprof.Handler("mutex")) + http.Handle("/pprof/allocs", pprof.Handler("allocs")) + http.Handle("/pprof/block", pprof.Handler("block")) + http.Handle("/pprof/goroutine", pprof.Handler("goroutine")) + eventHandler := newEventHandler(d.eventRunner) + http.HandleFunc("/event", eventHandler.createEvent) + // nolint + http.ListenAndServe(d.statusAddress, nil) +} diff --git a/tools/pd-simulator/simulator/event.go b/tools/pd-simulator/simulator/event.go index 04ad10a0db8..8e01a8f5f40 100644 --- a/tools/pd-simulator/simulator/event.go +++ b/tools/pd-simulator/simulator/event.go @@ -15,6 +15,12 @@ package simulator import ( + "context" + "fmt" + "math/rand" + "net/http" + "sync" + "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/tikv/pd/pkg/core" @@ -30,6 +36,7 @@ type Event interface { // EventRunner includes all events. type EventRunner struct { + sync.RWMutex events []Event raftEngine *RaftEngine } @@ -46,6 +53,33 @@ func NewEventRunner(events []cases.EventDescriptor, raftEngine *RaftEngine) *Eve return er } +type eventHandler struct { + er *EventRunner +} + +func newEventHandler(er *EventRunner) *eventHandler { + return &eventHandler{ + er: er, + } +} + +func (e *eventHandler) createEvent(w http.ResponseWriter, r *http.Request) { + event := r.URL.Query().Get("event") + if len(event) < 1 { + fmt.Fprintf(w, "no given event") + return + } + switch event { + case "add-node": + e.er.addEvent(&AddNode{}) + return + case "down-node": + e.er.addEvent(&DownNode{}) + return + default: + } +} + func parserEvent(e cases.EventDescriptor) Event { switch t := e.(type) { case *cases.WriteFlowOnSpotDescriptor: @@ -54,16 +88,20 @@ func parserEvent(e cases.EventDescriptor) Event { return &WriteFlowOnRegion{descriptor: t} case *cases.ReadFlowOnRegionDescriptor: return &ReadFlowOnRegion{descriptor: t} - case *cases.AddNodesDescriptor: - return &AddNodes{descriptor: t} - case *cases.DeleteNodesDescriptor: - return &DeleteNodes{descriptor: t} } return nil } +func (er *EventRunner) addEvent(e Event) { + er.Lock() + defer er.Unlock() + er.events = append(er.events, e) +} + // Tick ticks the event run func (er *EventRunner) Tick(tickCount int64) { + er.Lock() + defer er.Unlock() var finishedIndex int for i, e := range er.events { isFinished := e.Run(er.raftEngine, tickCount) @@ -126,75 +164,79 @@ func (e *ReadFlowOnRegion) Run(raft *RaftEngine, tickCount int64) bool { return false } -// AddNodes adds nodes. -type AddNodes struct { - descriptor *cases.AddNodesDescriptor -} +// AddNode adds nodes. +type AddNode struct{} // Run implements the event interface. 
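+// AddNode asks PD for a fresh store ID, creates a new simulated node, registers it with the raft engine and starts it; the event only finishes once the node is up.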
-func (e *AddNodes) Run(raft *RaftEngine, tickCount int64) bool { - id := e.descriptor.Step(tickCount) - if id == 0 { - return false - } - - if _, ok := raft.conn.Nodes[id]; ok { - simutil.Logger.Info("node has already existed", zap.Uint64("node-id", id)) +func (*AddNode) Run(raft *RaftEngine, _ int64) bool { + config := raft.storeConfig + nodes := raft.conn.getNodes() + id, err := nodes[0].client.AllocID(context.TODO()) + if err != nil { + simutil.Logger.Error("alloc node id failed", zap.Error(err)) return false } - - config := raft.storeConfig s := &cases.Store{ ID: id, Status: metapb.StoreState_Up, Capacity: uint64(config.RaftStore.Capacity), Version: config.StoreVersion, } - n, err := NewNode(s, raft.conn.pdAddr, config) + n, err := NewNode(s, config) if err != nil { - simutil.Logger.Error("add node failed", zap.Uint64("node-id", id), zap.Error(err)) + simutil.Logger.Error("create node failed", zap.Error(err)) return false } - raft.conn.Nodes[id] = n + + raft.conn.Nodes[s.ID] = n n.raftEngine = raft + n.client = NewRetryClient(n) + err = n.Start() if err != nil { - simutil.Logger.Error("start node failed", zap.Uint64("node-id", id), zap.Error(err)) + delete(raft.conn.Nodes, s.ID) + simutil.Logger.Error("start node failed", zap.Uint64("node-id", s.ID), zap.Error(err)) + return false } - return false + return true } -// DeleteNodes deletes nodes. -type DeleteNodes struct { - descriptor *cases.DeleteNodesDescriptor -} +// DownNode deletes nodes. +type DownNode struct{} // Run implements the event interface. -func (e *DeleteNodes) Run(raft *RaftEngine, tickCount int64) bool { - id := e.descriptor.Step(tickCount) - if id == 0 { +func (*DownNode) Run(raft *RaftEngine, _ int64) bool { + nodes := raft.conn.getNodes() + if len(nodes) == 0 { + simutil.Logger.Error("can not find any node") return false } - - node := raft.conn.Nodes[id] + i := rand.Intn(len(nodes)) + node := nodes[i] if node == nil { - simutil.Logger.Error("node is not existed", zap.Uint64("node-id", id)) + simutil.Logger.Error("node is not existed", zap.Uint64("node-id", node.Id)) + return false + } + delete(raft.conn.Nodes, node.Id) + // delete store + err := PDHTTPClient.DeleteStore(context.Background(), node.Id) + if err != nil { + simutil.Logger.Error("put store failed", zap.Uint64("node-id", node.Id), zap.Error(err)) return false } - delete(raft.conn.Nodes, id) node.Stop() regions := raft.GetRegions() for _, region := range regions { storeIDs := region.GetStoreIDs() - if _, ok := storeIDs[id]; ok { + if _, ok := storeIDs[node.Id]; ok { downPeer := &pdpb.PeerStats{ - Peer: region.GetStorePeer(id), + Peer: region.GetStorePeer(node.Id), DownSeconds: 24 * 60 * 60, } region = region.Clone(core.WithDownPeers(append(region.GetDownPeers(), downPeer))) raft.SetRegion(region) } } - return false + return true } diff --git a/tools/pd-simulator/simulator/node.go b/tools/pd-simulator/simulator/node.go index 68a10a8638e..8238a6486c1 100644 --- a/tools/pd-simulator/simulator/node.go +++ b/tools/pd-simulator/simulator/node.go @@ -27,6 +27,7 @@ import ( "github.com/tikv/pd/pkg/ratelimit" "github.com/tikv/pd/pkg/utils/syncutil" "github.com/tikv/pd/tools/pd-simulator/simulator/cases" + sc "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/info" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" "go.uber.org/zap" @@ -41,23 +42,24 @@ const ( type Node struct { *metapb.Store syncutil.RWMutex - stats *info.StoreStats - tick uint64 - wg sync.WaitGroup - tasks map[uint64]*Task + stats 
*info.StoreStats + tick uint64 + wg sync.WaitGroup + tasks map[uint64]*Task + ctx context.Context + cancel context.CancelFunc + raftEngine *RaftEngine + limiter *ratelimit.RateLimiter + sizeMutex syncutil.Mutex + hasExtraUsedSpace bool + snapStats []*pdpb.SnapshotStat + // PD client client Client receiveRegionHeartbeatCh <-chan *pdpb.RegionHeartbeatResponse - ctx context.Context - cancel context.CancelFunc - raftEngine *RaftEngine - limiter *ratelimit.RateLimiter - sizeMutex syncutil.Mutex - hasExtraUsedSpace bool - snapStats []*pdpb.SnapshotStat } // NewNode returns a Node. -func NewNode(s *cases.Store, pdAddr string, config *SimConfig) (*Node, error) { +func NewNode(s *cases.Store, config *sc.SimConfig) (*Node, error) { ctx, cancel := context.WithCancel(context.Background()) store := &metapb.Store{ Id: s.ID, @@ -71,42 +73,22 @@ func NewNode(s *cases.Store, pdAddr string, config *SimConfig) (*Node, error) { StoreId: s.ID, Capacity: uint64(config.RaftStore.Capacity), StartTime: uint32(time.Now().Unix()), + Available: uint64(config.RaftStore.Capacity), }, } - tag := fmt.Sprintf("store %d", s.ID) - var ( - client Client - receiveRegionHeartbeatCh <-chan *pdpb.RegionHeartbeatResponse - err error - ) - // Client should wait if PD server is not ready. - for i := 0; i < maxInitClusterRetries; i++ { - client, receiveRegionHeartbeatCh, err = NewClient(pdAddr, tag) - if err == nil { - break - } - time.Sleep(time.Second) - } - - if err != nil { - cancel() - return nil, err - } - ratio := config.speed() + ratio := config.Speed() speed := config.StoreIOMBPerSecond * units.MiB * int64(ratio) return &Node{ - Store: store, - stats: stats, - client: client, - ctx: ctx, - cancel: cancel, - tasks: make(map[uint64]*Task), - receiveRegionHeartbeatCh: receiveRegionHeartbeatCh, - limiter: ratelimit.NewRateLimiter(float64(speed), int(speed)), - tick: uint64(rand.Intn(storeHeartBeatPeriod)), - hasExtraUsedSpace: s.HasExtraUsedSpace, - snapStats: make([]*pdpb.SnapshotStat, 0), + Store: store, + stats: stats, + ctx: ctx, + cancel: cancel, + tasks: make(map[uint64]*Task), + limiter: ratelimit.NewRateLimiter(float64(speed), int(speed)), + tick: uint64(rand.Intn(storeHeartBeatPeriod)), + hasExtraUsedSpace: s.HasExtraUsedSpace, + snapStats: make([]*pdpb.SnapshotStat, 0), }, nil } @@ -170,6 +152,8 @@ func (n *Node) stepTask() { } } +var schedulerCheck sync.Once + func (n *Node) stepHeartBeat() { config := n.raftEngine.storeConfig @@ -180,6 +164,7 @@ func (n *Node) stepHeartBeat() { period = uint64(config.RaftStore.RegionHeartBeatInterval.Duration / config.SimTickInterval.Duration) if n.tick%period == 0 { n.regionHeartBeat() + schedulerCheck.Do(func() { ChooseToHaltPDSchedule(false) }) } } @@ -200,7 +185,7 @@ func (n *Node) storeHeartBeat() { n.stats.SnapshotStats = stats err := n.client.StoreHeartbeat(ctx, &n.stats.StoreStats) if err != nil { - simutil.Logger.Info("report heartbeat error", + simutil.Logger.Info("report store heartbeat error", zap.Uint64("node-id", n.GetId()), zap.Error(err)) } @@ -225,7 +210,7 @@ func (n *Node) regionHeartBeat() { ctx, cancel := context.WithTimeout(n.ctx, pdTimeout) err := n.client.RegionHeartbeat(ctx, region) if err != nil { - simutil.Logger.Info("report heartbeat error", + simutil.Logger.Info("report region heartbeat error", zap.Uint64("node-id", n.Id), zap.Uint64("region-id", region.GetID()), zap.Error(err)) @@ -242,7 +227,7 @@ func (n *Node) reportRegionChange() { ctx, cancel := context.WithTimeout(n.ctx, pdTimeout) err := n.client.RegionHeartbeat(ctx, region) if err != nil { - 
simutil.Logger.Info("report heartbeat error", + simutil.Logger.Info("report region change heartbeat error", zap.Uint64("node-id", n.Id), zap.Uint64("region-id", region.GetID()), zap.Error(err)) diff --git a/tools/pd-simulator/simulator/raft.go b/tools/pd-simulator/simulator/raft.go index fccf75781d3..d416f69ff80 100644 --- a/tools/pd-simulator/simulator/raft.go +++ b/tools/pd-simulator/simulator/raft.go @@ -22,6 +22,7 @@ import ( "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/utils/syncutil" "github.com/tikv/pd/tools/pd-simulator/simulator/cases" + "github.com/tikv/pd/tools/pd-simulator/simulator/config" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" "go.uber.org/zap" ) @@ -34,12 +35,12 @@ type RaftEngine struct { regionChange map[uint64][]uint64 regionSplitSize int64 regionSplitKeys int64 - storeConfig *SimConfig + storeConfig *config.SimConfig useTiDBEncodedKey bool } // NewRaftEngine creates the initialized raft with the configuration. -func NewRaftEngine(conf *cases.Case, conn *Connection, storeConfig *SimConfig) *RaftEngine { +func NewRaftEngine(conf *cases.Case, conn *Connection, storeConfig *config.SimConfig) *RaftEngine { r := &RaftEngine{ regionsInfo: core.NewRegionsInfo(), conn: conn, diff --git a/tools/pd-simulator/simulator/simutil/case_config.go b/tools/pd-simulator/simulator/simutil/id.go similarity index 50% rename from tools/pd-simulator/simulator/simutil/case_config.go rename to tools/pd-simulator/simulator/simutil/id.go index a34035c15aa..8badddff3f1 100644 --- a/tools/pd-simulator/simulator/simutil/case_config.go +++ b/tools/pd-simulator/simulator/simutil/id.go @@ -1,4 +1,4 @@ -// Copyright 2019 TiKV Project Authors. +// Copyright 2024 TiKV Project Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,21 +14,26 @@ package simutil -// CaseConfig is to save flags -type CaseConfig struct { - StoreNum int - RegionNum int - EnableTransferRegionCounter bool +// IDAllocator is used to alloc unique ID. +type idAllocator struct { + id uint64 } -// CaseConfigure is an global instance for CaseConfig -var CaseConfigure *CaseConfig +// NextID gets the next unique ID. +func (a *idAllocator) NextID() uint64 { + a.id++ + return a.id +} + +// ResetID resets the IDAllocator. +func (a *idAllocator) ResetID() { + a.id = 0 +} -// InitCaseConfig is to init caseConfigure -func InitCaseConfig(storeNum, regionNum int, enableTransferRegionCounter bool) { - CaseConfigure = &CaseConfig{ - StoreNum: storeNum, - RegionNum: regionNum, - EnableTransferRegionCounter: enableTransferRegionCounter, - } +// GetID gets the current ID. +func (a *idAllocator) GetID() uint64 { + return a.id } + +// IDAllocator is used to alloc unique ID. 
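+// It replaces the cases-local allocator that the driver used to reach as cases.IDAllocator.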
+var IDAllocator idAllocator diff --git a/tools/pd-simulator/simulator/task.go b/tools/pd-simulator/simulator/task.go index b1c609b503d..c0bfa1e691b 100644 --- a/tools/pd-simulator/simulator/task.go +++ b/tools/pd-simulator/simulator/task.go @@ -261,7 +261,7 @@ type transferLeader struct { toPeers []*metapb.Peer } -func (t *transferLeader) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { +func (t *transferLeader) tick(_ *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { isFinished = true toPeer := t.toPeers[0] // TODO: Support selection logic if peer := region.GetPeer(toPeer.GetId()); peer == nil || peer.GetRole() != toPeer.GetRole() || core.IsLearner(peer) { @@ -313,7 +313,7 @@ type promoteLearner struct { peer *metapb.Peer } -func (pl *promoteLearner) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { +func (pl *promoteLearner) tick(_ *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { isFinished = true peer := region.GetPeer(pl.peer.GetId()) opts := checkAndCreateChangePeerOption(region, peer, metapb.PeerRole_Learner, metapb.PeerRole_Voter) @@ -327,7 +327,7 @@ type demoteVoter struct { peer *metapb.Peer } -func (dv *demoteVoter) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { +func (dv *demoteVoter) tick(_ *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { isFinished = true peer := region.GetPeer(dv.peer.GetId()) opts := checkAndCreateChangePeerOption(region, peer, metapb.PeerRole_Voter, metapb.PeerRole_Learner) @@ -342,7 +342,7 @@ type changePeerV2Enter struct { demoteVoters []*metapb.Peer } -func (ce *changePeerV2Enter) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { +func (ce *changePeerV2Enter) tick(_ *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { isFinished = true var opts []core.RegionCreateOption for _, pl := range ce.promoteLearners { @@ -367,7 +367,7 @@ func (ce *changePeerV2Enter) tick(engine *RaftEngine, region *core.RegionInfo) ( type changePeerV2Leave struct{} -func (cl *changePeerV2Leave) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { +func (*changePeerV2Leave) tick(_ *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { isFinished = true var opts []core.RegionCreateOption for _, peer := range region.GetPeers() { @@ -415,7 +415,7 @@ func (a *addPeer) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion * pendingPeers := append(region.GetPendingPeers(), a.peer) return region.Clone(core.WithAddPeer(a.peer), core.WithIncConfVer(), core.WithPendingPeers(pendingPeers)), false } - speed := engine.storeConfig.speed() + speed := engine.storeConfig.Speed() // Step 2: Process Snapshot if !processSnapshot(sendNode, a.sendingStat, speed) { return nil, false diff --git a/tools/pd-tso-bench/main.go b/tools/pd-tso-bench/main.go index 5919f270a8f..3726373779e 100644 --- a/tools/pd-tso-bench/main.go +++ b/tools/pd-tso-bench/main.go @@ -383,10 +383,10 @@ func reqWorker(ctx context.Context, pdClients []pd.Client, clientIdx int, durCh i := 0 for ; i < maxRetryTime; i++ { + var ticker *time.Ticker if *maxTSOSendIntervalMilliseconds > 0 { sleepBeforeGetTS := time.Duration(rand.Intn(*maxTSOSendIntervalMilliseconds)) * time.Millisecond - ticker := 
time.NewTicker(sleepBeforeGetTS) - defer ticker.Stop() + ticker = time.NewTicker(sleepBeforeGetTS) select { case <-reqCtx.Done(): case <-ticker.C: @@ -395,9 +395,11 @@ func reqWorker(ctx context.Context, pdClients []pd.Client, clientIdx int, durCh } _, _, err = pdCli.GetLocalTS(reqCtx, *dcLocation) if errors.Cause(err) == context.Canceled { + ticker.Stop() return } if err == nil { + ticker.Stop() break } log.Error(fmt.Sprintf("%v", err)) diff --git a/tools/pd-ut/README.md b/tools/pd-ut/README.md new file mode 100644 index 00000000000..805ee5cf322 --- /dev/null +++ b/tools/pd-ut/README.md @@ -0,0 +1,70 @@ +# pd-ut + +pd-ut is a tool to run unit tests for PD. + +## Build + +1. [Go](https://golang.org/) Version 1.21 or later +2. In the root directory of the [PD project](https://github.com/tikv/pd), use the `make pd-ut` command to compile and generate `bin/pd-ut` + +## Usage + +This section describes how to use the pd-ut tool. + +### brief run all tests +```shell +make ut +``` + + +### run by pd-ut + +- You should `make failpoint-enable` before running the tests. +- And after running the tests, you should `make failpoint-disable` and `make clean-test` to disable the failpoint and clean the environment. + +#### Flags description + +```shell +// run all tests +pd-ut + +// show usage +pd-ut -h + +// list all packages +pd-ut list + +// list test cases of a single package +pd-ut list $package + +// list test cases that match a pattern +pd-ut list $package 'r:$regex' + +// run all tests +pd-ut run + +// run test all cases of a single package +pd-ut run $package + +// run test cases of a single package +pd-ut run $package $test + +// run test cases that match a pattern +pd-ut run $package 'r:$regex' + +// build all test package +pd-ut build + +// build a test package +pd-ut build xxx + +// write the junitfile +pd-ut run --junitfile xxx + +// test with race flag +pd-ut run --race + +// test with coverprofile +pd-ut run --coverprofile xxx +go tool cover --func=xxx +``` diff --git a/tools/pd-ut/alloc/check_env_dummy.go b/tools/pd-ut/alloc/check_env_dummy.go new file mode 100644 index 00000000000..b9b8eb4827a --- /dev/null +++ b/tools/pd-ut/alloc/check_env_dummy.go @@ -0,0 +1,21 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//go:build !linux +// +build !linux + +package alloc + +func environmentCheck(_ string) bool { + return true +} diff --git a/tools/pd-ut/alloc/check_env_linux.go b/tools/pd-ut/alloc/check_env_linux.go new file mode 100644 index 00000000000..1a51f8075cf --- /dev/null +++ b/tools/pd-ut/alloc/check_env_linux.go @@ -0,0 +1,42 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//go:build linux +// +build linux + +package alloc + +import ( + "github.com/cakturk/go-netstat/netstat" + "github.com/pingcap/log" + "go.uber.org/zap" +) + +func environmentCheck(addr string) bool { + valid, err := checkAddr(addr[len("http://"):]) + if err != nil { + log.Error("check port status failed", zap.Error(err)) + return false + } + return valid +} + +func checkAddr(addr string) (bool, error) { + tabs, err := netstat.TCPSocks(func(s *netstat.SockTabEntry) bool { + return s.RemoteAddr.String() == addr || s.LocalAddr.String() == addr + }) + if err != nil { + return false, err + } + return len(tabs) < 1, nil +} diff --git a/tools/pd-ut/alloc/server.go b/tools/pd-ut/alloc/server.go new file mode 100644 index 00000000000..aced73467fb --- /dev/null +++ b/tools/pd-ut/alloc/server.go @@ -0,0 +1,56 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package alloc + +import ( + "errors" + "flag" + "fmt" + "net/http" + "os" + "time" + + "github.com/gin-gonic/gin" + "github.com/pingcap/log" + "github.com/tikv/pd/pkg/utils/tempurl" + "go.uber.org/zap" +) + +var statusAddress = flag.String("status-addr", "0.0.0.0:20180", "status address") + +func RunHTTPServer() *http.Server { + err := os.Setenv(tempurl.AllocURLFromUT, fmt.Sprintf("http://%s/alloc", *statusAddress)) + if err != nil { + fmt.Println(err) + } + + gin.SetMode(gin.ReleaseMode) + engine := gin.New() + engine.Use(gin.Recovery()) + + engine.GET("alloc", func(c *gin.Context) { + addr := Alloc() + c.String(http.StatusOK, addr) + }) + + srv := &http.Server{Addr: *statusAddress, Handler: engine.Handler(), ReadHeaderTimeout: 3 * time.Second} + go func() { + if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) { + log.Fatal("server listen error", zap.Error(err)) + } + }() + + return srv +} diff --git a/tools/pd-ut/alloc/tempurl.go b/tools/pd-ut/alloc/tempurl.go new file mode 100644 index 00000000000..6be69dfe056 --- /dev/null +++ b/tools/pd-ut/alloc/tempurl.go @@ -0,0 +1,65 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package alloc + +import ( + "fmt" + "net" + "sync" + "time" + + "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" +) + +var ( + testAddrMutex sync.Mutex + testAddrMap = make(map[string]struct{}) +) + +// Alloc allocates a local URL for testing. +func Alloc() string { + for i := 0; i < 50; i++ { + if u := tryAllocTestURL(); u != "" { + return u + } + time.Sleep(200 * time.Millisecond) + } + log.Fatal("failed to alloc test URL") + return "" +} + +func tryAllocTestURL() string { + l, err := net.Listen("tcp", "127.0.0.1:") + if err != nil { + return "" + } + addr := fmt.Sprintf("http://%s", l.Addr()) + err = l.Close() + if err != nil { + log.Fatal("close failed", errs.ZapError(err)) + } + + testAddrMutex.Lock() + defer testAddrMutex.Unlock() + if _, ok := testAddrMap[addr]; ok { + return "" + } + if !environmentCheck(addr) { + return "" + } + testAddrMap[addr] = struct{}{} + return addr +} diff --git a/tools/pd-ut/coverProfile.go b/tools/pd-ut/coverProfile.go new file mode 100644 index 00000000000..0ed1c3f3c61 --- /dev/null +++ b/tools/pd-ut/coverProfile.go @@ -0,0 +1,176 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bufio" + "fmt" + "os" + "path" + "sort" + + "golang.org/x/tools/cover" +) + +func collectCoverProfileFile() { + // Combine all the cover file of single test function into a whole. 
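+	// Every per-test profile under coverFileTempDir is parsed, merged per source file, and rewritten as one atomic-mode profile.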
+ files, err := os.ReadDir(coverFileTempDir) + if err != nil { + fmt.Println("collect cover file error:", err) + os.Exit(-1) + } + + w, err := os.Create(coverProfile) + if err != nil { + fmt.Println("create cover file error:", err) + os.Exit(-1) + } + //nolint: errcheck + defer w.Close() + w.WriteString("mode: atomic\n") + + result := make(map[string]*cover.Profile) + for _, file := range files { + if file.IsDir() { + continue + } + collectOneCoverProfileFile(result, file) + } + + w1 := bufio.NewWriter(w) + for _, prof := range result { + for _, block := range prof.Blocks { + fmt.Fprintf(w1, "%s:%d.%d,%d.%d %d %d\n", + prof.FileName, + block.StartLine, + block.StartCol, + block.EndLine, + block.EndCol, + block.NumStmt, + block.Count, + ) + } + if err := w1.Flush(); err != nil { + fmt.Println("flush data to cover profile file error:", err) + os.Exit(-1) + } + } +} + +func collectOneCoverProfileFile(result map[string]*cover.Profile, file os.DirEntry) { + f, err := os.Open(path.Join(coverFileTempDir, file.Name())) + if err != nil { + fmt.Println("open temp cover file error:", err) + os.Exit(-1) + } + //nolint: errcheck + defer f.Close() + + profs, err := cover.ParseProfilesFromReader(f) + if err != nil { + fmt.Println("parse cover profile file error:", err) + os.Exit(-1) + } + mergeProfile(result, profs) +} + +func mergeProfile(m map[string]*cover.Profile, profs []*cover.Profile) { + for _, prof := range profs { + sort.Sort(blocksByStart(prof.Blocks)) + old, ok := m[prof.FileName] + if !ok { + m[prof.FileName] = prof + continue + } + + // Merge samples from the same location. + // The data has already been sorted. + tmp := old.Blocks[:0] + var i, j int + for i < len(old.Blocks) && j < len(prof.Blocks) { + v1 := old.Blocks[i] + v2 := prof.Blocks[j] + + switch compareProfileBlock(v1, v2) { + case -1: + tmp = appendWithReduce(tmp, v1) + i++ + case 1: + tmp = appendWithReduce(tmp, v2) + j++ + default: + tmp = appendWithReduce(tmp, v1) + tmp = appendWithReduce(tmp, v2) + i++ + j++ + } + } + for ; i < len(old.Blocks); i++ { + tmp = appendWithReduce(tmp, old.Blocks[i]) + } + for ; j < len(prof.Blocks); j++ { + tmp = appendWithReduce(tmp, prof.Blocks[j]) + } + + m[prof.FileName] = old + } +} + +// appendWithReduce works like append(), but it merge the duplicated values. +func appendWithReduce(input []cover.ProfileBlock, b cover.ProfileBlock) []cover.ProfileBlock { + if len(input) >= 1 { + last := &input[len(input)-1] + if b.StartLine == last.StartLine && + b.StartCol == last.StartCol && + b.EndLine == last.EndLine && + b.EndCol == last.EndCol { + if b.NumStmt != last.NumStmt { + panic(fmt.Errorf("inconsistent NumStmt: changed from %d to %d", last.NumStmt, b.NumStmt)) + } + // Merge the data with the last one of the slice. 
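+			// Counts are OR-ed rather than summed; for coverage purposes it only matters whether the block was hit.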
+ last.Count |= b.Count + return input + } + } + return append(input, b) +} + +type blocksByStart []cover.ProfileBlock + +func compareProfileBlock(x, y cover.ProfileBlock) int { + if x.StartLine < y.StartLine { + return -1 + } + if x.StartLine > y.StartLine { + return 1 + } + + // Now x.StartLine == y.StartLine + if x.StartCol < y.StartCol { + return -1 + } + if x.StartCol > y.StartCol { + return 1 + } + + return 0 +} + +func (b blocksByStart) Len() int { return len(b) } +func (b blocksByStart) Swap(i, j int) { b[i], b[j] = b[j], b[i] } +func (b blocksByStart) Less(i, j int) bool { + bi, bj := b[i], b[j] + return bi.StartLine < bj.StartLine || bi.StartLine == bj.StartLine && bi.StartCol < bj.StartCol +} diff --git a/tools/pd-ut/go-compile-without-link.sh b/tools/pd-ut/go-compile-without-link.sh new file mode 100755 index 00000000000..88e6282b076 --- /dev/null +++ b/tools/pd-ut/go-compile-without-link.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# See https://gist.github.com/howardjohn/c0f5d0bc293ef7d7fada533a2c9ffaf4 +# Usage: go test -exec=true -toolexec=go-compile-without-link -vet=off ./... +# Preferably as an alias like `alias go-test-compile='go test -exec=true -toolexec=go-compile-without-link -vet=off'` +# This will compile all tests, but not link them (which is the least cacheable part) + +if [[ "${2}" == "-V=full" ]]; then + "$@" + exit 0 +fi +case "$(basename ${1})" in + link) + # Output a dummy file + touch "${3}" + ;; + # We could skip vet as well, but it can be done with -vet=off if desired + *) + "$@" +esac diff --git a/tools/pd-ut/ut.go b/tools/pd-ut/ut.go new file mode 100644 index 00000000000..fbf2a640651 --- /dev/null +++ b/tools/pd-ut/ut.go @@ -0,0 +1,931 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bytes" + "context" + "encoding/xml" + "errors" + "fmt" + "io" + "log" + "math/rand" + "os" + "os/exec" + "path" + "regexp" + "runtime" + "strconv" + "strings" + "sync" + "time" + + "github.com/tikv/pd/tools/pd-ut/alloc" + "go.uber.org/zap" + + // Set the correct value when it runs inside docker. 
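+	// automaxprocs adjusts GOMAXPROCS to match the container's CPU quota.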
+ _ "go.uber.org/automaxprocs" +) + +func usage() bool { + msg := `// run all tests +pd-ut + +// show usage +pd-ut -h + +// list all packages +pd-ut list + +// list test cases of a single package +pd-ut list $package + +// list test cases that match a pattern +pd-ut list $package 'r:$regex' + +// run all tests +pd-ut run + +// run test all cases of a single package +pd-ut run $package + +// run test cases of a single package +pd-ut run $package $test + +// run test cases that match a pattern +pd-ut run $package 'r:$regex' + +// build all test package +pd-ut build + +// build a test package +pd-ut build xxx + +// write the junitfile +pd-ut run --junitfile xxx + +// test with race flag +pd-ut run --race + +// test with coverprofile +pd-ut run --coverprofile xxx +go tool cover --func=xxx` + + fmt.Println(msg) + return true +} + +var ( + modulePath = "github.com/tikv/pd" + integrationsTestPath = "tests/integrations" +) + +var ( + // runtime + parallel int + workDir string + coverFileTempDir string + // arguments + race bool + junitFile string + coverProfile string + ignoreDir string +) + +func main() { + race = handleFlag("--race") + parallelStr := stripFlag("--parallel") + junitFile = stripFlag("--junitfile") + coverProfile = stripFlag("--coverprofile") + ignoreDir = stripFlag("--ignore") + + if coverProfile != "" { + var err error + coverFileTempDir, err = os.MkdirTemp(os.TempDir(), "cov") + if err != nil { + fmt.Println("create temp dir fail", coverFileTempDir) + os.Exit(1) + } + defer os.RemoveAll(coverFileTempDir) + } + + var err error + procs := runtime.GOMAXPROCS(0) + if parallelStr == "" { + // Get the correct count of CPU if it's in docker. + parallel = procs + } else { + parallel, err = strconv.Atoi(parallelStr) + if err != nil { + fmt.Println("parse parallel error", err) + return + } + if parallel > procs { + fmt.Printf("Recommend to set parallel be same as the GOMAXPROCS=%d\n", procs) + } + } + workDir, err = os.Getwd() + if err != nil { + fmt.Println("os.Getwd() error", err) + } + + srv := alloc.RunHTTPServer() + defer func() { + if err := srv.Shutdown(context.Background()); err != nil { + log.Fatal("server shutdown error", zap.Error(err)) + } + }() + + var isSucceed bool + // run all tests + if len(os.Args) == 1 { + isSucceed = cmdRun() + } + + if len(os.Args) >= 2 { + switch os.Args[1] { + case "list": + isSucceed = cmdList(os.Args[2:]...) + case "build": + isSucceed = cmdBuild(os.Args[2:]...) + case "run": + isSucceed = cmdRun(os.Args[2:]...) + case "it": + // run integration tests + if len(os.Args) >= 3 { + modulePath = path.Join(modulePath, integrationsTestPath) + workDir = path.Join(workDir, integrationsTestPath) + switch os.Args[2] { + case "run": + isSucceed = cmdRun(os.Args[3:]...) 
+ default: + isSucceed = usage() + } + } + default: + isSucceed = usage() + } + } + if !isSucceed { + os.Exit(1) + } +} + +func cmdList(args ...string) bool { + pkgs, err := listPackages() + if err != nil { + log.Println("list package error", err) + return false + } + + // list all packages + if len(args) == 0 { + for _, pkg := range pkgs { + fmt.Println(pkg) + } + return false + } + + // list test case of a single package + if len(args) == 1 || len(args) == 2 { + pkg := args[0] + pkgs = filter(pkgs, func(s string) bool { return s == pkg }) + if len(pkgs) != 1 { + fmt.Println("package not exist", pkg) + return false + } + + err := buildTestBinary(pkg) + if err != nil { + log.Println("build package error", pkg, err) + return false + } + exist, err := testBinaryExist(pkg) + if err != nil { + log.Println("check test binary existence error", err) + return false + } + if !exist { + fmt.Println("no test case in ", pkg) + return false + } + + res := listTestCases(pkg, nil) + + if len(args) == 2 { + res, err = filterTestCases(res, args[1]) + if err != nil { + log.Println("filter test cases error", err) + return false + } + } + + for _, x := range res { + fmt.Println(x.test) + } + } + return true +} + +func cmdBuild(args ...string) bool { + pkgs, err := listPackages() + if err != nil { + log.Println("list package error", err) + return false + } + + // build all packages + if len(args) == 0 { + err := buildTestBinaryMulti(pkgs) + if err != nil { + fmt.Println("build package error", pkgs, err) + return false + } + return true + } + + // build test binary of a single package + if len(args) >= 1 { + var dirPkgs []string + for _, pkg := range pkgs { + if strings.Contains(pkg, args[0]) { + dirPkgs = append(dirPkgs, pkg) + } + } + + err := buildTestBinaryMulti(dirPkgs) + if err != nil { + log.Println("build package error", dirPkgs, err) + return false + } + } + return true +} + +func cmdRun(args ...string) bool { + var err error + pkgs, err := listPackages() + if err != nil { + fmt.Println("list packages error", err) + return false + } + tasks := make([]task, 0, 5000) + start := time.Now() + // run all tests + if len(args) == 0 { + err := buildTestBinaryMulti(pkgs) + if err != nil { + fmt.Println("build package error", pkgs, err) + return false + } + + for _, pkg := range pkgs { + exist, err := testBinaryExist(pkg) + if err != nil { + fmt.Println("check test binary existence error", err) + return false + } + if !exist { + fmt.Println("no test case in ", pkg) + continue + } + + tasks = listTestCases(pkg, tasks) + } + } + + // run tests for a single package + if len(args) == 1 { + var dirPkgs []string + for _, pkg := range pkgs { + if strings.Contains(pkg, args[0]) { + dirPkgs = append(dirPkgs, pkg) + } + } + + err := buildTestBinaryMulti(dirPkgs) + if err != nil { + log.Println("build package error", dirPkgs, err) + return false + } + + for _, pkg := range dirPkgs { + exist, err := testBinaryExist(pkg) + if err != nil { + fmt.Println("check test binary existence error", err) + return false + } + if !exist { + fmt.Println("no test case in ", pkg) + continue + } + + tasks = listTestCases(pkg, tasks) + } + } + + // run a single test + if len(args) == 2 { + pkg := args[0] + err := buildTestBinary(pkg) + if err != nil { + log.Println("build package error", pkg, err) + return false + } + exist, err := testBinaryExist(pkg) + if err != nil { + log.Println("check test binary existence error", err) + return false + } + if !exist { + fmt.Println("no test case in ", pkg) + return false + } + + tasks = 
listTestCases(pkg, tasks) + tasks, err = filterTestCases(tasks, args[1]) + if err != nil { + log.Println("filter test cases error", err) + return false + } + } + + fmt.Printf("building task finish, parallelism=%d, count=%d, takes=%v\n", parallel*2, len(tasks), time.Since(start)) + + taskCh := make(chan task, 100) + works := make([]numa, parallel) + var wg sync.WaitGroup + for i := 0; i < parallel; i++ { + wg.Add(1) + go works[i].worker(&wg, taskCh) + } + + shuffle(tasks) + + start = time.Now() + for _, task := range tasks { + taskCh <- task + } + close(taskCh) + wg.Wait() + fmt.Println("run all tasks takes", time.Since(start)) + + if junitFile != "" { + out := collectTestResults(works) + f, err := os.Create(junitFile) + if err != nil { + fmt.Println("create junit file fail:", err) + return false + } + if err := write(f, out); err != nil { + fmt.Println("write junit file error:", err) + return false + } + } + + if coverProfile != "" { + collectCoverProfileFile() + } + + for _, work := range works { + if work.Fail { + return false + } + } + return true +} + +// stripFlag strip the '--flag xxx' from the command line os.Args +// Example of the os.Args changes +// Before: ut run pkg TestXXX --coverprofile xxx --junitfile yyy --parallel 16 +// After: ut run pkg TestXXX +// The value of the flag is returned. +func stripFlag(flag string) string { + var res string + tmp := os.Args[:0] + // Iter to the flag + var i int + for ; i < len(os.Args); i++ { + if os.Args[i] == flag { + i++ + break + } + tmp = append(tmp, os.Args[i]) + } + // Handle the flag + if i < len(os.Args) { + res = os.Args[i] + i++ + } + // Iter the remain flags + for ; i < len(os.Args); i++ { + tmp = append(tmp, os.Args[i]) + } + + os.Args = tmp + return res +} + +func handleFlag(f string) (found bool) { + tmp := os.Args[:0] + for i := 0; i < len(os.Args); i++ { + if os.Args[i] == f { + found = true + continue + } + tmp = append(tmp, os.Args[i]) + } + os.Args = tmp + return +} + +type task struct { + pkg string + test string +} + +func (t *task) String() string { + return t.pkg + " " + t.test +} + +func listTestCases(pkg string, tasks []task) []task { + newCases := listNewTestCases(pkg) + for _, c := range newCases { + tasks = append(tasks, task{pkg, c}) + } + + return tasks +} + +func filterTestCases(tasks []task, arg1 string) ([]task, error) { + if strings.HasPrefix(arg1, "r:") { + r, err := regexp.Compile(arg1[2:]) + if err != nil { + return nil, err + } + tmp := tasks[:0] + for _, task := range tasks { + if r.MatchString(task.test) { + tmp = append(tmp, task) + } + } + return tmp, nil + } + tmp := tasks[:0] + for _, task := range tasks { + if strings.Contains(task.test, arg1) { + tmp = append(tmp, task) + } + } + return tmp, nil +} + +func listPackages() ([]string, error) { + cmd := exec.Command("go", "list", "./...") + cmd.Dir = workDir + ss, err := cmdToLines(cmd) + if err != nil { + return nil, withTrace(err) + } + + ret := ss[:0] + for _, s := range ss { + if !strings.HasPrefix(s, modulePath) { + continue + } + pkg := s[len(modulePath)+1:] + if skipDIR(pkg) { + continue + } + ret = append(ret, pkg) + } + return ret, nil +} + +type numa struct { + Fail bool + results []testResult +} + +func (n *numa) worker(wg *sync.WaitGroup, ch chan task) { + defer wg.Done() + for t := range ch { + res := n.runTestCase(t.pkg, t.test) + if res.Failure != nil { + fmt.Println("[FAIL] ", t.pkg, t.test) + fmt.Fprintf(os.Stderr, "err=%s\n%s", res.err.Error(), res.Failure.Contents) + n.Fail = true + } + n.results = append(n.results, res) + } +} + 
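+// testResult pairs a JUnit test case with its wall-clock duration and the execution error, if any.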
+type testResult struct { + JUnitTestCase + d time.Duration + err error +} + +func (n *numa) runTestCase(pkg string, fn string) testResult { + res := testResult{ + JUnitTestCase: JUnitTestCase{ + ClassName: path.Join(modulePath, pkg), + Name: fn, + }, + } + + var buf bytes.Buffer + var err error + var start time.Time + for i := 0; i < 3; i++ { + cmd := n.testCommand(pkg, fn) + cmd.Dir = path.Join(workDir, pkg) + // Combine the test case output, so the run result for failed cases can be displayed. + cmd.Stdout = &buf + cmd.Stderr = &buf + + start = time.Now() + err = cmd.Run() + if err != nil { + var exitError *exec.ExitError + if errors.As(err, &exitError) { + // Retry 3 times to get rid of the weird error: + switch err.Error() { + case "signal: segmentation fault (core dumped)": + buf.Reset() + continue + case "signal: trace/breakpoint trap (core dumped)": + buf.Reset() + continue + } + if strings.Contains(buf.String(), "panic during panic") { + buf.Reset() + continue + } + } + } + break + } + if err != nil { + res.Failure = &JUnitFailure{ + Message: "Failed", + Contents: buf.String(), + } + res.err = err + } + + res.d = time.Since(start) + res.Time = formatDurationAsSeconds(res.d) + return res +} + +func collectTestResults(workers []numa) JUnitTestSuites { + version := goVersion() + // pkg => test cases + pkgs := make(map[string][]JUnitTestCase) + durations := make(map[string]time.Duration) + + // The test result in workers are shuffled, so group by the packages here + for _, n := range workers { + for _, res := range n.results { + cases, ok := pkgs[res.ClassName] + if !ok { + cases = make([]JUnitTestCase, 0, 10) + } + cases = append(cases, res.JUnitTestCase) + pkgs[res.ClassName] = cases + durations[res.ClassName] += res.d + } + } + + suites := JUnitTestSuites{} + // Turn every package result to a suite. + for pkg, cases := range pkgs { + suite := JUnitTestSuite{ + Tests: len(cases), + Failures: failureCases(cases), + Time: formatDurationAsSeconds(durations[pkg]), + Name: pkg, + Properties: packageProperties(version), + TestCases: cases, + } + suites.Suites = append(suites.Suites, suite) + } + return suites +} + +func failureCases(input []JUnitTestCase) int { + sum := 0 + for _, v := range input { + if v.Failure != nil { + sum++ + } + } + return sum +} + +func (*numa) testCommand(pkg string, fn string) *exec.Cmd { + args := make([]string, 0, 10) + exe := "./" + testFileName(pkg) + if coverProfile != "" { + fileName := strings.ReplaceAll(pkg, "/", "_") + "." + fn + tmpFile := path.Join(coverFileTempDir, fileName) + args = append(args, "-test.coverprofile", tmpFile) + } + if strings.Contains(fn, "Suite") { + args = append(args, "-test.cpu", fmt.Sprint(parallel/2)) + } else { + args = append(args, "-test.cpu", "1") + } + if !race { + args = append(args, []string{"-test.timeout", "2m"}...) + } else { + // it takes a longer when race is enabled. so it is set more timeout value. + args = append(args, []string{"-test.timeout", "5m"}...) + } + + // core.test -test.run TestClusteredPrefixColum + args = append(args, "-test.run", "^"+fn+"$") + + return exec.Command(exe, args...) 
+} + +func skipDIR(pkg string) bool { + skipDir := []string{"bin", "cmd", "realcluster"} + if ignoreDir != "" { + skipDir = append(skipDir, ignoreDir) + } + for _, ignore := range skipDir { + if strings.HasPrefix(pkg, ignore) { + return true + } + } + return false +} + +func generateBuildCache() error { + // cd cmd/pd-server && go test -tags=tso_function_test,deadlock -exec-=true -vet=off -toolexec=go-compile-without-link + cmd := exec.Command("go", "test", "-exec=true", "-vet", "off", "--tags=tso_function_test,deadlock") + goCompileWithoutLink := fmt.Sprintf("-toolexec=%s/tools/pd-ut/go-compile-without-link.sh", workDir) + cmd.Dir = fmt.Sprintf("%s/cmd/pd-server", workDir) + if strings.Contains(workDir, integrationsTestPath) { + cmd.Dir = fmt.Sprintf("%s/cmd/pd-server", workDir[:strings.LastIndex(workDir, integrationsTestPath)]) + goCompileWithoutLink = fmt.Sprintf("-toolexec=%s/tools/pd-ut/go-compile-without-link.sh", + workDir[:strings.LastIndex(workDir, integrationsTestPath)]) + } + cmd.Args = append(cmd.Args, goCompileWithoutLink) + + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return withTrace(err) + } + return nil +} + +// buildTestBinaryMulti is much faster than build the test packages one by one. +func buildTestBinaryMulti(pkgs []string) error { + // staged build, generate the build cache for all the tests first, then generate the test binary. + // This way is faster than generating test binaries directly, because the cache can be used. + if err := generateBuildCache(); err != nil { + return withTrace(err) + } + + // go test --exec=xprog --tags=tso_function_test,deadlock -vet=off --count=0 $(pkgs) + // workPath just like `/pd/tests/integrations` + xprogPath := path.Join(workDir, "bin/xprog") + if strings.Contains(workDir, integrationsTestPath) { + xprogPath = path.Join(workDir[:strings.LastIndex(workDir, integrationsTestPath)], "bin/xprog") + } + packages := make([]string, 0, len(pkgs)) + for _, pkg := range pkgs { + packages = append(packages, path.Join(modulePath, pkg)) + } + + // We use 2 * parallel for `go build` to make it faster. + p := strconv.Itoa(parallel * 2) + cmd := exec.Command("go", "test", "-p", p, "--exec", xprogPath, "-vet", "off", "--tags=tso_function_test,deadlock") + if coverProfile != "" { + coverpkg := "./..." + if strings.Contains(workDir, integrationsTestPath) { + coverpkg = "../../..." + } + cmd.Args = append(cmd.Args, "-cover", fmt.Sprintf("-coverpkg=%s", coverpkg)) + } + cmd.Args = append(cmd.Args, packages...) 
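+	// Each compiled test binary is handed to bin/xprog via --exec, which stashes it next to its package as <pkg>.test.bin instead of running it.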
+ cmd.Dir = workDir + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return withTrace(err) + } + return nil +} + +func buildTestBinary(pkg string) error { + //nolint:gosec + cmd := exec.Command("go", "test", "-c", "-vet", "off", "--tags=tso_function_test,deadlock", "-o", testFileName(pkg), "-v") + if coverProfile != "" { + cmd.Args = append(cmd.Args, "-cover", "-coverpkg=./...") + } + if race { + cmd.Args = append(cmd.Args, "-race") + } + cmd.Dir = path.Join(workDir, pkg) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return withTrace(err) + } + return nil +} + +func testBinaryExist(pkg string) (bool, error) { + _, err := os.Stat(testFileFullPath(pkg)) + if err != nil { + var pathError *os.PathError + if errors.As(err, &pathError) { + return false, nil + } + } + return true, withTrace(err) +} + +func testFileName(pkg string) string { + _, file := path.Split(pkg) + return file + ".test.bin" +} + +func testFileFullPath(pkg string) string { + return path.Join(workDir, pkg, testFileName(pkg)) +} + +func listNewTestCases(pkg string) []string { + exe := "./" + testFileName(pkg) + + // core.test -test.list Test + cmd := exec.Command(exe, "-test.list", "Test") + cmd.Dir = path.Join(workDir, pkg) + var buf bytes.Buffer + cmd.Stdout = &buf + err := cmd.Run() + res := strings.Split(buf.String(), "\n") + if err != nil && len(res) == 0 { + fmt.Println("err ==", err) + } + return filter(res, func(s string) bool { + return strings.HasPrefix(s, "Test") && s != "TestT" && s != "TestBenchDaily" + }) +} + +func cmdToLines(cmd *exec.Cmd) ([]string, error) { + res, err := cmd.Output() + if err != nil { + return nil, withTrace(err) + } + ss := bytes.Split(res, []byte{'\n'}) + ret := make([]string, len(ss)) + for i, s := range ss { + ret[i] = string(s) + } + return ret, nil +} + +func filter(input []string, f func(string) bool) []string { + ret := input[:0] + for _, s := range input { + if f(s) { + ret = append(ret, s) + } + } + return ret +} + +func shuffle(tasks []task) { + for i := 0; i < len(tasks); i++ { + pos := rand.Intn(len(tasks)) + tasks[i], tasks[pos] = tasks[pos], tasks[i] + } +} + +type errWithStack struct { + err error + buf []byte +} + +func (e *errWithStack) Error() string { + return e.err.Error() + "\n" + string(e.buf) +} + +func withTrace(err error) error { + if err == nil { + return err + } + var errStack *errWithStack + if errors.As(err, &errStack) { + return err + } + var stack [4096]byte + sz := runtime.Stack(stack[:], false) + return &errWithStack{err, stack[:sz]} +} + +func formatDurationAsSeconds(d time.Duration) string { + return fmt.Sprintf("%f", d.Seconds()) +} + +func packageProperties(goVersion string) []JUnitProperty { + return []JUnitProperty{ + {Name: "go.version", Value: goVersion}, + } +} + +// goVersion returns the version as reported by the go binary in PATH. This +// version will not be the same as runtime.Version, which is always the version +// of go used to build the gotestsum binary. +// +// To skip the os/exec call set the GOVERSION environment variable to the +// desired value. 
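+// For example, GOVERSION=go1.21.0 ./bin/pd-ut run records that value as the go.version property.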
+func goVersion() string { + if version, ok := os.LookupEnv("GOVERSION"); ok { + return version + } + cmd := exec.Command("go", "version") + out, err := cmd.Output() + if err != nil { + return "unknown" + } + return strings.TrimPrefix(strings.TrimSpace(string(out)), "go version ") +} + +func write(out io.Writer, suites JUnitTestSuites) error { + doc, err := xml.MarshalIndent(suites, "", "\t") + if err != nil { + return err + } + _, err = out.Write([]byte(xml.Header)) + if err != nil { + return err + } + _, err = out.Write(doc) + return err +} + +// JUnitTestSuites is a collection of JUnit test suites. +type JUnitTestSuites struct { + XMLName xml.Name `xml:"testsuites"` + Suites []JUnitTestSuite +} + +// JUnitTestSuite is a single JUnit test suite which may contain many +// testcases. +type JUnitTestSuite struct { + XMLName xml.Name `xml:"testsuite"` + Tests int `xml:"tests,attr"` + Failures int `xml:"failures,attr"` + Time string `xml:"time,attr"` + Name string `xml:"name,attr"` + Properties []JUnitProperty `xml:"properties>property,omitempty"` + TestCases []JUnitTestCase +} + +// JUnitTestCase is a single test case with its result. +type JUnitTestCase struct { + XMLName xml.Name `xml:"testcase"` + ClassName string `xml:"classname,attr"` + Name string `xml:"name,attr"` + Time string `xml:"time,attr"` + SkipMessage *JUnitSkipMessage `xml:"skipped,omitempty"` + Failure *JUnitFailure `xml:"failure,omitempty"` +} + +// JUnitSkipMessage contains the reason why a testcase was skipped. +type JUnitSkipMessage struct { + Message string `xml:"message,attr"` +} + +// JUnitProperty represents a key/value pair used to define properties. +type JUnitProperty struct { + Name string `xml:"name,attr"` + Value string `xml:"value,attr"` +} + +// JUnitFailure contains data related to a failed test. +type JUnitFailure struct { + Message string `xml:"message,attr"` + Type string `xml:"type,attr"` + Contents string `xml:",chardata"` +} diff --git a/tools/pd-ut/xprog.go b/tools/pd-ut/xprog.go new file mode 100644 index 00000000000..cf3e9b295e2 --- /dev/null +++ b/tools/pd-ut/xprog.go @@ -0,0 +1,119 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//go:build xprog +// +build xprog + +package main + +import ( + "bufio" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +func main() { + // See https://github.com/golang/go/issues/15513#issuecomment-773994959 + // go test --exec=xprog ./... + // Command line args looks like: + // '$CWD/xprog /tmp/go-build2662369829/b1382/aggfuncs.test -test.paniconexit0 -test.timeout=10m0s' + // This program moves the test binary /tmp/go-build2662369829/b1382/aggfuncs.test to someplace else for later use. 
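+	// pd-ut later executes the stashed binary directly (with -test.list / -test.run) to list and run individual cases.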
+ + // Extract the current work directory + cwd := os.Args[0] + cwd = cwd[:len(cwd)-len("bin/xprog")] + + testBinaryPath := os.Args[1] + dir, _ := filepath.Split(testBinaryPath) + + // Extract the package info from /tmp/go-build2662369829/b1382/importcfg.link + pkg := getPackageInfo(dir) + + const prefix = "github.com/tikv/pd/" + if !strings.HasPrefix(pkg, prefix) { + os.Exit(-3) + } + + // github.com/tikv/pd/server/api/api.test => server/api/api + pkg = pkg[len(prefix) : len(pkg)-len(".test")] + + _, file := filepath.Split(pkg) + + // The path of the destination file looks like $CWD/server/api/api.test.bin + newName := filepath.Join(cwd, pkg, file+".test.bin") + + if err1 := os.Rename(testBinaryPath, newName); err1 != nil { + // Rename fail, handle error like "invalid cross-device linkcd tools/check" + err1 = MoveFile(testBinaryPath, newName) + if err1 != nil { + os.Exit(-4) + } + } +} + +func getPackageInfo(dir string) string { + // Read the /tmp/go-build2662369829/b1382/importcfg.link file to get the package information + f, err := os.Open(filepath.Join(dir, "importcfg.link")) + if err != nil { + os.Exit(-1) + } + defer f.Close() + + r := bufio.NewReader(f) + line, _, err := r.ReadLine() + if err != nil { + os.Exit(-2) + } + start := strings.IndexByte(string(line), ' ') + end := strings.IndexByte(string(line), '=') + pkg := string(line[start+1 : end]) + return pkg +} + +func MoveFile(srcPath, dstPath string) error { + inputFile, err := os.Open(srcPath) + if err != nil { + return fmt.Errorf("couldn't open source file: %s", err) + } + outputFile, err := os.Create(dstPath) + if err != nil { + inputFile.Close() + return fmt.Errorf("couldn't open dst file: %s", err) + } + defer outputFile.Close() + _, err = io.Copy(outputFile, inputFile) + inputFile.Close() + if err != nil { + return fmt.Errorf("writing to output file failed: %s", err) + } + + // Handle the permissions + si, err := os.Stat(srcPath) + if err != nil { + return fmt.Errorf("stat error: %s", err) + } + err = os.Chmod(dstPath, si.Mode()) + if err != nil { + return fmt.Errorf("chmod error: %s", err) + } + + // The copy was successful, so now delete the original file + err = os.Remove(srcPath) + if err != nil { + return fmt.Errorf("failed removing original file: %s", err) + } + return nil +}