From cdc1594d7cc665b10d6b9bbcb1ca04dc76be620c Mon Sep 17 00:00:00 2001 From: jinhelin Date: Thu, 4 Jan 2024 01:12:32 +0800 Subject: [PATCH] This is an automated cherry-pick of #8653 Signed-off-by: ti-chi-bot --- dbms/src/Common/TiFlashMetrics.h | 770 ++++++++++++ .../DeltaMerge/SegmentReadTaskPool.cpp | 6 + metrics/grafana/tiflash_summary.json | 1089 +++++++++++++++++ 3 files changed, 1865 insertions(+) diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 1c7f46c26a4..73bf303ef61 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -41,6 +41,7 @@ namespace DB /// 2. Keep metrics with same prefix next to each other. /// 3. Add metrics of new subsystems at tail. /// 4. Keep it proper formatted using clang-format. +<<<<<<< HEAD // clang-format off #define APPLY_FOR_METRICS(M, F) \ M(tiflash_coprocessor_request_count, "Total number of request", Counter, F(type_cop, {"type", "cop"}), \ @@ -278,6 +279,775 @@ namespace DB F(type_merged_task, {{"type", "merged_task"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_mpp_task_manager, "The gauge of mpp task manager", Gauge, \ F(type_mpp_query_count, {"type", "mpp_query_count"})) +======= +#define APPLY_FOR_METRICS(M, F) \ + M(tiflash_coprocessor_request_count, \ + "Total number of request", \ + Counter, \ + F(type_cop, {"type", "cop"}), \ + F(type_cop_executing, {"type", "cop_executing"}), \ + F(type_cop_stream, {"type", "cop_stream"}), \ + F(type_cop_stream_executing, {"type", "cop_stream_executing"}), \ + F(type_batch, {"type", "batch"}), \ + F(type_batch_executing, {"type", "batch_executing"}), \ + F(type_dispatch_mpp_task, {"type", "dispatch_mpp_task"}), \ + F(type_mpp_establish_conn, {"type", "mpp_establish_conn"}), \ + F(type_cancel_mpp_task, {"type", "cancel_mpp_task"}), \ + F(type_run_mpp_task, {"type", "run_mpp_task"}), \ + F(type_remote_read, {"type", "remote_read"}), \ + F(type_remote_read_constructed, {"type", "remote_read_constructed"}), \ + F(type_remote_read_sent, {"type", "remote_read_sent"}), \ + F(type_disagg_establish_task, {"type", "disagg_establish_task"}), \ + F(type_disagg_fetch_pages, {"type", "disagg_fetch_pages"})) \ + M(tiflash_coprocessor_handling_request_count, \ + "Number of handling request", \ + Gauge, \ + F(type_cop, {"type", "cop"}), \ + F(type_cop_executing, {"type", "cop_executing"}), \ + F(type_cop_stream, {"type", "cop_stream"}), \ + F(type_cop_stream_executing, {"type", "cop_stream_executing"}), \ + F(type_batch, {"type", "batch"}), \ + F(type_batch_executing, {"type", "batch_executing"}), \ + F(type_dispatch_mpp_task, {"type", "dispatch_mpp_task"}), \ + F(type_mpp_establish_conn, {"type", "mpp_establish_conn"}), \ + F(type_cancel_mpp_task, {"type", "cancel_mpp_task"}), \ + F(type_run_mpp_task, {"type", "run_mpp_task"}), \ + F(type_remote_read, {"type", "remote_read"}), \ + F(type_remote_read_executing, {"type", "remote_read_executing"}), \ + F(type_disagg_establish_task, {"type", "disagg_establish_task"}), \ + F(type_disagg_fetch_pages, {"type", "disagg_fetch_pages"})) \ + M(tiflash_coprocessor_executor_count, \ + "Total number of each executor", \ + Counter, \ + F(type_ts, {"type", "table_scan"}), \ + F(type_sel, {"type", "selection"}), \ + F(type_agg, {"type", "aggregation"}), \ + F(type_topn, {"type", "top_n"}), \ + F(type_limit, {"type", "limit"}), \ + F(type_join, {"type", "join"}), \ + F(type_exchange_sender, {"type", "exchange_sender"}), \ + F(type_exchange_receiver, {"type", "exchange_receiver"}), \ + F(type_projection, {"type", "projection"}), \ + F(type_partition_ts, {"type", "partition_table_scan"}), \ + F(type_window, {"type", "window"}), \ + F(type_window_sort, {"type", "window_sort"}), \ + F(type_expand, {"type", "expand"})) \ + M(tiflash_memory_exceed_quota_count, "Total number of cases where memory exceeds quota", Counter) \ + M(tiflash_coprocessor_request_duration_seconds, \ + "Bucketed histogram of request duration", \ + Histogram, \ + F(type_cop, {{"type", "cop"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_cop_stream, {{"type", "cop_stream"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_batch, {{"type", "batch"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_dispatch_mpp_task, {{"type", "dispatch_mpp_task"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_mpp_establish_conn, {{"type", "mpp_establish_conn"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_cancel_mpp_task, {{"type", "cancel_mpp_task"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_disagg_establish_task, {{"type", "disagg_establish_task"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_disagg_fetch_pages, {{"type", "type_disagg_fetch_pages"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_coprocessor_request_memory_usage, \ + "Bucketed histogram of request memory usage", \ + Histogram, \ + F(type_cop, {{"type", "cop"}}, ExpBuckets{1024 * 1024, 2, 16}), \ + F(type_cop_stream, {{"type", "cop_stream"}}, ExpBuckets{1024 * 1024, 2, 16}), \ + F(type_batch, {{"type", "batch"}}, ExpBuckets{1024 * 1024, 2, 20}), \ + F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{1024 * 1024, 2, 20}), \ + F(type_run_mpp_query, {{"type", "run_mpp_query"}}, ExpBuckets{1024 * 1024, 2, 20})) \ + M(tiflash_coprocessor_request_error, \ + "Total number of request error", \ + Counter, \ + F(reason_meet_lock, {"reason", "meet_lock"}), \ + F(reason_region_not_found, {"reason", "region_not_found"}), \ + F(reason_epoch_not_match, {"reason", "epoch_not_match"}), \ + F(reason_kv_client_error, {"reason", "kv_client_error"}), \ + F(reason_internal_error, {"reason", "internal_error"}), \ + F(reason_other_error, {"reason", "other_error"})) \ + M(tiflash_coprocessor_request_handle_seconds, \ + "Bucketed histogram of request handle duration", \ + Histogram, \ + F(type_cop, {{"type", "cop"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_cop_stream, {{"type", "cop_stream"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_batch, {{"type", "batch"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_coprocessor_response_bytes, \ + "Total bytes of response body", \ + Counter, \ + F(type_cop, {{"type", "cop"}}), \ + F(type_cop_stream, {{"type", "cop_stream"}}), \ + F(type_batch_cop, {{"type", "batch_cop"}}), \ + F(type_dispatch_mpp_task, {{"type", "dispatch_mpp_task"}}), \ + F(type_mpp_establish_conn, {{"type", "mpp_tunnel"}}), \ + F(type_mpp_establish_conn_local, {{"type", "mpp_tunnel_local"}}), \ + F(type_cancel_mpp_task, {{"type", "cancel_mpp_task"}}), \ + F(type_disagg_establish_task, {{"type", "type_disagg_establish_task"}})) \ + M(tiflash_exchange_data_bytes, \ + "Total bytes sent by exchange operators", \ + Counter, \ + F(type_hash_original, {"type", "hash_original"}), \ + F(type_hash_none_compression_remote, {"type", "hash_none_compression_remote"}), \ + F(type_hash_none_compression_local, {"type", "hash_none_compression_local"}), \ + F(type_hash_lz4_compression, {"type", "hash_lz4_compression"}), \ + F(type_hash_zstd_compression, {"type", "hash_zstd_compression"}), \ + F(type_broadcast_original, {"type", "broadcast_original"}), \ + F(type_broadcast_none_compression_local, {"type", "broadcast_none_compression_local"}), \ + F(type_broadcast_none_compression_remote, {"type", "broadcast_none_compression_remote"}), \ + F(type_broadcast_lz4_compression, {"type", "broadcast_lz4_compression"}), \ + F(type_broadcast_zstd_compression, {"type", "broadcast_zstd_compression"}), \ + F(type_passthrough_original, {"type", "passthrough_original"}), \ + F(type_passthrough_none_compression_local, {"type", "passthrough_none_compression_local"}), \ + F(type_passthrough_none_compression_remote, {"type", "passthrough_none_compression_remote"}), \ + F(type_passthrough_lz4_compression, {"type", "passthrough_lz4_compression"}), \ + F(type_passthrough_zstd_compression, {"type", "passthrough_zstd_compression"})) \ + M(tiflash_sync_schema_applying, "Whether the schema is applying or not (holding lock)", Gauge) \ + M(tiflash_schema_trigger_count, \ + "Total number of each kinds of schema sync trigger", \ + Counter, \ + F(type_timer, {"type", "timer"}), \ + F(type_raft_decode, {"type", "raft_decode"}), \ + F(type_cop_read, {"type", "cop_read"}), \ + F(type_sync_table_schema, {"type", "sync_table_schema"})) \ + M(tiflash_schema_internal_ddl_count, \ + "Total number of each kinds of internal ddl operations", \ + Counter, \ + F(type_create_table, {"type", "create_table"}), \ + F(type_create_db, {"type", "create_db"}), \ + F(type_drop_table, {"type", "drop_table"}), \ + F(type_drop_db, {"type", "drop_db"}), \ + F(type_rename_table, {"type", "rename_table"}), \ + F(type_modify_column, {"type", "modify_column"}), \ + F(type_apply_partition, {"type", "apply_partition"}), \ + F(type_exchange_partition, {"type", "exchange_partition"})) \ + M(tiflash_schema_apply_duration_seconds, \ + "Bucketed histogram of ddl apply duration", \ + Histogram, \ + F(type_sync_schema_apply_duration, {{"type", "sync_schema_duration"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_sync_table_schema_apply_duration, {{"type", "sync_table_schema_duration"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_raft_read_index_count, "Total number of raft read index", Counter) \ + M(tiflash_stale_read_count, "Total number of stale read", Counter) \ + M(tiflash_raft_read_index_duration_seconds, \ + "Bucketed histogram of raft read index duration", \ + Histogram, \ + F(type_raft_read_index_duration, {{"type", "tmt_raft_read_index_duration"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_raft_wait_index_duration_seconds, \ + "Bucketed histogram of raft wait index duration", \ + Histogram, \ + F(type_raft_wait_index_duration, {{"type", "tmt_raft_wait_index_duration"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_raft_eager_gc_duration_seconds, \ + "Bucketed histogram of RaftLog eager", \ + Histogram, \ + F(type_run, {{"type", "run"}}, ExpBuckets{0.0005, 2, 20})) \ + M(tiflash_raft_eager_gc_count, \ + "Total number processed in RaftLog eager GC", \ + Counter, \ + F(type_num_raft_logs, {"type", "num_raft_logs"}), \ + F(type_num_skip_regions, {"type", "num_skip_regions"}), \ + F(type_num_process_regions, {"type", "num_process_regions"})) \ + M(tiflash_syncing_data_freshness, \ + "The freshness of tiflash data with tikv data", \ + Histogram, \ + F(type_syncing_data_freshness, {{"type", "data_freshness"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_storage_read_tasks_count, "Total number of storage engine read tasks", Counter) \ + M(tiflash_storage_command_count, \ + "Total number of storage's command, such as delete range / shutdown /startup", \ + Counter, \ + F(type_delete_range, {"type", "delete_range"}), \ + F(type_ingest, {"type", "ingest"}), \ + F(type_ingest_checkpoint, {"type", "ingest_check_point"})) \ + M(tiflash_storage_subtask_count, \ + "Total number of storage's sub task", \ + Counter, \ + F(type_delta_merge_bg, {"type", "delta_merge_bg"}), \ + F(type_delta_merge_bg_gc, {"type", "delta_merge_bg_gc"}), \ + F(type_delta_merge_fg, {"type", "delta_merge_fg"}), \ + F(type_delta_merge_manual, {"type", "delta_merge_manual"}), \ + F(type_delta_compact, {"type", "delta_compact"}), \ + F(type_delta_flush, {"type", "delta_flush"}), \ + F(type_seg_split_bg, {"type", "seg_split_bg"}), \ + F(type_seg_split_fg, {"type", "seg_split_fg"}), \ + F(type_seg_split_ingest, {"type", "seg_split_ingest"}), \ + F(type_seg_merge_bg_gc, {"type", "seg_merge_bg_gc"}), \ + F(type_place_index_update, {"type", "place_index_update"})) \ + M(tiflash_storage_subtask_duration_seconds, \ + "Bucketed histogram of storage's sub task duration", \ + Histogram, \ + F(type_delta_merge_bg, {{"type", "delta_merge_bg"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_delta_merge_bg_gc, {{"type", "delta_merge_bg_gc"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_delta_merge_fg, {{"type", "delta_merge_fg"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_delta_merge_manual, {{"type", "delta_merge_manual"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_delta_compact, {{"type", "delta_compact"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_delta_flush, {{"type", "delta_flush"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_seg_split_bg, {{"type", "seg_split_bg"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_seg_split_fg, {{"type", "seg_split_fg"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_seg_split_ingest, {{"type", "seg_split_ingest"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_seg_merge_bg_gc, {{"type", "seg_merge_bg_gc"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_place_index_update, {{"type", "place_index_update"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_storage_subtask_throughput_bytes, \ + "Calculate the throughput of (maybe foreground) tasks of storage in bytes", \ + Counter, /**/ \ + F(type_delta_flush, {"type", "delta_flush"}), /**/ \ + F(type_delta_compact, {"type", "delta_compact"}), /**/ \ + F(type_write_to_cache, {"type", "write_to_cache"}), /**/ \ + F(type_write_to_disk, {"type", "write_to_disk"})) /**/ \ + M(tiflash_storage_subtask_throughput_rows, \ + "Calculate the throughput of (maybe foreground) tasks of storage in rows", \ + Counter, /**/ \ + F(type_delta_flush, {"type", "delta_flush"}), /**/ \ + F(type_delta_compact, {"type", "delta_compact"}), /**/ \ + F(type_write_to_cache, {"type", "write_to_cache"}), /**/ \ + F(type_write_to_disk, {"type", "write_to_disk"})) /**/ \ + M(tiflash_storage_throughput_bytes, \ + "Calculate the throughput of tasks of storage in bytes", \ + Gauge, /**/ \ + F(type_write, {"type", "write"}), /**/ \ + F(type_ingest, {"type", "ingest"}), /**/ \ + F(type_delta_merge, {"type", "delta_merge"}), /**/ \ + F(type_split, {"type", "split"}), /**/ \ + F(type_merge, {"type", "merge"})) /**/ \ + M(tiflash_storage_throughput_rows, \ + "Calculate the throughput of tasks of storage in rows", \ + Gauge, /**/ \ + F(type_write, {"type", "write"}), /**/ \ + F(type_ingest, {"type", "ingest"}), /**/ \ + F(type_delta_merge, {"type", "delta_merge"}), /**/ \ + F(type_split, {"type", "split"}), /**/ \ + F(type_merge, {"type", "merge"})) /**/ \ + M(tiflash_storage_write_stall_duration_seconds, \ + "The write stall duration of storage, in seconds", \ + Histogram, \ + F(type_write, {{"type", "write"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_delta_merge_by_write, {{"type", "delta_merge_by_write"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_delta_merge_by_delete_range, {{"type", "delta_merge_by_delete_range"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_flush, {{"type", "flush"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_split, {{"type", "split"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_storage_page_gc_count, \ + "Total number of page's gc execution.", \ + Counter, \ + F(type_v2, {"type", "v2"}), \ + F(type_v2_low, {"type", "v2_low"}), \ + F(type_v3, {"type", "v3"}), \ + F(type_v3_mvcc_dumped, {"type", "v3_mvcc_dumped"}), \ + F(type_v3_bs_full_gc, {"type", "v3_bs_full_gc"})) \ + M(tiflash_storage_page_gc_duration_seconds, \ + "Bucketed histogram of page's gc task duration", \ + Histogram, \ + F(type_v2, {{"type", "v2"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_v2_data_compact, {{"type", "v2_data_compact"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_v2_ver_compact, \ + {{"type", "v2_ver_compact"}}, \ + ExpBuckets{0.0005, 2, 20}), /* Below are metrics for PageStorage V3 */ \ + F(type_compact_wal, {{"type", "compact_wal"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_compact_directory, {{"type", "compact_directory"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_compact_spacemap, {{"type", "compact_spacemap"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_fullgc_rewrite, {{"type", "fullgc_rewrite"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_fullgc_commit, {{"type", "fullgc_commit"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_clean_external, {{"type", "clean_external"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_v3, {{"type", "v3"}}, ExpBuckets{0.0005, 2, 20})) \ + M(tiflash_storage_page_command_count, \ + "Total number of PageStorage's command, such as write / read / scan / snapshot", \ + Counter, \ + F(type_write, {"type", "write"}), \ + F(type_read, {"type", "read"}), \ + F(type_read_page_dir, {"type", "read_page_dir"}), \ + F(type_read_blob, {"type", "read_blob"}), \ + F(type_scan, {"type", "scan"}), \ + F(type_snapshot, {"type", "snapshot"})) \ + M(tiflash_storage_page_write_batch_size, \ + "The size of each write batch in bytes", \ + Histogram, \ + F(type_v3, {{"type", "v3"}}, ExpBuckets{4 * 1024, 4, 10})) \ + M(tiflash_storage_page_write_duration_seconds, \ + "The duration of each write batch", \ + Histogram, \ + F(type_total, \ + {{"type", "total"}}, \ + ExpBuckets{0.0001, 2, 20}), /* the bucket range for apply in memory is 50us ~ 120s */ \ + F(type_choose_stat, {{"type", "choose_stat"}}, ExpBuckets{0.00005, 1.8, 26}), \ + F(type_search_pos, {{"type", "search_pos"}}, ExpBuckets{0.00005, 1.8, 26}), \ + F(type_blob_write, {{"type", "blob_write"}}, ExpBuckets{0.00005, 1.8, 26}), \ + F(type_latch, {{"type", "latch"}}, ExpBuckets{0.00005, 1.8, 26}), \ + F(type_wait_in_group, {{"type", "wait_in_group"}}, ExpBuckets{0.00005, 1.8, 26}), \ + F(type_wal, {{"type", "wal"}}, ExpBuckets{0.00005, 1.8, 26}), \ + F(type_commit, {{"type", "commit"}}, ExpBuckets{0.00005, 1.8, 26})) \ + M(tiflash_storage_logical_throughput_bytes, \ + "The logical throughput of read tasks of storage in bytes", \ + Histogram, \ + F(type_read, {{"type", "read"}}, EqualWidthBuckets{1 * 1024 * 1024, 60, 50 * 1024 * 1024})) \ + M(tiflash_storage_io_limiter, \ + "Storage I/O limiter metrics", \ + Counter, \ + F(type_fg_read_req_bytes, {"type", "fg_read_req_bytes"}), \ + F(type_fg_read_alloc_bytes, {"type", "fg_read_alloc_bytes"}), \ + F(type_bg_read_req_bytes, {"type", "bg_read_req_bytes"}), \ + F(type_bg_read_alloc_bytes, {"type", "bg_read_alloc_bytes"}), \ + F(type_fg_write_req_bytes, {"type", "fg_write_req_bytes"}), \ + F(type_fg_write_alloc_bytes, {"type", "fg_write_alloc_bytes"}), \ + F(type_bg_write_req_bytes, {"type", "bg_write_req_bytes"}), \ + F(type_bg_write_alloc_bytes, {"type", "bg_write_alloc_bytes"})) \ + M(tiflash_storage_rough_set_filter_rate, \ + "Bucketed histogram of rough set filter rate", \ + Histogram, \ + F(type_dtfile_pack, {{"type", "dtfile_pack"}}, EqualWidthBuckets{0, 6, 20})) \ + M(tiflash_disaggregated_object_lock_request_count, \ + "Total number of S3 object lock/delete request", \ + Counter, \ + F(type_lock, {"type", "lock"}), \ + F(type_delete, {"type", "delete"}), \ + F(type_owner_changed, {"type", "owner_changed"}), \ + F(type_error, {"type", "error"}), \ + F(type_lock_conflict, {"type", "lock_conflict"}), \ + F(type_delete_conflict, {"type", "delete_conflict"}), \ + F(type_delete_risk, {"type", "delete_risk"})) \ + M(tiflash_disaggregated_object_lock_request_duration_seconds, \ + "Bucketed histogram of S3 object lock/delete request duration", \ + Histogram, \ + F(type_lock, {{"type", "lock"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_delete, {{"type", "delete"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_disaggregated_read_tasks_count, "Total number of storage engine disaggregated read tasks", Counter) \ + M(tiflash_disaggregated_breakdown_duration_seconds, \ + "", \ + Histogram, \ + F(type_rpc_establish, {{"type", "rpc_establish"}}, ExpBuckets{0.01, 2, 20}), \ + F(type_total_establish_backoff, {{"type", "total_establish_backoff"}}, ExpBuckets{0.01, 2, 20}), \ + F(type_resolve_lock, {{"type", "resolve_lock"}}, ExpBuckets{0.01, 2, 20}), \ + F(type_rpc_fetch_page, {{"type", "rpc_fetch_page"}}, ExpBuckets{0.01, 2, 20}), \ + F(type_write_page_cache, {{"type", "write_page_cache"}}, ExpBuckets{0.01, 2, 20}), \ + F(type_cache_occupy, {{"type", "cache_occupy"}}, ExpBuckets{0.01, 2, 20}), \ + F(type_worker_fetch_page, {{"type", "worker_fetch_page"}}, ExpBuckets{0.01, 2, 20}), \ + F(type_worker_prepare_stream, {{"type", "worker_prepare_stream"}}, ExpBuckets{0.01, 2, 20}), \ + F(type_stream_wait_next_task, {{"type", "stream_wait_next_task"}}, ExpBuckets{0.01, 2, 20}), \ + F(type_stream_read, {{"type", "stream_read"}}, ExpBuckets{0.01, 2, 20}), \ + F(type_deserialize_page, {{"type", "deserialize_page"}}, ExpBuckets{0.01, 2, 20})) \ + M(tiflash_disaggregated_details, \ + "", \ + Counter, \ + F(type_cftiny_read, {{"type", "cftiny_read"}}), \ + F(type_cftiny_fetch, {{"type", "cftiny_fetch"}})) \ + M(tiflash_fap_task_result, \ + "", \ + Counter, \ + F(type_total, {{"type", "total"}}), \ + F(type_success_transform, {{"type", "success_transform"}}), \ + F(type_failed_other, {{"type", "failed_other"}}), \ + F(type_failed_cancel, {{"type", "failed_cancel"}}), \ + F(type_failed_no_suitable, {{"type", "failed_no_suitable"}}), \ + F(type_failed_timeout, {{"type", "failed_timeout"}}), \ + F(type_failed_baddata, {{"type", "failed_baddata"}}), \ + F(type_failed_repeated, {{"type", "failed_repeated"}}), \ + F(type_restore, {{"type", "restore"}}), \ + F(type_succeed, {{"type", "succeed"}})) \ + M(tiflash_fap_task_state, \ + "", \ + Gauge, \ + F(type_ongoing, {{"type", "ongoing"}}), \ + F(type_ingesting_stage, {{"type", "ingesting_stage"}}), \ + F(type_writing_stage, {{"type", "writing_stage"}}), \ + F(type_queueing_stage, {{"type", "queueing_stage"}}), \ + F(type_selecting_stage, {{"type", "selecting_stage"}})) \ + M(tiflash_fap_nomatch_reason, \ + "", \ + Counter, \ + F(type_conf, {{"type", "conf"}}), \ + F(type_region_state, {{"type", "region_state"}}), \ + F(type_no_meta, {{"type", "no_meta"}})) \ + M(tiflash_fap_task_duration_seconds, \ + "", \ + Histogram, \ + F(type_select_stage, {{"type", "select_stage"}}, ExpBucketsWithRange{0.1, 2, 60}), \ + F(type_write_stage, {{"type", "write_stage"}}, ExpBucketsWithRange{0.05, 2, 60}), \ + F(type_ingest_stage, {{"type", "ingest_stage"}}, ExpBucketsWithRange{0.05, 2, 30}), \ + F(type_total, {{"type", "total"}}, ExpBucketsWithRange{0.1, 2, 300}), \ + F(type_queue_stage, {{"type", "queue_stage"}}, ExpBucketsWithRange{0.1, 2, 300}), \ + F(type_phase1_total, {{"type", "phase1_total"}}, ExpBucketsWithRange{0.2, 2, 80})) \ + M(tiflash_raft_command_duration_seconds, \ + "Bucketed histogram of some raft command: apply snapshot and ingest SST", \ + Histogram, /* these command usually cost several seconds, increase the start bucket to 50ms */ \ + F(type_remove_peer, {{"type", "remove_peer"}}, ExpBuckets{0.05, 2, 10}), \ + F(type_ingest_sst, {{"type", "ingest_sst"}}, ExpBuckets{0.05, 2, 10}), \ + F(type_ingest_sst_sst2dt, {{"type", "ingest_sst_sst2dt"}}, ExpBuckets{0.05, 2, 10}), \ + F(type_ingest_sst_upload, {{"type", "ingest_sst_upload"}}, ExpBuckets{0.05, 2, 10}), \ + F(type_apply_snapshot_predecode, {{"type", "snapshot_predecode"}}, ExpBuckets{0.05, 2, 15}), \ + F(type_apply_snapshot_total, {{"type", "snapshot_total"}}, ExpBucketsWithRange{0.1, 2, 600}), \ + F(type_apply_snapshot_predecode_sst2dt, {{"type", "snapshot_predecode_sst2dt"}}, ExpBuckets{0.05, 2, 15}), \ + F(type_apply_snapshot_predecode_parallel_wait, \ + {{"type", "snapshot_predecode_parallel_wait"}}, \ + ExpBuckets{0.1, 2, 10}), \ + F(type_apply_snapshot_predecode_upload, {{"type", "snapshot_predecode_upload"}}, ExpBuckets{0.05, 2, 10}), \ + F(type_apply_snapshot_flush, {{"type", "snapshot_flush"}}, ExpBuckets{0.05, 2, 10})) \ + M(tiflash_raft_process_keys, \ + "Total number of keys processed in some types of Raft commands", \ + Counter, \ + F(type_write_put, {"type", "write_put"}), \ + F(type_write_del, {"type", "write_del"}), \ + F(type_apply_snapshot, {"type", "apply_snapshot"}), \ + F(type_ingest_sst, {"type", "ingest_sst"})) \ + M(tiflash_raft_apply_write_command_duration_seconds, \ + "Bucketed histogram of applying write command Raft logs", \ + Histogram, \ + F(type_write, {{"type", "write"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_admin, {{"type", "admin"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_admin_batch_split, {{"type", "admin_batch_split"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_admin_prepare_merge, {{"type", "admin_prepare_merge"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_admin_commit_merge, {{"type", "admin_commit_merge"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_admin_change_peer, {{"type", "admin_change_peer"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_flush_region, {{"type", "flush_region"}}, ExpBuckets{0.0005, 2, 20})) \ + M(tiflash_raft_upstream_latency, \ + "The latency that tikv sends raft log to tiflash.", \ + Histogram, \ + F(type_write, {{"type", "write"}}, ExpBuckets{0.001, 2, 30})) \ + M(tiflash_raft_write_data_to_storage_duration_seconds, \ + "Bucketed histogram of writting region into storage layer", \ + Histogram, \ + F(type_decode, {{"type", "decode"}}, ExpBuckets{0.0005, 2, 20}), \ + F(type_write, {{"type", "write"}}, ExpBuckets{0.0005, 2, 20})) \ + M(tiflash_raft_raft_log_gap_count, \ + "Bucketed histogram raft index gap between applied and truncated index", \ + Histogram, \ + F(type_applied_index, {{"type", "applied_index"}}, EqualWidthBuckets{0, 100, 15}), \ + F(type_eager_gc_applied_index, {{"type", "eager_gc_applied_index"}}, EqualWidthBuckets{0, 100, 10}), \ + F(type_unflushed_applied_index, {{"type", "unflushed_applied_index"}}, EqualWidthBuckets{0, 100, 15})) \ + M(tiflash_raft_raft_events_count, \ + "Raft event counter", \ + Counter, \ + F(type_pre_exec_compact, {{"type", "pre_exec_compact"}}), \ + F(type_flush_apply_snapshot, {{"type", "flush_apply_snapshot"}}), \ + F(type_flush_ingest_sst, {{"type", "flush_ingest_sst"}}), \ + F(type_flush_useless_admin, {{"type", "flush_useless_admin"}}), \ + F(type_flush_useful_admin, {{"type", "flush_useful_admin"}}), \ + F(type_flush_passive, {{"type", "flush_passive"}}), \ + F(type_flush_proactive, {{"type", "flush_proactive"}}), \ + F(type_flush_log_gap, {{"type", "flush_log_gap"}}), \ + F(type_flush_size, {{"type", "flush_size"}}), \ + F(type_flush_rowcount, {{"type", "flush_rowcount"}}), \ + F(type_flush_eager_gc, {{"type", "flush_eager_gc"}})) \ + M(tiflash_raft_raft_frequent_events_count, \ + "Raft frequent event counter", \ + Counter, \ + F(type_write_commit, {{"type", "write_commit"}}), \ + F(type_write, {{"type", "write"}})) \ + M(tiflash_raft_region_flush_bytes, \ + "Bucketed histogram of region flushed bytes", \ + Histogram, \ + F(type_flushed, {{"type", "flushed"}}, ExpBucketsWithRange{32, 4, 32 * 1024 * 1024}), \ + F(type_unflushed, {{"type", "unflushed"}}, ExpBucketsWithRange{32, 4, 32 * 1024 * 1024})) \ + M(tiflash_raft_entry_size, \ + "Bucketed histogram entry size", \ + Histogram, \ + F(type_normal, {{"type", "normal"}}, ExpBuckets{1, 2, 13})) \ + M(tiflash_raft_ongoing_snapshot_total_bytes, \ + "Ongoing snapshot total size", \ + Gauge, \ + F(type_raft_snapshot, {{"type", "raft_snapshot"}}), \ + F(type_dt_on_disk, {{"type", "dt_on_disk"}}), \ + F(type_dt_total, {{"type", "dt_total"}})) \ + M(tiflash_raft_throughput_bytes, \ + "Raft handled bytes in global", \ + Counter, \ + F(type_write, {{"type", "write"}}), \ + F(type_write_committed, {{"type", "write_committed"}})) \ + M(tiflash_raft_write_flow_bytes, \ + "Bucketed histogram of bytes for each write", \ + Histogram, \ + F(type_ingest_uncommitted, {{"type", "ingest_uncommitted"}}, ExpBucketsWithRange{16, 4, 64 * 1024}), \ + F(type_snapshot_uncommitted, {{"type", "snapshot_uncommitted"}}, ExpBucketsWithRange{16, 4, 1024 * 1024}), \ + F(type_write_committed, {{"type", "write_committed"}}, ExpBucketsWithRange{16, 2, 1024 * 1024}), \ + F(type_big_write_to_region, \ + {{"type", "big_write_to_region"}}, \ + ExpBucketsWithRange{RAFT_REGION_BIG_WRITE_THRES, 4, RAFT_REGION_BIG_WRITE_MAX})) \ + M(tiflash_raft_snapshot_total_bytes, \ + "Bucketed snapshot total size", \ + Histogram, \ + F(type_approx_raft_snapshot, {{"type", "approx_raft_snapshot"}}, ExpBuckets{1024, 2, 24})) /* 16G */ \ + /* required by DBaaS */ \ + M(tiflash_server_info, \ + "Indicate the tiflash server info, and the value is the start timestamp (s).", \ + Gauge, \ + F(start_time, {"version", TiFlashBuildInfo::getReleaseVersion()}, {"hash", TiFlashBuildInfo::getGitHash()})) \ + M(tiflash_object_count, \ + "Number of objects", \ + Gauge, \ + F(type_count_of_establish_calldata, {"type", "count_of_establish_calldata"}), \ + F(type_count_of_mpptunnel, {"type", "count_of_mpptunnel"})) \ + M(tiflash_thread_count, \ + "Number of threads", \ + Gauge, \ + F(type_max_threads_of_thdpool, {"type", "thread_pool_total_max"}), \ + F(type_active_threads_of_thdpool, {"type", "thread_pool_active"}), \ + F(type_max_active_threads_of_thdpool, {"type", "thread_pool_active_max"}), \ + F(type_total_threads_of_thdpool, {"type", "thread_pool_total"}), \ + F(type_max_threads_of_raw, {"type", "total_max"}), \ + F(type_total_threads_of_raw, {"type", "total"}), \ + F(type_threads_of_client_cq_pool, {"type", "rpc_client_cq_pool"}), \ + F(type_threads_of_receiver_read_loop, {"type", "rpc_receiver_read_loop"}), \ + F(type_threads_of_receiver_reactor, {"type", "rpc_receiver_reactor"}), \ + F(type_max_threads_of_establish_mpp, {"type", "rpc_establish_mpp_max"}), \ + F(type_active_threads_of_establish_mpp, {"type", "rpc_establish_mpp"}), \ + F(type_max_threads_of_dispatch_mpp, {"type", "rpc_dispatch_mpp_max"}), \ + F(type_active_threads_of_dispatch_mpp, {"type", "rpc_dispatch_mpp"}), \ + F(type_active_rpc_async_worker, {"type", "rpc_async_worker_active"}), \ + F(type_total_rpc_async_worker, {"type", "rpc_async_worker_total"})) \ + M(tiflash_task_scheduler, \ + "Min-tso task scheduler", \ + Gauge, \ + F(type_min_tso, {"type", "min_tso"}), \ + F(type_waiting_queries_count, {"type", "waiting_queries_count"}), \ + F(type_active_queries_count, {"type", "active_queries_count"}), \ + F(type_waiting_tasks_count, {"type", "waiting_tasks_count"}), \ + F(type_active_tasks_count, {"type", "active_tasks_count"}), \ + F(type_global_estimated_thread_usage, {"type", "global_estimated_thread_usage"}), \ + F(type_estimated_thread_usage, {"type", "estimated_thread_usage"}), \ + F(type_thread_soft_limit, {"type", "thread_soft_limit"}), \ + F(type_thread_hard_limit, {"type", "thread_hard_limit"}), \ + F(type_hard_limit_exceeded_count, {"type", "hard_limit_exceeded_count"}), \ + F(type_group_entry_count, {"type", "group_entry_count"})) \ + M(tiflash_task_scheduler_waiting_duration_seconds, \ + "Bucketed histogram of task waiting for scheduling duration", \ + Histogram, \ + F(type_task_scheduler_waiting_duration, {{"type", "task_waiting_duration"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_storage_read_thread_counter, \ + "The counter of storage read thread", \ + Counter, \ + F(type_sche_no_pool, {"type", "sche_no_pool"}), \ + F(type_sche_no_slot, {"type", "sche_no_slot"}), \ + F(type_sche_no_ru, {"type", "sche_no_ru"}), \ + F(type_sche_no_segment, {"type", "sche_no_segment"}), \ + F(type_sche_active_segment_limit, {"type", "sche_active_segment_limit"}), \ + F(type_sche_from_cache, {"type", "sche_from_cache"}), \ + F(type_sche_new_task, {"type", "sche_new_task"}), \ + F(type_ru_exhausted, {"type", "ru_exhausted"}), \ + F(type_push_block_bytes, {"type", "push_block_bytes"}), \ + F(type_add_cache_succ, {"type", "add_cache_succ"}), \ + F(type_add_cache_stale, {"type", "add_cache_stale"}), \ + F(type_add_cache_reach_count_limit, {"type", "add_cache_reach_count_limit"}), \ + F(type_add_cache_total_bytes_limit, {"type", "add_cache_total_bytes_limit"}), \ + F(type_get_cache_miss, {"type", "get_cache_miss"}), \ + F(type_get_cache_part, {"type", "get_cache_part"}), \ + F(type_get_cache_hit, {"type", "get_cache_hit"}), \ + F(type_get_cache_copy, {"type", "get_cache_copy"})) \ + M(tiflash_storage_read_thread_gauge, \ + "The gauge of storage read thread", \ + Gauge, \ + F(type_merged_task, {"type", "merged_task"})) \ + M(tiflash_storage_read_thread_seconds, \ + "Bucketed histogram of read thread", \ + Histogram, \ + F(type_merged_task, {{"type", "merged_task"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_mpp_task_manager, \ + "The gauge of mpp task manager", \ + Gauge, \ + F(type_mpp_query_count, {"type", "mpp_query_count"})) \ + M(tiflash_mpp_task_monitor, \ + "Monitor the lifecycle of MPP Task", \ + Gauge, \ + F(type_longest_live_time, {"type", "longest_live_time"}), ) \ + M(tiflash_exchange_queueing_data_bytes, \ + "Total bytes of data contained in the queue", \ + Gauge, \ + F(type_send, {{"type", "send_queue"}}), \ + F(type_receive, {{"type", "recv_queue"}})) \ + M(tiflash_compute_request_unit, \ + "Request Unit used by tiflash compute", \ + Counter, \ + F(type_mpp, \ + {{"type", "mpp"}, \ + ComputeLabelHolder::instance().getClusterIdLabel(), \ + ComputeLabelHolder::instance().getProcessIdLabel()}), \ + F(type_cop, \ + {{"type", "cop"}, \ + ComputeLabelHolder::instance().getClusterIdLabel(), \ + ComputeLabelHolder::instance().getProcessIdLabel()}), \ + F(type_cop_stream, \ + {{"type", "cop_stream"}, \ + ComputeLabelHolder::instance().getClusterIdLabel(), \ + ComputeLabelHolder::instance().getProcessIdLabel()}), \ + F(type_batch, \ + {{"type", "batch"}, \ + ComputeLabelHolder::instance().getClusterIdLabel(), \ + ComputeLabelHolder::instance().getProcessIdLabel()})) \ + M(tiflash_shared_block_schemas, \ + "statistics about shared block schemas of ColumnFiles", \ + Gauge, \ + F(type_current_size, {{"type", "current_size"}}), \ + F(type_still_used_when_evict, {{"type", "still_used_when_evict"}}), \ + F(type_miss_count, {{"type", "miss_count"}}), \ + F(type_hit_count, {{"type", "hit_count"}})) \ + M(tiflash_storage_remote_stats, \ + "The file stats on remote store", \ + Gauge, \ + F(type_total_size, {"type", "total_size"}), \ + F(type_valid_size, {"type", "valid_size"}), \ + F(type_num_files, {"type", "num_files"})) \ + M(tiflash_storage_checkpoint_seconds, \ + "PageStorage checkpoint elapsed time", \ + Histogram, /* these command usually cost several seconds, increase the start bucket to 50ms */ \ + F(type_dump_checkpoint_snapshot, {{"type", "dump_checkpoint_snapshot"}}, ExpBuckets{0.05, 2, 20}), \ + F(type_dump_checkpoint_data, {{"type", "dump_checkpoint_data"}}, ExpBuckets{0.05, 2, 20}), \ + F(type_upload_checkpoint, {{"type", "upload_checkpoint"}}, ExpBuckets{0.05, 2, 20}), \ + F(type_copy_checkpoint_info, {{"type", "copy_checkpoint_info"}}, ExpBuckets{0.05, 2, 20})) \ + M(tiflash_storage_checkpoint_flow, \ + "The bytes flow cause by remote checkpoint", \ + Counter, \ + F(type_incremental, {"type", "incremental"}), \ + F(type_compaction, {"type", "compaction"})) \ + M(tiflash_storage_checkpoint_keys_by_types, \ + "The keys flow cause by remote checkpoint", \ + Counter, \ + F(type_raftengine, {"type", "raftengine"}), \ + F(type_kvengine, {"type", "kvengine"}), \ + F(type_kvstore, {"type", "kvstore"}), \ + F(type_data, {"type", "data"}), \ + F(type_log, {"type", "log"}), \ + F(type_meta, {"type", "kvstore"}), \ + F(type_localkv, {"type", "localkv"}), \ + F(type_unknown, {"type", "unknown"})) \ + M(tiflash_storage_checkpoint_flow_by_types, \ + "The bytes flow cause by remote checkpoint", \ + Counter, \ + F(type_raftengine, {"type", "raftengine"}), \ + F(type_kvengine, {"type", "kvengine"}), \ + F(type_kvstore, {"type", "kvstore"}), \ + F(type_data, {"type", "data"}), \ + F(type_log, {"type", "log"}), \ + F(type_meta, {"type", "kvstore"}), \ + F(type_localkv, {"type", "localkv"}), \ + F(type_unknown, {"type", "unknown"})) \ + M(tiflash_storage_page_data_by_types, \ + "The existing bytes stored in UniPageStorage", \ + Gauge, \ + F(type_raftengine, {"type", "raftengine"}), \ + F(type_kvengine, {"type", "kvengine"}), \ + F(type_kvstore, {"type", "kvstore"}), \ + F(type_data, {"type", "data"}), \ + F(type_log, {"type", "log"}), \ + F(type_meta, {"type", "kvstore"}), \ + F(type_localkv, {"type", "localkv"}), \ + F(type_unknown, {"type", "unknown"})) \ + M(tiflash_storage_s3_request_seconds, \ + "S3 request duration in seconds", \ + Histogram, \ + F(type_put_object, {{"type", "put_object"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_put_dmfile, {{"type", "put_dmfile"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_copy_object, {{"type", "copy_object"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_get_object, {{"type", "get_object"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_create_multi_part_upload, {{"type", "create_multi_part_upload"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_upload_part, {{"type", "upload_part"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_complete_multi_part_upload, {{"type", "complete_multi_part_upload"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_list_objects, {{"type", "list_objects"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_delete_object, {{"type", "delete_object"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_head_object, {{"type", "head_object"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_read_stream, {{"type", "read_stream"}}, ExpBuckets{0.0001, 2, 20})) \ + M(tiflash_storage_s3_http_request_seconds, \ + "S3 request duration breakdown in seconds", \ + Histogram, \ + F(type_dns, {{"type", "dns"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_connect, {{"type", "connect"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_request, {{"type", "request"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_response, {{"type", "response"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_pipeline_scheduler, \ + "pipeline scheduler", \ + Gauge, \ + F(type_waiting_tasks_count, {"type", "waiting_tasks_count"}), \ + F(type_cpu_pending_tasks_count, {"type", "cpu_pending_tasks_count"}), \ + F(type_cpu_executing_tasks_count, {"type", "cpu_executing_tasks_count"}), \ + F(type_io_pending_tasks_count, {"type", "io_pending_tasks_count"}), \ + F(type_io_executing_tasks_count, {"type", "io_executing_tasks_count"}), \ + F(type_cpu_task_thread_pool_size, {"type", "cpu_task_thread_pool_size"}), \ + F(type_io_task_thread_pool_size, {"type", "io_task_thread_pool_size"})) \ + M(tiflash_pipeline_task_duration_seconds, \ + "Bucketed histogram of pipeline task duration in seconds", \ + Histogram, /* these command usually cost several hundred milliseconds to several seconds, increase the start bucket to 5ms */ \ + F(type_cpu_execute, {{"type", "cpu_execute"}}, ExpBuckets{0.005, 2, 20}), \ + F(type_io_execute, {{"type", "io_execute"}}, ExpBuckets{0.005, 2, 20}), \ + F(type_cpu_queue, {{"type", "cpu_queue"}}, ExpBuckets{0.005, 2, 20}), \ + F(type_io_queue, {{"type", "io_queue"}}, ExpBuckets{0.005, 2, 20}), \ + F(type_await, {{"type", "await"}}, ExpBuckets{0.005, 2, 20})) \ + M(tiflash_pipeline_task_execute_max_time_seconds_per_round, \ + "Bucketed histogram of pipeline task execute max time per round in seconds", \ + Histogram, /* these command usually cost several hundred milliseconds to several seconds, increase the start bucket to 5ms */ \ + F(type_cpu, {{"type", "cpu"}}, ExpBuckets{0.005, 2, 20}), \ + F(type_io, {{"type", "io"}}, ExpBuckets{0.005, 2, 20})) \ + M(tiflash_pipeline_task_change_to_status, \ + "pipeline task change to status", \ + Counter, \ + F(type_to_waiting, {"type", "to_waiting"}), \ + F(type_to_running, {"type", "to_running"}), \ + F(type_to_io, {"type", "to_io"}), \ + F(type_to_finished, {"type", "to_finished"}), \ + F(type_to_error, {"type", "to_error"}), \ + F(type_to_cancelled, {"type", "to_cancelled"})) \ + M(tiflash_storage_s3_gc_status, \ + "S3 GC status", \ + Gauge, \ + F(type_lifecycle_added, {{"type", "lifecycle_added"}}), \ + F(type_lifecycle_failed, {{"type", "lifecycle_failed"}}), \ + F(type_owner, {{"type", "owner"}}), \ + F(type_running, {{"type", "running"}})) \ + M(tiflash_storage_s3_gc_seconds, \ + "S3 GC subprocess duration in seconds", \ + Histogram, /* these command usually cost several seconds, increase the start bucket to 500ms */ \ + F(type_total, {{"type", "total"}}, ExpBuckets{0.5, 2, 20}), \ + F(type_one_store, {{"type", "one_store"}}, ExpBuckets{0.5, 2, 20}), \ + F(type_read_locks, {{"type", "read_locks"}}, ExpBuckets{0.5, 2, 20}), \ + F(type_clean_locks, {{"type", "clean_locks"}}, ExpBuckets{0.5, 2, 20}), \ + F(type_clean_manifests, {{"type", "clean_manifests"}}, ExpBuckets{0.5, 2, 20}), \ + F(type_scan_then_clean_data_files, {{"type", "scan_then_clean_data_files"}}, ExpBuckets{0.5, 2, 20}), \ + F(type_clean_one_lock, {{"type", "clean_one_lock"}}, ExpBuckets{0.5, 2, 20})) \ + M(tiflash_storage_remote_cache, \ + "Operations of remote cache", \ + Counter, \ + F(type_dtfile_hit, {"type", "dtfile_hit"}), \ + F(type_dtfile_miss, {"type", "dtfile_miss"}), \ + F(type_dtfile_evict, {"type", "dtfile_evict"}), \ + F(type_dtfile_full, {"type", "dtfile_full"}), \ + F(type_dtfile_download, {"type", "dtfile_download"}), \ + F(type_dtfile_download_failed, {"type", "dtfile_download_failed"}), \ + F(type_page_hit, {"type", "page_hit"}), \ + F(type_page_miss, {"type", "page_miss"}), \ + F(type_page_evict, {"type", "page_evict"}), \ + F(type_page_full, {"type", "page_full"}), \ + F(type_page_download, {"type", "page_download"})) \ + M(tiflash_storage_remote_cache_bytes, \ + "Flow of remote cache", \ + Counter, \ + F(type_dtfile_evict_bytes, {"type", "dtfile_evict_bytes"}), \ + F(type_dtfile_download_bytes, {"type", "dtfile_download_bytes"}), \ + F(type_dtfile_read_bytes, {"type", "dtfile_read_bytes"}), \ + F(type_page_evict_bytes, {"type", "page_evict_bytes"}), \ + F(type_page_download_bytes, {"type", "page_download_bytes"}), \ + F(type_page_read_bytes, {"type", "page_read_bytes"})) \ + M(tiflash_storage_io_limiter_pending_seconds, \ + "I/O limiter pending duration in seconds", \ + Histogram, \ + F(type_fg_read, {{"type", "fg_read"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_bg_read, {{"type", "bg_read"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_fg_write, {{"type", "fg_write"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_bg_write, {{"type", "bg_write"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_system_seconds, \ + "system calls duration in seconds", \ + Histogram, \ + F(type_fsync, {{"type", "fsync"}}, ExpBuckets{0.0001, 2, 20})) \ + M(tiflash_storage_delta_index_cache, "", Counter, F(type_hit, {"type", "hit"}), F(type_miss, {"type", "miss"})) \ + M(tiflash_resource_group, \ + "meta info of resource group", \ + Gauge, \ + F(type_remaining_tokens, {"type", "remaining_tokens"}), \ + F(type_avg_speed, {"type", "avg_speed"}), \ + F(type_total_consumption, {"type", "total_consumption"}), \ + F(type_bucket_fill_rate, {"type", "bucket_fill_rate"}), \ + F(type_bucket_capacity, {"type", "bucket_capacity"}), \ + F(type_compute_ru_consumption, {"type", "compute_ru_consumption"}), \ + F(type_storage_ru_consumption, {"type", "storage_ru_consumption"}), \ + F(type_compute_ru_exhausted, {"type", "compute_ru_exhausted"}), \ + F(type_gac_req_acquire_tokens, {"type", "gac_req_acquire_tokens"}), \ + F(type_gac_req_ru_consumption_delta, {"type", "gac_req_ru_consumption_delta"}), \ + F(type_gac_resp_tokens, {"type", "gac_resp_tokens"}), \ + F(type_gac_resp_capacity, {"type", "gac_resp_capacity"})) \ + M(tiflash_storage_io_limiter_pending_count, \ + "I/O limiter pending count", \ + Counter, \ + F(type_fg_read, {"type", "fg_read"}), \ + F(type_bg_read, {"type", "bg_read"}), \ + F(type_fg_write, {"type", "fg_write"}), \ + F(type_bg_write, {"type", "bg_write"})) +>>>>>>> 904bec8a55 (Storages: Refine metrics of read threads and data sharing (#8653)) // clang-format on diff --git a/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp b/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp index cf9775ac8f0..bd2c81cbc40 100644 --- a/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp +++ b/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp @@ -266,6 +266,12 @@ void SegmentReadTaskPool::pushBlock(Block && block) { blk_stat.push(block); global_blk_stat.push(block); +<<<<<<< HEAD +======= + auto bytes = block.bytes(); + read_bytes_after_last_check += bytes; + GET_METRIC(tiflash_storage_read_thread_counter, type_push_block_bytes).Increment(bytes); +>>>>>>> 904bec8a55 (Storages: Refine metrics of read threads and data sharing (#8653)) q.push(std::move(block), nullptr); } diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index 1d50ea738c6..7309a8c59cd 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -2739,6 +2739,269 @@ "align": false, "alignLevel": null } +<<<<<<< HEAD +======= + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": null, + "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 44 + }, + "hiddenSeries": false, + "id": 267, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 250, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Limit", + "color": "#F2495C", + "hideTooltip": true, + "legend": false, + "linewidth": 2, + "nullPointMode": "connected" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum by (instance) (rate(tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"segment_sched.*\"}[1m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}} {{instance}}", + "refId": "A", + "step": 40 + }, + { + "exemplar": true, + "expr": "count by (instance) (tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"segment_sched.*\"})", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Limit", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Segment Scheduler", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": null, + "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 44 + }, + "hiddenSeries": false, + "id": 268, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 250, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Limit", + "color": "#F2495C", + "hideTooltip": true, + "legend": false, + "linewidth": 2, + "nullPointMode": "connected" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum by (instance) (rate(tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"SegmentReader.*\"}[1m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}} {{instance}}", + "refId": "A", + "step": 40 + }, + { + "exemplar": true, + "expr": "count by (instance) (tiflash_proxy_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"SegmentReader.*\"})", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Limit", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Segment Reader", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } +>>>>>>> 904bec8a55 (Storages: Refine metrics of read threads and data sharing (#8653)) } ], "title": "Threads CPU", @@ -6784,6 +7047,832 @@ { "aliasColors": {}, "bars": false, +<<<<<<< HEAD +======= + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The current processing number of segments' background management", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 62 + }, + "hiddenSeries": false, + "id": 67, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(tiflash_system_current_metric_DT_DeltaMerge{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "delta_merge-{{instance}}", + "refId": "A" + }, + { + "expr": "avg(tiflash_system_current_metric_DT_SegmentSplit{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "seg_split-{{instance}}", + "refId": "B" + }, + { + "expr": "avg(tiflash_system_current_metric_DT_SegmentMerge{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "seg_merge-{{instance}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Current Data Management Tasks", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Errors of DeltaIndex", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 62 + }, + "hiddenSeries": false, + "id": 237, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tiflash_system_profile_event_DTDeltaIndexError{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "DeltaIndexError-{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "DeltaIndexError", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "cps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "opm", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The storage I/O limiter metrics.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 70 + }, + "hiddenSeries": false, + "id": 84, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tiflash_storage_io_limiter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "I/O Limiter Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The storage I/O limiter metrics.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 70 + }, + "hiddenSeries": false, + "id": 266, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:563", + "alias": "/-/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_io_limiter_pending_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "I/O Limiter Pending Rate and Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:230", + "decimals": 0, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:231", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "I/O Limiter current pending count.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 70 + }, + "hiddenSeries": false, + "id": 86, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/pending/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(tiflash_system_current_metric_RateLimiterPendingWriteRequest{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "other-current-{{instance}}", + "refId": "A", + "hide": true + }, + { + "exemplar": true, + "expr": "avg(tiflash_system_current_metric_IOLimiterPendingBgWriteReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "bgwrite-current-{{instance}}", + "refId": "B" + }, + { + "exemplar": true, + "expr": "avg(tiflash_system_current_metric_IOLimiterPendingFgWriteReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "fgwrite-current-{{instance}}", + "refId": "C" + }, + { + "exemplar": true, + "expr": "avg(tiflash_system_current_metric_IOLimiterPendingBgReadReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "bgread-current-{{instance}}", + "refId": "D" + }, + { + "exemplar": true, + "expr": "avg(tiflash_system_current_metric_IOLimiterPendingFgReadReq{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "fgread-current-{{instance}}", + "refId": "E" + }, + { + "exemplar": true, + "expr": "histogram_quantile(1.00, sum(round(1000000000*rate(tiflash_storage_io_limiter_pending_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))) by (le, type) / 1000000000)", + "hide": false, + "interval": "", + "legendFormat": "{{type}}-pending-max", + "refId": "F" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_io_limiter_pending_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "hide": false, + "interval": "", + "legendFormat": "{{type}}-pending-P99", + "refId": "G" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "I/O Limiter Current Pending Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The information of data sharing cache hit ratio. Data sharing cache is purpose-built for OLAP workload that can reduce repeated data reads of concurrent table scanning.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 78 + }, + "hiddenSeries": false, + "id": 132, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "cache_hit_ratio", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"add_cache_stale|add_cache_succ|add_cache_total_bytes_limit|add_cache_reach_count_limit\"}[1m])) by (type)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_hit|get_cache_copy\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "get_cache_hit", + "refId": "C" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_miss|get_cache_hit|get_cache_part|get_cache_copy\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "get_cache_total", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_hit|get_cache_copy\"}[1m]))/sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_miss|get_cache_hit|get_cache_part|get_cache_copy\"}[1m]))", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "cache_hit_ratio", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Data Sharing", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, +>>>>>>> 904bec8a55 (Storages: Refine metrics of read threads and data sharing (#8653)) + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The information of read thread scheduling.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 78 + }, + "hiddenSeries": false, + "id": 269, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:308", + "alias": "/push_block/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"ru_exhausted|sche_active_segment_limit|sche_from_cache|sche_new_task|sche_no_pool|sche_no_ru|sche_no_segment|sche_no_slot|push_block_bytes\"}[1m])) by (type)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Read Thread Scheduling", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:321", + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:322", + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}",