diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 998c15ccaf2c..e3b6f43bc4db 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -464,6 +464,24 @@ static LAST_RECORD_LSN: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +static DISK_CONSISTENT_LSN: Lazy = Lazy::new(|| { + register_int_gauge_vec!( + "pageserver_disk_consistent_lsn", + "Disk consistent LSN grouped by timeline", + &["tenant_id", "shard_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + +pub(crate) static PROJECTED_REMOTE_CONSISTENT_LSN: Lazy = Lazy::new(|| { + register_uint_gauge_vec!( + "pageserver_projected_remote_consistent_lsn", + "Projected remote consistent LSN grouped by timeline", + &["tenant_id", "shard_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + static PITR_HISTORY_SIZE: Lazy = Lazy::new(|| { register_uint_gauge_vec!( "pageserver_pitr_history_size", @@ -2394,7 +2412,8 @@ pub(crate) struct TimelineMetrics { pub load_layer_map_histo: StorageTimeMetrics, pub garbage_collect_histo: StorageTimeMetrics, pub find_gc_cutoffs_histo: StorageTimeMetrics, - pub last_record_gauge: IntGauge, + pub last_record_lsn_gauge: IntGauge, + pub disk_consistent_lsn_gauge: IntGauge, pub pitr_history_size: UIntGauge, pub archival_size: UIntGauge, pub(crate) layer_size_image: UIntGauge, @@ -2475,7 +2494,11 @@ impl TimelineMetrics { &shard_id, &timeline_id, ); - let last_record_gauge = LAST_RECORD_LSN + let last_record_lsn_gauge = LAST_RECORD_LSN + .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) + .unwrap(); + + let disk_consistent_lsn_gauge = DISK_CONSISTENT_LSN .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) .unwrap(); @@ -2578,7 +2601,8 @@ impl TimelineMetrics { garbage_collect_histo, find_gc_cutoffs_histo, load_layer_map_histo, - last_record_gauge, + last_record_lsn_gauge, + disk_consistent_lsn_gauge, pitr_history_size, archival_size, layer_size_image, @@ -2642,6 +2666,7 @@ impl TimelineMetrics { let timeline_id = &self.timeline_id; let shard_id = &self.shard_id; let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); + let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]); { @@ -2805,6 +2830,7 @@ pub(crate) struct RemoteTimelineClientMetrics { calls: Mutex>, bytes_started_counter: Mutex>, bytes_finished_counter: Mutex>, + pub(crate) projected_remote_consistent_lsn_gauge: UIntGauge, } impl RemoteTimelineClientMetrics { @@ -2819,6 +2845,10 @@ impl RemoteTimelineClientMetrics { .unwrap(), ); + let projected_remote_consistent_lsn_gauge = PROJECTED_REMOTE_CONSISTENT_LSN + .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str]) + .unwrap(); + RemoteTimelineClientMetrics { tenant_id: tenant_id_str, shard_id: shard_id_str, @@ -2827,6 +2857,7 @@ impl RemoteTimelineClientMetrics { bytes_started_counter: Mutex::new(HashMap::default()), bytes_finished_counter: Mutex::new(HashMap::default()), remote_physical_size_gauge, + projected_remote_consistent_lsn_gauge, } } @@ -3040,6 +3071,7 @@ impl Drop for RemoteTimelineClientMetrics { calls, bytes_started_counter, bytes_finished_counter, + projected_remote_consistent_lsn_gauge, } = self; for ((a, b), _) in calls.get_mut().unwrap().drain() { let mut res = [Ok(()), Ok(())]; @@ -3069,6 +3101,14 @@ impl Drop for RemoteTimelineClientMetrics { let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); } + { + let _ = projected_remote_consistent_lsn_gauge; + let _ = PROJECTED_REMOTE_CONSISTENT_LSN.remove_label_values(&[ + tenant_id, + shard_id, + timeline_id, + ]); + } } } diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 007bd3eef083..00df72977e8f 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -2190,6 +2190,9 @@ impl RemoteTimelineClient { upload_queue.clean.1 = Some(task.task_id); let lsn = upload_queue.clean.0.metadata.disk_consistent_lsn(); + self.metrics + .projected_remote_consistent_lsn_gauge + .set(lsn.0); if self.generation.is_none() { // Legacy mode: skip validating generation diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 1414bef0a5a2..444c0b762384 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -2359,7 +2359,7 @@ impl Timeline { result .metrics - .last_record_gauge + .last_record_lsn_gauge .set(disk_consistent_lsn.0 as i64); result }) @@ -3482,7 +3482,7 @@ impl Timeline { pub(crate) fn finish_write(&self, new_lsn: Lsn) { assert!(new_lsn.is_aligned()); - self.metrics.last_record_gauge.set(new_lsn.0 as i64); + self.metrics.last_record_lsn_gauge.set(new_lsn.0 as i64); self.last_record_lsn.advance(new_lsn); } @@ -3850,6 +3850,10 @@ impl Timeline { fn set_disk_consistent_lsn(&self, new_value: Lsn) -> bool { let old_value = self.disk_consistent_lsn.fetch_max(new_value); assert!(new_value >= old_value, "disk_consistent_lsn must be growing monotonously at runtime; current {old_value}, offered {new_value}"); + + self.metrics + .disk_consistent_lsn_gauge + .set(new_value.0 as i64); new_value != old_value } diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index ffdbd988a58f..1278ed1aef54 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -152,6 +152,8 @@ def counter(name: str) -> str: "pageserver_resident_physical_size", "pageserver_io_operations_bytes_total", "pageserver_last_record_lsn", + "pageserver_disk_consistent_lsn", + "pageserver_projected_remote_consistent_lsn", "pageserver_standby_horizon", "pageserver_smgr_query_seconds_bucket", "pageserver_smgr_query_seconds_count",