Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1.7] backport IO metrics from main #4490

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,13 @@ and this project adheres to
be deduced from the available emitted metrics.
- [#4360](https://github.com/firecracker-microvm/firecracker/pull/4360): Added
dev-preview support for backing a VM's guest memory by 2M hugetlbfs pages.
Please see the [documentation](docs/hugepages.md) for more information.
Please see the [documentation](docs/hugepages.md) for more information.
- [#4490](https://github.com/firecracker-microvm/firecracker/pull/4490): Added
block and net device metrics for file/tap access latencies and queue backlog
lengths, which can be used to analyse saturation of the Firecracker VMM thread
and underlying layers. Queue backlog length metrics are flushed periodically.
They can be used to estimate an average queue length per request by dividing
its value by the number of requests served.

### Changed

Expand Down
1 change: 1 addition & 0 deletions src/vmm/src/devices/virtio/block/virtio/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ impl VirtioBlock {
let mut used_any = false;

while let Some(head) = queue.pop_or_enable_notification(mem) {
self.metrics.remaining_reqs_count.add(queue.len(mem).into());
let processing_result = match Request::parse(&head, mem, self.disk.nsectors) {
Ok(request) => {
if request.rate_limit(&mut self.rate_limiter) {
Expand Down
25 changes: 23 additions & 2 deletions src/vmm/src/devices/virtio/block/virtio/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ use std::sync::{Arc, RwLock};
use serde::ser::SerializeMap;
use serde::{Serialize, Serializer};

use crate::logger::{IncMetric, SharedIncMetric};
use crate::logger::{IncMetric, LatencyAggregateMetrics, SharedIncMetric};

/// map of block drive id and metrics
/// this should be protected by a lock before accessing.
Expand All @@ -105,7 +105,7 @@ impl BlockMetricsPerDevice {
.write()
.unwrap()
.metrics
.insert(drive_id.clone(), Arc::new(BlockDeviceMetrics::default()));
.insert(drive_id.clone(), Arc::new(BlockDeviceMetrics::new()));
}
METRICS
.read()
Expand Down Expand Up @@ -178,14 +178,29 @@ pub struct BlockDeviceMetrics {
pub read_count: SharedIncMetric,
/// Number of successful write operations.
pub write_count: SharedIncMetric,
/// Duration of all read operations.
pub read_agg: LatencyAggregateMetrics,
/// Duration of all write operations.
pub write_agg: LatencyAggregateMetrics,
/// Number of rate limiter throttling events.
pub rate_limiter_throttled_events: SharedIncMetric,
/// Number of virtio events throttled because of the IO engine.
/// This happens when the io_uring submission queue is full.
pub io_engine_throttled_events: SharedIncMetric,
/// Number of remaining requests in the queue.
pub remaining_reqs_count: SharedIncMetric,
}

impl BlockDeviceMetrics {
/// Construct a `BlockDeviceMetrics` with latency-aggregate fields explicitly
/// initialized via `LatencyAggregateMetrics::new()` and all remaining counters
/// taken from `Default` — presumably because `LatencyAggregateMetrics` needs a
/// const/explicit constructor rather than `#[derive(Default)]`; confirm against
/// its definition in the logger module.
pub fn new() -> Self {
Self {
read_agg: LatencyAggregateMetrics::new(),
write_agg: LatencyAggregateMetrics::new(),
..Default::default()
}
}

/// block metrics are SharedIncMetric where the diff of current vs
/// old is serialized i.e. serialize_u64(current-old).
/// So to have the aggregate serialized in same way we need to
Expand All @@ -210,10 +225,16 @@ impl BlockDeviceMetrics {
self.write_bytes.add(other.write_bytes.fetch_diff());
self.read_count.add(other.read_count.fetch_diff());
self.write_count.add(other.write_count.fetch_diff());
self.read_agg.sum_us.add(other.read_agg.sum_us.fetch_diff());
self.write_agg
.sum_us
.add(other.write_agg.sum_us.fetch_diff());
self.rate_limiter_throttled_events
.add(other.rate_limiter_throttled_events.fetch_diff());
self.io_engine_throttled_events
.add(other.io_engine_throttled_events.fetch_diff());
self.remaining_reqs_count
.add(other.remaining_reqs_count.fetch_diff());
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/vmm/src/devices/virtio/block/virtio/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -373,10 +373,12 @@ impl Request {
let pending = self.to_pending_request(desc_idx);
let res = match self.r#type {
RequestType::In => {
let _metric = block_metrics.read_agg.record_latency_metrics();
disk.file_engine
.read(self.offset(), mem, self.data_addr, self.data_len, pending)
}
RequestType::Out => {
let _metric = block_metrics.write_agg.record_latency_metrics();
disk.file_engine
.write(self.offset(), mem, self.data_addr, self.data_len, pending)
}
Expand Down
4 changes: 4 additions & 0 deletions src/vmm/src/devices/virtio/net/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ impl Net {
});
}

let _metric = net_metrics.tap_write_agg.record_latency_metrics();
match Self::write_tap(tap, frame_iovec) {
Ok(_) => {
let len = frame_iovec.len() as u64;
Expand Down Expand Up @@ -591,6 +592,9 @@ impl Net {
let tx_queue = &mut self.queues[TX_INDEX];

while let Some(head) = tx_queue.pop_or_enable_notification(mem) {
self.metrics
.tx_remaining_reqs_count
.add(tx_queue.len(mem).into());
let head_index = head.index;
// Parse IoVecBuffer from descriptor head
let buffer = match IoVecBuffer::from_descriptor_chain(head) {
Expand Down
21 changes: 19 additions & 2 deletions src/vmm/src/devices/virtio/net/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ use std::sync::{Arc, RwLock};
use serde::ser::SerializeMap;
use serde::{Serialize, Serializer};

use crate::logger::{IncMetric, SharedIncMetric};
use crate::logger::{IncMetric, LatencyAggregateMetrics, SharedIncMetric};

/// map of network interface id and metrics
/// this should be protected by a lock before accessing.
Expand All @@ -107,7 +107,7 @@ impl NetMetricsPerDevice {
.write()
.unwrap()
.metrics
.insert(iface_id.clone(), Arc::new(NetDeviceMetrics::default()));
.insert(iface_id.clone(), Arc::new(NetDeviceMetrics::new()));
}
METRICS
.read()
Expand Down Expand Up @@ -184,6 +184,8 @@ pub struct NetDeviceMetrics {
pub tap_read_fails: SharedIncMetric,
/// Number of times writing to TAP failed.
pub tap_write_fails: SharedIncMetric,
/// Duration of all tap write operations.
pub tap_write_agg: LatencyAggregateMetrics,
/// Number of transmitted bytes.
pub tx_bytes_count: SharedIncMetric,
/// Number of malformed TX frames.
Expand All @@ -204,9 +206,19 @@ pub struct NetDeviceMetrics {
pub tx_rate_limiter_throttled: SharedIncMetric,
/// Number of packets with a spoofed mac, sent by the guest.
pub tx_spoofed_mac_count: SharedIncMetric,
/// Number of remaining requests in the TX queue.
pub tx_remaining_reqs_count: SharedIncMetric,
}

impl NetDeviceMetrics {
/// Construct a `NetDeviceMetrics` with the tap-write latency aggregate
/// explicitly initialized via `LatencyAggregateMetrics::new()` and all other
/// counters taken from `Default` — mirrors `BlockDeviceMetrics::new`; the
/// aggregate type apparently cannot simply derive `Default` (TODO confirm).
pub fn new() -> Self {
Self {
tap_write_agg: LatencyAggregateMetrics::new(),
..Default::default()
}
}

/// Net metrics are SharedIncMetric where the diff of current vs
/// old is serialized i.e. serialize_u64(current-old).
/// So to have the aggregate serialized in same way we need to
Expand Down Expand Up @@ -239,6 +251,9 @@ impl NetDeviceMetrics {
self.rx_count.add(other.rx_count.fetch_diff());
self.tap_read_fails.add(other.tap_read_fails.fetch_diff());
self.tap_write_fails.add(other.tap_write_fails.fetch_diff());
self.tap_write_agg
.sum_us
.add(other.tap_write_agg.sum_us.fetch_diff());
self.tx_bytes_count.add(other.tx_bytes_count.fetch_diff());
self.tx_malformed_frames
.add(other.tx_malformed_frames.fetch_diff());
Expand All @@ -256,6 +271,8 @@ impl NetDeviceMetrics {
.add(other.tx_rate_limiter_throttled.fetch_diff());
self.tx_spoofed_mac_count
.add(other.tx_spoofed_mac_count.fetch_diff());
self.tx_remaining_reqs_count
.add(other.tx_remaining_reqs_count.fetch_diff());
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/vmm/src/devices/virtio/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ impl Queue {
}

/// Returns the number of yet-to-be-popped descriptor chains in the avail ring.
fn len<M: GuestMemory>(&self, mem: &M) -> u16 {
pub fn len<M: GuestMemory>(&self, mem: &M) -> u16 {
debug_assert!(self.is_layout_valid(mem));

(self.avail_idx(mem) - self.next_avail).0
Expand Down
4 changes: 2 additions & 2 deletions src/vmm/src/logger/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ pub use logging::{
DEFAULT_INSTANCE_ID, DEFAULT_LEVEL, INSTANCE_ID, LOGGER,
};
pub use metrics::{
IncMetric, MetricsError, ProcessTimeReporter, SharedIncMetric, SharedStoreMetric, StoreMetric,
METRICS,
IncMetric, LatencyAggregateMetrics, MetricsError, ProcessTimeReporter, SharedIncMetric,
SharedStoreMetric, StoreMetric, METRICS,
};

/// Alias for `std::io::LineWriter<std::fs::File>`.
Expand Down
39 changes: 35 additions & 4 deletions tests/host_tools/fcmetrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def validate_fc_metrics(metrics):
"write_count",
"rate_limiter_throttled_events",
"io_engine_throttled_events",
"remaining_reqs_count",
],
"deprecated_api": [
"deprecated_http_api_calls",
Expand Down Expand Up @@ -137,6 +138,7 @@ def validate_fc_metrics(metrics):
"tx_rate_limiter_event_count",
"tx_rate_limiter_throttled",
"tx_spoofed_mac_count",
"tx_remaining_reqs_count",
],
"patch_api_requests": [
"drive_count",
Expand Down Expand Up @@ -234,6 +236,16 @@ def validate_fc_metrics(metrics):
],
}

latency_agg_metrics = {
"block": [
"read_agg",
"write_agg",
],
"net": [
"tap_write_agg",
],
}

# validate timestamp before jsonschema validation, which takes some more time
utc_time = datetime.datetime.now(datetime.timezone.utc)
utc_timestamp_ms = math.floor(utc_time.timestamp() * 1000)
Expand Down Expand Up @@ -277,7 +289,14 @@ def validate_fc_metrics(metrics):
"properties": {},
}
for metrics_field in metrics_fields:
metrics_schema["properties"][metrics_field] = {"type": "number"}
if (
metrics_name in latency_agg_metrics
and metrics_field in latency_agg_metrics[metrics_name]
):
metrics_type = "object"
else:
metrics_type = "number"
metrics_schema["properties"][metrics_field] = {"type": metrics_type}
firecracker_metrics_schema["properties"][metrics_name] = metrics_schema
firecracker_metrics_schema["required"].append(metrics_name)

Expand Down Expand Up @@ -351,9 +370,21 @@ def validate_per_device_metrics(self, fc_metrics):
):
actual_num_devices += 1
for metrics_name, metric_value in component_metric_values.items():
if metrics_name not in metrics_calculated:
metrics_calculated[metrics_name] = 0
metrics_calculated[metrics_name] += metric_value
if isinstance(metric_value, int):
if metrics_name not in metrics_calculated:
metrics_calculated[metrics_name] = 0
metrics_calculated[metrics_name] += metric_value
elif isinstance(metric_value, dict):
# this is for LatencyAggregateMetrics metrics type
if metrics_name not in metrics_calculated:
metrics_calculated[metrics_name] = {
"min_us": 0,
"max_us": 0,
"sum_us": 0,
}
metrics_calculated[metrics_name]["sum_us"] += metric_value[
"sum_us"
]

assert self.num_dev == actual_num_devices
if self.aggr_supported:
Expand Down
Loading