From c9f4869dca62b4f0a8bdb34af163c7a76867d6e9 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 18 Oct 2023 14:33:55 +0200 Subject: [PATCH 1/7] passt: Fix sending the buffer using multiple iterations in write_loop Since bug https://bugs.passt.top/show_bug.cgi?id=74 is now fixed in passt, it is now became possible to test with small socket buffers, doing so revealed this mistake. Signed-off-by: Matej Hrica --- src/devices/src/virtio/net/passt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/devices/src/virtio/net/passt.rs b/src/devices/src/virtio/net/passt.rs index ccbaf7ce..4c5e951e 100644 --- a/src/devices/src/virtio/net/passt.rs +++ b/src/devices/src/virtio/net/passt.rs @@ -168,7 +168,7 @@ impl Passt { while bytes_send < buf.len() { match send( self.fd, - buf, + &buf[bytes_send..], MsgFlags::MSG_DONTWAIT | MsgFlags::MSG_NOSIGNAL, ) { Ok(size) => bytes_send += size, From 3239fa69f72682a30ed667e67722d00da1c38618 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 18 Oct 2023 11:46:16 +0200 Subject: [PATCH 2/7] passt: properly handle multiple consecutive partial writes to socket Previously when write_loop was called by try_finish_write and it wrote something to the socket (but not fully finish the current frame), it would set last_partial_write_length to an incorect value. This commit also changes the type of last_partial_write_length to usize from, Option because it seems to be simpler to work with. Signed-off-by: Matej Hrica --- src/devices/src/virtio/net/passt.rs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/devices/src/virtio/net/passt.rs b/src/devices/src/virtio/net/passt.rs index 4c5e951e..43cf9bbe 100644 --- a/src/devices/src/virtio/net/passt.rs +++ b/src/devices/src/virtio/net/passt.rs @@ -1,5 +1,4 @@ use nix::sys::socket::{recv, send, setsockopt, sockopt, MsgFlags}; -use std::num::NonZeroUsize; use std::os::fd::{AsRawFd, RawFd}; use vm_memory::VolatileMemory; @@ -31,7 +30,8 @@ pub struct Passt { fd: RawFd, // 0 when a frame length has not been read expecting_frame_length: u32, - last_partial_write_length: Option, + // 0 if last write is fully complete, otherwise the length that was written + last_partial_write_length: usize, } impl Passt { @@ -44,7 +44,7 @@ impl Passt { Self { fd: passt_fd, expecting_frame_length: 0, - last_partial_write_length: None, + last_partial_write_length: 0, } } @@ -76,7 +76,7 @@ impl Passt { /// If this function returns WriteError::PartialWrite, you have to finish the write using /// try_finish_write. pub fn write_frame(&mut self, hdr_len: usize, buf: &mut [u8]) -> Result<(), WriteError> { - if self.last_partial_write_length.is_some() { + if self.last_partial_write_length != 0 { panic!("Cannot write a frame to passt, while a partial write is not resolved."); } assert!( @@ -94,7 +94,7 @@ impl Passt { } pub fn has_unfinished_write(&self) -> bool { - self.last_partial_write_length.is_some() + self.last_partial_write_length != 0 } /// Try to finish a partial write @@ -104,12 +104,13 @@ impl Passt { /// * `hdr_len` - must be the same value as passed to write_frame, that caused the partial write /// * `buf` - must be same buffer that was given to write_frame, that caused the partial write pub fn try_finish_write(&mut self, hdr_len: usize, buf: &[u8]) -> Result<(), WriteError> { - if let Some(written_bytes) = self.last_partial_write_length { + if self.last_partial_write_length != 0 { + let already_written = self.last_partial_write_length; log::trace!("Requested to finish partial write"); - self.write_loop(&buf[hdr_len - PASST_HEADER_LEN + written_bytes.get()..])?; + self.write_loop(&buf[hdr_len - PASST_HEADER_LEN + already_written..])?; log::debug!( "Finished partial write ({}bytes written before)", - written_bytes.get() + already_written ) } @@ -181,7 +182,8 @@ impl Passt { "Wrote {} bytes, but socket blocked, will need try_finish_write() to finish", bytes_send ); - self.last_partial_write_length = Some(bytes_send.try_into().unwrap()); + + self.last_partial_write_length += bytes_send; return Err(WriteError::PartialWrite); } } @@ -189,7 +191,7 @@ impl Passt { Err(e) => return Err(WriteError::Internal(e)), } } - self.last_partial_write_length = None; + self.last_partial_write_length = 0; Ok(()) } } From 181da11678a584a6a82e9949f70b19807d1498a2 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 18 Oct 2023 16:14:10 +0200 Subject: [PATCH 3/7] virtio/net: continue processing tx queue when socket is writable again Signed-off-by: Matej Hrica --- src/devices/src/virtio/net/device.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/devices/src/virtio/net/device.rs b/src/devices/src/virtio/net/device.rs index c0df1876..9f9e133d 100644 --- a/src/devices/src/virtio/net/device.rs +++ b/src/devices/src/virtio/net/device.rs @@ -172,7 +172,12 @@ impl Net { .passt .try_finish_write(vnet_hdr_len(), &self.tx_frame_buf[..self.tx_frame_len]) { - Ok(()) | Err(passt::WriteError::PartialWrite | passt::WriteError::NothingWritten) => (), + Ok(()) => { + if let Err(e) = self.process_tx() { + log::error!("Failed to continue processing tx after passt socket was writable again: {e:?}"); + } + } + Err(passt::WriteError::PartialWrite | passt::WriteError::NothingWritten) => {} Err(e @ passt::WriteError::Internal(_)) => { log::error!("Failed to finish write: {e:?}"); } From 7a294b44f8bebcece523e83b5398431e42bf763c Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 18 Oct 2023 15:15:56 +0200 Subject: [PATCH 4/7] virtio-net: do not drop packets - just leave them in the virtio queue With the default small socket buffer sizes of 204KiB, this seems to masively improve iperf3 TCP upload from guest to host. The throughput is now around 33 Gbits/sec instead of 130 Mbits/sec. To set the socket buffer sizes you can use: $ sudo sysctl -w net.core.rmem_max=$((204*1024)) -w net.core.wmem_max=$((204*1024)) Tested with passt version: 0^20231004.gf851084-1.fc38.x86_64 Signed-off-by: Matej Hrica --- src/devices/src/virtio/net/device.rs | 37 +++++++++------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/src/devices/src/virtio/net/device.rs b/src/devices/src/virtio/net/device.rs index 9f9e133d..17326c2f 100644 --- a/src/devices/src/virtio/net/device.rs +++ b/src/devices/src/virtio/net/device.rs @@ -238,29 +238,17 @@ impl Net { let tx_queue = &mut self.queues[TX_INDEX]; - let drop_rest_of_frames = |tx_queue: &mut Queue| { - let mut dropped_frames = 0; - while let Some(head) = tx_queue.pop(mem) { - tx_queue.add_used(mem, head.index, 0); - dropped_frames += 1; - } - dropped_frames - }; - if self.passt.has_unfinished_write() && self .passt .try_finish_write(vnet_hdr_len(), &self.tx_frame_buf[..self.tx_frame_len]) .is_err() { - let dropped_frames = drop_rest_of_frames(tx_queue); - log::trace!("Dropped {dropped_frames} frames, due to unfinished partial write"); - self.signal_used_queue().map_err(TxError::DeviceError)?; + log::trace!("Cannot process tx because of unfinished partial write!"); return Ok(()); } let mut raise_irq = false; - let mut dropped_frames = 0; while let Some(head) = tx_queue.pop(mem) { let head_index = head.index; @@ -297,9 +285,6 @@ impl Net { } } - tx_queue.add_used(mem, head_index, 0); - raise_irq = true; - self.tx_frame_len = read_count; match self .passt @@ -307,10 +292,11 @@ impl Net { { Ok(()) => { self.tx_frame_len = 0; + tx_queue.add_used(mem, head_index, 0); + raise_irq = true; } Err(passt::WriteError::NothingWritten) => { - self.tx_frame_len = 0; - dropped_frames += drop_rest_of_frames(tx_queue); + tx_queue.undo_pop(); break; } Err(passt::WriteError::PartialWrite) => { @@ -318,25 +304,24 @@ impl Net { /* This situation should be pretty rare, assuming reasonably sized socket buffers. We have written only a part of a frame to the passt socket (the socket is full). - But we cannot wait for passt to process our sending frames, because passt + + The frame we have read from the guest remains in tx_frame_buf, and will be sent + later. + + Note that we cannot wait for passt to process our sending frames, because passt could be blocked on sending a remainder of a frame to us - us waiting for passt would cause a deadlock. - The guest, expects us to process the tx_queue, so we drop the rest of the frames. */ - dropped_frames += drop_rest_of_frames(tx_queue); + tx_queue.add_used(mem, head_index, 0); + raise_irq = true; break; } - Err( e @ passt::WriteError::Internal(_) | e @ passt::WriteError::ProcessNotRunning, ) => return Err(TxError::Passt(e)), } } - if dropped_frames > 0 { - log::trace!("Dropped {dropped_frames} frames"); - } - if raise_irq { self.signal_used_queue().map_err(TxError::DeviceError)?; } From cb44d048100ae4bb17ca2355eebbea8bbf084b31 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 18 Oct 2023 17:55:48 +0200 Subject: [PATCH 5/7] passt: fix misleading comments Signed-off-by: Matej Hrica --- src/devices/src/virtio/net/passt.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/devices/src/virtio/net/passt.rs b/src/devices/src/virtio/net/passt.rs index 43cf9bbe..60f8fd3f 100644 --- a/src/devices/src/virtio/net/passt.rs +++ b/src/devices/src/virtio/net/passt.rs @@ -48,7 +48,7 @@ impl Passt { } } - /// Try to read a frame from passt. If no bytes are available reports PasstError::WouldBlock + /// Try to read a frame from passt. If no bytes are available reports ReadError::NothingRead pub fn read_frame(&mut self, buf: &mut [u8]) -> Result { if self.expecting_frame_length == 0 { self.expecting_frame_length = { @@ -122,7 +122,6 @@ impl Passt { } /// Try to read until filling the whole slice. - /// May return WouldBlock only if the first read fails fn read_loop(&self, buf: &mut [u8], block_until_has_data: bool) -> Result<(), ReadError> { let mut bytes_read = 0; From 6669f1fe2307fd63cc25a7f37305e9792a34aa21 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Thu, 19 Oct 2023 10:22:14 +0200 Subject: [PATCH 6/7] passt: log socket buffer sizes Signed-off-by: Matej Hrica --- src/devices/src/virtio/net/passt.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/devices/src/virtio/net/passt.rs b/src/devices/src/virtio/net/passt.rs index 60f8fd3f..ee19ee09 100644 --- a/src/devices/src/virtio/net/passt.rs +++ b/src/devices/src/virtio/net/passt.rs @@ -1,4 +1,4 @@ -use nix::sys::socket::{recv, send, setsockopt, sockopt, MsgFlags}; +use nix::sys::socket::{getsockopt, recv, send, setsockopt, sockopt, MsgFlags}; use std::os::fd::{AsRawFd, RawFd}; use vm_memory::VolatileMemory; @@ -41,6 +41,12 @@ impl Passt { log::warn!("Failed to increase SO_SNDBUF (performance may be decreased): {e}"); } + log::debug!( + "passt socket (fd {passt_fd}) buffer sizes: SndBuf={:?} RcvBuf={:?}", + getsockopt(passt_fd, sockopt::SndBuf), + getsockopt(passt_fd, sockopt::RcvBuf) + ); + Self { fd: passt_fd, expecting_frame_length: 0, From 8b2dc3d3b83eda9502eef9974b44a85c7313f8dd Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 29 Nov 2023 17:08:54 +0100 Subject: [PATCH 7/7] Fix clippy warning Signed-off-by: Matej Hrica --- src/vmm/src/vmm_config/net.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vmm/src/vmm_config/net.rs b/src/vmm/src/vmm_config/net.rs index b210bc1f..e50e126e 100644 --- a/src/vmm/src/vmm_config/net.rs +++ b/src/vmm/src/vmm_config/net.rs @@ -48,7 +48,7 @@ impl NetBuilder { /// Creates an empty list of Network Devices. pub fn new() -> Self { NetBuilder { - /// List of built network devices. + // List of built network devices. net_devices: Vec::new(), } }