diff --git a/src/devices/src/virtio/net/device.rs b/src/devices/src/virtio/net/device.rs index c0df1876..17326c2f 100644 --- a/src/devices/src/virtio/net/device.rs +++ b/src/devices/src/virtio/net/device.rs @@ -172,7 +172,12 @@ impl Net { .passt .try_finish_write(vnet_hdr_len(), &self.tx_frame_buf[..self.tx_frame_len]) { - Ok(()) | Err(passt::WriteError::PartialWrite | passt::WriteError::NothingWritten) => (), + Ok(()) => { + if let Err(e) = self.process_tx() { + log::error!("Failed to continue processing tx after passt socket was writable again: {e:?}"); + } + } + Err(passt::WriteError::PartialWrite | passt::WriteError::NothingWritten) => {} Err(e @ passt::WriteError::Internal(_)) => { log::error!("Failed to finish write: {e:?}"); } @@ -233,29 +238,17 @@ impl Net { let tx_queue = &mut self.queues[TX_INDEX]; - let drop_rest_of_frames = |tx_queue: &mut Queue| { - let mut dropped_frames = 0; - while let Some(head) = tx_queue.pop(mem) { - tx_queue.add_used(mem, head.index, 0); - dropped_frames += 1; - } - dropped_frames - }; - if self.passt.has_unfinished_write() && self .passt .try_finish_write(vnet_hdr_len(), &self.tx_frame_buf[..self.tx_frame_len]) .is_err() { - let dropped_frames = drop_rest_of_frames(tx_queue); - log::trace!("Dropped {dropped_frames} frames, due to unfinished partial write"); - self.signal_used_queue().map_err(TxError::DeviceError)?; + log::trace!("Cannot process tx because of unfinished partial write!"); return Ok(()); } let mut raise_irq = false; - let mut dropped_frames = 0; while let Some(head) = tx_queue.pop(mem) { let head_index = head.index; @@ -292,9 +285,6 @@ impl Net { } } - tx_queue.add_used(mem, head_index, 0); - raise_irq = true; - self.tx_frame_len = read_count; match self .passt @@ -302,10 +292,11 @@ impl Net { { Ok(()) => { self.tx_frame_len = 0; + tx_queue.add_used(mem, head_index, 0); + raise_irq = true; } Err(passt::WriteError::NothingWritten) => { - self.tx_frame_len = 0; - dropped_frames += drop_rest_of_frames(tx_queue); + tx_queue.undo_pop(); break; } Err(passt::WriteError::PartialWrite) => { @@ -313,25 +304,24 @@ impl Net { /* This situation should be pretty rare, assuming reasonably sized socket buffers. We have written only a part of a frame to the passt socket (the socket is full). - But we cannot wait for passt to process our sending frames, because passt + + The frame we have read from the guest remains in tx_frame_buf, and will be sent + later. + + Note that we cannot wait for passt to process our sending frames, because passt could be blocked on sending a remainder of a frame to us - us waiting for passt would cause a deadlock. - The guest, expects us to process the tx_queue, so we drop the rest of the frames. */ - dropped_frames += drop_rest_of_frames(tx_queue); + tx_queue.add_used(mem, head_index, 0); + raise_irq = true; break; } - Err( e @ passt::WriteError::Internal(_) | e @ passt::WriteError::ProcessNotRunning, ) => return Err(TxError::Passt(e)), } } - if dropped_frames > 0 { - log::trace!("Dropped {dropped_frames} frames"); - } - if raise_irq { self.signal_used_queue().map_err(TxError::DeviceError)?; } diff --git a/src/devices/src/virtio/net/passt.rs b/src/devices/src/virtio/net/passt.rs index ccbaf7ce..ee19ee09 100644 --- a/src/devices/src/virtio/net/passt.rs +++ b/src/devices/src/virtio/net/passt.rs @@ -1,5 +1,4 @@ -use nix::sys::socket::{recv, send, setsockopt, sockopt, MsgFlags}; -use std::num::NonZeroUsize; +use nix::sys::socket::{getsockopt, recv, send, setsockopt, sockopt, MsgFlags}; use std::os::fd::{AsRawFd, RawFd}; use vm_memory::VolatileMemory; @@ -31,7 +30,8 @@ pub struct Passt { fd: RawFd, // 0 when a frame length has not been read expecting_frame_length: u32, - last_partial_write_length: Option, + // 0 if last write is fully complete, otherwise the length that was written + last_partial_write_length: usize, } impl Passt { @@ -41,14 +41,20 @@ impl Passt { log::warn!("Failed to increase SO_SNDBUF (performance may be decreased): {e}"); } + log::debug!( + "passt socket (fd {passt_fd}) buffer sizes: SndBuf={:?} RcvBuf={:?}", + getsockopt(passt_fd, sockopt::SndBuf), + getsockopt(passt_fd, sockopt::RcvBuf) + ); + Self { fd: passt_fd, expecting_frame_length: 0, - last_partial_write_length: None, + last_partial_write_length: 0, } } - /// Try to read a frame from passt. If no bytes are available reports PasstError::WouldBlock + /// Try to read a frame from passt. If no bytes are available reports ReadError::NothingRead pub fn read_frame(&mut self, buf: &mut [u8]) -> Result { if self.expecting_frame_length == 0 { self.expecting_frame_length = { @@ -76,7 +82,7 @@ impl Passt { /// If this function returns WriteError::PartialWrite, you have to finish the write using /// try_finish_write. pub fn write_frame(&mut self, hdr_len: usize, buf: &mut [u8]) -> Result<(), WriteError> { - if self.last_partial_write_length.is_some() { + if self.last_partial_write_length != 0 { panic!("Cannot write a frame to passt, while a partial write is not resolved."); } assert!( @@ -94,7 +100,7 @@ impl Passt { } pub fn has_unfinished_write(&self) -> bool { - self.last_partial_write_length.is_some() + self.last_partial_write_length != 0 } /// Try to finish a partial write @@ -104,12 +110,13 @@ impl Passt { /// * `hdr_len` - must be the same value as passed to write_frame, that caused the partial write /// * `buf` - must be same buffer that was given to write_frame, that caused the partial write pub fn try_finish_write(&mut self, hdr_len: usize, buf: &[u8]) -> Result<(), WriteError> { - if let Some(written_bytes) = self.last_partial_write_length { + if self.last_partial_write_length != 0 { + let already_written = self.last_partial_write_length; log::trace!("Requested to finish partial write"); - self.write_loop(&buf[hdr_len - PASST_HEADER_LEN + written_bytes.get()..])?; + self.write_loop(&buf[hdr_len - PASST_HEADER_LEN + already_written..])?; log::debug!( "Finished partial write ({}bytes written before)", - written_bytes.get() + already_written ) } @@ -121,7 +128,6 @@ impl Passt { } /// Try to read until filling the whole slice. - /// May return WouldBlock only if the first read fails fn read_loop(&self, buf: &mut [u8], block_until_has_data: bool) -> Result<(), ReadError> { let mut bytes_read = 0; @@ -168,7 +174,7 @@ impl Passt { while bytes_send < buf.len() { match send( self.fd, - buf, + &buf[bytes_send..], MsgFlags::MSG_DONTWAIT | MsgFlags::MSG_NOSIGNAL, ) { Ok(size) => bytes_send += size, @@ -181,7 +187,8 @@ impl Passt { "Wrote {} bytes, but socket blocked, will need try_finish_write() to finish", bytes_send ); - self.last_partial_write_length = Some(bytes_send.try_into().unwrap()); + + self.last_partial_write_length += bytes_send; return Err(WriteError::PartialWrite); } } @@ -189,7 +196,7 @@ impl Passt { Err(e) => return Err(WriteError::Internal(e)), } } - self.last_partial_write_length = None; + self.last_partial_write_length = 0; Ok(()) } } diff --git a/src/vmm/src/vmm_config/net.rs b/src/vmm/src/vmm_config/net.rs index b210bc1f..e50e126e 100644 --- a/src/vmm/src/vmm_config/net.rs +++ b/src/vmm/src/vmm_config/net.rs @@ -48,7 +48,7 @@ impl NetBuilder { /// Creates an empty list of Network Devices. pub fn new() -> Self { NetBuilder { - /// List of built network devices. + // List of built network devices. net_devices: Vec::new(), } }