Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

virtio net bugfixes and performance improvement #149

Merged
merged 7 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 17 additions & 27 deletions src/devices/src/virtio/net/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,12 @@ impl Net {
.passt
.try_finish_write(vnet_hdr_len(), &self.tx_frame_buf[..self.tx_frame_len])
{
Ok(()) | Err(passt::WriteError::PartialWrite | passt::WriteError::NothingWritten) => (),
Ok(()) => {
if let Err(e) = self.process_tx() {
log::error!("Failed to continue processing tx after passt socket was writable again: {e:?}");
}
}
Err(passt::WriteError::PartialWrite | passt::WriteError::NothingWritten) => {}
Err(e @ passt::WriteError::Internal(_)) => {
log::error!("Failed to finish write: {e:?}");
}
Expand Down Expand Up @@ -233,29 +238,17 @@ impl Net {

let tx_queue = &mut self.queues[TX_INDEX];

let drop_rest_of_frames = |tx_queue: &mut Queue| {
let mut dropped_frames = 0;
while let Some(head) = tx_queue.pop(mem) {
tx_queue.add_used(mem, head.index, 0);
dropped_frames += 1;
}
dropped_frames
};

if self.passt.has_unfinished_write()
&& self
.passt
.try_finish_write(vnet_hdr_len(), &self.tx_frame_buf[..self.tx_frame_len])
.is_err()
{
let dropped_frames = drop_rest_of_frames(tx_queue);
log::trace!("Dropped {dropped_frames} frames, due to unfinished partial write");
self.signal_used_queue().map_err(TxError::DeviceError)?;
log::trace!("Cannot process tx because of unfinished partial write!");
return Ok(());
}

let mut raise_irq = false;
let mut dropped_frames = 0;

while let Some(head) = tx_queue.pop(mem) {
let head_index = head.index;
Expand Down Expand Up @@ -292,46 +285,43 @@ impl Net {
}
}

tx_queue.add_used(mem, head_index, 0);
raise_irq = true;

self.tx_frame_len = read_count;
match self
.passt
.write_frame(vnet_hdr_len(), &mut self.tx_frame_buf[..read_count])
{
Ok(()) => {
self.tx_frame_len = 0;
tx_queue.add_used(mem, head_index, 0);
raise_irq = true;
}
Err(passt::WriteError::NothingWritten) => {
self.tx_frame_len = 0;
dropped_frames += drop_rest_of_frames(tx_queue);
tx_queue.undo_pop();
break;
}
Err(passt::WriteError::PartialWrite) => {
log::trace!("process_tx: partial write");
/*
This situation should be pretty rare, assuming reasonably sized socket buffers.
We have written only a part of a frame to the passt socket (the socket is full).
But we cannot wait for passt to process our sending frames, because passt

The frame we have read from the guest remains in tx_frame_buf, and will be sent
later.

Note that we cannot wait for passt to process our sending frames, because passt
could be blocked on sending a remainder of a frame to us - us waiting for passt
would cause a deadlock.
The guest, expects us to process the tx_queue, so we drop the rest of the frames.
*/
dropped_frames += drop_rest_of_frames(tx_queue);
tx_queue.add_used(mem, head_index, 0);
raise_irq = true;
break;
}

Err(
e @ passt::WriteError::Internal(_) | e @ passt::WriteError::ProcessNotRunning,
) => return Err(TxError::Passt(e)),
}
}

if dropped_frames > 0 {
log::trace!("Dropped {dropped_frames} frames");
}

if raise_irq {
self.signal_used_queue().map_err(TxError::DeviceError)?;
}
Expand Down
35 changes: 21 additions & 14 deletions src/devices/src/virtio/net/passt.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use nix::sys::socket::{recv, send, setsockopt, sockopt, MsgFlags};
use std::num::NonZeroUsize;
use nix::sys::socket::{getsockopt, recv, send, setsockopt, sockopt, MsgFlags};
use std::os::fd::{AsRawFd, RawFd};
use vm_memory::VolatileMemory;

Expand Down Expand Up @@ -31,7 +30,8 @@ pub struct Passt {
fd: RawFd,
// 0 when a frame length has not been read
expecting_frame_length: u32,
last_partial_write_length: Option<NonZeroUsize>,
// 0 if last write is fully complete, otherwise the length that was written
last_partial_write_length: usize,
}

impl Passt {
Expand All @@ -41,14 +41,20 @@ impl Passt {
log::warn!("Failed to increase SO_SNDBUF (performance may be decreased): {e}");
}

log::debug!(
"passt socket (fd {passt_fd}) buffer sizes: SndBuf={:?} RcvBuf={:?}",
getsockopt(passt_fd, sockopt::SndBuf),
getsockopt(passt_fd, sockopt::RcvBuf)
);

Self {
fd: passt_fd,
expecting_frame_length: 0,
last_partial_write_length: None,
last_partial_write_length: 0,
}
}

/// Try to read a frame from passt. If no bytes are available reports PasstError::WouldBlock
/// Try to read a frame from passt. If no bytes are available reports ReadError::NothingRead
pub fn read_frame(&mut self, buf: &mut [u8]) -> Result<usize, ReadError> {
if self.expecting_frame_length == 0 {
self.expecting_frame_length = {
Expand Down Expand Up @@ -76,7 +82,7 @@ impl Passt {
/// If this function returns WriteError::PartialWrite, you have to finish the write using
/// try_finish_write.
pub fn write_frame(&mut self, hdr_len: usize, buf: &mut [u8]) -> Result<(), WriteError> {
if self.last_partial_write_length.is_some() {
if self.last_partial_write_length != 0 {
panic!("Cannot write a frame to passt, while a partial write is not resolved.");
}
assert!(
Expand All @@ -94,7 +100,7 @@ impl Passt {
}

pub fn has_unfinished_write(&self) -> bool {
self.last_partial_write_length.is_some()
self.last_partial_write_length != 0
}

/// Try to finish a partial write
Expand All @@ -104,12 +110,13 @@ impl Passt {
/// * `hdr_len` - must be the same value as passed to write_frame, that caused the partial write
/// * `buf` - must be same buffer that was given to write_frame, that caused the partial write
pub fn try_finish_write(&mut self, hdr_len: usize, buf: &[u8]) -> Result<(), WriteError> {
if let Some(written_bytes) = self.last_partial_write_length {
if self.last_partial_write_length != 0 {
let already_written = self.last_partial_write_length;
log::trace!("Requested to finish partial write");
self.write_loop(&buf[hdr_len - PASST_HEADER_LEN + written_bytes.get()..])?;
self.write_loop(&buf[hdr_len - PASST_HEADER_LEN + already_written..])?;
log::debug!(
"Finished partial write ({}bytes written before)",
written_bytes.get()
already_written
)
}

Expand All @@ -121,7 +128,6 @@ impl Passt {
}

/// Try to read until filling the whole slice.
/// May return WouldBlock only if the first read fails
fn read_loop(&self, buf: &mut [u8], block_until_has_data: bool) -> Result<(), ReadError> {
let mut bytes_read = 0;

Expand Down Expand Up @@ -168,7 +174,7 @@ impl Passt {
while bytes_send < buf.len() {
match send(
self.fd,
buf,
&buf[bytes_send..],
MsgFlags::MSG_DONTWAIT | MsgFlags::MSG_NOSIGNAL,
) {
Ok(size) => bytes_send += size,
Expand All @@ -181,15 +187,16 @@ impl Passt {
"Wrote {} bytes, but socket blocked, will need try_finish_write() to finish",
bytes_send
);
self.last_partial_write_length = Some(bytes_send.try_into().unwrap());

self.last_partial_write_length += bytes_send;
return Err(WriteError::PartialWrite);
}
}
Err(nix::Error::EPIPE) => return Err(WriteError::ProcessNotRunning),
Err(e) => return Err(WriteError::Internal(e)),
}
}
self.last_partial_write_length = None;
self.last_partial_write_length = 0;
Ok(())
}
}
2 changes: 1 addition & 1 deletion src/vmm/src/vmm_config/net.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ impl NetBuilder {
/// Creates an empty list of Network Devices.
pub fn new() -> Self {
NetBuilder {
/// List of built network devices.
// List of built network devices.
net_devices: Vec::new(),
}
}
Expand Down
Loading