From 5702761073f602033654be6d353215457d645d96 Mon Sep 17 00:00:00 2001
From: tiif <pekyuan@gmail.com>
Date: Wed, 14 Aug 2024 18:45:29 +0800
Subject: [PATCH] Implement epoll shim

---
 src/helpers.rs                    |   8 +
 src/machine.rs                    |   5 +
 src/shims/mod.rs                  |   2 +-
 src/shims/unix/fd.rs              | 107 +++++-
 src/shims/unix/fs.rs              |   4 +
 src/shims/unix/linux/epoll.rs     | 382 ++++++++++++++++++---
 src/shims/unix/linux/eventfd.rs   |  51 ++-
 src/shims/unix/mod.rs             |   1 +
 src/shims/unix/socket.rs          |  91 ++++-
 tests/fail-dep/tokio/sleep.rs     |   2 +-
 tests/fail-dep/tokio/sleep.stderr |   4 +-
 tests/pass-dep/libc/libc-epoll.rs | 552 ++++++++++++++++++++++++++++++
 12 files changed, 1134 insertions(+), 75 deletions(-)
 create mode 100644 tests/pass-dep/libc/libc-epoll.rs
diff --git a/src/helpers.rs b/src/helpers.rs
index 1bdf9f06dc..0483745621 100644
--- a/src/helpers.rs
+++ b/src/helpers.rs
@@ -371,6 +371,14 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         path_ty_layout(this, &["std", "sys", "pal", "windows", "c", name])
     }
 
+    /// Helper function to get the `TyAndLayout` of an array of `size` elements of a `libc` type.
+    fn libc_array_ty_layout(&self, name: &str, size: u64) -> TyAndLayout<'tcx> {
+        let this = self.eval_context_ref();
+        let elem_ty_layout = this.libc_ty_layout(name);
+        let array_ty = Ty::new_array(*this.tcx, elem_ty_layout.ty, size);
+        this.layout_of(array_ty).unwrap()
+    }
+
     /// Project to the given *named* field (which must be a struct or union type).
     fn project_field_named<P: Projectable<'tcx, Provenance>>(
         &self,
diff --git a/src/machine.rs b/src/machine.rs
index 94598e7d2e..2d8b8cb71d 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -453,6 +453,9 @@ pub struct MiriMachine<'tcx> {
     /// The table of directory descriptors.
     pub(crate) dirs: shims::DirTable,
 
+    /// The table of all `EpollEventInterest`s, keyed by the id of the file description.
+    pub(crate) epoll_interests: shims::EpollInterestTable,
+
     /// This machine's monotone clock.
     pub(crate) clock: Clock,
 
@@ -647,6 +650,7 @@ impl<'tcx> MiriMachine<'tcx> {
             isolated_op: config.isolated_op,
             validation: config.validation,
             fds: shims::FdTable::init(config.mute_stdout_stderr),
+            epoll_interests: shims::EpollInterestTable::new(),
             dirs: Default::default(),
             layouts,
             threads,
@@ -785,6 +789,7 @@ impl VisitProvenance for MiriMachine<'_> {
             data_race,
             alloc_addresses,
             fds,
+            epoll_interests:_,
             tcx: _,
             isolated_op: _,
             validation: _,
diff --git a/src/shims/mod.rs b/src/shims/mod.rs
index a41a2883c9..7d5349f26b 100644
--- a/src/shims/mod.rs
+++ b/src/shims/mod.rs
@@ -17,7 +17,7 @@ pub mod panic;
 pub mod time;
 pub mod tls;
 
-pub use unix::{DirTable, FdTable};
+pub use unix::{DirTable, EpollInterestTable, FdTable};
 
 /// What needs to be done after emulating an item (a shim or an intrinsic) is done.
 pub enum EmulateItemResult {
diff --git a/src/shims/unix/fd.rs b/src/shims/unix/fd.rs
index 1b25ef0576..98a124b9a5 100644
--- a/src/shims/unix/fd.rs
+++ b/src/shims/unix/fd.rs
@@ -6,9 +6,11 @@ use std::cell::{Ref, RefCell, RefMut};
 use std::collections::BTreeMap;
 use std::io::{self, ErrorKind, IsTerminal, Read, SeekFrom, Write};
 use std::rc::Rc;
+use std::rc::Weak;
 
 use rustc_target::abi::Size;
 
+use crate::shims::unix::linux::epoll::EpollReadyEvents;
 use crate::shims::unix::*;
 use crate::*;
 
@@ -27,6 +29,7 @@ pub trait FileDescription: std::fmt::Debug + Any {
     fn read<'tcx>(
         &mut self,
         _communicate_allowed: bool,
+        _fd_id: FdId,
         _bytes: &mut [u8],
         _ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<usize>> {
@@ -37,6 +40,7 @@ pub trait FileDescription: std::fmt::Debug + Any {
     fn write<'tcx>(
         &mut self,
         _communicate_allowed: bool,
+        _fd_id: FdId,
         _bytes: &[u8],
         _ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<usize>> {
@@ -80,6 +84,7 @@ pub trait FileDescription: std::fmt::Debug + Any {
     fn close<'tcx>(
         self: Box<Self>,
         _communicate_allowed: bool,
+        _ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<()>> {
         throw_unsup_format!("cannot close {}", self.name());
     }
@@ -97,6 +102,11 @@ pub trait FileDescription: std::fmt::Debug + Any {
         // so we use a default impl here.
         false
     }
+
+    /// Check the readiness of file description.
+    fn get_epoll_ready_events<'tcx>(&self) -> InterpResult<'tcx, EpollReadyEvents> {
+        throw_unsup_format!("{}: epoll does not support this file description", self.name());
+    }
 }
 
 impl dyn FileDescription {
@@ -119,6 +129,7 @@ impl FileDescription for io::Stdin {
     fn read<'tcx>(
         &mut self,
         communicate_allowed: bool,
+        _fd_id: FdId,
         bytes: &mut [u8],
         _ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<usize>> {
@@ -142,6 +153,7 @@ impl FileDescription for io::Stdout {
     fn write<'tcx>(
         &mut self,
         _communicate_allowed: bool,
+        _fd_id: FdId,
         bytes: &[u8],
         _ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<usize>> {
@@ -170,6 +182,7 @@ impl FileDescription for io::Stderr {
     fn write<'tcx>(
         &mut self,
         _communicate_allowed: bool,
+        _fd_id: FdId,
         bytes: &[u8],
         _ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<usize>> {
@@ -195,6 +208,7 @@ impl FileDescription for NullOutput {
     fn write<'tcx>(
         &mut self,
         _communicate_allowed: bool,
+        _fd_id: FdId,
         bytes: &[u8],
         _ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<usize>> {
@@ -203,36 +217,98 @@ impl FileDescription for NullOutput {
     }
 }
 
+/// Structure contains both the file description and its unique identifier.
+#[derive(Clone, Debug)]
+pub struct FileDescWithId<T: FileDescription + ?Sized> {
+    id: FdId,
+    file_description: RefCell<Box<T>>,
+}
+
 #[derive(Clone, Debug)]
-pub struct FileDescriptionRef(Rc<RefCell<Box<dyn FileDescription>>>);
+pub struct FileDescriptionRef(Rc<FileDescWithId<dyn FileDescription>>);
 
 impl FileDescriptionRef {
-    fn new(fd: impl FileDescription) -> Self {
-        FileDescriptionRef(Rc::new(RefCell::new(Box::new(fd))))
+    fn new(fd: impl FileDescription, id: FdId) -> Self {
+        FileDescriptionRef(Rc::new(FileDescWithId {
+            id,
+            file_description: RefCell::new(Box::new(fd)),
+        }))
     }
 
     pub fn borrow(&self) -> Ref<'_, dyn FileDescription> {
-        Ref::map(self.0.borrow(), |fd| fd.as_ref())
+        Ref::map(self.0.file_description.borrow(), |fd| fd.as_ref())
     }
 
     pub fn borrow_mut(&self) -> RefMut<'_, dyn FileDescription> {
-        RefMut::map(self.0.borrow_mut(), |fd| fd.as_mut())
+        RefMut::map(self.0.file_description.borrow_mut(), |fd| fd.as_mut())
     }
 
-    pub fn close<'ctx>(self, communicate_allowed: bool) -> InterpResult<'ctx, io::Result<()>> {
+    pub fn close<'tcx>(
+        self,
+        communicate_allowed: bool,
+        ecx: &mut MiriInterpCx<'tcx>,
+    ) -> InterpResult<'tcx, io::Result<()>> {
         // Destroy this `Rc` using `into_inner` so we can call `close` instead of
         // implicitly running the destructor of the file description.
+        let id = self.get_id();
         match Rc::into_inner(self.0) {
-            Some(fd) => RefCell::into_inner(fd).close(communicate_allowed),
+            Some(fd) => {
+                // Remove entry from the global epoll_event_interest table.
+                ecx.machine.epoll_interests.remove(id);
+
+                RefCell::into_inner(fd.file_description).close(communicate_allowed, ecx)
+            }
             None => Ok(Ok(())),
         }
     }
+
+    pub fn downgrade(&self) -> WeakFileDescriptionRef {
+        WeakFileDescriptionRef { weak_ref: Rc::downgrade(&self.0) }
+    }
+
+    pub fn get_id(&self) -> FdId {
+        self.0.id
+    }
+
+    /// Reads this file description's readiness through a shared borrow (the getter only needs
+    /// `&self`) and lets the epoll shim update the ready lists of interested epoll instances.
+    pub(crate) fn check_and_update_readiness<'tcx>(
+        &self,
+        ecx: &mut MiriInterpCx<'tcx>,
+    ) -> InterpResult<'tcx, ()> {
+        use crate::shims::unix::linux::epoll::EvalContextExt;
+        ecx.check_and_update_readiness(self.get_id(), || self.borrow().get_epoll_ready_events())
+    }
+}
+
+/// Holds a weak reference to the actual file description.
+#[derive(Clone, Debug, Default)]
+pub struct WeakFileDescriptionRef {
+    weak_ref: Weak<FileDescWithId<dyn FileDescription>>,
+}
+
+impl WeakFileDescriptionRef {
+    /// Tries to turn this weak handle back into a strong `FileDescriptionRef`.
+    ///
+    /// Returns `None` when the underlying `Weak` can no longer be upgraded.
+    pub fn upgrade(&self) -> Option<FileDescriptionRef> {
+        self.weak_ref.upgrade().map(FileDescriptionRef)
+    }
+}
 
+/// A unique id for file descriptions. While we could use the address, considering that
+/// is definitely unique, the address would expose interpreter internal state when used
+/// for sorting things. So instead we generate a unique id per file description that stays
+/// the same even if a file descriptor is duplicated and gets a new integer file descriptor.
+#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Ord, PartialOrd)]
+pub struct FdId(usize);
+
 /// The file descriptor table
 #[derive(Debug)]
 pub struct FdTable {
-    fds: BTreeMap<i32, FileDescriptionRef>,
+    pub fds: BTreeMap<i32, FileDescriptionRef>,
+    /// Unique identifier that will be assigned to the next file description that is inserted.
+    next_file_description_id: FdId,
 }
 
 impl VisitProvenance for FdTable {
@@ -243,7 +319,7 @@ impl VisitProvenance for FdTable {
 
 impl FdTable {
     fn new() -> Self {
-        FdTable { fds: BTreeMap::new() }
+        FdTable { fds: BTreeMap::new(), next_file_description_id: FdId(0) }
     }
     pub(crate) fn init(mute_stdout_stderr: bool) -> FdTable {
         let mut fds = FdTable::new();
@@ -260,7 +336,8 @@ impl FdTable {
 
     /// Insert a new file description to the FdTable.
     pub fn insert_new(&mut self, fd: impl FileDescription) -> i32 {
-        let file_handle = FileDescriptionRef::new(fd);
+        let file_handle = FileDescriptionRef::new(fd, self.next_file_description_id);
+        self.next_file_description_id = FdId(self.next_file_description_id.0.strict_add(1));
         self.insert_ref_with_min_fd(file_handle, 0)
     }
 
@@ -337,7 +414,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             // If old_fd and new_fd point to the same description, then `dup_fd` ensures we keep the underlying file description alive.
             if let Some(file_description) = this.machine.fds.fds.insert(new_fd, dup_fd) {
                 // Ignore close error (not interpreter's) according to dup2() doc.
-                file_description.close(this.machine.communicate())?.ok();
+                file_description.close(this.machine.communicate(), this)?.ok();
             }
         }
         Ok(Scalar::from_i32(new_fd))
@@ -442,7 +519,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let Some(file_description) = this.machine.fds.remove(fd) else {
             return Ok(Scalar::from_i32(this.fd_not_found()?));
         };
-        let result = file_description.close(this.machine.communicate())?;
+        let result = file_description.close(this.machine.communicate(), this)?;
         // return `0` if close is successful
         let result = result.map(|()| 0i32);
         Ok(Scalar::from_i32(this.try_unwrap_io_result(result)?))
@@ -499,7 +576,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         // `usize::MAX` because it is bounded by the host's `isize`.
         let mut bytes = vec![0; usize::try_from(count).unwrap()];
         let result = match offset {
-            None => fd.borrow_mut().read(communicate, &mut bytes, this),
+            None => fd.borrow_mut().read(communicate, fd.get_id(), &mut bytes, this),
             Some(offset) => {
                 let Ok(offset) = u64::try_from(offset) else {
                     let einval = this.eval_libc("EINVAL");
@@ -509,7 +586,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 fd.borrow_mut().pread(communicate, &mut bytes, offset, this)
             }
         };
-        drop(fd);
 
         // `File::read` never returns a value larger than `count`, so this cannot fail.
         match result?.map(|c| i64::try_from(c).unwrap()) {
@@ -558,7 +634,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         };
 
         let result = match offset {
-            None => fd.borrow_mut().write(communicate, &bytes, this),
+            None => fd.borrow_mut().write(communicate, fd.get_id(), &bytes, this),
             Some(offset) => {
                 let Ok(offset) = u64::try_from(offset) else {
                     let einval = this.eval_libc("EINVAL");
@@ -568,7 +644,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 fd.borrow_mut().pwrite(communicate, &bytes, offset, this)
             }
         };
-        drop(fd);
 
         let result = result?.map(|c| i64::try_from(c).unwrap());
         Ok(Scalar::from_target_isize(this.try_unwrap_io_result(result)?, this))
diff --git a/src/shims/unix/fs.rs b/src/shims/unix/fs.rs
index d93374db81..9da36e64a0 100644
--- a/src/shims/unix/fs.rs
+++ b/src/shims/unix/fs.rs
@@ -12,6 +12,7 @@ use rustc_data_structures::fx::FxHashMap;
 use rustc_target::abi::Size;
 
 use crate::shims::os_str::bytes_to_os_str;
+use crate::shims::unix::fd::FdId;
 use crate::shims::unix::*;
 use crate::*;
 use shims::time::system_time_to_duration;
@@ -32,6 +33,7 @@ impl FileDescription for FileHandle {
     fn read<'tcx>(
         &mut self,
         communicate_allowed: bool,
+        _fd_id: FdId,
         bytes: &mut [u8],
         _ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<usize>> {
@@ -42,6 +44,7 @@ impl FileDescription for FileHandle {
     fn write<'tcx>(
         &mut self,
         communicate_allowed: bool,
+        _fd_id: FdId,
         bytes: &[u8],
         _ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<usize>> {
@@ -109,6 +112,7 @@ impl FileDescription for FileHandle {
     fn close<'tcx>(
         self: Box<Self>,
         communicate_allowed: bool,
+        _ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<()>> {
         assert!(communicate_allowed, "isolation should have prevented even opening a file");
         // We sync the file if it was opened in a mode different than read-only.
diff --git a/src/shims/unix/linux/epoll.rs b/src/shims/unix/linux/epoll.rs
index 9127db3d00..89616bd0d0 100644
--- a/src/shims/unix/linux/epoll.rs
+++ b/src/shims/unix/linux/epoll.rs
@@ -1,32 +1,103 @@
+use std::cell::RefCell;
+use std::collections::BTreeMap;
 use std::io;
+use std::rc::{Rc, Weak};
 
-use rustc_data_structures::fx::FxHashMap;
-
+use crate::shims::unix::fd::FdId;
 use crate::shims::unix::*;
 use crate::*;
 
 /// An `Epoll` file descriptor connects file handles and epoll events
 #[derive(Clone, Debug, Default)]
 struct Epoll {
-    /// The file descriptors we are watching, and what we are watching for.
-    file_descriptors: FxHashMap<i32, EpollEvent>,
+    /// A map of EpollEventInterests registered under this epoll instance.
+    /// Each entry is differentiated using FdId and file descriptor value.
+    interest_list: BTreeMap<(FdId, i32), Rc<RefCell<EpollEventInterest>>>,
+    /// A map of EpollEventInstance that will be returned when `epoll_wait` is called.
+    /// Similar to interest_list, the entry is also differentiated using FdId
+    /// and file descriptor value.
+    // This is an Rc because each EpollEventInterest needs to hold a reference to the
+    // ready list in order to update it.
+    ready_list: Rc<RefCell<BTreeMap<(FdId, i32), EpollEventInstance>>>,
 }
 
-/// Epoll Events associate events with data.
-/// These fields are currently unused by miri.
-/// This matches the `epoll_event` struct defined
+/// EpollEventInstance contains information that will be returned by epoll_wait.
+#[derive(Debug)]
+pub struct EpollEventInstance {
+    /// Bitmask (bitwise OR) of the event types that happened to the file description.
+    events: u32,
+    /// Original data retrieved from `epoll_event` during `epoll_ctl`.
+    data: u64,
+}
+
+impl EpollEventInstance {
+    pub fn new(events: u32, data: u64) -> EpollEventInstance {
+        EpollEventInstance { events, data }
+    }
+}
+/// EpollEventInterest registers the file description information to an epoll
+/// instance during a successful `epoll_ctl` call. It also stores additional
+/// information needed to check and update readiness state for `epoll_wait`.
+///
+/// The `events` and `data` fields match the `epoll_event` struct defined
 /// by the epoll_ctl man page. For more information
 /// see the man page:
 ///
 /// <https://man7.org/linux/man-pages/man2/epoll_ctl.2.html>
 #[derive(Clone, Debug)]
-struct EpollEvent {
-    #[allow(dead_code)]
+pub struct EpollEventInterest {
+    /// The file descriptor value of the file description registered.
+    file_descriptor: i32,
+    /// The events bitmask retrieved from `epoll_event`.
     events: u32,
-    /// `Scalar` is used to represent the
-    /// `epoll_data` type union.
-    #[allow(dead_code)]
-    data: Scalar,
+    /// The data retrieved from `epoll_event`.
+    /// libc's data field in epoll_event can store integer or pointer,
+    /// but only u64 is supported for now.
+    /// <https://man7.org/linux/man-pages/man3/epoll_event.3type.html>
+    data: u64,
+    /// Ready list of the epoll instance under which this EpollEventInterest is registered.
+    ready_list: Rc<RefCell<BTreeMap<(FdId, i32), EpollEventInstance>>>,
+}
+
+/// EpollReadyEvents reflects the readiness of a file description.
+pub struct EpollReadyEvents {
+    /// The associated file is available for read(2) operations.
+    pub epollin: bool,
+    /// The associated file is available for write(2) operations.
+    pub epollout: bool,
+    /// Stream socket peer closed connection, or shut down writing
+    /// half of connection.
+    pub epollrdhup: bool,
+}
+
+impl EpollReadyEvents {
+    pub fn new() -> Self {
+        EpollReadyEvents { epollin: false, epollout: false, epollrdhup: false }
+    }
+
+    pub fn get_event_bitmask<'tcx>(&self, ecx: &MiriInterpCx<'tcx>) -> u32 {
+        let epollin = ecx.eval_libc_u32("EPOLLIN");
+        let epollout = ecx.eval_libc_u32("EPOLLOUT");
+        let epollrdhup = ecx.eval_libc_u32("EPOLLRDHUP");
+
+        let mut bitmask = 0;
+        if self.epollin {
+            bitmask |= epollin;
+        }
+        if self.epollout {
+            bitmask |= epollout;
+        }
+        if self.epollrdhup {
+            bitmask |= epollrdhup;
+        }
+        bitmask
+    }
+}
+
+impl Epoll {
+    fn get_ready_list(&self) -> Rc<RefCell<BTreeMap<(FdId, i32), EpollEventInstance>>> {
+        Rc::clone(&self.ready_list)
+    }
 }
 
 impl FileDescription for Epoll {
@@ -37,11 +108,51 @@ impl FileDescription for Epoll {
     fn close<'tcx>(
         self: Box<Self>,
         _communicate_allowed: bool,
+        _ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<()>> {
         Ok(Ok(()))
     }
 }
 
+/// The table of all EpollEventInterest.
+/// The BTreeMap key is the FdId of an active file description registered with
+/// any epoll instance. The value is a list of EpollEventInterest associated
+/// with that file description.
+pub struct EpollInterestTable(BTreeMap<FdId, Vec<Weak<RefCell<EpollEventInterest>>>>);
+
+impl EpollInterestTable {
+    /// Creates an empty table.
+    pub(crate) fn new() -> Self {
+        EpollInterestTable(BTreeMap::new())
+    }
+
+    /// Records `fd` as one more interest in the file description `id`, creating the
+    /// list for `id` if this is its first registration.
+    pub fn insert_epoll_interest(&mut self, id: FdId, fd: Weak<RefCell<EpollEventInterest>>) {
+        // `entry` handles both the existing-list and the new-list case with one lookup.
+        self.0.entry(id).or_default().push(fd);
+    }
+
+    /// Returns the interests registered for `id`, or `None` if it has no entry.
+    pub fn get_epoll_interest(&self, id: FdId) -> Option<&Vec<Weak<RefCell<EpollEventInterest>>>> {
+        self.0.get(&id)
+    }
+
+    /// Mutable counterpart of `get_epoll_interest`, e.g. for pruning dead interests
+    /// of a file description in place.
+    pub fn get_epoll_interest_mut(
+        &mut self,
+        id: FdId,
+    ) -> Option<&mut Vec<Weak<RefCell<EpollEventInterest>>>> {
+        self.0.get_mut(&id)
+    }
+
+    /// Drops every interest recorded for `id`; does nothing if `id` has no entry.
+    pub fn remove(&mut self, id: FdId) {
+        self.0.remove(&id);
+    }
+}
+
 impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
 pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
     /// This function returns a file descriptor referring to the new `Epoll` instance. This file
@@ -64,6 +175,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             );
         }
 
-        let fd = this.machine.fds.insert_new(Epoll::default());
+        let mut epoll_instance = Epoll::default();
+        epoll_instance.ready_list = Rc::new(RefCell::new(BTreeMap::new()));
+
+        let fd = this.machine.fds.insert_new(epoll_instance);
         Ok(Scalar::from_i32(fd))
     }
@@ -90,48 +204,143 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
     ) -> InterpResult<'tcx, Scalar> {
         let this = self.eval_context_mut();
 
-        let epfd = this.read_scalar(epfd)?.to_i32()?;
+        let epfd_value = this.read_scalar(epfd)?.to_i32()?;
         let op = this.read_scalar(op)?.to_i32()?;
         let fd = this.read_scalar(fd)?.to_i32()?;
-        let _event = this.read_scalar(event)?.to_pointer(this)?;
+        let event = this.deref_pointer_as(event, this.libc_ty_layout("epoll_event"))?;
 
         let epoll_ctl_add = this.eval_libc_i32("EPOLL_CTL_ADD");
         let epoll_ctl_mod = this.eval_libc_i32("EPOLL_CTL_MOD");
         let epoll_ctl_del = this.eval_libc_i32("EPOLL_CTL_DEL");
+        let epollin = this.eval_libc_u32("EPOLLIN");
+        let epollout = this.eval_libc_u32("EPOLLOUT");
+        let epollrdhup = this.eval_libc_u32("EPOLLRDHUP");
+        let epollet = this.eval_libc_u32("EPOLLET");
+
+        // Fail on unsupported operations.
+        // `op` selects exactly one command and is not a set of flags, so it is compared
+        // for equality: a bitwise test would also let through unrelated values that merely
+        // contain the bits of one of the valid commands.
+        if op != epoll_ctl_add && op != epoll_ctl_mod && op != epoll_ctl_del {
+            throw_unsup_format!("epoll_ctl: encountered unknown unsupported operation {:#x}", op);
+        }
+
+        // Check if epfd is a valid epoll file descriptor.
+        let Some(epfd) = this.machine.fds.get_ref(epfd_value) else {
+            return Ok(Scalar::from_i32(this.fd_not_found()?));
+        };
+        let mut binding = epfd.borrow_mut();
+        let epoll_file_description = &mut binding
+            .downcast_mut::<Epoll>()
+            .ok_or_else(|| err_unsup_format!("non-epoll FD passed to `epoll_ctl`"))?;
+
+        let interest_list = &mut epoll_file_description.interest_list;
+        let ready_list = &epoll_file_description.ready_list;
+
+        let Some(file_descriptor) = this.machine.fds.get_ref(fd) else {
+            return Ok(Scalar::from_i32(this.fd_not_found()?));
+        };
+        let id = file_descriptor.get_id();
 
         if op == epoll_ctl_add || op == epoll_ctl_mod {
-            let event = this.deref_pointer_as(event, this.libc_ty_layout("epoll_event"))?;
+            // Read event bitmask and data from epoll_event passed by caller.
+            let events = this.read_scalar(&this.project_field(&event, 0)?)?.to_u32()?;
+            let data = this.read_scalar(&this.project_field(&event, 1)?)?.to_u64()?;
 
-            let events = this.project_field(&event, 0)?;
-            let events = this.read_scalar(&events)?.to_u32()?;
-            let data = this.project_field(&event, 1)?;
-            let data = this.read_scalar(&data)?;
-            let event = EpollEvent { events, data };
+            // Unset the flags we support to discover whether any unsupported flags are used.
+            let mut flags = events;
+            if events & epollet != epollet {
+                // We only support edge-triggered notification for now.
+                throw_unsup_format!("epoll_ctl: epollet flag must be included.");
+            } else {
+                flags &= !epollet;
+            }
+            if flags & epollin == epollin {
+                flags &= !epollin;
+            }
+            if flags & epollout == epollout {
+                flags &= !epollout;
+            }
+            if flags & epollrdhup == epollrdhup {
+                flags &= !epollrdhup;
+            }
+            if flags != 0 {
+                throw_unsup_format!(
+                    "epoll_ctl: encountered unknown unsupported flags {:#x}",
+                    flags
+                );
+            }
 
-            let Some(mut epfd) = this.machine.fds.get_mut(epfd) else {
-                return Ok(Scalar::from_i32(this.fd_not_found()?));
-            };
-            let epfd = epfd
-                .downcast_mut::<Epoll>()
-                .ok_or_else(|| err_unsup_format!("non-epoll FD passed to `epoll_ctl`"))?;
+            let epoll_key = (id, fd);
+
+            // Check the existence of fd in the interest list.
+            if op == epoll_ctl_add {
+                if interest_list.contains_key(&epoll_key) {
+                    let eexist = this.eval_libc("EEXIST");
+                    this.set_last_error(eexist)?;
+                    return Ok(Scalar::from_i32(-1));
+                }
+            } else {
+                if !interest_list.contains_key(&epoll_key) {
+                    let enoent = this.eval_libc("ENOENT");
+                    this.set_last_error(enoent)?;
+                    return Ok(Scalar::from_i32(-1));
+                }
+            }
+
+            let id = file_descriptor.get_id();
+            // Create an epoll_interest.
+            let interest = Rc::new(RefCell::new(EpollEventInterest {
+                file_descriptor: fd,
+                events,
+                data,
+                ready_list: Rc::clone(ready_list),
+            }));
+
+            if op == epoll_ctl_add {
+                // Insert an epoll_interest to global epoll_interest list.
+                this.machine.epoll_interests.insert_epoll_interest(id, Rc::downgrade(&interest));
+                interest_list.insert(epoll_key, interest);
+            } else {
+                // Directly modify the epoll_interest so the global epoll_event_interest table
+                // will be updated too.
+                let mut epoll_interest = interest_list.get_mut(&epoll_key).unwrap().borrow_mut();
+                epoll_interest.events = events;
+                epoll_interest.data = data;
+            }
+
+            // Readiness will be updated immediately when the epoll_event_interest is added or modified.
+            file_descriptor.check_and_update_readiness(this)?;
 
-            epfd.file_descriptors.insert(fd, event);
-            Ok(Scalar::from_i32(0))
+            return Ok(Scalar::from_i32(0));
         } else if op == epoll_ctl_del {
-            let Some(mut epfd) = this.machine.fds.get_mut(epfd) else {
-                return Ok(Scalar::from_i32(this.fd_not_found()?));
+            let epoll_key = (id, fd);
+
+            // Remove epoll_event_interest from interest_list.
+            let Some(epoll_interest) = interest_list.remove(&epoll_key) else {
+                let enoent = this.eval_libc("ENOENT");
+                this.set_last_error(enoent)?;
+                return Ok(Scalar::from_i32(-1));
             };
-            let epfd = epfd
-                .downcast_mut::<Epoll>()
-                .ok_or_else(|| err_unsup_format!("non-epoll FD passed to `epoll_ctl`"))?;
+            // All related Weak<EpollEventInterest> will fail to upgrade after the drop.
+            drop(epoll_interest);
 
-            epfd.file_descriptors.remove(&fd);
-            Ok(Scalar::from_i32(0))
-        } else {
-            let einval = this.eval_libc("EINVAL");
-            this.set_last_error(einval)?;
-            Ok(Scalar::from_i32(-1))
+            // Remove related epoll_interest from ready list.
+            ready_list.borrow_mut().remove(&epoll_key);
+
+            // Remove dangling EpollEventInterest from its global table.
+            // .unwrap() below should succeed because the file description id must have registered
+            // at least one epoll_interest, if not, it will fail when removing epoll_interest from
+            // interest list.
+            this.machine
+                .epoll_interests
+                .get_epoll_interest_mut(id)
+                .unwrap()
+                .retain(|event| event.upgrade().is_some());
+
+            return Ok(Scalar::from_i32(0));
         }
+        Ok(Scalar::from_i32(-1))
     }
 
     /// The `epoll_wait()` system call waits for events on the `Epoll`
@@ -166,25 +375,102 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
     fn epoll_wait(
         &mut self,
         epfd: &OpTy<'tcx>,
-        events: &OpTy<'tcx>,
+        events_op: &OpTy<'tcx>,
         maxevents: &OpTy<'tcx>,
         timeout: &OpTy<'tcx>,
     ) -> InterpResult<'tcx, Scalar> {
         let this = self.eval_context_mut();
 
         let epfd = this.read_scalar(epfd)?.to_i32()?;
-        let _events = this.read_scalar(events)?.to_pointer(this)?;
-        let _maxevents = this.read_scalar(maxevents)?.to_i32()?;
-        let _timeout = this.read_scalar(timeout)?.to_i32()?;
+        let maxevents = this.read_scalar(maxevents)?.to_i32()?;
+        let timeout = this.read_scalar(timeout)?.to_i32()?;
 
-        let Some(mut epfd) = this.machine.fds.get_mut(epfd) else {
+        // Negative `maxevents` is rejected here; the `unwrap` below would otherwise panic.
+        if epfd <= 0 || maxevents < 0 {
+            let einval = this.eval_libc("EINVAL");
+            this.set_last_error(einval)?;
+            return Ok(Scalar::from_i32(-1));
+        }
+        // FIXME: Implement blocking support
+        if timeout != 0 {
+            throw_unsup_format!("epoll_wait: timeout value can only be 0");
+        }
+        let event = this.deref_pointer_as(
+            events_op,
+            this.libc_array_ty_layout("epoll_event", maxevents.try_into().unwrap()),
+        )?;
+        let Some(epfd) = this.machine.fds.get_ref(epfd) else {
             return Ok(Scalar::from_i32(this.fd_not_found()?));
         };
-        let _epfd = epfd
+        let mut binding = epfd.borrow_mut();
+        let epoll_file_description = &mut binding
             .downcast_mut::<Epoll>()
             .ok_or_else(|| err_unsup_format!("non-epoll FD passed to `epoll_wait`"))?;
 
-        // FIXME return number of events ready when scheme for marking events ready exists
-        throw_unsup_format!("returning ready events from epoll_wait is not yet implemented");
+        let binding = epoll_file_description.get_ready_list();
+        let mut ready_list = binding.borrow_mut();
+        let mut num_of_events: i32 = 0;
+        let mut array_iter = this.project_array_fields(&event)?;
+
+        // Claim an output slot before popping: an event popped without a free slot would be
+        // removed from the ready list and silently lost once the caller's buffer is full.
+        'slots: while let Some(des) = array_iter.next(this)? {
+            let epoll_return = loop {
+                let Some((epoll_key, epoll_return)) = ready_list.pop_first() else {
+                    break 'slots;
+                };
+                // Entries of fully closed file descriptions have no interests left in the
+                // global table; they are dropped without being reported.
+                if this.machine.epoll_interests.get_epoll_interest(epoll_key.0).is_some() {
+                    break epoll_return;
+                }
+            };
+            this.write_int_fields_named(
+                &[("events", epoll_return.events.into()), ("u64", epoll_return.data.into())],
+                &des.1,
+            )?;
+            num_of_events = num_of_events.checked_add(1).unwrap();
+        }
+        Ok(Scalar::from_i32(num_of_events))
+    }
+
+    /// Recompute the ready events of the file description identified by `id` and record
+    /// them in the ready list of every epoll interest registered for it. This is invoked
+    /// when a file description is registered with epoll and after read, write, or close
+    /// operations, whether or not its readiness actually changed.
+    ///
+    /// This is an internal helper and is typically not meant to be called directly; in
+    /// most cases `FileDescriptionRef::check_and_update_readiness` should be preferred.
+    fn check_and_update_readiness(
+        &self,
+        id: FdId,
+        get_ready_events: impl FnOnce() -> InterpResult<'tcx, EpollReadyEvents>,
+    ) -> InterpResult<'tcx, ()> {
+        let this = self.eval_context_ref();
+        // Look up every EpollEventInterest associated with this file description. If there
+        // is none, the readiness callback is not even invoked.
+        let Some(interests) = this.machine.epoll_interests.get_epoll_interest(id) else {
+            return Ok(());
+        };
+        // Bitmask of all events that are currently ready.
+        let ready_mask = get_ready_events()?.get_event_bitmask(this);
+
+        // Interests whose weak reference no longer upgrades are skipped.
+        for strong_interest in interests.iter().filter_map(|weak| weak.upgrade()) {
+            let interest = strong_interest.borrow();
+            // Keep only the ready events that this interest actually asked for.
+            let flags = interest.events & ready_mask;
+            if flags == 0 {
+                continue;
+            }
+
+            // At least one requested event is ready: insert an epoll_return into the ready
+            // list, keyed by the file description id and the registered file descriptor.
+            let epoll_key = (id, interest.file_descriptor);
+            let event_instance = EpollEventInstance::new(flags, interest.data);
+            interest.ready_list.borrow_mut().insert(epoll_key, event_instance);
+        }
+
+        Ok(())
     }
 }
diff --git a/src/shims/unix/linux/eventfd.rs b/src/shims/unix/linux/eventfd.rs
index 4ab8760d93..8a11f225b2 100644
--- a/src/shims/unix/linux/eventfd.rs
+++ b/src/shims/unix/linux/eventfd.rs
@@ -3,8 +3,10 @@ use std::io;
 use std::io::{Error, ErrorKind};
 use std::mem;
 
+use fd::FdId;
 use rustc_target::abi::Endian;
 
+use crate::shims::unix::linux::epoll::EpollReadyEvents;
 use crate::shims::unix::*;
 use crate::{concurrency::VClock, *};
 
@@ -35,9 +37,21 @@ impl FileDescription for Event {
         "event"
     }
 
+    fn get_epoll_ready_events<'tcx>(&self) -> InterpResult<'tcx, EpollReadyEvents> {
+        // Only the EPOLLIN and EPOLLOUT flags are computed for eventfd. If other event flags
+        // need to be supported in the future, the check should be added here.
+        let mut ready_events = EpollReadyEvents::new();
+        // Readable whenever the counter is non-zero.
+        ready_events.epollin = self.counter != 0;
+        // Writable unless the counter already equals MAX_COUNTER.
+        ready_events.epollout = self.counter != MAX_COUNTER;
+        Ok(ready_events)
+    }
+
     fn close<'tcx>(
         self: Box<Self>,
         _communicate_allowed: bool,
+        _ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<()>> {
         Ok(Ok(()))
     }
@@ -46,6 +60,7 @@ impl FileDescription for Event {
     fn read<'tcx>(
         &mut self,
         _communicate_allowed: bool,
+        fd_id: FdId,
         bytes: &mut [u8],
         ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<usize>> {
@@ -70,6 +85,18 @@ impl FileDescription for Event {
                 Endian::Big => self.counter.to_be_bytes(),
             };
             self.counter = 0;
+            // After the read, we check and update the status of all supported event types for the
+            // current file description.
+
+            // We have to use our own FdID in contrast to every other file descriptor out there, because
+            // we are updating ourselves when writing and reading. Technically `Event` is like socketpair, but
+            // it does not create two separate file descriptors. Thus we can't re-borrow ourselves via
+            // `FileDescriptionRef::check_and_update_readiness` while already being mutably borrowed for read/write.
+            crate::shims::unix::linux::epoll::EvalContextExt::check_and_update_readiness(
+                ecx,
+                fd_id,
+                || self.get_epoll_ready_events(),
+            )?;
             return Ok(Ok(U64_ARRAY_SIZE));
         }
     }
@@ -89,6 +116,7 @@ impl FileDescription for Event {
     fn write<'tcx>(
         &mut self,
         _communicate_allowed: bool,
+        fd_id: FdId,
         bytes: &[u8],
         ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<usize>> {
@@ -124,6 +152,17 @@ impl FileDescription for Event {
                 }
             }
         };
+        // After the write, we check and update the status of all supported event types for the
+        // current file description.
+
+        // Just like read() above, we use this internal method to not get the second borrow of the
+        // RefCell of this FileDescription. This is a special case, we should only use
+        // FileDescriptionRef::check_and_update_readiness in normal case.
+        crate::shims::unix::linux::epoll::EvalContextExt::check_and_update_readiness(
+            ecx,
+            fd_id,
+            || self.get_epoll_ready_events(),
+        )?;
         Ok(Ok(U64_ARRAY_SIZE))
     }
 }
@@ -178,11 +217,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             throw_unsup_format!("eventfd: encountered unknown unsupported flags {:#x}", flags);
         }
 
-        let fd = this.machine.fds.insert_new(Event {
-            counter: val.into(),
-            is_nonblock,
-            clock: VClock::default(),
-        });
-        Ok(Scalar::from_i32(fd))
+        let fds = &mut this.machine.fds;
+
+        let fd_value =
+            fds.insert_new(Event { counter: val.into(), is_nonblock, clock: VClock::default() });
+
+        Ok(Scalar::from_i32(fd_value))
     }
 }
diff --git a/src/shims/unix/mod.rs b/src/shims/unix/mod.rs
index dc9068fddd..8cfa659d90 100644
--- a/src/shims/unix/mod.rs
+++ b/src/shims/unix/mod.rs
@@ -17,6 +17,7 @@ mod solarish;
 pub use env::UnixEnvVars;
 pub use fd::{FdTable, FileDescription};
 pub use fs::DirTable;
+pub use linux::epoll::EpollInterestTable;
 // All the Unix-specific extension traits
 pub use env::EvalContextExt as _;
 pub use fd::EvalContextExt as _;
diff --git a/src/shims/unix/socket.rs b/src/shims/unix/socket.rs
index 455820a9e6..0f40d9776b 100644
--- a/src/shims/unix/socket.rs
+++ b/src/shims/unix/socket.rs
@@ -4,6 +4,8 @@ use std::io;
 use std::io::{Error, ErrorKind, Read};
 use std::rc::{Rc, Weak};
 
+use crate::shims::unix::fd::{FdId, WeakFileDescriptionRef};
+use crate::shims::unix::linux::epoll::EpollReadyEvents;
 use crate::shims::unix::*;
 use crate::{concurrency::VClock, *};
 
@@ -19,6 +21,11 @@ struct SocketPair {
     // gone, and trigger EPIPE as appropriate.
     writebuf: Weak<RefCell<Buffer>>,
     readbuf: Rc<RefCell<Buffer>>,
+    /// When a socketpair instance is created, two socketpair file descriptions are generated.
+    /// The peer_fd field holds a weak reference to the file description of peer socketpair.
+    // TODO: It might be possible to retrieve writebuf from peer_fd and remove the writebuf
+    // field above.
+    peer_fd: WeakFileDescriptionRef,
     is_nonblock: bool,
 }
 
@@ -37,21 +44,62 @@ impl FileDescription for SocketPair {
         "socketpair"
     }
 
+    fn get_epoll_ready_events<'tcx>(&self) -> InterpResult<'tcx, EpollReadyEvents> {
+        // Only EPOLLIN, EPOLLOUT and EPOLLRDHUP are computed here. If other event flags need
+        // to be supported in the future, the check should be added here.
+
+        // Readable: our own read buffer holds unread data.
+        let has_data = !self.readbuf.borrow().buf.is_empty();
+
+        // Writable: the buffer we write into still exists and has free space left.
+        let has_space = self.writebuf.upgrade().is_some_and(|writebuf| {
+            let data_size = writebuf.borrow().buf.len();
+            MAX_SOCKETPAIR_BUFFER_CAPACITY.strict_sub(data_size) != 0
+        });
+
+        // Peer closed: the weak reference to the peer file description no longer upgrades.
+        let peer_closed = self.peer_fd.upgrade().is_none();
+
+        let mut epoll_ready_events = EpollReadyEvents::new();
+        // Edge case: whenever epollrdhup is triggered, epollin is reported as well, even
+        // though there is no data in the buffer.
+        if has_data || peer_closed {
+            epoll_ready_events.epollin = true;
+        }
+        if has_space {
+            epoll_ready_events.epollout = true;
+        }
+        if peer_closed {
+            epoll_ready_events.epollrdhup = true;
+        }
+
+        Ok(epoll_ready_events)
+    }
+
     fn close<'tcx>(
         self: Box<Self>,
         _communicate_allowed: bool,
+        ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<()>> {
         // This is used to signal socketfd of other side that there is no writer to its readbuf.
         // If the upgrade fails, there is no need to update as all read ends have been dropped.
         if let Some(writebuf) = self.writebuf.upgrade() {
             writebuf.borrow_mut().buf_has_writer = false;
         };
+
+        // Notify the peer fd that a close has happened.
+        if let Some(peer_fd) = self.peer_fd.upgrade() {
+            // Whenever such an event happens, we check and update the status of all supported
+            // event types of the peer fd.
+            peer_fd.check_and_update_readiness(ecx)?;
+        }
         Ok(Ok(()))
     }
 
     fn read<'tcx>(
         &mut self,
         _communicate_allowed: bool,
+        _fd_id: FdId,
         bytes: &mut [u8],
         ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<usize>> {
@@ -88,15 +136,33 @@ impl FileDescription for SocketPair {
         // FIXME: this over-synchronizes; a more precise approach would be to
         // only sync with the writes whose data we will read.
         ecx.acquire_clock(&readbuf.clock);
+
         // Do full read / partial read based on the space available.
         // Conveniently, `read` exists on `VecDeque` and has exactly the desired behavior.
         let actual_read_size = readbuf.buf.read(bytes).unwrap();
+
+        // The readbuf needs to be explicitly dropped because it will cause panic when
+        // check_and_update_readiness borrows it again.
+        drop(readbuf);
+
+        // A notification should be provided for the peer file description even when it can
+        // only write 1 byte. This implementation is not compliant with the actual Linux kernel
+        // implementation. For optimization reasons, the kernel will only mark the file description
+        // as "writable" when it can write more than a certain number of bytes. Since we
+        // don't know what that *certain number* is, we will provide a notification every time
+        // a read is successful. This might result in our epoll emulation providing more
+        // notifications than the real system.
+        if let Some(peer_fd) = self.peer_fd.upgrade() {
+            peer_fd.check_and_update_readiness(ecx)?;
+        }
+
         return Ok(Ok(actual_read_size));
     }
 
     fn write<'tcx>(
         &mut self,
         _communicate_allowed: bool,
+        _fd_id: FdId,
         bytes: &[u8],
         ecx: &mut MiriInterpCx<'tcx>,
     ) -> InterpResult<'tcx, io::Result<usize>> {
@@ -131,6 +197,14 @@ impl FileDescription for SocketPair {
         // Do full write / partial write based on the space available.
         let actual_write_size = write_size.min(available_space);
         writebuf.buf.extend(&bytes[..actual_write_size]);
+
+        // The writebuf needs to be explicitly dropped because it will cause panic when
+        // check_and_update_readiness borrows it again.
+        drop(writebuf);
+        // Notification should be provided for peer fd as it became readable.
+        if let Some(peer_fd) = self.peer_fd.upgrade() {
+            peer_fd.check_and_update_readiness(ecx)?;
+        }
         return Ok(Ok(actual_write_size));
     }
 }
@@ -209,18 +283,33 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let socketpair_0 = SocketPair {
             writebuf: Rc::downgrade(&buffer1),
             readbuf: Rc::clone(&buffer2),
+            peer_fd: WeakFileDescriptionRef::default(),
             is_nonblock: is_sock_nonblock,
         };
-
         let socketpair_1 = SocketPair {
             writebuf: Rc::downgrade(&buffer2),
             readbuf: Rc::clone(&buffer1),
+            peer_fd: WeakFileDescriptionRef::default(),
             is_nonblock: is_sock_nonblock,
         };
 
+        // Insert the file description to the fd table.
         let fds = &mut this.machine.fds;
         let sv0 = fds.insert_new(socketpair_0);
         let sv1 = fds.insert_new(socketpair_1);
+
+        // Get a weak reference to each of the two new file descriptions.
+        let fd_ref0 = fds.get_ref(sv0).unwrap();
+        let fd_ref1 = fds.get_ref(sv1).unwrap();
+        let weak_fd_ref0 = fd_ref0.downgrade();
+        let weak_fd_ref1 = fd_ref1.downgrade();
+
+        // Point the peer_fd field of each end at the other end.
+        fd_ref1.borrow_mut().downcast_mut::<SocketPair>().unwrap().peer_fd = weak_fd_ref0;
+
+        fd_ref0.borrow_mut().downcast_mut::<SocketPair>().unwrap().peer_fd = weak_fd_ref1;
+
+        // Return socketpair file description value to the caller.
         let sv0 = Scalar::from_int(sv0, sv.layout.size);
         let sv1 = Scalar::from_int(sv1, sv.layout.size);
 
diff --git a/tests/fail-dep/tokio/sleep.rs b/tests/fail-dep/tokio/sleep.rs
index d96d778e6c..0fa5080d48 100644
--- a/tests/fail-dep/tokio/sleep.rs
+++ b/tests/fail-dep/tokio/sleep.rs
@@ -1,6 +1,6 @@
 //@compile-flags: -Zmiri-permissive-provenance -Zmiri-backtrace=full
 //@only-target-x86_64-unknown-linux: support for tokio only on linux and x86
-//@error-in-other-file: returning ready events from epoll_wait is not yet implemented
+//@error-in-other-file: timeout value can only be 0
 //@normalize-stderr-test: " += note:.*\n" -> ""
 
 use tokio::time::{sleep, Duration, Instant};
diff --git a/tests/fail-dep/tokio/sleep.stderr b/tests/fail-dep/tokio/sleep.stderr
index 6d19faab90..d5bf00fc17 100644
--- a/tests/fail-dep/tokio/sleep.stderr
+++ b/tests/fail-dep/tokio/sleep.stderr
@@ -1,4 +1,4 @@
-error: unsupported operation: returning ready events from epoll_wait is not yet implemented
+error: unsupported operation: epoll_wait: timeout value can only be 0
   --> CARGO_REGISTRY/.../epoll.rs:LL:CC
    |
 LL | /         syscall!(epoll_wait(
@@ -7,7 +7,7 @@ LL | |             events.as_mut_ptr(),
 LL | |             events.capacity() as i32,
 LL | |             timeout,
 LL | |         ))
-   | |__________^ returning ready events from epoll_wait is not yet implemented
+   | |__________^ epoll_wait: timeout value can only be 0
    |
    = help: this is likely not a bug in the program; it indicates that the program performed an operation that Miri does not support
 
diff --git a/tests/pass-dep/libc/libc-epoll.rs b/tests/pass-dep/libc/libc-epoll.rs
new file mode 100644
index 0000000000..11a0257dc4
--- /dev/null
+++ b/tests/pass-dep/libc/libc-epoll.rs
@@ -0,0 +1,552 @@
+//@only-target-linux
+
+#![feature(exposed_provenance)] // Needed for fn test_pointer()
+use std::convert::TryInto;
+use std::mem::MaybeUninit;
+
+fn main() {
+    test_event_overwrite();
+    test_not_fully_closed_fd();
+    test_closed_fd(); // fully closed file description: no notification expected
+    test_epoll_socketpair_special_case(); // read/write on one end notifies the other end
+    test_two_epoll_instance(); // one fd registered with two epoll instances
+    test_epoll_ctl_mod();
+    test_epoll_socketpair();
+    test_epoll_eventfd();
+    test_epoll_ctl_del();
+    test_pointer(); // needs the `exposed_provenance` feature
+    test_two_same_fd_in_same_epoll_instance(); // same file description added via fd and its dup
+    test_socketpair_read();
+}
+
+fn check_epoll_wait<const N: usize>(
+    epfd: i32,
+    mut expected_notifications: Vec<(u32, u64)>,
+) -> bool {
+    // Zero-initialised buffer for up to `N` events; `N` is also passed as `maxevents`.
+    let mut array = [libc::epoll_event { events: 0, u64: 0 }; N];
+    let array_ptr = array.as_mut_ptr();
+    // A timeout of 0 makes `epoll_wait` return immediately.
+    let res = unsafe { libc::epoll_wait(epfd, array_ptr, N.try_into().unwrap(), 0) };
+    assert_eq!(res, expected_notifications.len().try_into().unwrap());
+    let slice = unsafe { std::slice::from_raw_parts(array_ptr, res.try_into().unwrap()) };
+
+    // Expectations are consumed from the back of the vector, so the last expected
+    // notification has to be the first one that was returned.
+    for return_event in slice {
+        let Some((expected_event, expected_data)) = expected_notifications.pop() else {
+            return false;
+        };
+        // Copy the fields into locals before comparing them.
+        let event = return_event.events;
+        let data = return_event.u64;
+        assert_eq!(event, expected_event);
+        assert_eq!(data, expected_data);
+    }
+    // Every expected notification must have been matched.
+    expected_notifications.is_empty()
+}
+
+// A socket with pending data is reported as readable and writable right after registration,
+// and closing its peer additionally reports EPOLLRDHUP.
+fn test_epoll_socketpair() {
+    // Set up a fresh epoll instance.
+    let epfd = unsafe { libc::epoll_create1(0) };
+    assert_ne!(epfd, -1);
+
+    // Set up a connected pair of sockets.
+    let mut fds = [-1, -1];
+    let created =
+        unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) };
+    assert_eq!(created, 0);
+
+    // Send five bytes through fds[0] so that fds[1] has data to read.
+    let payload = "abcde".as_bytes().as_ptr();
+    let written: i32 =
+        unsafe { libc::write(fds[0], payload as *const libc::c_void, 5).try_into().unwrap() };
+    assert_eq!(written, 5);
+
+    // Register fds[1] with EPOLLIN | EPOLLOUT | EPOLLRDHUP | EPOLLET; its own fd number is
+    // used as the event data.
+    // EPOLLET is a negative number as an i32, so it is cast to u32 on its own before the OR.
+    let epollet = libc::EPOLLET as u32;
+    let flags = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT | libc::EPOLLRDHUP).unwrap() | epollet;
+    let token = u64::try_from(fds[1]).unwrap();
+    let mut ev = libc::epoll_event { events: flags, u64: token };
+    let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[1], &mut ev) };
+    assert_ne!(res, -1);
+
+    // The registration is reported right away with EPOLLIN | EPOLLOUT.
+    let ready_now = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap();
+    assert!(check_epoll_wait::<8>(epfd, vec![(ready_now, token)]));
+
+    // Close the peer socket.
+    let res = unsafe { libc::close(fds[0]) };
+    assert_eq!(res, 0);
+
+    // After the peer is closed, EPOLLRDHUP is reported together with EPOLLIN and EPOLLOUT.
+    let after_close = u32::try_from(libc::EPOLLRDHUP | libc::EPOLLIN | libc::EPOLLOUT).unwrap();
+    assert!(check_epoll_wait::<8>(epfd, vec![(after_close, token)]));
+}
+
+// Registers a socket, changes the registration through EPOLL_CTL_MOD, and checks that the
+// modified interest (which adds EPOLLRDHUP) is the one that gets reported afterwards.
+fn test_epoll_ctl_mod() {
+    // Set up a fresh epoll instance.
+    let epfd = unsafe { libc::epoll_create1(0) };
+    assert_ne!(epfd, -1);
+
+    // Set up a connected pair of sockets.
+    let mut fds = [-1, -1];
+    let created =
+        unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) };
+    assert_eq!(created, 0);
+
+    // Send five bytes through fds[0] so that fds[1] has data to read.
+    let payload = "abcde".as_bytes().as_ptr();
+    let written: i32 =
+        unsafe { libc::write(fds[0], payload as *const libc::c_void, 5).try_into().unwrap() };
+    assert_eq!(written, 5);
+
+    // Register fds[1] with EPOLLIN | EPOLLOUT | EPOLLET; its own fd number is used as the
+    // event data.
+    // EPOLLET is a negative number as an i32, so it is cast to u32 on its own before the OR.
+    let epollet = libc::EPOLLET as u32;
+    let initial_flags = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap() | epollet;
+    let token = u64::try_from(fds[1]).unwrap();
+    let mut ev = libc::epoll_event { events: initial_flags, u64: token };
+    let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[1], &mut ev) };
+    assert_ne!(res, -1);
+
+    // fds[1] has unread data, so the new registration is reported right away with
+    // EPOLLIN | EPOLLOUT.
+    let ready_now = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap();
+    assert!(check_epoll_wait::<8>(epfd, vec![(ready_now, token)]));
+
+    // Test EPOLL_CTL_MOD: modify the registration of fds[1] so that it additionally asks
+    // for EPOLLRDHUP. The event data stays the same.
+    let modified_flags = initial_flags | u32::try_from(libc::EPOLLRDHUP).unwrap();
+    let mut ev = libc::epoll_event { events: modified_flags, u64: token };
+    let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_MOD, fds[1], &mut ev) };
+    assert_ne!(res, -1);
+
+    // Close the other side of the socketpair to invoke EPOLLRDHUP.
+    let res = unsafe { libc::close(fds[0]) };
+    assert_eq!(res, 0);
+
+    // After the peer is closed, EPOLLRDHUP is reported together with EPOLLIN and
+    // EPOLLOUT.
+    let after_close = u32::try_from(libc::EPOLLRDHUP | libc::EPOLLIN | libc::EPOLLOUT).unwrap();
+    assert!(check_epoll_wait::<8>(epfd, vec![(after_close, token)]));
+}
+
+fn test_epoll_ctl_del() {
+    // Create an epoll instance.
+    let epfd = unsafe { libc::epoll_create1(0) };
+    assert_ne!(epfd, -1);
+
+    // Create a socketpair instance.
+    let mut fds = [-1, -1];
+    let mut res =
+        unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) };
+    assert_eq!(res, 0);
+
+    // Write to fd[0]
+    let data = "abcde".as_bytes().as_ptr();
+    res = unsafe { libc::write(fds[0], data as *const libc::c_void, 5).try_into().unwrap() };
+    assert_eq!(res, 5);
+
+    // Register fd[1] with EPOLLIN|EPOLLOUT|EPOLLET
+    // EPOLLET is negative number for i32 so casting is needed to do proper bitwise OR for u32.
+    let epollet = libc::EPOLLET as u32;
+    let flags = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap() | epollet;
+    let mut ev = libc::epoll_event { events: flags, u64: u64::try_from(fds[1]).unwrap() };
+    let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[1], &mut ev) };
+    assert_ne!(res, -1);
+
+    // Test EPOLL_CTL_DEL: removing the registration must also discard the pending event.
+    let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_DEL, fds[1], &mut ev) };
+    assert_eq!(res, 0);
+    // Use a non-empty buffer so that a left-over notification would actually be observed.
+    assert!(check_epoll_wait::<8>(epfd, vec![]));
+}
+
+// This test is for one fd registered under two different epoll instances.
+fn test_two_epoll_instance() {
+    // Create two epoll instances.
+    let epfd1 = unsafe { libc::epoll_create1(0) };
+    assert_ne!(epfd1, -1);
+    let epfd2 = unsafe { libc::epoll_create1(0) };
+    assert_ne!(epfd2, -1);
+
+    // Set up a connected pair of sockets.
+    let mut fds = [-1, -1];
+    let created =
+        unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) };
+    assert_eq!(created, 0);
+
+    // Send five bytes through fds[0] so that fds[1] has data to read.
+    let payload = "abcde".as_bytes().as_ptr();
+    let written: i32 =
+        unsafe { libc::write(fds[0], payload as *const libc::c_void, 5).try_into().unwrap() };
+    assert_eq!(written, 5);
+
+    // Register fds[1] with EPOLLIN | EPOLLOUT | EPOLLET in both epoll instances, reusing
+    // the same event description.
+    let epollet = libc::EPOLLET as u32;
+    let flags = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap() | epollet;
+    let token = u64::try_from(fds[1]).unwrap();
+    let mut ev = libc::epoll_event { events: flags, u64: token };
+    for epfd in [epfd1, epfd2] {
+        let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[1], &mut ev) };
+        assert_ne!(res, -1);
+    }
+
+    // Notification should be received from both instances of epoll.
+    let expected_event = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap();
+    for epfd in [epfd1, epfd2] {
+        assert!(check_epoll_wait::<8>(epfd, vec![(expected_event, token)]));
+    }
+}
+
+// This test registers the same file description twice in one epoll instance, once through
+// the original fd and once through a `dup` of it. A notification is expected for each.
+fn test_two_same_fd_in_same_epoll_instance() {
+    // Set up a fresh epoll instance.
+    let epfd = unsafe { libc::epoll_create1(0) };
+    assert_ne!(epfd, -1);
+
+    // Set up a connected pair of sockets.
+    let mut fds = [-1, -1];
+    let res = unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) };
+    assert_eq!(res, 0);
+
+    // Duplicate fds[1].
+    let newfd = unsafe { libc::dup(fds[1]) };
+    assert_ne!(newfd, -1);
+
+    // Register both fds with the same epoll instance, using 5 as the event data for both.
+    let epollet = libc::EPOLLET as u32;
+    let flags = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap() | epollet;
+    let token = 5_u64;
+    let mut ev = libc::epoll_event { events: flags, u64: token };
+    for fd in [fds[1], newfd] {
+        let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fd, &mut ev) };
+        assert_ne!(res, -1);
+    }
+
+    // Send five bytes through fds[0] so that the registered file description becomes readable.
+    let payload = "abcde".as_bytes().as_ptr();
+    let written: i32 =
+        unsafe { libc::write(fds[0], payload as *const libc::c_void, 5).try_into().unwrap() };
+    assert_eq!(written, 5);
+
+    // Two notifications should be received, one for each registered fd. Both carry the
+    // same events and the same data.
+    let expected_event = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap();
+    let expected = (expected_event, token);
+    assert!(check_epoll_wait::<8>(epfd, vec![expected, expected]));
+}
+
+fn test_epoll_eventfd() {
+    // Create an eventfd instance.
+    let flags = libc::EFD_NONBLOCK | libc::EFD_CLOEXEC;
+    let fd = unsafe { libc::eventfd(0, flags) };
+    // Fail right here, not in the write below, if the eventfd could not be created.
+    assert_ne!(fd, -1);
+
+    // Write to the eventfd instance.
+    let sized_8_data: [u8; 8] = 1_u64.to_ne_bytes();
+    let res: i32 = unsafe {
+        libc::write(fd, sized_8_data.as_ptr() as *const libc::c_void, 8).try_into().unwrap()
+    };
+    assert_eq!(res, 8);
+
+    // Create an epoll instance.
+    let epfd = unsafe { libc::epoll_create1(0) };
+    assert_ne!(epfd, -1);
+
+    // Register eventfd with EPOLLIN | EPOLLOUT | EPOLLET
+    // EPOLLET is negative number for i32 so casting is needed to do proper bitwise OR for u32.
+    let epollet = libc::EPOLLET as u32;
+    let flags = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap() | epollet;
+    let mut ev = libc::epoll_event { events: flags, u64: u64::try_from(fd).unwrap() };
+    let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fd, &mut ev) };
+    assert_ne!(res, -1);
+
+    // Check result from epoll_wait: after the write above, the eventfd is expected to be
+    // reported as both readable and writable.
+    let expected_event = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap();
+    let expected_value = u64::try_from(fd).unwrap();
+    assert!(check_epoll_wait::<8>(epfd, vec![(expected_event, expected_value)]));
+}
+
+fn test_pointer() {
+    // Create an epoll instance.
+    let epfd = unsafe { libc::epoll_create1(0) };
+    assert_ne!(epfd, -1);
+
+    // Create a socketpair instance.
+    let mut fds = [-1, -1];
+    let res = unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) };
+    assert_eq!(res, 0);
+
+    // Register fd[1] with EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLET, using a pointer address as data.
+    // EPOLLET is negative number for i32 so casting is needed to do proper bitwise OR for u32.
+    let epollet = libc::EPOLLET as u32;
+    let flags = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT | libc::EPOLLRDHUP).unwrap() | epollet;
+    // Keep the pointee in a named local: a pointer into a temporary would already dangle by
+    // the time its address is exposed.
+    let pointee = MaybeUninit::<u64>::uninit();
+    let data = pointee.as_ptr();
+    let mut ev = libc::epoll_event { events: flags, u64: data.expose_provenance() as u64 };
+    let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[1], &mut ev) };
+    assert_ne!(res, -1);
+}
+
+// When a read/write happens on one side of the socketpair, only the other side is notified.
+fn test_epoll_socketpair_special_case() {
+    // Set up a fresh epoll instance.
+    let epfd = unsafe { libc::epoll_create1(0) };
+    assert_ne!(epfd, -1);
+
+    // Set up a connected pair of sockets.
+    let mut fds = [-1, -1];
+    let res = unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) };
+    assert_eq!(res, 0);
+
+    // Register both ends with the same epoll instance. Each registration carries its own
+    // fd number as the event data.
+    let epollet = libc::EPOLLET as u32;
+    let flags = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap() | epollet;
+    for fd in fds {
+        let mut ev = libc::epoll_event { events: flags, u64: fd as u64 };
+        let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fd, &mut ev) };
+        assert_ne!(res, -1);
+    }
+
+    // Write to fds[1].
+    let payload = "abcde".as_bytes().as_ptr();
+    let written: i32 =
+        unsafe { libc::write(fds[1], payload as *const libc::c_void, 5).try_into().unwrap() };
+    assert_eq!(written, 5);
+
+    // Two notifications should be received: fds[0] is readable and writable, fds[1] is only
+    // writable. `check_epoll_wait` consumes its expectations from the back, so the
+    // notification for fds[0] is listed last.
+    let readable_writable = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap();
+    let writable = u32::try_from(libc::EPOLLOUT).unwrap();
+    assert!(check_epoll_wait::<8>(
+        epfd,
+        vec![(writable, fds[1] as u64), (readable_writable, fds[0] as u64)]
+    ));
+
+    // Read from fds[0].
+    let mut buf: [u8; 5] = [0; 5];
+    let read_len: i32 = unsafe {
+        libc::read(fds[0], buf.as_mut_ptr().cast(), buf.len() as libc::size_t).try_into().unwrap()
+    };
+    assert_eq!(read_len, 5);
+    assert_eq!(buf, "abcde".as_bytes());
+
+    // The read happened on fds[0], so a notification should be provided for the other
+    // side, fds[1].
+    assert!(check_epoll_wait::<8>(epfd, vec![(writable, fds[1] as u64)]));
+}
+
+// When a file description is fully closed, epoll_wait should not provide any notification for
+// that file description.
+fn test_closed_fd() {
+    // Create an epoll instance.
+    let epfd = unsafe { libc::epoll_create1(0) };
+    assert_ne!(epfd, -1);
+
+    // Create an eventfd instance with EFD_NONBLOCK | EFD_CLOEXEC.
+    let flags = libc::EFD_NONBLOCK | libc::EFD_CLOEXEC;
+    let fd = unsafe { libc::eventfd(0, flags) };
+
+    // Register the eventfd with EPOLLIN | EPOLLOUT | EPOLLET, using the fd number as event data.
+    // EPOLLET is a negative number as i32, so it is cast to u32 separately before the bitwise OR.
+    let epollet = libc::EPOLLET as u32;
+    let flags = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap() | epollet;
+    let mut ev = libc::epoll_event {
+        events: u32::try_from(flags).unwrap(),
+        u64: u64::try_from(fd).unwrap(),
+    };
+    let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fd, &mut ev) };
+    assert_ne!(res, -1);
+
+    // Write the 8-byte native-endian encoding of 1 to the eventfd instance.
+    let sized_8_data: [u8; 8] = 1_u64.to_ne_bytes();
+    let res: i32 = unsafe {
+        libc::write(fd, sized_8_data.as_ptr() as *const libc::c_void, 8).try_into().unwrap()
+    };
+    assert_eq!(res, 8);
+
+    // Close the eventfd; this test never duplicates `fd`, so no other descriptor remains.
+    let res = unsafe { libc::close(fd) };
+    assert_eq!(res, 0);
+
+    // No notification should be provided because the file description is closed.
+    assert!(check_epoll_wait::<8>(epfd, vec![]));
+}
+
+// When a file descriptor registered with epoll is closed, but the underlying file description
+// is not closed, notifications should still be provided.
+//
+// This is a quirk of epoll described in https://man7.org/linux/man-pages/man7/epoll.7.html:
+// A file descriptor is removed from an interest list only after all the file descriptors
+// referring to the underlying open file description have been closed.
+fn test_not_fully_closed_fd() {
+    // Create an epoll instance.
+    let epfd = unsafe { libc::epoll_create1(0) };
+    assert_ne!(epfd, -1);
+
+    // Create an eventfd instance with EFD_NONBLOCK | EFD_CLOEXEC.
+    let flags = libc::EFD_NONBLOCK | libc::EFD_CLOEXEC;
+    let fd = unsafe { libc::eventfd(0, flags) };
+
+    // Dup the fd, so that `newfd` is a second descriptor obtained from `fd`.
+    let newfd = unsafe { libc::dup(fd) };
+    assert_ne!(newfd, -1);
+
+    // Register the original `fd` (not `newfd`) with EPOLLIN | EPOLLOUT | EPOLLET.
+    // EPOLLET is a negative number as i32, so it is cast to u32 separately before the bitwise OR.
+    let epollet = libc::EPOLLET as u32;
+    let flags = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap() | epollet;
+    let mut ev = libc::epoll_event {
+        events: u32::try_from(flags).unwrap(),
+        u64: u64::try_from(fd).unwrap(),
+    };
+    let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fd, &mut ev) };
+    assert_ne!(res, -1);
+
+    // Close the original fd that was used to register with epoll.
+    let res = unsafe { libc::close(fd) };
+    assert_eq!(res, 0);
+
+    // Notification should still be provided because the file description is not closed.
+    let expected_event = u32::try_from(libc::EPOLLOUT).unwrap();
+    let expected_value = fd as u64;
+    assert!(check_epoll_wait::<1>(epfd, vec![(expected_event, expected_value)]));
+
+    // Write to the eventfd instance through `newfd` to produce a notification.
+    let sized_8_data: [u8; 8] = 1_u64.to_ne_bytes();
+    let res: i32 = unsafe {
+        libc::write(newfd, sized_8_data.as_ptr() as *const libc::c_void, 8).try_into().unwrap()
+    };
+    assert_eq!(res, 8);
+
+    // Close the dupped fd.
+    let res = unsafe { libc::close(newfd) };
+    assert_eq!(res, 0);
+
+    // No notification should be provided now that both `fd` and `newfd` have been closed.
+    assert!(check_epoll_wait::<1>(epfd, vec![]));
+}
+
+// Each time a notification is provided, it should reflect the file description's readiness
+// at the moment the latest event occurred.
+fn test_event_overwrite() {
+    // Create an eventfd instance with EFD_NONBLOCK | EFD_CLOEXEC.
+    let flags = libc::EFD_NONBLOCK | libc::EFD_CLOEXEC;
+    let fd = unsafe { libc::eventfd(0, flags) };
+
+    // Write to the eventfd instance before it is registered with any epoll instance.
+    let sized_8_data: [u8; 8] = 1_u64.to_ne_bytes();
+    let res: i32 = unsafe {
+        libc::write(fd, sized_8_data.as_ptr() as *const libc::c_void, 8).try_into().unwrap()
+    };
+    assert_eq!(res, 8);
+
+    // Create an epoll instance.
+    let epfd = unsafe { libc::epoll_create1(0) };
+    assert_ne!(epfd, -1);
+
+    // Register the eventfd with EPOLLIN | EPOLLOUT | EPOLLET, using the fd number as event data.
+    // EPOLLET is a negative number as i32, so it is cast to u32 separately before the bitwise OR.
+    let epollet = libc::EPOLLET as u32;
+    let flags = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap() | epollet;
+    let mut ev = libc::epoll_event {
+        events: u32::try_from(flags).unwrap(),
+        u64: u64::try_from(fd).unwrap(),
+    };
+    let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fd, &mut ev) };
+    assert_ne!(res, -1);
+
+    // Read 8 bytes from the eventfd instance after registration.
+    let mut buf: [u8; 8] = [0; 8];
+    let res: i32 = unsafe { libc::read(fd, buf.as_mut_ptr().cast(), 8).try_into().unwrap() };
+    assert_eq!(res, 8);
+
+    // Check the result from epoll_wait: only EPOLLOUT is expected after the read above.
+    let expected_event = u32::try_from(libc::EPOLLOUT).unwrap();
+    let expected_value = u64::try_from(fd).unwrap();
+    assert!(check_epoll_wait::<8>(epfd, vec![(expected_event, expected_value)]));
+}
+
+// An epoll notification will be provided for every successful read in a socketpair.
+// This behaviour differs from that of a real system.
+fn test_socketpair_read() {
+    // Create an epoll instance.
+    let epfd = unsafe { libc::epoll_create1(0) };
+    assert_ne!(epfd, -1);
+
+    // Create a pair of AF_UNIX stream sockets; both fds are written into `fds`.
+    let mut fds = [-1, -1];
+    let res = unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) };
+    assert_eq!(res, 0);
+
+    // Register both fds with the same epoll instance; each event's data is its own fd number.
+    let epollet = libc::EPOLLET as u32;
+    let flags = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap() | epollet;
+    let mut ev = libc::epoll_event { events: u32::try_from(flags).unwrap(), u64: fds[0] as u64 };
+    let mut res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[0], &mut ev) };
+    assert_ne!(res, -1);
+    let mut ev = libc::epoll_event { events: u32::try_from(flags).unwrap(), u64: fds[1] as u64 };
+    res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[1], &mut ev) };
+    assert_ne!(res, -1);
+
+    // Write 5 bytes to fds[1].
+    let data = "abcde".as_bytes().as_ptr();
+    res = unsafe { libc::write(fds[1], data as *const libc::c_void, 5).try_into().unwrap() };
+    assert_eq!(res, 5);
+
+    // Two notifications should be received: EPOLLOUT for fds[1], EPOLLIN|EPOLLOUT for fds[0].
+    let expected_event0 = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap();
+    let expected_value0 = fds[0] as u64;
+    let expected_event1 = u32::try_from(libc::EPOLLOUT).unwrap();
+    let expected_value1 = fds[1] as u64;
+    assert!(check_epoll_wait::<8>(
+        epfd,
+        vec![(expected_event1, expected_value1), (expected_event0, expected_value0)]
+    ));
+
+    // Read 3 of the 5 bytes from fds[0].
+    let mut buf: [u8; 3] = [0; 3];
+    res = unsafe {
+        libc::read(fds[0], buf.as_mut_ptr().cast(), buf.len() as libc::size_t).try_into().unwrap()
+    };
+    assert_eq!(res, 3);
+    assert_eq!(buf, "abc".as_bytes());
+
+    // A notification (EPOLLOUT) will be provided for fds[1].
+    // But on a real system, no notification would be provided here.
+    let expected_event = u32::try_from(libc::EPOLLOUT).unwrap();
+    let expected_value = fds[1] as u64;
+    assert!(check_epoll_wait::<8>(epfd, vec![(expected_event, expected_value)]));
+
+    // Read the remaining 2 bytes so that the buffer is empty.
+    let mut buf: [u8; 2] = [0; 2];
+    res = unsafe {
+        libc::read(fds[0], buf.as_mut_ptr().cast(), buf.len() as libc::size_t).try_into().unwrap()
+    };
+    assert_eq!(res, 2);
+    assert_eq!(buf, "de".as_bytes());
+
+    // A notification (EPOLLOUT) will be provided for fds[1].
+    // On a real system, a notification would be provided here too.
+    let expected_event = u32::try_from(libc::EPOLLOUT).unwrap();
+    let expected_value = fds[1] as u64;
+    assert!(check_epoll_wait::<8>(epfd, vec![(expected_event, expected_value)]));
+}