From ecbbaf72e01293b410ef3bc5970d18aa81e45599 Mon Sep 17 00:00:00 2001
From: Daniel Prilik <danielprilik@gmail.com>
Date: Sat, 19 Jun 2021 13:59:48 -0700
Subject: [PATCH] remove all panicking code from `gdbstub`!!

this is something I've been meaning to do for a _long_ time now, and
after many failed attempts and false-starts, I've finally done it! if
you inspect the asm output of `example_no_std`, you'll find that there
is absolutely _zero_ panic handling machinery in the final binary!

This has also resulted in a _substantial_ drop in binary size, as those
bounds checks and panicking machinery were taking up a lot of space!

removing panicking code ended up requiring 3 different approaches:

1. Rewriting array-indexing operations to use simpler indexing, which
the compiler is able to optimize better. e.g: see the code used to index
into the `target.xml` buffer in `base.rs`
2. Adding a sprinkle of unsafe code to certain array-indexing operations
that the compiler is unsable to prove are safe. This was only done in
two places: `decode_hex_buf` and `PacketBuf`.
3. Manually re-implementing the standard library's `slice::split_mut`
and `slice::splitn_mut` methods to elide a bounds check.
  - Tracked upstream via https://github.com/rust-lang/rust/issues/86313

Introducing unsafe code isn't something I take lightly, and while I've
done my best to audit and validate the unsafe code I've written, I did
end up including an optional `paranoid_unsafe` feature which gives
end-users the option to opt-out of `gdbstub`'s no-panic guarantee in
exchange for some additional peace of mind.
---
 Cargo.toml                          |   1 +
 README.md                           |  26 ++++--
 example_no_std/Cargo.toml           |   1 +
 src/gdbstub_impl/ext/base.rs        |  20 ++---
 src/protocol/commands.rs            |   1 +
 src/protocol/commands/_m.rs         |  13 +--
 src/protocol/commands/_m_upcase.rs  |  10 +--
 src/protocol/commands/_vRun.rs      |   2 +-
 src/protocol/commands/breakpoint.rs |  10 +--
 src/protocol/common/hex.rs          |  75 ++++++++++++++++-
 src/protocol/packet.rs              |  57 ++++++++++---
 src/protocol/response_writer.rs     |  17 +++-
 src/util/mod.rs                     |   1 +
 src/util/no_panic_iter.rs           | 126 ++++++++++++++++++++++++++++
 14 files changed, 307 insertions(+), 53 deletions(-)
 create mode 100644 src/util/no_panic_iter.rs

diff --git a/Cargo.toml b/Cargo.toml
index 0c176ea0..c180951a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -31,6 +31,7 @@ goblin = "0.2"
 default = ["std"]
 alloc = ["managed/alloc"]
 std = ["alloc"]
+paranoid_unsafe = []
 
 # INTERNAL: enables the `__dead_code_marker!` macro.
 # used as part of the `scripts/test_dead_code_elim.sh`
diff --git a/README.md b/README.md
index b4750aa4..aebb263d 100644
--- a/README.md
+++ b/README.md
@@ -22,17 +22,19 @@ Why use `gdbstub`?
         -   Organizes GDB's countless optional protocol extensions into a coherent, understandable, and type-safe hierarchy of traits.
         -   Automatically handles client/server protocol feature negotiation, without needing to micro-manage the specific [`qSupported` packet](https://sourceware.org/gdb/onlinedocs/gdb/General-Query-Packets.html#qSupported) response.
     -   `gdbstub` makes _extensive_ use of Rust's powerful type system + generics to enforce protocol invariants at compile time, minimizing the number of tricky protocol details end users have to worry about.
-    -   Using a novel technique called [**Inlineable Dyn Extension Traits**](#zero-overhead-protocol-extensions) (IDETs), `gdbstub` enables fine-grained control over active protocol extensions _without_ relying on clunky `cargo` features.
+    -   Using a novel technique called [**Inlineable Dyn Extension Traits**](#zero-overhead-protocol-extensions) (IDETs), `gdbstub` enables fine-grained control over active protocol extensions _without_ relying on clunky `cargo` features or the use of `unsafe` code!
 -   **Easy to Integrate**
     -   `gdbstub`'s API is designed to be as unobtrusive as possible, and shouldn't require any large refactoring effort to integrate into an existing project. It doesn't require taking direct ownership of any key data structures, and aims to be a "drop in" solution when you need to add debugging to a project.
 -   **`#![no_std]` Ready & Size Optimized**
     -   `gdbstub` is a **`no_std` first** library, whereby all protocol features are required to be `no_std` compatible.
     -   `gdbstub` does not require _any_ dynamic memory allocation, and can be configured to use fixed-size, pre-allocated buffers. This enables `gdbstub` to be used on even the most resource constrained, no-[`alloc`](https://doc.rust-lang.org/alloc/) platforms.
-    -   `gdbstub` is transport-layer agnostic, and uses a basic [`Connection`](https://docs.rs/gdbstub/latest/gdbstub/trait.Connection.html) interface to communicate with the GDB server. As long as target has some method of performing in-order, serial, byte-wise I/O (e.g: putchar/getchar over UART), it's possible to run `gdbstub` on it.
-    -   "You don't pay for what you don't use": All code related to parsing/handling protocol extensions is guaranteed to be dead-code-eliminated from an optimized binary if left unimplmeneted! See the [Zero-overhead Protocol Extensions](#zero-overhead-protocol-extensions) section below for more details.
-    -   `gdbstub` tries to keep the binary and RAM footprint of its minimal configuration to a bare minimum, enabling it to be used on even the most resource-constrained microcontrollers.
-        -   When compiled in release mode, using all the tricks outlined in [`min-sized-rust`](https://github.com/johnthagen/min-sized-rust), a baseline `gdbstub` implementation weighs in at **_roughly 10kb of `.text` and negligible `.rodata`!_** \*
-        - \* Exact numbers vary by target platform, compiler version, and `gdbstub` revision. Data was collected using the included `example_no_std` project compiled on x86_64.
+    -   `gdbstub` is entirely **panic free** (when compiled in release mode, without the `paranoid_unsafe` cargo feature).
+        -   Validated by inspecting the asm output of the in-tree `example_no_std`.
+    -   `gdbstub` is transport-layer agnostic, and uses a basic [`Connection`](https://docs.rs/gdbstub/latest/gdbstub/trait.Connection.html) interface to communicate with the GDB server. As long as target has some method of performing in-order, serial, byte-wise I/O (e.g: putchar/getchar over UART), it's possible to run `gdbstub` on it!
+    -   "You don't pay for what you don't use": All code related to parsing/handling protocol extensions is guaranteed to be dead-code-eliminated from an optimized binary if left unimplemented! See the [Zero-overhead Protocol Extensions](#zero-overhead-protocol-extensions) section below for more details.
+    -   `gdbstub`'s minimal configuration has an incredibly low binary size + RAM overhead, enabling it to be used on even the most resource-constrained microcontrollers.
+        -   When compiled in release mode, using all the tricks outlined in [`min-sized-rust`](https://github.com/johnthagen/min-sized-rust), a baseline `gdbstub` implementation can weigh in at **_less than 10kb of `.text` + `.rodata`!_** \*
+        - \*Exact numbers vary by target platform, compiler version, and `gdbstub` revision. Data was collected using the included `example_no_std` project compiled on x86_64.
 
 ### Can I Use `gdbstub` in Production?
 
@@ -102,6 +104,11 @@ When using `gdbstub` in `#![no_std]` contexts, make sure to set `default-feature
     -   Implement `Connection` for [`TcpStream`](https://doc.rust-lang.org/std/net/struct.TcpStream.html) and [`UnixStream`](https://doc.rust-lang.org/std/os/unix/net/struct.UnixStream.html).
     -   Implement [`std::error::Error`](https://doc.rust-lang.org/std/error/trait.Error.html) for `gdbstub::Error`.
     -   Add a `TargetError::Io` variant to simplify `std::io::Error` handling from Target methods.
+-   `paranoid_unsafe`
+    -   Enabling the `paranoid_unsafe` feature will swap out a handful of unsafe `get_unchecked_mut` operations with their safe equivalents, at the expense of introducing panicking code into `gdbstub`.
+        -   `rustc` + LLVM do a pretty incredible job at eliding bounds checks... most of the time. Unfortunately, there are a few places in the code where the compiler is not smart enough to "prove" that a bounds check isn't needed, and a bit of unsafe code is required to remove those bounds checks.
+    -   This feature is **disabled** by default, as the unsafe code has been aggressively audited and tested for correctness. That said, if you're particularly paranoid about the use of unsafe code, enabling this feature may offer some piece of mind.
+    -   Please refer to the [`unsafe` in `gdbstub`](#unsafe-in-gdbstub) section below for more details.
 
 ## Examples
 
@@ -150,8 +157,13 @@ If you happen to stumble across this crate and end up using it to debug some bar
 
 -   When no cargo features are enabled:
     -   A few trivially safe calls to `NonZeroUsize::new_unchecked()` when defining internal constants.
+
+-   When the `paranoid_unsafe` feature is enabled, the following `unsafe` code is _removed_:
+    -   `src/protocol/packet.rs`: Swaps a couple slice-index methods in `PacketBuf` to use `get_unchecked_mut`. The public API of struct ensures that the bounds used to index into the array remain in-bounds.
+    -   `src/protocol/common/hex`: Use an alternate implementation of `decode_hex_buf` which uses unsafe slice indexing.
+
 -   When the `std` feature is enabled:
-    -   An implementation of `UnixStream::peek` which uses `libc::recv`. This manual implementation will be removed once [rust-lang/rust#76923](https://github.com/rust-lang/rust/issues/76923) is stabilized.
+    -   `src/connection/impls/unixstream.rs`: An implementation of `UnixStream::peek` which uses `libc::recv`. This manual implementation will be removed once [rust-lang/rust#76923](https://github.com/rust-lang/rust/issues/76923) is stabilized.
 
 ## Future Plans + Roadmap to `1.0.0`
 
diff --git a/example_no_std/Cargo.toml b/example_no_std/Cargo.toml
index ddd75b2e..87a0db17 100644
--- a/example_no_std/Cargo.toml
+++ b/example_no_std/Cargo.toml
@@ -19,3 +19,4 @@ panic = "abort"
 opt-level = 's'  # Optimize for size.
 lto = true
 codegen-units = 1
+debug = true
diff --git a/src/gdbstub_impl/ext/base.rs b/src/gdbstub_impl/ext/base.rs
index 92fde3f9..db3bcb04 100644
--- a/src/gdbstub_impl/ext/base.rs
+++ b/src/gdbstub_impl/ext/base.rs
@@ -130,22 +130,20 @@ impl<T: Target, C: Connection> GdbStubImpl<T, C> {
 
                 match xml {
                     Some(xml) => {
-                        let xml = xml.trim();
-                        if cmd.offset >= xml.len() {
-                            // no more data
-                            res.write_str("l")?;
-                        } else if cmd.offset + cmd.len >= xml.len() {
-                            // last little bit of data
-                            res.write_str("l")?;
-                            res.write_binary(&xml.as_bytes()[cmd.offset..])?
-                        } else {
+                        let xml = xml.trim().as_bytes();
+                        if cmd.offset < xml.len() {
                             // still more data
                             res.write_str("m")?;
-                            res.write_binary(&xml.as_bytes()[cmd.offset..(cmd.offset + cmd.len)])?
+                            res.write_binary(
+                                &xml[cmd.offset..][..cmd.len.min(xml.len() - cmd.offset)],
+                            )?
+                        } else {
+                            // no more data
+                            res.write_str("l")?;
                         }
                     }
                     // If the target hasn't provided their own XML, then the initial response to
-                    // "qSupported" wouldn't have included  "qXfer:features:read", and gdb wouldn't
+                    // "qSupported" wouldn't have included "qXfer:features:read", and gdb wouldn't
                     // send this packet unless it was explicitly marked as supported.
                     None => return Err(Error::PacketUnexpected),
                 }
diff --git a/src/protocol/commands.rs b/src/protocol/commands.rs
index 0c94194b..e70e71a1 100644
--- a/src/protocol/commands.rs
+++ b/src/protocol/commands.rs
@@ -13,6 +13,7 @@ pub(self) mod prelude {
     };
     pub use crate::protocol::common::Bstr;
     pub use crate::protocol::packet::PacketBuf;
+    pub use crate::util::no_panic_iter::SliceExt;
     pub use core::convert::{TryFrom, TryInto};
 }
 
diff --git a/src/protocol/commands/_m.rs b/src/protocol/commands/_m.rs
index f7570ad9..4b879f46 100644
--- a/src/protocol/commands/_m.rs
+++ b/src/protocol/commands/_m.rs
@@ -29,11 +29,9 @@ impl<'a> ParseCommand<'a> for m<'a> {
         // +------+------------------+------------------------------------------------+
 
         let (buf, body_range) = buf.into_raw_buf();
-        let body = &mut buf[body_range.start..];
+        let body = buf.get_mut(body_range.start..body_range.end)?;
 
-        // should return 3 slices: the addr (hex-encoded), len (hex-encoded), and the
-        // "rest" of the buffer
-        let mut body = body.split_mut(|b| *b == b',' || *b == b'#');
+        let mut body = body.split_mut_no_panic(|b| *b == b',');
 
         let addr = decode_hex_buf(body.next()?).ok()?;
         let addr_len = addr.len();
@@ -41,8 +39,13 @@ impl<'a> ParseCommand<'a> for m<'a> {
 
         drop(body);
 
+        // ensures that `split_at_mut` doesn't panic
+        if buf.len() < body_range.start + addr_len {
+            return None;
+        }
+
         let (addr, buf) = buf.split_at_mut(body_range.start + addr_len);
-        let addr = &addr[b"$m".len()..];
+        let addr = addr.get(b"$m".len()..)?;
 
         Some(m { addr, len, buf })
     }
diff --git a/src/protocol/commands/_m_upcase.rs b/src/protocol/commands/_m_upcase.rs
index 49773aa1..0e4f9a6a 100644
--- a/src/protocol/commands/_m_upcase.rs
+++ b/src/protocol/commands/_m_upcase.rs
@@ -11,15 +11,11 @@ impl<'a> ParseCommand<'a> for M<'a> {
     fn from_packet(buf: PacketBuf<'a>) -> Option<Self> {
         let body = buf.into_body();
 
-        let mut body = body.split_mut(|&b| b == b',' || b == b':');
+        let mut body = body.split_mut_no_panic(|&b| b == b',' || b == b':');
         let addr = decode_hex_buf(body.next()?).ok()?;
         let len = decode_hex(body.next()?).ok()?;
-        let val = body.next()?;
+        let val = decode_hex_buf(body.next()?).ok()?;
 
-        Some(M {
-            addr,
-            len,
-            val: decode_hex_buf(val).ok()?,
-        })
+        Some(M { addr, len, val })
     }
 }
diff --git a/src/protocol/commands/_vRun.rs b/src/protocol/commands/_vRun.rs
index e5a37fdf..b177a9cf 100644
--- a/src/protocol/commands/_vRun.rs
+++ b/src/protocol/commands/_vRun.rs
@@ -10,7 +10,7 @@ impl<'a> ParseCommand<'a> for vRun<'a> {
     fn from_packet(buf: PacketBuf<'a>) -> Option<Self> {
         let body = buf.into_body();
 
-        let mut body = body.splitn_mut(3, |b| *b == b';');
+        let mut body = body.splitn_mut_no_panic(3, |b| *b == b';');
 
         let _first_semi = body.next()?;
         let filename = match decode_hex_buf(body.next()?).ok()? {
diff --git a/src/protocol/commands/breakpoint.rs b/src/protocol/commands/breakpoint.rs
index edc9ecaf..46b9bb6e 100644
--- a/src/protocol/commands/breakpoint.rs
+++ b/src/protocol/commands/breakpoint.rs
@@ -1,7 +1,5 @@
-pub use crate::common::*;
-pub use crate::protocol::common::hex::{decode_hex, decode_hex_buf};
-pub use crate::protocol::packet::PacketBuf;
-pub use core::convert::{TryFrom, TryInto};
+use crate::protocol::common::hex::{decode_hex, decode_hex_buf};
+use crate::util::no_panic_iter::SliceExt;
 
 // Breakpoint packets are split up like this:
 //
@@ -26,7 +24,7 @@ pub struct BasicBreakpoint<'a> {
 
 impl<'a> BasicBreakpoint<'a> {
     pub fn from_slice(body: &'a mut [u8]) -> Option<BasicBreakpoint<'a>> {
-        let mut body = body.splitn_mut(4, |b| matches!(*b, b',' | b';'));
+        let mut body = body.splitn_mut_no_panic(4, |b| matches!(*b, b',' | b';'));
         let type_ = decode_hex(body.next()?).ok()?;
         let addr = decode_hex_buf(body.next()?).ok()?;
         let kind = decode_hex_buf(body.next()?).ok()?;
@@ -44,7 +42,7 @@ pub struct BytecodeBreakpoint<'a> {
 
 impl<'a> BytecodeBreakpoint<'a> {
     pub fn from_slice(body: &'a mut [u8]) -> Option<BytecodeBreakpoint<'a>> {
-        let mut body = body.splitn_mut(2, |b| *b == b';');
+        let mut body = body.splitn_mut_no_panic(2, |b| *b == b';');
 
         let base = BasicBreakpoint::from_slice(body.next()?)?;
 
diff --git a/src/protocol/common/hex.rs b/src/protocol/common/hex.rs
index 4c3170b9..2d13b232 100644
--- a/src/protocol/common/hex.rs
+++ b/src/protocol/common/hex.rs
@@ -8,7 +8,7 @@ pub enum DecodeHexError {
     InvalidOutput,
 }
 
-/// Decode a GDB dex string into the specified integer.
+/// Decode a GDB hex string into the specified integer.
 ///
 /// GDB hex strings may include "xx", which represent "missing" data. This
 /// method simply treats "xx" as 0x00.
@@ -35,7 +35,7 @@ where
     Ok(result)
 }
 
-/// Wrapper around a raw hex string. Enabled "late" calls to `decode` from
+/// Wrapper around a raw hex string. Enables "late" calls to `decode` from
 /// outside the `crate::protocol` module.
 #[derive(Debug, Clone, Copy)]
 pub struct HexString<'a>(pub &'a [u8]);
@@ -64,7 +64,7 @@ fn ascii2byte(c: u8) -> Option<u8> {
     }
 }
 
-/// Check if the byte `c` is a valid GDB hex digit `[0-9][a-f][A-F][xX]`
+/// Check if the byte `c` is a valid GDB hex digit `[0-9a-fA-FxX]`
 #[allow(clippy::match_like_matches_macro)]
 pub fn is_hex(c: u8) -> bool {
     match c {
@@ -81,7 +81,74 @@ pub fn is_hex(c: u8) -> bool {
 /// GDB hex strings may include "xx", which represent "missing" data. This
 /// method simply treats "xx" as 0x00.
 // TODO: maybe don't blindly translate "xx" as 0x00?
-// TODO: rewrite this method to elide bound checks
+#[cfg(not(feature = "paranoid_unsafe"))]
+pub fn decode_hex_buf(base_buf: &mut [u8]) -> Result<&mut [u8], DecodeHexBufError> {
+    use DecodeHexBufError::*;
+
+    if base_buf.is_empty() {
+        return Ok(&mut []);
+    }
+
+    let odd_adust = base_buf.len() % 2;
+    if odd_adust != 0 {
+        base_buf[0] = ascii2byte(base_buf[0]).ok_or(NotAscii)?;
+    }
+
+    let buf = &mut base_buf[odd_adust..];
+
+    let decoded_len = buf.len() / 2;
+    for i in 0..decoded_len {
+        // SAFETY: rustc isn't smart enough to automatically elide these bound checks.
+        //
+        // If buf.len() == 0 or 1: trivially safe, since the for block is never taken
+        // If buf.len() >= 2: the range of values for `i` is 0..(buf.len() / 2 - 1)
+        let (hi, lo, b) = unsafe {
+            (
+                //    (buf.len() / 2 - 1) * 2
+                // == (buf.len() - 2)
+                // since buf.len() is >2, this is in-bounds
+                *buf.get_unchecked(i * 2),
+                //    (buf.len() / 2 - 1) * 2 + 1
+                // == (buf.len() - 1)
+                // since buf.len() is >2, this is in-bounds
+                *buf.get_unchecked(i * 2 + 1),
+                // since buf.len() is >2, (buf.len() / 2 - 1) is always in-bounds
+                buf.get_unchecked_mut(i),
+            )
+        };
+
+        let hi = ascii2byte(hi).ok_or(NotAscii)?;
+        let lo = ascii2byte(lo).ok_or(NotAscii)?;
+        *b = hi << 4 | lo;
+    }
+
+    // SAFETY: rustc isn't smart enough to automatically elide this bound check.
+    //
+    // Consider the different values (decoded_len + odd_adust) can take:
+    //
+    //  buf.len() | (decoded_len + odd_adust)
+    // -----------|---------------------------
+    //      0     | (0 + 0) == 0
+    //      1     | (0 + 1) == 1
+    //      2     | (1 + 0) == 1
+    //      3     | (1 + 1) == 2
+    //      4     | (2 + 0) == 2
+    //      5     | (2 + 1) == 3
+    //
+    // Note that the computed index is always in-bounds.
+    //
+    // If I were still in undergrad, I could probably have whipped up a proper
+    // mathematical proof by induction or whatnot, but hopefully this "proof by
+    // example" ought to suffice.
+    unsafe { Ok(base_buf.get_unchecked_mut(..decoded_len + odd_adust)) }
+}
+
+/// Decode a GDB hex string into a byte slice _in place_.
+///
+/// GDB hex strings may include "xx", which represent "missing" data. This
+/// method simply treats "xx" as 0x00.
+// TODO: maybe don't blindly translate "xx" as 0x00?
+#[cfg(feature = "paranoid_unsafe")]
 pub fn decode_hex_buf(base_buf: &mut [u8]) -> Result<&mut [u8], DecodeHexBufError> {
     use DecodeHexBufError::*;
 
diff --git a/src/protocol/packet.rs b/src/protocol/packet.rs
index 9fbf9779..61704249 100644
--- a/src/protocol/packet.rs
+++ b/src/protocol/packet.rs
@@ -22,6 +22,18 @@ pub enum Packet<'a> {
     Command(Command<'a>),
 }
 
+/// Wrapper around a byte buffer containing a GDB packet, while also tracking
+/// the range of the buffer containing the packet's "body".
+///
+/// A newly constructed `PacketBuf` will have a body that spans the entire data
+/// portion of the packet (i.e: `b"$data#checksum"`), but this range can be
+/// further restricted as part of packet parsing.
+///
+/// Notably, `PacketBuf` will _always_ maintain a mutable reference back to the
+/// _entire_ underlying packet buffer. This makes it possible to re-use any
+/// unused buffer space as "scratch" space. One notable example of this use-case
+/// is the 'm' packet, which recycles unused packet buffer space as a buffer for
+/// the target's `read_memory` method.
 pub struct PacketBuf<'a> {
     buf: &'a mut [u8],
     body_range: core::ops::Range<usize>,
@@ -38,7 +50,7 @@ impl<'a> PacketBuf<'a> {
         // split buffer into body and checksum components
         let mut parts = pkt_buf[1..].split(|b| *b == b'#');
 
-        let body = parts.next().unwrap(); // spit iter always returns at least one elem
+        let body = parts.next().unwrap(); // spit iter always returns at least one element
         let checksum = parts
             .next()
             .ok_or(PacketParseError::MissingChecksum)?
@@ -60,11 +72,11 @@ impl<'a> PacketBuf<'a> {
             });
         }
 
-        let end_of_body = 1 + body.len();
+        let body_range = 1..(body.len() + 1); // compensate for the leading '$'
 
         Ok(PacketBuf {
             buf: pkt_buf,
-            body_range: 1..end_of_body,
+            body_range,
         })
     }
 
@@ -84,8 +96,24 @@ impl<'a> PacketBuf<'a> {
         })
     }
 
+    /// Strip the specified prefix from the packet buffer, returning `true` if
+    /// there was a prefix match.
     pub fn strip_prefix(&mut self, prefix: &[u8]) -> bool {
-        if self.buf[self.body_range.clone()].starts_with(prefix) {
+        let body = {
+            // SAFETY: The public interface of `PacketBuf` ensures that `self.body_range`
+            // always stays within the bounds of the provided buffer.
+            #[cfg(not(feature = "paranoid_unsafe"))]
+            unsafe {
+                self.buf.get_unchecked_mut(self.body_range.clone())
+            }
+
+            #[cfg(feature = "paranoid_unsafe")]
+            &mut self.buf[self.body_range.clone()]
+        };
+
+        if body.starts_with(prefix) {
+            // SAFETY: if the current buffer range `starts_with` the specified prefix, then
+            // it is safe to bump `body_range.start` by the prefix length.
             self.body_range = (self.body_range.start + prefix.len())..self.body_range.end;
             true
         } else {
@@ -93,14 +121,22 @@ impl<'a> PacketBuf<'a> {
         }
     }
 
-    /// Return a mut reference to slice of the packet buffer corresponding to
-    /// the current body.
+    /// Return a mutable reference to slice of the packet buffer corresponding
+    /// to the current body.
     pub fn into_body(self) -> &'a mut [u8] {
-        &mut self.buf[self.body_range]
+        // SAFETY: The public interface of `PacketBuf` ensures that `self.body_range`
+        // always stays within the bounds of the provided buffer.
+        #[cfg(not(feature = "paranoid_unsafe"))]
+        unsafe {
+            self.buf.get_unchecked_mut(self.body_range.clone())
+        }
+
+        #[cfg(feature = "paranoid_unsafe")]
+        &mut self.buf[self.body_range.clone()]
     }
 
-    /// Return a mut reference to the _entire_ underlying packet buffer, and the
-    /// current body's range.
+    /// Return a mutable reference to the _entire_ underlying packet buffer, and
+    /// the current body's range.
     pub fn into_raw_buf(self) -> (&'a mut [u8], core::ops::Range<usize>) {
         (self.buf, self.body_range)
     }
@@ -108,7 +144,8 @@ impl<'a> PacketBuf<'a> {
     /// Returns the length of the _entire_ underlying packet buffer - not just
     /// the length of the current range.
     ///
-    /// This method is used when handing the `qSupported` packet.
+    /// This method is used when handing the `qSupported` packet in order to
+    /// obtain the maximum packet size the stub supports.
     pub fn full_len(&self) -> usize {
         self.buf.len()
     }
diff --git a/src/protocol/response_writer.rs b/src/protocol/response_writer.rs
index 484bd609..96615365 100644
--- a/src/protocol/response_writer.rs
+++ b/src/protocol/response_writer.rs
@@ -147,11 +147,24 @@ impl<'a, C: Connection + 'a> ResponseWriter<'a, C> {
 
     /// Write a single byte as a hex string (two ascii chars)
     fn write_hex(&mut self, byte: u8) -> Result<(), Error<C::Error>> {
-        for digit in [(byte & 0xf0) >> 4, byte & 0x0f].iter() {
+        for &digit in [(byte & 0xf0) >> 4, byte & 0x0f].iter() {
             let c = match digit {
                 0..=9 => b'0' + digit,
                 10..=15 => b'a' + digit - 10,
-                _ => unreachable!(),
+                // This match arm is unreachable, but the compiler isn't smart enough to optimize
+                // out the branch. As such, using `unreachable!` here would introduce panicking
+                // code to `gdbstub`.
+                //
+                // In this case, it'd be totally reasonable to use
+                // `unsafe { core::hint::unreachable_unchecked() }`, but i'll be honest, using some
+                // spooky unsafe compiler hints just to eek out a smidge more performance here just
+                // isn't worth the cognitive overhead.
+                //
+                // Moreover, I've played around with this code in godbolt.org, and it turns out that
+                // leaving this match arm as `=> digit` ends up generating the _exact same code_ as
+                // using `unreachable_unchecked` (at least on x86_64 targets compiled using the
+                // latest Rust compiler). YMMV on other platforms.
+                _ => digit,
             };
             self.write(c)?;
         }
diff --git a/src/util/mod.rs b/src/util/mod.rs
index a0b1e45a..26297b70 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -1 +1,2 @@
 pub mod managed_vec;
+pub mod no_panic_iter;
diff --git a/src/util/no_panic_iter.rs b/src/util/no_panic_iter.rs
new file mode 100644
index 00000000..4037826c
--- /dev/null
+++ b/src/util/no_panic_iter.rs
@@ -0,0 +1,126 @@
+/// Slice extension trait that provides non-panicing variants of several
+/// standard library iterators.
+pub trait SliceExt<T> {
+    /// Variant of [`core::slice::split_mut`] that elides bound checks.
+    fn split_mut_no_panic<F>(&mut self, pred: F) -> SplitMut<'_, T, F>
+    where
+        F: FnMut(&T) -> bool;
+
+    /// Variant of [`core::slice::splitn_mut`] that elides bound checks.
+    fn splitn_mut_no_panic<F>(&mut self, n: usize, pred: F) -> SplitNMut<'_, T, F>
+    where
+        F: FnMut(&T) -> bool;
+}
+
+impl<T> SliceExt<T> for [T] {
+    fn split_mut_no_panic<F>(&mut self, pred: F) -> SplitMut<'_, T, F>
+    where
+        F: FnMut(&T) -> bool,
+    {
+        SplitMut::new(self, pred)
+    }
+
+    fn splitn_mut_no_panic<F>(&mut self, n: usize, pred: F) -> SplitNMut<'_, T, F>
+    where
+        F: FnMut(&T) -> bool,
+    {
+        SplitNMut {
+            iter: SplitMut::new(self, pred),
+            count: n,
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct SplitMut<'a, T: 'a, P>
+where
+    P: FnMut(&T) -> bool,
+{
+    v: &'a mut [T],
+    pred: P,
+    finished: bool,
+}
+
+impl<'a, T: 'a, P: FnMut(&T) -> bool> SplitMut<'a, T, P> {
+    #[inline]
+    pub fn new(slice: &'a mut [T], pred: P) -> Self {
+        Self {
+            v: slice,
+            pred,
+            finished: false,
+        }
+    }
+
+    #[inline]
+    fn finish(&mut self) -> Option<&'a mut [T]> {
+        if self.finished {
+            None
+        } else {
+            self.finished = true;
+            Some(core::mem::replace(&mut self.v, &mut []))
+        }
+    }
+}
+
+impl<'a, T, P> Iterator for SplitMut<'a, T, P>
+where
+    P: FnMut(&T) -> bool,
+{
+    type Item = &'a mut [T];
+
+    #[inline]
+    fn next(&mut self) -> Option<&'a mut [T]> {
+        if self.finished {
+            return None;
+        }
+
+        let idx_opt = {
+            // work around borrowck limitations
+            let pred = &mut self.pred;
+            self.v.iter().position(|x| (*pred)(x))
+        };
+        match idx_opt {
+            None => self.finish(),
+            Some(idx) => {
+                let tmp = core::mem::replace(&mut self.v, &mut []);
+                let (head, tail) = tmp.split_at_mut(idx);
+                self.v = tail.get_mut(1..)?; // will never fail
+                Some(head)
+            }
+        }
+    }
+}
+
+/// An private iterator over subslices separated by elements that
+/// match a predicate function, splitting at most a fixed number of
+/// times.
+#[derive(Debug)]
+pub struct SplitNMut<'a, T: 'a, P>
+where
+    P: FnMut(&T) -> bool,
+{
+    iter: SplitMut<'a, T, P>,
+    count: usize,
+}
+
+impl<'a, T, P> Iterator for SplitNMut<'a, T, P>
+where
+    P: FnMut(&T) -> bool,
+{
+    type Item = &'a mut [T];
+
+    #[inline]
+    fn next(&mut self) -> Option<&'a mut [T]> {
+        match self.count {
+            0 => None,
+            1 => {
+                self.count -= 1;
+                self.iter.finish()
+            }
+            _ => {
+                self.count -= 1;
+                self.iter.next()
+            }
+        }
+    }
+}