From 6713ae9d4262976aed77083b322b981c12d6a432 Mon Sep 17 00:00:00 2001 From: Ayush Singh Date: Mon, 2 Oct 2023 17:00:09 +0530 Subject: [PATCH] Implement args for UEFI - Uses `EFI_LOADED_IMAGE_PROTOCOL` - verify that cli args are valid UTF-16 - Update Docs Signed-off-by: Ayush Singh --- library/std/src/sys/uefi/args.rs | 158 ++++++++++++++++++ library/std/src/sys/uefi/helpers.rs | 7 + library/std/src/sys/uefi/mod.rs | 1 - .../src/platform-support/unknown-uefi.md | 2 + 4 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 library/std/src/sys/uefi/args.rs diff --git a/library/std/src/sys/uefi/args.rs b/library/std/src/sys/uefi/args.rs new file mode 100644 index 0000000000000..4ff7be748e90b --- /dev/null +++ b/library/std/src/sys/uefi/args.rs @@ -0,0 +1,158 @@ +use r_efi::protocols::loaded_image; + +use crate::env::current_exe; +use crate::ffi::OsString; +use crate::fmt; +use crate::iter::Iterator; +use crate::mem::size_of; +use crate::sys::uefi::helpers; +use crate::vec; + +pub struct Args { + parsed_args_list: vec::IntoIter, +} + +pub fn args() -> Args { + let lazy_current_exe = || Vec::from([current_exe().map(Into::into).unwrap_or_default()]); + + // Each loaded image has an image handle that supports `EFI_LOADED_IMAGE_PROTOCOL`. Thus, this + // will never fail. + let protocol = + helpers::image_handle_protocol::(loaded_image::PROTOCOL_GUID) + .unwrap(); + + let lp_size = unsafe { (*protocol.as_ptr()).load_options_size } as usize; + // Break if we are sure that it cannot be UTF-16 + if lp_size < size_of::() || lp_size % size_of::() != 0 { + return Args { parsed_args_list: lazy_current_exe().into_iter() }; + } + let lp_size = lp_size / size_of::(); + + let lp_cmd_line = unsafe { (*protocol.as_ptr()).load_options as *const u16 }; + if !lp_cmd_line.is_aligned() { + return Args { parsed_args_list: lazy_current_exe().into_iter() }; + } + let lp_cmd_line = unsafe { crate::slice::from_raw_parts(lp_cmd_line, lp_size) }; + + Args { + parsed_args_list: parse_lp_cmd_line(lp_cmd_line) + .unwrap_or_else(lazy_current_exe) + .into_iter(), + } +} + +impl fmt::Debug for Args { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.parsed_args_list.as_slice().fmt(f) + } +} + +impl Iterator for Args { + type Item = OsString; + + fn next(&mut self) -> Option { + self.parsed_args_list.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.parsed_args_list.size_hint() + } +} + +impl ExactSizeIterator for Args { + fn len(&self) -> usize { + self.parsed_args_list.len() + } +} + +impl DoubleEndedIterator for Args { + fn next_back(&mut self) -> Option { + self.parsed_args_list.next_back() + } +} + +/// Implements the UEFI command-line argument parsing algorithm. +/// +/// This implementation is based on what is defined in Section 3.4 of +/// [UEFI Shell Specification](https://uefi.org/sites/default/files/resources/UEFI_Shell_Spec_2_0.pdf) +/// +/// Return None in the following cases: +/// - Invalid UTF-16 (unpaired surrogate) +/// - Empty/improper arguments +fn parse_lp_cmd_line(code_units: &[u16]) -> Option> { + const QUOTE: char = '"'; + const SPACE: char = ' '; + const CARET: char = '^'; + const NULL: char = '\0'; + + let mut ret_val = Vec::new(); + let mut code_units_iter = char::decode_utf16(code_units.iter().cloned()).peekable(); + + // The executable name at the beginning is special. + let mut in_quotes = false; + let mut cur = String::new(); + while let Some(w) = code_units_iter.next() { + let w = w.ok()?; + match w { + // break on NULL + NULL => break, + // A quote mark always toggles `in_quotes` no matter what because + // there are no escape characters when parsing the executable name. + QUOTE => in_quotes = !in_quotes, + // If not `in_quotes` then whitespace ends argv[0]. + SPACE if !in_quotes => break, + // In all other cases the code unit is taken literally. + _ => cur.push(w), + } + } + + // If exe name is missing, the cli args are invalid + if cur.is_empty() { + return None; + } + + ret_val.push(OsString::from(cur)); + // Skip whitespace. + while code_units_iter.next_if_eq(&Ok(SPACE)).is_some() {} + + // Parse the arguments according to these rules: + // * All code units are taken literally except space, quote and caret. + // * When not `in_quotes`, space separate arguments. Consecutive spaces are + // treated as a single separator. + // * A space `in_quotes` is taken literally. + // * A quote toggles `in_quotes` mode unless it's escaped. An escaped quote is taken literally. + // * A quote can be escaped if preceded by caret. + // * A caret can be escaped if preceded by caret. + let mut cur = String::new(); + let mut in_quotes = false; + while let Some(w) = code_units_iter.next() { + let w = w.ok()?; + match w { + // break on NULL + NULL => break, + // If not `in_quotes`, a space or tab ends the argument. + SPACE if !in_quotes => { + ret_val.push(OsString::from(&cur[..])); + cur.truncate(0); + + // Skip whitespace. + while code_units_iter.next_if_eq(&Ok(SPACE)).is_some() {} + } + // Caret can escape quotes or carets + CARET if in_quotes => { + if let Some(x) = code_units_iter.next() { + cur.push(x.ok()?); + } + } + // If quote then flip `in_quotes` + QUOTE => in_quotes = !in_quotes, + // Everything else is always taken literally. + _ => cur.push(w), + } + } + // Push the final argument, if any. + if !cur.is_empty() || in_quotes { + ret_val.push(OsString::from(cur)); + } + Some(ret_val) +} diff --git a/library/std/src/sys/uefi/helpers.rs b/library/std/src/sys/uefi/helpers.rs index 126661bfc961c..9837cc89f2d39 100644 --- a/library/std/src/sys/uefi/helpers.rs +++ b/library/std/src/sys/uefi/helpers.rs @@ -139,3 +139,10 @@ pub(crate) unsafe fn close_event(evt: NonNull) -> io::Result if r.is_error() { Err(crate::io::Error::from_raw_os_error(r.as_usize())) } else { Ok(()) } } + +/// Get the Protocol for current system handle. +/// Note: Some protocols need to be manually freed. It is the callers responsibility to do so. +pub(crate) fn image_handle_protocol(protocol_guid: Guid) -> Option> { + let system_handle = uefi::env::try_image_handle()?; + open_protocol(system_handle, protocol_guid).ok() +} diff --git a/library/std/src/sys/uefi/mod.rs b/library/std/src/sys/uefi/mod.rs index 097396ae99396..4edc00e3ea022 100644 --- a/library/std/src/sys/uefi/mod.rs +++ b/library/std/src/sys/uefi/mod.rs @@ -13,7 +13,6 @@ //! [`OsString`]: crate::ffi::OsString pub mod alloc; -#[path = "../unsupported/args.rs"] pub mod args; #[path = "../unix/cmath.rs"] pub mod cmath; diff --git a/src/doc/rustc/src/platform-support/unknown-uefi.md b/src/doc/rustc/src/platform-support/unknown-uefi.md index 370939520dc0c..1230ea22bd99b 100644 --- a/src/doc/rustc/src/platform-support/unknown-uefi.md +++ b/src/doc/rustc/src/platform-support/unknown-uefi.md @@ -268,6 +268,8 @@ cargo build --target x86_64-unknown-uefi -Zbuild-std=std,panic_abort #### stdio - Uses `Simple Text Input Protocol` and `Simple Text Output Protocol`. - Note: UEFI uses CRLF for new line. This means Enter key is registered as CR instead of LF. +#### args +- Uses `EFI_LOADED_IMAGE_PROTOCOL->LoadOptions` ## Example: Hello World With std The following code features a valid UEFI application, including `stdio` and `alloc` (`OsString` and `Vec`):