Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Windows parse_prefix #74220

Merged
merged 8 commits into from
Jul 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/libstd/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@
#![feature(atomic_mut_ptr)]
#![feature(box_syntax)]
#![feature(c_variadic)]
#![feature(cfg_accessible)]
tesuji marked this conversation as resolved.
Show resolved Hide resolved
#![feature(can_vector)]
#![feature(cfg_accessible)]
#![feature(cfg_target_has_atomic)]
#![feature(cfg_target_thread_local)]
#![feature(char_error_internals)]
Expand Down Expand Up @@ -276,8 +276,8 @@
#![feature(hashmap_internals)]
#![feature(int_error_internals)]
#![feature(int_error_matching)]
#![feature(into_future)]
#![feature(integer_atomics)]
#![feature(into_future)]
#![feature(lang_items)]
#![feature(libc)]
#![feature(link_args)]
Expand All @@ -286,6 +286,7 @@
#![feature(log_syntax)]
#![feature(maybe_uninit_ref)]
#![feature(maybe_uninit_slice)]
#![feature(min_specialization)]
#![feature(needs_panic_runtime)]
#![feature(negative_impls)]
#![feature(never_type)]
Expand All @@ -305,7 +306,7 @@
#![feature(shrink_to)]
#![feature(slice_concat_ext)]
#![feature(slice_internals)]
#![feature(min_specialization)]
#![feature(slice_strip)]
#![feature(staged_api)]
#![feature(std_internals)]
#![feature(stdsimd)]
Expand Down
139 changes: 76 additions & 63 deletions src/libstd/sys/windows/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@ use crate::ffi::OsStr;
use crate::mem;
use crate::path::Prefix;

#[cfg(test)]
mod tests;

pub const MAIN_SEP_STR: &str = "\\";
pub const MAIN_SEP: char = '\\';

// The unsafety here stems from converting between `&OsStr` and `&[u8]`
// and back. This is safe to do because (1) we only look at ASCII
// contents of the encoding and (2) new &OsStr values are produced
// only from ASCII-bounded slices of existing &OsStr values.
fn os_str_as_u8_slice(s: &OsStr) -> &[u8] {
unsafe { mem::transmute(s) }
}
Expand All @@ -19,76 +29,79 @@ pub fn is_verbatim_sep(b: u8) -> bool {
b == b'\\'
}

// In most DOS systems, it is not possible to have more than 26 drive letters.
// See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
pub fn is_valid_drive_letter(disk: u8) -> bool {
tesuji marked this conversation as resolved.
Show resolved Hide resolved
disk.is_ascii_alphabetic()
}

pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
use crate::path::Prefix::*;
unsafe {
// The unsafety here stems from converting between &OsStr and &[u8]
// and back. This is safe to do because (1) we only look at ASCII
// contents of the encoding and (2) new &OsStr values are produced
// only from ASCII-bounded slices of existing &OsStr values.
let mut path = os_str_as_u8_slice(path);
use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC};

let path = os_str_as_u8_slice(path);

if path.starts_with(br"\\") {
// \\
path = &path[2..];
if path.starts_with(br"?\") {
// \\?\
path = &path[2..];
if path.starts_with(br"UNC\") {
// \\?\UNC\server\share
path = &path[4..];
let (server, share) = match parse_two_comps(path, is_verbatim_sep) {
Some((server, share)) => {
(u8_slice_as_os_str(server), u8_slice_as_os_str(share))
}
None => (u8_slice_as_os_str(path), u8_slice_as_os_str(&[])),
};
return Some(VerbatimUNC(server, share));
} else {
// \\?\path
let idx = path.iter().position(|&b| b == b'\\');
if idx == Some(2) && path[1] == b':' {
let c = path[0];
if c.is_ascii() && (c as char).is_alphabetic() {
// \\?\C:\ path
return Some(VerbatimDisk(c.to_ascii_uppercase()));
}
// \\
if let Some(path) = path.strip_prefix(br"\\") {
// \\?\
if let Some(path) = path.strip_prefix(br"?\") {
// \\?\UNC\server\share
if let Some(path) = path.strip_prefix(br"UNC\") {
let (server, share) = match get_first_two_components(path, is_verbatim_sep) {
Some((server, share)) => unsafe {
(u8_slice_as_os_str(server), u8_slice_as_os_str(share))
},
None => (unsafe { u8_slice_as_os_str(path) }, OsStr::new("")),
};
return Some(VerbatimUNC(server, share));
} else {
// \\?\path
match path {
// \\?\C:\path
[c, b':', b'\\', ..] if is_valid_drive_letter(*c) => {
return Some(VerbatimDisk(c.to_ascii_uppercase()));
}
// \\?\cat_pics
_ => {
let idx = path.iter().position(|&b| b == b'\\').unwrap_or(path.len());
let slice = &path[..idx];
return Some(Verbatim(unsafe { u8_slice_as_os_str(slice) }));
}
let slice = &path[..idx.unwrap_or(path.len())];
return Some(Verbatim(u8_slice_as_os_str(slice)));
}
} else if path.starts_with(b".\\") {
// \\.\path
path = &path[2..];
let pos = path.iter().position(|&b| b == b'\\');
let slice = &path[..pos.unwrap_or(path.len())];
return Some(DeviceNS(u8_slice_as_os_str(slice)));
}
match parse_two_comps(path, is_sep_byte) {
Some((server, share)) if !server.is_empty() && !share.is_empty() => {
// \\server\share
return Some(UNC(u8_slice_as_os_str(server), u8_slice_as_os_str(share)));
}
_ => (),
}
} else if path.get(1) == Some(&b':') {
// C:
let c = path[0];
if c.is_ascii() && (c as char).is_alphabetic() {
return Some(Disk(c.to_ascii_uppercase()));
} else if let Some(path) = path.strip_prefix(b".\\") {
// \\.\COM42
let idx = path.iter().position(|&b| b == b'\\').unwrap_or(path.len());
let slice = &path[..idx];
return Some(DeviceNS(unsafe { u8_slice_as_os_str(slice) }));
}
match get_first_two_components(path, is_sep_byte) {
Some((server, share)) if !server.is_empty() && !share.is_empty() => {
// \\server\share
return Some(unsafe { UNC(u8_slice_as_os_str(server), u8_slice_as_os_str(share)) });
}
_ => {}
}
} else if let [c, b':', ..] = path {
// C:
if is_valid_drive_letter(*c) {
return Some(Disk(c.to_ascii_uppercase()));
}
return None;
}

fn parse_two_comps(mut path: &[u8], f: fn(u8) -> bool) -> Option<(&[u8], &[u8])> {
let first = &path[..path.iter().position(|x| f(*x))?];
path = &path[(first.len() + 1)..];
let idx = path.iter().position(|x| f(*x));
let second = &path[..idx.unwrap_or(path.len())];
Some((first, second))
}
None
}

pub const MAIN_SEP_STR: &str = "\\";
pub const MAIN_SEP: char = '\\';
/// Returns the first two path components with predicate `f`.
///
/// The two components returned will be use by caller
/// to construct `VerbatimUNC` or `UNC` Windows path prefix.
///
/// Returns [`None`] if there are no separators in path.
fn get_first_two_components(path: &[u8], f: fn(u8) -> bool) -> Option<(&[u8], &[u8])> {
let idx = path.iter().position(|&x| f(x))?;
// Panic safe
// The max `idx+1` is `path.len()` and `path[path.len()..]` is a valid index.
let (first, path) = (&path[..idx], &path[idx + 1..]);
let idx = path.iter().position(|&x| f(x)).unwrap_or(path.len());
let second = &path[..idx];
Some((first, second))
}
21 changes: 21 additions & 0 deletions src/libstd/sys/windows/path/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
use super::*;

#[test]
fn test_get_first_two_components() {
assert_eq!(
get_first_two_components(br"server\share", is_verbatim_sep),
Some((&b"server"[..], &b"share"[..])),
);

assert_eq!(
get_first_two_components(br"server\", is_verbatim_sep),
Some((&b"server"[..], &b""[..]))
);

assert_eq!(
get_first_two_components(br"\server\", is_verbatim_sep),
Some((&b""[..], &b"server"[..]))
);

assert_eq!(get_first_two_components(br"there are no separators here", is_verbatim_sep), None,);
}
5 changes: 2 additions & 3 deletions src/libstd/thread/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -641,9 +641,8 @@ where
#[stable(feature = "rust1", since = "1.0.0")]
pub fn current() -> Thread {
thread_info::current_thread().expect(
"use of std::thread::current() is not \
possible after the thread's local \
data has been destroyed",
"use of std::thread::current() is not possible \
after the thread's local data has been destroyed",
)
}

Expand Down