Skip to content

Commit

Permalink
Rollup merge of #74220 - lzutao:windows-path-com, r=LukasKalbertodt
Browse files Browse the repository at this point in the history
Refactor Windows `parse_prefix`

I feel these changes make the code more readable.
See the commit messages for more details.
  • Loading branch information
Manishearth committed Jul 14, 2020
2 parents b9a0f58 + e31898b commit 063bbc4
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 69 deletions.
7 changes: 4 additions & 3 deletions src/libstd/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@
#![feature(atomic_mut_ptr)]
#![feature(box_syntax)]
#![feature(c_variadic)]
#![feature(cfg_accessible)]
#![feature(can_vector)]
#![feature(cfg_accessible)]
#![feature(cfg_target_has_atomic)]
#![feature(cfg_target_thread_local)]
#![feature(char_error_internals)]
Expand Down Expand Up @@ -276,8 +276,8 @@
#![feature(hashmap_internals)]
#![feature(int_error_internals)]
#![feature(int_error_matching)]
#![feature(into_future)]
#![feature(integer_atomics)]
#![feature(into_future)]
#![feature(lang_items)]
#![feature(libc)]
#![feature(link_args)]
Expand All @@ -286,6 +286,7 @@
#![feature(log_syntax)]
#![feature(maybe_uninit_ref)]
#![feature(maybe_uninit_slice)]
#![feature(min_specialization)]
#![feature(needs_panic_runtime)]
#![feature(negative_impls)]
#![feature(never_type)]
Expand All @@ -305,7 +306,7 @@
#![feature(shrink_to)]
#![feature(slice_concat_ext)]
#![feature(slice_internals)]
#![feature(min_specialization)]
#![feature(slice_strip)]
#![feature(staged_api)]
#![feature(std_internals)]
#![feature(stdsimd)]
Expand Down
139 changes: 76 additions & 63 deletions src/libstd/sys/windows/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@ use crate::ffi::OsStr;
use crate::mem;
use crate::path::Prefix;

#[cfg(test)]
mod tests;

pub const MAIN_SEP_STR: &str = "\\";
pub const MAIN_SEP: char = '\\';

// Views an `&OsStr` as the raw bytes of its encoding.
//
// Converting between `&OsStr` and `&[u8]` (in either direction) is where the
// unsafety comes from. It is sound because (1) only the ASCII contents of the
// encoding are ever inspected and (2) new `&OsStr` values are only ever built
// from ASCII-bounded slices of existing `&OsStr` values.
fn os_str_as_u8_slice(s: &OsStr) -> &[u8] {
    // SAFETY: see the rationale above; the reference is reinterpreted in
    // place, so its address and length are carried over unchanged.
    unsafe { &*(s as *const OsStr as *const [u8]) }
}
Expand All @@ -19,76 +29,79 @@ pub fn is_verbatim_sep(b: u8) -> bool {
b == b'\\'
}

// Reports whether `disk` is an ASCII letter (`A`-`Z` or `a`-`z`), i.e. a byte
// that may be used as a drive letter.
//
// In most DOS systems, it is not possible to have more than 26 drive letters.
// See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
pub fn is_valid_drive_letter(disk: u8) -> bool {
    matches!(disk, b'A'..=b'Z' | b'a'..=b'z')
}

pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
use crate::path::Prefix::*;
unsafe {
// The unsafety here stems from converting between &OsStr and &[u8]
// and back. This is safe to do because (1) we only look at ASCII
// contents of the encoding and (2) new &OsStr values are produced
// only from ASCII-bounded slices of existing &OsStr values.
let mut path = os_str_as_u8_slice(path);
use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC};

let path = os_str_as_u8_slice(path);

if path.starts_with(br"\\") {
// \\
path = &path[2..];
if path.starts_with(br"?\") {
// \\?\
path = &path[2..];
if path.starts_with(br"UNC\") {
// \\?\UNC\server\share
path = &path[4..];
let (server, share) = match parse_two_comps(path, is_verbatim_sep) {
Some((server, share)) => {
(u8_slice_as_os_str(server), u8_slice_as_os_str(share))
}
None => (u8_slice_as_os_str(path), u8_slice_as_os_str(&[])),
};
return Some(VerbatimUNC(server, share));
} else {
// \\?\path
let idx = path.iter().position(|&b| b == b'\\');
if idx == Some(2) && path[1] == b':' {
let c = path[0];
if c.is_ascii() && (c as char).is_alphabetic() {
// \\?\C:\ path
return Some(VerbatimDisk(c.to_ascii_uppercase()));
}
// \\
if let Some(path) = path.strip_prefix(br"\\") {
// \\?\
if let Some(path) = path.strip_prefix(br"?\") {
// \\?\UNC\server\share
if let Some(path) = path.strip_prefix(br"UNC\") {
let (server, share) = match get_first_two_components(path, is_verbatim_sep) {
Some((server, share)) => unsafe {
(u8_slice_as_os_str(server), u8_slice_as_os_str(share))
},
None => (unsafe { u8_slice_as_os_str(path) }, OsStr::new("")),
};
return Some(VerbatimUNC(server, share));
} else {
// \\?\path
match path {
// \\?\C:\path
[c, b':', b'\\', ..] if is_valid_drive_letter(*c) => {
return Some(VerbatimDisk(c.to_ascii_uppercase()));
}
// \\?\cat_pics
_ => {
let idx = path.iter().position(|&b| b == b'\\').unwrap_or(path.len());
let slice = &path[..idx];
return Some(Verbatim(unsafe { u8_slice_as_os_str(slice) }));
}
let slice = &path[..idx.unwrap_or(path.len())];
return Some(Verbatim(u8_slice_as_os_str(slice)));
}
} else if path.starts_with(b".\\") {
// \\.\path
path = &path[2..];
let pos = path.iter().position(|&b| b == b'\\');
let slice = &path[..pos.unwrap_or(path.len())];
return Some(DeviceNS(u8_slice_as_os_str(slice)));
}
match parse_two_comps(path, is_sep_byte) {
Some((server, share)) if !server.is_empty() && !share.is_empty() => {
// \\server\share
return Some(UNC(u8_slice_as_os_str(server), u8_slice_as_os_str(share)));
}
_ => (),
}
} else if path.get(1) == Some(&b':') {
// C:
let c = path[0];
if c.is_ascii() && (c as char).is_alphabetic() {
return Some(Disk(c.to_ascii_uppercase()));
} else if let Some(path) = path.strip_prefix(b".\\") {
// \\.\COM42
let idx = path.iter().position(|&b| b == b'\\').unwrap_or(path.len());
let slice = &path[..idx];
return Some(DeviceNS(unsafe { u8_slice_as_os_str(slice) }));
}
match get_first_two_components(path, is_sep_byte) {
Some((server, share)) if !server.is_empty() && !share.is_empty() => {
// \\server\share
return Some(unsafe { UNC(u8_slice_as_os_str(server), u8_slice_as_os_str(share)) });
}
_ => {}
}
} else if let [c, b':', ..] = path {
// C:
if is_valid_drive_letter(*c) {
return Some(Disk(c.to_ascii_uppercase()));
}
return None;
}

fn parse_two_comps(mut path: &[u8], f: fn(u8) -> bool) -> Option<(&[u8], &[u8])> {
let first = &path[..path.iter().position(|x| f(*x))?];
path = &path[(first.len() + 1)..];
let idx = path.iter().position(|x| f(*x));
let second = &path[..idx.unwrap_or(path.len())];
Some((first, second))
}
None
}

pub const MAIN_SEP_STR: &str = "\\";
pub const MAIN_SEP: char = '\\';
/// Splits off the first two components of `path`, treating every byte for
/// which `f` returns `true` as a separator.
///
/// The caller uses the returned pair to build a `VerbatimUNC` or `UNC`
/// Windows path prefix.
///
/// The second component is empty when nothing follows the first separator
/// (or when two separators are adjacent).
///
/// Returns [`None`] if there are no separators in path.
fn get_first_two_components(path: &[u8], f: fn(u8) -> bool) -> Option<(&[u8], &[u8])> {
    // At most three pieces are requested: the two components we want plus
    // "everything else", which is ignored. `splitn` never indexes out of
    // bounds, so no manual `idx + 1` arithmetic is needed.
    let mut pieces = path.splitn(3, |&byte| f(byte));
    let first = pieces.next()?;
    // A missing second piece means `path` contained no separator at all.
    let second = pieces.next()?;
    Some((first, second))
}
21 changes: 21 additions & 0 deletions src/libstd/sys/windows/path/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
use super::*;

#[test]
fn test_get_first_two_components() {
    // Each entry pairs an input path with the components expected when
    // splitting on the verbatim separator.
    let cases: [(&[u8], Option<(&[u8], &[u8])>); 4] = [
        // Two plain components.
        (&br"server\share"[..], Some((&b"server"[..], &b"share"[..]))),
        // Trailing separator: the second component is empty.
        (&br"server\"[..], Some((&b"server"[..], &b""[..]))),
        // Leading separator: the first component is empty.
        (&br"\server\"[..], Some((&b""[..], &b"server"[..]))),
        // No separator at all.
        (&br"there are no separators here"[..], None),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(get_first_two_components(input, is_verbatim_sep), expected);
    }
}
5 changes: 2 additions & 3 deletions src/libstd/thread/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -641,9 +641,8 @@ where
#[stable(feature = "rust1", since = "1.0.0")]
pub fn current() -> Thread {
thread_info::current_thread().expect(
"use of std::thread::current() is not \
possible after the thread's local \
data has been destroyed",
"use of std::thread::current() is not possible \
after the thread's local data has been destroyed",
)
}

Expand Down

0 comments on commit 063bbc4

Please sign in to comment.