std: Respect formatting flags for str-like OsStr

Historically many `Display` and `Debug` implementations for `OsStr`-like abstractions have gone through `String::from_utf8_lossy`, but this was updated in #42613 to use an internal `Utf8Lossy` abstraction instead. This had the unfortunate side effect of causing a regression (#43765) in code which relied on these `fmt` trait implementations respecting the various formatting flags specified. This commit opportunistically adds back interpretation of formatting trait flags in the "common case" where the `OsStr`-like "thing" is all valid utf-8 and can delegate to the formatting implementation for `str`. This doesn't entirely solve the regression, as non-utf8 paths will still format differently than they did before (in that they will not respect formatting flags), but this should solve the regression for all "real world" use cases of paths and such. The door's also still open for handling these flags in the future! Closes #43765
rust-lang · Aug 14, 2017 · 742ca0c · 742ca0c
1 parent f3cf206
commit 742ca0c
Show file tree

Hide file tree

Showing 4 changed files with 29 additions and 6 deletions.
diff --git a/src/libstd/path.rs b/src/libstd/path.rs
@@ -3953,4 +3953,10 @@ mod tests {
         assert_eq!(path, path_buf);
         assert!(path_buf.into_os_string().capacity() >= 15);
     }
+
+    #[test]
+    fn display_format_flags() {
+        assert_eq!(format!("a{:#<5}b", Path::new("").display()), "a#####b");
+        assert_eq!(format!("a{:#<5}b", Path::new("a").display()), "aa####b");
+    }
 }
diff --git a/src/libstd/sys_common/wtf8.rs b/src/libstd/sys_common/wtf8.rs
@@ -452,10 +452,14 @@ impl fmt::Display for Wtf8 {
                     pos = surrogate_pos + 3;
                 },
                 None => {
-                    formatter.write_str(unsafe {
+                    let s = unsafe {
                         str::from_utf8_unchecked(&wtf8_bytes[pos..])
-                    })?;
-                    return Ok(());
+                    };
+                    if pos == 0 {
+                        return s.fmt(formatter)
+                    } else {
+                        return formatter.write_str(s)
+                    }
                 }
             }
         }

diff --git a/src/libstd_unicode/lib.rs b/src/libstd_unicode/lib.rs
@@ -34,7 +34,6 @@
 
 #![feature(core_char_ext)]
 #![feature(str_internals)]
-#![feature(core_intrinsics)]
 #![feature(decode_utf8)]
 #![feature(fused)]
 #![feature(fn_traits)]

diff --git a/src/libstd_unicode/lossy.rs b/src/libstd_unicode/lossy.rs
@@ -12,7 +12,7 @@ use core::str as core_str;
 use core::fmt;
 use core::fmt::Write;
 use char;
-use core::intrinsics;
+use core::mem;
 
 
 /// Lossy UTF-8 string.
@@ -27,7 +27,7 @@ impl Utf8Lossy {
     }
 
     pub fn from_bytes(bytes: &[u8]) -> &Utf8Lossy {
-        unsafe { intrinsics::transmute(bytes) }
+        unsafe { mem::transmute(bytes) }
     }
 
     pub fn chunks(&self) -> Utf8LossyChunksIter {
@@ -153,7 +153,21 @@ impl<'a> Iterator for Utf8LossyChunksIter<'a> {
 
 impl fmt::Display for Utf8Lossy {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // If we're the empty string then our iterator won't actually yield
+        // anything, so perform the formatting manually
+        if self.bytes.len() == 0 {
+            return "".fmt(f)
+        }
+
         for Utf8LossyChunk { valid, broken } in self.chunks() {
+            // If we successfully decoded the whole chunk as a valid string then
+            // we can return a direct formatting of the string which will also
+            // respect various formatting flags if possible.
+            if valid.len() == self.bytes.len() {
+                assert!(broken.is_empty());
+                return valid.fmt(f)
+            }
+
             f.write_str(valid)?;
             if !broken.is_empty() {
                 f.write_char(char::REPLACEMENT_CHARACTER)?;