From e12aeb39bc4221421890011006c5ae23154cc695 Mon Sep 17 00:00:00 2001 From: Phil Ruffwind Date: Sun, 4 May 2014 03:11:42 -0400 Subject: [PATCH 1/4] Use CreateProcessW instead of CreateProcessA Changed libnative to use CreateProcessW instead of CreateProcessA. In addition, the lpEnvironment parameter now uses Unicode. Added a helper function os::win32::as_mut_utf16_p, which does basically the same thing as os::win32::as_utf16_p except the pointer is mutable. --- src/liblibc/lib.rs | 11 ++++++----- src/libnative/io/process.rs | 17 ++++++++++------- src/libstd/os.rs | 6 +++++- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/liblibc/lib.rs b/src/liblibc/lib.rs index 731c46d6f4d50..cf80d5a06d5f4 100644 --- a/src/liblibc/lib.rs +++ b/src/liblibc/lib.rs @@ -208,7 +208,7 @@ pub use funcs::bsd43::{shutdown}; #[cfg(windows)] pub use consts::os::extra::{TRUE, FALSE, INFINITE}; #[cfg(windows)] pub use consts::os::extra::{PROCESS_TERMINATE, PROCESS_QUERY_INFORMATION}; #[cfg(windows)] pub use consts::os::extra::{STILL_ACTIVE, DETACHED_PROCESS}; -#[cfg(windows)] pub use consts::os::extra::{CREATE_NEW_PROCESS_GROUP}; +#[cfg(windows)] pub use consts::os::extra::{CREATE_NEW_PROCESS_GROUP, CREATE_UNICODE_ENVIRONMENT}; #[cfg(windows)] pub use consts::os::extra::{FILE_BEGIN, FILE_END, FILE_CURRENT}; #[cfg(windows)] pub use consts::os::extra::{FILE_GENERIC_READ, FILE_GENERIC_WRITE}; #[cfg(windows)] pub use consts::os::extra::{FILE_SHARE_READ, FILE_SHARE_WRITE, FILE_SHARE_DELETE}; @@ -1937,6 +1937,7 @@ pub mod consts { pub static DETACHED_PROCESS: DWORD = 0x00000008; pub static CREATE_NEW_PROCESS_GROUP: DWORD = 0x00000200; + pub static CREATE_UNICODE_ENVIRONMENT: DWORD = 0x00000400; pub static PIPE_ACCESS_DUPLEX: DWORD = 0x00000003; pub static PIPE_ACCESS_INBOUND: DWORD = 0x00000001; @@ -4193,7 +4194,7 @@ pub mod funcs { pub mod kernel32 { use types::os::arch::c95::{c_uint}; use types::os::arch::extra::{BOOL, DWORD, SIZE_T, HMODULE, - LPCWSTR, LPWSTR, LPCSTR, LPSTR, + LPCWSTR, LPWSTR, LPCH, LPDWORD, LPVOID, LPCVOID, LPOVERLAPPED, LPSECURITY_ATTRIBUTES, @@ -4251,8 +4252,8 @@ pub mod funcs { dwProcessId: DWORD) -> HANDLE; pub fn GetCurrentProcess() -> HANDLE; - pub fn CreateProcessA(lpApplicationName: LPCSTR, - lpCommandLine: LPSTR, + pub fn CreateProcessW(lpApplicationName: LPCWSTR, + lpCommandLine: LPWSTR, lpProcessAttributes: LPSECURITY_ATTRIBUTES, lpThreadAttributes: @@ -4260,7 +4261,7 @@ pub mod funcs { bInheritHandles: BOOL, dwCreationFlags: DWORD, lpEnvironment: LPVOID, - lpCurrentDirectory: LPCSTR, + lpCurrentDirectory: LPCWSTR, lpStartupInfo: LPSTARTUPINFO, lpProcessInformation: LPPROCESS_INFORMATION) diff --git a/src/libnative/io/process.rs b/src/libnative/io/process.rs index 81c76bba7a0eb..dbb191911e741 100644 --- a/src/libnative/io/process.rs +++ b/src/libnative/io/process.rs @@ -258,7 +258,7 @@ fn spawn_process_os(config: p::ProcessConfig, GetCurrentProcess, DuplicateHandle, CloseHandle, - CreateProcessA + CreateProcessW }; use libc::funcs::extra::msvcrt::get_osfhandle; @@ -318,15 +318,15 @@ fn spawn_process_os(config: p::ProcessConfig, let mut create_err = None; // stolen from the libuv code. - let mut flags = 0; + let mut flags = libc::CREATE_UNICODE_ENVIRONMENT; if config.detach { flags |= libc::DETACHED_PROCESS | libc::CREATE_NEW_PROCESS_GROUP; } with_envp(env, |envp| { with_dirp(dir, |dirp| { - cmd.with_c_str(|cmdp| { - let created = CreateProcessA(ptr::null(), mem::transmute(cmdp), + os::win32::as_mut_utf16_p(cmd, |cmdp| { + let created = CreateProcessW(ptr::null(), cmdp, ptr::mut_null(), ptr::mut_null(), TRUE, flags, envp, dirp, &mut si, &mut pi); @@ -691,7 +691,7 @@ fn with_envp(env: Option<~[(~str, ~str)]>, cb: |*mut c_void| -> T) -> T { for pair in env.iter() { let kv = format!("{}={}", *pair.ref0(), *pair.ref1()); - blk.push_all(kv.as_bytes()); + blk.push_all(kv.to_utf16().as_slice()); blk.push(0); } @@ -704,9 +704,12 @@ fn with_envp(env: Option<~[(~str, ~str)]>, cb: |*mut c_void| -> T) -> T { } #[cfg(windows)] -fn with_dirp(d: Option<&Path>, cb: |*libc::c_char| -> T) -> T { +fn with_dirp(d: Option<&Path>, cb: |*u16| -> T) -> T { match d { - Some(dir) => dir.with_c_str(|buf| cb(buf)), + Some(dir) => match dir.as_str() { + Some(dir_str) => os::win32::as_utf16_p(dir_str, cb), + None => cb(ptr::null()) + }, None => cb(ptr::null()) } } diff --git a/src/libstd/os.rs b/src/libstd/os.rs index f7324dc08b6d5..44c338a309180 100644 --- a/src/libstd/os.rs +++ b/src/libstd/os.rs @@ -141,10 +141,14 @@ pub mod win32 { } pub fn as_utf16_p(s: &str, f: |*u16| -> T) -> T { + as_mut_utf16_p(s, |t| { f(t as *u16) }) + } + + pub fn as_mut_utf16_p(s: &str, f: |*mut u16| -> T) -> T { let mut t = s.to_utf16(); // Null terminate before passing on. t.push(0u16); - f(t.as_ptr()) + f(t.as_mut_ptr()) } } From b6cce7ea5497977291f4ae57937360978e764e41 Mon Sep 17 00:00:00 2001 From: Phil Ruffwind Date: Sun, 4 May 2014 17:27:42 -0400 Subject: [PATCH 2/4] Fix make_command_line to handle Unicode correctly Previously, make_command_line iterates over each u8 in the string and then appends them as chars, so any non-ASCII string will get horribly mangled by this function. This fix should allow Unicode arguments to work correctly in native::io::process::spawn. --- src/libnative/io/process.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/libnative/io/process.rs b/src/libnative/io/process.rs index dbb191911e741..be67d0a3fb45d 100644 --- a/src/libnative/io/process.rs +++ b/src/libnative/io/process.rs @@ -409,16 +409,17 @@ fn make_command_line(prog: &str, args: &[~str]) -> ~str { if quote { cmd.push_char('"'); } - for i in range(0u, arg.len()) { - append_char_at(cmd, arg, i); + let argvec: Vec = arg.chars().collect(); + for i in range(0u, argvec.len()) { + append_char_at(cmd, &argvec, i); } if quote { cmd.push_char('"'); } } - fn append_char_at(cmd: &mut StrBuf, arg: &str, i: uint) { - match arg[i] as char { + fn append_char_at(cmd: &mut StrBuf, arg: &Vec, i: uint) { + match *arg.get(i) { '"' => { // Escape quotes. cmd.push_str("\\\""); @@ -438,11 +439,11 @@ fn make_command_line(prog: &str, args: &[~str]) -> ~str { } } - fn backslash_run_ends_in_quote(s: &str, mut i: uint) -> bool { - while i < s.len() && s[i] as char == '\\' { + fn backslash_run_ends_in_quote(s: &Vec, mut i: uint) -> bool { + while i < s.len() && *s.get(i) == '\\' { i += 1; } - return i < s.len() && s[i] as char == '"'; + return i < s.len() && *s.get(i) == '"'; } } From d9eca56c065d27498a0e5fbd20ad114063c96281 Mon Sep 17 00:00:00 2001 From: Phil Ruffwind Date: Sun, 4 May 2014 21:14:55 -0400 Subject: [PATCH 3/4] Use Get/FreeEnvironmentStringsW instead of Get/FreeEnvironmentStringsA Changed libstd to use Get/FreeEnvironmentStringsW instead of Get/FreeEnvironmentStringsA to support Unicode environment variables. --- src/liblibc/lib.rs | 6 +++--- src/libstd/os.rs | 40 +++++++++++++++++++++++++++++++--------- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/liblibc/lib.rs b/src/liblibc/lib.rs index cf80d5a06d5f4..a35ebb06437d9 100644 --- a/src/liblibc/lib.rs +++ b/src/liblibc/lib.rs @@ -4195,7 +4195,7 @@ pub mod funcs { use types::os::arch::c95::{c_uint}; use types::os::arch::extra::{BOOL, DWORD, SIZE_T, HMODULE, LPCWSTR, LPWSTR, - LPCH, LPDWORD, LPVOID, + LPWCH, LPDWORD, LPVOID, LPCVOID, LPOVERLAPPED, LPSECURITY_ATTRIBUTES, LPSTARTUPINFO, @@ -4212,8 +4212,8 @@ pub mod funcs { -> DWORD; pub fn SetEnvironmentVariableW(n: LPCWSTR, v: LPCWSTR) -> BOOL; - pub fn GetEnvironmentStringsA() -> LPCH; - pub fn FreeEnvironmentStringsA(env_ptr: LPCH) -> BOOL; + pub fn GetEnvironmentStringsW() -> LPWCH; + pub fn FreeEnvironmentStringsW(env_ptr: LPWCH) -> BOOL; pub fn GetModuleFileNameW(hModule: HMODULE, lpFilename: LPWSTR, nSize: DWORD) diff --git a/src/libstd/os.rs b/src/libstd/os.rs index 44c338a309180..0a920a275acc4 100644 --- a/src/libstd/os.rs +++ b/src/libstd/os.rs @@ -31,7 +31,7 @@ use clone::Clone; use container::Container; use libc; -use libc::{c_char, c_void, c_int}; +use libc::{c_void, c_int}; use option::{Some, None, Option}; use os; use ops::Drop; @@ -49,6 +49,8 @@ use vec::Vec; #[cfg(unix)] use c_str::ToCStr; +#[cfg(unix)] +use libc::c_char; #[cfg(windows)] use str::OwnedStr; @@ -186,22 +188,42 @@ pub fn env_as_bytes() -> Vec<(~[u8],~[u8])> { unsafe { #[cfg(windows)] unsafe fn get_env_pairs() -> Vec<~[u8]> { - use c_str; + use slice::raw; use libc::funcs::extra::kernel32::{ - GetEnvironmentStringsA, - FreeEnvironmentStringsA + GetEnvironmentStringsW, + FreeEnvironmentStringsW }; - let ch = GetEnvironmentStringsA(); + let ch = GetEnvironmentStringsW(); if ch as uint == 0 { fail!("os::env() failure getting env string from OS: {}", os::last_os_error()); } + // Here, we lossily decode the string as UTF16. + // + // The docs suggest that the result should be in Unicode, but + // Windows doesn't guarantee it's actually UTF16 -- it doesn't + // validate the environment string passed to CreateProcess nor + // SetEnvironmentVariable. Yet, it's unlikely that returning a + // raw u16 buffer would be of practical use since the result would + // be inherently platform-dependent and introduce additional + // complexity to this code. + // + // Using the non-Unicode version of GetEnvironmentStrings is even + // worse since the result is in an OEM code page. Characters that + // can't be encoded in the code page would be turned into question + // marks. let mut result = Vec::new(); - c_str::from_c_multistring(ch as *c_char, None, |cstr| { - result.push(cstr.as_bytes_no_nul().to_owned()); - }); - FreeEnvironmentStringsA(ch); + let mut i = 0; + while *ch.offset(i) != 0 { + let p = &*ch.offset(i); + let len = ptr::position(p, |c| *c == 0); + raw::buf_as_slice(p, len, |s| { + result.push(str::from_utf16_lossy(s).into_bytes()); + }); + i += len as int + 1; + } + FreeEnvironmentStringsW(ch); result } #[cfg(unix)] From b8e3f3a41715a7de7e32eb32456aa25132c8ff46 Mon Sep 17 00:00:00 2001 From: Phil Ruffwind Date: Wed, 7 May 2014 19:26:16 -0400 Subject: [PATCH 4/4] Test Unicode support of process spawning Added a run-pass test to ensure that processes can be correctly spawned using non-ASCII arguments, working directory, and environment variables. It also tests Unicode support of os::env_as_bytes. An additional assertion was added to the test for make_command_line to verify it handles Unicode correctly. --- src/libnative/io/process.rs | 4 + .../process-spawn-with-unicode-params.rs | 86 +++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 src/test/run-pass/process-spawn-with-unicode-params.rs diff --git a/src/libnative/io/process.rs b/src/libnative/io/process.rs index be67d0a3fb45d..14ea1f12a5ca0 100644 --- a/src/libnative/io/process.rs +++ b/src/libnative/io/process.rs @@ -864,5 +864,9 @@ mod tests { make_command_line("echo", ["a b c".to_owned()]), "echo \"a b c\"".to_owned() ); + assert_eq!( + make_command_line("\u03c0\u042f\u97f3\u00e6\u221e", []), + "\u03c0\u042f\u97f3\u00e6\u221e".to_owned() + ); } } diff --git a/src/test/run-pass/process-spawn-with-unicode-params.rs b/src/test/run-pass/process-spawn-with-unicode-params.rs new file mode 100644 index 0000000000000..f9839ed39e752 --- /dev/null +++ b/src/test/run-pass/process-spawn-with-unicode-params.rs @@ -0,0 +1,86 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// no-prefer-dynamic + +// The test copies itself into a subdirectory with a non-ASCII name and then +// runs it as a child process within the subdirectory. The parent process +// also adds an environment variable and an argument, both containing +// non-ASCII characters. The child process ensures all the strings are +// intact. + +extern crate native; + +use std::io; +use std::io::fs; +use std::io::process::Process; +use std::io::process::ProcessConfig; +use std::os; +use std::path::Path; + +fn main() { + let my_args = os::args(); + let my_cwd = os::getcwd(); + let my_env = os::env(); + let my_path = Path::new(os::self_exe_name().unwrap()); + let my_dir = my_path.dir_path(); + let my_ext = my_path.extension_str().unwrap_or(""); + + // some non-ASCII characters + let blah = "\u03c0\u042f\u97f3\u00e6\u221e"; + + let child_name = "child"; + let child_dir = "process-spawn-with-unicode-params-" + blah; + + // parameters sent to child / expected to be received from parent + let arg = blah; + let cwd = my_dir.join(Path::new(child_dir.clone())); + let env = ("RUST_TEST_PROC_SPAWN_UNICODE".to_owned(), blah.to_owned()); + + // am I the parent or the child? + if my_args.len() == 1 { // parent + + let child_filestem = Path::new(child_name); + let child_filename = child_filestem.with_extension(my_ext); + let child_path = cwd.join(child_filename.clone()); + + // make a separate directory for the child + drop(fs::mkdir(&cwd, io::UserRWX).is_ok()); + assert!(fs::copy(&my_path, &child_path).is_ok()); + + // run child + let p = Process::configure(ProcessConfig { + program: child_path.as_str().unwrap(), + args: [arg.to_owned()], + cwd: Some(&cwd), + env: Some(my_env.append_one(env).as_slice()), + .. ProcessConfig::new() + }).unwrap().wait_with_output(); + + // display the output + assert!(io::stdout().write(p.output.as_slice()).is_ok()); + assert!(io::stderr().write(p.error.as_slice()).is_ok()); + + // make sure the child succeeded + assert!(p.status.success()); + + } else { // child + + // check working directory (don't try to compare with `cwd` here!) + assert!(my_cwd.ends_with_path(&Path::new(child_dir))); + + // check arguments + assert_eq!(my_args.get(1).as_slice(), arg); + + // check environment variable + assert!(my_env.contains(&env)); + + }; +}