Skip to content

Commit

Permalink
Auto merge of rust-lang#11824 - kylematsuda:windows-config-env-fix, r…
Browse files Browse the repository at this point in the history
…=ehuss

Handle case mismatches when looking up env vars in the Config snapshot

### What does this PR try to resolve?

Fixes rust-lang#11814.

Windows environment variables are case-insensitive, which causes problems when looking them up in the `Config` env snapshot.

This PR adds another member (`case_insensitive_env`) in `Config` that maps upper-cased keys to their original values in the env (for example, `"PATH" => "Path"`). If lookup in `self.env` fails, this PR converts the key to upper case and looks it up in `self.case_insensitive_env` to obtain the correct key name if it exists (on Windows only).

### How should we test and review this PR?

Please see the new tests in `testsuite/config.rs` and `testsuite/cargo_command.rs`.

### Additional information

Currently, this uses `str::to_uppercase` to uppercase the keys. This requires key to be valid UTF-8, and may disagree with how the OS uppercases things (see the link in [this comment](rust-lang#11814 (comment)) for details).
  • Loading branch information
bors authored and weihanglo committed Mar 17, 2023
1 parent 4a4d795 commit a02ca0d
Show file tree
Hide file tree
Showing 5 changed files with 257 additions and 48 deletions.
2 changes: 1 addition & 1 deletion src/cargo/util/config/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ impl<'config> ValueDeserializer<'config> {
let definition = {
let env = de.key.as_env_key();
let env_def = Definition::Environment(env.to_string());
match (de.config.env_has_key(env), de.config.get_cv(&de.key)?) {
match (de.config.env.contains_key(env), de.config.get_cv(&de.key)?) {
(true, Some(cv)) => {
// Both, pick highest priority.
if env_def.is_higher_priority(cv.definition()) {
Expand Down
184 changes: 184 additions & 0 deletions src/cargo/util/config/environment.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
//! Encapsulates snapshotting of environment variables.

use std::collections::HashMap;
use std::ffi::{OsStr, OsString};

use crate::util::errors::CargoResult;
use anyhow::{anyhow, bail};

/// Generate `case_insensitive_env` and `normalized_env` from the `env`.
fn make_case_insensitive_and_normalized_env(
env: &HashMap<OsString, OsString>,
) -> (HashMap<String, String>, HashMap<String, String>) {
let case_insensitive_env: HashMap<_, _> = env
.keys()
.filter_map(|k| k.to_str())
.map(|k| (k.to_uppercase(), k.to_owned()))
.collect();
let normalized_env = env
.iter()
// Only keep entries where both the key and value are valid UTF-8,
// since the config env vars only support UTF-8 keys and values.
// Otherwise, the normalized map warning could incorrectly warn about entries that can't be
// read by the config system.
// Please see the docs for `Env` for more context.
.filter_map(|(k, v)| Some((k.to_str()?, v.to_str()?)))
.map(|(k, _)| (k.to_uppercase().replace("-", "_"), k.to_owned()))
.collect();
(case_insensitive_env, normalized_env)
}

/// A snapshot of the environment variables available to [`super::Config`].
///
/// Currently, the [`Config`](super::Config) supports lookup of environment variables
/// through two different means:
///
/// - [`Config::get_env`](super::Config::get_env)
/// and [`Config::get_env_os`](super::Config::get_env_os)
/// for process environment variables (similar to [`std::env::var`] and [`std::env::var_os`]),
/// - Typed Config Value API via [`Config::get`](super::Config::get).
/// This is only available for `CARGO_` prefixed environment keys.
///
/// This type contains the env var snapshot and helper methods for both APIs.
#[derive(Debug)]
pub struct Env {
/// A snapshot of the process's environment variables.
env: HashMap<OsString, OsString>,
/// Used in the typed Config value API for warning messages when config keys are
/// given in the wrong format.
///
/// Maps from "normalized" (upper case and with "-" replaced by "_") env keys
/// to the actual keys in the environment.
/// The normalized format is the one expected by Cargo.
///
/// This only holds env keys that are valid UTF-8, since [`super::ConfigKey`] only supports UTF-8 keys.
/// In addition, this only holds env keys whose value in the environment is also valid UTF-8,
/// since the typed Config value API only supports UTF-8 values.
normalized_env: HashMap<String, String>,
/// Used to implement `get_env` and `get_env_os` on Windows, where env keys are case-insensitive.
///
/// Maps from uppercased env keys to the actual key in the environment.
/// For example, this might hold a pair `("PATH", "Path")`.
/// Currently only supports UTF-8 keys and values.
case_insensitive_env: HashMap<String, String>,
}

impl Env {
/// Create a new `Env` from process's environment variables.
pub fn new() -> Self {
let env: HashMap<_, _> = std::env::vars_os().collect();
let (case_insensitive_env, normalized_env) = make_case_insensitive_and_normalized_env(&env);
Self {
env,
case_insensitive_env,
normalized_env,
}
}

/// Set the env directly from a `HashMap`.
/// This should be used for debugging purposes only.
pub(super) fn from_map(env: HashMap<String, String>) -> Self {
let env = env.into_iter().map(|(k, v)| (k.into(), v.into())).collect();
let (case_insensitive_env, normalized_env) = make_case_insensitive_and_normalized_env(&env);
Self {
env,
case_insensitive_env,
normalized_env,
}
}

/// Returns all environment variables as an iterator,
/// keeping only entries where both the key and value are valid UTF-8.
pub fn iter_str(&self) -> impl Iterator<Item = (&str, &str)> {
self.env
.iter()
.filter_map(|(k, v)| Some((k.to_str()?, v.to_str()?)))
}

/// Returns all environment variable keys, filtering out keys that are not valid UTF-8.
pub fn keys_str(&self) -> impl Iterator<Item = &str> {
self.env.keys().filter_map(|k| k.to_str())
}

/// Get the value of environment variable `key` through the `Config` snapshot.
///
/// This can be used similarly to `std::env::var_os`.
/// On Windows, we check for case mismatch since environment keys are case-insensitive.
pub fn get_env_os(&self, key: impl AsRef<OsStr>) -> Option<OsString> {
match self.env.get(key.as_ref()) {
Some(s) => Some(s.clone()),
None => {
if cfg!(windows) {
self.get_env_case_insensitive(key).cloned()
} else {
None
}
}
}
}

/// Get the value of environment variable `key` through the `self.env` snapshot.
///
/// This can be used similarly to `std::env::var`.
/// On Windows, we check for case mismatch since environment keys are case-insensitive.
pub fn get_env(&self, key: impl AsRef<OsStr>) -> CargoResult<String> {
let key = key.as_ref();
let s = self
.get_env_os(key)
.ok_or_else(|| anyhow!("{key:?} could not be found in the environment snapshot"))?;

match s.to_str() {
Some(s) => Ok(s.to_owned()),
None => bail!("environment variable value is not valid unicode: {s:?}"),
}
}

/// Performs a case-insensitive lookup of `key` in the environment.
///
/// This is relevant on Windows, where environment variables are case-insensitive.
/// Note that this only works on keys that are valid UTF-8 and it uses Unicode uppercase,
/// which may differ from the OS's notion of uppercase.
fn get_env_case_insensitive(&self, key: impl AsRef<OsStr>) -> Option<&OsString> {
let upper_case_key = key.as_ref().to_str()?.to_uppercase();
let env_key: &OsStr = self.case_insensitive_env.get(&upper_case_key)?.as_ref();
self.env.get(env_key)
}

/// Get the value of environment variable `key` as a `&str`.
/// Returns `None` if `key` is not in `self.env` or if the value is not valid UTF-8.
///
/// This is intended for use in private methods of `Config`,
/// and does not check for env key case mismatch.
///
/// This is case-sensitive on Windows (even though environment keys on Windows are usually
/// case-insensitive) due to an unintended regression in 1.28 (via #5552).
/// This should only affect keys used for cargo's config-system env variables (`CARGO_`
/// prefixed ones), which are currently all uppercase.
/// We may want to consider rectifying it if users report issues.
/// One thing that adds a wrinkle here is the unstable advanced-env option that *requires*
/// case-sensitive keys.
///
/// Do not use this for any other purposes.
/// Use [`Env::get_env_os`] or [`Env::get_env`] instead, which properly handle case
/// insensitivity on Windows.
pub(super) fn get_str(&self, key: impl AsRef<OsStr>) -> Option<&str> {
self.env.get(key.as_ref()).and_then(|s| s.to_str())
}

/// Check if the environment contains `key`.
///
/// This is intended for use in private methods of `Config`,
/// and does not check for env key case mismatch.
/// See the docstring of [`Env::get_str`] for more context.
pub(super) fn contains_key(&self, key: impl AsRef<OsStr>) -> bool {
self.env.contains_key(key.as_ref())
}

/// Looks up a normalized `key` in the `normalized_env`.
/// Returns the corresponding (non-normalized) env key if it exists, else `None`.
///
/// This is used by [`super::Config::check_environment_key_case_mismatch`].
pub(super) fn get_normalized(&self, key: &str) -> Option<&str> {
self.normalized_env.get(key).map(|s| s.as_ref())
}
}
68 changes: 21 additions & 47 deletions src/cargo/util/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ pub use path::{ConfigRelativePath, PathAndArgs};
mod target;
pub use target::{TargetCfgConfig, TargetConfig};

mod environment;
use environment::Env;

// Helper macro for creating typed access methods.
macro_rules! get_value_typed {
($name:ident, $ty:ty, $variant:ident, $expected:expr) => {
Expand Down Expand Up @@ -199,10 +202,8 @@ pub struct Config {
creation_time: Instant,
/// Target Directory via resolved Cli parameter
target_dir: Option<Filesystem>,
/// Environment variables, separated to assist testing.
env: HashMap<OsString, OsString>,
/// Environment variables, converted to uppercase to check for case mismatch
upper_case_env: HashMap<String, String>,
/// Environment variable snapshot.
env: Env,
/// Tracks which sources have been updated to avoid multiple updates.
updated_sources: LazyCell<RefCell<HashSet<SourceId>>>,
/// Cache of credentials from configuration or credential providers.
Expand Down Expand Up @@ -260,16 +261,10 @@ impl Config {
}
});

let env: HashMap<_, _> = env::vars_os().collect();

let upper_case_env = env
.iter()
.filter_map(|(k, _)| k.to_str()) // Only keep valid UTF-8
.map(|k| (k.to_uppercase().replace("-", "_"), k.to_owned()))
.collect();
let env = Env::new();

let cache_key: &OsStr = "CARGO_CACHE_RUSTC_INFO".as_ref();
let cache_rustc_info = match env.get(cache_key) {
let cache_key = "CARGO_CACHE_RUSTC_INFO";
let cache_rustc_info = match env.get_env_os(cache_key) {
Some(cache) => cache != "0",
_ => true,
};
Expand Down Expand Up @@ -303,7 +298,6 @@ impl Config {
creation_time: Instant::now(),
target_dir: None,
env,
upper_case_env,
updated_sources: LazyCell::new(),
credential_cache: LazyCell::new(),
package_cache_lock: RefCell::new(None),
Expand Down Expand Up @@ -658,7 +652,7 @@ impl Config {
// Root table can't have env value.
return Ok(cv);
}
let env = self.get_env_str(key.as_env_key());
let env = self.env.get_str(key.as_env_key());
let env_def = Definition::Environment(key.as_env_key().to_string());
let use_env = match (&cv, env) {
// Lists are always merged.
Expand Down Expand Up @@ -729,28 +723,26 @@ impl Config {

/// Helper primarily for testing.
pub fn set_env(&mut self, env: HashMap<String, String>) {
self.env = env.into_iter().map(|(k, v)| (k.into(), v.into())).collect();
self.env = Env::from_map(env);
}

/// Returns all environment variables as an iterator, filtering out entries
/// that are not valid UTF-8.
/// Returns all environment variables as an iterator,
/// keeping only entries where both the key and value are valid UTF-8.
pub(crate) fn env(&self) -> impl Iterator<Item = (&str, &str)> {
self.env
.iter()
.filter_map(|(k, v)| Some((k.to_str()?, v.to_str()?)))
self.env.iter_str()
}

/// Returns all environment variable keys, filtering out entries that are not valid UTF-8.
/// Returns all environment variable keys, filtering out keys that are not valid UTF-8.
fn env_keys(&self) -> impl Iterator<Item = &str> {
self.env.iter().filter_map(|(k, _)| k.to_str())
self.env.keys_str()
}

fn get_config_env<T>(&self, key: &ConfigKey) -> Result<OptValue<T>, ConfigError>
where
T: FromStr,
<T as FromStr>::Err: fmt::Display,
{
match self.get_env_str(key.as_env_key()) {
match self.env.get_str(key.as_env_key()) {
Some(value) => {
let definition = Definition::Environment(key.as_env_key().to_string());
Ok(Some(Value {
Expand All @@ -771,39 +763,21 @@ impl Config {
///
/// This can be used similarly to `std::env::var`.
pub fn get_env(&self, key: impl AsRef<OsStr>) -> CargoResult<String> {
let key = key.as_ref();
let s = match self.env.get(key) {
Some(s) => s,
None => bail!("{key:?} could not be found in the environment snapshot",),
};
match s.to_str() {
Some(s) => Ok(s.to_owned()),
None => bail!("environment variable value is not valid unicode: {s:?}"),
}
self.env.get_env(key)
}

/// Get the value of environment variable `key` through the `Config` snapshot.
///
/// This can be used similarly to `std::env::var_os`.
pub fn get_env_os(&self, key: impl AsRef<OsStr>) -> Option<OsString> {
self.env.get(key.as_ref()).cloned()
}

/// Get the value of environment variable `key`.
/// Returns `None` if `key` is not in `self.env` or if the value is not valid UTF-8.
fn get_env_str(&self, key: impl AsRef<OsStr>) -> Option<&str> {
self.env.get(key.as_ref()).and_then(|s| s.to_str())
}

fn env_has_key(&self, key: impl AsRef<OsStr>) -> bool {
self.env.contains_key(key.as_ref())
self.env.get_env_os(key)
}

/// Check if the [`Config`] contains a given [`ConfigKey`].
///
/// See `ConfigMapAccess` for a description of `env_prefix_ok`.
fn has_key(&self, key: &ConfigKey, env_prefix_ok: bool) -> CargoResult<bool> {
if self.env_has_key(key.as_env_key()) {
if self.env.contains_key(key.as_env_key()) {
return Ok(true);
}
if env_prefix_ok {
Expand All @@ -821,7 +795,7 @@ impl Config {
}

fn check_environment_key_case_mismatch(&self, key: &ConfigKey) {
if let Some(env_key) = self.upper_case_env.get(key.as_env_key()) {
if let Some(env_key) = self.env.get_normalized(key.as_env_key()) {
let _ = self.shell().warn(format!(
"Environment variables are expected to use uppercase letters and underscores, \
the variable `{}` will be ignored and have no effect",
Expand Down Expand Up @@ -920,7 +894,7 @@ impl Config {
key: &ConfigKey,
output: &mut Vec<(String, Definition)>,
) -> CargoResult<()> {
let env_val = match self.get_env_str(key.as_env_key()) {
let env_val = match self.env.get_str(key.as_env_key()) {
Some(v) => v,
None => {
self.check_environment_key_case_mismatch(key);
Expand Down
26 changes: 26 additions & 0 deletions tests/testsuite/cargo_command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,32 @@ fn list_command_looks_at_path() {
);
}

#[cfg(windows)]
#[cargo_test]
fn list_command_looks_at_path_case_mismatch() {
let proj = project()
.executable(Path::new("path-test").join("cargo-1"), "")
.build();

let mut path = path();
path.push(proj.root().join("path-test"));
let path = env::join_paths(path.iter()).unwrap();

// See issue #11814: Environment variable names are case-insensitive on Windows.
// We need to check that having "Path" instead of "PATH" is okay.
let output = cargo_process("-v --list")
.env("Path", &path)
.env_remove("PATH")
.exec_with_output()
.unwrap();
let output = str::from_utf8(&output.stdout).unwrap();
assert!(
output.contains("\n 1 "),
"missing 1: {}",
output
);
}

#[cargo_test]
fn list_command_handles_known_external_commands() {
let p = project()
Expand Down
Loading

0 comments on commit a02ca0d

Please sign in to comment.