Skip to content

Commit

Permalink
Auto merge of #9161 - ehuss:index-v-features2, r=alexcrichton
Browse files Browse the repository at this point in the history
Add schema field and `features2` to the index.

This adds a `v` field to the index which indicates a format version for an index entry. If Cargo encounters a version newer than it understands, it will ignore those entries. This makes it safer to make changes to the index entries (such as adding new things), and not need to worry about how older cargos will react to it. In particular, this will make it safer to run `cargo update` on older versions if we ever decide to add new things to the index.

Currently this is not written anywhere, and is intended as a safety guard for the future. For now I will leave it undocumented until we actually decide to start using it.

This also moves the new syntax for namespaced features and weak dependency features into a new field ("features2") in the index. This is necessary to avoid breaking Cargo versions older than 1.19, which fail to parse the index even if there is a Cargo.lock file.

It is intended that only crates.io will bother with creating this field. Other registries don't need to bother, since they generally don't support Cargo older than 1.19.

I'm uncertain exactly when we should try to update crates.io to start accepting this, as that is a somewhat permanent decision.
  • Loading branch information
bors committed Feb 22, 2021
2 parents 51a437d + f258569 commit 7442c14
Show file tree
Hide file tree
Showing 11 changed files with 985 additions and 19 deletions.
53 changes: 46 additions & 7 deletions crates/cargo-test-support/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use cargo::sources::CRATES_IO_INDEX;
use cargo::util::Sha256;
use flate2::write::GzEncoder;
use flate2::Compression;
use std::collections::HashMap;
use std::collections::BTreeMap;
use std::fmt::Write as _;
use std::fs::{self, File};
use std::io::{BufRead, BufReader, Write};
Expand Down Expand Up @@ -319,16 +319,19 @@ pub struct Package {
deps: Vec<Dependency>,
files: Vec<PackageFile>,
yanked: bool,
features: HashMap<String, Vec<String>>,
features: FeatureMap,
local: bool,
alternative: bool,
invalid_json: bool,
proc_macro: bool,
links: Option<String>,
rust_version: Option<String>,
cargo_features: Vec<String>,
v: Option<u32>,
}

type FeatureMap = BTreeMap<String, Vec<String>>;

#[derive(Clone)]
pub struct Dependency {
name: String,
Expand Down Expand Up @@ -393,14 +396,15 @@ impl Package {
deps: Vec::new(),
files: Vec::new(),
yanked: false,
features: HashMap::new(),
features: BTreeMap::new(),
local: false,
alternative: false,
invalid_json: false,
proc_macro: false,
links: None,
rust_version: None,
cargo_features: Vec::new(),
v: None,
}
}

Expand Down Expand Up @@ -554,6 +558,14 @@ impl Package {
self
}

/// Sets the index schema version for this package.
///
/// See [`cargo::sources::registry::RegistryPackage`] for more information.
pub fn schema_version(&mut self, version: u32) -> &mut Package {
self.v = Some(version);
self
}

/// Creates the package and place it in the registry.
///
/// This does not actually use Cargo's publishing system, but instead
Expand Down Expand Up @@ -599,16 +611,25 @@ impl Package {
} else {
serde_json::json!(self.name)
};
let line = serde_json::json!({
// This emulates what crates.io may do in the future.
let (features, features2) = split_index_features(self.features.clone());
let mut json = serde_json::json!({
"name": name,
"vers": self.vers,
"deps": deps,
"cksum": cksum,
"features": self.features,
"features": features,
"yanked": self.yanked,
"links": self.links,
})
.to_string();
});
if let Some(f2) = &features2 {
json["features2"] = serde_json::json!(f2);
json["v"] = serde_json::json!(2);
}
if let Some(v) = self.v {
json["v"] = serde_json::json!(v);
}
let line = json.to_string();

let file = match self.name.len() {
1 => format!("1/{}", self.name),
Expand Down Expand Up @@ -837,3 +858,21 @@ impl Dependency {
self
}
}

fn split_index_features(mut features: FeatureMap) -> (FeatureMap, Option<FeatureMap>) {
let mut features2 = FeatureMap::new();
for (feat, values) in features.iter_mut() {
if values
.iter()
.any(|value| value.starts_with("dep:") || value.contains("?/"))
{
let new_values = values.drain(..).collect();
features2.insert(feat.clone(), new_values);
}
}
if features2.is_empty() {
(features, None)
} else {
(features, Some(features2))
}
}
2 changes: 2 additions & 0 deletions crates/crates-io/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ pub struct NewCrate {
pub repository: Option<String>,
pub badges: BTreeMap<String, BTreeMap<String, String>>,
pub links: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub v: Option<u32>,
}

#[derive(Serialize)]
Expand Down
3 changes: 3 additions & 0 deletions src/cargo/core/summary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ impl Summary {
features: &BTreeMap<InternedString, Vec<InternedString>>,
links: Option<impl Into<InternedString>>,
) -> CargoResult<Summary> {
// ****CAUTION**** If you change anything here than may raise a new
// error, be sure to coordinate that change with either the index
// schema field or the SummariesCache version.
let mut has_overlapping_features = None;
for dep in dependencies.iter() {
let dep_name = dep.name_in_toml();
Expand Down
1 change: 1 addition & 0 deletions src/cargo/ops/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ fn transmit(
license_file: license_file.clone(),
badges: badges.clone(),
links: links.clone(),
v: None,
},
tarball,
);
Expand Down
94 changes: 83 additions & 11 deletions src/cargo/sources/registry/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,15 @@

use crate::core::dependency::Dependency;
use crate::core::{PackageId, SourceId, Summary};
use crate::sources::registry::{RegistryData, RegistryPackage};
use crate::sources::registry::{RegistryData, RegistryPackage, INDEX_V_MAX};
use crate::util::interning::InternedString;
use crate::util::paths;
use crate::util::{internal, CargoResult, Config, Filesystem, ToSemver};
use log::info;
use anyhow::bail;
use log::{debug, info};
use semver::{Version, VersionReq};
use std::collections::{HashMap, HashSet};
use std::convert::TryInto;
use std::fs;
use std::path::Path;
use std::str;
Expand Down Expand Up @@ -233,6 +235,8 @@ enum MaybeIndexSummary {
pub struct IndexSummary {
pub summary: Summary,
pub yanked: bool,
/// Schema version, see [`RegistryPackage`].
v: u32,
}

/// A representation of the cache on disk that Cargo maintains of summaries.
Expand Down Expand Up @@ -305,6 +309,11 @@ impl<'cfg> RegistryIndex<'cfg> {
// minimize the amount of work being done here and parse as little as
// necessary.
let raw_data = &summaries.raw_data;
let max_version = if namespaced_features || weak_dep_features {
INDEX_V_MAX
} else {
1
};
Ok(summaries
.versions
.iter_mut()
Expand All @@ -318,6 +327,19 @@ impl<'cfg> RegistryIndex<'cfg> {
}
},
)
.filter(move |is| {
if is.v > max_version {
debug!(
"unsupported schema version {} ({} {})",
is.v,
is.summary.name(),
is.summary.version()
);
false
} else {
true
}
})
.filter(move |is| {
is.summary
.unstable_gate(namespaced_features, weak_dep_features)
Expand Down Expand Up @@ -550,6 +572,14 @@ impl Summaries {
let summary = match IndexSummary::parse(config, line, source_id) {
Ok(summary) => summary,
Err(e) => {
// This should only happen when there is an index
// entry from a future version of cargo that this
// version doesn't understand. Hopefully, those future
// versions of cargo correctly set INDEX_V_MAX and
// CURRENT_CACHE_VERSION, otherwise this will skip
// entries in the cache preventing those newer
// versions from reading them (that is, until the
// cache is rebuilt).
log::info!("failed to parse {:?} registry package: {}", relative, e);
continue;
}
Expand Down Expand Up @@ -578,7 +608,14 @@ impl Summaries {
// actually happens to verify that our cache is indeed fresh and
// computes exactly the same value as before.
if cfg!(debug_assertions) && cache_contents.is_some() {
assert_eq!(cache_bytes, cache_contents);
if cache_bytes != cache_contents {
panic!(
"original cache contents:\n{:?}\n\
does not equal new cache contents:\n{:?}\n",
cache_contents.as_ref().map(|s| String::from_utf8_lossy(s)),
cache_bytes.as_ref().map(|s| String::from_utf8_lossy(s)),
);
}
}

// Once we have our `cache_bytes` which represents the `Summaries` we're
Expand Down Expand Up @@ -630,9 +667,9 @@ impl Summaries {
// Implementation of serializing/deserializing the cache of summaries on disk.
// Currently the format looks like:
//
// +--------------+-------------+---+
// | version byte | git sha rev | 0 |
// +--------------+-------------+---+
// +--------------------+----------------------+-------------+---+
// | cache version byte | index format version | git sha rev | 0 |
// +--------------------+----------------------+-------------+---+
//
// followed by...
//
Expand All @@ -649,8 +686,14 @@ impl Summaries {
// versions of Cargo share the same cache they don't get too confused. The git
// sha lets us know when the file needs to be regenerated (it needs regeneration
// whenever the index itself updates).
//
// Cache versions:
// * `1`: The original version.
// * `2`: Added the "index format version" field so that if the index format
// changes, different versions of cargo won't get confused reading each
// other's caches.

const CURRENT_CACHE_VERSION: u8 = 1;
const CURRENT_CACHE_VERSION: u8 = 2;

impl<'a> SummariesCache<'a> {
fn parse(data: &'a [u8], last_index_update: &str) -> CargoResult<SummariesCache<'a>> {
Expand All @@ -659,19 +702,32 @@ impl<'a> SummariesCache<'a> {
.split_first()
.ok_or_else(|| anyhow::format_err!("malformed cache"))?;
if *first_byte != CURRENT_CACHE_VERSION {
anyhow::bail!("looks like a different Cargo's cache, bailing out");
bail!("looks like a different Cargo's cache, bailing out");
}
let index_v_bytes = rest
.get(..4)
.ok_or_else(|| anyhow::anyhow!("cache expected 4 bytes for index version"))?;
let index_v = u32::from_le_bytes(index_v_bytes.try_into().unwrap());
if index_v != INDEX_V_MAX {
bail!(
"index format version {} doesn't match the version I know ({})",
index_v,
INDEX_V_MAX
);
}
let rest = &rest[4..];

let mut iter = split(rest, 0);
if let Some(update) = iter.next() {
if update != last_index_update.as_bytes() {
anyhow::bail!(
bail!(
"cache out of date: current index ({}) != cache ({})",
last_index_update,
str::from_utf8(update)?,
)
}
} else {
anyhow::bail!("malformed file");
bail!("malformed file");
}
let mut ret = SummariesCache::default();
while let Some(version) = iter.next() {
Expand All @@ -692,6 +748,7 @@ impl<'a> SummariesCache<'a> {
.sum();
let mut contents = Vec::with_capacity(size);
contents.push(CURRENT_CACHE_VERSION);
contents.extend(&u32::to_le_bytes(INDEX_V_MAX));
contents.extend_from_slice(index_version.as_bytes());
contents.push(0);
for (version, data) in self.versions.iter() {
Expand Down Expand Up @@ -741,26 +798,41 @@ impl IndexSummary {
///
/// The `line` provided is expected to be valid JSON.
fn parse(config: &Config, line: &[u8], source_id: SourceId) -> CargoResult<IndexSummary> {
// ****CAUTION**** Please be extremely careful with returning errors
// from this function. Entries that error are not included in the
// index cache, and can cause cargo to get confused when switching
// between different versions that understand the index differently.
// Make sure to consider the INDEX_V_MAX and CURRENT_CACHE_VERSION
// values carefully when making changes here.
let RegistryPackage {
name,
vers,
cksum,
deps,
features,
mut features,
features2,
yanked,
links,
v,
} = serde_json::from_slice(line)?;
let v = v.unwrap_or(1);
log::trace!("json parsed registry {}/{}", name, vers);
let pkgid = PackageId::new(name, &vers, source_id)?;
let deps = deps
.into_iter()
.map(|dep| dep.into_dep(source_id))
.collect::<CargoResult<Vec<_>>>()?;
if let Some(features2) = features2 {
for (name, values) in features2 {
features.entry(name).or_default().extend(values);
}
}
let mut summary = Summary::new(config, pkgid, deps, &features, links)?;
summary.set_checksum(cksum);
Ok(IndexSummary {
summary,
yanked: yanked.unwrap_or(false),
v,
})
}
}
Expand Down
31 changes: 31 additions & 0 deletions src/cargo/sources/registry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,10 @@ pub struct RegistryConfig {
pub api: Option<String>,
}

/// The maximum version of the `v` field in the index this version of cargo
/// understands.
pub(crate) const INDEX_V_MAX: u32 = 2;

/// A single line in the index representing a single version of a package.
#[derive(Deserialize)]
pub struct RegistryPackage<'a> {
Expand All @@ -258,6 +262,13 @@ pub struct RegistryPackage<'a> {
#[serde(borrow)]
deps: Vec<RegistryDependency<'a>>,
features: BTreeMap<InternedString, Vec<InternedString>>,
/// This field contains features with new, extended syntax. Specifically,
/// namespaced features (`dep:`) and weak dependencies (`pkg?/feat`).
///
/// This is separated from `features` because versions older than 1.19
/// will fail to load due to not being able to parse the new syntax, even
/// with a `Cargo.lock` file.
features2: Option<BTreeMap<InternedString, Vec<InternedString>>>,
cksum: String,
/// If `true`, Cargo will skip this version when resolving.
///
Expand All @@ -269,6 +280,26 @@ pub struct RegistryPackage<'a> {
/// Added early 2018 (see <https://github.com/rust-lang/cargo/pull/4978>),
/// can be `None` if published before then.
links: Option<InternedString>,
/// The schema version for this entry.
///
/// If this is None, it defaults to version 1. Entries with unknown
/// versions are ignored.
///
/// Version `2` format adds the `features2` field.
///
/// This provides a method to safely introduce changes to index entries
/// and allow older versions of cargo to ignore newer entries it doesn't
/// understand. This is honored as of 1.51, so unfortunately older
/// versions will ignore it, and potentially misinterpret version 2 and
/// newer entries.
///
/// The intent is that versions older than 1.51 will work with a
/// pre-existing `Cargo.lock`, but they may not correctly process `cargo
/// update` or build a lock from scratch. In that case, cargo may
/// incorrectly select a new package that uses a new index format. A
/// workaround is to downgrade any packages that are incompatible with the
/// `--precise` flag of `cargo update`.
v: Option<u32>,
}

#[test]
Expand Down
Loading

0 comments on commit 7442c14

Please sign in to comment.