diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 3f68682cf5a..7634dd99a98 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -185,6 +185,10 @@ jobs:
       - name: Clear test output
         run: ci/clean-test-output.sh
+      - name: Check operability of index cache in SQLite3
+        run: 'cargo test -p cargo --test testsuite -- alt_registry:: global_cache_tracker::'
+        env:
+          __CARGO_TEST_FORCE_SQLITE_INDEX_CACHE: 1
       # This only tests `cargo fix` because fix-proxy-mode is one of the most
       # complicated subprocess management in Cargo.
       - name: Check operability of rustc invocation with argfile
diff --git a/src/cargo/core/features.rs b/src/cargo/core/features.rs
index 1b6cba0460b..2af4550590c 100644
--- a/src/cargo/core/features.rs
+++ b/src/cargo/core/features.rs
@@ -762,6 +762,7 @@ unstable_cli_options!(
     git: Option<GitFeatures> = ("Enable support for shallow git fetch operations"),
     gitoxide: Option<GitoxideFeatures> = ("Use gitoxide for the given git interactions, or all of them if no argument is given"),
     host_config: bool = ("Enable the `[host]` section in the .cargo/config.toml file"),
+    index_cache_sqlite: bool,
     minimal_versions: bool = ("Resolve minimal dependency versions instead of maximum"),
     msrv_policy: bool = ("Enable rust-version aware policy within cargo"),
     mtime_on_use: bool = ("Configure Cargo to update the mtime of used files"),
@@ -1149,6 +1150,7 @@ impl CliUnstable {
                 )?
             }
             "host-config" => self.host_config = parse_empty(k, v)?,
+            "index-cache-sqlite" => self.index_cache_sqlite = parse_empty(k, v)?,
             "next-lockfile-bump" => self.next_lockfile_bump = parse_empty(k, v)?,
             "minimal-versions" => self.minimal_versions = parse_empty(k, v)?,
             "msrv-policy" => self.msrv_policy = parse_empty(k, v)?,
diff --git a/src/cargo/sources/registry/index/cache.rs b/src/cargo/sources/registry/index/cache.rs
index 5d3bb28500a..db1cdf117ea 100644
--- a/src/cargo/sources/registry/index/cache.rs
+++ b/src/cargo/sources/registry/index/cache.rs
@@ -65,6 +65,9 @@
 //!
 //! [`IndexSummary::parse`]: super::IndexSummary::parse
 //! [`RemoteRegistry`]: crate::sources::registry::remote::RemoteRegistry

+use std::cell::OnceCell;
+use std::cell::RefCell;
+use std::collections::BTreeMap;
 use std::fs;
 use std::io;
 use std::path::PathBuf;
@@ -72,14 +75,21 @@ use std::str;

 use anyhow::bail;
 use cargo_util::registry::make_dep_path;
+use rusqlite::params;
+use rusqlite::Connection;
 use semver::Version;

 use crate::util::cache_lock::CacheLockMode;
+use crate::util::sqlite;
+use crate::util::sqlite::basic_migration;
+use crate::util::sqlite::Migration;
 use crate::util::Filesystem;
 use crate::CargoResult;
 use crate::GlobalContext;

 use super::split;
+use super::Summaries;
+use super::MaybeIndexSummary;
 use super::INDEX_V_MAX;

 /// The current version of [`SummariesCache`].
@@ -220,12 +230,30 @@ impl<'a> SummariesCache<'a> {
     }
 }

+/// An abstraction of the actual cache store.
+trait CacheStore {
+    /// Gets the cache associated with the key.
+    fn get(&self, key: &str) -> Option<MaybeSummaries>;
+
+    /// Associates the value with the key.
+    fn put(&self, key: &str, value: &[u8]);
+
+    /// Associates the value with the key + version tuple.
+    fn put_summary(&self, key: (&str, &Version), value: &[u8]);
+
+    /// Invalidates the cache associated with the key.
+    fn invalidate(&self, key: &str);
+}
+
+pub enum MaybeSummaries {
+    Unparsed(Vec<u8>),
+    Parsed(Summaries),
+}
+
 /// Manages the on-disk index caches.
 pub struct CacheManager<'gctx> {
-    /// The root path where caches are located.
-    cache_root: Filesystem,
-    /// [`GlobalContext`] reference for convenience.
-    gctx: &'gctx GlobalContext,
+    store: Box<dyn CacheStore + 'gctx>,
+    is_sqlite: bool,
 }

 impl<'gctx> CacheManager<'gctx> {
@@ -233,14 +261,70 @@ impl<'gctx> CacheManager<'gctx> {
     ///
     /// `root` --- The root path where caches are located.
     pub fn new(cache_root: Filesystem, gctx: &'gctx GlobalContext) -> CacheManager<'gctx> {
-        CacheManager { cache_root, gctx }
+        #[allow(clippy::disallowed_methods)]
+        let use_sqlite = gctx.cli_unstable().index_cache_sqlite
+            || std::env::var("__CARGO_TEST_FORCE_SQLITE_INDEX_CACHE").is_ok();
+        let store: Box<dyn CacheStore + 'gctx> = if use_sqlite {
+            Box::new(LocalDatabase::new(cache_root, gctx))
+        } else {
+            Box::new(LocalFileSystem::new(cache_root, gctx))
+        };
+        CacheManager { store, is_sqlite: use_sqlite }
+    }
+
+    pub fn is_sqlite(&self) -> bool {
+        self.is_sqlite
     }

     /// Gets the cache associated with the key.
-    pub fn get(&self, key: &str) -> Option<Vec<u8>> {
+    pub fn get(&self, key: &str) -> Option<MaybeSummaries> {
+        self.store.get(key)
+    }
+
+    /// Associates the value with the key.
+    pub fn put(&self, key: &str, value: &[u8]) {
+        self.store.put(key, value)
+    }
+
+    /// Associates the value with the key + version tuple.
+    pub fn put_summary(&self, key: (&str, &Version), value: &[u8]) {
+        self.store.put_summary(key, value)
+    }
+
+    /// Invalidates the cache associated with the key.
+    pub fn invalidate(&self, key: &str) {
+        self.store.invalidate(key)
+    }
+}
+
+/// Stores index caches in a file system with a registry index like layout.
+struct LocalFileSystem<'gctx> {
+    /// The root path where caches are located.
+    cache_root: Filesystem,
+    /// [`GlobalContext`] reference for convenience.
+    gctx: &'gctx GlobalContext,
+}
+
+impl LocalFileSystem<'_> {
+    /// Creates a new instance of the file system index cache store.
+    fn new(cache_root: Filesystem, gctx: &GlobalContext) -> LocalFileSystem<'_> {
+        LocalFileSystem { cache_root, gctx }
+    }
+
+    fn cache_path(&self, key: &str) -> PathBuf {
+        let relative = make_dep_path(key, false);
+        // This is the file we're loading from cache or the index data.
+        // See module comment in `registry/mod.rs` for why this is structured
+        // the way it is.
+        self.cache_root.join(relative).into_path_unlocked()
+    }
+}
+
+impl CacheStore for LocalFileSystem<'_> {
+    fn get(&self, key: &str) -> Option<MaybeSummaries> {
         let cache_path = &self.cache_path(key);
         match fs::read(cache_path) {
-            Ok(contents) => Some(contents),
+            Ok(contents) => Some(MaybeSummaries::Unparsed(contents)),
             Err(e) => {
                 tracing::debug!(?cache_path, "cache missing: {e}");
                 None
@@ -248,8 +332,7 @@ impl<'gctx> CacheManager<'gctx> {
             }
         }
     }

-    /// Associates the value with the key.
-    pub fn put(&self, key: &str, value: &[u8]) {
+    fn put(&self, key: &str, value: &[u8]) {
         let cache_path = &self.cache_path(key);
         if fs::create_dir_all(cache_path.parent().unwrap()).is_ok() {
             let path = Filesystem::new(cache_path.clone());
@@ -261,8 +344,11 @@ impl<'gctx> CacheManager<'gctx> {
         }
     }

-    /// Invalidates the cache associated with the key.
-    pub fn invalidate(&self, key: &str) {
+    fn put_summary(&self, _key: (&str, &Version), _value: &[u8]) {
+        panic!("unsupported");
+    }
+
+    fn invalidate(&self, key: &str) {
         let cache_path = &self.cache_path(key);
         if let Err(e) = fs::remove_file(cache_path) {
             if e.kind() != io::ErrorKind::NotFound {
@@ -270,12 +356,137 @@ impl<'gctx> CacheManager<'gctx> {
             }
         }
     }
+}

-    fn cache_path(&self, key: &str) -> PathBuf {
-        let relative = make_dep_path(key, false);
-        // This is the file we're loading from cache or the index data.
-        // See module comment in `registry/mod.rs` for why this is structured
-        // the way it is.
-        self.cache_root.join(relative).into_path_unlocked()
-    }
+
+/// Stores index caches in a local SQLite database.
+struct LocalDatabase<'gctx> {
+    /// The root path where caches are located.
+    cache_root: Filesystem,
+    /// Connection to the SQLite database.
+    conn: OnceCell<Option<RefCell<Connection>>>,
+    /// Deferred writes, keyed by package name, flushed in bulk on drop.
+    deferred_writes: RefCell<BTreeMap<String, Vec<(String, Vec<u8>)>>>,
+    /// [`GlobalContext`] reference for convenience.
+    gctx: &'gctx GlobalContext,
+}
+
+impl LocalDatabase<'_> {
+    /// Creates a new instance of the SQLite index cache store.
+    fn new(cache_root: Filesystem, gctx: &GlobalContext) -> LocalDatabase<'_> {
+        LocalDatabase {
+            cache_root,
+            conn: OnceCell::new(),
+            deferred_writes: Default::default(),
+            gctx,
+        }
+    }
+
+    fn conn(&self) -> Option<&RefCell<Connection>> {
+        self.conn
+            .get_or_init(|| {
+                self.conn_init()
+                    .map(RefCell::new)
+                    .map_err(|e| tracing::debug!("cannot open index cache db: {e}"))
+                    .ok()
+            })
+            .as_ref()
+    }
+
+    fn conn_init(&self) -> CargoResult<Connection> {
+        let _lock = self
+            .gctx
+            .acquire_package_cache_lock(CacheLockMode::DownloadExclusive)
+            .unwrap();
+        let cache_root = self.cache_root.as_path_unlocked();
+        fs::create_dir_all(cache_root)?;
+        let mut conn = Connection::open(cache_root.join("index-cache.db"))?;
+        sqlite::migrate(&mut conn, &migrations())?;
+        Ok(conn)
+    }
+
+    fn bulk_put(&self) -> CargoResult<()> {
+        let Some(conn) = self.conn() else {
+            anyhow::bail!("no connection");
+        };
+        let mut conn = conn.borrow_mut();
+        let tx = conn.transaction()?;
+        let mut stmt =
+            tx.prepare_cached("INSERT OR REPLACE INTO summaries (name, version, value) VALUES (?, ?, ?)")?;
+        for (name, summaries) in self.deferred_writes.borrow().iter() {
+            for (version, value) in summaries {
+                stmt.execute(params!(name, version, value))?;
+            }
+        }
+        drop(stmt);
+        tx.commit()?;
+        self.deferred_writes.borrow_mut().clear();
+        Ok(())
+    }
+}
+
+impl Drop for LocalDatabase<'_> {
+    fn drop(&mut self) {
+        let _ = self
+            .bulk_put()
+            .map_err(|e| tracing::info!("failed to flush cache: {e}"));
+    }
+}
+
+impl CacheStore for LocalDatabase<'_> {
+    fn get(&self, key: &str) -> Option<MaybeSummaries> {
+        self.conn()?
+            .borrow()
+            .prepare_cached("SELECT version, value FROM summaries WHERE name = ?")
+            .and_then(|mut stmt| {
+                let rows = stmt.query_map([key], |row| Ok((row.get(0)?, row.get(1)?)))?;
+                let mut summaries = Summaries::default();
+                for row in rows {
+                    let (version, raw_data): (String, Vec<u8>) = row?;
+                    let version = Version::parse(&version).expect("semver");
+                    summaries.versions.insert(version, MaybeIndexSummary::UnparsedData(raw_data));
+                }
+                Ok(MaybeSummaries::Parsed(summaries))
+            })
+            .map_err(|e| {
+                tracing::debug!(key, "cache missing: {e}");
+            })
+            .ok()
+    }
+
+    fn put(&self, _key: &str, _value: &[u8]) {
+        panic!("unsupported");
+    }
+
+    fn put_summary(&self, (name, version): (&str, &Version), value: &[u8]) {
+        self.deferred_writes
+            .borrow_mut()
+            .entry(name.into())
+            .or_insert(Default::default())
+            .push((version.to_string(), value.to_vec()));
+    }
+
+    fn invalidate(&self, key: &str) {
+        if let Some(conn) = self.conn() {
+            _ = conn
+                .borrow()
+                .prepare_cached("DELETE FROM summaries WHERE name = ?")
+                .and_then(|mut stmt| stmt.execute([key]))
+                .map_err(|e| tracing::debug!(key, "failed to remove from cache: {e}"));
+        }
+    }
+}
+
+/// Migrations which initialize the database, and can be used to evolve it over time.
+///
+/// See [`Migration`] for more detail.
+///
+/// **Be sure to not change the order or entries here!**
+fn migrations() -> Vec<Migration> {
+    vec![basic_migration(
+        "CREATE TABLE IF NOT EXISTS summaries (
+            name TEXT NOT NULL,
+            version TEXT NOT NULL,
+            value BLOB NOT NULL,
+            PRIMARY KEY (name, version)
+        )",
+    )]
 }
diff --git a/src/cargo/sources/registry/index/mod.rs b/src/cargo/sources/registry/index/mod.rs
index 0bc6196fad5..571cdb7d3fc 100644
--- a/src/cargo/sources/registry/index/mod.rs
+++ b/src/cargo/sources/registry/index/mod.rs
@@ -40,7 +40,7 @@ use std::task::{ready, Poll};
 use tracing::{debug, info};

 mod cache;
-use self::cache::CacheManager;
+use self::cache::{CacheManager, MaybeSummaries};
 use self::cache::SummariesCache;

 /// The maximum schema version of the `v` field in the index this version of
@@ -115,7 +115,8 @@ struct Summaries {
 enum MaybeIndexSummary {
     /// A summary which has not been parsed, The `start` and `end` are pointers
     /// into [`Summaries::raw_data`] which this is an entry of.
-    Unparsed { start: usize, end: usize },
+    Unparsed(std::ops::Range<usize>),
+    UnparsedData(Vec<u8>),

     /// An actually parsed summary.
Parsed(IndexSummary), @@ -551,14 +552,20 @@ impl Summaries { let mut cached_summaries = None; let mut index_version = None; - if let Some(contents) = cache_manager.get(name) { - match Summaries::parse_cache(contents) { - Ok((s, v)) => { - cached_summaries = Some(s); - index_version = Some(v); + if let Some(maybe_summaries) = cache_manager.get(name) { + match maybe_summaries { + MaybeSummaries::Unparsed(contents) => match Summaries::parse_cache(contents) { + Ok((s, v)) => { + cached_summaries = Some(s); + index_version = Some(v); + } + Err(e) => { + tracing::debug!("failed to parse {name:?} cache: {e}"); + } } - Err(e) => { - tracing::debug!("failed to parse {name:?} cache: {e}"); + MaybeSummaries::Parsed(summaries) => { + cached_summaries = Some(summaries); + index_version = Some("2".into()); } } } @@ -611,9 +618,18 @@ impl Summaries { } }; let version = summary.package_id().version().clone(); - cache.versions.push((version.clone(), line)); + if cache_manager.is_sqlite() { + cache_manager.put_summary((&name, &version), line); + } else { + cache.versions.push((version.clone(), line)); + } ret.versions.insert(version, summary.into()); } + + if cache_manager.is_sqlite() { + return Poll::Ready(Ok(Some(ret))); + } + if let Some(index_version) = index_version { tracing::trace!("caching index_version {}", index_version); let cache_bytes = cache.serialize(index_version.as_str()); @@ -649,7 +665,7 @@ impl Summaries { for (version, summary) in cache.versions { let (start, end) = subslice_bounds(&contents, summary); ret.versions - .insert(version, MaybeIndexSummary::Unparsed { start, end }); + .insert(version, MaybeIndexSummary::Unparsed(start..end)); } ret.raw_data = contents; return Ok((ret, index_version)); @@ -680,14 +696,16 @@ impl MaybeIndexSummary { source_id: SourceId, bindeps: bool, ) -> CargoResult<&IndexSummary> { - let (start, end) = match self { - MaybeIndexSummary::Unparsed { start, end } => (*start, *end), + let data = match self { + 
MaybeIndexSummary::Unparsed(range) => &raw_data[range.clone()], + MaybeIndexSummary::UnparsedData(data) => data, MaybeIndexSummary::Parsed(summary) => return Ok(summary), }; - let summary = IndexSummary::parse(&raw_data[start..end], source_id, bindeps)?; + let summary = IndexSummary::parse(data, source_id, bindeps)?; *self = MaybeIndexSummary::Parsed(summary); match self { MaybeIndexSummary::Unparsed { .. } => unreachable!(), + MaybeIndexSummary::UnparsedData { .. } => unreachable!(), MaybeIndexSummary::Parsed(summary) => Ok(summary), } } diff --git a/tests/testsuite/index_cache_sqlite.rs b/tests/testsuite/index_cache_sqlite.rs new file mode 100644 index 00000000000..f06557aef45 --- /dev/null +++ b/tests/testsuite/index_cache_sqlite.rs @@ -0,0 +1,104 @@ +//! Tests for the `-Zindex-cache-sqlite`. + +use std::collections::HashSet; + +use cargo_test_support::paths; +use cargo_test_support::project; +use cargo_test_support::registry; +use cargo_test_support::registry::Package; + +#[cargo_test] +fn gated() { + project() + .build() + .cargo("fetch") + .arg("-Zindex-cache-sqlite") + .with_status(101) + .with_stderr_contains("[ERROR] the `-Z` flag is only accepted on the nightly channel of Cargo, but this is the `stable` channel") + .run(); +} + +#[cargo_test] +fn crates_io() { + registry::alt_init(); + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + edition = "2015" + + [dependencies] + dep2 = "0.0.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("dep1", "0.0.0").publish(); + Package::new("dep2", "0.0.0").dep("dep1", "0.0.0").publish(); + Package::new("dep3", "0.0.0").publish(); + + p.cargo("fetch") + .masquerade_as_nightly_cargo(&["index-cache-sqlite"]) + .arg("-Zindex-cache-sqlite") + .with_stderr( + "\ +[UPDATING] `dummy-registry` index +[LOCKING] 3 packages +[DOWNLOADING] crates ... 
+[DOWNLOADED] dep1 v0.0.0 (registry `dummy-registry`)
+[DOWNLOADED] dep2 v0.0.0 (registry `dummy-registry`)
+",
+        )
+        .run();
+
+    assert_rows_inserted(&["dep1", "dep2"]);
+
+    p.change_file(
+        "Cargo.toml",
+        r#"
+            [package]
+            name = "foo"
+            edition = "2015"
+
+            [dependencies]
+            dep2 = "0.0.0"
+            dep3 = "0.0.0"
+        "#,
+    );
+
+    p.cargo("fetch")
+        .masquerade_as_nightly_cargo(&["index-cache-sqlite"])
+        .arg("-Zindex-cache-sqlite")
+        .with_stderr(
+            "\
+[UPDATING] `dummy-registry` index
+[LOCKING] 1 package
+[ADDING] dep3 v0.0.0
+[DOWNLOADING] crates ...
+[DOWNLOADED] dep3 v0.0.0 (registry `dummy-registry`)
+",
+        )
+        .run();
+
+    assert_rows_inserted(&["dep1", "dep2", "dep3"]);
+}
+
+#[track_caller]
+fn assert_rows_inserted(names: &[&str]) {
+    let pattern = paths::home().join(".cargo/registry/index/*/.cache/index-cache.db");
+    let pattern = pattern.to_str().unwrap();
+    let db_path = glob::glob(pattern).unwrap().next().unwrap().unwrap();
+
+    let set: HashSet<String> = rusqlite::Connection::open(&db_path)
+        .unwrap()
+        .prepare("SELECT name FROM summaries")
+        .unwrap()
+        .query_map([], |row| row.get(0))
+        .unwrap()
+        .collect::<Result<HashSet<String>, _>>()
+        .unwrap();
+    assert_eq!(set, HashSet::from_iter(names.iter().map(|n| n.to_string())));
+}
diff --git a/tests/testsuite/main.rs b/tests/testsuite/main.rs
index 3e6b07c7ccf..d4f64fc1b1c 100644
--- a/tests/testsuite/main.rs
+++ b/tests/testsuite/main.rs
@@ -103,6 +103,7 @@ mod glob_targets;
 mod global_cache_tracker;
 mod help;
 mod https;
+mod index_cache_sqlite;
 mod inheritable_workspace_fields;
 mod install;
 mod install_upgrade;