Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stop using UncanonicalizedIter for QueryKind::Exact #11937

Merged
merged 1 commit into from
Apr 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 27 additions & 52 deletions src/cargo/sources/registry/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
//! details like invalidating caches and whatnot which are handled below, but
//! hopefully those are more obvious inline in the code itself.

use crate::core::dependency::Dependency;
use crate::core::{PackageId, SourceId, Summary};
use crate::sources::registry::{LoadResponse, RegistryData, RegistryPackage, INDEX_V_MAX};
use crate::util::interning::InternedString;
Expand All @@ -87,14 +86,14 @@ use std::task::{ready, Poll};
/// This loop tries all possible combinations of switching hyphen and underscores to find the
/// uncanonicalized one. As all stored inputs have the correct spelling, we start with the spelling
/// as-provided.
struct UncanonicalizedIter<'s> {
pub struct UncanonicalizedIter<'s> {
input: &'s str,
num_hyphen_underscore: u32,
hyphen_combination_num: u16,
}

impl<'s> UncanonicalizedIter<'s> {
fn new(input: &'s str) -> Self {
pub fn new(input: &'s str) -> Self {
let num_hyphen_underscore = input.chars().filter(|&c| c == '_' || c == '-').count() as u32;
UncanonicalizedIter {
input,
Expand Down Expand Up @@ -267,7 +266,7 @@ impl<'cfg> RegistryIndex<'cfg> {
/// Returns the hash listed for a specified `PackageId`.
pub fn hash(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> Poll<CargoResult<&str>> {
let req = OptVersionReq::exact(pkg.version());
let summary = self.summaries(pkg.name(), &req, load)?;
let summary = self.summaries(&pkg.name(), &req, load)?;
let summary = ready!(summary).next();
Poll::Ready(Ok(summary
.ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?
Expand All @@ -285,7 +284,7 @@ impl<'cfg> RegistryIndex<'cfg> {
/// though since this method is called quite a lot on null builds in Cargo.
pub fn summaries<'a, 'b>(
&'a mut self,
name: InternedString,
name: &str,
req: &'b OptVersionReq,
load: &mut dyn RegistryData,
) -> Poll<CargoResult<impl Iterator<Item = &'a IndexSummary> + 'b>>
Expand All @@ -299,6 +298,7 @@ impl<'cfg> RegistryIndex<'cfg> {
// has run previously this will parse a Cargo-specific cache file rather
// than the registry itself. In effect this is intended to be a quite
// cheap operation.
let name = InternedString::new(name);
let summaries = ready!(self.load_summaries(name, load)?);

// Iterate over our summaries, extract all relevant ones which match our
Expand Down Expand Up @@ -361,45 +361,17 @@ impl<'cfg> RegistryIndex<'cfg> {
.flat_map(|c| c.to_lowercase())
.collect::<String>();

let mut any_pending = false;
// Attempt to handle misspellings by searching for a chain of related
// names to the original `fs_name` name. Only return summaries
// associated with the first hit, however. The resolver will later
// reject any candidates that have the wrong name, and with this it'll
// along the way produce helpful "did you mean?" suggestions.
for (i, name_permutation) in UncanonicalizedIter::new(&fs_name).take(1024).enumerate() {
let path = make_dep_path(&name_permutation, false);
let summaries = Summaries::parse(
root,
&cache_root,
path.as_ref(),
self.source_id,
load,
self.config,
)?;
if summaries.is_pending() {
if i == 0 {
// If we have not herd back about the name as requested
// then don't ask about other spellings yet.
// This prevents us spamming all the variations in the
// case where we have the correct spelling.
return Poll::Pending;
}
any_pending = true;
}
if let Poll::Ready(Some(summaries)) = summaries {
self.summaries_cache.insert(name, summaries);
return Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()));
}
}

if any_pending {
return Poll::Pending;
}

// If nothing was found then this crate doesn't exists, so just use an
// empty `Summaries` list.
self.summaries_cache.insert(name, Summaries::default());
let path = make_dep_path(&fs_name, false);
let summaries = ready!(Summaries::parse(
root,
&cache_root,
path.as_ref(),
self.source_id,
load,
self.config,
))?
.unwrap_or_default();
self.summaries_cache.insert(name, summaries);
Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()))
}

Expand All @@ -410,7 +382,8 @@ impl<'cfg> RegistryIndex<'cfg> {

pub fn query_inner(
&mut self,
dep: &Dependency,
name: &str,
req: &OptVersionReq,
load: &mut dyn RegistryData,
yanked_whitelist: &HashSet<PackageId>,
f: &mut dyn FnMut(Summary),
Expand All @@ -426,25 +399,28 @@ impl<'cfg> RegistryIndex<'cfg> {
// then cargo will fail to download and an error message
// indicating that the required dependency is unavailable while
// offline will be displayed.
if ready!(self.query_inner_with_online(dep, load, yanked_whitelist, f, false)?) > 0 {
if ready!(self.query_inner_with_online(name, req, load, yanked_whitelist, f, false)?)
> 0
{
return Poll::Ready(Ok(()));
}
}
self.query_inner_with_online(dep, load, yanked_whitelist, f, true)
self.query_inner_with_online(name, req, load, yanked_whitelist, f, true)
.map_ok(|_| ())
}

fn query_inner_with_online(
&mut self,
dep: &Dependency,
name: &str,
req: &OptVersionReq,
load: &mut dyn RegistryData,
yanked_whitelist: &HashSet<PackageId>,
f: &mut dyn FnMut(Summary),
online: bool,
) -> Poll<CargoResult<usize>> {
let source_id = self.source_id;

let summaries = ready!(self.summaries(dep.package_name(), dep.version_req(), load))?;
let summaries = ready!(self.summaries(name, req, load))?;

let summaries = summaries
// First filter summaries for `--offline`. If we're online then
Expand All @@ -469,7 +445,6 @@ impl<'cfg> RegistryIndex<'cfg> {
// `<pkg>=<p_req>o-><f_req>` where `<pkg>` is the name of a crate on
// this source, `<p_req>` is the version installed and `<f_req> is the
// version requested (argument to `--precise`).
let name = dep.package_name().as_str();
let precise = match source_id.precise() {
Some(p) if p.starts_with(name) && p[name.len()..].starts_with('=') => {
let mut vers = p[name.len() + 1..].splitn(2, "->");
Expand All @@ -481,7 +456,7 @@ impl<'cfg> RegistryIndex<'cfg> {
};
let summaries = summaries.filter(|s| match &precise {
Some((current, requested)) => {
if dep.version_req().matches(current) {
if req.matches(current) {
// Unfortunately crates.io allows versions to differ only
// by build metadata. This shouldn't be allowed, but since
// it is, this will honor it if requested. However, if not
Expand Down Expand Up @@ -521,7 +496,7 @@ impl<'cfg> RegistryIndex<'cfg> {
) -> Poll<CargoResult<bool>> {
let req = OptVersionReq::exact(pkg.version());
let found = self
.summaries(pkg.name(), &req, load)
.summaries(&pkg.name(), &req, load)
.map_ok(|mut p| p.any(|summary| summary.yanked));
found
}
Expand Down
89 changes: 63 additions & 26 deletions src/cargo/sources/registry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ use std::collections::HashSet;
use std::fs::{File, OpenOptions};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::task::Poll;
use std::task::{ready, Poll};

use anyhow::Context as _;
use cargo_util::paths::{self, exclude_from_backups_and_indexing};
Expand Down Expand Up @@ -756,7 +756,7 @@ impl<'cfg> RegistrySource<'cfg> {
let req = OptVersionReq::exact(package.version());
let summary_with_cksum = self
.index
.summaries(package.name(), &req, &mut *self.ops)?
.summaries(&package.name(), &req, &mut *self.ops)?
.expect("a downloaded dep now pending!?")
.map(|s| s.summary.clone())
.next()
Expand Down Expand Up @@ -786,36 +786,73 @@ impl<'cfg> Source for RegistrySource<'cfg> {
{
debug!("attempting query without update");
let mut called = false;
let pend =
self.index
.query_inner(dep, &mut *self.ops, &self.yanked_whitelist, &mut |s| {
if dep.matches(&s) {
called = true;
f(s);
}
})?;
if pend.is_pending() {
return Poll::Pending;
}
ready!(self.index.query_inner(
&dep.package_name(),
dep.version_req(),
&mut *self.ops,
&self.yanked_whitelist,
&mut |s| {
if dep.matches(&s) {
called = true;
f(s);
}
},
))?;
if called {
return Poll::Ready(Ok(()));
Poll::Ready(Ok(()))
} else {
debug!("falling back to an update");
self.invalidate_cache();
return Poll::Pending;
Poll::Pending
}
}

self.index
.query_inner(dep, &mut *self.ops, &self.yanked_whitelist, &mut |s| {
let matched = match kind {
QueryKind::Exact => dep.matches(&s),
QueryKind::Fuzzy => true,
};
if matched {
f(s);
} else {
let mut called = false;
ready!(self.index.query_inner(
&dep.package_name(),
dep.version_req(),
&mut *self.ops,
&self.yanked_whitelist,
&mut |s| {
let matched = match kind {
QueryKind::Exact => dep.matches(&s),
QueryKind::Fuzzy => true,
};
if matched {
f(s);
called = true;
}
}
))?;
if called {
return Poll::Ready(Ok(()));
}
let mut any_pending = false;
if kind == QueryKind::Fuzzy {
// Attempt to handle misspellings by searching for a chain of related
// names to the original name. The resolver will later
// reject any candidates that have the wrong name, and with this it'll
// along the way produce helpful "did you mean?" suggestions.
for name_permutation in
index::UncanonicalizedIter::new(&dep.package_name()).take(1024)
{
any_pending |= self
.index
.query_inner(
&name_permutation,
dep.version_req(),
&mut *self.ops,
&self.yanked_whitelist,
f,
)?
.is_pending();
}
})
}
if any_pending {
Poll::Pending
} else {
Poll::Ready(Ok(()))
}
}
}

fn supports_checksums(&self) -> bool {
Expand Down
17 changes: 17 additions & 0 deletions tests/testsuite/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2566,6 +2566,8 @@ fn wait_for_first_publish_underscore() {
// Counter for number of tries before the package is "published"
let arc: Arc<Mutex<u32>> = Arc::new(Mutex::new(0));
let arc2 = arc.clone();
let misses = Arc::new(Mutex::new(Vec::new()));
let misses2 = misses.clone();

// Registry returns an invalid response.
let registry = registry::RegistryBuilder::new()
Expand All @@ -2580,6 +2582,14 @@ fn wait_for_first_publish_underscore() {
server.index(req)
}
})
.not_found_handler(move |req, _| {
misses.lock().unwrap().push(req.url.to_string());
Response {
body: b"not found".to_vec(),
code: 404,
headers: vec![],
}
})
.build();

let p = project()
Expand Down Expand Up @@ -2621,6 +2631,13 @@ You may press ctrl-c to skip waiting; the crate should be available shortly.
let lock = arc2.lock().unwrap();
assert_eq!(*lock, 2);
drop(lock);
{
let misses = misses2.lock().unwrap();
assert!(
misses.len() == 1,
"should only have 1 not found URL; instead found {misses:?}"
);
}

let p = project()
.file(
Expand Down