Skip to content

Commit

Permalink
Stabilize sparse-registry
Browse files Browse the repository at this point in the history
  • Loading branch information
arlosi committed Jan 5, 2023
1 parent 247b22f commit 7dc5506
Show file tree
Hide file tree
Showing 10 changed files with 347 additions and 344 deletions.
14 changes: 12 additions & 2 deletions src/cargo/core/features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ unstable_cli_options!(
no_index_update: bool = ("Do not update the registry index even if the cache is outdated"),
panic_abort_tests: bool = ("Enable support to run tests with -Cpanic=abort"),
host_config: bool = ("Enable the [host] section in the .cargo/config.toml file"),
sparse_registry: bool = ("Support plain-HTTP-based crate registries"),
sparse_registry: bool = ("Use the sparse protocol when accessing crates.io"),
registry_auth: bool = ("Authentication for alternative registries, and generate registry authentication tokens using asymmetric cryptography"),
target_applies_to_host: bool = ("Enable the `target-applies-to-host` key in the .cargo/config.toml file"),
rustdoc_map: bool = ("Allow passing external documentation mappings to rustdoc"),
Expand Down Expand Up @@ -751,6 +751,11 @@ const STABILISED_MULTITARGET: &str = "Multiple `--target` options are now always
const STABILIZED_TERMINAL_WIDTH: &str =
"The -Zterminal-width option is now always enabled for terminal output.";

/// Warning text shown when `-Z sparse-registry` is passed after stabilization.
///
/// The message is three lines long. Every source line ends with a `\`
/// line-continuation, which drops the raw newline and the next line's leading
/// whitespace, so spacing and line breaks must be written explicitly (a
/// trailing space or a `\n` escape before the `\`).
const STABILISED_SPARSE_REGISTRY: &str = "This flag currently still sets the default protocol \
    to `sparse` when accessing crates.io. However, this will be removed in the future. \n\
    The stable equivalent is to set the config value `registries.crates-io.protocol = 'sparse'`\n\
    or environment variable `CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse`";

fn deserialize_build_std<'de, D>(deserializer: D) -> Result<Option<Vec<String>>, D::Error>
where
D: serde::Deserializer<'de>,
Expand Down Expand Up @@ -948,7 +953,12 @@ impl CliUnstable {
"multitarget" => stabilized_warn(k, "1.64", STABILISED_MULTITARGET),
"rustdoc-map" => self.rustdoc_map = parse_empty(k, v)?,
"terminal-width" => stabilized_warn(k, "1.68", STABILIZED_TERMINAL_WIDTH),
"sparse-registry" => self.sparse_registry = parse_empty(k, v)?,
"sparse-registry" => {
// Once sparse-registry becomes the default for crates.io, `sparse_registry` should
// be removed entirely from `CliUnstable`.
stabilized_warn(k, "1.68", STABILISED_SPARSE_REGISTRY);
self.sparse_registry = parse_empty(k, v)?;
}
"registry-auth" => self.registry_auth = parse_empty(k, v)?,
"namespaced-features" => stabilized_warn(k, "1.60", STABILISED_NAMESPACED_FEATURES),
"weak-dep-features" => stabilized_warn(k, "1.60", STABILIZED_WEAK_DEP_FEATURES),
Expand Down
3 changes: 0 additions & 3 deletions src/cargo/sources/registry/http_remote.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,6 @@ impl<'cfg> HttpRegistry<'cfg> {
config: &'cfg Config,
name: &str,
) -> CargoResult<HttpRegistry<'cfg>> {
if !config.cli_unstable().sparse_registry {
anyhow::bail!("usage of sparse registries requires `-Z sparse-registry`");
}
let url = source_id.url().as_str();
// Ensure the url ends with a slash so we can concatenate paths.
if !url.ends_with('/') {
Expand Down
14 changes: 14 additions & 0 deletions src/doc/src/reference/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -892,6 +892,20 @@ commands like [`cargo publish`] that require authentication.

Can be overridden with the `--token` command-line option.

##### `registries.crates-io.protocol`
* Type: string
* Default: `git`
* Environment: `CARGO_REGISTRIES_CRATES_IO_PROTOCOL`

Specifies the protocol used to access crates.io. Allowed values are `git` or `sparse`.

`git` causes Cargo to clone the entire index of all packages ever published to [crates.io] from <https://github.com/rust-lang/crates.io-index/>.
This can have performance implications due to the size of the index.
`sparse` is a newer protocol which uses HTTPS to download only what is necessary from <https://index.crates.io/>.
This can result in a significant performance improvement for resolving new dependencies in most situations.

More information about registry protocols may be found in the [Registries chapter](registries.md).

#### `[registry]`

The `[registry]` table controls the default registry used when one is not
Expand Down
21 changes: 20 additions & 1 deletion src/doc/src/reference/registries.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ table has a key for each registry, for example:
my-registry = { index = "https://my-intranet:8080/git/index" }
```

The `index` key should be a URL to a git repository with the registry's index.
The `index` key should be a URL to a git repository with the registry's index or a
Cargo sparse registry URL with the `sparse+` prefix.

A crate can then depend on a crate from another registry by specifying the
`registry` key and a value of the registry's name in that dependency's entry
in `Cargo.toml`:
Expand Down Expand Up @@ -98,13 +100,30 @@ has a separate table for each registry, for example:
token = "854DvwSlUwEHtIo3kWy6x7UCPKHfzCmy"
```

### Registry Protocols
Cargo supports two remote registry protocols: `git` and `sparse`. If the registry
index URL starts with `sparse+`, Cargo uses the sparse protocol. Otherwise
Cargo uses the `git` protocol.

The `git` protocol stores index metadata in a git repository and requires Cargo to clone
the entire repo.

The `sparse` protocol fetches individual metadata files using plain HTTP requests.
Since Cargo only downloads the metadata for relevant crates, the `sparse` protocol can
save significant time and bandwidth.

The [crates.io] registry supports both protocols. The protocol for crates.io is
controlled via the [`registries.crates-io.protocol`] config key.

[Source Replacement]: source-replacement.md
[Running a Registry]: running-a-registry.md
[`cargo publish`]: ../commands/cargo-publish.md
[`cargo package`]: ../commands/cargo-package.md
[`cargo login`]: ../commands/cargo-login.md
[config]: config.md
[crates.io]: https://crates.io/
[`registries.crates-io.protocol`]: config.md#registriescrates-ioprotocol


<script>
(function() {
Expand Down
62 changes: 57 additions & 5 deletions src/doc/src/reference/registry-index.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@ introduced them. Older versions of Cargo may not be able to use packages that
make use of new features. However, the format for older packages should not
change, so older versions of Cargo should be able to use them.

The index is stored in a git repository so that Cargo can efficiently fetch
incremental updates to the index. In the root of the repository is a file
named `config.json` which contains JSON information used by Cargo for
accessing the registry. This is an example of what the [crates.io] config file
looks like:
### Index Configuration
The root of the index contains a file named `config.json` which contains JSON
information used by Cargo for accessing the registry. This is an example of
what the [crates.io] config file looks like:

```javascript
{
Expand All @@ -37,11 +36,15 @@ The keys are:
is not specified, commands such as [`cargo publish`] will not work. The web
API is described below.


### Download Endpoint
The download endpoint should send the `.crate` file for the requested package.
Cargo supports https, http, and file URLs, HTTP redirects, HTTP1 and HTTP2.
The exact specifics of TLS support depend on the platform that Cargo is
running on, the version of Cargo, and how it was compiled.


### Index files
The rest of the index repository contains one file for each package, where the
filename is the name of the package in lowercase. Each version of the package
has a separate line in the file. The files are organized in a tier of
Expand Down Expand Up @@ -202,6 +205,55 @@ explaining the format of the entry.
The JSON objects should not be modified after they are added except for the
`yanked` field whose value may change at any time.

### Index Protocols
Cargo supports two remote registry protocols: `git` and `sparse`. The `git` protocol
stores index files in a git repository and the `sparse` protocol fetches individual
files over HTTP.

#### Git Protocol
The git protocol has no protocol prefix in the index URL. For example, the git index URL
for [crates.io] is `https://github.com/rust-lang/crates.io-index`.

Cargo caches the git repository on disk so that it can fetch updates efficiently and
incrementally.

#### Sparse Protocol
The sparse protocol uses the `sparse+` protocol prefix in the registry URL. For example,
the sparse index URL for [crates.io] is `sparse+https://index.crates.io/`.

The sparse protocol downloads each index file using an individual HTTP request. Since
this results in a large number of small HTTP requests, performance is significantly
improved with a server that supports pipelining and HTTP/2.

##### Caching
Cargo caches the crate metadata files, and captures the `ETag` or `Last-Modified`
HTTP header from the server for each entry. When refreshing crate metadata, Cargo
sends the `If-None-Match` or `If-Modified-Since` header to allow the server to respond
with HTTP 304 "Not Modified" if the local cache is valid, saving time and bandwidth.
If both `ETag` and `Last-Modified` headers are present, Cargo uses the `ETag` only.

##### Cache Invalidation
If a registry is using some kind of CDN or proxy which caches access to the index files,
then it is recommended that registries implement some form of cache invalidation when
the files are updated. If these caches are not updated, then users may not be able to
access new crates until the cache is cleared.

##### Nonexistent Crates
For crates that do not exist, the registry should respond with a 404 "Not Found", 410 "Gone"
or 451 "Unavailable For Legal Reasons" code.

##### Sparse Limitations
Since the URL of the registry is stored in the lockfile, it's not recommended to offer
a registry with both protocols. Discussion about a transition plan is ongoing in issue
[#10964]. The [crates.io] registry is an exception, since Cargo internally substitutes
the equivalent git URL when the sparse protocol is used.

If a registry does offer both protocols, it's currently recommended to choose one protocol
as the canonical protocol and use [source replacement] for the other protocol.


[`cargo publish`]: ../commands/cargo-publish.md
[alphanumeric]: ../../std/primitive.char.html#method.is_alphanumeric
[crates.io]: https://crates.io/
[source replacement]: ../reference/source-replacement.md
[#10964]: https://github.com/rust-lang/cargo/issues/10964
32 changes: 5 additions & 27 deletions src/doc/src/reference/unstable.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ Each new feature described below should explain how to use it.
* Registries
* [credential-process](#credential-process) — Adds support for fetching registry tokens from an external authentication program.
* [`cargo logout`](#cargo-logout) — Adds the `logout` command to remove the currently saved registry token.
* [sparse-registry](#sparse-registry) — Adds support for fetching from static-file HTTP registries (`sparse+`)
* [publish-timeout](#publish-timeout) — Controls the timeout between uploading the crate and being available in the index
* [registry-auth](#registry-auth) — Adds support for authenticated registries, and generate registry authentication tokens using asymmetric cryptography.

Expand Down Expand Up @@ -792,32 +791,6 @@ fn main() {
}
```

### sparse-registry
* Tracking Issue: [9069](https://github.com/rust-lang/cargo/issues/9069)
* RFC: [#2789](https://github.com/rust-lang/rfcs/pull/2789)

The `sparse-registry` feature allows cargo to interact with remote registries served
over plain HTTP rather than git. These registries can be identified by urls starting with
`sparse+http://` or `sparse+https://`.

When fetching index metadata over HTTP, Cargo only downloads the metadata for relevant
crates, which can save significant time and bandwidth.

The format of the sparse index is identical to a checkout of a git-based index.

The `registries.crates-io.protocol` config option can be used to set the default protocol
for crates.io. This option requires `-Z sparse-registry` to be enabled.

* `sparse` — Use sparse index.
* `git` — Use git index.
* If the option is unset, it will be sparse index if `-Z sparse-registry` is enabled,
otherwise it will be git index.

Cargo locally caches the crate metadata files, and captures an `ETag` or `Last-Modified`
HTTP header from the server for each entry. When refreshing crate metadata, Cargo will
send the `If-None-Match` or `If-Modified-Since` header to allow the server to respond
with HTTP 304 if the local cache is valid, saving time and bandwidth.

### publish-timeout
* Tracking Issue: [11222](https://github.com/rust-lang/cargo/issues/11222)

Expand Down Expand Up @@ -1456,3 +1429,8 @@ for more information.
The `-Z terminal-width` option has been stabilized in the 1.68 release.
The terminal width is always passed to the compiler when running from a
terminal where Cargo can automatically detect the width.

### sparse-registry

Sparse registry support has been stabilized in the 1.68 release.
See [Registry Protocols](registries.md#registry-protocols) for more information.
12 changes: 3 additions & 9 deletions tests/testsuite/alt_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1335,9 +1335,7 @@ fn sparse_lockfile() {
.file("src/lib.rs", "")
.build();

p.cargo("-Zsparse-registry generate-lockfile")
.masquerade_as_nightly_cargo(&["sparse-registry"])
.run();
p.cargo("generate-lockfile").run();
assert_match_exact(
&p.read_lockfile(),
r#"# This file is automatically @generated by Cargo.
Expand Down Expand Up @@ -1383,9 +1381,7 @@ fn publish_with_transitive_dep() {
)
.file("src/lib.rs", "")
.build();
p1.cargo("publish -Zsparse-registry --registry Alt-1")
.masquerade_as_nightly_cargo(&["sparse-registry"])
.run();
p1.cargo("publish --registry Alt-1").run();

let p2 = project()
.file(
Expand All @@ -1402,7 +1398,5 @@ fn publish_with_transitive_dep() {
)
.file("src/lib.rs", "")
.build();
p2.cargo("publish -Zsparse-registry")
.masquerade_as_nightly_cargo(&["sparse-registry"])
.run();
p2.cargo("publish").run();
}
28 changes: 8 additions & 20 deletions tests/testsuite/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,8 @@ fn simple_publish_with_asymmetric() {
.file("src/main.rs", "fn main() {}")
.build();

p.cargo("publish --no-verify -Zregistry-auth -Zsparse-registry --registry dummy-registry")
.masquerade_as_nightly_cargo(&["registry-auth", "sparse-registry"])
p.cargo("publish --no-verify -Zregistry-auth --registry dummy-registry")
.masquerade_as_nightly_cargo(&["registry-auth"])
.with_stderr(
"\
[UPDATING] `dummy-registry` index
Expand Down Expand Up @@ -2518,8 +2518,7 @@ fn wait_for_first_publish() {
.file("src/lib.rs", "")
.build();

p.cargo("publish --no-verify -Z sparse-registry")
.masquerade_as_nightly_cargo(&["sparse-registry"])
p.cargo("publish --no-verify")
.replace_crates_io(registry.index_url())
.with_status(0)
.with_stderr(
Expand Down Expand Up @@ -2556,10 +2555,7 @@ See [..]
.file("src/main.rs", "fn main() {}")
.build();

p.cargo("build -Z sparse-registry")
.masquerade_as_nightly_cargo(&["sparse-registry"])
.with_status(0)
.run();
p.cargo("build").with_status(0).run();
}

/// A separate test is needed for package names with - or _ as they hit
Expand Down Expand Up @@ -2602,8 +2598,7 @@ fn wait_for_first_publish_underscore() {
.file("src/lib.rs", "")
.build();

p.cargo("publish --no-verify -Z sparse-registry")
.masquerade_as_nightly_cargo(&["sparse-registry"])
p.cargo("publish --no-verify")
.replace_crates_io(registry.index_url())
.with_status(0)
.with_stderr(
Expand Down Expand Up @@ -2640,10 +2635,7 @@ See [..]
.file("src/main.rs", "fn main() {}")
.build();

p.cargo("build -Z sparse-registry")
.masquerade_as_nightly_cargo(&["sparse-registry"])
.with_status(0)
.run();
p.cargo("build").with_status(0).run();
}

#[cargo_test]
Expand Down Expand Up @@ -2696,8 +2688,7 @@ fn wait_for_subsequent_publish() {
.file("src/lib.rs", "")
.build();

p.cargo("publish --no-verify -Z sparse-registry")
.masquerade_as_nightly_cargo(&["sparse-registry"])
p.cargo("publish --no-verify")
.replace_crates_io(registry.index_url())
.with_status(0)
.with_stderr(
Expand Down Expand Up @@ -2734,10 +2725,7 @@ See [..]
.file("src/main.rs", "fn main() {}")
.build();

p.cargo("build -Z sparse-registry")
.masquerade_as_nightly_cargo(&["sparse-registry"])
.with_status(0)
.run();
p.cargo("build").with_status(0).run();
}

#[cargo_test]
Expand Down
Loading

0 comments on commit 7dc5506

Please sign in to comment.