Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upload index metadata to index/ when publishing new crates #4661

Merged
merged 1 commit into from
May 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ export TEST_DATABASE_URL=
# not needed if the S3 bucket is in US standard
# export S3_REGION=

# Credentials for uploading index metadata to S3. You can leave these commented
# out if your crates.io instance does not publish index metadata to S3.
# export S3_INDEX_BUCKET=
# export S3_INDEX_ACCESS_KEY=
# export S3_INDEX_SECRET_KEY=
# not needed if the S3 bucket is in US standard
# export S3_INDEX_REGION=

# Upstream location of the registry index. Background jobs will push to
# this URL. The default points to a local index for development.
# Run `./script/init-local-index.sh` to initialize this repo.
Expand Down
73 changes: 66 additions & 7 deletions cargo-registry-index/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,18 +293,33 @@ impl Repository {
.join(Self::relative_index_file(name))
}

/// Splits a crate name into the path segments of its index file.
///
/// Names of length 1 and 2 live under `1` and `2`; names of length 3 live
/// under `3/<first character>`; longer names live under
/// `<first two bytes>/<next two bytes>`. The name itself is always the last
/// segment.
///
/// The name is used verbatim: no lowercasing happens here, so callers that
/// need a lowercase path must lowercase the name first.
fn relative_index_file_helper(name: &str) -> Vec<&str> {
    // Directory segments that precede the file name.
    let directories: Vec<&str> = match name.len() {
        1 => vec!["1"],
        2 => vec!["2"],
        3 => vec!["3", &name[..1]],
        _ => vec![&name[..2], &name[2..4]],
    };

    directories
        .into_iter()
        .chain(std::iter::once(name))
        .collect()
}

/// Returns the relative path to the crate index file that corresponds to
/// the given crate name.
/// the given crate name as a path (i.e. with platform-dependent folder separators).
///
/// see <https://doc.rust-lang.org/cargo/reference/registries.html#index-format>
pub fn relative_index_file(name: &str) -> PathBuf {
let name = name.to_lowercase();
match name.len() {
1 => Path::new("1").join(&name),
2 => Path::new("2").join(&name),
3 => Path::new("3").join(&name[..1]).join(&name),
_ => Path::new(&name[0..2]).join(&name[2..4]).join(&name),
}
Self::relative_index_file_helper(&name).iter().collect()
}

/// Builds the location of a crate's index file for use in URLs, i.e. the
/// index path segments of the lowercased crate name joined with `/`
/// regardless of platform.
///
/// see <https://doc.rust-lang.org/cargo/reference/registries.html#index-format>
pub fn relative_index_file_for_url(name: &str) -> String {
    let lowercased = name.to_lowercase();
    let segments = Self::relative_index_file_helper(&lowercased);
    segments.join("/")
}

/// Returns the [Object ID](git2::Oid) of the currently checked out commit
Expand Down Expand Up @@ -343,6 +358,50 @@ impl Repository {
self.push("refs/heads/master")
}

/// Gets a list of files that have been modified since a given `starting_commit`
/// (use `starting_commit = None` for a list of all files).
///
/// `starting_commit` is parsed as a commit id with `git2::Oid::from_str`. Its
/// tree is diffed against the tree of the current `HEAD`, and the path of the
/// new side of every delta is returned. Deltas whose new side does not exist
/// (presumably deletions) are left out.
///
/// # Errors
///
/// Returns an error if the commit id cannot be parsed, if either commit or
/// its tree cannot be found, if the diff fails, or if a diff entry has no path.
pub fn get_files_modified_since(
    &self,
    starting_commit: Option<&str>,
) -> anyhow::Result<Vec<PathBuf>> {
    // Resolve the optional starting commit to its tree; `None` stays `None`.
    let starting_tree = starting_commit
        .map(|starting_commit| -> anyhow::Result<_> {
            let oid = git2::Oid::from_str(starting_commit)
                .context("failed to parse commit into Oid")?;
            let commit = self
                .repository
                .find_commit(oid)
                .context("failed to find commit")?;
            commit
                .as_object()
                .peel_to_tree()
                .context("failed to find tree for commit")
        })
        .transpose()?;

    let head = self
        .repository
        .find_commit(self.head_oid()?)
        .context("failed to find HEAD commit")?
        .as_object()
        .peel_to_tree()
        .context("failed to find tree for HEAD")?;
    let diff = self
        .repository
        .diff_tree_to_tree(starting_tree.as_ref(), Some(&head), None)
        .context("failed to run diff")?;

    // Report a missing path as an error instead of panicking.
    let files = diff
        .deltas()
        .map(|delta| delta.new_file())
        .filter(|file| file.exists())
        .map(|file| {
            file.path()
                .map(|path| path.to_path_buf())
                .context("diff entry is missing a path")
        })
        .collect::<anyhow::Result<Vec<_>>>()?;

    Ok(files)
}

/// Push the current branch to the provided refname
fn push(&self, refspec: &str) -> anyhow::Result<()> {
let mut ref_status = Ok(());
Expand Down
9 changes: 8 additions & 1 deletion src/admin/delete_crate.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::{admin::dialoguer, db, models::Crate, schema::crates};
use crate::{admin::dialoguer, config, db, models::Crate, schema::crates};

use diesel::prelude::*;
use reqwest::blocking::Client;

#[derive(clap::Parser, Debug)]
#[clap(
Expand All @@ -25,6 +26,10 @@ pub fn run(opts: Opts) {
fn delete(opts: Opts, conn: &PgConnection) {
let krate: Crate = Crate::by_name(&opts.crate_name).first(conn).unwrap();

let config = config::Base::from_environment();
let uploader = config.uploader();
let client = Client::new();

let prompt = format!(
"Are you sure you want to delete {} ({})?",
opts.crate_name, krate.id
Expand All @@ -42,4 +47,6 @@ fn delete(opts: Opts, conn: &PgConnection) {
if !dialoguer::confirm("commit?") {
panic!("aborting transaction");
}

uploader.delete_index(&client, &krate.name).unwrap();
}
1 change: 1 addition & 0 deletions src/admin/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ pub mod populate;
pub mod render_readmes;
pub mod test_pagerduty;
pub mod transfer_crates;
pub mod upload_index;
pub mod verify_token;
60 changes: 60 additions & 0 deletions src/admin/upload_index.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
use std::time::{Duration, Instant};

use crate::admin::dialoguer;
use cargo_registry_index::{Repository, RepositoryConfig};
use reqwest::blocking::Client;

use crate::config;

// Command-line options for the `upload-index` admin subcommand.
// NOTE(review): plain `//` comments are used on purpose. With the `clap::Parser`
// derive, `///` doc comments presumably feed the generated help text, so adding
// or rewording them would change what the program prints.
#[derive(clap::Parser, Debug)]
#[clap(
name = "upload-index",
about = "Upload index from git to S3 (http-based index)"
)]
pub struct Opts {
/// Incremental commit. Any changed files made after this commit will be uploaded.
// Optional. `run` hands this to `Repository::get_files_modified_since`, where
// `None` selects all files.
incremental_commit: Option<String>,
}

/// Uploads crate index files from the git index repository via the configured uploader.
///
/// Opens the repository described by the environment, resets its head, and
/// asks the repository which files changed since `opts.incremental_commit`
/// (all files when no commit is given). After an interactive confirmation,
/// each file's contents are read and passed to `upload_index`, keyed by the
/// file name interpreted as the crate name.
///
/// # Errors
///
/// Returns an error if the repository cannot be opened or inspected, if a
/// changed path has no UTF-8 file name, if an index file cannot be read, or
/// if an upload fails. Declining the confirmation prompt returns `Ok(())`.
pub fn run(opts: Opts) -> anyhow::Result<()> {
    let config = config::Base::from_environment();
    let uploader = config.uploader();
    let client = Client::new();

    println!("fetching git repo");
    let repo_config = RepositoryConfig::from_environment();
    let repo = Repository::open(&repo_config)?;
    repo.reset_head()?;
    println!("HEAD is at {}", repo.head_oid()?);

    let files = repo.get_files_modified_since(opts.incremental_commit.as_deref())?;
    println!("found {} files to upload", files.len());
    if !dialoguer::confirm("continue with upload?") {
        return Ok(());
    }

    let mut progress_update_time = Instant::now();
    for (i, file) in files.iter().enumerate() {
        // Report an unusable path as an error instead of panicking mid-upload.
        let crate_name = file
            .file_name()
            .and_then(|name| name.to_str())
            .ok_or_else(|| anyhow::anyhow!("invalid index file path `{}`", file.display()))?;

        // Changed files that are not at the index location derived from their
        // own name are skipped (presumably non-crate files such as the index
        // configuration).
        let path = repo.index_file(crate_name);
        if !path.exists() {
            println!("skipping file `{}`", crate_name);
            continue;
        }
        let contents = std::fs::read_to_string(&path)?;
        uploader.upload_index(&client, crate_name, contents)?;

        // Print a progress update every 10 seconds.
        let now = Instant::now();
        if now - progress_update_time > Duration::from_secs(10) {
            progress_update_time = now;
            // `i` is zero-based and this file is already done, hence `i + 1`.
            println!("uploading {}/{}", i + 1, files.len());
        }
    }

    println!(
        "uploading completed; use `upload-index {}` for an incremental run",
        repo.head_oid()?
    );
    Ok(())
}
4 changes: 3 additions & 1 deletion src/bin/crates-admin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use cargo_registry::admin::{
delete_crate, delete_version, migrate, populate, render_readmes, test_pagerduty,
transfer_crates, verify_token,
transfer_crates, upload_index, verify_token,
};

#[derive(clap::Parser, Debug)]
Expand All @@ -22,6 +22,7 @@ enum SubCommand {
TransferCrates(transfer_crates::Opts),
VerifyToken(verify_token::Opts),
Migrate(migrate::Opts),
UploadIndex(upload_index::Opts),
}

fn main() -> anyhow::Result<()> {
Expand All @@ -38,6 +39,7 @@ fn main() -> anyhow::Result<()> {
SubCommand::TransferCrates(opts) => transfer_crates::run(opts),
SubCommand::VerifyToken(opts) => verify_token::run(opts).unwrap(),
SubCommand::Migrate(opts) => migrate::run(opts)?,
SubCommand::UploadIndex(opts) => upload_index::run(opts)?,
}

Ok(())
Expand Down
43 changes: 37 additions & 6 deletions src/config/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,24 @@ impl Base {

pub fn test() -> Self {
let uploader = Uploader::S3 {
bucket: s3::Bucket::new(
bucket: Box::new(s3::Bucket::new(
String::from("alexcrichton-test"),
None,
dotenv::var("S3_ACCESS_KEY").unwrap_or_default(),
dotenv::var("S3_SECRET_KEY").unwrap_or_default(),
// When testing we route all API traffic over HTTP so we can
// sniff/record it, but everywhere else we use https
"http",
),
)),
index_bucket: Some(Box::new(s3::Bucket::new(
String::from("alexcrichton-test"),
None,
dotenv::var("S3_INDEX_ACCESS_KEY").unwrap_or_default(),
dotenv::var("S3_INDEX_SECRET_KEY").unwrap_or_default(),
// When testing we route all API traffic over HTTP so we can
// sniff/record it, but everywhere else we use https
"http",
))),
cdn: None,
};
Self {
Expand All @@ -96,27 +105,49 @@ impl Base {
}

/// Builds the S3 uploader, reading all credentials through `env`.
///
/// The crate bucket is configured from `S3_BUCKET`, `S3_REGION` (optional),
/// `S3_ACCESS_KEY` and `S3_SECRET_KEY`. The index bucket is only configured
/// when `S3_INDEX_BUCKET` is set, in which case `S3_INDEX_ACCESS_KEY` and
/// `S3_INDEX_SECRET_KEY` are read through `env` as well.
///
/// NOTE(review): going by this function's name, `env` presumably panics when
/// a variable is missing; confirm against its definition.
fn s3_panic_if_missing_keys() -> Uploader {
    // The index bucket is optional: without `S3_INDEX_BUCKET` it stays `None`.
    let index_bucket = dotenv::var("S3_INDEX_BUCKET").ok().map(|name| {
        Box::new(s3::Bucket::new(
            name,
            dotenv::var("S3_INDEX_REGION").ok(),
            env("S3_INDEX_ACCESS_KEY"),
            env("S3_INDEX_SECRET_KEY"),
            "https",
        ))
    });
    Uploader::S3 {
        bucket: Box::new(s3::Bucket::new(
            env("S3_BUCKET"),
            dotenv::var("S3_REGION").ok(),
            env("S3_ACCESS_KEY"),
            env("S3_SECRET_KEY"),
            "https",
        )),
        index_bucket,
        cdn: dotenv::var("S3_CDN").ok(),
    }
}

fn s3_maybe_read_only() -> Uploader {
let index_bucket = match dotenv::var("S3_INDEX_BUCKET") {
Ok(name) => Some(Box::new(s3::Bucket::new(
name,
dotenv::var("S3_INDEX_REGION").ok(),
dotenv::var("S3_INDEX_ACCESS_KEY").unwrap_or_default(),
dotenv::var("S3_INDEX_SECRET_KEY").unwrap_or_default(),
"https",
))),
Err(_) => None,
};
Uploader::S3 {
bucket: s3::Bucket::new(
bucket: Box::new(s3::Bucket::new(
env("S3_BUCKET"),
dotenv::var("S3_REGION").ok(),
dotenv::var("S3_ACCESS_KEY").unwrap_or_default(),
dotenv::var("S3_SECRET_KEY").unwrap_or_default(),
"https",
),
)),
index_bucket,
cdn: dotenv::var("S3_CDN").ok(),
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/controllers/krate/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
// Upload crate tarball
app.config
.uploader()
.upload_crate(&app, tarball, &krate, vers)?;
.upload_crate(app.http_client(), tarball, &krate, vers)?;

let (features, features2): (HashMap<_, _>, HashMap<_, _>) =
features.into_iter().partition(|(_k, vals)| {
Expand Down
67 changes: 67 additions & 0 deletions src/tests/http-data/krate_publish_features_version_2
Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,72 @@
],
"body": ""
}
},
{
"request": {
"uri": "http://alexcrichton-test.s3.amazonaws.com/index/3/f/foo",
"method": "PUT",
"headers": [
[
"accept-encoding",
"gzip"
],
[
"accept",
"*/*"
],
[
"content-length",
"336"
],
[
"date",
"Fri, 15 Sep 2017 07:53:06 -0700"
],
[
"authorization",
"AWS AKIAICL5IWUZYWWKA7JA:uDc39eNdF6CcwB+q+JwKsoDLQc4="
],
[
"content-type",
"text/plain"
],
[
"host",
"alexcrichton-test.s3.amazonaws.com"
]
],
"body": "eyJuYW1lIjoiZm9vIiwidmVycyI6IjEuMC4wIiwiZGVwcyI6W3sibmFtZSI6ImJhciIsInJlcSI6Ij4gMCIsImZlYXR1cmVzIjpbXSwib3B0aW9uYWwiOmZhbHNlLCJkZWZhdWx0X2ZlYXR1cmVzIjp0cnVlLCJ0YXJnZXQiOm51bGwsImtpbmQiOiJub3JtYWwifV0sImNrc3VtIjoiYWNiNTYwNGIxMjZhYzg5NGMxZWIxMWM0NTc1YmYyMDcyZmVhNjEyMzJhODg4ZTQ1Mzc3MGM3OWQ3ZWQ1NjQxOSIsImZlYXR1cmVzIjp7Im9sZF9mZWF0IjpbXX0sImZlYXR1cmVzMiI6eyJuZXdfZmVhdCI6WyJkZXA6YmFyIiwiYmFyPy9mZWF0Il19LCJ5YW5rZWQiOmZhbHNlLCJsaW5rcyI6bnVsbCwidiI6Mn0K"
},
"response": {
"status": 200,
"headers": [
[
"x-amz-request-id",
"26589A5E52F8395C"
],
[
"x-amz-id-2",
"JdIvnNTw53aqXjBIqBLNuN4kxf/w1XWX+xuIiGBDYy7yzOSDuAMtBSrTW4ZWetcCIdqCUHuQ51A="
],
[
"content-length",
"0"
],
[
"Server",
"AmazonS3"
],
[
"date",
"Fri,15 Sep 2017 14:53:07 GMT"
],
[
"ETag",
"\"f9016ad360cebb4fe2e6e96e5949f022\""
]
],
"body": ""
}
}
]
Loading