From c7dc4afe408b2419b832d8dbede82775b3cd9a25 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 21 Mar 2021 13:46:59 -0400 Subject: [PATCH] Don't hard-code essential files in `copy_doc_dir` This avoids unnecessary upload costs for S3. --- src/docbuilder/rustwide_builder.rs | 93 +++++++++++++++++------------- src/utils/copy.rs | 35 ++++++----- 2 files changed, 74 insertions(+), 54 deletions(-) diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 574830c7f..a4579f498 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -182,20 +182,6 @@ impl RustwideBuilder { let krate = Crate::crates_io(DUMMY_CRATE_NAME, DUMMY_CRATE_VERSION); krate.fetch(&self.workspace)?; - // TODO: remove this when https://github.com/rust-lang/rustwide/pull/53 lands. - struct Rustdoc<'a> { - toolchain_version: &'a str, - } - impl rustwide::cmd::Runnable for Rustdoc<'_> { - fn name(&self) -> Binary { - Binary::ManagedByRustwide(PathBuf::from("rustdoc")) - } - - fn prepare_command<'w, 'pl>(&self, cmd: Command<'w, 'pl>) -> Command<'w, 'pl> { - cmd.args(&[format!("+{}", self.toolchain_version)]) - } - } - build_dir .build(&self.toolchain, &krate, self.prepare_sandbox(&limits)) .run(|build| { @@ -212,29 +198,14 @@ impl RustwideBuilder { .prefix("essential-files") .tempdir()?; - let toolchain_version = self.toolchain.as_dist().unwrap().name(); - let output = build.cmd(Rustdoc { toolchain_version }) - .args(&["-Zunstable-options", "--print=unversioned-files"]) - .run_capture() - .context("failed to learn about unversioned files - make sure you have nightly-2021-03-07 or later")?; - let essential_files_unversioned = output - .stdout_lines() - .iter() - .map(PathBuf::from); - let resource_suffix = format!("-{}", parse_rustc_version(&self.rustc_version)?); - let essential_files_versioned: Vec<_> = source.read_dir()? - .collect::<std::result::Result<Vec<_>, _>>()? 
- .into_iter() - .filter_map(|entry| { - entry.file_name().to_str().and_then(|name| if name.contains(&resource_suffix) { - Some(entry.file_name().into()) - } else { None }) - }) - .collect(); - for file_name in essential_files_unversioned.chain(essential_files_versioned) { + for file_name in self.essential_files(build, &source)? { let source_path = source.join(&file_name); let dest_path = dest.path().join(&file_name); - debug!("copying {} to {}", source_path.display(), dest_path.display()); + debug!( + "copying {} to {}", + source_path.display(), + dest_path.display() + ); ::std::fs::copy(&source_path, &dest_path).with_context(|_| { format!( "couldn't copy '{}' to '{}'", @@ -363,7 +334,7 @@ impl RustwideBuilder { let mut algs = HashSet::new(); if has_docs { debug!("adding documentation for the default target to the database"); - self.copy_docs(&build.host_target_dir(), local_storage.path(), "", true)?; + self.copy_docs(build, local_storage.path(), "", true)?; successful_targets.push(res.target.clone()); @@ -465,7 +436,7 @@ impl RustwideBuilder { // adding target to successfully_targets. if build.host_target_dir().join(target).join("doc").is_dir() { debug!("adding documentation for target {} to the database", target,); - self.copy_docs(&build.host_target_dir(), local_storage, target, false)?; + self.copy_docs(build, local_storage, target, false)?; successful_targets.push(target.to_string()); } } @@ -638,12 +609,12 @@ impl RustwideBuilder { fn copy_docs( &self, - target_dir: &Path, + build: &Build, local_storage: &Path, target: &str, is_default_target: bool, ) -> Result<()> { - let source = target_dir.join(target).join("doc"); + let source = build.host_target_dir().join(target).join("doc"); let mut dest = local_storage.to_path_buf(); // only add target name to destination directory when we are copying a non-default target. 
@@ -656,7 +627,49 @@ impl RustwideBuilder { } info!("{} {}", source.display(), dest.display()); - copy_doc_dir(source, dest) + let essential_files = self.essential_files(build, &source)?; + copy_doc_dir(source, dest, &essential_files) + } + + fn essential_files(&self, build: &Build, doc_dir: &Path) -> Result<Vec<PathBuf>> { + // TODO: remove this when https://github.com/rust-lang/rustwide/pull/53 lands. + struct Rustdoc<'a> { + toolchain_version: &'a str, + } + impl rustwide::cmd::Runnable for Rustdoc<'_> { + fn name(&self) -> Binary { + Binary::ManagedByRustwide(PathBuf::from("rustdoc")) + } + + fn prepare_command<'w, 'pl>(&self, cmd: Command<'w, 'pl>) -> Command<'w, 'pl> { + cmd.args(&[format!("+{}", self.toolchain_version)]) + } + } + + let toolchain_version = self.toolchain.as_dist().unwrap().name(); + let output = build.cmd(Rustdoc { toolchain_version }) + .args(&["-Zunstable-options", "--print=unversioned-files"]) + .run_capture() + .context("failed to learn about unversioned files - make sure you have nightly-2021-03-07 or later")?; + let mut essential_files: Vec<_> = output.stdout_lines().iter().map(PathBuf::from).collect(); + let resource_suffix = format!("-{}", parse_rustc_version(&self.rustc_version)?); + + let essential_files_versioned = doc_dir + .read_dir()? + .collect::<std::result::Result<Vec<_>, _>>()? + .into_iter() + .filter_map(|entry| { + entry.file_name().to_str().and_then(|name| { + if name.contains(&resource_suffix) { + Some(entry.file_name().into()) + } else { + None + } + }) + }); + + essential_files.extend(essential_files_versioned); + Ok(essential_files) } fn upload_docs( diff --git a/src/utils/copy.rs b/src/utils/copy.rs index 1ccde1cef..006990e0e 100644 --- a/src/utils/copy.rs +++ b/src/utils/copy.rs @@ -1,15 +1,17 @@ use crate::error::Result; use std::fs; -use std::path::Path; - -use regex::Regex; +use std::path::{Path, PathBuf}; /// Copies documentation from a crate's target directory to destination. /// /// Target directory must have doc directory. 
/// -/// This function is designed to avoid file duplications. -pub fn copy_doc_dir<P: AsRef<Path>, Q: AsRef<Path>>(source: P, destination: Q) -> Result<()> { +/// This does not copy any files with the same name as `shared_files`. +pub fn copy_doc_dir<P: AsRef<Path>, Q: AsRef<Path>>( + source: P, + destination: Q, + shared_files: &[PathBuf], +) -> Result<()> { let destination = destination.as_ref(); // Make sure destination directory exists @@ -17,21 +19,20 @@ pub fn copy_doc_dir<P: AsRef<Path>, Q: AsRef<Path>>(source: P, destination: Q) - fs::create_dir_all(destination)?; } - // Avoid copying common files - let dup_regex = Regex::new( - r"(\.lock|\.txt|\.woff|\.svg|\.css|main-.*\.css|main-.*\.js|normalize-.*\.js|rustdoc-.*\.css|storage-.*\.js|theme-.*\.js)$") - .unwrap(); - for file in source.as_ref().read_dir()? { let file = file?; - let destination_full_path = destination.join(file.file_name()); + let filename = file.file_name(); + let destination_full_path = destination.join(&filename); let metadata = file.metadata()?; if metadata.is_dir() { fs::create_dir_all(&destination_full_path)?; - copy_doc_dir(file.path(), destination_full_path)? - } else if dup_regex.is_match(&file.file_name().into_string().unwrap()[..]) { + copy_doc_dir(file.path(), destination_full_path, shared_files)?; + continue; + } + + if shared_files.contains(&PathBuf::from(filename)) { continue; } else { fs::copy(&file.path(), &destination_full_path)?; @@ -66,7 +67,13 @@ mod test { fs::write(doc.join("inner").join("important.svg"), "").unwrap(); // lets try to copy a src directory to tempdir - copy_doc_dir(source.path().join("doc"), destination.path()).unwrap(); + let ignored_files = ["index.txt".into(), "important.svg".into()]; + copy_doc_dir( + source.path().join("doc"), + destination.path(), + &ignored_files, + ) + .unwrap(); assert!(destination.path().join("index.html").exists()); assert!(!destination.path().join("index.txt").exists()); assert!(destination.path().join("inner").join("index.html").exists());