Skip to content

Commit

Permalink
feat: add a way of using GHA cache locally
Browse files Browse the repository at this point in the history
This is the first implementation of uploading the local cache
as a single file to a remote cache for reuse in a future build.

Right now it is only done for GHA as that was the intended scope¹,
but one could adapt this system to other remote caches.

Because of the immutability of GHACache, this commit only adds support
for re-using the cache for the same version (as defined by the user
through the `SCCACHE_GHA_VERSION` environment variable).
A way of reusing incremental builds within a given version, or even
across versions, could be devised, but it falls outside the scope of
this particular effort and is probably not trivial.

[1] Mozilla-Actions/sccache-action#81
  • Loading branch information
Alphare committed Apr 23, 2024
1 parent c8d5ffa commit 80ac943
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 9 deletions.
57 changes: 50 additions & 7 deletions src/cache/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -591,12 +591,23 @@ pub fn storage_from_config(
return Ok(Arc::new(storage));
}
#[cfg(feature = "gha")]
CacheType::GHA(config::GHACacheConfig { ref version, .. }) => {
debug!("Init gha cache with version {version}");

let storage = GHACache::build(version)
.map_err(|err| anyhow!("create gha cache failed: {err:?}"))?;
return Ok(Arc::new(storage));
CacheType::GHA(config::GHACacheConfig {
ref version,
as_local,
..
}) => {
if *as_local {
debug!("Init gha as local cache");
let downloaded_path = GHACache::download_to_local(config, version)
.map_err(|err| anyhow!("download gha cache as local failed: {err:?}"))?;
let storage = disk_cache_from_config(config, pool, downloaded_path)?;
return Ok(storage);
} else {
debug!("Init gha cache with version {version}");
let storage = GHACache::build(version)
.map_err(|err| anyhow!("create gha cache failed: {err:?}"))?;
return Ok(Arc::new(storage));
}
}
#[cfg(feature = "memcached")]
CacheType::Memcached(config::MemcachedCacheConfig {
Expand Down Expand Up @@ -724,7 +735,21 @@ pub fn storage_from_config(
}
}

let (dir, size) = (&config.fallback_cache.dir, config.fallback_cache.size);
disk_cache_from_config(config, pool, None)
}

fn disk_cache_from_config(
config: &Config,
pool: &tokio::runtime::Handle,
root_override: Option<PathBuf>,
) -> Result<Arc<dyn Storage>> {
let (mut dir, size) = (
config.fallback_cache.dir.to_owned(),
config.fallback_cache.size,
);
if let Some(new_root) = root_override {
dir = dir.join(new_root);
}
let preprocessor_cache_mode_config = config.fallback_cache.preprocessor_cache_mode;
let rw_mode = config.fallback_cache.rw_mode.into();
debug!("Init disk cache with dir {:?}, size {}", dir, size);
Expand All @@ -737,6 +762,24 @@ pub fn storage_from_config(
)))
}

/// Entry point for uploading the local disk cache to the remote GHA cache.
///
/// Validates the configuration before delegating to
/// `GHACache::upload_local_cache`.
///
/// # Errors
///
/// Fails when the configured cache is not GHA, when the GHA cache is
/// disabled, when it is not configured `as_local`, or when the upload
/// itself fails.
#[cfg(feature = "gha")]
pub fn upload_local_cache(config: &Config) -> Result<()> {
    // Anything other than a GHA cache configuration is rejected up front.
    let gha_settings = match config.cache.as_ref() {
        Some(CacheType::GHA(settings)) => settings,
        _ => bail!("Uploading the local cache is only possible when using GitHub Actions"),
    };

    if !gha_settings.enabled {
        bail!("GHA cache is disabled in config");
    }
    if !gha_settings.as_local {
        bail!("GHA not configured `as_local`");
    }

    GHACache::upload_local_cache(config)
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
107 changes: 107 additions & 0 deletions src/cache/gha.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::path::PathBuf;

use opendal::layers::LoggingLayer;
use opendal::services::Ghac;
use opendal::Operator;

use crate::config::Config;
use crate::errors::*;
use crate::VERSION;

const FULL_GHA_CACHE_ROOT: &str = "sccache-full";

/// A cache that stores entries in GHA Cache Services.
pub struct GHACache;

Expand All @@ -43,4 +48,106 @@ impl GHACache {
.finish();
Ok(op)
}

/// Download a copy of the entire GHA cache from the given version
/// and return the path to the root folder on the local disk.
///
/// It is the user's responsibility to split the caches according
/// to anything relevant like architecture, OS, etc. by using the `version`.
pub fn download_to_local(config: &Config, version: &str) -> Result<Option<PathBuf>> {
let tarball_path = local_cache_tarball_path(config);
let mut builder = Ghac::default();

// TODO somehow loop over decreasingly "fresh" versions of the cache
// like in
// https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#matching-a-cache-key
// For now the behavior is to match the same version, which would
// speed up rebuilds in the same (Git) branch.
if version.is_empty() {
builder.version(&format!("sccache-v{VERSION}"));
} else {
builder.version(&format!("sccache-v{VERSION}-{version}"));
}

let op = Operator::new(builder)?
.layer(LoggingLayer::default())
.finish()
.blocking();

match op.reader(FULL_GHA_CACHE_ROOT) {
Ok(mut reader) => {
debug!("Found full gha cache");
let mut writer = std::fs::OpenOptions::new()
.write(true)
.open(&tarball_path)?;
std::io::copy(&mut reader, &mut writer)?;
}
Err(error) => match error.kind() {
opendal::ErrorKind::NotFound => {
debug!("Full gha cache not found");
return Ok(None);
}
_ => bail!(error),
},
};

let cache = local_cache_path(config);
let tarball =
std::fs::File::open(tarball_path).context("Failed to open the GHA cache tarball")?;
tar::Archive::new(tarball)
.unpack(&cache)
.context("Failed to extract the GHA cache tarball")?;

Ok(Some(cache))
}

/// Upload a tarball of the local cache
pub fn upload_local_cache(config: &Config) -> Result<()> {
let cache = local_cache_path(config);
if !cache.exists() {
bail!("Local cache does not exist: nothing to do")
}
debug!("Found local gha cache at {}", cache.display());

let op = Operator::new(Ghac::default())?
.layer(LoggingLayer::default())
.finish()
.blocking();

// GHA cache is immutable, if the path has already been written within
// a given version, it cannot be changed again.
if op.is_exist(FULL_GHA_CACHE_ROOT)? {
debug!("remote cache of this version already exists, cannot upload");
return Ok(());
}

let mut tar_builder = tar::Builder::new(vec![]);
tar_builder
.append_dir_all(local_cache_path(config), ".")
.context("Failed to create GHA local cache tarball")?;
let source = local_cache_tarball_path(config);
std::fs::write(&source, tar_builder.into_inner()?)
.context("Failed to write the GHA local cache tarball to disk")?;

match op.writer(FULL_GHA_CACHE_ROOT) {
Ok(mut writer) => {
let mut reader = std::fs::File::open(&source)?;
std::io::copy(&mut reader, &mut writer)?;
Ok(())
}
// TODO handle error gracefully in case of TOCTOU from the
// check at the start of the function
Err(error) => bail!(error),
}
}
}

/// Path of the tarball used to transfer the full local cache to and from
/// the remote: `<fallback cache dir>/sccache-full.tar`.
fn local_cache_tarball_path(config: &Config) -> PathBuf {
    // The extension must be given without a leading dot; passing ".tar"
    // produces a file named `sccache-full..tar`.
    config
        .fallback_cache
        .dir
        .join(FULL_GHA_CACHE_ROOT)
        .with_extension("tar")
}

/// Directory holding the local copy of the full GHA cache: the
/// `FULL_GHA_CACHE_ROOT` entry inside the fallback cache directory.
fn local_cache_path(config: &Config) -> PathBuf {
    let mut root = config.fallback_cache.dir.clone();
    root.push(FULL_GHA_CACHE_ROOT);
    root
}
17 changes: 17 additions & 0 deletions src/cmdline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ pub enum Command {
env_vars: Vec<(OsString, OsString)>,
},
DebugPreprocessorCacheEntries,
/// Uploads the local cache to a shared storage
#[cfg(feature = "gha")]
UploadCache,
}

fn flag_infer_long_and_short(name: &'static str) -> Arg {
Expand Down Expand Up @@ -156,6 +159,9 @@ fn get_clap_command() -> clap::Command {
.value_name("FMT")
.value_parser(clap::value_parser!(StatsFormat))
.default_value(StatsFormat::default().as_str()),
flag_infer_long("upload-cache")
.help("upload the local cache to the configured shared storage")
.action(ArgAction::SetTrue),
Arg::new("CMD")
.value_parser(clap::value_parser!(OsString))
.trailing_var_arg(true)
Expand All @@ -171,6 +177,8 @@ fn get_clap_command() -> clap::Command {
"show-adv-stats",
"start-server",
"stop-server",
#[cfg(feature = "gha")]
"upload-cache",
"zero-stats",
"package-toolchain",
"CMD",
Expand Down Expand Up @@ -274,6 +282,15 @@ pub fn try_parse() -> Result<Command> {
Ok(Command::DistAuth)
} else if matches.get_flag("dist-status") {
Ok(Command::DistStatus)
} else if matches.get_flag("upload-cache") {
#[cfg(feature = "gha")]
{
Ok(Command::UploadCache)
}
#[cfg(not(feature = "gha"))]
{
bail!("--upload-cache is only available when using GHA")
}
} else if matches.contains_id("package-toolchain") {
let mut toolchain_values = matches
.get_many("package-toolchain")
Expand Down
4 changes: 4 additions & 0 deletions src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// limitations under the License.

use crate::cache::storage_from_config;
#[cfg(feature = "gha")]
use crate::cache::upload_local_cache;
use crate::client::{connect_to_server, connect_with_retry, ServerConnection};
use crate::cmdline::{Command, StatsFormat};
use crate::compiler::ColorMode;
Expand Down Expand Up @@ -743,6 +745,8 @@ pub fn run_command(cmd: Command) -> Result<i32> {
}
};
}
#[cfg(feature = "gha")]
Command::UploadCache => upload_local_cache(config)?,
#[cfg(not(feature = "dist-client"))]
Command::DistAuth => bail!(
"Distributed compilation not compiled in, please rebuild with the dist-client feature"
Expand Down
17 changes: 15 additions & 2 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,11 @@ pub struct GHACacheConfig {
/// Version for gha cache is a namespace. By setting different versions,
/// we can avoid mixed caches.
pub version: String,
/// Download the entire cache to be used like a local cache, then upload
/// it back if anything changed.
/// This is useful in CI contexts to reduce the number of requests,
/// hence avoiding rate limiting and improving overall cache speed.
pub as_local: bool,
}

/// Memcached's default value of expiration is 10800s (3 hours), which is too
Expand Down Expand Up @@ -784,24 +789,30 @@ fn config_from_env() -> Result<EnvConfig> {
});

// ======= GHA =======
let gha = if let Ok(version) = env::var("SCCACHE_GHA_VERSION") {
let mut gha = if let Ok(version) = env::var("SCCACHE_GHA_VERSION") {
// If SCCACHE_GHA_VERSION has been set, we don't need to check
// SCCACHE_GHA_ENABLED's value anymore.
Some(GHACacheConfig {
enabled: true,
version,
as_local: false,
})
} else if bool_from_env_var("SCCACHE_GHA_ENABLED")?.unwrap_or(false) {
// If only SCCACHE_GHA_ENABLED has been set to the true value, enable with
// default version.
Some(GHACacheConfig {
enabled: true,
version: "".to_string(),
as_local: false,
})
} else {
None
};

if let Some(gha) = &mut gha {
gha.as_local = bool_from_env_var("SCCACHE_GHA_AS_LOCAL")?.unwrap_or(false);
}

// ======= Azure =======
let azure = if let (Ok(connection_string), Ok(container)) = (
env::var("SCCACHE_AZURE_CONNECTION_STRING"),
Expand Down Expand Up @@ -1453,6 +1464,7 @@ service_account = "example_service_account"
[cache.gha]
enabled = true
version = "sccache"
as_local = false
[cache.memcached]
# Deprecated alias for `endpoint`
Expand Down Expand Up @@ -1519,7 +1531,8 @@ no_credentials = true
}),
gha: Some(GHACacheConfig {
enabled: true,
version: "sccache".to_string()
version: "sccache".to_string(),
as_local: false,
}),
redis: Some(RedisCacheConfig {
url: Some("redis://user:passwd@1.2.3.4:6379/?db=1".to_owned()),
Expand Down

0 comments on commit 80ac943

Please sign in to comment.