Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sccache-action usage: Use GHA as local cache #2142

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 51 additions & 1 deletion .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,56 @@ jobs:

${SCCACHE_PATH} --show-stats | grep -e "Cache hits\s*[1-9]"

# Integration job: exercises the GitHub Actions cache backend in "as local"
# mode (SCCACHE_GHA_AS_LOCAL), building the project twice and requiring
# cache hits on the second build.
# NOTE(review): the steps below use ${SCCACHE_PATH}, which is not set in this
# job's `env` block — presumably it is defined at workflow level; confirm.
gha-as-local:
runs-on: ubuntu-latest
needs: build

env:
SCCACHE_GHA_ENABLED: "on"
SCCACHE_GHA_AS_LOCAL: "on"
RUSTC_WRAPPER: /home/runner/.cargo/bin/sccache

steps:
- name: Clone repository
uses: actions/checkout@v4

# Re-export the Actions cache URL and runtime token as environment
# variables so that later steps can see them.
- name: Configure Cache Env
uses: actions/github-script@v7
with:
script: |
core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');

- name: Install rust
uses: ./.github/actions/rust-toolchain
with:
toolchain: "stable"

# Fetch the `integration-tests` artifact into the directory RUSTC_WRAPPER
# points at (presumably the sccache binary produced by the `build` job).
- uses: actions/download-artifact@v4
with:
name: integration-tests
path: /home/runner/.cargo/bin/
- name: Chmod for binary
run: chmod +x ${SCCACHE_PATH}

# First build from a clean tree.
- name: Test
run: cargo clean && cargo build

# Print the stats, then fail the step unless the output mentions "gha".
- name: Output
run: |
${SCCACHE_PATH} --show-stats

${SCCACHE_PATH} --show-stats | grep gha

# Second build from a clean tree.
- name: Test Twice for Cache Read
run: cargo clean && cargo build

# Print the stats, then fail the step unless at least one cache hit
# (a non-zero "Cache hits" counter) is reported.
- name: Output
run: |
${SCCACHE_PATH} --show-stats

${SCCACHE_PATH} --show-stats | grep -e "Cache hits\s*[1-9]"

memcached-deprecated:
runs-on: ubuntu-latest
needs: build
Expand Down Expand Up @@ -576,7 +626,7 @@ jobs:
${SCCACHE_PATH} --show-stats | grep -e "Cache hits\s*[1-9]"

hip:
# Probably wouldn't matter anyway since we run in a container, but staying
# Probably wouldn't matter anyway since we run in a container, but staying
# close to the version is better than not.
runs-on: ubuntu-22.04
needs: build
Expand Down
61 changes: 54 additions & 7 deletions src/cache/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -591,12 +591,24 @@ pub fn storage_from_config(
return Ok(Arc::new(storage));
}
#[cfg(feature = "gha")]
CacheType::GHA(config::GHACacheConfig { ref version, .. }) => {
debug!("Init gha cache with version {version}");

let storage = GHACache::build(version)
.map_err(|err| anyhow!("create gha cache failed: {err:?}"))?;
return Ok(Arc::new(storage));
CacheType::GHA(config::GHACacheConfig {
ref version,
as_local,
..
}) => {
if *as_local {
debug!("Init gha as local cache");
let downloaded_path = pool
.block_on(GHACache::download_to_local(config, version))
.map_err(|err| anyhow!("download gha cache as local failed: {err:?}"))?;
let storage = disk_cache_from_config(config, pool, downloaded_path)?;
return Ok(storage);
} else {
debug!("Init gha cache with version {version}");
let storage = GHACache::build(version)
.map_err(|err| anyhow!("create gha cache failed: {err:?}"))?;
return Ok(Arc::new(storage));
}
}
#[cfg(feature = "memcached")]
CacheType::Memcached(config::MemcachedCacheConfig {
Expand Down Expand Up @@ -724,7 +736,21 @@ pub fn storage_from_config(
}
}

let (dir, size) = (&config.fallback_cache.dir, config.fallback_cache.size);
disk_cache_from_config(config, pool, None)
}

fn disk_cache_from_config(
config: &Config,
pool: &tokio::runtime::Handle,
root_override: Option<PathBuf>,
) -> Result<Arc<dyn Storage>> {
let (mut dir, size) = (
config.fallback_cache.dir.to_owned(),
config.fallback_cache.size,
);
if let Some(new_root) = root_override {
dir = dir.join(new_root);
}
let preprocessor_cache_mode_config = config.fallback_cache.preprocessor_cache_mode;
let rw_mode = config.fallback_cache.rw_mode.into();
debug!("Init disk cache with dir {:?}, size {}", dir, size);
Expand All @@ -737,6 +763,27 @@ pub fn storage_from_config(
)))
}

/// Upload the local disk cache to the GitHub Actions cache, if configured.
///
/// This is a no-op returning `Ok(())` unless the configured cache is GHA,
/// it is enabled, and it is set up `as_local`. Otherwise the work is handed
/// to [`GHACache::upload_local_cache`] and its result is returned.
#[cfg(feature = "gha")]
pub async fn upload_local_cache(config: &Config) -> Result<()> {
    // Guard: only a GHA cache configuration can be uploaded.
    let gha = match config.cache.as_ref() {
        Some(CacheType::GHA(gha)) => gha,
        _ => {
            debug!("Uploading the local cache is only possible when using GitHub Actions");
            return Ok(());
        }
    };

    // Guard: the GHA cache must be switched on...
    if !gha.enabled {
        debug!("GHA cache is disabled in config");
        return Ok(());
    }

    // ...and must be used in "as local" mode.
    if !gha.as_local {
        debug!("GHA not configured `as_local`");
        return Ok(());
    }

    GHACache::upload_local_cache(config).await
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
142 changes: 142 additions & 0 deletions src/cache/gha.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::path::PathBuf;

use opendal::layers::LoggingLayer;
use opendal::services::Ghac;
use opendal::Operator;

use crate::config::Config;
use crate::errors::*;
use crate::VERSION;

const FULL_GHA_CACHE_ROOT: &str = "sccache-full";

/// A cache that stores entries in GHA Cache Services.
pub struct GHACache;

Expand All @@ -43,4 +48,141 @@ impl GHACache {
.finish();
Ok(op)
}

/// Download a copy of the entire GHA cache from the given version
/// and return the path to the root folder on the local disk.
///
/// The remote tarball stored under `FULL_GHA_CACHE_ROOT` is first copied to
/// `local_cache_tarball_path(config)` and then unpacked into
/// `local_cache_path(config)`.
///
/// It is the user's responsibility to split the caches according
/// to anything relevant like architecture, OS, etc. by using the `version`.
///
/// # Returns
///
/// - `Ok(Some(path))` with the directory the tarball was unpacked into.
/// - `Ok(None)` when the remote tarball does not exist, or disappeared
///   between the existence check and the download.
///
/// # Errors
///
/// Fails when the operator cannot be built, on any remote or local I/O
/// error other than the "not found" race above, or when the tarball cannot
/// be opened or extracted.
pub async fn download_to_local(config: &Config, version: &str) -> Result<Option<PathBuf>> {
    let tarball_path = local_cache_tarball_path(config);
    let mut builder = Ghac::default();

    // TODO somehow loop over decreasingly "fresh" versions of the cache
    // like in
    // https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#matching-a-cache-key
    // For now the behavior is to match the same version, which would
    // speed up rebuilds in the same (Git) branch.
    //
    // A few things to note that make this difficult, plus ideas:
    // - GHA's cache is immutable (meaning you cannot modify a given path
    // for a given version), so we cannot reuse a "global version"
    // - GHA's cache doesn't allow for listing items in a version
    // - GHA's cache is not shared across branches, except for branches
    // that are directly from the default branch, which can use the
    // default cache.
    // - Maybe only using the default branch cache with a way of renewing
    // it periodically is already a benefit.
    // - This maybe could be done as a fallback if the current branch cache
    // is empty, though this is unclear to me at the time of writing.
    if version.is_empty() {
        builder.version(&format!("sccache-v{VERSION}"));
    } else {
        builder.version(&format!("sccache-v{VERSION}-{version}"));
    }

    let op = Operator::new(builder)?
        .layer(LoggingLayer::default())
        .finish();

    if !op.is_exist(FULL_GHA_CACHE_ROOT).await? {
        info!("Remote full gha cache does not exist: nothing to do");
        return Ok(None);
    }
    debug!("Found full gha cache");

    let mut reader = op.reader(FULL_GHA_CACHE_ROOT).await?;
    std::fs::create_dir_all(tarball_path.parent().expect("root path"))?;

    // Truncate any tarball left over from a previous run: without it, a
    // shorter download written over a longer file would keep the old
    // file's trailing bytes.
    let mut writer = tokio::fs::OpenOptions::new()
        .write(true)
        .create(true)
        .truncate(true)
        .open(&tarball_path)
        .await
        .context("opening the local tarball for writing")?;

    if let Err(error) = tokio::io::copy(&mut reader, &mut writer).await {
        match error.kind() {
            std::io::ErrorKind::NotFound => {
                debug!("Remote full gha cache was deleted: nothing to do");
                // TOCTOU race with the above existence check and the cache
                // being cleared.
                return Ok(None);
            }
            _ => {
                bail!(error)
            }
        }
    };

    // Unpack the downloaded archive into the local cache directory.
    let cache = local_cache_path(config);
    let tarball =
        std::fs::File::open(tarball_path).context("Failed to open the GHA cache tarball")?;
    tar::Archive::new(tarball)
        .unpack(&cache)
        .context("Failed to extract the GHA cache tarball")?;

    Ok(Some(cache))
}

/// Upload a tarball of the local cache
pub async fn upload_local_cache(config: &Config) -> Result<()> {
let cache = local_cache_path(config);
if !cache.exists() {
info!("Local cache does not exist: nothing to do");
return Ok(());
}
debug!("Found local gha cache at {}", cache.display());

let op = Operator::new(Ghac::default())?
.layer(LoggingLayer::default())
.finish();

// GHA cache is immutable, if the path has already been written within
// a given version, it cannot be changed again.
if op.is_exist(FULL_GHA_CACHE_ROOT).await? {
info!("Remote cache of this version already exists, cannot upload");
return Ok(());
}

let mut tar_builder = tar::Builder::new(vec![]);
tar_builder
.append_dir_all(local_cache_path(config), ".")
.context("Failed to create GHA local cache tarball")?;
let source = local_cache_tarball_path(config);
std::fs::write(&source, tar_builder.into_inner()?)
.context("Failed to write the GHA local cache tarball to disk")?;

let mut writer = op
.writer(FULL_GHA_CACHE_ROOT)
.await
.context("opening the remote tarball for writing")?;

let mut reader = tokio::fs::File::open(&source)
.await
.context("opening the local tarball for reading")?;

if let Err(error) = tokio::io::copy(&mut reader, &mut writer).await {
match error.kind() {
std::io::ErrorKind::AlreadyExists => {
debug!("Remote cache of this version raced us, cannot upload");
// TOCTOU race with the above existence check and the cache
// being uploaded by another worker.
return Ok(());
}
_ => bail!(error),
}
}
Ok(())
}
}

/// Path of the tarball that holds the packed GHA cache on the local disk:
/// `<fallback_cache.dir>/<FULL_GHA_CACHE_ROOT>.tar`.
fn local_cache_tarball_path(config: &Config) -> PathBuf {
    let mut path = config.fallback_cache.dir.join(FULL_GHA_CACHE_ROOT);
    // `set_extension` inserts the separating dot itself; passing ".tar"
    // would produce a file named `<root>..tar`.
    path.set_extension("tar");
    path
}

/// Directory on the local disk that holds the unpacked GHA cache:
/// `<fallback_cache.dir>/<FULL_GHA_CACHE_ROOT>`.
fn local_cache_path(config: &Config) -> PathBuf {
    let mut root = config.fallback_cache.dir.clone();
    root.push(FULL_GHA_CACHE_ROOT);
    root
}
17 changes: 15 additions & 2 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,11 @@ pub struct GHACacheConfig {
/// Version for gha cache is a namespace. By setting different versions,
/// we can avoid mixed caches.
pub version: String,
/// Download the entire cache to be used like a local cache, then upload
/// it back if anything changed.
/// This is useful in CI contexts to reduce the number of requests,
/// hence avoiding rate limiting and improving overall cache speed.
pub as_local: bool,
}

/// Memcached's default value of expiration is 10800s (3 hours), which is too
Expand Down Expand Up @@ -784,24 +789,30 @@ fn config_from_env() -> Result<EnvConfig> {
});

// ======= GHA =======
let gha = if let Ok(version) = env::var("SCCACHE_GHA_VERSION") {
let mut gha = if let Ok(version) = env::var("SCCACHE_GHA_VERSION") {
// If SCCACHE_GHA_VERSION has been set, we don't need to check
// SCCACHE_GHA_ENABLED's value anymore.
Some(GHACacheConfig {
enabled: true,
version,
as_local: false,
})
} else if bool_from_env_var("SCCACHE_GHA_ENABLED")?.unwrap_or(false) {
// If only SCCACHE_GHA_ENABLED has been set to the true value, enable with
// default version.
Some(GHACacheConfig {
enabled: true,
version: "".to_string(),
as_local: false,
})
} else {
None
};

if let Some(gha) = &mut gha {
gha.as_local = bool_from_env_var("SCCACHE_GHA_AS_LOCAL")?.unwrap_or(false);
}

// ======= Azure =======
let azure = if let (Ok(connection_string), Ok(container)) = (
env::var("SCCACHE_AZURE_CONNECTION_STRING"),
Expand Down Expand Up @@ -1453,6 +1464,7 @@ service_account = "example_service_account"
[cache.gha]
enabled = true
version = "sccache"
as_local = false

[cache.memcached]
# Deprecated alias for `endpoint`
Expand Down Expand Up @@ -1519,7 +1531,8 @@ no_credentials = true
}),
gha: Some(GHACacheConfig {
enabled: true,
version: "sccache".to_string()
version: "sccache".to_string(),
as_local: false,
}),
redis: Some(RedisCacheConfig {
url: Some("redis://user:passwd@1.2.3.4:6379/?db=1".to_owned()),
Expand Down
Loading
Loading