From 39e0255042db0c32172ede1271e1ebac2e5a67f8 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Tue, 17 Oct 2023 21:33:42 -0400 Subject: [PATCH 01/22] airbyte-ci: better gradle caching --- .../connectors/pipelines/pipelines/gradle.py | 89 +++++++++++++------ .../connectors/pipelines/pipelines/hacks.py | 17 ---- 2 files changed, 64 insertions(+), 42 deletions(-) diff --git a/airbyte-ci/connectors/pipelines/pipelines/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/gradle.py index 637ae18c7551..8b4e423234f6 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/gradle.py @@ -37,11 +37,14 @@ def __init__(self, context: PipelineContext) -> None: super().__init__(context) @property - def connector_java_build_cache(self) -> CacheVolume: - # TODO: remove this once we finish the project to boost source-postgres CI performance. - # We should use a static gradle-cache volume name. - cache_volume_name = hacks.get_gradle_cache_volume_name(self.context, self.logger) - return self.context.dagger_client.cache_volume(cache_volume_name) + def persistent_cache_volume(self) -> CacheVolume: + """This cache volume is for sharing gradle state across all pipeline runs.""" + return self.context.dagger_client.cache_volume("gradle-persistent-cache") + + @property + def connector_transient_cache_volume(self) -> CacheVolume: + """This cache volume is for sharing gradle state across tasks within a single connector pipeline run.""" + return self.context.dagger_client.cache_volume(f"gradle-{self.context.connector.technical_name}-transient-cache") @property def build_include(self) -> List[str]: @@ -60,10 +63,10 @@ def build_include(self) -> List[str]: def _get_gradle_command(self, task: str) -> List[str]: return sh_dash_c( [ - # The gradle command is chained in between a couple of rsyncs which load from- and store to the cache volume. - "(rsync -a --stats /root/gradle-cache/ /root/.gradle || true)", + # The gradle command is chained in between a couple of rsyncs which load from- and store to the transient cache volume. + "(rsync -a --stats /root/gradle-transient-cache/ /root/.gradle || true)", f"./gradlew {' '.join(self.DEFAULT_GRADLE_TASK_OPTIONS)} {task}", - "(rsync -a --stats /root/.gradle/ /root/gradle-cache || true)", + "(rsync -a --stats /root/.gradle/ /root/gradle-transient-cache || true)", ] ) @@ -86,7 +89,6 @@ async def _run(self) -> StepResult: "tools/lib/lib.sh", "tools/gradle/codestyle", "pyproject.toml", - "airbyte-cdk/java/airbyte-cdk/**", ] + self.build_include yum_packages_to_install = [ @@ -98,9 +100,8 @@ async def _run(self) -> StepResult: "rsync", # required for gradle cache synchronization. ] - # Define a gradle container which will be cached and re-used for all tasks. - # We should do our best to cram any generic & expensive layers in here. - gradle_container = ( + # Common base container. + gradle_container_base = ( self.dagger_client.container() # Use a linux+jdk base image with long-term support, such as amazoncorretto. .from_(AMAZONCORRETTO_IMAGE) @@ -127,23 +128,61 @@ async def _run(self) -> StepResult: # Set RUN_IN_AIRBYTE_CI to tell gradle how to configure its build cache. # This is consumed by settings.gradle in the repo root. .with_env_variable("RUN_IN_AIRBYTE_CI", "1") + # Disable the Ryuk container because it needs privileged docker access which it can't have. + .with_env_variable("TESTCONTAINERS_RYUK_DISABLED", "true") + # Set the current working directory. + .with_workdir("/airbyte") + ) + + # Mount the whole git repo to update the persistent gradle cache and build the CDK. + with_whole_git_repo = ( + gradle_container_base + # Mount the whole repo. + .with_mounted_directory("/airbyte", self.context.get_repo_dir(".")) + # Mount the cache volume for the gradle cache which is persisted throughout all pipeline runs. + # We deliberately don't mount any cache volumes before mounting the git repo otherwise these will effectively be always empty. + # This volume is LOCKED instead of SHARED because gradle doesn't cope well with concurrency. + .with_mounted_cache("/root/gradle-persistent-cache", self.persistent_cache_volume, sharing=CacheSharingMode.LOCKED) + # Update the persistent gradle cache by resolving all dependencies. + # The idea here is to have this persistent cache contain little more than jars and poms. + # Also, build the java CDK and publish it to the local maven repository. + .with_exec( + sh_dash_c( + [ + # Ensure that the local maven repository root directory exists. + "mkdir -p /root/.m2", + # Load from the persistent cache. + "(rsync -a --stats /root/gradle-persistent-cache/ /root/.gradle || true)", + # Resolve all dependencies and write their checksums to './gradle/verification-metadata.dryrun.xml'. + f"./gradlew {' '.join(self.DEFAULT_GRADLE_TASK_OPTIONS)} --write-verification-metadata sha256 help --dry-run", + # Store to the persistent cache. + "(rsync -a --stats /root/.gradle/ /root/gradle-persistent-cache || true)", + # Build the CDK and publish it to the local maven repository. + # Do this last to not pollute the persistent cache. + f"./gradlew {' '.join(self.DEFAULT_GRADLE_TASK_OPTIONS)} :airbyte-cdk:java:airbyte-cdk:publishSnapshotIfNeeded", + ] + ) + ) + ) + + # Mount only the code needed to build the connector. + # This reduces the scope of the inputs to help dagger reuse container layers. + # The contents of '/root/.gradle' and '/root/.m2' are by design not overly sensitive to changes in the rest of the git repo. + gradle_container = ( + gradle_container_base # TODO: remove this once we finish the project to boost source-postgres CI performance. .with_env_variable("CACHEBUSTER", hacks.get_cachebuster(self.context, self.logger)) - # Mount the gradle cache volume. - # We deliberately don't mount it at $GRADLE_HOME, instead we load it there and store it from there using rsync. - # This is because the volume is accessed concurrently by all GradleTask instances. - # Hence, why we synchronize the writes by setting the `sharing` parameter to LOCKED. - .with_mounted_cache("/root/gradle-cache", self.connector_java_build_cache, sharing=CacheSharingMode.LOCKED) - # Mount the parts of the repo which interest us in /airbyte. - .with_workdir("/airbyte") + # Mount the whole repo. .with_mounted_directory("/airbyte", self.context.get_repo_dir(".", include=include)) .with_mounted_directory(str(self.context.connector.code_directory), await self.context.get_connector_dir()) - # Disable the Ryuk container because it needs privileged docker access that does not work: - .with_env_variable("TESTCONTAINERS_RYUK_DISABLED", "true") - # Run gradle once to populate the container's local maven repository. - # This step is useful also to serve as a basic sanity check and to warm the gradle cache. - # This will download gradle itself, a bunch of poms and jars, compile the gradle plugins, configure tasks, etc. - .with_exec(self._get_gradle_command(":airbyte-cdk:java:airbyte-cdk:publishSnapshotIfNeeded")) + # Mount the cache volume for the transient gradle cache used for this connector only. + # This volume is PRIVATE meaning it exists only for the duration of the dagger pipeline. + # We deliberately don't mount at $GRADLE_HOME, instead we rsync, again because gradle doesn't cope well with concurrency. + .with_mounted_cache("/root/gradle-transient-cache", self.connector_transient_cache_volume, sharing=CacheSharingMode.PRIVATE) + # Warm the gradle cache. + .with_mounted_directory("/root/.gradle", await with_whole_git_repo.directory("/root/.gradle")) + # Populate the local maven repository. + .with_mounted_directory("/root/.m2", await with_whole_git_repo.directory("/root/.m2")) ) # From this point on, we add layers which are task-dependent. diff --git a/airbyte-ci/connectors/pipelines/pipelines/hacks.py b/airbyte-ci/connectors/pipelines/pipelines/hacks.py index 61af0ed05045..4073df11e569 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/hacks.py +++ b/airbyte-ci/connectors/pipelines/pipelines/hacks.py @@ -108,20 +108,3 @@ def get_cachebuster(context: ConnectorContext, logger: Logger) -> str: ) return str(context.pipeline_start_timestamp) return "0" - - -def get_gradle_cache_volume_name(context: ConnectorContext, logger: Logger) -> str: - """ - This function will return a semi-static gradle cache volume name for connectors in CONNECTORS_WITHOUT_CACHING and a static value for all other connectors. - By semi-static I mean that the gradle cache volume name will change on each pipeline execution but will be the same for all the steps of the pipeline. - This hack is useful to collect unbiased metrics on the CI speed for connectors in CONNECTORS_WITHOUT_CACHING: it guarantees that the gradle cache volume will be empty on each pipeline execution and no remote caching is used. - - Returns: - str: The gradle cache volume name. - """ - if context.connector.technical_name in CONNECTORS_WITHOUT_CACHING: - logger.warning( - f"Getting a fresh gradle cache volume name for {context.connector.technical_name} to not use remote caching. Only used in the context of the CI performance improvements project for {context.connector.technical_name}." - ) - return f"gradle-cache-{context.pipeline_start_timestamp}" - return "gradle-cache" From 94a67f7f6568b66c1daecd1c5a0e42b759c85226 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Tue, 17 Oct 2023 22:58:46 -0400 Subject: [PATCH 02/22] bump version and update changelog --- airbyte-ci/connectors/pipelines/README.md | 1 + airbyte-ci/connectors/pipelines/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/airbyte-ci/connectors/pipelines/README.md b/airbyte-ci/connectors/pipelines/README.md index a13dd5c7b8c8..ca9e5c0bd197 100644 --- a/airbyte-ci/connectors/pipelines/README.md +++ b/airbyte-ci/connectors/pipelines/README.md @@ -398,6 +398,7 @@ This command runs the Python tests for a airbyte-ci poetry package. ## Changelog | Version | PR | Description | | ------- | ---------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | +| 2.1.0 | [#31535](https://github.com/airbytehq/airbyte/pull/31535) | Improve gradle caching when building java connectors. | | 2.0.0 | [#31424](https://github.com/airbytehq/airbyte/pull/31424) | Remove `airbyte-ci connectors format` command. | | 1.9.4 | [#31478](https://github.com/airbytehq/airbyte/pull/31478) | Fix running tests for connector-ops package. | | 1.9.3 | [#31457](https://github.com/airbytehq/airbyte/pull/31457) | Improve the connector documentation for connectors migrated to our base image. | diff --git a/airbyte-ci/connectors/pipelines/pyproject.toml b/airbyte-ci/connectors/pipelines/pyproject.toml index 215724eb9871..983323b09050 100644 --- a/airbyte-ci/connectors/pipelines/pyproject.toml +++ b/airbyte-ci/connectors/pipelines/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "pipelines" -version = "2.0.0" +version = "2.1.0" description = "Packaged maintained by the connector operations team to perform CI for connectors' pipelines" authors = ["Airbyte "] From ad894e879f2c236a53c3ad3b6bde2a9e96bb8799 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Tue, 17 Oct 2023 23:04:40 -0400 Subject: [PATCH 03/22] format --- airbyte-ci/connectors/pipelines/pipelines/gradle.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/airbyte-ci/connectors/pipelines/pipelines/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/gradle.py index 8b4e423234f6..d790653ea83e 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/gradle.py @@ -122,8 +122,9 @@ async def _run(self) -> StepResult: ] ) ) - # Set GRADLE_HOME and GRADLE_USER_HOME to the directory which will be rsync-ed with the gradle cache volume. + # Set GRADLE_HOME to the directory which will be rsync-ed with the gradle cache volume. .with_env_variable("GRADLE_HOME", "/root/.gradle") + # Same for GRADLE_USER_HOME. .with_env_variable("GRADLE_USER_HOME", "/root/.gradle") # Set RUN_IN_AIRBYTE_CI to tell gradle how to configure its build cache. # This is consumed by settings.gradle in the repo root. @@ -172,8 +173,9 @@ async def _run(self) -> StepResult: gradle_container_base # TODO: remove this once we finish the project to boost source-postgres CI performance. .with_env_variable("CACHEBUSTER", hacks.get_cachebuster(self.context, self.logger)) - # Mount the whole repo. + # Mount the connector-agnostic whitelisted files in the git repo. .with_mounted_directory("/airbyte", self.context.get_repo_dir(".", include=include)) + # Mount the sources for the connector and its dependencies. .with_mounted_directory(str(self.context.connector.code_directory), await self.context.get_connector_dir()) # Mount the cache volume for the transient gradle cache used for this connector only. # This volume is PRIVATE meaning it exists only for the duration of the dagger pipeline. From f375b3fee410ece572b45986bdd02dc49866c544 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Wed, 18 Oct 2023 09:45:41 -0400 Subject: [PATCH 04/22] apply feedback from first round of review --- .../connectors/pipelines/pipelines/gradle.py | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/airbyte-ci/connectors/pipelines/pipelines/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/gradle.py index d790653ea83e..cb7bdecd5311 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/gradle.py @@ -39,12 +39,13 @@ def __init__(self, context: PipelineContext) -> None: @property def persistent_cache_volume(self) -> CacheVolume: """This cache volume is for sharing gradle state across all pipeline runs.""" - return self.context.dagger_client.cache_volume("gradle-persistent-cache") + return self.context.dagger_client.cache_volume("gradle-dependency-cache") @property def connector_transient_cache_volume(self) -> CacheVolume: """This cache volume is for sharing gradle state across tasks within a single connector pipeline run.""" - return self.context.dagger_client.cache_volume(f"gradle-{self.context.connector.technical_name}-transient-cache") + volume_name = f"gradle-{self.context.connector.technical_name}-transient-cache-{self.context.git_revision}" + return self.context.dagger_client.cache_volume(volume_name) @property def build_include(self) -> List[str]: @@ -71,6 +72,7 @@ def _get_gradle_command(self, task: str) -> List[str]: ) async def _run(self) -> StepResult: + connector_code_directory = str(self.context.connector.code_directory) include = [ ".root", ".env", @@ -139,10 +141,11 @@ async def _run(self) -> StepResult: with_whole_git_repo = ( gradle_container_base # Mount the whole repo. - .with_mounted_directory("/airbyte", self.context.get_repo_dir(".")) + .with_mounted_directory("/airbyte", self.context.get_repo_dir(".").with_timestamps(1)) # Mount the cache volume for the gradle cache which is persisted throughout all pipeline runs. # We deliberately don't mount any cache volumes before mounting the git repo otherwise these will effectively be always empty. - # This volume is LOCKED instead of SHARED because gradle doesn't cope well with concurrency. + # This volume is LOCKED instead of SHARED and we rsync to it instead of mounting it directly to $GRADLE_HOME. + # This is because gradle doesn't cope well with concurrency. .with_mounted_cache("/root/gradle-persistent-cache", self.persistent_cache_volume, sharing=CacheSharingMode.LOCKED) # Update the persistent gradle cache by resolving all dependencies. # The idea here is to have this persistent cache contain little more than jars and poms. @@ -174,17 +177,18 @@ async def _run(self) -> StepResult: # TODO: remove this once we finish the project to boost source-postgres CI performance. .with_env_variable("CACHEBUSTER", hacks.get_cachebuster(self.context, self.logger)) # Mount the connector-agnostic whitelisted files in the git repo. - .with_mounted_directory("/airbyte", self.context.get_repo_dir(".", include=include)) + .with_mounted_directory("/airbyte", await with_whole_git_repo.directory("/airbyte", include=include)) # Mount the sources for the connector and its dependencies. - .with_mounted_directory(str(self.context.connector.code_directory), await self.context.get_connector_dir()) - # Mount the cache volume for the transient gradle cache used for this connector only. - # This volume is PRIVATE meaning it exists only for the duration of the dagger pipeline. - # We deliberately don't mount at $GRADLE_HOME, instead we rsync, again because gradle doesn't cope well with concurrency. - .with_mounted_cache("/root/gradle-transient-cache", self.connector_transient_cache_volume, sharing=CacheSharingMode.PRIVATE) + .with_mounted_directory(connector_code_directory, await with_whole_git_repo.directory(connector_code_directory)) # Warm the gradle cache. .with_mounted_directory("/root/.gradle", await with_whole_git_repo.directory("/root/.gradle")) # Populate the local maven repository. .with_mounted_directory("/root/.m2", await with_whole_git_repo.directory("/root/.m2")) + # Mount the cache volume for the transient gradle cache used for this connector only. + # We deliberately don't mount any cache volumes before mounting the git repo otherwise these will effectively be always empty. + # This volume is LOCKED instead of SHARED and we rsync to it instead of mounting it directly to $GRADLE_HOME. + # This is because gradle doesn't cope well with concurrency. + .with_mounted_cache("/root/gradle-transient-cache", self.connector_transient_cache_volume, sharing=CacheSharingMode.LOCKED) ) # From this point on, we add layers which are task-dependent. From 539c25edf65738253e7318a6240f13fbee01a35a Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Wed, 18 Oct 2023 13:51:50 -0400 Subject: [PATCH 05/22] fix caching problems --- .../connectors/pipelines/pipelines/gradle.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/airbyte-ci/connectors/pipelines/pipelines/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/gradle.py index cb7bdecd5311..93743bb5b31b 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/gradle.py @@ -72,7 +72,6 @@ def _get_gradle_command(self, task: str) -> List[str]: ) async def _run(self) -> StepResult: - connector_code_directory = str(self.context.connector.code_directory) include = [ ".root", ".env", @@ -167,6 +166,16 @@ async def _run(self) -> StepResult: ] ) ) + # Also mount the transient cache volume. + .with_mounted_cache("/root/gradle-transient-cache", self.connector_transient_cache_volume, sharing=CacheSharingMode.LOCKED) + .with_exec( + sh_dash_c( + [ + # Store to the transient cache. + "(rsync -a --stats /root/.gradle/ /root/gradle-transient-cache || true)", + ] + ) + ) ) # Mount only the code needed to build the connector. @@ -177,13 +186,11 @@ async def _run(self) -> StepResult: # TODO: remove this once we finish the project to boost source-postgres CI performance. .with_env_variable("CACHEBUSTER", hacks.get_cachebuster(self.context, self.logger)) # Mount the connector-agnostic whitelisted files in the git repo. - .with_mounted_directory("/airbyte", await with_whole_git_repo.directory("/airbyte", include=include)) - # Mount the sources for the connector and its dependencies. - .with_mounted_directory(connector_code_directory, await with_whole_git_repo.directory(connector_code_directory)) - # Warm the gradle cache. - .with_mounted_directory("/root/.gradle", await with_whole_git_repo.directory("/root/.gradle")) + .with_mounted_directory("/airbyte", self.context.get_repo_dir(".", include=include)) + # Mount the sources for the connector and its dependencies in the git repo. + .with_mounted_directory(str(self.context.connector.code_directory), await self.context.get_connector_dir()) # Populate the local maven repository. - .with_mounted_directory("/root/.m2", await with_whole_git_repo.directory("/root/.m2")) + .with_directory("/root/.m2", await with_whole_git_repo.directory("/root/.m2")) # Mount the cache volume for the transient gradle cache used for this connector only. # We deliberately don't mount any cache volumes before mounting the git repo otherwise these will effectively be always empty. # This volume is LOCKED instead of SHARED and we rsync to it instead of mounting it directly to $GRADLE_HOME. From e312f93227ce7daa0559f7ed0fc776e04a2a0b52 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Wed, 18 Oct 2023 14:25:50 -0400 Subject: [PATCH 06/22] add comment --- airbyte-ci/connectors/pipelines/pipelines/gradle.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte-ci/connectors/pipelines/pipelines/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/gradle.py index 93743bb5b31b..2b02378f3432 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/gradle.py @@ -190,6 +190,7 @@ async def _run(self) -> StepResult: # Mount the sources for the connector and its dependencies in the git repo. .with_mounted_directory(str(self.context.connector.code_directory), await self.context.get_connector_dir()) # Populate the local maven repository. + # Awaiting on this other container's directory ensures that the caches have been warmed. .with_directory("/root/.m2", await with_whole_git_repo.directory("/root/.m2")) # Mount the cache volume for the transient gradle cache used for this connector only. # We deliberately don't mount any cache volumes before mounting the git repo otherwise these will effectively be always empty. From 269ba9748761612306434132844b8d13a92c816d Mon Sep 17 00:00:00 2001 From: postamar Date: Wed, 18 Oct 2023 18:46:55 +0000 Subject: [PATCH 07/22] Automated Commit - Formatting Changes --- airbyte-ci/connectors/pipelines/pipelines/gradle.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/airbyte-ci/connectors/pipelines/pipelines/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/gradle.py index 2b02378f3432..99f58f7089eb 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/gradle.py @@ -167,8 +167,9 @@ async def _run(self) -> StepResult: ) ) # Also mount the transient cache volume. - .with_mounted_cache("/root/gradle-transient-cache", self.connector_transient_cache_volume, sharing=CacheSharingMode.LOCKED) - .with_exec( + .with_mounted_cache( + "/root/gradle-transient-cache", self.connector_transient_cache_volume, sharing=CacheSharingMode.LOCKED + ).with_exec( sh_dash_c( [ # Store to the transient cache. From 89a0d516db08a4f5e079b1433ce6bbf99f336c48 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Thu, 19 Oct 2023 09:58:53 -0400 Subject: [PATCH 08/22] scrap transient cache, introduce s3 build cache --- .../actions/run-dagger-pipeline/action.yml | 8 +++ .github/workflows/connectors_tests.yml | 4 ++ .../pipelines/actions/environments.py | 29 +++++++--- .../pipelines/commands/airbyte_ci.py | 6 ++ .../pipelines/commands/groups/connectors.py | 12 ++++ .../pipelines/pipelines/contexts.py | 22 ++++++++ .../connectors/pipelines/pipelines/gradle.py | 55 +++++-------------- settings.gradle | 2 +- 8 files changed, 86 insertions(+), 52 deletions(-) diff --git a/.github/actions/run-dagger-pipeline/action.yml b/.github/actions/run-dagger-pipeline/action.yml index 8d413a9c658f..afb0d8e69c1a 100644 --- a/.github/actions/run-dagger-pipeline/action.yml +++ b/.github/actions/run-dagger-pipeline/action.yml @@ -63,6 +63,12 @@ inputs: ci_job_key: description: "CI job key" required: false + s3_build_cache_access_key_id: + description: "Gradle S3 Build Cache AWS access key ID" + required: false + s3_build_cache_secret_key: + description: "Gradle S3 Build Cache AWS secret key" + required: false runs: using: "composite" steps: @@ -120,4 +126,6 @@ runs: SPEC_CACHE_GCS_CREDENTIALS: ${{ inputs.spec_cache_gcs_credentials }} DOCKER_HUB_USERNAME: ${{ inputs.docker_hub_username }} DOCKER_HUB_PASSWORD: ${{ inputs.docker_hub_password }} + S3_BUILD_CACHE_ACCESS_KEY_ID: ${{ inputs.s3_build_cache_access_key_id }} + S3_BUILD_CACHE_SECRET_KEY: ${{ inputs.s3_build_cache_secret_key }} CI: "True" diff --git a/.github/workflows/connectors_tests.yml b/.github/workflows/connectors_tests.yml index b5567e9d6ec9..610e4fc94ad1 100644 --- a/.github/workflows/connectors_tests.yml +++ b/.github/workflows/connectors_tests.yml @@ -63,6 +63,8 @@ jobs: git_branch: ${{ steps.extract_branch.outputs.branch }} git_revision: ${{ steps.fetch_last_commit_id_pr.outputs.commit_id }} github_token: ${{ env.PAT }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors ${{ github.event.inputs.test-connectors-options }} test" - name: Test connectors [PULL REQUESTS] if: github.event_name == 'pull_request' @@ -76,4 +78,6 @@ jobs: git_branch: ${{ github.head_ref }} git_revision: ${{ steps.fetch_last_commit_id_pr.outputs.commit_id }} github_token: ${{ env.PAT }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors --modified test" diff --git a/airbyte-ci/connectors/pipelines/pipelines/actions/environments.py b/airbyte-ci/connectors/pipelines/pipelines/actions/environments.py index 3a21ea9b305e..9cf544892ba7 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/actions/environments.py +++ b/airbyte-ci/connectors/pipelines/pipelines/actions/environments.py @@ -968,7 +968,7 @@ def with_crane( return base_container -async def mounted_connector_secrets(context: PipelineContext, secret_directory_path: str) -> Callable[[Container], Container]: +async def mounted_connector_secrets(context: PipelineContext, secret_directory_path: Optional[str] = None) -> Callable[[Container], Container]: # By default, mount the secrets properly as dagger secret files. # # This will cause the contents of these files to be scrubbed from the logs. This scrubbing comes at the cost of @@ -995,21 +995,32 @@ async def mounted_connector_secrets(context: PipelineContext, secret_directory_p # Special case for local development. # Query dagger for the contents of the secrets and mount these strings as files in the container. contents = {} - for secret_file_name, secret in context.connector_secrets.items(): - contents[secret_file_name] = await secret.plaintext() + if secret_directory_path: + for secret_file_name, secret in context.connector_secrets.items(): + contents[secret_file_name] = await secret.plaintext() def with_secrets_mounted_as_regular_files(container: Container) -> Container: - container = container.with_exec(["mkdir", "-p", secret_directory_path], skip_entrypoint=True) - for secret_file_name, secret_content_str in contents.items(): - container = container.with_new_file(f"{secret_directory_path}/{secret_file_name}", secret_content_str, permissions=0o600) + if context.s3_build_cache_access_key_id: + container = container.with_env_variable("S3_BUILD_CACHE_ACCESS_KEY_ID", context.s3_build_cache_access_key_id) + if context.s3_build_cache_secret_key: + container = container.with_env_variable("S3_BUILD_CACHE_SECRET_KEY", context.s3_build_cache_secret_key) + if secret_directory_path: + container = container.with_exec(["mkdir", "-p", secret_directory_path], skip_entrypoint=True) + for secret_file_name, secret_content_str in contents.items(): + container = container.with_new_file(f"{secret_directory_path}/{secret_file_name}", secret_content_str, permissions=0o600) return container return with_secrets_mounted_as_regular_files def with_secrets_mounted_as_dagger_secrets(container: Container) -> Container: - container = container.with_exec(["mkdir", "-p", secret_directory_path], skip_entrypoint=True) - for secret_file_name, secret in context.connector_secrets.items(): - container = container.with_mounted_secret(f"{secret_directory_path}/{secret_file_name}", secret) + if context.s3_build_cache_access_key_id_secret: + container = container.with_secret_variable("S3_BUILD_CACHE_ACCESS_KEY_ID", context.s3_build_cache_access_key_id_secret) + if context.s3_build_cache_secret_key_secret: + container = container.with_secret_variable("S3_BUILD_CACHE_SECRET_KEY", context.s3_build_cache_secret_key_secret) + if secret_directory_path: + container = container.with_exec(["mkdir", "-p", secret_directory_path], skip_entrypoint=True) + for secret_file_name, secret in context.connector_secrets.items(): + container = container.with_mounted_secret(f"{secret_directory_path}/{secret_file_name}", secret) return container return with_secrets_mounted_as_dagger_secrets diff --git a/airbyte-ci/connectors/pipelines/pipelines/commands/airbyte_ci.py b/airbyte-ci/connectors/pipelines/pipelines/commands/airbyte_ci.py index d7ec796b8f2e..5abb916c22ba 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/commands/airbyte_ci.py +++ b/airbyte-ci/connectors/pipelines/pipelines/commands/airbyte_ci.py @@ -116,6 +116,8 @@ def get_modified_files( envvar="GCP_GSM_CREDENTIALS", ) @click.option("--ci-job-key", envvar="CI_JOB_KEY", type=str) +@click.option("--s3-build-cache-access-key-id", envvar="S3_BUILD_CACHE_ACCESS_KEY_ID", type=str) +@click.option("--s3-build-cache-secret-key", envvar="S3_BUILD_CACHE_SECRET_KEY", type=str) @click.option("--show-dagger-logs/--hide-dagger-logs", default=False, type=bool) @click.pass_context @track_command @@ -134,6 +136,8 @@ def airbyte_ci( ci_report_bucket_name: str, ci_gcs_credentials: str, ci_job_key: str, + s3_build_cache_access_key_id: str, + s3_build_cache_secret_key: str, show_dagger_logs: bool, ): # noqa D103 ctx.ensure_object(dict) @@ -152,6 +156,8 @@ def airbyte_ci( ctx.obj["ci_git_user"] = ci_git_user ctx.obj["ci_github_access_token"] = ci_github_access_token ctx.obj["ci_job_key"] = ci_job_key + ctx.obj["s3_build_cache_access_key_id"] = s3_build_cache_access_key_id + ctx.obj["s3_build_cache_secret_key"] = s3_build_cache_secret_key ctx.obj["pipeline_start_timestamp"] = pipeline_start_timestamp ctx.obj["show_dagger_logs"] = show_dagger_logs diff --git a/airbyte-ci/connectors/pipelines/pipelines/commands/groups/connectors.py b/airbyte-ci/connectors/pipelines/pipelines/commands/groups/connectors.py index f1521ba00af7..35ad338def7f 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/commands/groups/connectors.py +++ b/airbyte-ci/connectors/pipelines/pipelines/commands/groups/connectors.py @@ -281,6 +281,8 @@ def test( fast_tests_only=fast_tests_only, code_tests_only=code_tests_only, use_local_cdk=ctx.obj.get("use_local_cdk"), + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] @@ -339,6 +341,8 @@ def build(ctx: click.Context, use_host_gradle_dist_tar: bool) -> bool: use_local_cdk=ctx.obj.get("use_local_cdk"), open_report_in_browser=ctx.obj.get("open_report_in_browser"), use_host_gradle_dist_tar=use_host_gradle_dist_tar, + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] @@ -461,6 +465,8 @@ def publish( ci_context=ctx.obj.get("ci_context"), ci_gcs_credentials=ctx.obj["ci_gcs_credentials"], pull_request=ctx.obj.get("pull_request"), + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] @@ -554,6 +560,8 @@ def upgrade_base_image(ctx: click.Context, set_if_not_exists: bool, docker_hub_u open_report_in_browser=False, docker_hub_username=docker_hub_username, docker_hub_password=docker_hub_password, + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] @@ -603,6 +611,8 @@ def bump_version( ci_git_user=ctx.obj["ci_git_user"], ci_github_access_token=ctx.obj["ci_github_access_token"], open_report_in_browser=False, + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] @@ -671,6 +681,8 @@ def migrate_to_base_image( open_report_in_browser=False, docker_hub_username=docker_hub_username, docker_hub_password=docker_hub_password, + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] diff --git a/airbyte-ci/connectors/pipelines/pipelines/contexts.py b/airbyte-ci/connectors/pipelines/pipelines/contexts.py index 08f0366cb2ab..75a912452b50 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/contexts.py +++ b/airbyte-ci/connectors/pipelines/pipelines/contexts.py @@ -340,6 +340,8 @@ def __init__( open_report_in_browser: bool = True, docker_hub_username: Optional[str] = None, docker_hub_password: Optional[str] = None, + s3_build_cache_access_key_id: Optional[str] = None, + s3_build_cache_secret_key: Optional[str] = None, ): """Initialize a connector context. @@ -365,6 +367,8 @@ def __init__( open_report_in_browser (bool, optional): Open HTML report in browser window. Defaults to True. docker_hub_username (Optional[str], optional): Docker Hub username to use to read registries. Defaults to None. docker_hub_password (Optional[str], optional): Docker Hub password to use to read registries. Defaults to None. + s3_build_cache_access_key_id (Optional[str], optional): Gradle S3 Build Cache credentials. Defaults to None. + s3_build_cache_secret_key (Optional[str], optional): Gradle S3 Build Cache credentials. Defaults to None. """ self.pipeline_name = pipeline_name @@ -384,6 +388,8 @@ def __init__( self.open_report_in_browser = open_report_in_browser self.docker_hub_username = docker_hub_username self.docker_hub_password = docker_hub_password + self.s3_build_cache_access_key_id = s3_build_cache_access_key_id + self.s3_build_cache_secret_key = s3_build_cache_secret_key super().__init__( pipeline_name=pipeline_name, @@ -432,6 +438,18 @@ def connector_acceptance_test_source_dir(self) -> Directory: # noqa D102 def should_save_updated_secrets(self) -> bool: # noqa D102 return self.use_remote_secrets and self.updated_secrets_dir is not None + @property + def s3_build_cache_access_key_id_secret(self) -> Optional[Secret]: + if self.s3_build_cache_access_key_id: + return self.dagger_client.set_secret("s3_build_cache_access_key_id", self.s3_build_cache_access_key_id) + return None + + @property + def s3_build_cache_secret_key_secret(self) -> Optional[Secret]: + if self.s3_build_cache_secret_key: + return self.dagger_client.set_secret("s3_build_cache_secret_key", self.s3_build_cache_secret_key) + return None + @property def host_image_export_dir_path(self) -> str: return "." if self.is_ci else "/tmp" @@ -541,6 +559,8 @@ def __init__( ci_context: Optional[str] = None, ci_gcs_credentials: str = None, pull_request: PullRequest = None, + s3_build_cache_access_key_id: Optional[str] = None, + s3_build_cache_secret_key: Optional[str] = None, ): self.pre_release = pre_release self.spec_cache_bucket_name = spec_cache_bucket_name @@ -568,6 +588,8 @@ def __init__( should_save_report=True, docker_hub_username=docker_hub_username, docker_hub_password=docker_hub_password, + s3_build_cache_access_key_id=s3_build_cache_access_key_id, + s3_build_cache_secret_key=s3_build_cache_secret_key, ) @property diff --git a/airbyte-ci/connectors/pipelines/pipelines/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/gradle.py index 99f58f7089eb..cda23b7079cf 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/gradle.py @@ -41,12 +41,6 @@ def persistent_cache_volume(self) -> CacheVolume: """This cache volume is for sharing gradle state across all pipeline runs.""" return self.context.dagger_client.cache_volume("gradle-dependency-cache") - @property - def connector_transient_cache_volume(self) -> CacheVolume: - """This cache volume is for sharing gradle state across tasks within a single connector pipeline run.""" - volume_name = f"gradle-{self.context.connector.technical_name}-transient-cache-{self.context.git_revision}" - return self.context.dagger_client.cache_volume(volume_name) - @property def build_include(self) -> List[str]: """Retrieve the list of source code directory required to run a Java connector Gradle task. @@ -61,15 +55,8 @@ def build_include(self) -> List[str]: for dependency_directory in self.context.connector.get_local_dependency_paths(with_test_dependencies=True) ] - def _get_gradle_command(self, task: str) -> List[str]: - return sh_dash_c( - [ - # The gradle command is chained in between a couple of rsyncs which load from- and store to the transient cache volume. - "(rsync -a --stats /root/gradle-transient-cache/ /root/.gradle || true)", - f"./gradlew {' '.join(self.DEFAULT_GRADLE_TASK_OPTIONS)} {task}", - "(rsync -a --stats /root/.gradle/ /root/gradle-transient-cache || true)", - ] - ) + def _get_gradle_command(self, task: str, *args) -> str: + return f"./gradlew {' '.join(self.DEFAULT_GRADLE_TASK_OPTIONS + args)} {task}" async def _run(self) -> StepResult: include = [ @@ -147,7 +134,6 @@ async def _run(self) -> StepResult: # This is because gradle doesn't cope well with concurrency. .with_mounted_cache("/root/gradle-persistent-cache", self.persistent_cache_volume, sharing=CacheSharingMode.LOCKED) # Update the persistent gradle cache by resolving all dependencies. - # The idea here is to have this persistent cache contain little more than jars and poms. # Also, build the java CDK and publish it to the local maven repository. .with_exec( sh_dash_c( @@ -155,25 +141,13 @@ async def _run(self) -> StepResult: # Ensure that the local maven repository root directory exists. "mkdir -p /root/.m2", # Load from the persistent cache. - "(rsync -a --stats /root/gradle-persistent-cache/ /root/.gradle || true)", + "(rsync -a --stats --mkpath /root/gradle-persistent-cache/ /root/.gradle || true)", # Resolve all dependencies and write their checksums to './gradle/verification-metadata.dryrun.xml'. - f"./gradlew {' '.join(self.DEFAULT_GRADLE_TASK_OPTIONS)} --write-verification-metadata sha256 help --dry-run", + self._get_gradle_command("help", "--write-verification-metadata", "sha256", "--dry-run"), + # Build the CDK and publish it to the local maven repository. + self._get_gradle_command(":airbyte-cdk:java:airbyte-cdk:publishSnapshotIfNeeded"), # Store to the persistent cache. "(rsync -a --stats /root/.gradle/ /root/gradle-persistent-cache || true)", - # Build the CDK and publish it to the local maven repository. - # Do this last to not pollute the persistent cache. - f"./gradlew {' '.join(self.DEFAULT_GRADLE_TASK_OPTIONS)} :airbyte-cdk:java:airbyte-cdk:publishSnapshotIfNeeded", - ] - ) - ) - # Also mount the transient cache volume. - .with_mounted_cache( - "/root/gradle-transient-cache", self.connector_transient_cache_volume, sharing=CacheSharingMode.LOCKED - ).with_exec( - sh_dash_c( - [ - # Store to the transient cache. - "(rsync -a --stats /root/.gradle/ /root/gradle-transient-cache || true)", ] ) ) @@ -193,18 +167,15 @@ async def _run(self) -> StepResult: # Populate the local maven repository. # Awaiting on this other container's directory ensures that the caches have been warmed. .with_directory("/root/.m2", await with_whole_git_repo.directory("/root/.m2")) - # Mount the cache volume for the transient gradle cache used for this connector only. - # We deliberately don't mount any cache volumes before mounting the git repo otherwise these will effectively be always empty. - # This volume is LOCKED instead of SHARED and we rsync to it instead of mounting it directly to $GRADLE_HOME. - # This is because gradle doesn't cope well with concurrency. - .with_mounted_cache("/root/gradle-transient-cache", self.connector_transient_cache_volume, sharing=CacheSharingMode.LOCKED) + # Mount the cache volume for the persistent gradle dependency cache. + .with_mounted_cache("/root/gradle-persistent-cache", self.persistent_cache_volume, sharing=CacheSharingMode.LOCKED) + # Warm the gradle cache. + .with_exec(sh_dash_c(["(rsync -a --stats --mkpath /root/gradle-persistent-cache/ /root/.gradle || true)"])) ) # From this point on, we add layers which are task-dependent. - if self.mount_connector_secrets: - gradle_container = gradle_container.with_( - await environments.mounted_connector_secrets(self.context, f"{self.context.connector.code_directory}/secrets") - ) + secrets_dir = f"{self.context.connector.code_directory}/secrets" if self.mount_connector_secrets else None + gradle_container = gradle_container.with_(await environments.mounted_connector_secrets(self.context, secrets_dir)) if self.bind_to_docker_host: # If this GradleTask subclass needs docker, then install it and bind it to the existing global docker host container. gradle_container = environments.with_bound_docker_host(self.context, gradle_container) @@ -213,5 +184,5 @@ async def _run(self) -> StepResult: # Run the gradle task that we actually care about. connector_task = f":airbyte-integrations:connectors:{self.context.connector.technical_name}:{self.gradle_task_name}" - gradle_container = gradle_container.with_exec(self._get_gradle_command(connector_task)) + gradle_container = gradle_container.with_exec(sh_dash_c([self._get_gradle_command(connector_task)])) return await self.get_step_result(gradle_container) diff --git a/settings.gradle b/settings.gradle index 72fcdddfdd77..5634629cef0e 100644 --- a/settings.gradle +++ b/settings.gradle @@ -54,7 +54,7 @@ if (isCiServer || isAirbyteCI) { bucket = 'airbyte-buildcache' prefix = 'cache/' push = isCiServer - enabled = isCiServer && !isAirbyteCI + enabled = isCiServer || isAirbyteCI // Credentials will be taken from S3_BUILD_CACHE_... environment variables // anonymous access will be used if environment variables are missing } From b1ac061b478ef65c946e1126490b702c9df55787 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Thu, 19 Oct 2023 13:40:14 -0400 Subject: [PATCH 09/22] format --- .../connectors/pipelines/pipelines/dagger/actions/secrets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py b/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py index 30bc4f66e6a9..deaa8bd495dd 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py +++ b/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py @@ -148,7 +148,9 @@ def with_secrets_mounted_as_regular_files(container: Container) -> Container: if secret_directory_path: container = container.with_exec(["mkdir", "-p", secret_directory_path], skip_entrypoint=True) for secret_file_name, secret_content_str in contents.items(): - container = container.with_new_file(f"{secret_directory_path}/{secret_file_name}", secret_content_str, permissions=0o600) + container = container.with_new_file( + f"{secret_directory_path}/{secret_file_name}", secret_content_str, permissions=0o600 + ) return container return with_secrets_mounted_as_regular_files From 722b652dfa77112083548fc7558bdb826e8ebea3 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Thu, 19 Oct 2023 13:51:04 -0400 Subject: [PATCH 10/22] fix typo --- .../pipelines/airbyte_ci/connectors/context.py | 14 +++++++++++++- .../pipelines/pipelines/dagger/actions/secrets.py | 4 ++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/context.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/context.py index 7fa705d54826..188a0b581ef7 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/context.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/context.py @@ -11,7 +11,7 @@ import yaml from anyio import Path from asyncer import asyncify -from dagger import Directory +from dagger import Directory, Secret from github import PullRequest from pipelines.airbyte_ci.connectors.reports import ConnectorReport from pipelines.dagger.actions import secrets @@ -127,6 +127,18 @@ def __init__( open_report_in_browser=open_report_in_browser, ) + @property + def s3_build_cache_access_key_id_secret(self) -> Optional[Secret]: + if self.s3_build_cache_access_key_id: + return self.dagger_client.set_secret("s3_build_cache_access_key_id", self.s3_build_cache_access_key_id) + return None + + @property + def s3_build_cache_secret_key_secret(self) -> Optional[Secret]: + if self.s3_build_cache_access_key_id and self.s3_build_cache_secret_key: + return self.dagger_client.set_secret("s3_build_cache_secret_key", self.s3_build_cache_secret_key) + return None + @property def modified_files(self): return self.connector.modified_files diff --git a/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py b/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py index deaa8bd495dd..acfd446127b6 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py +++ b/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py @@ -156,9 +156,9 @@ def with_secrets_mounted_as_regular_files(container: Container) -> Container: return with_secrets_mounted_as_regular_files def with_secrets_mounted_as_dagger_secrets(container: Container) -> Container: - if context.s3_build_cache_access_key_id_secret: + if context.s3_build_cache_access_key_id: container = container.with_secret_variable("S3_BUILD_CACHE_ACCESS_KEY_ID", context.s3_build_cache_access_key_id_secret) - if context.s3_build_cache_secret_key_secret: + if context.s3_build_cache_secret_key: container = container.with_secret_variable("S3_BUILD_CACHE_SECRET_KEY", context.s3_build_cache_secret_key_secret) if secret_directory_path: container = container.with_exec(["mkdir", "-p", secret_directory_path], skip_entrypoint=True) From d23c2b3edff407ba3e96c1e96cf9b6accb268def Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Thu, 19 Oct 2023 14:01:13 -0400 Subject: [PATCH 11/22] make S3 caching contingent on credentials being available --- settings.gradle | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/settings.gradle b/settings.gradle index a9df9f12c6d0..0ba8d289ae30 100644 --- a/settings.gradle +++ b/settings.gradle @@ -150,9 +150,7 @@ if (isCiServer || isAirbyteCI) { bucket = 'airbyte-buildcache' prefix = 'cache/' push = isCiServer - enabled = isCiServer || isAirbyteCI - // Credentials will be taken from S3_BUILD_CACHE_... environment variables - // anonymous access will be used if environment variables are missing + enabled = System.getenv().containsKey("S3_BUILD_CACHE_ACCESS_KEY_ID") } } } From 6e48486c5b0669e079bffa63f74dfc2f333a9b07 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Thu, 19 Oct 2023 14:38:18 -0400 Subject: [PATCH 12/22] rsync really needs to be in the same step as gradle --- .../pipelines/pipelines/airbyte_ci/steps/gradle.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py index 81e67d55a417..644334146117 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py @@ -168,8 +168,6 @@ async def _run(self) -> StepResult: .with_directory("/root/.m2", await with_whole_git_repo.directory("/root/.m2")) # Mount the cache volume for the persistent gradle dependency cache. .with_mounted_cache("/root/gradle-persistent-cache", self.persistent_cache_volume, sharing=CacheSharingMode.LOCKED) - # Warm the gradle cache. - .with_exec(sh_dash_c(["(rsync -a --stats --mkpath /root/gradle-persistent-cache/ /root/.gradle || true)"])) ) # From this point on, we add layers which are task-dependent. @@ -183,5 +181,14 @@ async def _run(self) -> StepResult: # Run the gradle task that we actually care about. connector_task = f":airbyte-integrations:connectors:{self.context.connector.technical_name}:{self.gradle_task_name}" - gradle_container = gradle_container.with_exec(sh_dash_c([self._get_gradle_command(connector_task)])) + gradle_container = gradle_container.with_exec( + sh_dash_c( + [ + # Warm the gradle cache. + "(rsync -a --stats --mkpath /root/gradle-persistent-cache/ /root/.gradle || true)", + # Run the gradle task. + self._get_gradle_command(connector_task), + ] + ) + ) return await self.get_step_result(gradle_container) From cdb4f5da3b7d40f5dafdb910736b6b54b2f6bf4a Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Thu, 19 Oct 2023 15:55:56 -0400 Subject: [PATCH 13/22] set s3 cache in us-west-2 --- settings.gradle | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/settings.gradle b/settings.gradle index 0ba8d289ae30..f1f98542a149 100644 --- a/settings.gradle +++ b/settings.gradle @@ -146,10 +146,10 @@ if (isCiServer || isAirbyteCI) { enabled = isAirbyteCI } remote(com.github.burrunan.s3cache.AwsS3BuildCache) { - region = 'us-east-2' - bucket = 'airbyte-buildcache' - prefix = 'cache/' - push = isCiServer + region = 'us-west-2' + bucket = 'ab-ci-cache' + prefix = 'connectors-ci-cache/' + push = isAirbyteCI enabled = System.getenv().containsKey("S3_BUILD_CACHE_ACCESS_KEY_ID") } } From 4f805e9fdaaa06fb837ae5c39457e73f8bd993be Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Thu, 19 Oct 2023 15:56:10 -0400 Subject: [PATCH 14/22] add --no-watch-fs flag --- .../connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py index 644334146117..82232e8648b0 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py @@ -26,7 +26,7 @@ class GradleTask(Step, ABC): mount_connector_secrets (bool): Whether to mount connector secrets. """ - DEFAULT_GRADLE_TASK_OPTIONS = ("--no-daemon", "--scan", "--build-cache", "--console=plain") + DEFAULT_GRADLE_TASK_OPTIONS = ("--no-daemon", "--no-watch-fs", "--scan", "--build-cache", "--console=plain") gradle_task_name: ClassVar[str] bind_to_docker_host: ClassVar[bool] = False From 1c0294b9194a2a5d7d6b8b95a29852e1109cd6a4 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Thu, 19 Oct 2023 17:07:22 -0400 Subject: [PATCH 15/22] add comment --- settings.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/settings.gradle b/settings.gradle index f1f98542a149..b15260b96351 100644 --- a/settings.gradle +++ b/settings.gradle @@ -146,7 +146,7 @@ if (isCiServer || isAirbyteCI) { enabled = isAirbyteCI } remote(com.github.burrunan.s3cache.AwsS3BuildCache) { - region = 'us-west-2' + region = 'us-west-2' // close to dagger runners bucket = 'ab-ci-cache' prefix = 'connectors-ci-cache/' push = isAirbyteCI From 0e606b8d786dfcc94fd28d0a2fc76ac513cda0d5 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Thu, 19 Oct 2023 23:14:16 -0400 Subject: [PATCH 16/22] simplifications --- .../pipelines/airbyte_ci/steps/gradle.py | 32 +++++++------------ 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py index 82232e8648b0..fddac37b18d4 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py @@ -122,52 +122,42 @@ async def _run(self) -> StepResult: .with_workdir("/airbyte") ) - # Mount the whole git repo to update the persistent gradle cache and build the CDK. + # Mount the whole git repo to update the cache volume contents and build the CDK. with_whole_git_repo = ( gradle_container_base # Mount the whole repo. - .with_mounted_directory("/airbyte", self.context.get_repo_dir(".").with_timestamps(1)) - # Mount the cache volume for the gradle cache which is persisted throughout all pipeline runs. - # We deliberately don't mount any cache volumes before mounting the git repo otherwise these will effectively be always empty. - # This volume is LOCKED instead of SHARED and we rsync to it instead of mounting it directly to $GRADLE_HOME. - # This is because gradle doesn't cope well with concurrency. - .with_mounted_cache("/root/gradle-persistent-cache", self.persistent_cache_volume, sharing=CacheSharingMode.LOCKED) - # Update the persistent gradle cache by resolving all dependencies. - # Also, build the java CDK and publish it to the local maven repository. + .with_directory("/airbyte", self.context.get_repo_dir(".")) + # Mount the cache volume to $GRADLE_HOME. + # We can only do this because we never write to the cache volume more than once per run. + .with_mounted_cache("/root/.gradle", self.persistent_cache_volume, sharing=CacheSharingMode.LOCKED) + # Update the cache in place by executing a gradle task which will update all dependencies and build the CDK. .with_exec( sh_dash_c( [ - # Ensure that the local maven repository root directory exists. + # Ensure that the .m2 directory exists. "mkdir -p /root/.m2", - # Load from the persistent cache. - "(rsync -a --stats --mkpath /root/gradle-persistent-cache/ /root/.gradle || true)", # Resolve all dependencies and write their checksums to './gradle/verification-metadata.dryrun.xml'. self._get_gradle_command("help", "--write-verification-metadata", "sha256", "--dry-run"), # Build the CDK and publish it to the local maven repository. self._get_gradle_command(":airbyte-cdk:java:airbyte-cdk:publishSnapshotIfNeeded"), - # Store to the persistent cache. - "(rsync -a --stats /root/.gradle/ /root/gradle-persistent-cache || true)", ] ) ) ) # Mount only the code needed to build the connector. - # This reduces the scope of the inputs to help dagger reuse container layers. - # The contents of '/root/.gradle' and '/root/.m2' are by design not overly sensitive to changes in the rest of the git repo. gradle_container = ( gradle_container_base # TODO: remove this once we finish the project to boost source-postgres CI performance. .with_env_variable("CACHEBUSTER", hacks.get_cachebuster(self.context, self.logger)) + # Copy the local maven repository and force evaluation of `with_whole_git_repo` container. + .with_directory("/root/.m2", await with_whole_git_repo.directory("/root/.m2")) # Mount the connector-agnostic whitelisted files in the git repo. .with_mounted_directory("/airbyte", self.context.get_repo_dir(".", include=include)) # Mount the sources for the connector and its dependencies in the git repo. .with_mounted_directory(str(self.context.connector.code_directory), await self.context.get_connector_dir()) - # Populate the local maven repository. - # Awaiting on this other container's directory ensures that the caches have been warmed. - .with_directory("/root/.m2", await with_whole_git_repo.directory("/root/.m2")) # Mount the cache volume for the persistent gradle dependency cache. - .with_mounted_cache("/root/gradle-persistent-cache", self.persistent_cache_volume, sharing=CacheSharingMode.LOCKED) + .with_mounted_cache("/root/gradle-cache", self.persistent_cache_volume, sharing=CacheSharingMode.PRIVATE) ) # From this point on, we add layers which are task-dependent. @@ -185,7 +175,7 @@ async def _run(self) -> StepResult: sh_dash_c( [ # Warm the gradle cache. - "(rsync -a --stats --mkpath /root/gradle-persistent-cache/ /root/.gradle || true)", + "(rsync -a --stats --mkpath /root/gradle-cache/ /root/.gradle || true)", # Run the gradle task. self._get_gradle_command(connector_task), ] From 42b46219211bb09527982588ef4ac9babc3c9b6e Mon Sep 17 00:00:00 2001 From: postamar Date: Fri, 20 Oct 2023 03:43:45 +0000 Subject: [PATCH 17/22] Automated Commit - Formatting Changes --- .../connectors/source-paypal-transaction/metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-paypal-transaction/metadata.yaml b/airbyte-integrations/connectors/source-paypal-transaction/metadata.yaml index 3f8d1079e504..0e8df098b323 100644 --- a/airbyte-integrations/connectors/source-paypal-transaction/metadata.yaml +++ b/airbyte-integrations/connectors/source-paypal-transaction/metadata.yaml @@ -20,7 +20,7 @@ data: name: Paypal Transaction registries: cloud: - dockerImageTag: 2.0.0 #https://github.com/airbytehq/oncall/issues/3347 + dockerImageTag: 2.0.0 #https://github.com/airbytehq/oncall/issues/3347 enabled: true oss: enabled: true From 4a7b57e5523ba17b28094c44d02e9d8902c0fcc8 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Thu, 19 Oct 2023 23:44:43 -0400 Subject: [PATCH 18/22] fixes --- .../pipelines/pipelines/airbyte_ci/steps/gradle.py | 14 +++++++++----- build.gradle | 1 - settings.gradle | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py index fddac37b18d4..4fa94cc97380 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py @@ -127,19 +127,23 @@ async def _run(self) -> StepResult: gradle_container_base # Mount the whole repo. .with_directory("/airbyte", self.context.get_repo_dir(".")) - # Mount the cache volume to $GRADLE_HOME. - # We can only do this because we never write to the cache volume more than once per run. - .with_mounted_cache("/root/.gradle", self.persistent_cache_volume, sharing=CacheSharingMode.LOCKED) + # Mount the persistent cache volume, but not to $GRADLE_HOME, because gradle doesn't expect concurrent modifications. + .with_mounted_cache("/root/gradle-cache", self.persistent_cache_volume, sharing=CacheSharingMode.LOCKED) # Update the cache in place by executing a gradle task which will update all dependencies and build the CDK. .with_exec( sh_dash_c( [ # Ensure that the .m2 directory exists. "mkdir -p /root/.m2", + # Load from the cache volume. + "(rsync -a --stats --mkpath /root/gradle-cache/ /root/.gradle || true)", # Resolve all dependencies and write their checksums to './gradle/verification-metadata.dryrun.xml'. self._get_gradle_command("help", "--write-verification-metadata", "sha256", "--dry-run"), # Build the CDK and publish it to the local maven repository. self._get_gradle_command(":airbyte-cdk:java:airbyte-cdk:publishSnapshotIfNeeded"), + # Store to the cache volume. + "(rsync -a --stats /root/.gradle/ /root/gradle-cache || true)", + ] ) ) @@ -157,7 +161,7 @@ async def _run(self) -> StepResult: # Mount the sources for the connector and its dependencies in the git repo. .with_mounted_directory(str(self.context.connector.code_directory), await self.context.get_connector_dir()) # Mount the cache volume for the persistent gradle dependency cache. - .with_mounted_cache("/root/gradle-cache", self.persistent_cache_volume, sharing=CacheSharingMode.PRIVATE) + .with_mounted_cache("/root/gradle-cache", self.persistent_cache_volume) ) # From this point on, we add layers which are task-dependent. @@ -177,7 +181,7 @@ async def _run(self) -> StepResult: # Warm the gradle cache. "(rsync -a --stats --mkpath /root/gradle-cache/ /root/.gradle || true)", # Run the gradle task. - self._get_gradle_command(connector_task), + self._get_gradle_command(connector_task, f"-Ds3BuildCachePrefix={self.context.connector.technical_name}"), ] ) ) diff --git a/build.gradle b/build.gradle index 54a66dd407ff..bee172871803 100644 --- a/build.gradle +++ b/build.gradle @@ -37,7 +37,6 @@ ext { version = System.getenv("VERSION") ?: env.VERSION image_tag = System.getenv("VERSION") ?: 'dev' skipSlowTests = (System.getProperty('skipSlowTests', 'false') != 'false') - } // Pyenv support. try { diff --git a/settings.gradle b/settings.gradle index b15260b96351..c9b9aaf27b13 100644 --- a/settings.gradle +++ b/settings.gradle @@ -148,7 +148,7 @@ if (isCiServer || isAirbyteCI) { remote(com.github.burrunan.s3cache.AwsS3BuildCache) { region = 'us-west-2' // close to dagger runners bucket = 'ab-ci-cache' - prefix = 'connectors-ci-cache/' + prefix = "${System.getProperty('s3BuildCachePrefix', 'connectors')}-ci-cache/" push = isAirbyteCI enabled = System.getenv().containsKey("S3_BUILD_CACHE_ACCESS_KEY_ID") } From 3bf9fcfe8adc4ef858ab7aff0c88d0fea25525c2 Mon Sep 17 00:00:00 2001 From: postamar Date: Fri, 20 Oct 2023 04:18:24 +0000 Subject: [PATCH 19/22] Automated Commit - Formatting Changes --- .../connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py | 1 - 1 file changed, 1 deletion(-) diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py index 4fa94cc97380..56062dea5d4f 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py @@ -143,7 +143,6 @@ async def _run(self) -> StepResult: self._get_gradle_command(":airbyte-cdk:java:airbyte-cdk:publishSnapshotIfNeeded"), # Store to the cache volume. "(rsync -a --stats /root/.gradle/ /root/gradle-cache || true)", - ] ) ) From 1c9c7cade8c9731054b5bb657e92081e4c1f5bbf Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Fri, 20 Oct 2023 14:04:55 -0400 Subject: [PATCH 20/22] mount the gradle cache volume as early as possible in the pipeline --- .../pipelines/pipelines/airbyte_ci/steps/gradle.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py index 56062dea5d4f..0033f13cbced 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py @@ -92,6 +92,12 @@ async def _run(self) -> StepResult: self.dagger_client.container() # Use a linux+jdk base image with long-term support, such as amazoncorretto. .from_(AMAZONCORRETTO_IMAGE) + # Mount the persistent cache volume, but not to $GRADLE_HOME, because gradle doesn't expect concurrent modifications. + .with_mounted_cache("/root/gradle-cache", self.persistent_cache_volume, sharing=CacheSharingMode.LOCKED) + # Set GRADLE_HOME to the directory which will be rsync-ed with the gradle cache volume. + .with_env_variable("GRADLE_HOME", "/root/.gradle") + # Same for GRADLE_USER_HOME. + .with_env_variable("GRADLE_USER_HOME", "/root/.gradle") # Install a bunch of packages as early as possible. .with_exec( sh_dash_c( @@ -109,10 +115,6 @@ async def _run(self) -> StepResult: ] ) ) - # Set GRADLE_HOME to the directory which will be rsync-ed with the gradle cache volume. - .with_env_variable("GRADLE_HOME", "/root/.gradle") - # Same for GRADLE_USER_HOME. - .with_env_variable("GRADLE_USER_HOME", "/root/.gradle") # Set RUN_IN_AIRBYTE_CI to tell gradle how to configure its build cache. # This is consumed by settings.gradle in the repo root. .with_env_variable("RUN_IN_AIRBYTE_CI", "1") @@ -127,8 +129,6 @@ async def _run(self) -> StepResult: gradle_container_base # Mount the whole repo. .with_directory("/airbyte", self.context.get_repo_dir(".")) - # Mount the persistent cache volume, but not to $GRADLE_HOME, because gradle doesn't expect concurrent modifications. - .with_mounted_cache("/root/gradle-cache", self.persistent_cache_volume, sharing=CacheSharingMode.LOCKED) # Update the cache in place by executing a gradle task which will update all dependencies and build the CDK. .with_exec( sh_dash_c( @@ -159,8 +159,6 @@ async def _run(self) -> StepResult: .with_mounted_directory("/airbyte", self.context.get_repo_dir(".", include=include)) # Mount the sources for the connector and its dependencies in the git repo. .with_mounted_directory(str(self.context.connector.code_directory), await self.context.get_connector_dir()) - # Mount the cache volume for the persistent gradle dependency cache. - .with_mounted_cache("/root/gradle-cache", self.persistent_cache_volume) ) # From this point on, we add layers which are task-dependent. From d538c55b491433f565a815a252c4426109374d36 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Mon, 23 Oct 2023 10:07:28 -0400 Subject: [PATCH 21/22] apply review comments --- .../workflows/connectors_nightly_build.yml | 2 + .github/workflows/publish_connectors.yml | 4 ++ .../pipelines/airbyte_ci/steps/gradle.py | 40 +++++++++++++------ .../pipelines/dagger/actions/secrets.py | 31 +++++--------- 4 files changed, 42 insertions(+), 35 deletions(-) diff --git a/.github/workflows/connectors_nightly_build.yml b/.github/workflows/connectors_nightly_build.yml index e34fd9837dd5..c7f7eb7dddf5 100644 --- a/.github/workflows/connectors_nightly_build.yml +++ b/.github/workflows/connectors_nightly_build.yml @@ -41,4 +41,6 @@ jobs: sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }} git_branch: ${{ steps.extract_branch.outputs.branch }} github_token: ${{ secrets.GITHUB_TOKEN }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors ${{ inputs.test-connectors-options || '--concurrency=8 --support-level=certified' }} test" diff --git a/.github/workflows/publish_connectors.yml b/.github/workflows/publish_connectors.yml index 44a7426548b9..5fdc8dfcde60 100644 --- a/.github/workflows/publish_connectors.yml +++ b/.github/workflows/publish_connectors.yml @@ -40,6 +40,8 @@ jobs: sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }} slack_webhook_url: ${{ secrets.PUBLISH_ON_MERGE_SLACK_WEBHOOK }} spec_cache_gcs_credentials: ${{ secrets.SPEC_CACHE_SERVICE_ACCOUNT_KEY_PUBLISH }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors --concurrency=1 --execute-timeout=3600 --metadata-changes-only publish --main-release" - name: Publish connectors [manual] @@ -57,6 +59,8 @@ jobs: sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }} slack_webhook_url: ${{ secrets.PUBLISH_ON_MERGE_SLACK_WEBHOOK }} spec_cache_gcs_credentials: ${{ secrets.SPEC_CACHE_SERVICE_ACCOUNT_KEY_PUBLISH }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors ${{ github.event.inputs.connectors-options }} publish ${{ github.event.inputs.publish-options }}" set-instatus-incident-on-failure: diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py index 0033f13cbced..e711abf58c15 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py @@ -27,6 +27,9 @@ class GradleTask(Step, ABC): """ DEFAULT_GRADLE_TASK_OPTIONS = ("--no-daemon", "--no-watch-fs", "--scan", "--build-cache", "--console=plain") + LOCAL_MAVEN_REPOSITORY_PATH = "/root/.m2" + GRADLE_DEP_CACHE_PATH = "/root/gradle-cache" + GRADLE_HOME_PATH = "/root/.gradle" gradle_task_name: ClassVar[str] bind_to_docker_host: ClassVar[bool] = False @@ -36,8 +39,8 @@ def __init__(self, context: PipelineContext) -> None: super().__init__(context) @property - def persistent_cache_volume(self) -> CacheVolume: - """This cache volume is for sharing gradle state across all pipeline runs.""" + def dependency_cache_volume(self) -> CacheVolume: + """This cache volume is for sharing gradle dependencies (jars and poms) across all pipeline runs.""" return self.context.dagger_client.cache_volume("gradle-dependency-cache") @property @@ -92,12 +95,12 @@ async def _run(self) -> StepResult: self.dagger_client.container() # Use a linux+jdk base image with long-term support, such as amazoncorretto. .from_(AMAZONCORRETTO_IMAGE) - # Mount the persistent cache volume, but not to $GRADLE_HOME, because gradle doesn't expect concurrent modifications. - .with_mounted_cache("/root/gradle-cache", self.persistent_cache_volume, sharing=CacheSharingMode.LOCKED) + # Mount the dependency cache volume, but not to $GRADLE_HOME, because gradle doesn't expect concurrent modifications. + .with_mounted_cache(self.GRADLE_DEP_CACHE_PATH, self.dependency_cache_volume, sharing=CacheSharingMode.LOCKED) # Set GRADLE_HOME to the directory which will be rsync-ed with the gradle cache volume. - .with_env_variable("GRADLE_HOME", "/root/.gradle") + .with_env_variable("GRADLE_HOME", self.GRADLE_HOME_PATH) # Same for GRADLE_USER_HOME. - .with_env_variable("GRADLE_USER_HOME", "/root/.gradle") + .with_env_variable("GRADLE_USER_HOME", self.GRADLE_HOME_PATH) # Install a bunch of packages as early as possible. .with_exec( sh_dash_c( @@ -124,6 +127,16 @@ async def _run(self) -> StepResult: .with_workdir("/airbyte") ) + # Augment the base container with S3 build cache secrets when available. + if self.context.s3_build_cache_access_key_id: + gradle_container_base = gradle_container_base.with_secret_variable( + "S3_BUILD_CACHE_ACCESS_KEY_ID", self.context.s3_build_cache_access_key_id_secret + ) + if self.context.s3_build_cache_secret_key: + gradle_container_base = gradle_container_base.with_secret_variable( + "S3_BUILD_CACHE_SECRET_KEY", self.context.s3_build_cache_secret_key_secret + ) + # Mount the whole git repo to update the cache volume contents and build the CDK. with_whole_git_repo = ( gradle_container_base @@ -134,15 +147,15 @@ async def _run(self) -> StepResult: sh_dash_c( [ # Ensure that the .m2 directory exists. - "mkdir -p /root/.m2", + f"mkdir -p {self.LOCAL_MAVEN_REPOSITORY_PATH}", # Load from the cache volume. - "(rsync -a --stats --mkpath /root/gradle-cache/ /root/.gradle || true)", + f"(rsync -a --stats --mkpath {self.GRADLE_DEP_CACHE_PATH}/ {self.GRADLE_HOME_PATH} || true)", # Resolve all dependencies and write their checksums to './gradle/verification-metadata.dryrun.xml'. self._get_gradle_command("help", "--write-verification-metadata", "sha256", "--dry-run"), # Build the CDK and publish it to the local maven repository. self._get_gradle_command(":airbyte-cdk:java:airbyte-cdk:publishSnapshotIfNeeded"), # Store to the cache volume. - "(rsync -a --stats /root/.gradle/ /root/gradle-cache || true)", + f"(rsync -a --stats {self.GRADLE_HOME_PATH}/ {self.GRADLE_DEP_CACHE_PATH} || true)", ] ) ) @@ -154,7 +167,7 @@ async def _run(self) -> StepResult: # TODO: remove this once we finish the project to boost source-postgres CI performance. .with_env_variable("CACHEBUSTER", hacks.get_cachebuster(self.context, self.logger)) # Copy the local maven repository and force evaluation of `with_whole_git_repo` container. - .with_directory("/root/.m2", await with_whole_git_repo.directory("/root/.m2")) + .with_directory(self.LOCAL_MAVEN_REPOSITORY_PATH, await with_whole_git_repo.directory(self.LOCAL_MAVEN_REPOSITORY_PATH)) # Mount the connector-agnostic whitelisted files in the git repo. .with_mounted_directory("/airbyte", self.context.get_repo_dir(".", include=include)) # Mount the sources for the connector and its dependencies in the git repo. @@ -162,8 +175,9 @@ async def _run(self) -> StepResult: ) # From this point on, we add layers which are task-dependent. - secrets_dir = f"{self.context.connector.code_directory}/secrets" if self.mount_connector_secrets else None - gradle_container = gradle_container.with_(await secrets.mounted_connector_secrets(self.context, secrets_dir)) + if self.mount_connector_secrets: + secrets_dir = f"{self.context.connector.code_directory}/secrets" + gradle_container = gradle_container.with_(await secrets.mounted_connector_secrets(self.context, secrets_dir)) if self.bind_to_docker_host: # If this GradleTask subclass needs docker, then install it and bind it to the existing global docker host container. gradle_container = pipelines.dagger.actions.system.docker.with_bound_docker_host(self.context, gradle_container) @@ -176,7 +190,7 @@ async def _run(self) -> StepResult: sh_dash_c( [ # Warm the gradle cache. - "(rsync -a --stats --mkpath /root/gradle-cache/ /root/.gradle || true)", + f"(rsync -a --stats --mkpath {self.GRADLE_DEP_CACHE_PATH}/ {self.GRADLE_HOME_PATH} || true)", # Run the gradle task. self._get_gradle_command(connector_task, f"-Ds3BuildCachePrefix={self.context.connector.technical_name}"), ] diff --git a/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py b/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py index acfd446127b6..1d1385403603 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py +++ b/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py @@ -6,7 +6,7 @@ from __future__ import annotations import datetime -from typing import TYPE_CHECKING, Callable +from typing import TYPE_CHECKING, Callable, Optional from dagger import Container, Secret from pipelines.helpers.utils import get_file_contents, get_secret_host_variable @@ -136,34 +136,21 @@ async def mounted_connector_secrets(context: PipelineContext, secret_directory_p # Special case for local development. # Query dagger for the contents of the secrets and mount these strings as files in the container. contents = {} - if secret_directory_path: - for secret_file_name, secret in context.connector_secrets.items(): - contents[secret_file_name] = await secret.plaintext() + for secret_file_name, secret in context.connector_secrets.items(): + contents[secret_file_name] = await secret.plaintext() def with_secrets_mounted_as_regular_files(container: Container) -> Container: - if context.s3_build_cache_access_key_id: - container = container.with_env_variable("S3_BUILD_CACHE_ACCESS_KEY_ID", context.s3_build_cache_access_key_id) - if context.s3_build_cache_secret_key: - container = container.with_env_variable("S3_BUILD_CACHE_SECRET_KEY", context.s3_build_cache_secret_key) - if secret_directory_path: - container = container.with_exec(["mkdir", "-p", secret_directory_path], skip_entrypoint=True) - for secret_file_name, secret_content_str in contents.items(): - container = container.with_new_file( - f"{secret_directory_path}/{secret_file_name}", secret_content_str, permissions=0o600 - ) + container = container.with_exec(["mkdir", "-p", secret_directory_path], skip_entrypoint=True) + for secret_file_name, secret_content_str in contents.items(): + container = container.with_new_file(f"{secret_directory_path}/{secret_file_name}", secret_content_str, permissions=0o600) return container return with_secrets_mounted_as_regular_files def with_secrets_mounted_as_dagger_secrets(container: Container) -> Container: - if context.s3_build_cache_access_key_id: - container = container.with_secret_variable("S3_BUILD_CACHE_ACCESS_KEY_ID", context.s3_build_cache_access_key_id_secret) - if context.s3_build_cache_secret_key: - container = container.with_secret_variable("S3_BUILD_CACHE_SECRET_KEY", context.s3_build_cache_secret_key_secret) - if secret_directory_path: - container = container.with_exec(["mkdir", "-p", secret_directory_path], skip_entrypoint=True) - for secret_file_name, secret in context.connector_secrets.items(): - container = container.with_mounted_secret(f"{secret_directory_path}/{secret_file_name}", secret) + container = container.with_exec(["mkdir", "-p", secret_directory_path], skip_entrypoint=True) + for secret_file_name, secret in context.connector_secrets.items(): + container = container.with_mounted_secret(f"{secret_directory_path}/{secret_file_name}", secret) return container return with_secrets_mounted_as_dagger_secrets From 3fa7a528cfdb5345f089d54f40d95922ab597bf5 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Mon, 23 Oct 2023 10:52:40 -0400 Subject: [PATCH 22/22] tweaks --- .../pipelines/pipelines/airbyte_ci/steps/gradle.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py index e711abf58c15..b2b383f59ab7 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py @@ -125,6 +125,8 @@ async def _run(self) -> StepResult: .with_env_variable("TESTCONTAINERS_RYUK_DISABLED", "true") # Set the current working directory. .with_workdir("/airbyte") + # TODO: remove this once we finish the project to boost source-postgres CI performance. + .with_env_variable("CACHEBUSTER", hacks.get_cachebuster(self.context, self.logger)) ) # Augment the base container with S3 build cache secrets when available. @@ -137,6 +139,13 @@ async def _run(self) -> StepResult: "S3_BUILD_CACHE_SECRET_KEY", self.context.s3_build_cache_secret_key_secret ) + # Running a gradle task like "help" with these arguments will trigger updating all dependencies. + # When the cache is cold, this downloads many gigabytes of jars and poms from all over the internet. + warm_dependency_cache_args = ["--write-verification-metadata", "sha256", "--dry-run"] + if self.context.is_local: + # When running locally, this dependency update is slower and less useful than within a CI runner. Skip it. + warm_dependency_cache_args = ["--dry-run"] + # Mount the whole git repo to update the cache volume contents and build the CDK. with_whole_git_repo = ( gradle_container_base @@ -151,7 +160,7 @@ async def _run(self) -> StepResult: # Load from the cache volume. f"(rsync -a --stats --mkpath {self.GRADLE_DEP_CACHE_PATH}/ {self.GRADLE_HOME_PATH} || true)", # Resolve all dependencies and write their checksums to './gradle/verification-metadata.dryrun.xml'. - self._get_gradle_command("help", "--write-verification-metadata", "sha256", "--dry-run"), + self._get_gradle_command("help", *warm_dependency_cache_args), # Build the CDK and publish it to the local maven repository. self._get_gradle_command(":airbyte-cdk:java:airbyte-cdk:publishSnapshotIfNeeded"), # Store to the cache volume. @@ -164,8 +173,6 @@ async def _run(self) -> StepResult: # Mount only the code needed to build the connector. gradle_container = ( gradle_container_base - # TODO: remove this once we finish the project to boost source-postgres CI performance. - .with_env_variable("CACHEBUSTER", hacks.get_cachebuster(self.context, self.logger)) # Copy the local maven repository and force evaluation of `with_whole_git_repo` container. .with_directory(self.LOCAL_MAVEN_REPOSITORY_PATH, await with_whole_git_repo.directory(self.LOCAL_MAVEN_REPOSITORY_PATH)) # Mount the connector-agnostic whitelisted files in the git repo.