Skip to content

Commit

Permalink
Merge branch 'zenml-io:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
kabinja authored Feb 20, 2024
2 parents cd7bbe0 + 4c284fe commit fb9b34f
Show file tree
Hide file tree
Showing 47 changed files with 489 additions and 132 deletions.
13 changes: 12 additions & 1 deletion .github/actions/setup_environment/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ inputs:
runners_cache_secret_access_key:
description: Runner cache AWS secret access key
required: true
discord_webhook:
description: Discord webhook URL
required: true
runs:
using: composite
steps:
Expand Down Expand Up @@ -87,7 +90,15 @@ runs:
~/.cache/pip
restore-keys: |
${{ inputs.os }}-${{ inputs.cache_version }}-${{ inputs.python-version }}-${{inputs.install_integrations}}
- name: Custom runners cache miss
uses: rjstone/discord-webhook-notify@v1
if: steps.custom-cache-pip.outputs.cache-hit != 'true'
continue-on-error: true
with:
severity: warn
details: The custom GitHub runners cache was missed and failed to restore
the cache from minio bucket
webhookUrl: ${{ inputs.discord_webhook }}
# Disabled for now because it doesn't work well with multiple parallel jobs
# - uses: syphar/restore-pip-download-cache@v1
# with:
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/ci-fast.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
name: Fast CI
name: ci-fast
on:
workflow_dispatch:
workflow_call:
Expand Down Expand Up @@ -67,7 +67,8 @@ jobs:
update-templates-to-examples:
# this doesn't work on forked repositories (i.e. outside contributors)
# so we disable template updates for those PRs / branches
if: github.event.pull_request.head.repo.full_name == 'zenml-io/zenml'
if: github.event.pull_request.head.repo.full_name == 'zenml-io/zenml' && github.event.pull_request.draft
== false
uses: ./.github/workflows/update-templates-to-examples.yml
with:
python-version: '3.8'
Expand All @@ -86,6 +87,7 @@ jobs:
os: ${{ matrix.os }}
secrets: inherit
custom-ubuntu-runners-integration-test:
if: github.event.pull_request.draft == false
strategy:
matrix:
os: [ubuntu-dind-runners]
Expand Down
12 changes: 11 additions & 1 deletion .github/workflows/ci-slow.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
name: Slow CI
name: ci-slow
on:
push:
branches: [main]
Expand All @@ -14,6 +14,7 @@ concurrency:
jobs:
run-slow-ci-label-is-set:
runs-on: ubuntu-latest
if: github.event.pull_request.draft == false
steps:
# using this instead of contains(github.event.pull_request.labels.*.name, 'run-slow-ci')
# to make it dynamic, otherwise github context is fixed at the moment of trigger event.
Expand All @@ -37,6 +38,7 @@ jobs:
echo "Please add the 'run-slow-ci' label to this PR before merging."
exit 1
docstring-check:
if: github.event.pull_request.draft == false
needs: run-slow-ci-label-is-set
runs-on: ubuntu-latest
steps:
Expand All @@ -50,6 +52,7 @@ jobs:
- name: Check docstrings
run: bash scripts/docstring.sh
sqlite-db-migration-testing:
if: github.event.pull_request.draft == false
needs: run-slow-ci-label-is-set
runs-on: ubuntu-dind-runners
steps:
Expand All @@ -61,6 +64,7 @@ jobs:
- name: Test migrations across versions
run: bash scripts/test-migrations-mysql.sh sqlite
small-checks:
if: github.event.pull_request.draft == false
needs: run-slow-ci-label-is-set
runs-on: ubuntu-latest
steps:
Expand Down Expand Up @@ -98,6 +102,7 @@ jobs:
pip install alembic
bash scripts/check-alembic-branches.sh
custom-ubuntu-unit-test:
if: github.event.pull_request.draft == false
needs: run-slow-ci-label-is-set
strategy:
matrix:
Expand All @@ -110,6 +115,7 @@ jobs:
os: ${{ matrix.os }}
secrets: inherit
windows-unit-test:
if: github.event.pull_request.draft == false
needs: run-slow-ci-label-is-set
strategy:
matrix:
Expand All @@ -122,6 +128,7 @@ jobs:
os: ${{ matrix.os }}
secrets: inherit
macos-unit-test:
if: github.event.pull_request.draft == false
needs: run-slow-ci-label-is-set
strategy:
matrix:
Expand All @@ -134,6 +141,7 @@ jobs:
os: ${{ matrix.os }}
secrets: inherit
windows-integration-test:
if: github.event.pull_request.draft == false
needs: run-slow-ci-label-is-set
strategy:
matrix:
Expand All @@ -148,6 +156,7 @@ jobs:
test_environment: ${{ matrix.test_environment }}
secrets: inherit
macos-integration-test:
if: github.event.pull_request.draft == false
needs: run-slow-ci-label-is-set
strategy:
matrix:
Expand All @@ -162,6 +171,7 @@ jobs:
test_environment: ${{ matrix.test_environment }}
secrets: inherit
custom-ubuntu-integration-test:
if: github.event.pull_request.draft == false
needs: run-slow-ci-label-is-set
strategy:
matrix:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/generate-test-duration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ jobs:
os: ubuntu-dind-runners
runners_cache_access_key_id: ${{ secrets.RUNNERS_CACHE_ACCESS_KEY_ID }}
runners_cache_secret_access_key: ${{ secrets.RUNNERS_CACHE_SECRET_ACCESS_KEY }}
discord_webhook: ${{ secrets.DISCORD_WEBHOOK }}
- name: Generate test duration file
continue-on-error: true
# Ubuntu integration tests run as 6 shards
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/integration-test-fast.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ jobs:
os: ${{ inputs.os }}
runners_cache_access_key_id: ${{ secrets.RUNNERS_CACHE_ACCESS_KEY_ID }}
runners_cache_secret_access_key: ${{ secrets.RUNNERS_CACHE_SECRET_ACCESS_KEY }}
discord_webhook: ${{ secrets.DISCORD_WEBHOOK }}
- name: Install docker-compose for non-default environments
if: inputs.test_environment != 'default'
run: |
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/integration-test-slow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ jobs:
os: ${{ inputs.os }}
runners_cache_access_key_id: ${{ secrets.RUNNERS_CACHE_ACCESS_KEY_ID }}
runners_cache_secret_access_key: ${{ secrets.RUNNERS_CACHE_SECRET_ACCESS_KEY }}
discord_webhook: ${{ secrets.DISCORD_WEBHOOK }}
- name: Install docker-compose for non-default environments
if: inputs.test_environment != 'default'
run: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ jobs:
setup-and-test:
uses: ./.github/workflows/unit-test.yml
with:
os: ubuntu-latest
os: ubuntu-dind-runners
python-version: '3.8'
secrets: inherit
mlstacks-compatibility-check:
needs: setup-and-test
runs-on: ubuntu-latest
runs-on: ubuntu-dind-runners
steps:
- name: Checkout code
uses: actions/checkout@v4.1.1
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/unit-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ jobs:
ZENML_DEBUG: 1
ZENML_ANALYTICS_OPT_IN: false
PYTHONIOENCODING: utf-8

# on MAC OS, we need to set this environment variable
# to fix problems with the fork() calls (see this thread
# for more information: http://sealiesoftware.com/blog/archive/2017/6/5/Objective-C_and_fork_in_macOS_1013.html)
Expand All @@ -87,6 +88,7 @@ jobs:
install_integrations: ${{ inputs.install_integrations }}
runners_cache_access_key_id: ${{ secrets.RUNNERS_CACHE_ACCESS_KEY_ID }}
runners_cache_secret_access_key: ${{ secrets.RUNNERS_CACHE_SECRET_ACCESS_KEY }}
discord_webhook: ${{ secrets.DISCORD_WEBHOOK }}
- name: Setup tmate session before tests
if: ${{ inputs.enable_tmate == 'before-tests' }}
uses: mxschmitt/action-tmate@v3.17
Expand Down
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
<a href="https://www.zenml.io/company#team">Meet the Team</a>
<br />
<br />
🎉 Version 0.55.2 is out. Check out the release notes
🎉 Version 0.55.3 is out. Check out the release notes
<a href="https://github.com/zenml-io/zenml/releases">here</a>.
<br />
<br />
Expand Down Expand Up @@ -296,6 +296,14 @@ Or, if you
prefer, [open an issue](https://github.com/zenml-io/zenml/issues/new/choose) on
our GitHub repo.

# Vulnerability affecting `zenml<0.46.7` (CVE-2024-25723)

We have identified a critical security vulnerability in ZenML versions prior to
0.46.7. This vulnerability potentially allows unauthorized users to take
ownership of ZenML accounts through the user activation feature. Please [read our
blog post](https://www.zenml.io/blog/critical-security-update-for-zenml-users)
for more information on how we've addressed this.

# 📜 License

ZenML is distributed under the terms of the Apache License Version 2.0.
Expand Down
37 changes: 37 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,41 @@
<!-- markdown-link-check-disable -->
# 0.55.3

This patch comes with a variety of bug fixes and documentation updates.

With this release you can now download files directly from artifact versions
that you get back from the client without the need to materialize them. If you
would like to bypass materialization entirely and just download the data or
files associated with a particular artifact version, you can use the
`download_files` method:

```python
from zenml.client import Client

client = Client()
artifact = client.get_artifact_version(name_id_or_prefix="iris_dataset")
artifact.download_files("path/to/save.zip")
```


## What's Changed
* Backport: Add HyperAI to TOC (#2406) by @strickvl in https://github.com/zenml-io/zenml/pull/2407
* Fix conditional statements in GitHub workflows by @strickvl in https://github.com/zenml-io/zenml/pull/2404
* Ensure proper spacing in error messages by @christianversloot in https://github.com/zenml-io/zenml/pull/2399
* Fix hyperai markdown table by @strickvl in https://github.com/zenml-io/zenml/pull/2426
* Upgrade Vertex integration `google-cloud-aiplatform` minimum required version to 1.34.0 by @francoisserra in https://github.com/zenml-io/zenml/pull/2428
* Close code block left open in the docs by @jlopezpena in https://github.com/zenml-io/zenml/pull/2432
* Simplify HF example and notify when cache is down by @safoinme in https://github.com/zenml-io/zenml/pull/2300
* Adding the latest version id and name to the artifact response by @bcdurak in https://github.com/zenml-io/zenml/pull/2430
* Adding the ID of the producer pipeline run to artifact versions by @bcdurak in https://github.com/zenml-io/zenml/pull/2431
* Add vulnerability notice to README by @strickvl in https://github.com/zenml-io/zenml/pull/2437
* REVERTED: Allow more recent `adlfs` and `s3fs` versions by @strickvl in https://github.com/zenml-io/zenml/pull/2402
* Add new property for filtering service account events by @strickvl in https://github.com/zenml-io/zenml/pull/2405
* Add `download_files` method for `ArtifactVersion` by @strickvl in https://github.com/zenml-io/zenml/pull/2434
* Fixing `update_model`s and revert #2402 by @bcdurak in https://github.com/zenml-io/zenml/pull/2440


**Full Changelog**: https://github.com/zenml-io/zenml/compare/0.55.2...0.55.3

# 0.55.2

Expand Down
1 change: 1 addition & 0 deletions docs/book/getting-started/core-concepts.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def step_2(input_one: str, input_two: str) -> str:
"""Combines the two strings passed in."""
combined_str = f"{input_one} {input_two}"
return combined_str
```

#### Pipelines

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,9 @@ data_validator = deepchecks_data_integrity_check_step.with_options(
The step can then be inserted into your pipeline where it can take in a dataset, e.g.:

```python
@pipeline(required_integrations=[DEEPCHECKS, SKLEARN])
docker_settings = DockerSettings(required_integrations=[DEEPCHECKS, SKLEARN])

@pipeline(settings={"docker": docker_settings})
def data_validation_pipeline():
df_train, df_test = data_loader()
data_validator(dataset=df_train)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,9 @@ The step can then be inserted into your pipeline where it can take in a pandas d
```python
from zenml import pipeline

@pipeline(required_integrations=[SKLEARN, GREAT_EXPECTATIONS])
docker_settings = DockerSettings(required_integrations=[SKLEARN, GREAT_EXPECTATIONS])

@pipeline(settings={"docker": docker_settings})
def profiling_pipeline():
"""Data profiling pipeline for Great Expectations.
Expand Down Expand Up @@ -194,7 +196,9 @@ ge_validator_step = great_expectations_validator_step.with_options(
The step can then be inserted into your pipeline where it can take in a pandas dataframe and a bool flag used solely for order reinforcement purposes, e.g.:

```python
@pipeline(required_integrations=[SKLEARN, GREAT_EXPECTATIONS])
docker_settings = DockerSettings(required_integrations=[SKLEARN, GREAT_EXPECTATIONS])

@pipeline(settings={"docker": docker_settings})
def validation_pipeline():
"""Data validation pipeline for Great Expectations.
Expand Down
16 changes: 16 additions & 0 deletions docs/book/user-guide/starter-guide/manage-artifacts.md
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,22 @@ if __name__ == "__main__":
Calls of `Client` methods like `get_artifact_version` directly inside the pipeline code makes use of ZenML's [late materialization](../advanced-guide/data-management/late-materialization.md) behind the scenes.
{% endhint %}

If you would like to bypass materialization entirely and just download the
data or files associated with a particular artifact version, you can use the
`.download_files` method:

```python
from zenml.client import Client

client = Client()
artifact = client.get_artifact_version(name_id_or_prefix="iris_dataset")
artifact.download_files("path/to/save.zip")
```

Take note that the path must have the `.zip` extension, as the artifact data
will be saved as a zip file. Make sure to handle any exceptions that may arise
from this operation.

## Managing artifacts **not** produced by ZenML pipelines

Sometimes, artifacts can be produced completely outside of ZenML. A good example of this is the predictions produced by a deployed model.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "zenml"
version = "0.55.2"
version = "0.55.3"
packages = [{ include = "zenml", from = "src" }]
description = "ZenML: Write production-ready ML code."
authors = ["ZenML GmbH <info@zenml.io>"]
Expand Down
2 changes: 1 addition & 1 deletion scripts/test-migrations-mariadb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ docker run --name mariadb -d -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password mariad
sleep $DB_STARTUP_DELAY

# List of versions to test
VERSIONS=("0.54.0" "0.54.1" "0.55.0" "0.55.1")
VERSIONS=("0.54.0" "0.54.1" "0.55.0" "0.55.1" "0.55.2")

# Start completely fresh
rm -rf ~/.config/zenml
Expand Down
2 changes: 1 addition & 1 deletion scripts/test-migrations-mysql.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ if [ "$1" == "mysql" ]; then
fi

# List of versions to test
VERSIONS=("0.40.0" "0.40.3" "0.41.0" "0.43.0" "0.44.1" "0.44.3" "0.45.2" "0.45.3" "0.45.4" "0.45.5" "0.45.6" "0.46.0" "0.47.0" "0.50.0" "0.51.0" "0.52.0" "0.53.0" "0.53.1" "0.54.0" "0.54.1" "0.55.0" "0.55.1")
VERSIONS=("0.40.0" "0.40.3" "0.41.0" "0.43.0" "0.44.1" "0.44.3" "0.45.2" "0.45.3" "0.45.4" "0.45.5" "0.45.6" "0.46.0" "0.47.0" "0.50.0" "0.51.0" "0.52.0" "0.53.0" "0.53.1" "0.54.0" "0.54.1" "0.55.0" "0.55.1" "0.55.2")

# Start completely fresh
rm -rf ~/.config/zenml
Expand Down
2 changes: 1 addition & 1 deletion src/zenml/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.55.2
0.55.3
8 changes: 8 additions & 0 deletions src/zenml/analytics/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def __init__(self) -> None:

self.user_id: Optional[UUID] = None
self.external_user_id: Optional[UUID] = None
self.executed_by_service_account: Optional[bool] = None
self.client_id: Optional[UUID] = None
self.server_id: Optional[UUID] = None

Expand Down Expand Up @@ -82,11 +83,17 @@ def __enter__(self) -> "AnalyticsContext":
auth_context = get_auth_context()
if auth_context is not None:
self.user_id = auth_context.user.id
self.executed_by_service_account = (
auth_context.user.is_service_account
)
self.external_user_id = auth_context.user.external_user_id
else:
# If the code is running on the client, use the default user.
active_user = gc.zen_store.get_user()
self.user_id = active_user.id
self.executed_by_service_account = (
active_user.is_service_account
)
self.external_user_id = active_user.external_user_id

# Fetch the `client_id`
Expand Down Expand Up @@ -247,6 +254,7 @@ def track(
"server_id": str(self.server_id),
"deployment_type": str(self.deployment_type),
"database_type": str(self.database_type),
"executed_by_service_account": self.executed_by_service_account,
}
)

Expand Down
Loading

0 comments on commit fb9b34f

Please sign in to comment.