From 16f89067c6700ade47d5daa7c6f4dd1b858278bf Mon Sep 17 00:00:00 2001 From: 18alantom <2.alan.tom@gmail.com> Date: Mon, 6 May 2024 12:23:18 +0530 Subject: [PATCH 1/7] fix: calc md5 for app cache --- bench/app.py | 25 ++++++++++++++++++++++--- bench/utils/__init__.py | 14 ++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/bench/app.py b/bench/app.py index a4b3dc748..2fc9b0060 100755 --- a/bench/app.py +++ b/bench/app.py @@ -6,6 +6,7 @@ import shutil import subprocess import sys +import uuid import tarfile import typing from collections import OrderedDict @@ -34,6 +35,7 @@ is_valid_frappe_branch, log, run_frappe_cmd, + get_file_md5, ) from bench.utils.bench import build_assets, install_python_dev_dependencies from bench.utils.render import step @@ -338,6 +340,20 @@ def validate_app_dependencies(self, throw=False) -> None: def get_app_path(self) -> Path: return Path(self.bench.name) / "apps" / self.app_name + def get_app_cache_temp_path(self, is_compressed=False) -> Path: + cache_path = get_bench_cache_path("apps") + ext = "tgz" if is_compressed else "tar" + tarfile_name = f"{self.app_name}.{uuid.uuid4().hex}.{ext}" + return cache_path / tarfile_name + + def get_app_cache_hashed_name(self, temp_path: Path) -> Path: + assert self.cache_key is not None + + ext = temp_path.suffix[1:] + md5 = get_file_md5(temp_path.as_posix()) + tarfile_name = f"{self.app_name}.{self.cache_key}.md5-{md5}.{ext}" + return temp_path.with_name(tarfile_name) + def get_app_cache_path(self, is_compressed=False) -> Path: assert self.cache_key is not None @@ -392,7 +408,7 @@ def set_cache(self, compress_artifacts=False) -> bool: return False cwd = os.getcwd() - cache_path = self.get_app_cache_path(compress_artifacts) + cache_path = self.get_app_cache_temp_path(compress_artifacts) mode = "w:gz" if compress_artifacts else "w" message = f"Caching {self.app_name} app directory" @@ -407,9 +423,12 @@ def set_cache(self, compress_artifacts=False) -> bool: try: with tarfile.open(cache_path, mode) as tar: tar.add(app_path.name) + hashed_path = self.get_app_cache_hashed_name(cache_path) + cache_path.rename(hashed_path) + success = True - except Exception: - log(f"Failed to cache {app_path}", level=3) + except Exception as exc: + log(f"Failed to cache {app_path} {exc}", level=3) success = False finally: os.chdir(cwd) diff --git a/bench/utils/__init__.py b/bench/utils/__init__.py index 4b9e65b62..39e15eca1 100644 --- a/bench/utils/__init__.py +++ b/bench/utils/__init__.py @@ -5,6 +5,7 @@ import re import subprocess import sys +import hashlib from functools import lru_cache from glob import glob from pathlib import Path @@ -23,6 +24,12 @@ InvalidRemoteException, ) +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Optional + + logger = logging.getLogger(PROJECT_NAME) paths_in_app = ("hooks.py", "modules.txt", "patches.txt") paths_in_bench = ("apps", "sites", "config", "logs", "config/pids") @@ -605,3 +612,10 @@ def filter_function(member: TarInfo, dest_path: str) -> Optional[TarInfo]: return None return filter_function + +def get_file_md5(p: str) -> "str": + with open(p, "rb") as f: + file_md5 = hashlib.md5() + while chunk := f.read(2**16): + file_md5.update(chunk) + return file_md5.hexdigest() From 8854551eeebf2eabcac8ba24311f832f192c7eec Mon Sep 17 00:00:00 2001 From: 18alantom <2.alan.tom@gmail.com> Date: Mon, 6 May 2024 12:28:38 +0530 Subject: [PATCH 2/7] chore: update secho messages --- bench/app.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bench/app.py b/bench/app.py index 2fc9b0060..db4bb85e4 100755 --- a/bench/app.py +++ b/bench/app.py @@ -385,13 +385,13 @@ def get_cached(self) -> bool: if app_path.is_dir(): shutil.rmtree(app_path) - click.secho(f"Getting {self.app_name} from cache", fg="yellow") + click.secho(f"Bench app-cache: getting {self.app_name} from cache", fg="yellow") with tarfile.open(cache_path, mode) as tar: extraction_filter = get_app_cache_extract_filter(count_threshold=150_000) try: tar.extractall(app_path.parent, filter=extraction_filter) except Exception: - message = f"Cache extraction failed for {self.app_name}, skipping cache" + message = f"Bench app-cache: extraction failed for {self.app_name}, skipping cache" click.secho(message, fg="yellow") logger.exception(message) shutil.rmtree(app_path) @@ -411,7 +411,7 @@ def set_cache(self, compress_artifacts=False) -> bool: cache_path = self.get_app_cache_temp_path(compress_artifacts) mode = "w:gz" if compress_artifacts else "w" - message = f"Caching {self.app_name} app directory" + message = f"Bench app-cache: caching {self.app_name} app directory" if compress_artifacts: message += " (compressed)" click.secho(message) @@ -428,7 +428,7 @@ def set_cache(self, compress_artifacts=False) -> bool: success = True except Exception as exc: - log(f"Failed to cache {app_path} {exc}", level=3) + log(f"Bench app-cache: failed to cache {app_path} {exc}", level=3) success = False finally: os.chdir(cwd) @@ -501,7 +501,7 @@ def can_frappe_use_cached(app: App) -> bool: """ return sv.Version("15.12.0") not in sv.SimpleSpec(min_frappe) except ValueError: - click.secho(f"Invalid value found for frappe version '{min_frappe}'", fg="yellow") + click.secho(f"Bench app-cache: invalid value found for frappe version '{min_frappe}'", fg="yellow") # Invalid expression return False @@ -610,6 +610,7 @@ def remove_unused_node_modules(app_path: Path) -> None: can_delete = "vite build" in build_script if can_delete: + click.secho(f"Bench app-cache: removing {node_modules.as_posix()}") shutil.rmtree(node_modules) From efb51712d92cc98331e76caad1755a22782c994b Mon Sep 17 00:00:00 2001 From: 18alantom <2.alan.tom@gmail.com> Date: Mon, 6 May 2024 12:58:16 +0530 Subject: [PATCH 3/7] fix: check md5 when fetching from cache --- bench/app.py | 82 +++++++++++++++++++++++++++++++---------- bench/utils/__init__.py | 4 +- 2 files changed, 65 insertions(+), 21 deletions(-) diff --git a/bench/app.py b/bench/app.py index db4bb85e4..5eb0cfed3 100755 --- a/bench/app.py +++ b/bench/app.py @@ -350,35 +350,49 @@ def get_app_cache_hashed_name(self, temp_path: Path) -> Path: assert self.cache_key is not None ext = temp_path.suffix[1:] - md5 = get_file_md5(temp_path.as_posix()) + md5 = get_file_md5(temp_path) tarfile_name = f"{self.app_name}.{self.cache_key}.md5-{md5}.{ext}" + return temp_path.with_name(tarfile_name) - def get_app_cache_path(self, is_compressed=False) -> Path: + def get_app_cache_path(self) -> "Optional[Path]": assert self.cache_key is not None cache_path = get_bench_cache_path("apps") - tarfile_name = get_cache_filename( - self.app_name, - self.cache_key, - is_compressed, - ) - return cache_path / tarfile_name + glob_pattern = f"{self.app_name}.{self.cache_key}.md5-*" - def get_cached(self) -> bool: + for app_cache_path in cache_path.glob(glob_pattern): + return app_cache_path + + return None + + def validate_cache_and_get_path(self) -> "Optional[Path]": if not self.cache_key: - return False + return - cache_path = self.get_app_cache_path(False) - mode = "r" + if not (cache_path := self.get_app_cache_path()): + return - # Check if cache exists without gzip if not cache_path.is_file(): - cache_path = self.get_app_cache_path(True) - mode = "r:gz" + click.secho( + f"Bench app-cache: file check failed for {cache_path.as_posix()}, skipping cache", + fg="yellow", + ) + unlink_no_throw(cache_path) + return - # Check if cache exists with gzip - if not cache_path.is_file(): + if not is_cache_hash_valid(cache_path): + click.secho( + f"Bench app-cache: hash validation failed for {cache_path.as_posix()}, skipping cache", + fg="yellow", + ) + unlink_no_throw(cache_path) + return + + return cache_path + + def get_cached(self) -> bool: + if not (cache_path := self.validate_cache_and_get_path()): return False app_path = self.get_app_path() @@ -386,13 +400,18 @@ def get_cached(self) -> bool: shutil.rmtree(app_path) click.secho(f"Bench app-cache: getting {self.app_name} from cache", fg="yellow") + + mode = "r:gz" if cache_path.suffix.endswith(".tgz") else "r" with tarfile.open(cache_path, mode) as tar: extraction_filter = get_app_cache_extract_filter(count_threshold=150_000) try: tar.extractall(app_path.parent, filter=extraction_filter) except Exception: message = f"Bench app-cache: extraction failed for {self.app_name}, skipping cache" - click.secho(message, fg="yellow") + click.secho( + message, + fg="yellow", + ) logger.exception(message) shutil.rmtree(app_path) return False @@ -423,7 +442,10 @@ def set_cache(self, compress_artifacts=False) -> bool: try: with tarfile.open(cache_path, mode) as tar: tar.add(app_path.name) + hashed_path = self.get_app_cache_hashed_name(cache_path) + unlink_no_throw(hashed_path) + cache_path.rename(hashed_path) success = True @@ -501,7 +523,10 @@ def can_frappe_use_cached(app: App) -> bool: """ return sv.Version("15.12.0") not in sv.SimpleSpec(min_frappe) except ValueError: - click.secho(f"Bench app-cache: invalid value found for frappe version '{min_frappe}'", fg="yellow") + click.secho( + f"Bench app-cache: invalid value found for frappe version '{min_frappe}'", + fg="yellow", + ) # Invalid expression return False @@ -1056,3 +1081,22 @@ def get_apps_json(path): with open(path) as f: return json.load(f) + + +def is_cache_hash_valid(cache_path: Path) -> bool: + parts = cache_path.name.split(".") + if len(parts) < 2 or not parts[-2].startswith("md5-"): + return False + + md5 = parts[-2].split("-")[1] + return get_file_md5(cache_path) == md5 + + +def unlink_no_throw(path: Path): + if not path.exists(): + return + + try: + path.unlink(True) + except Exception: + pass diff --git a/bench/utils/__init__.py b/bench/utils/__init__.py index 39e15eca1..6e4df5396 100644 --- a/bench/utils/__init__.py +++ b/bench/utils/__init__.py @@ -613,8 +613,8 @@ def filter_function(member: TarInfo, dest_path: str) -> Optional[TarInfo]: return filter_function -def get_file_md5(p: str) -> "str": - with open(p, "rb") as f: +def get_file_md5(p: Path) -> "str": + with open(p.as_posix(), "rb") as f: file_md5 = hashlib.md5() while chunk := f.read(2**16): file_md5.update(chunk) From 6f074a7b3e627f305650d8a5061903c600547191 Mon Sep 17 00:00:00 2001 From: 18alantom <2.alan.tom@gmail.com> Date: Mon, 6 May 2024 13:11:51 +0530 Subject: [PATCH 4/7] fix: update pre url coercion check --- bench/app.py | 102 ++++++++++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 58 deletions(-) diff --git a/bench/app.py b/bench/app.py index 5eb0cfed3..f2c97fc4f 100755 --- a/bench/app.py +++ b/bench/app.py @@ -346,7 +346,7 @@ def get_app_cache_temp_path(self, is_compressed=False) -> Path: tarfile_name = f"{self.app_name}.{uuid.uuid4().hex}.{ext}" return cache_path / tarfile_name - def get_app_cache_hashed_name(self, temp_path: Path) -> Path: + def get_app_cache_hashed_path(self, temp_path: Path) -> Path: assert self.cache_key is not None ext = temp_path.suffix[1:] @@ -355,44 +355,11 @@ def get_app_cache_hashed_name(self, temp_path: Path) -> Path: return temp_path.with_name(tarfile_name) - def get_app_cache_path(self) -> "Optional[Path]": - assert self.cache_key is not None - - cache_path = get_bench_cache_path("apps") - glob_pattern = f"{self.app_name}.{self.cache_key}.md5-*" - - for app_cache_path in cache_path.glob(glob_pattern): - return app_cache_path - - return None - - def validate_cache_and_get_path(self) -> "Optional[Path]": + def get_cached(self) -> bool: if not self.cache_key: - return - - if not (cache_path := self.get_app_cache_path()): - return - - if not cache_path.is_file(): - click.secho( - f"Bench app-cache: file check failed for {cache_path.as_posix()}, skipping cache", - fg="yellow", - ) - unlink_no_throw(cache_path) - return - - if not is_cache_hash_valid(cache_path): - click.secho( - f"Bench app-cache: hash validation failed for {cache_path.as_posix()}, skipping cache", - fg="yellow", - ) - unlink_no_throw(cache_path) - return - - return cache_path + return False - def get_cached(self) -> bool: - if not (cache_path := self.validate_cache_and_get_path()): + if not (cache_path := validate_cache_and_get_path(self.app_name, self.cache_key)): return False app_path = self.get_app_path() @@ -443,7 +410,7 @@ def set_cache(self, compress_artifacts=False) -> bool: with tarfile.open(cache_path, mode) as tar: tar.add(app_path.name) - hashed_path = self.get_app_cache_hashed_name(cache_path) + hashed_path = self.get_app_cache_hashed_path(cache_path) unlink_no_throw(hashed_path) cache_path.rename(hashed_path) @@ -478,28 +445,11 @@ def can_get_cached(app_name: str, cache_key: str) -> bool: checking local remote and fetching can be skipped while keeping get-app command params the same. """ - cache_path = get_bench_cache_path("apps") - tarfile_path = cache_path / get_cache_filename( - app_name, - cache_key, - True, - ) - if tarfile_path.is_file(): - return True + if cache_path := get_app_cache_path(app_name, cache_key): + return cache_path.exists() - tarfile_path = cache_path / get_cache_filename( - app_name, - cache_key, - False, - ) - - return tarfile_path.is_file() - - -def get_cache_filename(app_name: str, cache_key: str, is_compressed=False): - ext = "tgz" if is_compressed else "tar" - return f"{app_name}-{cache_key[:10]}.{ext}" + return False def can_frappe_use_cached(app: App) -> bool: @@ -1100,3 +1050,39 @@ def unlink_no_throw(path: Path): path.unlink(True) except Exception: pass + + +def get_app_cache_path(app_name: str, cache_key: str) -> "Optional[Path]": + cache_path = get_bench_cache_path("apps") + glob_pattern = f"{app_name}.{cache_key}.md5-*" + + for app_cache_path in cache_path.glob(glob_pattern): + return app_cache_path + + return None + + +def validate_cache_and_get_path(app_name: str, cache_key: str) -> "Optional[Path]": + if not cache_key: + return + + if not (cache_path := get_app_cache_path(app_name, cache_key)): + return + + if not cache_path.is_file(): + click.secho( + f"Bench app-cache: file check failed for {cache_path.as_posix()}, skipping cache", + fg="yellow", + ) + unlink_no_throw(cache_path) + return + + if not is_cache_hash_valid(cache_path): + click.secho( + f"Bench app-cache: hash validation failed for {cache_path.as_posix()}, skipping cache", + fg="yellow", + ) + unlink_no_throw(cache_path) + return + + return cache_path From 1f0d8ab5f51f3fe6761621f739b20c02fe49d8ad Mon Sep 17 00:00:00 2001 From: 18alantom <2.alan.tom@gmail.com> Date: Mon, 6 May 2024 13:21:07 +0530 Subject: [PATCH 5/7] chore: consistency in app-cache messages --- bench/app.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/bench/app.py b/bench/app.py index f2c97fc4f..288a7ee53 100755 --- a/bench/app.py +++ b/bench/app.py @@ -366,15 +366,21 @@ def get_cached(self) -> bool: if app_path.is_dir(): shutil.rmtree(app_path) - click.secho(f"Bench app-cache: getting {self.app_name} from cache", fg="yellow") + click.secho( + f"Bench app-cache: extracting {self.app_name} from {cache_path.as_posix()}", + ) mode = "r:gz" if cache_path.suffix.endswith(".tgz") else "r" with tarfile.open(cache_path, mode) as tar: extraction_filter = get_app_cache_extract_filter(count_threshold=150_000) try: tar.extractall(app_path.parent, filter=extraction_filter) + click.secho( + f"Bench app-cache: extraction succeeded for {self.app_name}", + fg="green", + ) except Exception: - message = f"Bench app-cache: extraction failed for {self.app_name}, skipping cache" + message = f"Bench app-cache: extraction failed for {self.app_name}" click.secho( message, fg="yellow", @@ -397,7 +403,7 @@ def set_cache(self, compress_artifacts=False) -> bool: cache_path = self.get_app_cache_temp_path(compress_artifacts) mode = "w:gz" if compress_artifacts else "w" - message = f"Bench app-cache: caching {self.app_name} app directory" + message = f"Bench app-cache: caching {self.app_name}" if compress_artifacts: message += " (compressed)" click.secho(message) @@ -414,10 +420,14 @@ def set_cache(self, compress_artifacts=False) -> bool: unlink_no_throw(hashed_path) cache_path.rename(hashed_path) + click.secho( + f"Bench app-cache: caching succeeded for {self.app_name} into {hashed_path.as_posix()}", + fg="green", + ) success = True except Exception as exc: - log(f"Bench app-cache: failed to cache {app_path} {exc}", level=3) + log(f"Bench app-cache: caching failed for {self.app_name} {exc}", level=3) success = False finally: os.chdir(cwd) From d3ca36d59640621c2349c0e277faa62001af279f Mon Sep 17 00:00:00 2001 From: 18alantom <2.alan.tom@gmail.com> Date: Mon, 6 May 2024 15:35:24 +0530 Subject: [PATCH 6/7] chore: deprecate 3.7 support --- .github/workflows/ci.yml | 11 +++-------- README.md | 2 +- bench/app.py | 5 ++++- pyproject.toml | 2 +- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 74b1b0d21..1c9dac499 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ '3.7', '3.8', '3.9', '3.10' ] + python-version: ['3.8', '3.9', '3.10' ] name: Base (${{ matrix.python-version }}) @@ -58,7 +58,7 @@ jobs: strategy: matrix: - python-version: [ '3.7', '3.10' ] + python-version: ['3.10' ] name: Production (${{ matrix.python-version }}) @@ -96,7 +96,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ '3.7', '3.10' ] + python-version: ['3.10' ] name: Tests (${{ matrix.python-version }}) @@ -120,11 +120,6 @@ jobs: with: node-version: 18 - - uses: actions/setup-node@v3 - if: ${{ matrix.python-version == '3.7' }} - with: - node-version: 14 - - run: | wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.focal_amd64.deb; sudo apt install ./wkhtmltox_0.12.6-1.focal_amd64.deb; diff --git a/README.md b/README.md index e523672c0..3a6eb130a 100755 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Bench is a command-line utility that helps you to install, update, and manage mu