Skip to content

Commit

Permalink
Merge pull request #1555 from frappe/calculate-cache-checksum
Browse files Browse the repository at this point in the history
fix: check cache hash before using
  • Loading branch information
18alantom authored May 7, 2024
2 parents 1751b2d + fbdae39 commit 4c38272
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 59 deletions.
11 changes: 3 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ '3.7', '3.8', '3.9', '3.10' ]
python-version: ['3.8', '3.9', '3.10' ]

name: Base (${{ matrix.python-version }})

Expand Down Expand Up @@ -58,7 +58,7 @@ jobs:

strategy:
matrix:
python-version: [ '3.7', '3.10' ]
python-version: ['3.10' ]

name: Production (${{ matrix.python-version }})

Expand Down Expand Up @@ -96,7 +96,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ '3.7', '3.10' ]
python-version: ['3.10' ]

name: Tests (${{ matrix.python-version }})

Expand All @@ -120,11 +120,6 @@ jobs:
with:
node-version: 18

- uses: actions/setup-node@v3
if: ${{ matrix.python-version == '3.7' }}
with:
node-version: 14

- run: |
wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.focal_amd64.deb;
sudo apt install ./wkhtmltox_0.12.6-1.focal_amd64.deb;
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Bench is a command-line utility that helps you to install, update, and manage mu

<div align="center">
<a target="_blank" href="https://www.python.org/downloads/" title="Python version">
<img src="https://img.shields.io/badge/python-%3E=_3.7-green.svg">
<img src="https://img.shields.io/badge/python-%3E=_3.8-green.svg">
</a>
<a target="_blank" href="https://app.travis-ci.com/github/frappe/bench" title="CI Status">
<img src="https://app.travis-ci.com/frappe/bench.svg?branch=develop">
Expand Down
155 changes: 109 additions & 46 deletions bench/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import shutil
import subprocess
import sys
import uuid
import tarfile
import typing
from collections import OrderedDict
Expand Down Expand Up @@ -34,6 +35,7 @@
is_valid_frappe_branch,
log,
run_frappe_cmd,
get_file_md5,
)
from bench.utils.bench import build_assets, install_python_dev_dependencies
from bench.utils.render import step
Expand Down Expand Up @@ -338,45 +340,51 @@ def validate_app_dependencies(self, throw=False) -> None:
def get_app_path(self) -> Path:
return Path(self.bench.name) / "apps" / self.app_name

def get_app_cache_path(self, is_compressed=False) -> Path:
assert self.cache_key is not None

def get_app_cache_temp_path(self, is_compressed=False) -> Path:
cache_path = get_bench_cache_path("apps")
tarfile_name = get_cache_filename(
self.app_name,
self.cache_key,
is_compressed,
)
ext = "tgz" if is_compressed else "tar"
tarfile_name = f"{self.app_name}.{uuid.uuid4().hex}.{ext}"
return cache_path / tarfile_name

def get_app_cache_hashed_path(self, temp_path: Path) -> Path:
assert self.cache_key is not None

ext = temp_path.suffix[1:]
md5 = get_file_md5(temp_path)
tarfile_name = f"{self.app_name}.{self.cache_key}.md5-{md5}.{ext}"

return temp_path.with_name(tarfile_name)

def get_cached(self) -> bool:
if not self.cache_key:
return False

cache_path = self.get_app_cache_path(False)
mode = "r"

# Check if cache exists without gzip
if not cache_path.is_file():
cache_path = self.get_app_cache_path(True)
mode = "r:gz"

# Check if cache exists with gzip
if not cache_path.is_file():
if not (cache_path := validate_cache_and_get_path(self.app_name, self.cache_key)):
return False

app_path = self.get_app_path()
if app_path.is_dir():
shutil.rmtree(app_path)

click.secho(f"Getting {self.app_name} from cache", fg="yellow")
click.secho(
f"Bench app-cache: extracting {self.app_name} from {cache_path.as_posix()}",
)

mode = "r:gz" if cache_path.suffix.endswith(".tgz") else "r"
with tarfile.open(cache_path, mode) as tar:
extraction_filter = get_app_cache_extract_filter(count_threshold=150_000)
try:
tar.extractall(app_path.parent, filter=extraction_filter)
click.secho(
f"Bench app-cache: extraction succeeded for {self.app_name}",
fg="green",
)
except Exception:
message = f"Cache extraction failed for {self.app_name}, skipping cache"
click.secho(message, fg="yellow")
message = f"Bench app-cache: extraction failed for {self.app_name}"
click.secho(
message,
fg="yellow",
)
logger.exception(message)
shutil.rmtree(app_path)
return False
Expand All @@ -392,10 +400,10 @@ def set_cache(self, compress_artifacts=False) -> bool:
return False

cwd = os.getcwd()
cache_path = self.get_app_cache_path(compress_artifacts)
cache_path = self.get_app_cache_temp_path(compress_artifacts)
mode = "w:gz" if compress_artifacts else "w"

message = f"Caching {self.app_name} app directory"
message = f"Bench app-cache: caching {self.app_name}"
if compress_artifacts:
message += " (compressed)"
click.secho(message)
Expand All @@ -407,9 +415,19 @@ def set_cache(self, compress_artifacts=False) -> bool:
try:
with tarfile.open(cache_path, mode) as tar:
tar.add(app_path.name)

hashed_path = self.get_app_cache_hashed_path(cache_path)
unlink_no_throw(hashed_path)

cache_path.rename(hashed_path)
click.secho(
f"Bench app-cache: caching succeeded for {self.app_name} as {hashed_path.as_posix()}",
fg="green",
)

success = True
except Exception:
log(f"Failed to cache {app_path}", level=3)
except Exception as exc:
log(f"Bench app-cache: caching failed for {self.app_name} {exc}", level=3)
success = False
finally:
os.chdir(cwd)
Expand Down Expand Up @@ -437,28 +455,11 @@ def can_get_cached(app_name: str, cache_key: str) -> bool:
checking local remote and fetching can be skipped while keeping
get-app command params the same.
"""
cache_path = get_bench_cache_path("apps")
tarfile_path = cache_path / get_cache_filename(
app_name,
cache_key,
True,
)

if tarfile_path.is_file():
return True

tarfile_path = cache_path / get_cache_filename(
app_name,
cache_key,
False,
)

return tarfile_path.is_file()

if cache_path := get_app_cache_path(app_name, cache_key):
return cache_path.exists()

def get_cache_filename(app_name: str, cache_key: str, is_compressed=False):
ext = "tgz" if is_compressed else "tar"
return f"{app_name}-{cache_key[:10]}.{ext}"
return False


def can_frappe_use_cached(app: App) -> bool:
Expand All @@ -482,7 +483,10 @@ def can_frappe_use_cached(app: App) -> bool:
"""
return sv.Version("15.12.0") not in sv.SimpleSpec(min_frappe)
except ValueError:
click.secho(f"Invalid value found for frappe version '{min_frappe}'", fg="yellow")
click.secho(
f"Bench app-cache: invalid value found for frappe version '{min_frappe}'",
fg="yellow",
)
# Invalid expression
return False

Expand Down Expand Up @@ -591,6 +595,10 @@ def remove_unused_node_modules(app_path: Path) -> None:
can_delete = "vite build" in build_script

if can_delete:
click.secho(
f"Bench app-cache: removing {node_modules.as_posix()}",
fg="yellow",
)
shutil.rmtree(node_modules)


Expand Down Expand Up @@ -1036,3 +1044,58 @@ def get_apps_json(path):

with open(path) as f:
return json.load(f)


def is_cache_hash_valid(cache_path: Path) -> bool:
parts = cache_path.name.split(".")
if len(parts) < 2 or not parts[-2].startswith("md5-"):
return False

md5 = parts[-2].split("-")[1]
return get_file_md5(cache_path) == md5


def unlink_no_throw(path: Path):
if not path.exists():
return

try:
path.unlink(True)
except Exception:
pass


def get_app_cache_path(app_name: str, cache_key: str) -> "Optional[Path]":
cache_path = get_bench_cache_path("apps")
glob_pattern = f"{app_name}.{cache_key}.md5-*"

for app_cache_path in cache_path.glob(glob_pattern):
return app_cache_path

return None


def validate_cache_and_get_path(app_name: str, cache_key: str) -> "Optional[Path]":
if not cache_key:
return

if not (cache_path := get_app_cache_path(app_name, cache_key)):
return

if not cache_path.is_file():
click.secho(
f"Bench app-cache: file check failed for {cache_path.as_posix()}, skipping cache",
fg="yellow",
)
unlink_no_throw(cache_path)
return

if not is_cache_hash_valid(cache_path):
click.secho(
f"Bench app-cache: hash validation failed for {cache_path.as_posix()}, skipping cache",
fg="yellow",
)
unlink_no_throw(cache_path)
return

return cache_path
14 changes: 14 additions & 0 deletions bench/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import re
import subprocess
import sys
import hashlib
from functools import lru_cache
from glob import glob
from pathlib import Path
Expand All @@ -23,6 +24,12 @@
InvalidRemoteException,
)

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from typing import Optional


logger = logging.getLogger(PROJECT_NAME)
paths_in_app = ("hooks.py", "modules.txt", "patches.txt")
paths_in_bench = ("apps", "sites", "config", "logs", "config/pids")
Expand Down Expand Up @@ -605,3 +612,10 @@ def filter_function(member: TarInfo, dest_path: str) -> Optional[TarInfo]:
return None

return filter_function

def get_file_md5(p: Path) -> "str":
with open(p.as_posix(), "rb") as f:
file_md5 = hashlib.md5()
while chunk := f.read(2**16):
file_md5.update(chunk)
return file_md5.hexdigest()
6 changes: 3 additions & 3 deletions bench/utils/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,15 +688,15 @@ def cache_list() -> None:
created = datetime.fromtimestamp(stat.st_ctime)
accessed = datetime.fromtimestamp(stat.st_atime)

app = item.name.split("-")[0]
app = item.name.split(".")[0]
tot_items += 1
tot_size += stat.st_size
compressed = item.suffix == ".tgz"

if not printed_header:
click.echo(
f"{'APP':15} "
f"{'FILE':25} "
f"{'FILE':90} "
f"{'SIZE':>13} "
f"{'COMPRESSED'} "
f"{'CREATED':19} "
Expand All @@ -706,7 +706,7 @@ def cache_list() -> None:

click.echo(
f"{app:15} "
f"{item.name:25} "
f"{item.name:90} "
f"{size_mb:10.3f} MB "
f"{str(compressed):10} "
f"{created:%Y-%m-%d %H:%M:%S} "
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "frappe-bench"
description = "CLI to manage Multi-tenant deployments for Frappe apps"
readme = "README.md"
license = "GPL-3.0-only"
requires-python = ">=3.7"
requires-python = ">=3.8"
authors = [
{ name = "Frappe Technologies Pvt Ltd", email = "developers@frappe.io" },
]
Expand Down

0 comments on commit 4c38272

Please sign in to comment.