From 74ffa7f6c61354741160d61a3f6e9ea57dfaf1e3 Mon Sep 17 00:00:00 2001 From: bojiang <5886138+bojiang@users.noreply.github.com> Date: Fri, 2 Aug 2024 18:18:48 +0800 Subject: [PATCH] chore: use uv instead of venv layers (#1054) * chmod * chore: discard venv layers * apply black * fix * ci: auto fixes from pre-commit.ci For more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- release.sh | 0 src/openllm/accelerator_spec.py | 1 - src/openllm/clean.py | 19 +++++++- src/openllm/common.py | 34 ++++++++++--- src/openllm/venv.py | 84 ++++++--------------------------- 5 files changed, 58 insertions(+), 80 deletions(-) mode change 100644 => 100755 release.sh diff --git a/release.sh b/release.sh old mode 100644 new mode 100755 diff --git a/src/openllm/accelerator_spec.py b/src/openllm/accelerator_spec.py index d0653ab1f..bcbd900d5 100644 --- a/src/openllm/accelerator_spec.py +++ b/src/openllm/accelerator_spec.py @@ -61,7 +61,6 @@ def __bool__(self): 'nvidia-tesla-a100': {'model': 'A100', 'memory_size': 40.0}, } - ACCELERATOR_SPECS: dict[str, Accelerator] = {key: Accelerator(**value) for key, value in ACCELERATOR_SPEC_DICT.items()} diff --git a/src/openllm/clean.py b/src/openllm/clean.py index edfc2b693..0f26a1beb 100644 --- a/src/openllm/clean.py +++ b/src/openllm/clean.py @@ -1,3 +1,4 @@ +import os import pathlib import shutil @@ -8,7 +9,6 @@ app = OpenLLMTyper(help='clean up and release disk space used by OpenLLM') - HUGGINGFACE_CACHE = pathlib.Path.home() / '.cache' / 'huggingface' / 'hub' @@ -30,7 +30,22 @@ def model_cache(verbose: bool = False): def venvs(verbose: bool = False): if verbose: VERBOSE_LEVEL.set(20) - used_space = sum(f.stat().st_size for f in VENV_DIR.rglob('*')) + + # Set to store paths of files to avoid double counting + seen_paths = set() + used_space = 0 + + for f in VENV_DIR.rglob('*'): + if os.name == 'nt': # Windows system + # On Windows, directly add file sizes without considering hard links + used_space += f.stat().st_size + else: + # On non-Windows systems, use inodes to avoid double counting + stat = f.stat() + if stat.st_ino not in seen_paths: + seen_paths.add(stat.st_ino) + used_space += stat.st_size + sure = questionary.confirm( f'This will remove all virtual environments created by OpenLLM (~{used_space / 1024 / 1024:.2f}MB), are you sure?' ).ask() diff --git a/src/openllm/common.py b/src/openllm/common.py index e60691b16..1cd95749b 100644 --- a/src/openllm/common.py +++ b/src/openllm/common.py @@ -21,7 +21,6 @@ ERROR_STYLE = 'red' SUCCESS_STYLE = 'green' - OPENLLM_HOME = pathlib.Path(os.getenv('OPENLLM_HOME', pathlib.Path.home() / '.openllm')) REPO_DIR = OPENLLM_HOME / 'repos' TEMP_DIR = OPENLLM_HOME / 'temp' @@ -231,14 +230,33 @@ def tolist(self): class VenvSpec(SimpleNamespace): python_version: str - python_packages: list[str] - options: list[str] = [] + requirements_txt: str name_prefix = '' + @functools.cached_property + def normalized_requirements_txt(self) -> str: + parameter_lines: list[str] = [] + dependency_lines: list[str] = [] + comment_lines: list[str] = [] + + for line in self.requirements_txt.splitlines(): + if not line.strip(): + continue + elif line.strip().startswith('#'): + comment_lines.append(line.strip()) + elif line.strip().startswith('-'): + parameter_lines.append(line.strip()) + else: + dependency_lines.append(line.strip()) + + parameter_lines.sort() + dependency_lines.sort() + return '\n'.join(parameter_lines + dependency_lines).strip() + def __hash__(self): return md5( # self.python_version, - *sorted(self.python_packages) + self.normalized_requirements_txt ) @@ -307,11 +325,13 @@ def run_command(cmd, cwd=None, env=None, copy_env=True, venv=None, silent=False) try: if silent: return subprocess.run( # type: ignore - cmd, cwd=cwd, env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL + cmd, cwd=cwd, env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True ) else: - return subprocess.run(cmd, cwd=cwd, env=env) - except subprocess.CalledProcessError: + return subprocess.run(cmd, cwd=cwd, env=env, check=True) + except Exception as e: + if VERBOSE_LEVEL.get() >= 10: + output(e, style='red') output('Command failed', style='red') raise typer.Exit(1) diff --git a/src/openllm/venv.py b/src/openllm/venv.py index f9b59741f..57356197e 100644 --- a/src/openllm/venv.py +++ b/src/openllm/venv.py @@ -2,8 +2,6 @@ import os import pathlib import shutil -import typing -from typing import Iterable import typer @@ -11,30 +9,7 @@ @functools.lru_cache -def _resolve_packages(requirement: typing.Union[pathlib.Path, str]): - from pip_requirements_parser import RequirementsFile - - requirements_txt = RequirementsFile.from_file(str(requirement), include_nested=True) - return requirements_txt - - -def _filter_preheat_packages(requirements: Iterable) -> list[str]: - PREHEAT_PIP_PACKAGES = ['torch', 'vllm'] - - deps: list[str] = [] - for req in requirements: - if req.is_editable or req.is_local_path or req.is_url or req.is_wheel or not req.name or not req.specifier: - continue - for sp in req.specifier: - if sp.operator == '==' and req.name in PREHEAT_PIP_PACKAGES: - assert req.line is not None - deps.append(req.line) - break - return deps - - -@functools.lru_cache -def _resolve_bento_env_specs(bento: BentoInfo): +def _resolve_bento_env_spec(bento: BentoInfo): ver_file = bento.path / 'env' / 'python' / 'version.txt' assert ver_file.exists(), f'cannot find version file in {bento.path}' @@ -42,32 +17,13 @@ def _resolve_bento_env_specs(bento: BentoInfo): if not lock_file.exists(): lock_file = bento.path / 'env' / 'python' / 'requirements.txt' - reqs = _resolve_packages(lock_file) - packages = reqs.requirements - options = reqs.options - preheat_packages = _filter_preheat_packages(packages) ver = ver_file.read_text().strip() - return ( - VenvSpec( - python_version=ver, python_packages=preheat_packages, name_prefix=f"{bento.tag.replace(':', '_')}-1-" - ), - VenvSpec( - python_version=ver, - python_packages=[v.line for v in packages], - options=[o.line for o in options], - name_prefix=f"{bento.tag.replace(':', '_')}-2-", - ), - ) + reqs = lock_file.read_text().strip() + return VenvSpec(python_version=ver, requirements_txt=reqs, name_prefix=f"{bento.tag.replace(':', '_')}-1-") -def _get_lib_dir(venv: pathlib.Path) -> pathlib.Path: - if os.name == 'nt': - return venv / 'Lib/site-packages' - else: - return next(venv.glob('lib/python*')) / 'site-packages' - -def _ensure_venv(env_spec: VenvSpec, parrent_venv: typing.Optional[pathlib.Path] = None) -> pathlib.Path: +def _ensure_venv(env_spec: VenvSpec) -> pathlib.Path: venv = VENV_DIR / str(hash(env_spec)) if venv.exists() and not (venv / 'DONE').exists(): shutil.rmtree(venv, ignore_errors=True) @@ -77,22 +33,18 @@ def _ensure_venv(env_spec: VenvSpec, parrent_venv: typing.Optional[pathlib.Path] venv_py = venv / 'Scripts' / 'python.exe' if os.name == 'nt' else venv / 'bin' / 'python' try: run_command(['python', '-m', 'uv', 'venv', venv], silent=VERBOSE_LEVEL.get() < 10) - lib_dir = _get_lib_dir(venv) - if parrent_venv is not None: - parent_lib_dir = _get_lib_dir(parrent_venv) - with open(lib_dir / f'{parrent_venv.name}.pth', 'w+') as f: - f.write(str(parent_lib_dir)) with open(venv / 'requirements.txt', 'w') as f: - with open(venv / 'requirements.txt', 'w') as f: - f.write('\n'.join(env_spec.options + sorted(env_spec.python_packages))) + f.write(env_spec.normalized_requirements_txt) run_command( ['python', '-m', 'uv', 'pip', 'install', '-p', str(venv_py), '-r', venv / 'requirements.txt'], silent=VERBOSE_LEVEL.get() < 10, ) with open(venv / 'DONE', 'w') as f: f.write('DONE') - except Exception: + except Exception as e: shutil.rmtree(venv, ignore_errors=True) + if VERBOSE_LEVEL.get() >= 10: + output(e, style='red') output(f'Failed to install dependencies to {venv}. Cleaned up.', style='red') raise typer.Exit(1) output(f'Successfully installed dependencies to {venv}.', style='green') @@ -101,26 +53,18 @@ def _ensure_venv(env_spec: VenvSpec, parrent_venv: typing.Optional[pathlib.Path] return venv -def _ensure_venvs(env_spec_list: Iterable[VenvSpec]) -> pathlib.Path: - last_venv = None - for env_spec in env_spec_list: - last_venv = _ensure_venv(env_spec, last_venv) - assert last_venv is not None - return last_venv - - def ensure_venv(bento: BentoInfo) -> pathlib.Path: - return _ensure_venvs(_resolve_bento_env_specs(bento)) + env_spec = _resolve_bento_env_spec(bento) + venv = _ensure_venv(env_spec) + assert venv is not None + return venv -def _check_venv(env_spec: VenvSpec) -> bool: +def check_venv(bento: BentoInfo) -> bool: + env_spec = _resolve_bento_env_spec(bento) venv = VENV_DIR / str(hash(env_spec)) if not venv.exists(): return False if venv.exists() and not (venv / 'DONE').exists(): return False return True - - -def check_venv(bento: BentoInfo) -> bool: - return all(_check_venv(env_spec) for env_spec in _resolve_bento_env_specs(bento))