Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[core] [1/N] Validate uv options #48479

Merged
merged 9 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions python/ray/_private/runtime_env/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
load("@rules_python//python:defs.bzl", "py_library")

package(default_visibility = ["//visibility:public"])

py_library(
name = "validation",
srcs = ["validation.py"],
)
64 changes: 64 additions & 0 deletions python/ray/_private/runtime_env/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,69 @@ def parse_and_validate_conda(conda: Union[str, dict]) -> Union[str, dict]:
return result


# TODO(hjiang): More package installation options to implement:
# 1. Allow users to pass in a local requirements.txt file, which relates to all
# packages to install;
# 2. Allow specific version of `uv` to use; as of now we only use default version.
# 3. `pip_check` has different semantics for `uv` and `pip`, see
# https://github.com/astral-sh/uv/pull/2544/files, consider whether we need to support
# it; or simply ignore the field when people come from `pip`.
def parse_and_validate_uv(uv: Union[str, List[str], Dict]) -> Optional[Dict]:
"""Parses and validates a user-provided 'uv' option.

The value of the input 'uv' field can be one of two cases:
1) A List[str] describing the requirements. This is passed through.
Example usage: ["tensorflow", "requests"]
2) A python dictionary that has three fields:
a) packages (required, List[str]): a list of uv packages, it same as 1).
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what are the other 2 fields?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry I don't understand, I'm trying to mimic the documentation for pip.

"""Parses and validates a user-provided 'pip' option.
The value of the input 'pip' field can be one of two cases:
1) A List[str] describing the requirements. This is passed through.
2) A string pointing to a local requirements file. In this case, the
file contents will be read split into a list.
3) A python dictionary that has three fields:
a) packages (required, List[str]): a list of pip packages, it same as 1).
b) pip_check (optional, bool): whether to enable pip check at the end of pip
install, default to False.
c) pip_version (optional, str): the version of pip, ray will spell
the package name 'pip' in front of the `pip_version` to form the final
requirement string, the syntax of a requirement specifier is defined in
full in PEP 508.
The returned parsed value will be a list of pip packages. If a Ray library
(e.g. "ray[serve]") is specified, it will be deleted and replaced by its
dependencies (e.g. "uvicorn", "requests").

Could you please tell me which part am I missing here?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean we should support pip_check field as well and possibly uv_version field.

Copy link
Contributor Author

@dentiny dentiny Nov 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

About uv_version, since the functionality is not implemented yet, I already left a TODO comment at the start of the function. IMO, only implemented feature should be officially documented.
I will definitely document it when feature implemented. Let me know if you're fine with it.

About pip_check, my concern is it doesn't have 100% compatibility between uv and pip, in some cases it would report failure, check out https://github.com/astral-sh/uv/pull/2544/files

One concrete example might be, uv's pip_check warns against multiple versions of a package, while pip version doesn't.
So I'm hesitant whether to implement it or not; but anyway I left a TODO comment at the beginning, so we don't forget to


The returned parsed value will be a list of packages. If a Ray library
(e.g. "ray[serve]") is specified, it will be deleted and replaced by its
dependencies (e.g. "uvicorn", "requests").
"""
assert uv is not None
if sys.platform == "win32":
logger.warning(
"runtime environment support is experimental on Windows. "
"If you run into issues please file a report at "
"https://github.com/ray-project/ray/issues."
)

result: str = ""
if isinstance(uv, list) and all(isinstance(dep, str) for dep in uv):
result = dict(packages=uv)
elif isinstance(uv, dict):
if set(uv.keys()) - {"packages"}:
raise ValueError(
"runtime_env['uv'] can only have these fields: "
"packages, but got: "
f"{list(uv.keys())}"
)
if "packages" not in uv:
raise ValueError(
f"runtime_env['uv'] must include field 'packages', but got {uv}"
)

result = uv.copy()
if not isinstance(uv["packages"], list):
raise ValueError(
"runtime_env['uv']['packages'] must be of type list, "
f"got: {type(uv['packages'])}"
)
else:
raise TypeError(
"runtime_env['uv'] must be of type " f"List[str], or dict, got {type(uv)}"
)

# Deduplicate packages for package lists.
result["packages"] = list(OrderedDict.fromkeys(result["packages"]))

if len(result["packages"]) == 0:
result = None
logger.debug(f"Rewrote runtime_env `uv` field from {uv} to {result}.")
return result


def parse_and_validate_pip(pip: Union[str, List[str], Dict]) -> Optional[Dict]:
"""Parses and validates a user-provided 'pip' option.

Expand Down Expand Up @@ -280,6 +343,7 @@ def parse_and_validate_env_vars(env_vars: Dict[str, str]) -> Optional[Dict[str,
"excludes": parse_and_validate_excludes,
"conda": parse_and_validate_conda,
"pip": parse_and_validate_pip,
"uv": parse_and_validate_uv,
"env_vars": parse_and_validate_env_vars,
"container": parse_and_validate_container,
}
58 changes: 0 additions & 58 deletions python/ray/tests/test_runtime_env_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
parse_and_validate_excludes,
parse_and_validate_working_dir,
parse_and_validate_conda,
parse_and_validate_pip,
parse_and_validate_env_vars,
parse_and_validate_py_modules,
)
from ray._private.runtime_env.plugin_schema_manager import RuntimeEnvPluginSchemaManager
Expand Down Expand Up @@ -175,62 +173,6 @@ def test_validate_conda_valid_dict(self):
assert parse_and_validate_conda(CONDA_DICT) == CONDA_DICT


class TestValidatePip:
def test_validate_pip_invalid_types(self):
with pytest.raises(TypeError):
parse_and_validate_pip(1)

with pytest.raises(TypeError):
parse_and_validate_pip(True)

def test_validate_pip_invalid_path(self):
with pytest.raises(ValueError):
parse_and_validate_pip("../bad_path.txt")

@pytest.mark.parametrize("absolute_path", [True, False])
def test_validate_pip_valid_file(self, test_directory, absolute_path):
_, requirements_file, _, _ = test_directory

if absolute_path:
requirements_file = requirements_file.resolve()

result = parse_and_validate_pip(str(requirements_file))
assert result["packages"] == PIP_LIST
assert not result["pip_check"]
assert "pip_version" not in result

def test_validate_pip_valid_list(self):
result = parse_and_validate_pip(PIP_LIST)
assert result["packages"] == PIP_LIST
assert not result["pip_check"]
assert "pip_version" not in result

def test_validate_ray(self):
result = parse_and_validate_pip(["pkg1", "ray", "pkg2"])
assert result["packages"] == ["pkg1", "ray", "pkg2"]
assert not result["pip_check"]
assert "pip_version" not in result


class TestValidateEnvVars:
def test_type_validation(self):
# Only strings allowed.
with pytest.raises(TypeError, match=".*Dict[str, str]*"):
parse_and_validate_env_vars({"INT_ENV": 1})

with pytest.raises(TypeError, match=".*Dict[str, str]*"):
parse_and_validate_env_vars({1: "hi"})

with pytest.raises(TypeError, match=".*value 123 is of type <class 'int'>*"):
parse_and_validate_env_vars({"hi": 123})

with pytest.raises(TypeError, match=".*value True is of type <class 'bool'>*"):
parse_and_validate_env_vars({"hi": True})

with pytest.raises(TypeError, match=".*key 1.23 is of type <class 'float'>*"):
parse_and_validate_env_vars({1.23: "hi"})


class TestParsedRuntimeEnv:
def test_empty(self):
assert RuntimeEnv() == {}
Expand Down
8 changes: 8 additions & 0 deletions python/ray/tests/unit/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
py_test(
name = "test_runtime_env_validation",
srcs = ["test_runtime_env_validation.py"],
tags = ["team:core"],
deps = [
"//python/ray/_private/runtime_env:validation",
],
)
115 changes: 115 additions & 0 deletions python/ray/tests/unit/test_runtime_env_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# TODO(hjiang): Move conda related unit test to this file also, after addressing the `
# yaml` third-party dependency issue.

from ray._private.runtime_env import validation

import os
from pathlib import Path
import tempfile
import pytest
import sys

_PIP_LIST = ["requests==1.0.0", "pip-install-test"]


@pytest.fixture
def test_directory():
with tempfile.TemporaryDirectory() as tmp_dir:
path = Path(tmp_dir)
subdir = path / "subdir"
subdir.mkdir(parents=True)
requirements_file = subdir / "requirements.txt"
with requirements_file.open(mode="w") as f:
print("\n".join(_PIP_LIST), file=f)

old_dir = os.getcwd()
os.chdir(tmp_dir)
yield subdir, requirements_file
os.chdir(old_dir)


class TestVaidationUv:
def test_parse_and_validate_uv(self):
# Valid case w/o duplication.
result = validation.parse_and_validate_uv({"packages": ["tensorflow"]})
assert result == {"packages": ["tensorflow"]}

# Valid case w/ duplication.
result = validation.parse_and_validate_uv(
{"packages": ["tensorflow", "tensorflow"]}
)
assert result == {"packages": ["tensorflow"]}

# Valid case, use `list` to represent necessary packages.
result = validation.parse_and_validate_uv(
["requests==1.0.0", "aiohttp", "ray[serve]"]
)
assert result == {"packages": ["requests==1.0.0", "aiohttp", "ray[serve]"]}

# Invalid case, `str` is not supported for now.
with pytest.raises(TypeError):
result = validation.parse_and_validate_uv("./requirements.txt")

# Invalid case, unsupport keys.
with pytest.raises(ValueError):
result = validation.parse_and_validate_uv({"random_key": "random_value"})


class TestValidatePip:
def test_validate_pip_invalid_types(self):
with pytest.raises(TypeError):
validation.parse_and_validate_pip(1)

with pytest.raises(TypeError):
validation.parse_and_validate_pip(True)

def test_validate_pip_invalid_path(self):
with pytest.raises(ValueError):
validation.parse_and_validate_pip("../bad_path.txt")

@pytest.mark.parametrize("absolute_path", [True, False])
def test_validate_pip_valid_file(self, test_directory, absolute_path):
_, requirements_file = test_directory

if absolute_path:
requirements_file = requirements_file.resolve()

result = validation.parse_and_validate_pip(str(requirements_file))
assert result["packages"] == _PIP_LIST
assert not result["pip_check"]
assert "pip_version" not in result

def test_validate_pip_valid_list(self):
result = validation.parse_and_validate_pip(_PIP_LIST)
assert result["packages"] == _PIP_LIST
assert not result["pip_check"]
assert "pip_version" not in result

def test_validate_ray(self):
result = validation.parse_and_validate_pip(["pkg1", "ray", "pkg2"])
assert result["packages"] == ["pkg1", "ray", "pkg2"]
assert not result["pip_check"]
assert "pip_version" not in result


class TestValidateEnvVars:
def test_type_validation(self):
# Only strings allowed.
with pytest.raises(TypeError, match=".*Dict[str, str]*"):
validation.parse_and_validate_env_vars({"INT_ENV": 1})

with pytest.raises(TypeError, match=".*Dict[str, str]*"):
validation.parse_and_validate_env_vars({1: "hi"})

with pytest.raises(TypeError, match=".*value 123 is of type <class 'int'>*"):
validation.parse_and_validate_env_vars({"hi": 123})

with pytest.raises(TypeError, match=".*value True is of type <class 'bool'>*"):
validation.parse_and_validate_env_vars({"hi": True})

with pytest.raises(TypeError, match=".*key 1.23 is of type <class 'float'>*"):
validation.parse_and_validate_env_vars({1.23: "hi"})


if __name__ == "__main__":
sys.exit(pytest.main(["-vv", __file__]))
Loading