Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[core] [1/N] Validate uv options #48479

Merged
merged 9 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions python/ray/_private/runtime_env/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
load("@rules_python//python:defs.bzl", "py_library")

package(default_visibility = ["//visibility:public"])

py_library(
name = "validation",
srcs = ["validation.py"],
)
64 changes: 64 additions & 0 deletions python/ray/_private/runtime_env/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,69 @@ def parse_and_validate_conda(conda: Union[str, dict]) -> Union[str, dict]:
return result


# TODO(hjiang): More package installation options to implement:
# 1. Allow users to pass in a local requirements.txt file, which relates to all
# packages to install;
# 2. Allow specific version of `uv` to use; as of now we only use default version.
# 3. `pip_check` has different semantics for `uv` and `pip`, see
# https://github.com/astral-sh/uv/pull/2544/files, consider whether we need to support
# it; or simply ignore the field when people come from `pip`.
def parse_and_validate_uv(uv: Union[str, List[str], Dict]) -> Optional[Dict]:
"""Parses and validates a user-provided 'uv' option.

The value of the input 'uv' field can be one of two cases:
1) A List[str] describing the requirements. This is passed through.
Example usage: ["tensorflow", "requests"]
2) A python dictionary that has three fields:
a) packages (required, List[str]): a list of uv packages, it same as 1).
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what are the other 2 fields?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry I don't understand, I'm trying to mimic the documentation for pip.

"""Parses and validates a user-provided 'pip' option.
The value of the input 'pip' field can be one of two cases:
1) A List[str] describing the requirements. This is passed through.
2) A string pointing to a local requirements file. In this case, the
file contents will be read split into a list.
3) A python dictionary that has three fields:
a) packages (required, List[str]): a list of pip packages, it same as 1).
b) pip_check (optional, bool): whether to enable pip check at the end of pip
install, default to False.
c) pip_version (optional, str): the version of pip, ray will spell
the package name 'pip' in front of the `pip_version` to form the final
requirement string, the syntax of a requirement specifier is defined in
full in PEP 508.
The returned parsed value will be a list of pip packages. If a Ray library
(e.g. "ray[serve]") is specified, it will be deleted and replaced by its
dependencies (e.g. "uvicorn", "requests").

Could you please tell me which part am I missing here?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean we should support pip_check field as well and possibly uv_version field.

Copy link
Contributor Author

@dentiny dentiny Nov 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

About uv_version, since the functionality is not implemented yet, I already left a TODO comment at the start of the function. IMO, only implemented feature should be officially documented.
I will definitely document it when feature implemented. Let me know if you're fine with it.

About pip_check, my concern is it doesn't have 100% compatibility between uv and pip, in some cases it would report failure, check out https://github.com/astral-sh/uv/pull/2544/files

One concrete example might be, uv's pip_check warns against multiple versions of a package, while pip version doesn't.
So I'm hesitant whether to implement it or not; but anyway I left a TODO comment at the beginning, so we don't forget to


The returned parsed value will be a list of packages. If a Ray library
(e.g. "ray[serve]") is specified, it will be deleted and replaced by its
dependencies (e.g. "uvicorn", "requests").
"""
assert uv is not None
if sys.platform == "win32":
logger.warning(
"runtime environment support is experimental on Windows. "
"If you run into issues please file a report at "
"https://github.com/ray-project/ray/issues."
)

result: str = ""
if isinstance(uv, list) and all(isinstance(dep, str) for dep in uv):
result = dict(packages=uv)
elif isinstance(uv, dict):
if set(uv.keys()) - {"packages"}:
raise ValueError(
"runtime_env['uv'] can only have these fields: "
"packages, but got: "
f"{list(uv.keys())}"
)
if "packages" not in uv:
raise ValueError(
f"runtime_env['uv'] must include field 'packages', but got {uv}"
)

result = uv.copy()
if not isinstance(uv["packages"], list):
raise ValueError(
"runtime_env['uv']['packages'] must be of type list, "
f"got: {type(uv['packages'])}"
)
else:
raise TypeError(
"runtime_env['uv'] must be of type " f"List[str], or dict, got {type(uv)}"
)

# Deduplicate packages for package lists.
result["packages"] = list(OrderedDict.fromkeys(result["packages"]))

if len(result["packages"]) == 0:
result = None
logger.debug(f"Rewrote runtime_env `uv` field from {uv} to {result}.")
return result


def parse_and_validate_pip(pip: Union[str, List[str], Dict]) -> Optional[Dict]:
"""Parses and validates a user-provided 'pip' option.

Expand Down Expand Up @@ -280,6 +343,7 @@ def parse_and_validate_env_vars(env_vars: Dict[str, str]) -> Optional[Dict[str,
"excludes": parse_and_validate_excludes,
"conda": parse_and_validate_conda,
"pip": parse_and_validate_pip,
"uv": parse_and_validate_uv,
"env_vars": parse_and_validate_env_vars,
"container": parse_and_validate_container,
}
9 changes: 9 additions & 0 deletions python/ray/tests/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -791,3 +791,12 @@ py_test_module_list(
"spark/discover_4_gpu.sh"
],
)

py_test(
name = "runtime_env_validation_unit_test",
srcs = ["runtime_env_validation_unit_test.py"],
tags = ["team:core"],
deps = [
"//python/ray/_private/runtime_env:validation",
],
)
121 changes: 121 additions & 0 deletions python/ray/tests/runtime_env_validation_unit_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# TODO(hjiang): Move conda related unit test to this file also, after addressing the `
# yaml` third-party dependency issue.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's follow what serve does here (python/ray/serve/tests/unit) and create python/ray/tests/unit. Also checkout out python/ray/serve/tests/unit/BUILD. The rule is everything under unit should have no import ray.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we take a look at the so-called unit test python/ray/serve/tests/unit, it's still importing ray, so still integration test sadly :(

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But yeah, I could make another folder for unit test only, SGTM :)

from python.ray._private.runtime_env import validation

import unittest
import os
import shutil
from pathlib import Path
import tempfile

_PIP_LIST = ["requests==1.0.0", "pip-install-test"]


class TestVaidationUv(unittest.TestCase):
def test_parse_and_validate_uv(self):
# Valid case w/o duplication.
result = validation.parse_and_validate_uv({"packages": ["tensorflow"]})
self.assertEqual(result, {"packages": ["tensorflow"]})

# Valid case w/ duplication.
result = validation.parse_and_validate_uv(
{"packages": ["tensorflow", "tensorflow"]}
)
self.assertEqual(result, {"packages": ["tensorflow"]})

# Valid case, use `list` to represent necessary packages.
result = validation.parse_and_validate_uv(
["requests==1.0.0", "aiohttp", "ray[serve]"]
)
self.assertEqual(
result, {"packages": ["requests==1.0.0", "aiohttp", "ray[serve]"]}
)

# Invalid case, `str` is not supported for now.
with self.assertRaises(TypeError) as _:
result = validation.parse_and_validate_uv("./requirements.txt")

# Invalid case, unsupport keys.
with self.assertRaises(ValueError) as _:
result = validation.parse_and_validate_uv({"random_key": "random_value"})


class TestValidatePip(unittest.TestCase):
def setUp(self):
self.tmp_dir = tempfile.mkdtemp()
path = Path(self.tmp_dir)
self.subdir = path / "subdir"
self.subdir.mkdir(parents=True)
self.requirements_file = self.subdir / "requirements.txt"
with self.requirements_file.open(mode="w") as f:
print("\n".join(_PIP_LIST), file=f)

self.old_dir = os.getcwd()
os.chdir(self.tmp_dir)

def tearDown(self):
os.chdir(self.old_dir)
shutil.rmtree(self.tmp_dir)

def test_validate_pip_invalid_types(self):
with self.assertRaises(TypeError) as _:
validation.parse_and_validate_pip(1)

with self.assertRaises(TypeError) as _:
validation.parse_and_validate_pip(True)

def test_validate_pip_invalid_path(self):
with self.assertRaises(ValueError) as _:
validation.parse_and_validate_pip("../bad_path.txt")

def test_validate_pip_valid_file(self):
result = validation.parse_and_validate_pip(str(self.requirements_file))
self.assertEqual(result["packages"], _PIP_LIST)
self.assertFalse(result["pip_check"])
self.assertFalse("pip_version" in result)

def test_validate_pip_valid_list(self):
result = validation.parse_and_validate_pip(_PIP_LIST)
self.assertEqual(result["packages"], _PIP_LIST)
self.assertFalse(result["pip_check"])
self.assertFalse("pip_version" in result)

def test_validate_ray(self):
result = validation.parse_and_validate_pip(["pkg1", "ray", "pkg2"])
self.assertEqual(result["packages"], ["pkg1", "ray", "pkg2"])
self.assertFalse(result["pip_check"])
self.assertFalse("pip_version" in result)


class TestValidateEnvVars(unittest.TestCase):
def test_type_validation(self):
with self.assertRaises(TypeError) as context:
validation.parse_and_validate_env_vars({"INT_ENV": 1})
self.assertRegex(str(context.exception), ".*Dict\\[str, str\\]*")

with self.assertRaises(TypeError) as context:
validation.parse_and_validate_env_vars({1: "hi"})
self.assertRegex(str(context.exception), ".*Dict\\[str, str\\]*")

with self.assertRaises(TypeError) as context:
validation.parse_and_validate_env_vars({"hi": 123})
self.assertRegex(
str(context.exception), ".*value 123 is of type <class 'int'>*"
)

with self.assertRaises(TypeError) as context:
validation.parse_and_validate_env_vars({"hi": True})
self.assertRegex(
str(context.exception), ".*value True is of type <class 'bool'>*"
)

with self.assertRaises(TypeError) as context:
validation.parse_and_validate_env_vars({1.23: "hi"})
self.assertRegex(
str(context.exception), ".*key 1.23 is of type <class 'float'>*"
)


if __name__ == "__main__":
unittest.main()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sys.exit(pytest.main(["-v", "-s", __file__]))? just to be consistent with other tests.

If we want to move away from pytest, it's a separate topic.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sadly, our current bazel setup cannot import pytest without any pain :(

58 changes: 0 additions & 58 deletions python/ray/tests/test_runtime_env_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
parse_and_validate_excludes,
parse_and_validate_working_dir,
parse_and_validate_conda,
parse_and_validate_pip,
parse_and_validate_env_vars,
parse_and_validate_py_modules,
)
from ray._private.runtime_env.plugin_schema_manager import RuntimeEnvPluginSchemaManager
Expand Down Expand Up @@ -175,62 +173,6 @@ def test_validate_conda_valid_dict(self):
assert parse_and_validate_conda(CONDA_DICT) == CONDA_DICT


class TestValidatePip:
def test_validate_pip_invalid_types(self):
with pytest.raises(TypeError):
parse_and_validate_pip(1)

with pytest.raises(TypeError):
parse_and_validate_pip(True)

def test_validate_pip_invalid_path(self):
with pytest.raises(ValueError):
parse_and_validate_pip("../bad_path.txt")

@pytest.mark.parametrize("absolute_path", [True, False])
def test_validate_pip_valid_file(self, test_directory, absolute_path):
_, requirements_file, _, _ = test_directory

if absolute_path:
requirements_file = requirements_file.resolve()

result = parse_and_validate_pip(str(requirements_file))
assert result["packages"] == PIP_LIST
assert not result["pip_check"]
assert "pip_version" not in result

def test_validate_pip_valid_list(self):
result = parse_and_validate_pip(PIP_LIST)
assert result["packages"] == PIP_LIST
assert not result["pip_check"]
assert "pip_version" not in result

def test_validate_ray(self):
result = parse_and_validate_pip(["pkg1", "ray", "pkg2"])
assert result["packages"] == ["pkg1", "ray", "pkg2"]
assert not result["pip_check"]
assert "pip_version" not in result


class TestValidateEnvVars:
def test_type_validation(self):
# Only strings allowed.
with pytest.raises(TypeError, match=".*Dict[str, str]*"):
parse_and_validate_env_vars({"INT_ENV": 1})

with pytest.raises(TypeError, match=".*Dict[str, str]*"):
parse_and_validate_env_vars({1: "hi"})

with pytest.raises(TypeError, match=".*value 123 is of type <class 'int'>*"):
parse_and_validate_env_vars({"hi": 123})

with pytest.raises(TypeError, match=".*value True is of type <class 'bool'>*"):
parse_and_validate_env_vars({"hi": True})

with pytest.raises(TypeError, match=".*key 1.23 is of type <class 'float'>*"):
parse_and_validate_env_vars({1.23: "hi"})


class TestParsedRuntimeEnv:
def test_empty(self):
assert RuntimeEnv() == {}
Expand Down