Skip to content

Commit

Permalink
Merge branch 'master' into 0705-get_internal_block_refs
Browse files Browse the repository at this point in the history
Signed-off-by: Scott Lee <sjl@anyscale.com>
  • Loading branch information
Scott Lee committed Jul 11, 2024
2 parents fc0a334 + 306cd9e commit 615b03a
Show file tree
Hide file tree
Showing 32 changed files with 223 additions and 101 deletions.
5 changes: 2 additions & 3 deletions .buildkite/lint.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,10 @@ steps:
- lint
key: lint-medium
instance_type: medium
depends_on:
- oss-ci-base_build
depends_on: docbuild
job_env: docbuild
commands:
- ./ci/lint/lint.sh {{matrix}}
job_env: oss-ci-base_build
matrix:
- api_annotations
- api_policy_check data
Expand Down
5 changes: 5 additions & 0 deletions .buildkite/release-automation/pre_release.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ group: Pre-release checks
depends_on:
- forge
steps:
- block: "Run pre-release checks"
if: build.env("RAY_VERSION") != null
key: block-pre-release-checks
depends_on: []

- label: "Check release blockers"
key: check-release-blockers
instance_type: small_branch
Expand Down
2 changes: 1 addition & 1 deletion SECURITY.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Security Policy

If you are deploying Ray, read [Security](ray-security/index.html).
If you are deploying Ray, read [Security](doc/source/ray-security/index.md).

## Reporting a vulnerability

Expand Down
3 changes: 3 additions & 0 deletions ci/lint/lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ api_annotations() {
}

api_policy_check() {
# install ray and compile doc to generate API files
RAY_DISABLE_EXTRA_CPP=1 pip install -e "python[all]"
make -C doc/ html
# validate the API files
bazel run //ci/ray_ci/doc:cmd_check_api_discrepancy -- /ray "$@"
}

Expand Down
58 changes: 48 additions & 10 deletions ci/ray_ci/doc/autodoc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
import re
from typing import List
from typing import List, Set

from ci.ray_ci.doc.api import (
API,
Expand All @@ -11,6 +11,7 @@

_SPHINX_CURRENTMODULE_HEADER = ".. currentmodule::"
_SPHINX_TOCTREE_HEADER = ".. toctree::"
_SPHINX_INCLUDE_HEADER = ".. include::"


class Autodoc:
Expand Down Expand Up @@ -42,26 +43,60 @@ def walk(self) -> None:
for rst in rsts:
self._apis.extend(self._parse_autodoc_rst(rst))

def _get_autodoc_rsts(self) -> List[str]:
def _get_autodoc_rsts(self) -> Set[str]:
"""
Recursively parse the head_rst_file to find all the autodoc rsts
"""
if self._autodoc_rsts is not None:
return self._autodoc_rsts

self._autodoc_rsts = {self._head_rst_file}
visit_current = {self._head_rst_file}
while visit_current:
visit_next = set()
for rst in visit_current:
for child_rst in self._get_autodoc_rsts_in_file(rst):
if child_rst not in self._autodoc_rsts:
self._autodoc_rsts.add(child_rst)
visit_next.add(child_rst)
visit_current = visit_next

return self._autodoc_rsts

def _get_autodoc_rsts_in_file(self, rst_file: str) -> Set[str]:
"""
Parse the list of rst declared in the head_rst_file, for example:
.. include:: area_00.rst
.. toctree::
:option
area_01.rst
area_02.rst
"""
if self._autodoc_rsts is not None:
return self._autodoc_rsts
if not os.path.exists(rst_file):
return set()

dir = os.path.dirname(self._head_rst_file)
self._autodoc_rsts = [self._head_rst_file]
with open(self._head_rst_file, "r") as f:
rsts = set()
dir = os.path.dirname(rst_file)
with open(rst_file, "r") as f:
line = f.readline()
while line:
line = line.strip()

# look for the include block
if line.startswith(_SPHINX_INCLUDE_HEADER):
rsts.add(
os.path.join(
dir, line.removeprefix(_SPHINX_INCLUDE_HEADER).strip()
)
)
line = f.readline()
continue

# look for the toctree block
if not line.strip() == _SPHINX_TOCTREE_HEADER:
if not line == _SPHINX_TOCTREE_HEADER:
line = f.readline()
continue

Expand All @@ -73,10 +108,10 @@ def _get_autodoc_rsts(self) -> List[str]:
# the line is not empty and not starting with empty space
break
if line.strip().endswith(".rst"):
self._autodoc_rsts.append(os.path.join(dir, line.strip()))
rsts.add(os.path.join(dir, line.strip()))
line = f.readline()

return self._autodoc_rsts
return rsts

def _parse_autodoc_rst(self, rst_file: str) -> List[API]:
"""
Expand All @@ -92,6 +127,9 @@ def _parse_autodoc_rst(self, rst_file: str) -> List[API]:
myclass.myfunc_01
myclass.myfunc_02
"""
if not os.path.exists(rst_file):
return []

apis = []
module = None
with open(rst_file, "r") as f:
Expand Down
8 changes: 0 additions & 8 deletions ci/ray_ci/doc/cmd_check_api_discrepancy.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,6 @@
"ray.data.dataset.MaterializedDataset",
# special case where we cannot deprecate although we want to
"ray.data.random_access_dataset.RandomAccessDataset",
# TODO(can): apis that are auto-generated so falsely classified as not
# documented with the current logic
"ray.data.iterator.DataIterator",
"ray.data.iterator.DataIterator.iter_batches",
"ray.data.iterator.DataIterator.iter_torch_batches",
"ray.data.iterator.DataIterator.to_tf",
"ray.data.iterator.DataIterator.materialize",
"ray.data.iterator.DataIterator.stats",
},
},
}
Expand Down
40 changes: 30 additions & 10 deletions ci/ray_ci/doc/test_autodoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,48 +15,68 @@ def test_walk():
f.write("\tapi_01.rst\n")
f.write("\tapi_02.rst\n")
with open(os.path.join(tmp, "api_01.rst"), "w") as f:
f.write(".. include:: api_03.rst\n")
f.write(".. currentmodule:: ci.ray_ci.doc\n")
f.write(".. autosummary::\n")
f.write("\t~mock.mock_function\n")
f.write("\tmock.mock_module.mock_w00t\n")
with open(os.path.join(tmp, "api_02.rst"), "w") as f:
f.write(".. currentmodule:: ci.ray_ci.doc.mock\n")
f.write(".. autoclass:: MockClass\n")
with open(os.path.join(tmp, "api_03.rst"), "w") as f:
f.write(".. currentmodule:: ci.ray_ci.doc\n")
f.write(".. autosummary::\n")
f.write("\t~mock.mock_function\n")

autodoc = Autodoc(os.path.join(tmp, "head.rst"))
apis = autodoc.get_apis()
apis = sorted(autodoc.get_apis(), key=lambda x: x.name)
assert str(apis) == str(
[
API(
name="ci.ray_ci.doc.mock.mock_function",
name="ci.ray_ci.doc.mock.MockClass",
annotation_type=AnnotationType.PUBLIC_API,
code_type=CodeType.FUNCTION,
code_type=CodeType.CLASS,
),
API(
name="ci.ray_ci.doc.mock.mock_module.mock_w00t",
name="ci.ray_ci.doc.mock.mock_function",
annotation_type=AnnotationType.PUBLIC_API,
code_type=CodeType.FUNCTION,
),
API(
name="ci.ray_ci.doc.mock.MockClass",
name="ci.ray_ci.doc.mock.mock_module.mock_w00t",
annotation_type=AnnotationType.PUBLIC_API,
code_type=CodeType.CLASS,
code_type=CodeType.FUNCTION,
),
]
)
assert (
apis[0].get_canonical_name()
== f"{mock_function.__module__}.{mock_function.__qualname__}"
== f"{MockClass.__module__}.{MockClass.__qualname__}"
)
assert (
apis[1].get_canonical_name()
== f"{mock_w00t.__module__}.{mock_w00t.__qualname__}"
== f"{mock_function.__module__}.{mock_function.__qualname__}"
)
assert (
apis[2].get_canonical_name()
== f"{MockClass.__module__}.{MockClass.__qualname__}"
== f"{mock_w00t.__module__}.{mock_w00t.__qualname__}"
)


def test_get_autodoc_rsts_in_file():
with tempfile.TemporaryDirectory() as tmp:
with open(os.path.join(tmp, "head.rst"), "w") as f:
f.write(".. include:: api_00.rst\n")
f.write(".. toctree::\n\n")
f.write("\tapi_01.rst\n")
f.write("\tapi_02.rst\n")

autodoc = Autodoc("head.rst")
sorted(autodoc._get_autodoc_rsts_in_file(os.path.join(tmp, "head.rst"))) == {
os.path.join(tmp, "api_00.rst"),
os.path.join(tmp, "api_01.rst"),
os.path.join(tmp, "api_02.rst"),
}


if __name__ == "__main__":
sys.exit(pytest.main(["-vv", __file__]))
2 changes: 1 addition & 1 deletion doc/source/_templates/extrahead.html
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
j.async = true;
j.src = 'https://www.googletagmanager.com/gtm.js?id=' + i + dl;
f.parentNode.insertBefore(j, f);
})(window, document, 'script', 'dataLayer', 'GTM-TQX5VVP7');
})(window, document, 'script', 'dataLayer', 'GTM-P8H6KQG');
</script>
<!-- End Google Tag Manager -->
<!-- Data to be shared with JS on every page -->
Expand Down
2 changes: 1 addition & 1 deletion doc/source/ray-core/starting-ray.rst
Original file line number Diff line number Diff line change
Expand Up @@ -260,4 +260,4 @@ Note that the machine calling ``ray up`` will not be considered as part of the R
What's next?
------------

Check out our `Deployment section <cluster/index.html>`_ for more information on deploying Ray in different settings, including Kubernetes, YARN, and SLURM.
Check out our `Deployment section <../cluster/getting-started.html>`_ for more information on deploying Ray in different settings, including `Kubernetes <../cluster/kubernetes/index.html>`_, `YARN <../cluster/vms/user-guides/community/yarn.html>`_, and `SLURM <../cluster/vms/user-guides/community/slurm.html>`_.
10 changes: 9 additions & 1 deletion doc/source/serve/advanced-guides/performance.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,15 @@ proper backpressure. You can increase the value in the deployment decorator; e.g

By default, Serve lets client HTTP requests run to completion no matter how long they take. However, slow requests could bottleneck the replica processing, blocking other requests that are waiting. Set an end-to-end timeout, so slow requests can be terminated and retried.

You can set an end-to-end timeout for HTTP requests by setting the `request_timeout_s` in the `http_options` field of the Serve config. HTTP Proxies wait for that many seconds before terminating an HTTP request. This config is global to your Ray cluster, and it can't be updated during runtime. Use [client-side retries](serve-best-practices-http-requests) to retry requests that time out due to transient failures.
You can set an end-to-end timeout for HTTP requests by setting the `request_timeout_s` parameter
in the `http_options` field of the Serve config. HTTP Proxies wait for that many
seconds before terminating an HTTP request. This config is global to your Ray cluster,
and you can't update it during runtime. Use [client-side retries](serve-best-practices-http-requests)
to retry requests that time out due to transient failures.

:::{note}
Serve returns a response with status code `408` when a request times out. Clients can retry when they receive this `408` response.
:::

### Give the Serve Controller more time to process requests

Expand Down
10 changes: 7 additions & 3 deletions python/ray/_private/runtime_env/uri_cache.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from typing import Set, Callable
from typing import Set, Callable, Optional

default_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -29,14 +29,18 @@ class URICache:

def __init__(
self,
delete_fn: Callable[[str, logging.Logger], int] = lambda uri, logger: 0,
delete_fn: Optional[Callable[[str, logging.Logger], int]] = None,
max_total_size_bytes: int = DEFAULT_MAX_URI_CACHE_SIZE_BYTES,
debug_mode: bool = False,
):
# Maps URIs to the size in bytes of their corresponding disk contents.
self._used_uris: Set[str] = set()
self._unused_uris: Set[str] = set()
self._delete_fn = delete_fn

if delete_fn is None:
self._delete_fn = lambda uri, logger: 0
else:
self._delete_fn = delete_fn

# Total size of both used and unused URIs in the cache.
self._total_size_bytes = 0
Expand Down
7 changes: 4 additions & 3 deletions python/ray/autoscaler/_private/load_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@ def add_resources(dict1: Dict[str, float], dict2: Dict[str, float]) -> Dict[str,
return new_dict


def freq_of_dicts(
dicts: List[Dict], serializer=lambda d: frozenset(d.items()), deserializer=dict
) -> DictCount:
def freq_of_dicts(dicts: List[Dict], serializer=None, deserializer=dict) -> DictCount:
"""Count a list of dictionaries (or unhashable types).
This is somewhat annoying because mutable data structures aren't hashable,
Expand All @@ -53,6 +51,9 @@ def freq_of_dicts(
is a tuple containing a unique entry from `dicts` and its
corresponding frequency count.
"""
if serializer is None:
serializer = lambda d: frozenset(d.items()) # noqa: E731

freqs = Counter(serializer(d) for d in dicts)
as_list = []
for as_set, count in freqs.items():
Expand Down
5 changes: 3 additions & 2 deletions python/ray/data/_internal/arrow_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,11 +278,12 @@ def to_numpy(
columns = [columns]
should_be_single_ndarray = True

column_names_set = set(self._table.column_names)
for column in columns:
if column not in self._table.column_names:
if column not in column_names_set:
raise ValueError(
f"Cannot find column {column}, available columns: "
f"{self._table.column_names}"
f"{column_names_set}"
)

arrays = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def __init__(
self._started = False
self._in_task_submission_backpressure = False
self._metrics = OpRuntimeMetrics(self)
self._estimated_output_blocks = None
self._estimated_num_output_bundles = None
self._execution_completed = False

def __reduce__(self):
Expand Down Expand Up @@ -269,8 +269,8 @@ def num_outputs_total(self) -> int:
The value returned may be an estimate based off the consumption so far.
This is useful for reporting progress.
"""
if self._estimated_output_blocks is not None:
return self._estimated_output_blocks
if self._estimated_num_output_bundles is not None:
return self._estimated_num_output_bundles
if len(self.input_dependencies) == 1:
return self.input_dependencies[0].num_outputs_total()
raise AttributeError
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def __init__(
assert input_data_factory is not None
self._input_data_factory = input_data_factory
self._is_input_initialized = False
self._num_output_blocks = num_output_blocks
self._input_data_index = 0
super().__init__("Input", [], target_max_block_size=None)

Expand All @@ -67,11 +66,8 @@ def _get_next_inner(self) -> RefBundle:
self._input_data_index += 1
return bundle

def _set_num_output_blocks(self, num_output_blocks):
self._num_output_blocks = num_output_blocks

def num_outputs_total(self) -> int:
return self._num_output_blocks or self._num_output_bundles
return self._num_output_bundles

def get_stats(self) -> StatsDict:
return {}
Expand Down
Loading

0 comments on commit 615b03a

Please sign in to comment.