Merge branch 'master' into 0705-get_internal_block_refs

Signed-off-by: Scott Lee <sjl@anyscale.com>
ray-project · Jul 11, 2024 · 615b03a · 615b03a
2 parents fc0a334 + 306cd9e
commit 615b03a
Show file tree

Hide file tree

Showing 32 changed files with 223 additions and 101 deletions.
diff --git a/.buildkite/lint.rayci.yml b/.buildkite/lint.rayci.yml
@@ -26,11 +26,10 @@ steps:
       - lint
     key: lint-medium
     instance_type: medium
-    depends_on:
-      - oss-ci-base_build
+    depends_on: docbuild
+    job_env: docbuild
     commands:
       - ./ci/lint/lint.sh {{matrix}}
-    job_env: oss-ci-base_build
     matrix:
       - api_annotations
       - api_policy_check data

diff --git a/.buildkite/release-automation/pre_release.rayci.yml b/.buildkite/release-automation/pre_release.rayci.yml
@@ -2,6 +2,11 @@ group: Pre-release checks
 depends_on:
   - forge
 steps:
+  - block: "Run pre-release checks"
+    if: build.env("RAY_VERSION") != null
+    key: block-pre-release-checks
+    depends_on: []
+
   - label: "Check release blockers"
     key: check-release-blockers
     instance_type: small_branch

diff --git a/SECURITY.md b/SECURITY.md
@@ -1,6 +1,6 @@
 # Security Policy
 
-If you are deploying Ray, read [Security](ray-security/index.html).
+If you are deploying Ray, read [Security](doc/source/ray-security/index.md).
 
 ## Reporting a vulnerability
 

diff --git a/ci/lint/lint.sh b/ci/lint/lint.sh
@@ -63,7 +63,10 @@ api_annotations() {
 }
 
 api_policy_check() {
+  # install ray and compile doc to generate API files
   RAY_DISABLE_EXTRA_CPP=1 pip install -e "python[all]"
+  make -C doc/ html
+  # validate the API files
   bazel run //ci/ray_ci/doc:cmd_check_api_discrepancy -- /ray "$@"
 }
 

diff --git a/ci/ray_ci/doc/autodoc.py b/ci/ray_ci/doc/autodoc.py
@@ -1,6 +1,6 @@
 import os
 import re
-from typing import List
+from typing import List, Set
 
 from ci.ray_ci.doc.api import (
     API,
@@ -11,6 +11,7 @@
 
 _SPHINX_CURRENTMODULE_HEADER = ".. currentmodule::"
 _SPHINX_TOCTREE_HEADER = ".. toctree::"
+_SPHINX_INCLUDE_HEADER = ".. include::"
 
 
 class Autodoc:
@@ -42,26 +43,60 @@ def walk(self) -> None:
         for rst in rsts:
             self._apis.extend(self._parse_autodoc_rst(rst))
 
-    def _get_autodoc_rsts(self) -> List[str]:
+    def _get_autodoc_rsts(self) -> Set[str]:
+        """
+        Recursively parse the head_rst_file to find all the autodoc rsts
+        """
+        if self._autodoc_rsts is not None:
+            return self._autodoc_rsts
+
+        self._autodoc_rsts = {self._head_rst_file}
+        visit_current = {self._head_rst_file}
+        while visit_current:
+            visit_next = set()
+            for rst in visit_current:
+                for child_rst in self._get_autodoc_rsts_in_file(rst):
+                    if child_rst not in self._autodoc_rsts:
+                        self._autodoc_rsts.add(child_rst)
+                        visit_next.add(child_rst)
+            visit_current = visit_next
+
+        return self._autodoc_rsts
+
+    def _get_autodoc_rsts_in_file(self, rst_file: str) -> Set[str]:
         """
         Parse the list of rst declared in the head_rst_file, for example:
 
+        .. include:: area_00.rst
+
         .. toctree::
             :option
 
             area_01.rst
             area_02.rst
         """
-        if self._autodoc_rsts is not None:
-            return self._autodoc_rsts
+        if not os.path.exists(rst_file):
+            return set()
 
-        dir = os.path.dirname(self._head_rst_file)
-        self._autodoc_rsts = [self._head_rst_file]
-        with open(self._head_rst_file, "r") as f:
+        rsts = set()
+        dir = os.path.dirname(rst_file)
+        with open(rst_file, "r") as f:
             line = f.readline()
             while line:
+                line = line.strip()
+
+                # look for the include block
+                if line.startswith(_SPHINX_INCLUDE_HEADER):
+                    rsts.add(
+                        os.path.join(
+                            dir, line.removeprefix(_SPHINX_INCLUDE_HEADER).strip()
+                        )
+                    )
+                    line = f.readline()
+                    continue
+
                 # look for the toctree block
-                if not line.strip() == _SPHINX_TOCTREE_HEADER:
+                if not line == _SPHINX_TOCTREE_HEADER:
                     line = f.readline()
                     continue
 
@@ -73,10 +108,10 @@ def _get_autodoc_rsts(self) -> List[str]:
                         # the line is not empty and not starting with empty space
                         break
                     if line.strip().endswith(".rst"):
-                        self._autodoc_rsts.append(os.path.join(dir, line.strip()))
+                        rsts.add(os.path.join(dir, line.strip()))
                     line = f.readline()
 
-        return self._autodoc_rsts
+        return rsts
 
     def _parse_autodoc_rst(self, rst_file: str) -> List[API]:
         """
@@ -92,6 +127,9 @@ def _parse_autodoc_rst(self, rst_file: str) -> List[API]:
             myclass.myfunc_01
             myclass.myfunc_02
         """
+        if not os.path.exists(rst_file):
+            return []
+
         apis = []
         module = None
         with open(rst_file, "r") as f:

diff --git a/ci/ray_ci/doc/cmd_check_api_discrepancy.py b/ci/ray_ci/doc/cmd_check_api_discrepancy.py
@@ -16,14 +16,6 @@
             "ray.data.dataset.MaterializedDataset",
             # special case where we cannot deprecate although we want to
             "ray.data.random_access_dataset.RandomAccessDataset",
-            # TODO(can): apis that are auto-generated so falsely classified as not
-            # documented with the current logic
-            "ray.data.iterator.DataIterator",
-            "ray.data.iterator.DataIterator.iter_batches",
-            "ray.data.iterator.DataIterator.iter_torch_batches",
-            "ray.data.iterator.DataIterator.to_tf",
-            "ray.data.iterator.DataIterator.materialize",
-            "ray.data.iterator.DataIterator.stats",
         },
     },
 }

diff --git a/ci/ray_ci/doc/test_autodoc.py b/ci/ray_ci/doc/test_autodoc.py
@@ -15,48 +15,68 @@ def test_walk():
             f.write("\tapi_01.rst\n")
             f.write("\tapi_02.rst\n")
         with open(os.path.join(tmp, "api_01.rst"), "w") as f:
+            f.write(".. include:: api_03.rst\n")
             f.write(".. currentmodule:: ci.ray_ci.doc\n")
             f.write(".. autosummary::\n")
-            f.write("\t~mock.mock_function\n")
             f.write("\tmock.mock_module.mock_w00t\n")
         with open(os.path.join(tmp, "api_02.rst"), "w") as f:
             f.write(".. currentmodule:: ci.ray_ci.doc.mock\n")
             f.write(".. autoclass:: MockClass\n")
+        with open(os.path.join(tmp, "api_03.rst"), "w") as f:
+            f.write(".. currentmodule:: ci.ray_ci.doc\n")
+            f.write(".. autosummary::\n")
+            f.write("\t~mock.mock_function\n")
 
         autodoc = Autodoc(os.path.join(tmp, "head.rst"))
-        apis = autodoc.get_apis()
+        apis = sorted(autodoc.get_apis(), key=lambda x: x.name)
         assert str(apis) == str(
             [
                 API(
-                    name="ci.ray_ci.doc.mock.mock_function",
+                    name="ci.ray_ci.doc.mock.MockClass",
                     annotation_type=AnnotationType.PUBLIC_API,
-                    code_type=CodeType.FUNCTION,
+                    code_type=CodeType.CLASS,
                 ),
                 API(
-                    name="ci.ray_ci.doc.mock.mock_module.mock_w00t",
+                    name="ci.ray_ci.doc.mock.mock_function",
                     annotation_type=AnnotationType.PUBLIC_API,
                     code_type=CodeType.FUNCTION,
                 ),
                 API(
-                    name="ci.ray_ci.doc.mock.MockClass",
+                    name="ci.ray_ci.doc.mock.mock_module.mock_w00t",
                     annotation_type=AnnotationType.PUBLIC_API,
-                    code_type=CodeType.CLASS,
+                    code_type=CodeType.FUNCTION,
                 ),
             ]
         )
         assert (
             apis[0].get_canonical_name()
-            == f"{mock_function.__module__}.{mock_function.__qualname__}"
+            == f"{MockClass.__module__}.{MockClass.__qualname__}"
         )
         assert (
             apis[1].get_canonical_name()
-            == f"{mock_w00t.__module__}.{mock_w00t.__qualname__}"
+            == f"{mock_function.__module__}.{mock_function.__qualname__}"
         )
         assert (
             apis[2].get_canonical_name()
-            == f"{MockClass.__module__}.{MockClass.__qualname__}"
+            == f"{mock_w00t.__module__}.{mock_w00t.__qualname__}"
         )
 
 
+def test_get_autodoc_rsts_in_file():
+    with tempfile.TemporaryDirectory() as tmp:
+        with open(os.path.join(tmp, "head.rst"), "w") as f:
+            f.write(".. include:: api_00.rst\n")
+            f.write(".. toctree::\n\n")
+            f.write("\tapi_01.rst\n")
+            f.write("\tapi_02.rst\n")
+
+        autodoc = Autodoc("head.rst")
+        sorted(autodoc._get_autodoc_rsts_in_file(os.path.join(tmp, "head.rst"))) == {
+            os.path.join(tmp, "api_00.rst"),
+            os.path.join(tmp, "api_01.rst"),
+            os.path.join(tmp, "api_02.rst"),
+        }
+
+
 if __name__ == "__main__":
     sys.exit(pytest.main(["-vv", __file__]))
diff --git a/doc/source/_templates/extrahead.html b/doc/source/_templates/extrahead.html
@@ -53,7 +53,7 @@
     j.async = true;
     j.src = 'https://www.googletagmanager.com/gtm.js?id=' + i + dl;
     f.parentNode.insertBefore(j, f);
-  })(window, document, 'script', 'dataLayer', 'GTM-TQX5VVP7');
+  })(window, document, 'script', 'dataLayer', 'GTM-P8H6KQG');
 </script>
 <!-- End Google Tag Manager -->
 <!-- Data to be shared with JS on every page -->

diff --git a/doc/source/ray-core/starting-ray.rst b/doc/source/ray-core/starting-ray.rst
@@ -260,4 +260,4 @@ Note that the machine calling ``ray up`` will not be considered as part of the R
 What's next?
 ------------
 
-Check out our `Deployment section <cluster/index.html>`_ for more information on deploying Ray in different settings, including Kubernetes, YARN, and SLURM.
+Check out our `Deployment section <../cluster/getting-started.html>`_ for more information on deploying Ray in different settings, including `Kubernetes <../cluster/kubernetes/index.html>`_, `YARN <../cluster/vms/user-guides/community/yarn.html>`_, and `SLURM <../cluster/vms/user-guides/community/slurm.html>`_.
diff --git a/doc/source/serve/advanced-guides/performance.md b/doc/source/serve/advanced-guides/performance.md
@@ -55,7 +55,15 @@ proper backpressure. You can increase the value in the deployment decorator; e.g
 
 By default, Serve lets client HTTP requests run to completion no matter how long they take. However, slow requests could bottleneck the replica processing, blocking other requests that are waiting. Set an end-to-end timeout, so slow requests can be terminated and retried.
 
-You can set an end-to-end timeout for HTTP requests by setting the `request_timeout_s` in the `http_options` field of the Serve config. HTTP Proxies wait for that many seconds before terminating an HTTP request. This config is global to your Ray cluster, and it can't be updated during runtime. Use [client-side retries](serve-best-practices-http-requests) to retry requests that time out due to transient failures.
+You can set an end-to-end timeout for HTTP requests by setting the `request_timeout_s` parameter
+in the `http_options` field of the Serve config. HTTP Proxies wait for that many
+seconds before terminating an HTTP request. This config is global to your Ray cluster,
+and you can't update it during runtime. Use [client-side retries](serve-best-practices-http-requests)
+to retry requests that time out due to transient failures.
+
+:::{note}
+Serve returns a response with status code `408` when a request times out. Clients can retry when they receive this `408` response.
+:::
 
 ### Give the Serve Controller more time to process requests
 

diff --git a/python/ray/_private/runtime_env/uri_cache.py b/python/ray/_private/runtime_env/uri_cache.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Set, Callable
+from typing import Set, Callable, Optional
 
 default_logger = logging.getLogger(__name__)
 
@@ -29,14 +29,18 @@ class URICache:
 
     def __init__(
         self,
-        delete_fn: Callable[[str, logging.Logger], int] = lambda uri, logger: 0,
+        delete_fn: Optional[Callable[[str, logging.Logger], int]] = None,
         max_total_size_bytes: int = DEFAULT_MAX_URI_CACHE_SIZE_BYTES,
         debug_mode: bool = False,
     ):
         # Maps URIs to the size in bytes of their corresponding disk contents.
         self._used_uris: Set[str] = set()
         self._unused_uris: Set[str] = set()
-        self._delete_fn = delete_fn
+
+        if delete_fn is None:
+            self._delete_fn = lambda uri, logger: 0
+        else:
+            self._delete_fn = delete_fn
 
         # Total size of both used and unused URIs in the cache.
         self._total_size_bytes = 0

diff --git a/python/ray/autoscaler/_private/load_metrics.py b/python/ray/autoscaler/_private/load_metrics.py
@@ -32,9 +32,7 @@ def add_resources(dict1: Dict[str, float], dict2: Dict[str, float]) -> Dict[str,
     return new_dict
 
 
-def freq_of_dicts(
-    dicts: List[Dict], serializer=lambda d: frozenset(d.items()), deserializer=dict
-) -> DictCount:
+def freq_of_dicts(dicts: List[Dict], serializer=None, deserializer=dict) -> DictCount:
     """Count a list of dictionaries (or unhashable types).
 
     This is somewhat annoying because mutable data structures aren't hashable,
@@ -53,6 +51,9 @@ def freq_of_dicts(
             is a tuple containing a unique entry from `dicts` and its
             corresponding frequency count.
     """
+    if serializer is None:
+        serializer = lambda d: frozenset(d.items())  # noqa: E731
+
     freqs = Counter(serializer(d) for d in dicts)
     as_list = []
     for as_set, count in freqs.items():

diff --git a/python/ray/data/_internal/arrow_block.py b/python/ray/data/_internal/arrow_block.py
@@ -278,11 +278,12 @@ def to_numpy(
             columns = [columns]
             should_be_single_ndarray = True
 
+        column_names_set = set(self._table.column_names)
         for column in columns:
-            if column not in self._table.column_names:
+            if column not in column_names_set:
                 raise ValueError(
                     f"Cannot find column {column}, available columns: "
-                    f"{self._table.column_names}"
+                    f"{column_names_set}"
                 )
 
         arrays = []

diff --git a/python/ray/data/_internal/execution/interfaces/physical_operator.py b/python/ray/data/_internal/execution/interfaces/physical_operator.py
@@ -184,7 +184,7 @@ def __init__(
         self._started = False
         self._in_task_submission_backpressure = False
         self._metrics = OpRuntimeMetrics(self)
-        self._estimated_output_blocks = None
+        self._estimated_num_output_bundles = None
         self._execution_completed = False
 
     def __reduce__(self):
@@ -269,8 +269,8 @@ def num_outputs_total(self) -> int:
         The value returned may be an estimate based off the consumption so far.
         This is useful for reporting progress.
         """
-        if self._estimated_output_blocks is not None:
-            return self._estimated_output_blocks
+        if self._estimated_num_output_bundles is not None:
+            return self._estimated_num_output_bundles
         if len(self.input_dependencies) == 1:
             return self.input_dependencies[0].num_outputs_total()
         raise AttributeError

diff --git a/python/ray/data/_internal/execution/operators/input_data_buffer.py b/python/ray/data/_internal/execution/operators/input_data_buffer.py
@@ -40,7 +40,6 @@ def __init__(
             assert input_data_factory is not None
             self._input_data_factory = input_data_factory
             self._is_input_initialized = False
-        self._num_output_blocks = num_output_blocks
         self._input_data_index = 0
         super().__init__("Input", [], target_max_block_size=None)
 
@@ -67,11 +66,8 @@ def _get_next_inner(self) -> RefBundle:
         self._input_data_index += 1
         return bundle
 
-    def _set_num_output_blocks(self, num_output_blocks):
-        self._num_output_blocks = num_output_blocks
-
     def num_outputs_total(self) -> int:
-        return self._num_output_blocks or self._num_output_bundles
+        return self._num_output_bundles
 
     def get_stats(self) -> StatsDict:
         return {}