ray-project · richardliaw · Mar 15, 2023 · Mar 2, 2023 · Mar 6, 2023 · Mar 6, 2023
@@ -34,15 +34,6 @@ py_test(
     tags = ["exclusive", "team:ml"]
 )
 
-py_test(
-    name = "big_data_ingestion",
-    size = "small",
-    main = "test_myst_doc.py",
-    srcs = ["test_myst_doc.py"],
-    args = ["--path", "doc/source/data/examples/big_data_ingestion.ipynb"],
-    data = ["//doc/source/data/examples:data_examples"],
-    tags = ["exclusive", "team:core", "py37"]
-)
 
 py_test(
     name = "datasets_train",

@@ -94,8 +94,6 @@ parts:
                 title: Processing the NYC taxi dataset
               - file: data/examples/batch_training
                 title: Batch Training with Ray Datasets
-              - file: data/examples/big_data_ingestion
-                title: Large-scale ML Ingest
               - file: data/examples/ocr_example
                 title: Scaling OCR with Ray Datasets
               - file: data/examples/advanced-pipelines

@@ -34,7 +34,7 @@ Data Loading and Preprocessing for ML Training
 ----------------------------------------------
 
 Use Ray Datasets to load and preprocess data for distributed :ref:`ML training pipelines <train-docs>`.
-Compared to other loading solutions, Datasets are more flexible (e.g., can express higher-quality `per-epoch global shuffles <examples/big_data_ingestion.html>`__) and provides `higher overall performance <https://www.anyscale.com/blog/why-third-generation-ml-platforms-are-more-performant>`__.
+Compared to other loading solutions, Datasets are more flexible (e.g., can express higher-quality per-epoch global shuffles) and provide `higher overall performance <https://www.anyscale.com/blog/why-third-generation-ml-platforms-are-more-performant>`__.
 
 Use Datasets as a last-mile bridge from storage or ETL pipeline outputs to distributed 
 applications and libraries in Ray. Don't use it as a replacement for more general data 

@@ -5,20 +5,3 @@ filegroup(
     srcs = glob(["*.ipynb"]),
     visibility = ["//doc:__subpackages__"]
 )
-
-# --------------------------------------------------------------------
-# Test all doc/source/data/examples notebooks.
-# --------------------------------------------------------------------
-
-# big_data_ingestion.ipynb is not tested right now due to large resource requirements
-# and a need of a general overhaul.
-
-py_test_run_all_notebooks(
-    size = "large",
-    include = ["*.ipynb"],
-    exclude = [
-        "big_data_ingestion.ipynb",
-    ],
-    data = ["//doc/source/data/examples:data_examples"],
-    tags = ["exclusive", "team:ml"]
-)