Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AIR] Add TorchDetectionPredictor #32199

Merged
merged 25 commits into from
Feb 8, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
3203f0c
Initial commit
bveeramani Jan 26, 2023
28fa37c
Address review comments
bveeramani Jan 30, 2023
2f5fd21
Update utils.py
bveeramani Jan 31, 2023
679f3b1
Merge remote-tracking branch 'upstream/master' into promote-create-ra…
bveeramani Jan 31, 2023
e3a7630
Merge remote-tracking branch 'upstream/master' into promote-create-ra…
bveeramani Jan 31, 2023
10d78cf
Address review comments
bveeramani Jan 31, 2023
0d331c5
Update tensor_extension.py
bveeramani Jan 31, 2023
328b3f8
Update tensor_extension.py
bveeramani Jan 31, 2023
694764e
Merge remote-tracking branch 'upstream/master' into promote-create-ra…
bveeramani Feb 2, 2023
9855e94
Initial commit
bveeramani Feb 2, 2023
bbbda1f
Merge branch 'dtype-optional' into detection-predictor
bveeramani Feb 2, 2023
22b3aa9
Add `TorchDetectionPredictor`
bveeramani Feb 3, 2023
ab67b37
Fix test
bveeramani Feb 6, 2023
750cc0b
Address review comments
bveeramani Feb 6, 2023
edf14b8
Merge branch 'dtype-optional' into detection-predictor
bveeramani Feb 6, 2023
43693e5
Merge branch 'promote-create-ragged' into detection-predictor
bveeramani Feb 6, 2023
dd3a9eb
Address review comments
bveeramani Feb 7, 2023
6ecf830
Update torch_detection_predictor.py
bveeramani Feb 7, 2023
6374151
Fix Bazel
bveeramani Feb 8, 2023
c85f5d1
Merge remote-tracking branch 'upstream/master' into detection-predictor
bveeramani Feb 8, 2023
2f41527
Merge remote-tracking branch 'upstream/master' into detection-predictor
bveeramani Feb 8, 2023
bdf28f9
Update BUILD
bveeramani Feb 8, 2023
163d2af
Update torch_predictor.py
bveeramani Feb 8, 2023
a55bc0a
Merge remote-tracking branch 'upstream/master' into detection-predictor
bveeramani Feb 8, 2023
cea988f
Update torch_predictor.py
bveeramani Feb 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/data/api/data_representations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Batch API

.. autosummary::
:toctree: doc/

block.DataBatch

Row API
Expand All @@ -42,6 +42,7 @@ Tensor Column Extension API
.. autosummary::
:toctree: doc/

extensions.tensor_extension.create_ragged_ndarray
extensions.tensor_extension.TensorDtype
extensions.tensor_extension.TensorArray
extensions.tensor_extension.ArrowTensorType
Expand Down
14 changes: 12 additions & 2 deletions doc/source/train/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,21 @@ PyTorch
``TorchPredictor``
******************

.. automodule:: ray.train.torch
.. autoclass:: ray.train.torch.TorchPredictor
:members:
:exclude-members: TorchTrainer
:show-inheritance:

.. automethod:: __init__

``TorchDetectionPredictor``
***************************

.. autoclass:: ray.train.torch.TorchDetectionPredictor
:members:
:show-inheritance:

.. automethod:: __init__

Horovod
~~~~~~~

Expand Down
18 changes: 16 additions & 2 deletions python/ray/air/tests/test_tensor_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,22 @@
ArrowVariableShapedTensorType,
)
from ray.air.util.tensor_extensions.pandas import TensorArray, TensorDtype
from ray.air.util.tensor_extensions.utils import create_ragged_ndarray
from ray._private.utils import _get_pyarrow_version
from ray.air.util.tensor_extensions.utils import _create_strict_ragged_ndarray


@pytest.mark.parametrize(
    "values",
    [
        [np.zeros((3, 1)), np.zeros((3, 2))],
        [np.zeros((3,))],
    ],
)
def test_create_ragged_ndarray(values):
    """Check that ``create_ragged_ndarray`` returns a 1D object array of the inputs.

    ``np.array(values, dtype=object)`` consolidates a single input array into one
    multi-dimensional object array. Comparing only lengths and element values
    would not detect that, so the outer shape and the per-element dtype are
    asserted explicitly.
    """
    ragged_array = create_ragged_ndarray(values)

    # The result must be a flat array of pointers with one slot per input array.
    assert ragged_array.dtype == object
    assert ragged_array.shape == (len(values),)

    for actual_array, expected_array in zip(ragged_array, values):
        # Each element keeps its own dtype instead of being widened to object.
        assert actual_array.dtype == expected_array.dtype
        np.testing.assert_array_equal(actual_array, expected_array)


def test_tensor_array_validation():
Expand Down Expand Up @@ -582,7 +596,7 @@ def test_arrow_tensor_array_slice(test_arr, dtype):
for shape in pytest_tensor_array_concat_shapes
]
pytest_tensor_array_concat_arrs += [
_create_strict_ragged_ndarray(
create_ragged_ndarray(
[np.arange(4).reshape((2, 2)), np.arange(4, 13).reshape((3, 3))]
)
]
Expand Down
4 changes: 2 additions & 2 deletions python/ray/air/util/tensor_extensions/arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from ray.air.util.tensor_extensions.utils import (
_is_ndarray_variable_shaped_tensor,
_create_strict_ragged_ndarray,
create_ragged_ndarray,
)
from ray._private.utils import _get_pyarrow_version
from ray.util.annotations import PublicAPI
Expand Down Expand Up @@ -783,7 +783,7 @@ def _to_numpy(self, index: Optional[int] = None, zero_copy_only: bool = False):
arrs = [self._to_numpy(i, zero_copy_only) for i in range(len(self))]
# Return ragged NumPy ndarray in the ndarray of ndarray pointers
# representation.
return _create_strict_ragged_ndarray(arrs)
return create_ragged_ndarray(arrs)
data = self.storage.field("data")
shapes = self.storage.field("shape")

Expand Down
58 changes: 45 additions & 13 deletions python/ray/air/util/tensor_extensions/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import numpy as np

from ray.util import PublicAPI

if TYPE_CHECKING:
from pandas.core.dtypes.generic import ABCSeries

Expand Down Expand Up @@ -60,25 +62,55 @@ def _create_possibly_ragged_ndarray(
or "The requested array has an inhomogeneous shape" in error_str
):
# Fall back to strictly creating a ragged ndarray.
return _create_strict_ragged_ndarray(values)
return create_ragged_ndarray(values)
else:
# Re-raise original error if the failure wasn't a broadcast error.
raise e from None


def _create_strict_ragged_ndarray(values: Any) -> np.ndarray:
"""Create a ragged ndarray; the representation will be ragged (1D array of
subndarray pointers) even if it's possible to represent it as a non-ragged ndarray.
"""
# Use the create-empty-and-fill method. This avoids the following pitfalls of the
# np.array constructor - np.array(values, dtype=object):
# 1. It will fail to construct an ndarray if the first element dimension is
# uniform, e.g. for imagery whose first element dimension is the channel.
# 2. It will construct the wrong representation for a single-row column (i.e. unit
# outer dimension). Namely, it will consolidate it into a single multi-dimensional
# ndarray rather than a 1D array of subndarray pointers, resulting in the single
# row not being well-typed (having object dtype).
@PublicAPI(stability="alpha")
def create_ragged_ndarray(values: Sequence[np.ndarray]) -> np.ndarray:
"""Create an array that contains arrays of different lengths.

If you're working with variable-length arrays like images, use this function to
create ragged arrays instead of ``np.array``.

.. note::
``np.array`` fails to construct ragged arrays if the input arrays have a uniform
first dimension:

.. testsetup::

import numpy as np
from ray.air.util.tensor_extensions.utils import create_ragged_ndarray

.. doctest::

>>> values = [np.zeros((3, 1)), np.zeros((3, 2))]
>>> np.array(values, dtype=object)
Traceback (most recent call last):
...
ValueError: could not broadcast input array from shape (3,1) into shape (3,)
>>> create_ragged_ndarray(values)
array([array([[0.],
[0.],
[0.]]), array([[0., 0.],
[0., 0.],
[0., 0.]])], dtype=object)

Or if you're creating a ragged array from a single array:

.. doctest::

>>> values = [np.zeros((3, 1))]
>>> np.array(values, dtype=object)[0].dtype
dtype('O')
>>> create_ragged_ndarray(values)[0].dtype
dtype('float64')

``create_ragged_ndarray`` avoids the limitations of ``np.array`` by creating an
empty array and filling it with pointers to the variable-length arrays.
""" # noqa: E501
# Create an empty object-dtyped 1D array.
arr = np.empty(len(values), dtype=object)
# Try to fill the 1D array of pointers with the (ragged) tensors.
Expand Down
1 change: 1 addition & 0 deletions python/ray/data/extensions/tensor_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
ArrowVariableShapedTensorType,
ArrowVariableShapedTensorArray,
)
from ray.air.util.tensor_extensions.utils import create_ragged_ndarray # noqa: F401
12 changes: 7 additions & 5 deletions python/ray/train/_internal/dl_predictor.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import abc
from typing import Dict, TypeVar, Union
from typing import Dict, Optional, TypeVar, Union

import numpy as np
import pandas as pd

from ray.air.util.data_batch_conversion import (
BatchFormat,
convert_pandas_to_batch_type,
convert_batch_type_to_pandas,
convert_pandas_to_batch_type,
)
from ray.train.predictor import Predictor
from ray.util.annotations import DeveloperAPI
Expand All @@ -21,7 +21,7 @@ class DLPredictor(Predictor):
def _arrays_to_tensors(
self,
numpy_arrays: Union[np.ndarray, Dict[str, np.ndarray]],
dtype: Union[TensorDtype, Dict[str, TensorDtype]],
dtype: Optional[Union[TensorDtype, Dict[str, TensorDtype]]],
) -> Union[TensorType, Dict[str, TensorType]]:
"""Converts a NumPy ndarray batch to the tensor type for the DL framework.

Expand Down Expand Up @@ -72,7 +72,9 @@ def preferred_batch_format(cls) -> BatchFormat:
return BatchFormat.NUMPY

def _predict_pandas(
self, data: pd.DataFrame, dtype: Union[TensorDtype, Dict[str, TensorDtype]]
self,
data: pd.DataFrame,
dtype: Optional[Union[TensorDtype, Dict[str, TensorDtype]]],
) -> pd.DataFrame:
numpy_input = convert_pandas_to_batch_type(
data,
Expand All @@ -85,7 +87,7 @@ def _predict_pandas(
def _predict_numpy(
self,
data: Union[np.ndarray, Dict[str, np.ndarray]],
dtype: Union[TensorDtype, Dict[str, TensorDtype]],
dtype: Optional[Union[TensorDtype, Dict[str, TensorDtype]]],
) -> Union[np.ndarray, Dict[str, np.ndarray]]:
# Single-column selection returns a NumPy array so preprocessors can be
# reused in both training and prediction.
Expand Down
4 changes: 2 additions & 2 deletions python/ray/train/tensorflow/tensorflow_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,9 @@ def predict(
def _arrays_to_tensors(
self,
numpy_arrays: Union[np.ndarray, Dict[str, np.ndarray]],
dtypes: Union[tf.dtypes.DType, Dict[str, tf.dtypes.DType]],
dtype: Optional[Union[tf.dtypes.DType, Dict[str, tf.dtypes.DType]]],
) -> Union[tf.Tensor, Dict[str, tf.Tensor]]:
return convert_ndarray_batch_to_tf_tensor_batch(numpy_arrays, dtypes=dtypes)
return convert_ndarray_batch_to_tf_tensor_batch(numpy_arrays, dtypes=dtype)

def _tensor_to_array(self, tensor: tf.Tensor) -> np.ndarray:
if not isinstance(tensor, tf.Tensor):
Expand Down
59 changes: 59 additions & 0 deletions python/ray/train/tests/test_torch_detection_predictor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import numpy as np
import pytest
from torchvision import models

import ray
from ray.air.util.tensor_extensions.utils import create_ragged_ndarray
from ray.train.batch_predictor import BatchPredictor
from ray.train.torch import TorchCheckpoint, TorchDetectionPredictor


@pytest.fixture(name="predictor")
def predictor_fixture():
    """Yield a ``TorchDetectionPredictor`` built on torchvision's Mask R-CNN."""
    # The model is constructed with torchvision's default arguments.
    yield TorchDetectionPredictor(model=models.detection.maskrcnn_resnet50_fpn())


@pytest.mark.parametrize(
    "data",
    [
        np.zeros((1, 3, 32, 32), dtype=np.float32),
        {"image": np.zeros((1, 3, 32, 32), dtype=np.float32)},
        create_ragged_ndarray(
            [
                np.zeros((3, 32, 32), dtype=np.float32),
                np.zeros((3, 64, 64), dtype=np.float32),
            ]
        ),
    ],
)
def test_predict(predictor, data):
    """Check the structure of detection predictions for each supported input.

    Inputs covered: a plain image batch, a single-column dict batch, and a
    ragged batch of differently sized images.
    """
    predictions = predictor.predict(data)

    # For a dict batch, `len(data)` is the number of columns rather than the
    # number of images, so measure the batch size on the image column itself.
    images = next(iter(data.values())) if isinstance(data, dict) else data
    batch_size = len(images)

    # Every output column should contain one entry per input image.
    assert all(len(value) == batch_size for value in predictions.values())
    # Boxes should have shape `(# detections, 4)`.
    assert all(boxes.ndim == 2 for boxes in predictions["pred_boxes"])
    assert all(boxes.shape[-1] == 4 for boxes in predictions["pred_boxes"])
    # Labels should have shape `(# detections,)`.
    assert all(labels.ndim == 1 for labels in predictions["pred_labels"])
    # Scores should have shape `(# detections,)`.
    assert all(scores.ndim == 1 for scores in predictions["pred_scores"])
bveeramani marked this conversation as resolved.
Show resolved Hide resolved


def test_multi_column_batch_raises_value_error(predictor):
    """Predicting on a batch with more than one column raises ``ValueError``."""
    image_column = np.zeros((2, 3, 32, 32), dtype=np.float32)
    boxes_column = np.zeros((2, 0, 4), dtype=np.float32)
    labels_column = np.zeros((2, 0), dtype=np.int64)
    multi_column_batch = {
        "image": image_column,
        "boxes": boxes_column,
        "labels": labels_column,
    }
    # `data` should only contain one key. Otherwise, `TorchDetectionPredictor`
    # doesn't know which column contains the input images.
    with pytest.raises(ValueError):
        predictor.predict(multi_column_batch)


def test_invalid_dtype_raises_value_error(predictor):
    """Passing a NumPy dtype as ``dtype`` raises ``ValueError``."""
    single_image_batch = np.zeros(shape=(1, 3, 32, 32), dtype=np.float32)
    # `dtype` should be a single `torch.dtype`.
    with pytest.raises(ValueError):
        predictor.predict(single_image_batch, dtype=np.float32)
4 changes: 3 additions & 1 deletion python/ray/train/torch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
)
# isort: on

from ray.train.torch.torch_checkpoint import TorchCheckpoint
from ray.train.torch.config import TorchConfig
from ray.train.torch.torch_checkpoint import TorchCheckpoint
from ray.train.torch.torch_detection_predictor import TorchDetectionPredictor
from ray.train.torch.torch_predictor import TorchPredictor
from ray.train.torch.torch_trainer import TorchTrainer
from ray.train.torch.train_loop_utils import (
Expand All @@ -33,4 +34,5 @@
"backward",
"enable_reproducibility",
"TorchPredictor",
"TorchDetectionPredictor",
]
Loading