Adds raycasting-based camera implementation (#159)
This PR adds the `RayCasterCamera`, a ray-cast-based camera for the
"distance_to_camera", "distance_to_image_plane", and "normals"
annotations. It has the same interface and functionality as the USD
Camera wrapper while being, on average, 30% faster.

Fixes #131
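For context, a minimal usage sketch (hypothetical, not taken from this diff; the `update` call and the output layout are assumed from the existing sensor interface):

```python
# Sketch: read one of the supported annotations from the new sensor.
# Assumes `camera` is an initialized RayCasterCamera from
# omni.isaac.orbit.sensors, configured with "distance_to_image_plane",
# and that the simulation is already playing.
sim_dt = 0.01  # physics step used by the simulation
camera.update(dt=sim_dt)
depth = camera.data.output["distance_to_image_plane"]  # assumed shape (N, H, W)
```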

- Bug fix (non-breaking change which fixes an issue)
- New feature (non-breaking change which adds functionality)
- This change requires a documentation update

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./orbit.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my
feature works
- [x] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file

---------

Signed-off-by: Pascal Roth <57946385+pascal-roth@users.noreply.github.com>
Co-authored-by: Mayank Mittal <mittalma@leggedrobotics.com>
pascal-roth and Mayankm96 committed Nov 1, 2023
1 parent e7d43de commit 5c3fa70
Showing 20 changed files with 1,567 additions and 234 deletions.
2 changes: 1 addition & 1 deletion source/extensions/omni.isaac.orbit/config/extension.toml
@@ -1,7 +1,7 @@
[package]

# Note: Semantic Versioning is used: https://semver.org/
version = "0.9.30"
version = "0.9.31"

# Description
title = "ORBIT framework for Robot Learning"
12 changes: 11 additions & 1 deletion source/extensions/omni.isaac.orbit/docs/CHANGELOG.rst
@@ -1,6 +1,17 @@
Changelog
---------

0.9.31 (2023-11-02)
~~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added the :class:`omni.isaac.orbit.sensors.RayCasterCamera` class, a ray-casting-based camera for the
"distance_to_camera", "distance_to_image_plane" and "normals" annotations. It has the same interface and
functionality as the USD Camera, while being on average 30% faster.


0.9.30 (2023-11-01)
~~~~~~~~~~~~~~~~~~~

@@ -155,7 +166,6 @@ Added


0.9.18 (2023-10-23)
~~~~~~~~~~~~~~~~~~~

Added
^^^^^
@@ -19,7 +19,7 @@
radius=0.02,
visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(1.0, 0.0, 0.0)),
),
}
},
)
"""Configuration for the ray-caster marker."""

@@ -13,12 +13,10 @@
from typing_extensions import Literal

import omni.isaac.core.utils.prims as prim_utils
import omni.isaac.core.utils.stage as stage_utils
import omni.kit.commands
import omni.usd
from omni.isaac.core.prims import XFormPrimView
from omni.isaac.core.utils.rotations import gf_quat_to_np_array
from pxr import Gf, Usd, UsdGeom
from pxr import Usd, UsdGeom

# omni-isaac-orbit
from omni.isaac.orbit.utils import to_camel_case
@@ -27,7 +25,7 @@

from ..sensor_base import SensorBase
from .camera_data import CameraData
from .utils import convert_orientation_convention
from .utils import convert_orientation_convention, create_rotation_matrix_from_view

if TYPE_CHECKING:
from .camera_cfg import CameraCfg
@@ -66,6 +64,8 @@ class Camera(SensorBase):

cfg: CameraCfg
"""The configuration parameters."""
UNSUPPORTED_TYPES: set[str] = {"bounding_box_2d_tight", "bounding_box_2d_loose", "bounding_box_3d"}
"""The set of sensor types that are not supported by the camera class."""

def __init__(self, cfg: CameraCfg):
"""Initializes the camera sensor.
@@ -101,8 +101,7 @@ def __init__(self, cfg: CameraCfg):
self._data = CameraData()
# check if there is any intersection in unsupported types
# reason: these use np structured data types which we can't yet convert to torch tensor
unsupported_types = {"bounding_box_2d_tight", "bounding_box_2d_loose", "bounding_box_3d"}
common_elements = set(self.cfg.data_types) & unsupported_types
common_elements = set(self.cfg.data_types) & Camera.UNSUPPORTED_TYPES
if common_elements:
raise ValueError(
f"Camera class does not support the following sensor types: {common_elements}."
@@ -167,7 +166,7 @@ def image_shape(self) -> tuple[int, int]:
"""

def set_intrinsic_matrices(
self, matrices: torch.Tensor, focal_length: float = 1.0, indices: Sequence[int] | None = None
self, matrices: torch.Tensor, focal_length: float = 1.0, env_ids: Sequence[int] | None = None
):
"""Set parameters of the USD camera from its intrinsic matrix.
@@ -186,20 +185,15 @@
is not true in the input intrinsic matrix, then the camera will not set up correctly.
Args:
matrices: The intrinsic matrices for the camera. Shape is :math:`(N, 3, 3)`.
matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3).
focal_length: Focal length to use when computing aperture values. Defaults to 1.0.
indices: A list of indices of length :obj:`N` to specify the prims to manipulate.
Defaults to None, which means all prims will be manipulated.
env_ids: The sensor ids to manipulate. Defaults to None, which means all sensor indices.
"""
# resolve indices
# check camera prim exists
if not self._is_initialized:
raise RuntimeError("Camera is not initialized. Please call 'sim.play()' first.")
# resolve indices
if indices is None:
indices = self._ALL_INDICES
# iterate over indices
for i, matrix in zip(indices, matrices):
# resolve env_ids
if env_ids is None:
env_ids = self._ALL_INDICES
# iterate over env_ids
for i, matrix in zip(env_ids, matrices):
# convert to numpy for sanity
intrinsic_matrix = np.asarray(matrix, dtype=float)
# extract parameters from matrix
@@ -240,40 +234,35 @@ def set_world_poses(
self,
positions: torch.Tensor | None = None,
orientations: torch.Tensor | None = None,
indices: Sequence[int] | None = None,
env_ids: Sequence[int] | None = None,
convention: Literal["opengl", "ros", "world"] = "ros",
):
r"""Set the pose of the camera w.r.t. the world frame using specified convention.
Since different fields use different conventions for camera orientations, the method allows users to
set the camera poses in the specified convention. Possible conventions are:
- :obj:"opengl" - forward axis: -Z - up axis +Y - Offset is applied in the OpenGL (Usd.Camera) convention
- :obj:"ros" - forward axis: +Z - up axis -Y - Offset is applied in the ROS convention
- :obj:"world" - forward axis: +X - up axis +Z - Offset is applied in the World Frame convention
- :obj:`"opengl"` - forward axis: -Z - up axis +Y - Offset is applied in the OpenGL (Usd.Camera) convention
- :obj:`"ros"` - forward axis: +Z - up axis -Y - Offset is applied in the ROS convention
- :obj:`"world"` - forward axis: +X - up axis +Z - Offset is applied in the World Frame convention
See :meth:`omni.isaac.orbit.sensors.camera.utils.convert_orientation_convention` for more details
on the conventions.
Args:
positions: The cartesian coordinates (in meters). Shape is :math:`(N, 3)`.
positions: The cartesian coordinates (in meters). Shape is (N, 3).
Defaults to None, in which case the camera position is not changed.
orientations: The quaternion orientation in (w, x, y, z). Shape is :math:`(N, 4)`.
orientations: The quaternion orientation in (w, x, y, z). Shape is (N, 4).
Defaults to None, in which case the camera orientation is not changed.
indices: A list of indices of length :obj:`N` to specify the prims to manipulate.
Defaults to None, which means all prims will be manipulated.
convention: The convention in which the poses are fed.
Defaults to "ros".
env_ids: The sensor ids to manipulate. Defaults to None, which means all sensor indices.
convention: The convention in which the poses are fed. Defaults to "ros".
Raises:
RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first.
"""
# check camera prim exists
if not self._is_initialized:
raise RuntimeError("Camera is not initialized. Please call 'sim.play()' first.")
# resolve indices
if indices is None:
indices = self._ALL_INDICES
# resolve env_ids
if env_ids is None:
env_ids = self._ALL_INDICES
# convert to backend tensor
if positions is not None:
if isinstance(positions, np.ndarray):
@@ -288,71 +277,28 @@
orientations = torch.tensor(orientations, device=self._device)
orientations = convert_orientation_convention(orientations, origin=convention, target="opengl")
# set the pose
self._view.set_world_poses(positions, orientations, indices)
self._view.set_world_poses(positions, orientations, env_ids)
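A sketch of the updated call (pose values illustrative; assumes `camera` wraps two sensors and that the tensors live on the camera's device):

```python
# Place both cameras 2 m above the ground, level and facing +X, using the
# "world" convention (forward +X, up +Z). Quaternions are (w, x, y, z).
import torch

positions = torch.tensor([[0.0, 0.0, 2.0],
                          [1.0, 0.0, 2.0]])
orientations = torch.tensor([[1.0, 0.0, 0.0, 0.0],   # identity: look along +X
                             [1.0, 0.0, 0.0, 0.0]])
camera.set_world_poses(positions, orientations, convention="world")
```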

def set_world_poses_from_view(
self, eyes: torch.Tensor, targets: torch.Tensor, indices: Sequence[int] | None = None
self, eyes: torch.Tensor, targets: torch.Tensor, env_ids: Sequence[int] | None = None
):
"""Set the poses of the camera from the eye position and look-at target position.
Args:
eyes: The positions of the camera's eye. Shape is :math:`(N, 3)`.
targets: The target locations to look at. Shape is :math:`(N, 3)`.
indices: A list of indices of length :math:`N` to specify the prims to manipulate.
Defaults to None, which means all prims will be manipulated.
eyes: The positions of the camera's eye. Shape is (N, 3).
targets: The target locations to look at. Shape is (N, 3).
env_ids: The sensor ids to manipulate. Defaults to None, which means all sensor indices.
Raises:
RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first.
NotImplementedError: If the stage up-axis is not "Y" or "Z".
"""
# check camera prim exists
if not self._is_initialized:
raise RuntimeError("Camera is not initialized. Please 'sim.play()' first.")
# resolve indices
if indices is None:
indices = self._ALL_INDICES
# create tensors for storing poses
positions = torch.zeros((len(indices), 3), device=self._device)
orientations = torch.zeros((len(indices), 4), device=self._device)
# iterate over all indices
# TODO: Can we do this in parallel?
for i, eye, target in zip(indices, eyes, targets):
# compute camera's eye pose
eye_position = Gf.Vec3d(np.asarray(eye).tolist())
target_position = Gf.Vec3d(np.asarray(target).tolist())
# compute forward direction
forward_dir = (eye_position - target_position).GetNormalized()
# get up axis
up_axis_token = stage_utils.get_stage_up_axis()
if up_axis_token == UsdGeom.Tokens.y:
# deal with degenerate case
if forward_dir == Gf.Vec3d(0, 1, 0):
up_axis = Gf.Vec3d(0, 0, 1)
elif forward_dir == Gf.Vec3d(0, -1, 0):
up_axis = Gf.Vec3d(0, 0, -1)
else:
up_axis = Gf.Vec3d(0, 1, 0)
elif up_axis_token == UsdGeom.Tokens.z:
# deal with degenerate case
if forward_dir == Gf.Vec3d(0, 0, 1):
up_axis = Gf.Vec3d(0, 1, 0)
elif forward_dir == Gf.Vec3d(0, 0, -1):
up_axis = Gf.Vec3d(0, -1, 0)
else:
up_axis = Gf.Vec3d(0, 0, 1)
else:
raise NotImplementedError(f"This method is not supported for up-axis '{up_axis_token}'.")
# compute matrix transformation
# view matrix: camera_T_world
matrix_gf = Gf.Matrix4d(1).SetLookAt(eye_position, target_position, up_axis)
# camera position and rotation in world frame
matrix_gf = matrix_gf.GetInverse()
positions[i] = torch.from_numpy(np.asarray(matrix_gf.ExtractTranslation())).to(device=self._device)
orientations[i] = torch.from_numpy(gf_quat_to_np_array(matrix_gf.ExtractRotationQuat())).to(
device=self._device
)
# resolve env_ids
if env_ids is None:
env_ids = self._ALL_INDICES
# set camera poses using the view
self._view.set_world_poses(positions, orientations, indices)
orientations = quat_from_matrix(create_rotation_matrix_from_view(eyes, targets, device=self._device))
self._view.set_world_poses(eyes, orientations, env_ids)
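The batched helper replaces the per-camera USD `SetLookAt` loop deleted above. One plausible shape for such a helper — the actual `create_rotation_matrix_from_view` in `.utils` may differ, for instance in up-axis and degenerate-case handling:

```python
import torch
import torch.nn.functional as F

def look_at_rotation(eyes: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
    """Batched rotations, shape (N, 3, 3); columns are the camera basis axes
    expressed in the world frame. Assumes a Z-up stage and that no view
    direction is parallel to the up axis (degenerate case not handled)."""
    up = torch.tensor([0.0, 0.0, 1.0], device=eyes.device).expand_as(eyes)
    z_axis = F.normalize(targets - eyes, dim=-1)                   # view direction
    x_axis = F.normalize(torch.cross(up, z_axis, dim=-1), dim=-1)  # side axis
    y_axis = torch.cross(z_axis, x_axis, dim=-1)                   # recomputed up
    return torch.stack((x_axis, y_axis, z_axis), dim=-1)
```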

"""
Operations
@@ -401,7 +347,7 @@ def _initialize_impl(self):
f" the number of environments ({self._num_envs})."
)

# Create all indices buffer
# Create all env_ids buffer
self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long)
# Create frame count buffer
self._frame = torch.zeros(self._view.count, device=self._device, dtype=torch.long)
@@ -455,9 +401,6 @@ def _initialize_impl(self):
self._create_buffers()

def _update_buffers_impl(self, env_ids: Sequence[int]):
# check camera prim exists
if not self._is_initialized:
raise RuntimeError("Camera is not initialized. Please call 'sim.play()' first.")
# Increment frame count
self._frame[env_ids] += 1
# -- intrinsic matrix
@@ -493,9 +436,7 @@ def _create_buffers(self):
# create the data object
# -- pose of the cameras
self._data.pos_w = torch.zeros((self._view.count, 3), device=self._device)
self._data.quat_w_ros = torch.zeros((self._view.count, 4), device=self._device)
self._data.quat_w_world = torch.zeros_like(self._data.quat_w_ros)
self._data.quat_w_opengl = torch.zeros_like(self._data.quat_w_ros)
self._data.quat_w_world = torch.zeros((self._view.count, 4), device=self._device)
# -- intrinsic matrix
self._data.intrinsic_matrices = torch.zeros((self._view.count, 3, 3), device=self._device)
self._data.image_shape = self.image_shape
@@ -564,13 +505,9 @@ def _update_poses(self, env_ids: Sequence[int]):
# extract the position (convert it to SI units-- assumed that stage units is 1.0)
self._data.pos_w[env_ids] = torch.tensor(prim_tf_all[:, 0:3, 3], device=self._device, dtype=torch.float32)

# save opengl convention
# save quat in world convention
quat_opengl = quat_from_matrix(torch.tensor(prim_tf_all[:, 0:3, 0:3], device=self._device, dtype=torch.float32))
self._data.quat_w_opengl[env_ids] = quat_opengl

# save world and ros convention
self._data.quat_w_world[env_ids] = convert_orientation_convention(quat_opengl, origin="opengl", target="world")
self._data.quat_w_ros[env_ids] = convert_orientation_convention(quat_opengl, origin="opengl", target="ros")

def _create_annotator_data(self):
"""Create the buffers to store the annotator data.
@@ -10,6 +10,8 @@
from tensordict import TensorDict
from typing import Any

from .utils import convert_orientation_convention


@dataclass
class CameraData:
@@ -25,15 +27,6 @@ class CameraData:
Shape is (N, 3) where ``N`` is the number of sensors.
"""

quat_w_ros: torch.Tensor = None
"""Quaternion orientation `(w, x, y, z)` of the sensor origin in world frame, following ROS convention.
.. note::
ROS convention follows the camera aligned with forward axis +Z and up axis -Y.
Shape is (N, 4) where ``N`` is the number of sensors.
"""

quat_w_world: torch.Tensor = None
"""Quaternion orientation `(w, x, y, z)` of the sensor origin in world frame, following the world coordinate frame
@@ -43,15 +36,6 @@ class CameraData:
Shape is ``(N, 4)`` where ``N`` is the number of sensors.
"""

quat_w_opengl: torch.Tensor = None
"""Quaternion orientation `(w, x, y, z)` of the sensor origin in world frame, following Opengl / Usd.Camera convention
.. note::
OpenGL convention follows the camera aligned with forward axis -Z and up axis +Y.
Shape is ``(N, 4)`` where ``N`` is the number of sensors.
"""

##
# Camera data
##
@@ -81,3 +65,30 @@
For semantic-based data, this corresponds to the ``"info"`` key in the output of the sensor. For other sensor
types, the info is empty.
"""

##
# Additional Frame orientation conventions
##

@property
def quat_w_ros(self) -> torch.Tensor:
"""Quaternion orientation `(w, x, y, z)` of the sensor origin in the world frame, following ROS convention.
.. note::
ROS convention follows the camera aligned with forward axis +Z and up axis -Y.
Shape is (N, 4) where ``N`` is the number of sensors.
"""
return convert_orientation_convention(self.quat_w_world, origin="world", target="ros")

@property
def quat_w_opengl(self) -> torch.Tensor:
"""Quaternion orientation `(w, x, y, z)` of the sensor origin in the world frame, following
OpenGL / USD Camera convention.
.. note::
OpenGL convention follows the camera aligned with forward axis -Z and up axis +Y.
Shape is ``(N, 4)`` where ``N`` is the number of sensors.
"""
return convert_orientation_convention(self.quat_w_world, origin="world", target="opengl")