Lightning-Universe · tchaton · Jul 16, 2021 · Jul 15, 2021 · Jul 16, 2021 · Jul 16, 2021
@@ -20,6 +20,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Added `PointCloudSegmentation` Task ([#566](https://github.com/PyTorchLightning/lightning-flash/pull/566))
 
+- Added `PointCloudObjectDetection` Task ([#600](https://github.com/PyTorchLightning/lightning-flash/pull/600))
+
 - Added a `GraphClassifier` task ([#73](https://github.com/PyTorchLightning/lightning-flash/pull/73))
 
 - Added the option to pass `pretrained` as a string to `SemanticSegmentation` to change pretrained weights to load from `segmentation-models.pytorch` ([#587](https://github.com/PyTorchLightning/lightning-flash/pull/587))

@@ -23,3 +23,19 @@ ____________
     segmentation.data.PointCloudSegmentationPreprocess
     segmentation.data.PointCloudSegmentationFoldersDataSource
     segmentation.data.PointCloudSegmentationDatasetDataSource
+
+
+Object Detection
+________________
+
+.. autosummary::
+    :toctree: generated/
+    :nosignatures:
+    :template: classtemplate.rst
+
+    ~detection.model.PointCloudObjectDetector
+    ~detection.data.PointCloudObjectDetectorData
+
+    detection.data.PointCloudObjectDetectorPreprocess
+    detection.data.PointCloudObjectDetectorFoldersDataSource
+    detection.data.PointCloudObjectDetectorDatasetDataSource
@@ -60,6 +60,7 @@ Lightning Flash
    :caption: Point Cloud
 
    reference/pointcloud_segmentation
+   reference/pointcloud_object_detection
 
 .. toctree::
    :maxdepth: 1

@@ -0,0 +1,82 @@
+
+.. _pointcloud_object_detection:
+
+############################
+Point Cloud Object Detection
+############################
+
+********
+The Task
+********
+
+A Point Cloud is a set of data points in space, usually described by ``x``, ``y`` and ``z`` coordinates.
+
+PointCloud Object Detection is the task of identifying 3D objects in point clouds and their associated classes and 3D bounding boxes.
+
+The current integration builds on top of `Open3D-ML <https://github.com/intel-isl/Open3D-ML>`_.
+
+------
+
+*******
+Example
+*******
+
+Let's look at an example using a data set generated from the `KITTI Vision Benchmark  <http://www.semantic-kitti.org/dataset.html>`_.
+The data is a tiny subset of the original dataset and contains sequences of point clouds.
+
+The data contains:
+    *  one folder for scans
+    *  one folder for scan calibrations
+    *  one folder for labels
+    *  a meta.yaml file describing the classes and their official associated color map.
+
+Here's the structure:
+
+.. code-block::
+
+    data
+    ├── meta.yaml
+    ├── train
+    │   ├── scans
+    |   |    ├── 00000.bin
+    |   |    ├── 00001.bin
+    |   |    ...
+    │   ├── calibs
+    |   |    ├── 00000.txt
+    |   |    ├── 00001.txt
+    |   |   ...
+    │   ├── labels
+    |   |    ├── 00000.txt
+    |   |    ├── 00001.txt
+    │   ...
+    ├── val
+    │   ...
+    ├── predict
+        ├── scans
+        |   ├── 00000.bin
+        |   ├── 00001.bin
+        |
+        ├── calibs
+        |   ├── 00000.txt
+        |   ├── 00001.txt
+        ├── meta.yaml
+
+
+
+Learn more: http://www.semantic-kitti.org/dataset.html
+
+
+Once we've downloaded the data using :func:`~flash.core.data.download_data`, we create the :class:`~flash.pointcloud.detection.data.PointCloudObjectDetectorData`.
+We select a pre-trained ``randlanet_semantic_kitti`` backbone for our :class:`~flash.pointcloud.detection.model.PointCloudObjectDetector` task.
+We then use the trained :class:`~flash.pointcloud.detection.model.PointCloudObjectDetector` for inference.
+Finally, we save the model.
+Here's the full example:
+
+.. literalinclude:: ../../../flash_examples/pointcloud_detection.py
+    :language: python
+    :lines: 14-
+
+
+
+.. image:: https://raw.githubusercontent.com/intel-isl/Open3D-ML/master/docs/images/visualizer_BoundingBoxes.png
+   :width: 100%
@@ -176,6 +176,13 @@ def __hash__(self) -> int:
         return hash(self.value)
 
 
+class BaseDataFormat(LightningEnum):
+    """The base class for creating ``data_format`` for :class:`~flash.core.data.data_source.DataSource`."""
+
+    def __hash__(self) -> int:
+        """Return the hash of the member's underlying ``value``."""
+        # NOTE(review): presumably defined explicitly because ``LightningEnum`` customises
+        # equality, which would otherwise leave members unhashable — confirm.
+        return hash(self.value)
+
+
 class MockDataset:
     """The ``MockDataset`` catches any metadata that is attached through ``__setattr__``. This is passed to
     :meth:`~flash.core.data.data_source.DataSource.load_data` so that attributes can be set on the generated

@@ -4,6 +4,24 @@
 from flash.core.data.properties import ProcessState
 
 
+@dataclass(unsafe_hash=True, frozen=True)
+class PreTensorTransform(ProcessState):
+    """Frozen, hashable ``ProcessState`` carrying an optional ``transform`` callable.
+
+    NOTE(review): presumably the transform applied before samples are converted to tensors — confirm.
+    """
+
+    # The stored callable; ``None`` when no transform has been set.
+    transform: Optional[Callable] = None
+
+
+@dataclass(unsafe_hash=True, frozen=True)
+class ToTensorTransform(ProcessState):
+    """Frozen, hashable ``ProcessState`` carrying an optional ``transform`` callable.
+
+    NOTE(review): presumably the transform that converts samples to tensors — confirm.
+    """
+
+    # The stored callable; ``None`` when no transform has been set.
+    transform: Optional[Callable] = None
+
+
+@dataclass(unsafe_hash=True, frozen=True)
+class PostTensorTransform(ProcessState):
+    """Frozen, hashable ``ProcessState`` carrying an optional ``transform`` callable.
+
+    NOTE(review): presumably the transform applied after samples are converted to tensors — confirm.
+    """
+
+    # The stored callable; ``None`` when no transform has been set.
+    transform: Optional[Callable] = None
+
+
 @dataclass(unsafe_hash=True, frozen=True)
 class CollateFn(ProcessState):
 

@@ -188,21 +188,32 @@ def step(self, batch: Any, batch_idx: int, metrics: nn.ModuleDict) -> Any:
         losses = {name: l_fn(y_hat, y) for name, l_fn in self.loss_fn.items()}
         logs = {}
         y_hat = self.to_metrics_format(output["y_hat"])
+
+        logs = {}
+
         for name, metric in metrics.items():
             if isinstance(metric, torchmetrics.metric.Metric):
                 metric(y_hat, y)
                 logs[name] = metric  # log the metric itself if it is of type Metric
             else:
                 logs[name] = metric(y_hat, y)
-        logs.update(losses)
+
         if len(losses.values()) > 1:
             logs["total_loss"] = sum(losses.values())
             return logs["total_loss"], logs
-        output["loss"] = list(losses.values())[0]
-        output["logs"] = logs
+
+        output["loss"] = self.compute_loss(losses)
+        output["logs"] = self.compute_logs(logs, losses)
         output["y"] = y
         return output
 
+    def compute_loss(self, losses: Dict[str, torch.Tensor]) -> torch.Tensor:
+        """Select the loss value that ``step`` stores under ``output["loss"]``.
+
+        Args:
+            losses: Mapping from loss name to the computed loss value.
+
+        Returns:
+            The first value of ``losses`` in the dict's iteration order.
+
+        Raises:
+            IndexError: If ``losses`` is empty.
+        """
+        # NOTE(review): looks like an override point for tasks that combine losses differently — confirm.
+        return list(losses.values())[0]
+
+    def compute_logs(self, logs: Dict[str, Any], losses: Dict[str, torch.Tensor]):
+        """Build the dict that ``step`` stores under ``output["logs"]``.
+
+        Args:
+            logs: Mapping of metric name to logged value; updated in place.
+            losses: Mapping from loss name to the computed loss value.
+
+        Returns:
+            The same ``logs`` object, now also containing every entry of ``losses``.
+        """
+        # Entries in ``losses`` overwrite same-named entries already in ``logs``.
+        logs.update(losses)
+        return logs
+
     @staticmethod
     def apply_filtering(y: torch.Tensor, y_hat: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
         """This function is used to filter some labels or predictions which aren't conform."""

@@ -1,3 +1,4 @@
+from flash.pointcloud.detection.data import PointCloudObjectDetectorData  # noqa: F401
+from flash.pointcloud.detection.model import PointCloudObjectDetector  # noqa: F401
 from flash.pointcloud.segmentation.data import PointCloudSegmentationData  # noqa: F401
 from flash.pointcloud.segmentation.model import PointCloudSegmentation  # noqa: F401
-from flash.pointcloud.segmentation.open3d_ml.app import launch_app  # noqa: F401
@@ -0,0 +1,3 @@
+from flash.pointcloud.detection.data import PointCloudObjectDetectorData  # noqa: F401
+from flash.pointcloud.detection.model import PointCloudObjectDetector  # noqa: F401
+from flash.pointcloud.detection.open3d_ml.app import launch_app  # noqa: F401
@@ -0,0 +1,19 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from flash.core.registry import FlashRegistry
+from flash.pointcloud.detection.open3d_ml.backbones import register_open_3d_ml
+
+# Registry (named "backbones") for the point cloud object detection task.
+POINTCLOUD_OBJECT_DETECTION_BACKBONES = FlashRegistry("backbones")
+
+# Runs at import time; presumably populates the registry with the Open3D-ML backbones — confirm.
+register_open_3d_ml(POINTCLOUD_OBJECT_DETECTION_BACKBONES)