diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index cd3ce5945a6..92366352b92 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -32,8 +32,8 @@ class FasterRCNN(GeneralizedRCNN): During training, the model expects both the input tensors, as well as a targets (list of dictionary), containing: - - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values - between 0 and H and 0 and W + - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values of x + between 0 and W and values of y between 0 and H - labels (Int64Tensor[N]): the class label for each ground-truth box The model returns a Dict[Tensor] during training, containing the classification and regression @@ -42,8 +42,8 @@ class FasterRCNN(GeneralizedRCNN): During inference, the model requires only the input tensors, and returns the post-processed predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as follows: - - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values between - 0 and H and 0 and W + - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values of x + between 0 and W and values of y between 0 and H - labels (Int64Tensor[N]): the predicted labels for each image - scores (Tensor[N]): the scores or each prediction @@ -300,8 +300,8 @@ def fasterrcnn_resnet50_fpn(pretrained=False, progress=True, During training, the model expects both the input tensors, as well as a targets (list of dictionary), containing: - - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values - between ``0`` and ``H`` and ``0`` and ``W`` + - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x`` + between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H`` - labels (``Int64Tensor[N]``): the class label for each ground-truth box The model returns a ``Dict[Tensor]`` during training, containing the classification and regression @@ -310,8 +310,8 @@ def fasterrcnn_resnet50_fpn(pretrained=False, progress=True, During inference, the model requires only the input tensors, and returns the post-processed predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as follows: - - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values between - ``0`` and ``H`` and ``0`` and ``W`` + - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x`` + between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H`` - labels (``Int64Tensor[N]``): the predicted labels for each image - scores (``Tensor[N]``): the scores or each prediction diff --git a/torchvision/models/detection/mask_rcnn.py b/torchvision/models/detection/mask_rcnn.py index 90e02405b47..a8a980fa3ce 100644 --- a/torchvision/models/detection/mask_rcnn.py +++ b/torchvision/models/detection/mask_rcnn.py @@ -28,8 +28,8 @@ class MaskRCNN(FasterRCNN): During training, the model expects both the input tensors, as well as a targets (list of dictionary), containing: - - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values - between 0 and H and 0 and W + - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values of x + between 0 and W and values of y between 0 and H - labels (Int64Tensor[N]): the class label for each ground-truth box - masks (UInt8Tensor[N, H, W]): the segmentation binary masks for each instance @@ -39,8 +39,8 @@ class MaskRCNN(FasterRCNN): During inference, the model requires only the input tensors, and returns the post-processed predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as follows: - - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values between - 0 and H and 0 and W + - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values of x + between 0 and W and values of y between 0 and H - labels (Int64Tensor[N]): the predicted labels for each image - scores (Tensor[N]): the scores or each prediction - masks (UInt8Tensor[N, 1, H, W]): the predicted masks for each instance, in 0-1 range. In order to @@ -276,8 +276,8 @@ def maskrcnn_resnet50_fpn(pretrained=False, progress=True, During training, the model expects both the input tensors, as well as a targets (list of dictionary), containing: - - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values - between ``0`` and ``H`` and ``0`` and ``W`` + - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x`` + between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H`` - labels (``Int64Tensor[N]``): the class label for each ground-truth box - masks (``UInt8Tensor[N, H, W]``): the segmentation binary masks for each instance @@ -287,8 +287,8 @@ def maskrcnn_resnet50_fpn(pretrained=False, progress=True, During inference, the model requires only the input tensors, and returns the post-processed predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as follows: - - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values between - ``0`` and ``H`` and ``0`` and ``W`` + - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x`` + between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H`` - labels (``Int64Tensor[N]``): the predicted labels for each image - scores (``Tensor[N]``): the scores or each prediction - masks (``UInt8Tensor[N, 1, H, W]``): the predicted masks for each instance, in ``0-1`` range. In order to