diff --git a/torchvision/models/detection/mask_rcnn.py b/torchvision/models/detection/mask_rcnn.py
index 4f9f95aa868..f992b49fdc4 100644
--- a/torchvision/models/detection/mask_rcnn.py
+++ b/torchvision/models/detection/mask_rcnn.py
@@ -31,7 +31,7 @@ class MaskRCNN(FasterRCNN):
         - boxes (Tensor[N, 4]): the ground-truth boxes in [x0, y0, x1, y1] format, with values
           between 0 and H and 0 and W
         - labels (Tensor[N]): the class label for each ground-truth box
-        - masks (Tensor[N, H, W]): the segmentation binary masks for each instance
+        - masks (Tensor[N, 1, H, W]): the segmentation binary masks for each instance
 
     The model returns a Dict[Tensor] during training, containing the classification and regression
     losses for both the RPN and the R-CNN, and the mask loss.
@@ -43,7 +43,7 @@ class MaskRCNN(FasterRCNN):
           0 and H and 0 and W
         - labels (Tensor[N]): the predicted labels for each image
         - scores (Tensor[N]): the scores or each prediction
-        - masks (Tensor[N, H, W]): the predicted masks for each instance, in 0-1 range. In order to
+        - masks (Tensor[N, 1, H, W]): the predicted masks for each instance, in 0-1 range. In order to
           obtain the final segmentation masks, the soft masks can be thresholded, generally
           with a value of 0.5 (mask >= 0.5)
 