diff --git a/docs/source/ops.rst b/docs/source/ops.rst
index 7124c85bb79..c73aadf4cd8 100644
--- a/docs/source/ops.rst
+++ b/docs/source/ops.rst
@@ -22,6 +22,7 @@ The below operators perform pre-processing as well as post-processing required i
 
     batched_nms
     masks_to_boxes
+    masks_to_boundaries
     nms
     roi_align
     roi_pool
diff --git a/test/conftest.py b/test/conftest.py
index a9768598ded..53f2e8a60b0 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -82,6 +82,10 @@ def pytest_collection_modifyitems(items):
     items[:] = out_items
 
 
+def pytest_addoption(parser):
+    parser.addoption("--debug-images", action="store_true", help="Enable debug mode for saving images.")
+
+
 def pytest_sessionfinish(session, exitstatus):
     # This hook is called after all tests have run, and just before returning an exit status.
     # We here change exit code 5 into 0.
diff --git a/test/test_ops.py b/test/test_ops.py
index 1ba7a2c9efa..512f426a753 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -1,3 +1,4 @@
+import logging
 import math
 import os
 from abc import ABC, abstractmethod
@@ -7,12 +8,13 @@
 
 import numpy as np
 import pytest
+import scipy.ndimage
 import torch
 import torch.fx
 import torch.nn.functional as F
 import torch.testing._internal.optests as optests
 from common_utils import assert_equal, cpu_and_cuda, cpu_and_cuda_and_mps, needs_cuda, needs_mps
-from PIL import Image
+from PIL import Image, ImageDraw
 from torch import nn, Tensor
 from torch._dynamo.utils import is_compile_supported
 from torch.autograd import gradcheck
@@ -741,6 +743,111 @@ def test_is_leaf_node(self, device):
         assert len(graph_node_names[0]) == 1 + op_obj.n_inputs
 
 
+import matplotlib.pyplot as plt
+
+
+class TestMasksToBoundaries(ABC):
+    def save_and_images(
+        self, original_masks, expected_boundaries, actual_boundaries, diff, filename_prefix, visualize=True
+    ):
+        """
+        Saves images separately for original masks, expected boundaries, actual boundaries, and their difference.
+
+        Parameters:
+        - original_masks: The starting binary masks tensor.
+        - expected_boundaries: The expected boundaries tensor.
+        - actual_boundaries: The actual boundaries tensor calculated by the function.
+        - diff: The absolute difference between expected and actual boundaries.
+        - filename_prefix: Prefix for the saved filename.
+        - visualize: Flag to enable or disable visualization.
+        """
+        # Ensure directory exists
+        output_dir = "test_outputs"
+        os.makedirs(output_dir, exist_ok=True)
+        filepath_prefix = os.path.join(output_dir, filename_prefix)
+
+        num_images = original_masks.shape[0]
+
+        original_masks = original_masks.cpu().numpy() if original_masks.is_cuda else original_masks.numpy()
+        expected_boundaries = (
+            expected_boundaries.cpu().numpy() if expected_boundaries.is_cuda else expected_boundaries.numpy()
+        )
+        actual_boundaries = actual_boundaries.cpu().numpy() if actual_boundaries.is_cuda else actual_boundaries.numpy()
+        diff = diff.cpu().numpy() if diff.is_cuda else diff.numpy()
+
+        # Plot and save each image separately
+        for i in range(num_images):
+            original = original_masks[i].squeeze()
+            expected = expected_boundaries[i].squeeze()
+            actual = actual_boundaries[i].squeeze()
+            difference = diff[i].squeeze()
+
+            if visualize:
+                # Plotting
+                fig, axes = plt.subplots(1, 4, figsize=(20, 5))
+                titles = ["Original Mask", "Expected Boundaries", "Actual Boundaries", "Absolute Difference"]
+                images = [original, expected, actual, difference]
+
+                for ax, img, title in zip(axes, images, titles):
+                    ax.imshow(img, cmap="gray", interpolation="nearest")
+                    ax.axis("off")
+                    ax.set_title(title)
+
+                plt.subplots_adjust(top=0.85)
+
+                # Save the figure
+                fig.tight_layout()
+                plt.savefig(f"{filepath_prefix}_image_{i}.png", bbox_inches="tight")
+                plt.close(fig)
+
+    @pytest.mark.parametrize("device", ["cpu", "cuda"])
+    @pytest.mark.parametrize("kernel_size", [3, 5])  # Example kernel sizes
+    @pytest.mark.parametrize("canvas_size", [32, 64])  # Example canvas sizes
+    @pytest.mark.parametrize("batch_size", [1, 4])  # Parametrizing over batch sizes, e.g., 1 and 4
+    def test_masks_to_boundaries(self, request, tmpdir, device, kernel_size, canvas_size, batch_size):
+        if device == "cuda" and not torch.cuda.is_available():
+            pytest.skip("CUDA is not available on this system.")
+        debug_mode = request.config.getoption("--debug-images")
+        # Create masks with the specified canvas size and batch size
+        mask = torch.zeros(batch_size, canvas_size, canvas_size, dtype=torch.bool)
+
+        for b in range(batch_size):
+            if b % 4 == 0:
+                mask[b, 1:10, 1:10] = True
+            elif b % 4 == 1:
+                mask[b, 15:23, 15:23] = True
+            elif b % 4 == 2:
+                mask[b, 1:5, 22:30] = True
+            elif b % 4 == 3:
+                pil_img = Image.new("L", (canvas_size, canvas_size))
+                draw = ImageDraw.Draw(pil_img)
+                draw.ellipse([2, 7, min(26, canvas_size - 6), min(26, canvas_size - 6)], fill=1, outline=1, width=1)
+                ellipse_mask = torch.from_numpy(np.array(pil_img, dtype=np.uint8)).bool()
+                mask[b, ...] = ellipse_mask
+        mask = mask.to(device)
+        actual_boundaries = ops.masks_to_boundaries(mask, kernel_size)
+        expected_boundaries = torch.zeros_like(mask)
+        struct = np.ones((kernel_size, kernel_size), dtype=np.uint8)
+
+        # Calculate expected boundaries using scipy's binary_erosion
+        for i in range(batch_size):
+            single_mask = mask[i].cpu().numpy()
+            eroded_mask = scipy.ndimage.binary_erosion(single_mask, structure=struct, border_value=0)
+            single_expected_boundary = single_mask ^ eroded_mask
+            expected_boundaries[i] = torch.from_numpy(single_expected_boundary).to(device)
+
+        if debug_mode:
+            diff = torch.abs(expected_boundaries.float() - actual_boundaries.float())
+            filename_prefix = f"kernel_{kernel_size}_canvas_{canvas_size}_batch_{batch_size}"
+            output_file_path = tmpdir.join(f"{filename_prefix}.png")
+            # Log the path where the debug image will be saved
+            logging.info(f"Debug image saved at: {output_file_path}")
+
+            self.save_and_images(mask, expected_boundaries, actual_boundaries, diff, str(output_file_path))
+
+        torch.testing.assert_close(actual_boundaries, expected_boundaries)
+
+
 class TestNMS:
     def _reference_nms(self, boxes, scores, iou_threshold):
         """
diff --git a/torchvision/ops/__init__.py b/torchvision/ops/__init__.py
index 827505b842d..80aed924779 100644
--- a/torchvision/ops/__init__.py
+++ b/torchvision/ops/__init__.py
@@ -9,6 +9,7 @@
     distance_box_iou,
     generalized_box_iou,
     masks_to_boxes,
+    masks_to_boundaries,
     nms,
     remove_small_boxes,
 )
@@ -32,6 +33,7 @@
 
 __all__ = [
     "masks_to_boxes",
+    "masks_to_boundaries",
     "deform_conv2d",
     "DeformConv2d",
     "nms",
diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py
index 96631278d48..91918026c97 100644
--- a/torchvision/ops/boxes.py
+++ b/torchvision/ops/boxes.py
@@ -1,6 +1,7 @@
 from typing import Tuple
 
 import torch
+import torch.nn.functional as F
 import torchvision
 from torch import Tensor
 from torchvision.extension import _assert_has_ops
@@ -379,7 +380,6 @@ def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tenso
 
 
 def _box_diou_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tuple[Tensor, Tensor]:
-
     iou = box_iou(boxes1, boxes2)
     lti = torch.min(boxes1[:, None, :2], boxes2[:, :2])
     rbi = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
@@ -399,6 +399,45 @@ def _box_diou_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tuple[Te
     return iou - (centers_distance_squared / diagonal_distance_squared), iou
 
 
+def masks_to_boundaries(masks: torch.Tensor, kernel_size: int) -> torch.Tensor:
+    """
+    Compute the boundaries around the provided binary masks using morphological operations with a custom structuring element.
+    Enforces the use of an odd-sized kernel for the structuring element.
+
+    Parameters:
+    - masks: Input binary masks tensor of shape [N, H, W].
+    - kernel_size: Size of the kernel for the structuring element, must be odd.
+
+    Returns:
+    - Tensor representing the boundaries of the masks with shape [N, H, W].
+    """
+    if masks.numel() == 0:
+        return torch.zeros_like(masks)
+
+    # Ensure kernel_size is odd
+    if kernel_size % 2 == 0:
+        raise ValueError("kernel_size must be odd.")
+
+    # Define the structuring element based on kernel_size
+    selem = torch.ones((1, 1, kernel_size, kernel_size), dtype=torch.float32, device=masks.device)
+
+    masks_float = masks.float().unsqueeze(1)
+
+    # Apply convolution with the structuring element
+    padding = (kernel_size - 1) // 2
+    eroded_masks = F.conv2d(masks_float, selem, padding=padding, stride=1)
+    eroded_masks = eroded_masks.squeeze(1)  # Remove channel dimension after convolution
+
+    # Thresholding: a pixel in the eroded mask should be set if the convolution result
+    # is equal to the sum of the structuring element (i.e., all ones in the kernel)
+    threshold = torch.sum(selem).item()
+    eroded_masks = (eroded_masks == threshold).float()
+
+    contours = torch.logical_xor(masks, eroded_masks.bool())
+
+    return contours
+
+
 def masks_to_boxes(masks: torch.Tensor) -> torch.Tensor:
     """
     Compute the bounding boxes around the provided masks.