diff --git a/lite/examples/object-detection.ipynb b/lite/examples/object-detection.ipynb index 5fb06d7a0..50189079f 100644 --- a/lite/examples/object-detection.ipynb +++ b/lite/examples/object-detection.ipynb @@ -959,7 +959,7 @@ "id": "98edc4dd", "metadata": {}, "source": [ - "### Hallucinations" + "### Unmatched Predictions" ] }, { @@ -1764,7 +1764,7 @@ } ], "source": [ - "metric.value[\"hallucinations\"]" + "metric.value[\"unmatched_predictions\"]" ] }, { @@ -1773,7 +1773,7 @@ "id": "415335e4", "metadata": {}, "source": [ - "### Ground Truths Missing Predictions" + "### Unmatched Ground Truths" ] }, { @@ -2995,7 +2995,7 @@ } ], "source": [ - "metric.value[\"missing_predictions\"]" + "metric.value[\"unmatched_ground_truths\"]" ] } ], diff --git a/lite/examples/tabular_classification.ipynb b/lite/examples/tabular_classification.ipynb index ed06e48c2..db468ee8e 100644 --- a/lite/examples/tabular_classification.ipynb +++ b/lite/examples/tabular_classification.ipynb @@ -605,7 +605,7 @@ } ], "source": [ - "cm.value[\"missing_predictions\"]" + "cm.value[\"unmatched_ground_truths\"]" ] } ], diff --git a/lite/tests/classification/test_confusion_matrix.py b/lite/tests/classification/test_confusion_matrix.py index e4a98758e..f42c3dafc 100644 --- a/lite/tests/classification/test_confusion_matrix.py +++ b/lite/tests/classification/test_confusion_matrix.py @@ -40,7 +40,7 @@ def test_compute_confusion_matrix(): score_thresholds = np.array([0.25, 0.75], dtype=np.float64) - confusion_matrix, missing_predictions = compute_confusion_matrix( + confusion_matrix, unmatched_ground_truths = compute_confusion_matrix( data=data, label_metadata=label_metadata, score_thresholds=score_thresholds, @@ -74,15 +74,15 @@ def test_compute_confusion_matrix(): ) ).all() - assert missing_predictions.shape == (2, 4, 1) + assert unmatched_ground_truths.shape == (2, 4, 1) assert ( # score >= 0.25 - missing_predictions[0, :, 0] + unmatched_ground_truths[0, :, 0] == np.array([-1.0, -1.0, -1.0, -1.0]) 
).all() assert ( # score >= 0.75 - missing_predictions[1, :, 0] + unmatched_ground_truths[1, :, 0] == np.array([-1.0, -1.0, -1.0, 1.0]) ).all() @@ -144,7 +144,7 @@ def test_confusion_matrix_basic(basic_classifications: list[Classification]): } }, }, - "missing_predictions": {}, + "unmatched_ground_truths": {}, }, "parameters": { "score_threshold": 0.25, @@ -166,7 +166,7 @@ def test_confusion_matrix_basic(basic_classifications: list[Classification]): }, } }, - "missing_predictions": { + "unmatched_ground_truths": { "3": {"count": 1, "examples": [{"datum": "uid2"}]} }, }, @@ -179,7 +179,7 @@ def test_confusion_matrix_basic(basic_classifications: list[Classification]): for m in actual_metrics: _filter_elements_with_zero_count( cm=m["value"]["confusion_matrix"], - mp=m["value"]["missing_predictions"], + mp=m["value"]["unmatched_ground_truths"], ) assert m in expected_metrics for m in expected_metrics: @@ -212,7 +212,7 @@ def test_confusion_matrix_unit( "1": {"1": {"count": 1, "examples": []}}, "2": {"1": {"count": 2, "examples": []}}, }, - "missing_predictions": {}, + "unmatched_ground_truths": {}, }, "parameters": { "score_threshold": 0.5, @@ -223,7 +223,7 @@ def test_confusion_matrix_unit( for m in actual_metrics: _filter_elements_with_zero_count( cm=m["value"]["confusion_matrix"], - mp=m["value"]["missing_predictions"], + mp=m["value"]["unmatched_ground_truths"], ) assert m in expected_metrics for m in expected_metrics: @@ -282,7 +282,7 @@ def test_confusion_matrix_with_animal_example( } }, }, - "missing_predictions": { + "unmatched_ground_truths": { "dog": {"count": 1, "examples": [{"datum": "uid5"}]} }, }, @@ -295,7 +295,7 @@ def test_confusion_matrix_with_animal_example( for m in actual_metrics: _filter_elements_with_zero_count( cm=m["value"]["confusion_matrix"], - mp=m["value"]["missing_predictions"], + mp=m["value"]["unmatched_ground_truths"], ) assert m in expected_metrics for m in expected_metrics: @@ -356,7 +356,7 @@ def 
test_confusion_matrix_with_color_example( } }, }, - "missing_predictions": { + "unmatched_ground_truths": { "red": {"count": 1, "examples": [{"datum": "uid2"}]} }, }, @@ -369,7 +369,7 @@ def test_confusion_matrix_with_color_example( for m in actual_metrics: _filter_elements_with_zero_count( cm=m["value"]["confusion_matrix"], - mp=m["value"]["missing_predictions"], + mp=m["value"]["unmatched_ground_truths"], ) assert m in expected_metrics for m in expected_metrics: @@ -438,7 +438,7 @@ def test_confusion_matrix_multiclass( } }, }, - "missing_predictions": {}, + "unmatched_ground_truths": {}, }, "parameters": { "score_threshold": 0.05, @@ -466,7 +466,7 @@ def test_confusion_matrix_multiclass( } }, }, - "missing_predictions": { + "unmatched_ground_truths": { "cat": { "count": 2, "examples": [{"datum": "uid0"}, {"datum": "uid2"}], @@ -483,7 +483,7 @@ def test_confusion_matrix_multiclass( "type": "ConfusionMatrix", "value": { "confusion_matrix": {}, - "missing_predictions": { + "unmatched_ground_truths": { "cat": { "count": 2, "examples": [{"datum": "uid0"}, {"datum": "uid2"}], @@ -504,7 +504,7 @@ def test_confusion_matrix_multiclass( for m in actual_metrics: _filter_elements_with_zero_count( cm=m["value"]["confusion_matrix"], - mp=m["value"]["missing_predictions"], + mp=m["value"]["unmatched_ground_truths"], ) assert m in expected_metrics for m in expected_metrics: @@ -560,7 +560,7 @@ def test_confusion_matrix_without_hardmax_animal_example( }, } }, - "missing_predictions": {}, + "unmatched_ground_truths": {}, }, "parameters": { "score_threshold": 0.05, @@ -580,7 +580,7 @@ def test_confusion_matrix_without_hardmax_animal_example( } } }, - "missing_predictions": {}, + "unmatched_ground_truths": {}, }, "parameters": { "score_threshold": 0.4, @@ -591,7 +591,7 @@ def test_confusion_matrix_without_hardmax_animal_example( "type": "ConfusionMatrix", "value": { "confusion_matrix": {}, - "missing_predictions": { + "unmatched_ground_truths": { "ant": { "count": 1, "examples": [ 
@@ -611,7 +611,7 @@ def test_confusion_matrix_without_hardmax_animal_example( for m in actual_metrics: _filter_elements_with_zero_count( cm=m["value"]["confusion_matrix"], - mp=m["value"]["missing_predictions"], + mp=m["value"]["unmatched_ground_truths"], ) assert m in expected_metrics for m in expected_metrics: diff --git a/lite/tests/classification/test_dataloader.py b/lite/tests/classification/test_dataloader.py index 26dee6562..4fa2ae842 100644 --- a/lite/tests/classification/test_dataloader.py +++ b/lite/tests/classification/test_dataloader.py @@ -8,7 +8,7 @@ def test_no_data(): loader.finalize() -def test_missing_predictions( +def test_unmatched_ground_truths( classifications_no_predictions: list[Classification], ): loader = DataLoader() diff --git a/lite/tests/object_detection/conftest.py b/lite/tests/object_detection/conftest.py index ed32b6434..2eec09766 100644 --- a/lite/tests/object_detection/conftest.py +++ b/lite/tests/object_detection/conftest.py @@ -704,7 +704,7 @@ def false_negatives_two_images_one_only_with_different_class_high_confidence_of_ @pytest.fixture -def detections_fp_hallucination_edge_case() -> list[Detection]: +def detections_fp_unmatched_prediction_edge_case() -> list[Detection]: return [ Detection( uid="uid1", @@ -1093,7 +1093,7 @@ def detections_for_detailed_counting( xmax=rect4[1], ymin=rect4[2], ymax=rect4[3], - labels=["hallucination"], + labels=["no_overlap"], scores=[0.1], ), ], diff --git a/lite/tests/object_detection/test_accuracy.py b/lite/tests/object_detection/test_accuracy.py index 16d3be78a..f62dd8f2f 100644 --- a/lite/tests/object_detection/test_accuracy.py +++ b/lite/tests/object_detection/test_accuracy.py @@ -95,9 +95,9 @@ def test_accuracy_metrics_first_class( groundtruths datum uid1 box 1 - label v1 - tp - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid2 - box 2 - label v1 - fn missing prediction + box 2 - label v1 - fn unmatched ground truths predictions datum uid1 
@@ -176,7 +176,7 @@ def test_accuracy_metrics_second_class( groundtruths datum uid1 - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid2 none predictions diff --git a/lite/tests/object_detection/test_average_precision.py b/lite/tests/object_detection/test_average_precision.py index 947b2f403..68c7dc24c 100644 --- a/lite/tests/object_detection/test_average_precision.py +++ b/lite/tests/object_detection/test_average_precision.py @@ -72,7 +72,7 @@ def test_ap_metrics_first_class( datum uid1 box 1 - label v1 - tp datum uid2 - box 2 - label v1 - fn missing prediction + box 2 - label v1 - fn unmatched ground truths predictions datum uid1 @@ -192,7 +192,7 @@ def test_ap_metrics_second_class( groundtruths datum uid1 - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid2 none predictions diff --git a/lite/tests/object_detection/test_average_recall.py b/lite/tests/object_detection/test_average_recall.py index 3d3472c27..2bccef4e4 100644 --- a/lite/tests/object_detection/test_average_recall.py +++ b/lite/tests/object_detection/test_average_recall.py @@ -86,7 +86,7 @@ def test_ar_metrics_first_class( datum uid1 box 1 - label v1 - tp datum uid2 - box 2 - label v1 - fn missing prediction + box 2 - label v1 - fn unmatched ground truths predictions datum uid1 @@ -198,7 +198,7 @@ def test_ar_metrics_second_class( groundtruths datum uid1 - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid2 none predictions diff --git a/lite/tests/object_detection/test_confusion_matrix.py b/lite/tests/object_detection/test_confusion_matrix.py index 85e29e481..d2668327d 100644 --- a/lite/tests/object_detection/test_confusion_matrix.py +++ b/lite/tests/object_detection/test_confusion_matrix.py @@ -35,8 +35,8 @@ def _test_compute_confusion_matrix( ( confusion_matrix, - hallucinations, - missing_predictions, + unmatched_predictions, + unmatched_ground_truths, ) 
= compute_confusion_matrix( data=sorted_pairs, label_metadata=label_metadata, @@ -52,37 +52,37 @@ def _test_compute_confusion_matrix( 2, 1 + n_examples * 4, ) # iou, score, gt label, pd label, metrics - assert hallucinations.shape == ( + assert unmatched_predictions.shape == ( 1, 100, 2, 1 + n_examples * 3, ) # iou, score, pd label, metrics - assert missing_predictions.shape == ( + assert unmatched_ground_truths.shape == ( 1, 100, 2, 1 + n_examples * 2, ) # iou, score, gt label, metrics - return (confusion_matrix, hallucinations, missing_predictions) + return (confusion_matrix, unmatched_predictions, unmatched_ground_truths) def test_compute_confusion_matrix(): ( confusion_matrix, - hallucinations, - missing_predictions, + unmatched_predictions, + unmatched_ground_truths, ) = _test_compute_confusion_matrix(n_examples=0) """ @ iou=0.5, score<0.1 3x tp 1x fp misclassification - 1x fp hallucination + 1x fp unmatched prediction 0x fn misclassification - 1x fn missing prediction + 1x fn unmatched ground truth """ indices = slice(10) @@ -98,25 +98,26 @@ def test_compute_confusion_matrix(): hal_pd0 = np.array([1.0]) hal_pd1 = np.array([-1.0]) - expected_hallucinations = np.array([hal_pd0, hal_pd1]) + expected_unmatched_predictions = np.array([hal_pd0, hal_pd1]) assert np.isclose( - hallucinations[0, indices, :, :], expected_hallucinations + unmatched_predictions[0, indices, :, :], expected_unmatched_predictions ).all() misprd_gt0 = np.array([1.0]) misprd_gt1 = np.array([-1.0]) - expected_missing_predictions = np.array([misprd_gt0, misprd_gt1]) + expected_unmatched_ground_truths = np.array([misprd_gt0, misprd_gt1]) assert np.isclose( - missing_predictions[0, indices, :, :], expected_missing_predictions + unmatched_ground_truths[0, indices, :, :], + expected_unmatched_ground_truths, ).all() """ @ iou=0.5, 0.1 <= score < 0.65 1x tp 1x fp misclassification - 1x fp hallucination + 1x fp unmatched prediction 1x fn misclassification - 3x fn missing prediction + 3x fn unmatched 
ground truth """ indices = slice(10, 65) @@ -132,25 +133,26 @@ def test_compute_confusion_matrix(): hal_pd0 = np.array([1.0]) hal_pd1 = np.array([-1.0]) - expected_hallucinations = np.array([hal_pd0, hal_pd1]) + expected_unmatched_predictions = np.array([hal_pd0, hal_pd1]) assert np.isclose( - hallucinations[0, indices, :, :], expected_hallucinations + unmatched_predictions[0, indices, :, :], expected_unmatched_predictions ).all() misprd_gt0 = np.array([3.0]) misprd_gt1 = np.array([-1.0]) - expected_missing_predictions = np.array([misprd_gt0, misprd_gt1]) + expected_unmatched_ground_truths = np.array([misprd_gt0, misprd_gt1]) assert np.isclose( - missing_predictions[0, indices, :, :], expected_missing_predictions + unmatched_ground_truths[0, indices, :, :], + expected_unmatched_ground_truths, ).all() """ @ iou=0.5, 0.65 <= score < 0.9 1x tp 1x fp misclassification - 0x fp hallucination + 0x fp unmatched prediction 1x fn misclassification - 3x fn missing prediction + 3x fn unmatched ground truth """ indices = slice(65, 90) @@ -166,25 +168,26 @@ def test_compute_confusion_matrix(): hal_pd0 = np.array([-1.0]) hal_pd1 = np.array([-1.0]) - expected_hallucinations = np.array([hal_pd0, hal_pd1]) + expected_unmatched_predictions = np.array([hal_pd0, hal_pd1]) assert np.isclose( - hallucinations[0, indices, :, :], expected_hallucinations + unmatched_predictions[0, indices, :, :], expected_unmatched_predictions ).all() misprd_gt0 = np.array([3.0]) misprd_gt1 = np.array([-1.0]) - expected_missing_predictions = np.array([misprd_gt0, misprd_gt1]) + expected_unmatched_ground_truths = np.array([misprd_gt0, misprd_gt1]) assert np.isclose( - missing_predictions[0, indices, :, :], expected_missing_predictions + unmatched_ground_truths[0, indices, :, :], + expected_unmatched_ground_truths, ).all() """ @ iou=0.5, score>=0.9 0x tp 0x fp misclassification - 0x fp hallucination + 0x fp unmatched prediction 0x fn misclassification - 4x fn missing prediction + 4x fn unmatched ground truth 
""" indices = slice(90, None) @@ -200,16 +203,17 @@ def test_compute_confusion_matrix(): hal_pd0 = np.array([-1.0]) hal_pd1 = np.array([-1.0]) - expected_hallucinations = np.array([hal_pd0, hal_pd1]) + expected_unmatched_predictions = np.array([hal_pd0, hal_pd1]) assert np.isclose( - hallucinations[0, indices, :, :], expected_hallucinations + unmatched_predictions[0, indices, :, :], expected_unmatched_predictions ).all() misprd_gt0 = np.array([4.0]) misprd_gt1 = np.array([1.0]) - expected_missing_predictions = np.array([misprd_gt0, misprd_gt1]) + expected_unmatched_ground_truths = np.array([misprd_gt0, misprd_gt1]) assert np.isclose( - missing_predictions[0, indices, :, :], expected_missing_predictions + unmatched_ground_truths[0, indices, :, :], + expected_unmatched_ground_truths, ).all() @@ -217,17 +221,17 @@ def test_compute_confusion_matrix_with_examples(): ( confusion_matrix, - hallucinations, - missing_predictions, + unmatched_predictions, + unmatched_ground_truths, ) = _test_compute_confusion_matrix(n_examples=2) """ @ iou=0.5, score<0.1 3x tp 1x fp misclassification - 1x fp hallucination + 1x fp unmatched prediction 0x fn misclassification - 1x fn missing prediction + 1x fn unmatched ground truth """ indices = slice(10) @@ -249,26 +253,27 @@ def test_compute_confusion_matrix_with_examples(): # total count, datum 0, pd 0, score 0, datum 1, pd 1, score 1 hal_pd0 = np.array([1.0, 2.0, 4.0, 0.65, -1.0, -1.0, -1.0]) hal_pd1 = np.array([-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0]) - expected_hallucinations = np.array([hal_pd0, hal_pd1]) + expected_unmatched_predictions = np.array([hal_pd0, hal_pd1]) assert np.isclose( - hallucinations[0, indices, :, :], expected_hallucinations + unmatched_predictions[0, indices, :, :], expected_unmatched_predictions ).all() # total count, datum 0, gt 0, datum1, gt 1 misprd_gt0 = np.array([1.0, 4.0, 5.0, -1.0, -1.0]) misprd_gt1 = np.array([-1.0, -1.0, -1.0, -1.0, -1.0]) - expected_missing_predictions = np.array([misprd_gt0, 
misprd_gt1]) + expected_unmatched_ground_truths = np.array([misprd_gt0, misprd_gt1]) assert np.isclose( - missing_predictions[0, indices, :, :], expected_missing_predictions + unmatched_ground_truths[0, indices, :, :], + expected_unmatched_ground_truths, ).all() """ @ iou=0.5, 0.1 <= score < 0.65 1x tp 1x fp misclassification - 1x fp hallucination + 1x fp unmatched prediction 1x fn misclassification - 2x fn missing prediction + 2x fn unmatched ground truth """ indices = slice(10, 65) @@ -290,26 +295,27 @@ def test_compute_confusion_matrix_with_examples(): # total count, datum 0, pd 0, score 0, datum 1, pd 1, score 1 hal_pd0 = np.array([1.0, 2.0, 4.0, 0.65, -1.0, -1.0, -1.0]) hal_pd1 = np.array([-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0]) - expected_hallucinations = np.array([hal_pd0, hal_pd1]) + expected_unmatched_predictions = np.array([hal_pd0, hal_pd1]) assert np.isclose( - hallucinations[0, indices, :, :], expected_hallucinations + unmatched_predictions[0, indices, :, :], expected_unmatched_predictions ).all() # total count, datum 0, gt 0, datum1, gt 1 misprd_gt0 = np.array([3.0, 1.0, 2.0, 3.0, 4.0]) misprd_gt1 = np.array([-1.0, -1.0, -1.0, -1.0, -1.0]) - expected_missing_predictions = np.array([misprd_gt0, misprd_gt1]) + expected_unmatched_ground_truths = np.array([misprd_gt0, misprd_gt1]) assert np.isclose( - missing_predictions[0, indices, :, :], expected_missing_predictions + unmatched_ground_truths[0, indices, :, :], + expected_unmatched_ground_truths, ).all() """ @ iou=0.5, 0.65 <= score < 0.9 1x tp 1x fp misclassification - 0x fp hallucination + 0x fp unmatched prediction 1x fn misclassification - 2x fn missing prediction + 2x fn unmatched ground truth """ indices = slice(65, 90) @@ -331,26 +337,27 @@ def test_compute_confusion_matrix_with_examples(): # total count, datum 0, pd 0, score 0, datum 1, pd 1, score 1 hal_pd0 = np.array([-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0]) hal_pd1 = np.array([-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0]) - 
expected_hallucinations = np.array([hal_pd0, hal_pd1]) + expected_unmatched_predictions = np.array([hal_pd0, hal_pd1]) assert np.isclose( - hallucinations[0, indices, :, :], expected_hallucinations + unmatched_predictions[0, indices, :, :], expected_unmatched_predictions ).all() # total count, datum 0, gt 0, datum1, gt 1 misprd_gt0 = np.array([3.0, 1.0, 2.0, 3.0, 4.0]) misprd_gt1 = np.array([-1.0, -1.0, -1.0, -1.0, -1.0]) - expected_missing_predictions = np.array([misprd_gt0, misprd_gt1]) + expected_unmatched_ground_truths = np.array([misprd_gt0, misprd_gt1]) assert np.isclose( - missing_predictions[0, indices, :, :], expected_missing_predictions + unmatched_ground_truths[0, indices, :, :], + expected_unmatched_ground_truths, ).all() """ @ iou=0.5, score>=0.9 0x tp 0x fp misclassification - 0x fp hallucination + 0x fp unmatched prediction 0x fn misclassification - 4x fn missing prediction + 4x fn unmatched ground truth """ indices = slice(90, None) @@ -376,17 +383,18 @@ def test_compute_confusion_matrix_with_examples(): # total count, datum 0, pd 0, score 0, datum 1, pd 1, score 1 hal_pd0 = np.array([-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0]) hal_pd1 = np.array([-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0]) - expected_hallucinations = np.array([hal_pd0, hal_pd1]) + expected_unmatched_predictions = np.array([hal_pd0, hal_pd1]) assert np.isclose( - hallucinations[0, indices, :, :], expected_hallucinations + unmatched_predictions[0, indices, :, :], expected_unmatched_predictions ).all() # total count, datum 0, gt 0, datum1, gt 1 misprd_gt0 = np.array([4.0, 0.0, 0.0, 1.0, 2.0]) misprd_gt1 = np.array([1.0, 1.0, 1.0, -1.0, -1.0]) - expected_missing_predictions = np.array([misprd_gt0, misprd_gt1]) + expected_unmatched_ground_truths = np.array([misprd_gt0, misprd_gt1]) assert np.isclose( - missing_predictions[0, indices, :, :], expected_missing_predictions + unmatched_ground_truths[0, indices, :, :], + expected_unmatched_ground_truths, ).all() @@ -424,7 +432,7 @@ def 
test_confusion_matrix( assert evaluator.ignored_prediction_labels == [ "not_v2", - "hallucination", + "no_overlap", ] assert evaluator.missing_prediction_labels == [ "missed_detection", @@ -467,7 +475,7 @@ def test_confusion_matrix( } } }, - "hallucinations": { + "unmatched_predictions": { "not_v2": { "count": 1, "examples": [ @@ -478,7 +486,7 @@ def test_confusion_matrix( } ], }, - "hallucination": { + "no_overlap": { "count": 1, "examples": [ { @@ -499,7 +507,7 @@ def test_confusion_matrix( ], }, }, - "missing_predictions": { + "unmatched_ground_truths": { "missed_detection": { "count": 1, "examples": [ @@ -544,7 +552,7 @@ def test_confusion_matrix( } } }, - "hallucinations": { + "unmatched_predictions": { "not_v2": { "count": 1, "examples": [ @@ -566,7 +574,7 @@ def test_confusion_matrix( ], }, }, - "missing_predictions": { + "unmatched_ground_truths": { "missed_detection": { "count": 1, "examples": [ @@ -611,7 +619,7 @@ def test_confusion_matrix( } } }, - "hallucinations": { + "unmatched_predictions": { "low_iou": { "count": 1, "examples": [ @@ -623,7 +631,7 @@ def test_confusion_matrix( ], } }, - "missing_predictions": { + "unmatched_ground_truths": { "missed_detection": { "count": 1, "examples": [ @@ -668,7 +676,7 @@ def test_confusion_matrix( } } }, - "hallucinations": { + "unmatched_predictions": { "low_iou": { "count": 1, "examples": [ @@ -680,7 +688,7 @@ def test_confusion_matrix( ], } }, - "missing_predictions": { + "unmatched_ground_truths": { "missed_detection": { "count": 1, "examples": [ @@ -711,8 +719,8 @@ def test_confusion_matrix( "type": "ConfusionMatrix", "value": { "confusion_matrix": {}, - "hallucinations": {}, - "missing_predictions": { + "unmatched_predictions": {}, + "unmatched_ground_truths": { "v1": { "count": 1, "examples": [ @@ -749,8 +757,8 @@ def test_confusion_matrix( "type": "ConfusionMatrix", "value": { "confusion_matrix": {}, - "hallucinations": {}, - "missing_predictions": { + "unmatched_predictions": {}, + 
"unmatched_ground_truths": { "v1": { "count": 1, "examples": [ @@ -787,8 +795,8 @@ def test_confusion_matrix( for m in actual_metrics: _filter_out_zero_counts( m["value"]["confusion_matrix"], - m["value"]["hallucinations"], - m["value"]["missing_predictions"], + m["value"]["unmatched_predictions"], + m["value"]["unmatched_ground_truths"], ) assert m in expected_metrics for m in expected_metrics: @@ -835,8 +843,8 @@ def test_confusion_matrix( } }, }, - "hallucinations": { - "hallucination": { + "unmatched_predictions": { + "no_overlap": { "count": 1, "examples": [ { @@ -857,7 +865,7 @@ def test_confusion_matrix( ], }, }, - "missing_predictions": { + "unmatched_ground_truths": { "missed_detection": { "count": 1, "examples": [ @@ -912,7 +920,7 @@ def test_confusion_matrix( } }, }, - "hallucinations": { + "unmatched_predictions": { "low_iou": { "count": 1, "examples": [ @@ -924,7 +932,7 @@ def test_confusion_matrix( ], } }, - "missing_predictions": { + "unmatched_ground_truths": { "missed_detection": { "count": 1, "examples": [ @@ -966,7 +974,7 @@ def test_confusion_matrix( } } }, - "hallucinations": { + "unmatched_predictions": { "low_iou": { "count": 1, "examples": [ @@ -978,7 +986,7 @@ def test_confusion_matrix( ], } }, - "missing_predictions": { + "unmatched_ground_truths": { "missed_detection": { "count": 1, "examples": [ @@ -1026,7 +1034,7 @@ def test_confusion_matrix( } } }, - "hallucinations": { + "unmatched_predictions": { "low_iou": { "count": 1, "examples": [ @@ -1038,7 +1046,7 @@ def test_confusion_matrix( ], } }, - "missing_predictions": { + "unmatched_ground_truths": { "missed_detection": { "count": 1, "examples": [ @@ -1072,8 +1080,8 @@ def test_confusion_matrix( "type": "ConfusionMatrix", "value": { "confusion_matrix": {}, - "hallucinations": {}, - "missing_predictions": { + "unmatched_predictions": {}, + "unmatched_ground_truths": { "v1": { "count": 1, "examples": [ @@ -1116,8 +1124,8 @@ def test_confusion_matrix( "type": "ConfusionMatrix", "value": { 
"confusion_matrix": {}, - "hallucinations": {}, - "missing_predictions": { + "unmatched_predictions": {}, + "unmatched_ground_truths": { "v1": { "count": 1, "examples": [ @@ -1161,8 +1169,8 @@ def test_confusion_matrix( for m in actual_metrics: _filter_out_zero_counts( m["value"]["confusion_matrix"], - m["value"]["hallucinations"], - m["value"]["missing_predictions"], + m["value"]["unmatched_predictions"], + m["value"]["unmatched_ground_truths"], ) assert m in expected_metrics for m in expected_metrics: @@ -1210,8 +1218,10 @@ def test_confusion_matrix_using_torch_metrics_example( "0": {"0": {"count": 5, "examples": []}}, "49": {"49": {"count": 9, "examples": []}}, }, - "hallucinations": {}, - "missing_predictions": {"49": {"count": 1, "examples": []}}, + "unmatched_predictions": {}, + "unmatched_ground_truths": { + "49": {"count": 1, "examples": []} + }, }, "parameters": { "score_threshold": 0.05, @@ -1232,8 +1242,8 @@ def test_confusion_matrix_using_torch_metrics_example( "0": {"0": {"count": 5, "examples": []}}, "49": {"49": {"count": 6, "examples": []}}, }, - "hallucinations": {}, - "missing_predictions": { + "unmatched_predictions": {}, + "unmatched_ground_truths": { "4": {"count": 1, "examples": []}, "49": {"count": 4, "examples": []}, }, @@ -1253,8 +1263,8 @@ def test_confusion_matrix_using_torch_metrics_example( "0": {"0": {"count": 4, "examples": []}}, "49": {"49": {"count": 4, "examples": []}}, }, - "hallucinations": {}, - "missing_predictions": { + "unmatched_predictions": {}, + "unmatched_ground_truths": { "4": {"count": 1, "examples": []}, "2": {"count": 1, "examples": []}, "1": {"count": 1, "examples": []}, @@ -1276,8 +1286,8 @@ def test_confusion_matrix_using_torch_metrics_example( "0": {"0": {"count": 3, "examples": []}}, "49": {"49": {"count": 3, "examples": []}}, }, - "hallucinations": {}, - "missing_predictions": { + "unmatched_predictions": {}, + "unmatched_ground_truths": { "4": {"count": 2, "examples": []}, "2": {"count": 1, "examples": []}, 
"1": {"count": 1, "examples": []}, @@ -1298,8 +1308,8 @@ def test_confusion_matrix_using_torch_metrics_example( "0": {"0": {"count": 2, "examples": []}}, "49": {"49": {"count": 2, "examples": []}}, }, - "hallucinations": {}, - "missing_predictions": { + "unmatched_predictions": {}, + "unmatched_ground_truths": { "4": {"count": 2, "examples": []}, "2": {"count": 2, "examples": []}, "1": {"count": 1, "examples": []}, @@ -1320,8 +1330,8 @@ def test_confusion_matrix_using_torch_metrics_example( "0": {"0": {"count": 2, "examples": []}}, "49": {"49": {"count": 1, "examples": []}}, }, - "hallucinations": {}, - "missing_predictions": { + "unmatched_predictions": {}, + "unmatched_ground_truths": { "4": {"count": 2, "examples": []}, "2": {"count": 2, "examples": []}, "1": {"count": 1, "examples": []}, @@ -1342,8 +1352,8 @@ def test_confusion_matrix_using_torch_metrics_example( "0": {"0": {"count": 1, "examples": []}}, "49": {"49": {"count": 1, "examples": []}}, }, - "hallucinations": {}, - "missing_predictions": { + "unmatched_predictions": {}, + "unmatched_ground_truths": { "4": {"count": 2, "examples": []}, "2": {"count": 2, "examples": []}, "1": {"count": 1, "examples": []}, @@ -1361,8 +1371,8 @@ def test_confusion_matrix_using_torch_metrics_example( "type": "ConfusionMatrix", "value": { "confusion_matrix": {"0": {"0": {"count": 1, "examples": []}}}, - "hallucinations": {}, - "missing_predictions": { + "unmatched_predictions": {}, + "unmatched_ground_truths": { "4": {"count": 2, "examples": []}, "2": {"count": 2, "examples": []}, "1": {"count": 1, "examples": []}, @@ -1384,14 +1394,14 @@ def test_confusion_matrix_using_torch_metrics_example( "0": {"0": {"count": 1, "examples": []}}, "49": {"49": {"count": 2, "examples": []}}, }, - "hallucinations": { + "unmatched_predictions": { "4": {"count": 2, "examples": []}, "3": {"count": 1, "examples": []}, "1": {"count": 1, "examples": []}, "0": {"count": 4, "examples": []}, "49": {"count": 7, "examples": []}, }, - 
"missing_predictions": { + "unmatched_ground_truths": { "4": {"count": 2, "examples": []}, "2": {"count": 1, "examples": []}, "1": {"count": 1, "examples": []}, @@ -1413,14 +1423,14 @@ def test_confusion_matrix_using_torch_metrics_example( "0": {"0": {"count": 1, "examples": []}}, "49": {"49": {"count": 2, "examples": []}}, }, - "hallucinations": { + "unmatched_predictions": { "4": {"count": 1, "examples": []}, "3": {"count": 1, "examples": []}, "1": {"count": 1, "examples": []}, "0": {"count": 4, "examples": []}, "49": {"count": 4, "examples": []}, }, - "missing_predictions": { + "unmatched_ground_truths": { "4": {"count": 2, "examples": []}, "2": {"count": 1, "examples": []}, "1": {"count": 1, "examples": []}, @@ -1441,12 +1451,12 @@ def test_confusion_matrix_using_torch_metrics_example( "2": {"2": {"count": 1, "examples": []}}, "49": {"49": {"count": 2, "examples": []}}, }, - "hallucinations": { + "unmatched_predictions": { "4": {"count": 1, "examples": []}, "0": {"count": 4, "examples": []}, "49": {"count": 2, "examples": []}, }, - "missing_predictions": { + "unmatched_ground_truths": { "4": {"count": 2, "examples": []}, "2": {"count": 1, "examples": []}, "1": {"count": 1, "examples": []}, @@ -1467,11 +1477,11 @@ def test_confusion_matrix_using_torch_metrics_example( "2": {"2": {"count": 1, "examples": []}}, "49": {"49": {"count": 1, "examples": []}}, }, - "hallucinations": { + "unmatched_predictions": { "0": {"count": 3, "examples": []}, "49": {"count": 2, "examples": []}, }, - "missing_predictions": { + "unmatched_ground_truths": { "4": {"count": 2, "examples": []}, "2": {"count": 1, "examples": []}, "1": {"count": 1, "examples": []}, @@ -1491,11 +1501,11 @@ def test_confusion_matrix_using_torch_metrics_example( "confusion_matrix": { "49": {"49": {"count": 1, "examples": []}} }, - "hallucinations": { + "unmatched_predictions": { "0": {"count": 2, "examples": []}, "49": {"count": 1, "examples": []}, }, - "missing_predictions": { + "unmatched_ground_truths": { 
"4": {"count": 2, "examples": []}, "2": {"count": 2, "examples": []}, "1": {"count": 1, "examples": []}, @@ -1515,8 +1525,8 @@ def test_confusion_matrix_using_torch_metrics_example( "confusion_matrix": { "49": {"49": {"count": 1, "examples": []}} }, - "hallucinations": {"0": {"count": 2, "examples": []}}, - "missing_predictions": { + "unmatched_predictions": {"0": {"count": 2, "examples": []}}, + "unmatched_ground_truths": { "4": {"count": 2, "examples": []}, "2": {"count": 2, "examples": []}, "1": {"count": 1, "examples": []}, @@ -1536,8 +1546,8 @@ def test_confusion_matrix_using_torch_metrics_example( "confusion_matrix": { "49": {"49": {"count": 1, "examples": []}} }, - "hallucinations": {"0": {"count": 1, "examples": []}}, - "missing_predictions": { + "unmatched_predictions": {"0": {"count": 1, "examples": []}}, + "unmatched_ground_truths": { "4": {"count": 2, "examples": []}, "2": {"count": 2, "examples": []}, "1": {"count": 1, "examples": []}, @@ -1555,8 +1565,8 @@ def test_confusion_matrix_using_torch_metrics_example( "type": "ConfusionMatrix", "value": { "confusion_matrix": {}, - "hallucinations": {"0": {"count": 1, "examples": []}}, - "missing_predictions": { + "unmatched_predictions": {"0": {"count": 1, "examples": []}}, + "unmatched_ground_truths": { "4": {"count": 2, "examples": []}, "2": {"count": 2, "examples": []}, "1": {"count": 1, "examples": []}, @@ -1574,20 +1584,20 @@ def test_confusion_matrix_using_torch_metrics_example( for m in actual_metrics: _filter_out_zero_counts( m["value"]["confusion_matrix"], - m["value"]["hallucinations"], - m["value"]["missing_predictions"], + m["value"]["unmatched_predictions"], + m["value"]["unmatched_ground_truths"], ) assert m in expected_metrics for m in expected_metrics: assert m in actual_metrics -def test_confusion_matrix_fp_hallucination_edge_case( - detections_fp_hallucination_edge_case: list[Detection], +def test_confusion_matrix_fp_unmatched_prediction_edge_case( + 
detections_fp_unmatched_prediction_edge_case: list[Detection], ): loader = DataLoader() - loader.add_bounding_boxes(detections_fp_hallucination_edge_case) + loader.add_bounding_boxes(detections_fp_unmatched_prediction_edge_case) evaluator = loader.finalize() assert evaluator.ignored_prediction_labels == [] @@ -1635,7 +1645,7 @@ def test_confusion_matrix_fp_hallucination_edge_case( } } }, - "hallucinations": { + "unmatched_predictions": { "v1": { "count": 1, "examples": [ @@ -1652,7 +1662,7 @@ def test_confusion_matrix_fp_hallucination_edge_case( ], } }, - "missing_predictions": { + "unmatched_ground_truths": { "v1": { "count": 1, "examples": [ @@ -1679,8 +1689,8 @@ def test_confusion_matrix_fp_hallucination_edge_case( "type": "ConfusionMatrix", "value": { "confusion_matrix": {}, - "hallucinations": {}, - "missing_predictions": { + "unmatched_predictions": {}, + "unmatched_ground_truths": { "v1": { "count": 2, "examples": [ @@ -1707,8 +1717,8 @@ def test_confusion_matrix_fp_hallucination_edge_case( for m in actual_metrics: _filter_out_zero_counts( m["value"]["confusion_matrix"], - m["value"]["hallucinations"], - m["value"]["missing_predictions"], + m["value"]["unmatched_predictions"], + m["value"]["unmatched_ground_truths"], ) assert m in expected_metrics for m in expected_metrics: @@ -1763,11 +1773,11 @@ def test_confusion_matrix_ranked_pair_ordering( "label1": {"label1": {"count": 1, "examples": []}}, "label2": {"label2": {"count": 1, "examples": []}}, }, - "hallucinations": { + "unmatched_predictions": { "label3": {"count": 1, "examples": []}, "label4": {"count": 1, "examples": []}, }, - "missing_predictions": { + "unmatched_ground_truths": { "label3": {"count": 1, "examples": []} }, }, @@ -1781,8 +1791,8 @@ def test_confusion_matrix_ranked_pair_ordering( for m in actual_metrics: _filter_out_zero_counts( m["value"]["confusion_matrix"], - m["value"]["hallucinations"], - m["value"]["missing_predictions"], + m["value"]["unmatched_predictions"], + 
m["value"]["unmatched_ground_truths"], ) assert m in expected_metrics for m in expected_metrics: diff --git a/lite/tests/object_detection/test_counts.py b/lite/tests/object_detection/test_counts.py index c267e3b82..2ddc55167 100644 --- a/lite/tests/object_detection/test_counts.py +++ b/lite/tests/object_detection/test_counts.py @@ -12,7 +12,7 @@ def test_counts_metrics_first_class( datum uid1 box 1 - label v1 - tp datum uid2 - box 2 - label v1 - fn missing prediction + box 2 - label v1 - fn unmatched ground truths predictions datum uid1 @@ -112,7 +112,7 @@ def test_counts_metrics_second_class( groundtruths datum uid1 - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid2 none predictions diff --git a/lite/tests/object_detection/test_f1.py b/lite/tests/object_detection/test_f1.py index 078581559..20b38deca 100644 --- a/lite/tests/object_detection/test_f1.py +++ b/lite/tests/object_detection/test_f1.py @@ -49,9 +49,9 @@ def test_f1_metrics_first_class( groundtruths datum uid1 box 1 - label v1 - tp - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid2 - box 2 - label v1 - fn missing prediction + box 2 - label v1 - fn unmatched ground truths predictions datum uid1 @@ -134,7 +134,7 @@ def test_f1_metrics_second_class( groundtruths datum uid1 - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid2 none predictions diff --git a/lite/tests/object_detection/test_filtering.py b/lite/tests/object_detection/test_filtering.py index 4d0daa4e8..33b707bec 100644 --- a/lite/tests/object_detection/test_filtering.py +++ b/lite/tests/object_detection/test_filtering.py @@ -68,7 +68,7 @@ def test_filtering_one_detection(one_detection: list[Detection]): groundtruths datum uid1 box 1 - label v1 - tp - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths predictions datum uid1 @@ -184,7 +184,7 @@ def 
test_filtering_two_detections(two_detections: list[Detection]): groundtruths datum uid1 box 1 - label v1 - tp - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid2 box 2 - label v1 - fn misclassification @@ -305,12 +305,12 @@ def test_filtering_four_detections(four_detections: list[Detection]): groundtruths datum uid1 box 1 - label v1 - tp - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid2 box 2 - label v1 - fn misclassification datum uid3 box 1 - label v1 - tp - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid4 box 2 - label v1 - fn misclassification @@ -443,12 +443,12 @@ def test_filtering_all_detections(four_detections: list[Detection]): groundtruths datum uid1 box 1 - label v1 - tp - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid2 box 2 - label v1 - fn misclassification datum uid3 box 1 - label v1 - tp - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid4 box 2 - label v1 - fn misclassification diff --git a/lite/tests/object_detection/test_precision.py b/lite/tests/object_detection/test_precision.py index 0a86d4e5e..9b6ae6868 100644 --- a/lite/tests/object_detection/test_precision.py +++ b/lite/tests/object_detection/test_precision.py @@ -49,9 +49,9 @@ def test_precision_metrics_first_class( groundtruths datum uid1 box 1 - label v1 - tp - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid2 - box 2 - label v1 - fn missing prediction + box 2 - label v1 - fn unmatched ground truths predictions datum uid1 @@ -134,7 +134,7 @@ def test_precision_metrics_second_class( groundtruths datum uid1 - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid2 none predictions diff --git a/lite/tests/object_detection/test_recall.py 
b/lite/tests/object_detection/test_recall.py index 662aa00e2..4e210b4d5 100644 --- a/lite/tests/object_detection/test_recall.py +++ b/lite/tests/object_detection/test_recall.py @@ -50,7 +50,7 @@ def test_recall_metrics_first_class( datum uid1 box 1 - label v1 - tp datum uid2 - box 2 - label v1 - fn missing prediction + box 2 - label v1 - fn unmatched ground truths predictions datum uid1 @@ -133,7 +133,7 @@ def test_recall_metrics_second_class( groundtruths datum uid1 - box 3 - label v2 - fn missing prediction + box 3 - label v2 - fn unmatched ground truths datum uid2 none predictions diff --git a/lite/tests/semantic_segmentation/test_confusion_matrix.py b/lite/tests/semantic_segmentation/test_confusion_matrix.py index 2ad2afdd5..5b4178b52 100644 --- a/lite/tests/semantic_segmentation/test_confusion_matrix.py +++ b/lite/tests/semantic_segmentation/test_confusion_matrix.py @@ -25,11 +25,11 @@ def test_confusion_matrix_basic_segmentations( "v1": {"v1": {"iou": 0.5}, "v2": {"iou": 0.0}}, "v2": {"v1": {"iou": 0.0}, "v2": {"iou": 0.5}}, }, - "hallucinations": { + "unmatched_predictions": { "v1": {"ratio": 0.0}, "v2": {"ratio": 0.5}, }, - "missing_predictions": { + "unmatched_ground_truths": { "v1": {"ratio": 0.5}, "v2": {"ratio": 0.0}, }, @@ -71,13 +71,13 @@ def test_confusion_matrix_segmentations_from_boxes( }, }, }, - "hallucinations": { + "unmatched_predictions": { "v1": {"ratio": 5000 / 10000}, # 50% overlap "v2": { "ratio": 4999 / 5000 }, # overlaps 1 pixel out of 5000 predictions }, - "missing_predictions": { + "unmatched_ground_truths": { "v1": {"ratio": 5000 / 10000}, "v2": { "ratio": 14999 / 15000 diff --git a/lite/valor_lite/classification/computation.py b/lite/valor_lite/classification/computation.py index 04a6fd2af..4a463a74d 100644 --- a/lite/valor_lite/classification/computation.py +++ b/lite/valor_lite/classification/computation.py @@ -282,7 +282,7 @@ def compute_confusion_matrix( NDArray[np.float64] Confusion matrix. 
NDArray[np.int32] - Ground truths with missing predictions. + Unmatched Ground Truths. """ n_labels = label_metadata.shape[0] @@ -292,7 +292,7 @@ def compute_confusion_matrix( (n_scores, n_labels, n_labels, 2 * n_examples + 1), dtype=np.float32, ) - missing_predictions = -1 * np.ones( + unmatched_ground_truths = -1 * np.ones( (n_scores, n_labels, n_examples + 1), dtype=np.int32, ) @@ -339,7 +339,7 @@ def compute_confusion_matrix( score_idx, misclf_labels[:, 0], misclf_labels[:, 1], 0 ] = misclf_counts - missing_predictions[score_idx, misprd_labels, 0] = misprd_counts + unmatched_ground_truths[score_idx, misprd_labels, 0] = misprd_counts if n_examples > 0: for label_idx in range(n_labels): @@ -375,16 +375,16 @@ def compute_confusion_matrix( 1 : 2 * misclf_label_examples.shape[0] + 1, ] = misclf_label_examples[:, [0, 3]].flatten() - # missing prediction examples + # unmatched ground truth examples mask_misprd_label = misprd_examples[:, 1] == label_idx if misprd_examples.size > 0: misprd_label_examples = misprd_examples[mask_misprd_label][ :n_examples ] - missing_predictions[ + unmatched_ground_truths[ score_idx, label_idx, 1 : misprd_label_examples.shape[0] + 1, ] = misprd_label_examples[:, 0].flatten() - return confusion_matrix, missing_predictions + return confusion_matrix, unmatched_ground_truths diff --git a/lite/valor_lite/classification/metric.py b/lite/valor_lite/classification/metric.py index cce5e8a52..3810d48dd 100644 --- a/lite/valor_lite/classification/metric.py +++ b/lite/valor_lite/classification/metric.py @@ -321,7 +321,7 @@ def confusion_matrix( ], ], ], - missing_predictions: dict[ + unmatched_ground_truths: dict[ str, # ground truth label value dict[ str, # either `count` or `examples` @@ -335,8 +335,8 @@ def confusion_matrix( The confusion matrix and related metrics for the classification task. 
This class encapsulates detailed information about the model's performance, including correct - predictions, misclassifications, hallucinations (false positives), and missing predictions - (false negatives). It provides counts and examples for each category to facilitate in-depth analysis. + predictions, misclassifications, unmatched predictions (subset of false positives), and unmatched ground truths + (subset of false negatives). It provides counts and examples for each category to facilitate in-depth analysis. Confusion Matrix Structure: { @@ -358,7 +358,7 @@ def confusion_matrix( ... } - Missing Prediction Structure: + Unmatched Ground Truths Structure: { ground_truth_label: { 'count': int, @@ -379,7 +379,7 @@ def confusion_matrix( A nested dictionary where the first key is the ground truth label value, the second key is the prediction label value, and the innermost dictionary contains either a `count` or a list of `examples`. Each example includes the datum UID and prediction score. - missing_predictions : dict + unmatched_ground_truths : dict A dictionary where each key is a ground truth label value for which the model failed to predict (false negatives). The value is a dictionary containing either a `count` or a list of `examples`. Each example includes the datum UID. 
@@ -396,7 +396,7 @@ def confusion_matrix( type=MetricType.ConfusionMatrix.value, value={ "confusion_matrix": confusion_matrix, - "missing_predictions": missing_predictions, + "unmatched_ground_truths": unmatched_ground_truths, }, parameters={ "score_threshold": score_threshold, diff --git a/lite/valor_lite/classification/utilities.py b/lite/valor_lite/classification/utilities.py index a82c5d330..86faf0c70 100644 --- a/lite/valor_lite/classification/utilities.py +++ b/lite/valor_lite/classification/utilities.py @@ -153,20 +153,20 @@ def _unpack_confusion_matrix_value( } -def _unpack_missing_predictions_value( - missing_predictions: NDArray[np.int32], +def _unpack_unmatched_ground_truths_value( + unmatched_ground_truths: NDArray[np.int32], number_of_labels: int, number_of_examples: int, index_to_uid: dict[int, str], index_to_label: dict[int, str], ) -> dict[str, dict[str, int | list[dict[str, str]]]]: """ - Unpacks a numpy array of missing prediction counts and examples. + Unpacks a numpy array of unmatched ground truth counts and examples. 
""" datum_idx = ( lambda gt_label_idx, example_idx: int( # noqa: E731 - lambda fn - missing_predictions[ + unmatched_ground_truths[ gt_label_idx, example_idx + 1, ] @@ -176,7 +176,7 @@ def _unpack_missing_predictions_value( return { index_to_label[gt_label_idx]: { "count": max( - int(missing_predictions[gt_label_idx, 0]), + int(unmatched_ground_truths[gt_label_idx, 0]), 0, ), "examples": [ @@ -197,7 +197,7 @@ def unpack_confusion_matrix_into_metric_list( index_to_label: dict[int, str], ) -> list[Metric]: - (confusion_matrix, missing_predictions) = results + (confusion_matrix, unmatched_ground_truths) = results n_scores, n_labels, _, _ = confusion_matrix.shape return [ Metric.confusion_matrix( @@ -210,8 +210,10 @@ def unpack_confusion_matrix_into_metric_list( index_to_label=index_to_label, index_to_uid=index_to_uid, ), - missing_predictions=_unpack_missing_predictions_value( - missing_predictions=missing_predictions[score_idx, :, :], + unmatched_ground_truths=_unpack_unmatched_ground_truths_value( + unmatched_ground_truths=unmatched_ground_truths[ + score_idx, :, : + ], number_of_labels=n_labels, number_of_examples=number_of_examples, index_to_label=index_to_label, diff --git a/lite/valor_lite/object_detection/computation.py b/lite/valor_lite/object_detection/computation.py index 3f21c8e1c..6e0e9163f 100644 --- a/lite/valor_lite/object_detection/computation.py +++ b/lite/valor_lite/object_detection/computation.py @@ -669,9 +669,9 @@ def compute_confusion_matrix( NDArray[np.float64] Confusion matrix. NDArray[np.float64] - Hallucinations. + Unmatched Predictions. NDArray[np.int32] - Missing Predictions. + Unmatched Ground Truths. 
""" n_labels = label_metadata.shape[0] @@ -683,12 +683,12 @@ def compute_confusion_matrix( (n_ious, n_scores, n_labels, n_labels, 4 * n_examples + 1), dtype=np.float32, ) - hallucinations = -1 * np.ones( + unmatched_predictions = -1 * np.ones( # (datum idx, pd idx, pd score) * n_examples + count (n_ious, n_scores, n_labels, 3 * n_examples + 1), dtype=np.float32, ) - missing_predictions = -1 * np.ones( + unmatched_ground_truths = -1 * np.ones( # (datum idx, gt idx) * n_examples + count (n_ious, n_scores, n_labels, 2 * n_examples + 1), dtype=np.int32, @@ -793,7 +793,7 @@ def compute_confusion_matrix( data[mask_misclf], unique_idx=[0, 1, 2, 4, 5], label_idx=[3, 4] ) - # count hallucinations + # count unmatched predictions ( halluc_examples, halluc_labels, @@ -802,7 +802,7 @@ def compute_confusion_matrix( data[mask_halluc], unique_idx=[0, 2, 5], label_idx=2 ) - # count missing predictions + # count unmatched ground truths ( misprd_examples, misprd_labels, @@ -822,13 +822,13 @@ def compute_confusion_matrix( misclf_labels[:, 1], 0, ] = misclf_counts - hallucinations[ + unmatched_predictions[ iou_idx, score_idx, halluc_labels, 0, ] = halluc_counts - missing_predictions[ + unmatched_ground_truths[ iou_idx, score_idx, misprd_labels, @@ -877,26 +877,26 @@ def compute_confusion_matrix( :, [0, 1, 2, 6] ].flatten() - # hallucination examples + # unmatched prediction examples mask_halluc_label = halluc_examples[:, 5] == label_idx if mask_halluc_label.sum() > 0: halluc_label_examples = halluc_examples[ mask_halluc_label ][:n_examples] - hallucinations[ + unmatched_predictions[ iou_idx, score_idx, label_idx, 1 : 3 * halluc_label_examples.shape[0] + 1, ] = halluc_label_examples[:, [0, 2, 6]].flatten() - # missing prediction examples + # unmatched ground truth examples mask_misprd_label = misprd_examples[:, 4] == label_idx if misprd_examples.size > 0: misprd_label_examples = misprd_examples[ mask_misprd_label ][:n_examples] - missing_predictions[ + unmatched_ground_truths[ iou_idx, 
score_idx, label_idx, @@ -905,6 +905,6 @@ return ( confusion_matrix, - hallucinations, - missing_predictions, + unmatched_predictions, + unmatched_ground_truths, ) diff --git a/lite/valor_lite/object_detection/metric.py b/lite/valor_lite/object_detection/metric.py index d8a589cd1..171a72861 100644 --- a/lite/valor_lite/object_detection/metric.py +++ b/lite/valor_lite/object_detection/metric.py @@ -619,7 +619,7 @@ def confusion_matrix( ], ], ], - hallucinations: dict[ + unmatched_predictions: dict[ str, # prediction label value dict[ str, # either `count` or `examples` @@ -636,7 +636,7 @@ def confusion_matrix( ], ], ], - missing_predictions: dict[ + unmatched_ground_truths: dict[ str, # ground truth label value dict[ str, # either `count` or `examples` @@ -660,8 +660,8 @@ def confusion_matrix( Confusion matrix for object detection tasks. This class encapsulates detailed information about the model's performance, including correct - predictions, misclassifications, hallucinations (false positives), and missing predictions - (false negatives). It provides counts and examples for each category to facilitate in-depth analysis. + predictions, misclassifications, unmatched predictions (subset of false positives), and unmatched ground truths + (subset of false negatives). It provides counts and examples for each category to facilitate in-depth analysis. Confusion Matrix Format: { @@ -683,7 +683,7 @@ def confusion_matrix( ... } - Hallucinations Format: + Unmatched Predictions Format: { : { 'count': int, @@ -699,7 +699,7 @@ def confusion_matrix( ... } - Missing Prediction Format: + Unmatched Ground Truths Format: { : { 'count': int, @@ -721,13 +721,13 @@ def confusion_matrix( is the prediction label value, and the innermost dictionary contains either a `count` or a list of `examples`. Each example includes the datum UID, ground truth bounding box, predicted bounding box, and prediction scores.
- hallucinations : dict + unmatched_predictions : dict A dictionary where each key is a prediction label value with no corresponding ground truth - (false positives). The value is a dictionary containing either a `count` or a list of + (subset of false positives). The value is a dictionary containing either a `count` or a list of `examples`. Each example includes the datum UID, predicted bounding box, and prediction score. - missing_predictions : dict + unmatched_ground_truths : dict A dictionary where each key is a ground truth label value for which the model failed to predict - (false negatives). The value is a dictionary containing either a `count` or a list of `examples`. + (subset of false negatives). The value is a dictionary containing either a `count` or a list of `examples`. Each example includes the datum UID and ground truth bounding box. score_threshold : float The confidence score threshold used to filter predictions. @@ -744,8 +744,8 @@ def confusion_matrix( type=MetricType.ConfusionMatrix.value, value={ "confusion_matrix": confusion_matrix, - "hallucinations": hallucinations, - "missing_predictions": missing_predictions, + "unmatched_predictions": unmatched_predictions, + "unmatched_ground_truths": unmatched_ground_truths, }, parameters={ "score_threshold": score_threshold, diff --git a/lite/valor_lite/object_detection/utilities.py b/lite/valor_lite/object_detection/utilities.py index 9e9de43a4..dde4b33e6 100644 --- a/lite/valor_lite/object_detection/utilities.py +++ b/lite/valor_lite/object_detection/utilities.py @@ -321,8 +321,8 @@ def _unpack_confusion_matrix_value( } -def _unpack_hallucinations_value( - hallucinations: NDArray[np.float64], +def _unpack_unmatched_predictions_value( + unmatched_predictions: NDArray[np.float64], number_of_labels: int, number_of_examples: int, index_to_uid: dict[int, str], @@ -336,12 +336,12 @@ def _unpack_hallucinations_value( ], ]: """ - Unpacks a numpy array of hallucination counts and examples. 
+ Unpacks a numpy array of unmatched prediction counts and examples. """ datum_idx = ( lambda pd_label_idx, example_idx: int( # noqa: E731 - lambda fn - hallucinations[ + unmatched_predictions[ pd_label_idx, example_idx * 3 + 1, ] @@ -350,7 +350,7 @@ def _unpack_hallucinations_value( prediction_idx = ( lambda pd_label_idx, example_idx: int( # noqa: E731 - lambda fn - hallucinations[ + unmatched_predictions[ pd_label_idx, example_idx * 3 + 2, ] @@ -359,7 +359,7 @@ def _unpack_hallucinations_value( score_idx = ( lambda pd_label_idx, example_idx: float( # noqa: E731 - lambda fn - hallucinations[ + unmatched_predictions[ pd_label_idx, example_idx * 3 + 3, ] @@ -369,7 +369,7 @@ def _unpack_hallucinations_value( return { index_to_label[pd_label_idx]: { "count": max( - int(hallucinations[pd_label_idx, 0]), + int(unmatched_predictions[pd_label_idx, 0]), 0, ), "examples": [ @@ -392,8 +392,8 @@ def _unpack_hallucinations_value( -def _unpack_missing_predictions_value( - missing_predictions: NDArray[np.int32], +def _unpack_unmatched_ground_truths_value( + unmatched_ground_truths: NDArray[np.int32], number_of_labels: int, number_of_examples: int, index_to_uid: dict[int, str], @@ -401,12 +401,12 @@ def _unpack_missing_predictions_value( groundtruth_examples: dict[int, NDArray[np.float16]], ) -> dict[str, dict[str, int | list[dict[str, str | dict[str, float]]]]]: """ - Unpacks a numpy array of missing prediction counts and examples. + Unpacks a numpy array of unmatched ground truth counts and examples.
""" datum_idx = ( lambda gt_label_idx, example_idx: int( # noqa: E731 - lambda fn - missing_predictions[ + unmatched_ground_truths[ gt_label_idx, example_idx * 2 + 1, ] @@ -415,7 +415,7 @@ def _unpack_missing_predictions_value( groundtruth_idx = ( lambda gt_label_idx, example_idx: int( # noqa: E731 - lambda fn - missing_predictions[ + unmatched_ground_truths[ gt_label_idx, example_idx * 2 + 2, ] @@ -425,7 +425,7 @@ def _unpack_missing_predictions_value( return { index_to_label[gt_label_idx]: { "count": max( - int(missing_predictions[gt_label_idx, 0]), + int(unmatched_ground_truths[gt_label_idx, 0]), 0, ), "examples": [ @@ -463,8 +463,8 @@ def unpack_confusion_matrix_into_metric_list( ) -> list[Metric]: ( confusion_matrix, - hallucinations, - missing_predictions, + unmatched_predictions, + unmatched_ground_truths, ) = results n_labels = len(index_to_label) return [ @@ -481,16 +481,18 @@ def unpack_confusion_matrix_into_metric_list( groundtruth_examples=groundtruth_examples, prediction_examples=prediction_examples, ), - hallucinations=_unpack_hallucinations_value( - hallucinations=hallucinations[iou_idx, score_idx, :, :], + unmatched_predictions=_unpack_unmatched_predictions_value( + unmatched_predictions=unmatched_predictions[ + iou_idx, score_idx, :, : + ], number_of_labels=n_labels, number_of_examples=number_of_examples, index_to_label=index_to_label, index_to_uid=index_to_uid, prediction_examples=prediction_examples, ), - missing_predictions=_unpack_missing_predictions_value( - missing_predictions=missing_predictions[ + unmatched_ground_truths=_unpack_unmatched_ground_truths_value( + unmatched_ground_truths=unmatched_ground_truths[ iou_idx, score_idx, :, : ], number_of_labels=n_labels, diff --git a/lite/valor_lite/semantic_segmentation/computation.py b/lite/valor_lite/semantic_segmentation/computation.py index 807c2bde6..25c43a180 100644 --- a/lite/valor_lite/semantic_segmentation/computation.py +++ b/lite/valor_lite/semantic_segmentation/computation.py @@ -98,9 
+98,9 @@ def compute_metrics( NDArray[np.float64] Confusion matrix containing IOU values. NDArray[np.float64] - Hallucination ratios. + Unmatched prediction ratios. NDArray[np.float64] - Missing prediction ratios. + Unmatched ground truth ratios. """ n_labels = label_metadata.shape[0] gt_counts = label_metadata[:, 0] @@ -108,7 +108,7 @@ def compute_metrics( counts = data.sum(axis=0) - # compute iou, missing_predictions and hallucinations + # compute iou, unmatched ground truths and unmatched predictions intersection_ = counts[1:, 1:] union_ = ( gt_counts[:, np.newaxis] + pd_counts[np.newaxis, :] - intersection_ @@ -122,20 +122,20 @@ def compute_metrics( out=ious, ) - hallucination_ratio = np.zeros((n_labels), dtype=np.float64) + unmatched_prediction_ratio = np.zeros((n_labels), dtype=np.float64) np.divide( counts[0, 1:], pd_counts, where=pd_counts > 1e-9, - out=hallucination_ratio, + out=unmatched_prediction_ratio, ) - missing_prediction_ratio = np.zeros((n_labels), dtype=np.float64) + unmatched_ground_truth_ratio = np.zeros((n_labels), dtype=np.float64) np.divide( counts[1:, 0], gt_counts, where=gt_counts > 1e-9, - out=missing_prediction_ratio, + out=unmatched_ground_truth_ratio, ) # compute precision, recall, f1 @@ -168,6 +168,6 @@ def compute_metrics( f1_score, accuracy, ious, - hallucination_ratio, - missing_prediction_ratio, + unmatched_prediction_ratio, + unmatched_ground_truth_ratio, ) diff --git a/lite/valor_lite/semantic_segmentation/metric.py b/lite/valor_lite/semantic_segmentation/metric.py index 509d1f424..949ca02c4 100644 --- a/lite/valor_lite/semantic_segmentation/metric.py +++ b/lite/valor_lite/semantic_segmentation/metric.py @@ -209,11 +209,11 @@ def confusion_matrix( dict[str, float], # iou ], ], - hallucinations: dict[ + unmatched_predictions: dict[ str, # prediction label value dict[str, float], # pixel ratio ], - missing_predictions: dict[ + unmatched_ground_truths: dict[ str, # ground truth label value dict[str, float], # pixel ratio ], @@ 
-222,8 +222,8 @@ def confusion_matrix( The confusion matrix and related metrics for semantic segmentation tasks. This class encapsulates detailed information about the model's performance, including correct - predictions, misclassifications, hallucinations (false positives), and missing predictions - (false negatives). It provides counts for each category to facilitate in-depth analysis. + predictions, misclassifications, unmatched predictions (subset of false positives), and unmatched ground truths + (subset of false negatives). It provides counts for each category to facilitate in-depth analysis. Confusion Matrix Format: { @@ -234,14 +234,14 @@ def confusion_matrix( }, } - Hallucinations Format: + Unmatched Predictions Format: { : { 'iou': , }, } - Missing Predictions Format: + Unmatched Ground Truths Format: { : { 'iou': , @@ -253,10 +253,10 @@ def confusion_matrix( confusion_matrix : dict Nested dictionaries representing the Intersection over Union (IOU) scores for each ground truth label and prediction label pair. - hallucinations : dict + unmatched_predictions : dict Dictionary representing the pixel ratios for predicted labels that do not correspond to any ground truth labels (false positives). - missing_predictions : dict + unmatched_ground_truths : dict Dictionary representing the pixel ratios for ground truth labels that were not predicted (false negatives). 
@@ -268,8 +268,8 @@ def confusion_matrix( type=MetricType.ConfusionMatrix.value, value={ "confusion_matrix": confusion_matrix, - "hallucinations": hallucinations, - "missing_predictions": missing_predictions, + "unmatched_predictions": unmatched_predictions, + "unmatched_ground_truths": unmatched_ground_truths, }, parameters={}, ) diff --git a/lite/valor_lite/semantic_segmentation/utilities.py b/lite/valor_lite/semantic_segmentation/utilities.py index 577e974f5..452ad5b4a 100644 --- a/lite/valor_lite/semantic_segmentation/utilities.py +++ b/lite/valor_lite/semantic_segmentation/utilities.py @@ -18,8 +18,8 @@ def unpack_precision_recall_iou_into_metric_lists( f1_score, accuracy, ious, - hallucination_ratios, - missing_prediction_ratios, + unmatched_prediction_ratios, + unmatched_ground_truth_ratios, ) = results metrics = defaultdict(list) @@ -43,16 +43,16 @@ def unpack_precision_recall_iou_into_metric_lists( for gt_label_idx in range(n_labels) if label_metadata[gt_label_idx, 0] > 0 }, - hallucinations={ + unmatched_predictions={ index_to_label[pd_label_idx]: { - "ratio": float(hallucination_ratios[pd_label_idx]) + "ratio": float(unmatched_prediction_ratios[pd_label_idx]) } for pd_label_idx in range(n_labels) if label_metadata[pd_label_idx, 0] > 0 }, - missing_predictions={ + unmatched_ground_truths={ index_to_label[gt_label_idx]: { - "ratio": float(missing_prediction_ratios[gt_label_idx]) + "ratio": float(unmatched_ground_truth_ratios[gt_label_idx]) } for gt_label_idx in range(n_labels) if label_metadata[gt_label_idx, 0] > 0