diff --git a/changelog/12948.bugfix.md b/changelog/12948.bugfix.md
new file mode 100644
index 000000000000..7479161831be
--- /dev/null
+++ b/changelog/12948.bugfix.md
@@ -0,0 +1 @@
+Fixed `UnexpecTEDIntentlessPolicy` training errors that resulted from a change to batching behavior. Changed the batching behavior back to the original for all components. Made the changed batching behavior accessible in `DIETClassifier` using `drop_small_last_batch: True`.
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 1cc65c89b3c9..bea4735da6fe 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -50,6 +50,7 @@ from rasa.shared.nlu.training_data.training_data import TrainingData
 from rasa.shared.nlu.training_data.message import Message
 from rasa.utils.tensorflow.constants import (
+    DROP_SMALL_LAST_BATCH,
     LABEL,
     IDS,
     HIDDEN_LAYERS_SIZES,
@@ -288,6 +289,9 @@ def get_default_config() -> Dict[Text, Any]:
             # a few steps, as the compilation of the graph tends to take more time than
             # running it. It is recommended to not adjust the optimization parameter.
             RUN_EAGERLY: False,
+            # Determines whether the last batch should be dropped if it contains fewer
+            # than half a batch size of examples.
+            DROP_SMALL_LAST_BATCH: False,
         }
 
     def __init__(
@@ -931,6 +935,7 @@ def train(self, training_data: TrainingData) -> Resource:
             self.component_config[BATCH_STRATEGY],
             self.component_config[EVAL_NUM_EXAMPLES],
             self.component_config[RANDOM_SEED],
+            drop_small_last_batch=self.component_config[DROP_SMALL_LAST_BATCH],
         )
         callbacks = train_utils.create_common_callbacks(
             self.component_config[EPOCHS],
diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py
index 047db9878c67..39d5ea6d0560 100644
--- a/rasa/utils/tensorflow/constants.py
+++ b/rasa/utils/tensorflow/constants.py
@@ -113,3 +113,4 @@
 USE_GPU = "use_gpu"
 
 RUN_EAGERLY = "run_eagerly"
+DROP_SMALL_LAST_BATCH = "drop_small_last_batch"
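For orientation, here is a minimal sketch of opting in to the new flag through the component config. `DIETClassifier.get_default_config()` is the method patched above; the dict-merge override idiom is illustrative only, not the sole way to set the key.

```python
from rasa.nlu.classifiers.diet_classifier import DIETClassifier

# The defaults patched above now contain `drop_small_last_batch: False`;
# overriding the key re-enables the "drop small last batch" behavior for
# DIET only, leaving other components on the restored original behavior.
config = {**DIETClassifier.get_default_config(), "drop_small_last_batch": True}
assert config["drop_small_last_batch"] is True
```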
diff --git a/rasa/utils/tensorflow/data_generator.py b/rasa/utils/tensorflow/data_generator.py
index a696f607c026..e54b95dad335 100644
--- a/rasa/utils/tensorflow/data_generator.py
+++ b/rasa/utils/tensorflow/data_generator.py
@@ -344,6 +344,7 @@ def __init__(
         epochs: int = 1,
         batch_strategy: Text = SEQUENCE,
         shuffle: bool = True,
+        drop_small_last_batch: bool = False,
     ):
         """Initializes the increasing batch size data generator.
 
@@ -353,6 +354,8 @@ def __init__(
             epochs: The total number of epochs.
             batch_strategy: The batch strategy.
             shuffle: If 'True', data will be shuffled.
+            drop_small_last_batch: If 'True', the last batch in an epoch will be dropped
+                if it has fewer examples than half the batch size.
         """
         super().__init__(model_data, batch_size, batch_strategy, shuffle)
 
@@ -370,6 +373,7 @@ def __init__(
         self._current_batch_size = 0
         # create separate data variable that will store modified data for each batch
         self._data: Data = {}
+        self.drop_small_last_batch = drop_small_last_batch
         self.on_epoch_end()
 
     def __len__(self) -> int:
@@ -381,11 +385,16 @@ def __len__(self) -> int:
         # data was rebalanced, so need to recalculate number of examples
         num_examples = self.model_data.number_of_examples(self._data)
         batch_size = self._current_batch_size
-        # keep last batch only if it has at least half a batch size of examples
-        last_batch_half_full = num_examples % batch_size >= math.ceil(batch_size / 2)
-        num_batches = num_examples // batch_size + int(last_batch_half_full)
-        # Return at least 1 if there is an example
-        return max(num_batches, int(num_examples > 0))
+        if self.drop_small_last_batch:
+            # keep last batch only if it has at least half a batch size of examples
+            last_batch_half_full = num_examples % batch_size >= math.ceil(
+                batch_size / 2
+            )
+            num_batches = num_examples // batch_size + int(last_batch_half_full)
+            # Return at least 1 if there is an example
+            return max(num_batches, int(num_examples > 0))
+        else:
+            return num_examples // batch_size + int(num_examples % batch_size > 0)
 
     def __getitem__(self, index: int) -> Tuple[Any, Any]:
         """Gets batch at position `index`.
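To make the two code paths in `__len__` concrete, here is a standalone restatement of the batch-count arithmetic. `num_batches` is a hypothetical helper written for illustration, not part of the patch; the 48-example figures mirror the test dataset used further down.

```python
import math

def num_batches(num_examples: int, batch_size: int, drop_small_last_batch: bool) -> int:
    """Restates the `__len__` logic above for illustration."""
    if drop_small_last_batch:
        # Keep the last partial batch only if it is at least half full.
        last_batch_half_full = num_examples % batch_size >= math.ceil(batch_size / 2)
        num = num_examples // batch_size + int(last_batch_half_full)
        # Return at least 1 batch if there is any example at all.
        return max(num, int(num_examples > 0))
    # Restored original behavior: keep any non-empty partial batch.
    return num_examples // batch_size + int(num_examples % batch_size > 0)

# With 48 examples and batch_size=20: 48 % 20 = 8 < ceil(20 / 2) = 10, so the
# partial batch is dropped in one mode (2 batches) and kept in the other (3).
assert num_batches(48, 20, drop_small_last_batch=True) == 2
assert num_batches(48, 20, drop_small_last_batch=False) == 3
```

This is exactly the difference visible in the `(20, 2, True)` versus `(20, 3, False)` rows of the test table below.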
diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py
index 36de0370d210..764507d7e39d 100644
--- a/rasa/utils/train_utils.py
+++ b/rasa/utils/train_utils.py
@@ -302,6 +302,7 @@ def create_data_generators(
     eval_num_examples: int = 0,
     random_seed: Optional[int] = None,
     shuffle: bool = True,
+    drop_small_last_batch: bool = False,
 ) -> Tuple[RasaBatchDataGenerator, Optional[RasaBatchDataGenerator]]:
     """Create data generators for train and optional validation data.
 
@@ -313,6 +314,8 @@ def create_data_generators(
         eval_num_examples: Number of examples to use for validation data.
         random_seed: The random seed.
         shuffle: Whether to shuffle data inside the data generator.
+        drop_small_last_batch: Whether to drop the last batch if it has fewer than half
+            a batch size of examples.
 
     Returns:
         The training data generator and optional validation data generator.
@@ -328,6 +331,7 @@ def create_data_generators(
             epochs=epochs,
             batch_strategy=batch_strategy,
             shuffle=shuffle,
+            drop_small_last_batch=drop_small_last_batch,
         )
 
     data_generator = RasaBatchDataGenerator(
@@ -336,6 +340,7 @@
         epochs=epochs,
         batch_strategy=batch_strategy,
         shuffle=shuffle,
+        drop_small_last_batch=drop_small_last_batch,
     )
 
     return data_generator, validation_data_generator
diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index 1f0c37a85faa..1fd84fdac47d 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -971,24 +971,35 @@ async def test_no_bilou_when_entity_recognition_off(
 @pytest.mark.timeout(120, func_only=True)
 @pytest.mark.parametrize(
-    "batch_size, expected_num_batches",
+    "batch_size, expected_num_batches, drop_small_last_batch",
     # the training dataset has 48 NLU examples
     [
-        (1, 48),
-        (8, 6),
-        (15, 3),
-        (16, 3),
-        (18, 3),
-        (20, 2),
-        (32, 2),
-        (64, 1),
-        (128, 1),
-        (256, 1),
+        (1, 48, True),
+        (8, 6, True),
+        (15, 3, True),
+        (16, 3, True),
+        (18, 3, True),
+        (20, 2, True),
+        (32, 2, True),
+        (64, 1, True),
+        (128, 1, True),
+        (256, 1, True),
+        (1, 48, False),
+        (8, 6, False),
+        (15, 4, False),
+        (16, 3, False),
+        (18, 3, False),
+        (20, 3, False),
+        (32, 2, False),
+        (64, 1, False),
+        (128, 1, False),
+        (256, 1, False),
     ],
 )
 async def test_dropping_of_last_partial_batch(
     batch_size: int,
     expected_num_batches: int,
+    drop_small_last_batch: bool,
     create_diet: Callable[..., DIETClassifier],
     train_and_preprocess: Callable[..., Tuple[TrainingData, List[GraphComponent]]],
 ):
@@ -1012,7 +1023,9 @@ async def test_dropping_of_last_partial_batch(
     )
 
     model_data = diet.preprocess_train_data(training_data)
-    data_generator, _ = train_utils.create_data_generators(model_data, batch_size, 1)
+    data_generator, _ = train_utils.create_data_generators(
+        model_data, batch_size, 1, drop_small_last_batch=drop_small_last_batch
+    )
 
     assert len(data_generator) == expected_num_batches
@@ -1041,6 +1054,8 @@ async def test_dropping_of_last_partial_batch_empty_data(
     )
 
     model_data = diet.preprocess_train_data(training_data)
-    data_generator, _ = train_utils.create_data_generators(model_data, 64, 1)
+    data_generator, _ = train_utils.create_data_generators(
+        model_data, 64, 1, drop_small_last_batch=True
+    )
 
     assert len(data_generator) == 0
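Finally, a hedged usage sketch of the updated helper, mirroring the calls in the tests above. It assumes a `model_data` object already built via `diet.preprocess_train_data(training_data)`; only the `drop_small_last_batch` keyword is new.

```python
from rasa.utils import train_utils

# Assumes `model_data` was built as in the tests above, e.g. via
# `diet.preprocess_train_data(training_data)`.
data_generator, validation_generator = train_utils.create_data_generators(
    model_data, 64, 1, drop_small_last_batch=True
)
# With the flag off (the restored default), a non-empty partial batch is
# kept, so `len(data_generator)` may be one larger for the same data.
```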