diff --git a/recipes/full_finetune_distributed.py b/recipes/full_finetune_distributed.py
index 7e6ca16b9f..3375f8fc4b 100644
--- a/recipes/full_finetune_distributed.py
+++ b/recipes/full_finetune_distributed.py
@@ -546,7 +546,7 @@ def _setup_data(
             batch_size=batch_size,
             sampler=sampler,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=partial(
                 collate_fn,
                 padding_idx=self._tokenizer.pad_id,
diff --git a/recipes/full_finetune_single_device.py b/recipes/full_finetune_single_device.py
index fa1b7b14ff..2addd92944 100644
--- a/recipes/full_finetune_single_device.py
+++ b/recipes/full_finetune_single_device.py
@@ -516,7 +516,7 @@ def _setup_data(
             batch_size=batch_size,
             sampler=sampler,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=partial(
                 collate_fn,
                 padding_idx=self._tokenizer.pad_id,
diff --git a/recipes/knowledge_distillation_single_device.py b/recipes/knowledge_distillation_single_device.py
index 833c9aec56..c2ee8c7cc4 100644
--- a/recipes/knowledge_distillation_single_device.py
+++ b/recipes/knowledge_distillation_single_device.py
@@ -526,7 +526,7 @@ def _setup_data(
             sampler=sampler,
             batch_size=batch_size,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=(
                 partial(
                     padded_collate_sft,
diff --git a/recipes/lora_dpo_distributed.py b/recipes/lora_dpo_distributed.py
index d655889305..e903ab274a 100644
--- a/recipes/lora_dpo_distributed.py
+++ b/recipes/lora_dpo_distributed.py
@@ -475,7 +475,7 @@ def _setup_data(
             batch_size=batch_size,
             sampler=sampler,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=partial(
                 padded_collate_dpo,
                 padding_idx=self._tokenizer.pad_id,
diff --git a/recipes/lora_dpo_single_device.py b/recipes/lora_dpo_single_device.py
index b7d931accc..c158d17875 100644
--- a/recipes/lora_dpo_single_device.py
+++ b/recipes/lora_dpo_single_device.py
@@ -364,7 +364,7 @@ def _setup_data(
             sampler=sampler,
             batch_size=batch_size,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=partial(
                 padded_collate_dpo,
                 padding_idx=self._tokenizer.pad_id,
diff --git a/recipes/lora_finetune_distributed.py b/recipes/lora_finetune_distributed.py
index 2be9aa94a2..1569dfee63 100644
--- a/recipes/lora_finetune_distributed.py
+++ b/recipes/lora_finetune_distributed.py
@@ -623,7 +623,7 @@ def _setup_data(
             batch_size=batch_size,
             sampler=sampler,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=partial(
                 collate_fn,
                 padding_idx=self._tokenizer.pad_id,
diff --git a/recipes/lora_finetune_single_device.py b/recipes/lora_finetune_single_device.py
index 6641863e4d..5d39b72086 100644
--- a/recipes/lora_finetune_single_device.py
+++ b/recipes/lora_finetune_single_device.py
@@ -535,7 +535,7 @@ def _setup_data(
             sampler=sampler,
             batch_size=batch_size,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=(
                 partial(
                     collate_fn,
diff --git a/recipes/ppo_full_finetune_single_device.py b/recipes/ppo_full_finetune_single_device.py
index 9f645b5fdd..7679af3fd3 100644
--- a/recipes/ppo_full_finetune_single_device.py
+++ b/recipes/ppo_full_finetune_single_device.py
@@ -580,7 +580,7 @@ def _setup_data(
             sampler=sampler,
             batch_size=batch_size,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=partial(
                 padded_collate,
                 pad_direction="left",
diff --git a/recipes/qat_distributed.py b/recipes/qat_distributed.py
index c6a7ec0ed1..eb2e44fae2 100644
--- a/recipes/qat_distributed.py
+++ b/recipes/qat_distributed.py
@@ -524,7 +524,7 @@ def _setup_data(
             batch_size=batch_size,
             sampler=sampler,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=partial(
                 padded_collate_sft,
                 padding_idx=self._tokenizer.pad_id,
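
For context, here is a minimal standalone sketch of the DataLoader setup these hunks converge on. It is not the recipe code: the dataset, sampler arguments, and `pad_collate` helper below are placeholders standing in for the recipes' tokenized dataset, `DistributedSampler` configuration, and `padded_collate_*` functions, and the comment about compile + flex attention simply restates the rationale from the diff.

```python
# Sketch only (placeholder names, not torchtune recipe code):
# drop_last=True keeps every batch the same size, which avoids the
# shape/recompilation issues noted in the diff for compile + flex attention.
from functools import partial

import torch
from torch.utils.data import DataLoader, DistributedSampler, TensorDataset

# Placeholder dataset standing in for the recipes' tokenized dataset.
ds = TensorDataset(torch.arange(10))
# Explicit num_replicas/rank so no process group is needed for this sketch.
sampler = DistributedSampler(ds, num_replicas=1, rank=0, shuffle=True, seed=0)


def pad_collate(batch, padding_idx=0):
    # Stand-in for padded_collate_sft / padded_collate_dpo / padded_collate.
    return batch


dataloader = DataLoader(
    ds,
    batch_size=4,
    sampler=sampler,
    # dropping last avoids shape issues with compile + flex attention
    drop_last=True,
    collate_fn=partial(pad_collate, padding_idx=0),
)

# With 10 samples and batch_size=4, drop_last=True yields 2 full batches;
# the trailing partial batch of 2 samples is dropped.
assert len(dataloader) == 2
```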