Set drop_last to always True (pytorch#1761)
RdoubleA authored and mori360 committed Oct 14, 2024
1 parent 951294f commit a20dd56
Showing 9 changed files with 9 additions and 9 deletions.
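
Each recipe previously read the flag from the dataset config via cfg_dataset.get("drop_last", True); this commit hardcodes drop_last=True so the final partial batch is always dropped. As the inline comment repeated in every hunk notes, dropping the last batch avoids shape issues with compile + flex attention. A minimal sketch of what drop_last does, using a hypothetical toy dataset that is not part of this commit:

import torch
from torch.utils.data import DataLoader, TensorDataset

# Hypothetical toy dataset: 10 samples do not divide evenly into batches of 4.
ds = TensorDataset(torch.arange(10))

loader = DataLoader(ds, batch_size=4, drop_last=False)
print([batch[0].shape[0] for batch in loader])  # [4, 4, 2] -> trailing partial batch

loader = DataLoader(ds, batch_size=4, drop_last=True)
print([batch[0].shape[0] for batch in loader])  # [4, 4] -> every batch has the same size

With drop_last=False the trailing batch of 2 introduces a second batch shape; hardcoding drop_last=True guarantees that every batch the compiled model sees has the same size.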
recipes/full_finetune_distributed.py (1 addition, 1 deletion)

@@ -546,7 +546,7 @@ def _setup_data(
             batch_size=batch_size,
             sampler=sampler,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=partial(
                 collate_fn,
                 padding_idx=self._tokenizer.pad_id,
recipes/full_finetune_single_device.py (1 addition, 1 deletion)

@@ -516,7 +516,7 @@ def _setup_data(
             batch_size=batch_size,
             sampler=sampler,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=partial(
                 collate_fn,
                 padding_idx=self._tokenizer.pad_id,
recipes/knowledge_distillation_single_device.py (1 addition, 1 deletion)

@@ -526,7 +526,7 @@ def _setup_data(
             sampler=sampler,
             batch_size=batch_size,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=(
                 partial(
                     padded_collate_sft,
recipes/lora_dpo_distributed.py (1 addition, 1 deletion)

@@ -475,7 +475,7 @@ def _setup_data(
             batch_size=batch_size,
             sampler=sampler,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=partial(
                 padded_collate_dpo,
                 padding_idx=self._tokenizer.pad_id,
recipes/lora_dpo_single_device.py (1 addition, 1 deletion)

@@ -364,7 +364,7 @@ def _setup_data(
             sampler=sampler,
             batch_size=batch_size,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=partial(
                 padded_collate_dpo,
                 padding_idx=self._tokenizer.pad_id,
recipes/lora_finetune_distributed.py (1 addition, 1 deletion)

@@ -623,7 +623,7 @@ def _setup_data(
             batch_size=batch_size,
             sampler=sampler,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=partial(
                 collate_fn,
                 padding_idx=self._tokenizer.pad_id,
recipes/lora_finetune_single_device.py (1 addition, 1 deletion)

@@ -535,7 +535,7 @@ def _setup_data(
             sampler=sampler,
             batch_size=batch_size,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=(
                 partial(
                     collate_fn,
recipes/ppo_full_finetune_single_device.py (1 addition, 1 deletion)

@@ -580,7 +580,7 @@ def _setup_data(
             sampler=sampler,
             batch_size=batch_size,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=partial(
                 padded_collate,
                 pad_direction="left",
recipes/qat_distributed.py (1 addition, 1 deletion)

@@ -524,7 +524,7 @@ def _setup_data(
             batch_size=batch_size,
             sampler=sampler,
             # dropping last avoids shape issues with compile + flex attention
-            drop_last=cfg_dataset.get("drop_last", True),
+            drop_last=True,
             collate_fn=partial(
                 padded_collate_sft,
                 padding_idx=self._tokenizer.pad_id,
