Merge branch 'main' into 1810-move-gemma-eval-and-generate

pytorch · Oct 12, 2024 · 673f665 · 673f665
2 parents 736f31e + 256e41a
commit 673f665
Show file tree

Hide file tree

Showing 5 changed files with 44 additions and 60 deletions.
diff --git a/torchtune/modules/low_precision/_utils.py b/torchtune/modules/low_precision/_utils.py
diff --git a/torchtune/modules/transformer.py b/torchtune/modules/transformer.py
@@ -11,6 +11,7 @@
 from torch import nn
 from torchtune.modules import MultiHeadAttention
 from torchtune.modules.attention_utils import _MaskType
+from torchtune.utils._logging import deprecated
 
 
 class TransformerSelfAttentionLayer(nn.Module):
@@ -619,6 +620,11 @@ def forward(
         return output
 
 
+@deprecated(
+    msg="Please use torchtune.modules.TransformerDecoder instead. \
+If you need an example, see torchtune.models.qwen2._component_builders.py \
+on how to use torch.modules.TiedLinear for the output projection."
+)
 class TiedEmbeddingTransformerDecoder(nn.Module):
     """
     Transformer Decoder with tied embedding weight. A key difference between

diff --git a/torchtune/training/quantization.py b/torchtune/training/quantization.py
@@ -6,7 +6,13 @@
 
 from typing import Callable, Optional
 
-from torchao.dtypes import TensorCoreTiledLayoutType
+from torchtune.utils._import_guard import _USE_NEW_TENSOR_CORE_TILED_LAYOUT_API
+
+if _USE_NEW_TENSOR_CORE_TILED_LAYOUT_API:
+    from torchao.dtypes import TensorCoreTiledLayout
+else:
+    from torchao.dtypes import TensorCoreTiledLayoutType as TensorCoreTiledLayout
+
 from torchao.quantization import (
     int4_weight_only,
     int8_dynamic_activation_int4_weight,
@@ -88,7 +94,7 @@ def __init__(self, groupsize: int = 128, inner_k_tiles: int = 8):
         self.inner_k_tiles = inner_k_tiles
 
     def quantize(self, model):
-        layout_type = TensorCoreTiledLayoutType(self.inner_k_tiles)
+        layout_type = TensorCoreTiledLayout(self.inner_k_tiles)
         quantize_fn = int4_weight_only(self.groupsize, layout_type)
         quantize_(model, quantize_fn)
         return model

diff --git a/torchtune/utils/_import_guard.py b/torchtune/utils/_import_guard.py
@@ -5,11 +5,19 @@
 # LICENSE file in the root directory of this source tree.
 
 import torch
-from torchtune.utils._version import torch_version_ge
+import torchao
+from torchtune.utils._version import _is_fbcode, _nightly_version_ge, torch_version_ge
 
 # We can only use flex attention / BlockMask if torch version >= 2.5.0 and GPU is Turing / SM75 and above
 _SUPPORTS_FLEX_ATTENTION = (
     torch_version_ge("2.5.0")
     and torch.cuda.is_available()
     and torch.cuda.get_device_capability() >= (7, 5)
 )
+
+torchao_version = torchao.__version__
+# Compare (major, minor) as ints: a plain string compare ranks "0.10.0" below "0.6.0".
+_USE_NEW_TENSOR_CORE_TILED_LAYOUT_API = not _is_fbcode() and (
+    ("dev" not in torchao_version and tuple(map(int, torchao_version.split(".")[:2])) >= (0, 6))
+    or ("dev" in torchao_version and _nightly_version_ge(torchao_version, "2024-10-10"))
+)
diff --git a/torchtune/utils/_version.py b/torchtune/utils/_version.py
@@ -3,6 +3,9 @@
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+
+from datetime import datetime
+
 import torch
 
 
@@ -23,3 +26,21 @@ def torch_version_ge(version: str) -> bool:
         True
     """
     return version in torch.__version__ or torch.__version__ >= version
+
+
+def _is_fbcode():  # True when torch.version has no `git_version` attribute
+    return not hasattr(torch.version, "git_version")
+
+
+def _nightly_version_ge(ao_version_str: str, date: str) -> bool:
+    """
+    Check whether a torchao nightly build is at least as new as ``date``.
+
+    The build date is the ``%Y%m%d`` text that follows "dev" in the version;
+    ``date`` is given as ``%Y-%m-%d``. Returns True if build date >= ``date``.
+    """
+    public_version = ao_version_str.split("+")[0]  # drop any local "+..." suffix
+    nightly_stamp = public_version.split("dev")[1]  # text right after "dev"
+    built_on = datetime.strptime(nightly_stamp, "%Y%m%d")
+    cutoff = datetime.strptime(date, "%Y-%m-%d")
+    return built_on >= cutoff