Convert a safetensor checkpoint from Hugging Face hub #1662

Merged: 32 commits, Jun 24, 2024

Changes from 18 commits

Commits (32)
72ce619
chore: adding gemma and llama3
ariG23498 Jun 5, 2024
8f2fe93
chore: adding init
ariG23498 Jun 5, 2024
a3fdc06
chore: removing hard coded values
ariG23498 Jun 6, 2024
3235592
chore: using backbone properties
ariG23498 Jun 6, 2024
0bb47ad
chore: reformat
ariG23498 Jun 7, 2024
4994ca8
chore: review changes
ariG23498 Jun 10, 2024
606fcd7
chore: removing einops with custom np operations
ariG23498 Jun 11, 2024
3eec438
fix: variable name
ariG23498 Jun 11, 2024
225219b
check: none type for reshape and transpose patterns
ariG23498 Jun 11, 2024
7f42b2d
chore: fixing the nesting of reshape and transpose patterns
ariG23498 Jun 11, 2024
59aeb70
fixing nesting of patterns
ariG23498 Jun 11, 2024
47c1ea3
chore: gemma weight rearrange fix
ariG23498 Jun 11, 2024
8f90f9b
chore: adding a hook function to reshape and transpose the hf tensors…
ariG23498 Jun 15, 2024
f016fbb
fix: variable to assign
ariG23498 Jun 15, 2024
09d8689
fix: gemma port
ariG23498 Jun 15, 2024
99588a1
Merge branch 'master' into aritra/hf-port
ariG23498 Jun 15, 2024
767ee2a
Merge branch 'master' into aritra/hf-port
ariG23498 Jun 18, 2024
07183d5
chore: adding tests
ariG23498 Jun 18, 2024
cc969bc
review comments
ariG23498 Jun 19, 2024
56e0dfc
adding safetensors as a dep
ariG23498 Jun 19, 2024
57a3d33
Merge branch 'master' into aritra/hf-port
ariG23498 Jun 20, 2024
22109a2
chore: adding jax memory cleanup
ariG23498 Jun 20, 2024
e021465
utf 8 encoding
ariG23498 Jun 20, 2024
7d0cfad
chore: changing tests
ariG23498 Jun 20, 2024
e99f98e
chore: fixing tests
ariG23498 Jun 21, 2024
f61a9fa
fix tests
ariG23498 Jun 21, 2024
5a29dc0
chore: adding guard rails for None types
ariG23498 Jun 21, 2024
85c2586
Merge branch 'master' into aritra/hf-port
ariG23498 Jun 24, 2024
c34241b
Trigger Build
ariG23498 Jun 24, 2024
851fd69
review suggestions
ariG23498 Jun 24, 2024
cf1ff29
fix raising ValueError
ariG23498 Jun 24, 2024
b06c6e4
fix error message
ariG23498 Jun 24, 2024
9 changes: 7 additions & 2 deletions keras_nlp/src/models/backbone.py
@@ -20,15 +20,16 @@
from keras_nlp.src.utils.preset_utils import CONFIG_FILE
from keras_nlp.src.utils.preset_utils import MODEL_WEIGHTS_FILE
from keras_nlp.src.utils.preset_utils import check_config_class
from keras_nlp.src.utils.preset_utils import check_format
from keras_nlp.src.utils.preset_utils import get_file
from keras_nlp.src.utils.preset_utils import jax_memory_cleanup
from keras_nlp.src.utils.preset_utils import list_presets
from keras_nlp.src.utils.preset_utils import list_subclasses
from keras_nlp.src.utils.preset_utils import load_serialized_object
from keras_nlp.src.utils.preset_utils import save_metadata
from keras_nlp.src.utils.preset_utils import save_serialized_object
from keras_nlp.src.utils.preset_utils import validate_metadata
from keras_nlp.src.utils.python_utils import classproperty
from keras_nlp.src.utils.transformers.convert import load_transformers_backbone


@keras_nlp_export("keras_nlp.models.Backbone")
@@ -198,7 +199,11 @@ class like `keras_nlp.models.Backbone.from_preset()`, or from
)
```
"""
validate_metadata(preset)
format = check_format(preset)

if format == "transformers":
return load_transformers_backbone(cls, preset, load_weights)

preset_cls = check_config_class(preset)
if not issubclass(preset_cls, cls):
raise ValueError(
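With this change, `Backbone.from_preset()` dispatches to the transformers loader whenever `check_format` detects a safetensors checkpoint. A minimal usage sketch, assuming the preset argument points at a hypothetical local directory holding a Hugging Face Gemma checkpoint (`config.json` plus `model.safetensors*`); handle formats other than local directories depend on what `get_file`/`check_file_exists` accept in this branch:

import keras_nlp

# Hypothetical local directory containing a Hugging Face Gemma checkpoint
# (config.json, model.safetensors.index.json, safetensors shards,
# tokenizer.model). check_format() sees the safetensors files and routes
# through load_transformers_backbone().
preset_dir = "./gemma-2b-hf"

backbone = keras_nlp.models.GemmaBackbone.from_preset(preset_dir)
backbone.summary()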
9 changes: 7 additions & 2 deletions keras_nlp/src/models/preprocessor.py
@@ -21,11 +21,11 @@
from keras_nlp.src.utils.preset_utils import TOKENIZER_CONFIG_FILE
from keras_nlp.src.utils.preset_utils import check_config_class
from keras_nlp.src.utils.preset_utils import check_file_exists
from keras_nlp.src.utils.preset_utils import check_format
from keras_nlp.src.utils.preset_utils import list_presets
from keras_nlp.src.utils.preset_utils import list_subclasses
from keras_nlp.src.utils.preset_utils import load_serialized_object
from keras_nlp.src.utils.preset_utils import save_serialized_object
from keras_nlp.src.utils.preset_utils import validate_metadata
from keras_nlp.src.utils.python_utils import classproperty


@@ -127,7 +127,12 @@ def from_preset(
)
```
"""
validate_metadata(preset)
format = check_format(preset)

if format == "transformers":
tokenizer = cls.tokenizer_cls.from_preset(preset)
return cls(tokenizer=tokenizer, **kwargs)

if cls == Preprocessor:
raise ValueError(
"Do not call `Preprocessor.from_preset()` directly. Instead call a "
9 changes: 7 additions & 2 deletions keras_nlp/src/models/task.py
@@ -29,13 +29,13 @@
from keras_nlp.src.utils.preset_utils import TASK_WEIGHTS_FILE
from keras_nlp.src.utils.preset_utils import check_config_class
from keras_nlp.src.utils.preset_utils import check_file_exists
from keras_nlp.src.utils.preset_utils import check_format
from keras_nlp.src.utils.preset_utils import get_file
from keras_nlp.src.utils.preset_utils import jax_memory_cleanup
from keras_nlp.src.utils.preset_utils import list_presets
from keras_nlp.src.utils.preset_utils import list_subclasses
from keras_nlp.src.utils.preset_utils import load_serialized_object
from keras_nlp.src.utils.preset_utils import save_serialized_object
from keras_nlp.src.utils.preset_utils import validate_metadata
from keras_nlp.src.utils.python_utils import classproperty


@@ -213,7 +213,12 @@ def from_preset(
)
```
"""
validate_metadata(preset)
format = check_format(preset)

if format == "transformers":
backbone = cls.backbone_cls.from_preset(preset)
preprocessor = cls.preprocessor_cls.from_preset(preset)
return cls(backbone=backbone, preprocessor=preprocessor, **kwargs)

if cls == Task:
raise ValueError(
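The task-level path composes the other two branches: `from_preset` on a task builds the preprocessor (from the tokenizer) and the backbone from the transformers files and wires them together. A sketch, reusing the same hypothetical checkpoint directory as in the backbone example above:

import keras_nlp

# Hypothetical directory; same Hugging Face Gemma checkpoint as above.
preset_dir = "./gemma-2b-hf"

# Internally: check_format() -> "transformers", then
# backbone = cls.backbone_cls.from_preset(...),
# preprocessor = cls.preprocessor_cls.from_preset(...),
# and finally cls(backbone=..., preprocessor=...).
causal_lm = keras_nlp.models.GemmaCausalLM.from_preset(preset_dir)
causal_lm.generate("Hello", max_length=16)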
8 changes: 6 additions & 2 deletions keras_nlp/src/tokenizers/tokenizer.py
@@ -20,14 +20,15 @@
from keras_nlp.src.utils.preset_utils import TOKENIZER_ASSET_DIR
from keras_nlp.src.utils.preset_utils import TOKENIZER_CONFIG_FILE
from keras_nlp.src.utils.preset_utils import check_config_class
from keras_nlp.src.utils.preset_utils import check_format
from keras_nlp.src.utils.preset_utils import get_file
from keras_nlp.src.utils.preset_utils import list_presets
from keras_nlp.src.utils.preset_utils import list_subclasses
from keras_nlp.src.utils.preset_utils import load_serialized_object
from keras_nlp.src.utils.preset_utils import save_serialized_object
from keras_nlp.src.utils.preset_utils import save_tokenizer_assets
from keras_nlp.src.utils.preset_utils import validate_metadata
from keras_nlp.src.utils.python_utils import classproperty
from keras_nlp.src.utils.transformers.convert import load_transformers_tokenizer


@keras_nlp_export(
@@ -215,7 +216,10 @@ class like `keras_nlp.models.Tokenizer.from_preset()`, or from
tokenizer.detokenize([5, 6, 7, 8, 9])
```
"""
validate_metadata(preset)
format = check_format(preset)
if format == "transformers":
return load_transformers_tokenizer(cls, preset)

preset_cls = check_config_class(
preset, config_file=TOKENIZER_CONFIG_FILE
)
8 changes: 7 additions & 1 deletion keras_nlp/src/utils/preset_utils.py
@@ -546,7 +546,12 @@ def load_config(preset, config_file=CONFIG_FILE):
return config


def validate_metadata(preset):
def check_format(preset):
if check_file_exists(preset, "model.safetensors") or check_file_exists(
preset, "model.safetensors.index.json"
):
return "transformers"

if not check_file_exists(preset, METADATA_FILE):
raise FileNotFoundError(
f"The preset directory `{preset}` doesn't have a file named `{METADATA_FILE}`. "
@@ -559,6 +564,7 @@ def validate_metadata(preset):
f"`{METADATA_FILE}` in the preset directory `{preset}` doesn't have `keras_version`. "
"Please verify that the model you are trying to load is a Keras model."
)
return "keras"


def load_serialized_object(
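In other words, `check_format` keys entirely off which files are present in the preset. A small sketch of the contract, using hypothetical directory names:

from keras_nlp.src.utils.preset_utils import check_format

# Directory with metadata.json (a Keras preset):
check_format("./gemma-2b-keras-preset")   # -> "keras"

# Directory with model.safetensors or model.safetensors.index.json:
check_format("./gemma-2b-hf")             # -> "transformers"

# Neither safetensors nor metadata.json -> FileNotFoundError;
# metadata.json without keras_version -> ValueError,
# as exercised by the updated tests in preset_utils_test.py.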
6 changes: 3 additions & 3 deletions keras_nlp/src/utils/preset_utils_test.py
@@ -26,7 +26,7 @@
from keras_nlp.src.utils.preset_utils import CONFIG_FILE
from keras_nlp.src.utils.preset_utils import METADATA_FILE
from keras_nlp.src.utils.preset_utils import TOKENIZER_CONFIG_FILE
from keras_nlp.src.utils.preset_utils import validate_metadata
from keras_nlp.src.utils.preset_utils import check_format


class PresetUtilsTest(TestCase):
@@ -102,7 +102,7 @@ def test_missing_metadata(self):
with self.assertRaisesRegex(
FileNotFoundError, f"doesn't have a file named `{METADATA_FILE}`"
):
validate_metadata(preset_dir)
check_format(preset_dir)

def test_incorrect_metadata(self):
temp_dir = self.get_temp_dir()
@@ -114,4 +114,4 @@ def test_incorrect_metadata(self):
json.dump(data, f)

with self.assertRaisesRegex(ValueError, "doesn't have `keras_version`"):
validate_metadata(preset_dir)
check_format(preset_dir)
13 changes: 13 additions & 0 deletions keras_nlp/src/utils/transformers/__init__.py
@@ -0,0 +1,13 @@
# Copyright 2023 The KerasNLP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
38 changes: 38 additions & 0 deletions keras_nlp/src/utils/transformers/convert.py
@@ -0,0 +1,38 @@
# Copyright 2024 The KerasNLP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Convert huggingface models to KerasNLP."""


from keras_nlp.src.utils.transformers.convert_gemma import load_gemma_backbone
from keras_nlp.src.utils.transformers.convert_gemma import load_gemma_tokenizer
from keras_nlp.src.utils.transformers.convert_llama3 import load_llama3_backbone
from keras_nlp.src.utils.transformers.convert_llama3 import (
load_llama3_tokenizer,
)


def load_transformers_backbone(cls, preset, load_weights):
if cls.__name__ == "GemmaBackbone":
return load_gemma_backbone(cls, preset, load_weights)
if cls.__name__ == "Llama3Backbone":
return load_llama3_backbone(cls, preset, load_weights)
raise ValueError(f"No conversion huggingface/transformers to {cls}")
Review comment (Member):

If a user doesn't know that conversion is required to load a transformers checkpoint in Keras and tries to load a transformers checkpoint that has no conversion, they'll end up here, right? Similar to #1574. In that case, I think it'd be nice to have an error message helping the user know that if conversion is not supported, they can switch to loading a Keras checkpoint if available.

Reply (Collaborator, Author):

I have modified the ValueError message. Let me know if that was what you wanted.



def load_transformers_tokenizer(cls, preset):
if cls.__name__ == "GemmaTokenizer":
return load_gemma_tokenizer(cls, preset)
if cls.__name__ == "Llama3Tokenizer":
return load_llama3_tokenizer(cls, preset)
raise ValueError(f"No conversion huggingface/transformers to {cls}")
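The exact wording of the friendlier error landed in later commits ("fix raising ValueError", "fix error message") that are not part of this 18-commit view. A hedged sketch of the kind of message the reviewer asked for, illustrative only and not the final text:

# Illustrative only; the merged PR uses its own wording.
raise ValueError(
    f"{cls} has no conversion from the huggingface/transformers format. "
    "If this model is also published as a Keras preset, load that preset "
    "instead, or open an issue to request a converter."
)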
182 changes: 182 additions & 0 deletions keras_nlp/src/utils/transformers/convert_gemma.py
@@ -0,0 +1,182 @@
# Copyright 2024 The KerasNLP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial

import numpy as np

from keras_nlp.src.utils.preset_utils import get_file
from keras_nlp.src.utils.preset_utils import load_config
from keras_nlp.src.utils.transformers.safetensor_utils import set_keras_weight


def load_gemma_backbone(cls, preset, load_weights):
"""
Load and initialize the Gemma backbone model.

Args:
cls (class): Keras model class.
preset (str): Preset configuration name.
load_weights (bool): Whether to load the weights.

Returns:
backbone: Initialized Keras model backbone.
"""
transformers_config = load_config(preset, "config.json")
Review comment (Member):

We have a constant for config.json here. We have a plan to change the name of this file in the future, so using the constant would make future changes easier.

Reply (Collaborator, Author):

Here the config.json comes from the Hugging Face repository. I have added another constant to support this file name and am now using the constant. Does the current implementation look good?


backbone = cls(
vocabulary_size=transformers_config["vocab_size"],
num_layers=transformers_config["num_hidden_layers"],
num_query_heads=transformers_config["num_attention_heads"],
num_key_value_heads=transformers_config["num_key_value_heads"],
hidden_dim=transformers_config["hidden_size"],
intermediate_dim=transformers_config["intermediate_size"] * 2,
head_dim=transformers_config["head_dim"],
)

if load_weights:
safetensor_config = load_config(preset, "model.safetensors.index.json")
safetensor_files = {
fname: get_file(preset, fname)
for fname in set(safetensor_config["weight_map"].values())
}
port_weight = partial(
set_keras_weight,
safetensor_files=safetensor_files,
safetensor_config=safetensor_config,
)

# Embedding layer
port_weight(
keras_variable=backbone.get_layer("token_embedding").variables[0],
hf_weight_key="model.embed_tokens.weight",
)

# Attention blocks
for i in range(backbone.num_layers):
decoder_layer = backbone.get_layer(f"decoder_block_{i}")
# Norm layers
port_weight(
keras_variable=decoder_layer.pre_attention_norm.variables[0],
hf_weight_key=f"model.layers.{i}.input_layernorm.weight",
)
port_weight(
keras_variable=decoder_layer.pre_ffw_norm.variables[0],
hf_weight_key=f"model.layers.{i}.post_attention_layernorm.weight",
)

# Attention layers
port_weight(
keras_variable=decoder_layer.attention.query_dense.variables[0],
hf_weight_key=f"model.layers.{i}.self_attn.q_proj.weight",
# rearrange_patterns="(a c) b -> a b c",
# rearrange_dims={"a": backbone.num_query_heads},
hook_fn=lambda hf_tensor, keras_shape: np.transpose(
np.reshape(
hf_tensor,
(keras_shape[0], keras_shape[2], keras_shape[1]),
),
axes=(0, 2, 1),
),
)
port_weight(
keras_variable=decoder_layer.attention.key_dense.variables[0],
hf_weight_key=f"model.layers.{i}.self_attn.k_proj.weight",
# rearrange_patterns="(a c) b -> a b c",
# rearrange_dims={"a": backbone.num_key_value_heads},
hook_fn=lambda hf_tensor, keras_shape: np.transpose(
np.reshape(
hf_tensor,
(keras_shape[0], keras_shape[2], keras_shape[1]),
),
axes=(0, 2, 1),
),
)
port_weight(
keras_variable=decoder_layer.attention.value_dense.variables[0],
hf_weight_key=f"model.layers.{i}.self_attn.v_proj.weight",
# rearrange_patterns="(a c) b -> a b c",
# rearrange_dims={"a": backbone.num_key_value_heads},
hook_fn=lambda hf_tensor, keras_shape: np.transpose(
np.reshape(
hf_tensor,
(keras_shape[0], keras_shape[2], keras_shape[1]),
),
axes=(0, 2, 1),
),
)
port_weight(
keras_variable=decoder_layer.attention.output_dense.variables[
0
],
hf_weight_key=f"model.layers.{i}.self_attn.o_proj.weight",
# rearrange_patterns="c (a b) -> a b c",
# rearrange_dims={"a": backbone.num_query_heads},
hook_fn=lambda hf_tensor, keras_shape: np.transpose(
np.reshape(
hf_tensor,
(keras_shape[2], keras_shape[0], keras_shape[1]),
),
axes=(1, 2, 0),
),
)

# MLP layers
port_weight(
keras_variable=decoder_layer.gating_ffw.variables[0],
hf_weight_key=f"model.layers.{i}.mlp.gate_proj.weight",
# rearrange_patterns="b a -> a b",
hook_fn=lambda hf_tensor, _: np.transpose(
hf_tensor, axes=(1, 0)
),
)
port_weight(
keras_variable=decoder_layer.gating_ffw_2.variables[0],
hf_weight_key=f"model.layers.{i}.mlp.up_proj.weight",
# rearrange_patterns="b a -> a b",
hook_fn=lambda hf_tensor, _: np.transpose(
hf_tensor, axes=(1, 0)
),
)
port_weight(
keras_variable=decoder_layer.ffw_linear.variables[0],
hf_weight_key=f"model.layers.{i}.mlp.down_proj.weight",
# rearrange_patterns="b a -> a b",
hook_fn=lambda hf_tensor, _: np.transpose(
hf_tensor, axes=(1, 0)
),
)

# Final normalization layer
port_weight(
keras_variable=backbone.get_layer("final_normalization").variables[
0
],
hf_weight_key="model.norm.weight",
)

return backbone


def load_gemma_tokenizer(cls, preset):
"""
Load the Gemma tokenizer.

Args:
cls (class): Tokenizer class.
preset (str): Preset configuration name.

Returns:
tokenizer: Initialized tokenizer.
"""
return cls(get_file(preset, "tokenizer.model"))
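The `hook_fn` lambdas reproduce the einops patterns left in the comments (the einops dependency was dropped in commit 606fcd7). A standalone check, with made-up dimensions, that the reshape-then-transpose used for the q/k/v projections matches the "(a c) b -> a b c" rearrangement:

import numpy as np

# Made-up dimensions: a = number of heads, b = hidden_dim, c = head_dim.
a, b, c = 2, 6, 3
hf_tensor = np.arange(a * c * b).reshape(a * c, b)  # HF layout: ((a c), b)
keras_shape = (a, b, c)                             # Keras kernel layout: (a, b, c)

# The hook_fn from the PR: reshape to (a, c, b), then swap the last two axes.
ported = np.transpose(
    np.reshape(hf_tensor, (keras_shape[0], keras_shape[2], keras_shape[1])),
    axes=(0, 2, 1),
)

# The einops pattern "(a c) b -> a b c", written out element by element.
expected = np.empty((a, b, c), dtype=hf_tensor.dtype)
for i in range(a):
    for j in range(b):
        for k in range(c):
            expected[i, j, k] = hf_tensor[i * c + k, j]

assert np.array_equal(ported, expected)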