diff --git a/eanet.py b/eanet_keras_core.py
index 5eda90f..6aaf0f9 100644
--- a/eanet.py
+++ b/eanet_keras_core.py
@@ -1,8 +1,9 @@
 """
 Title: Image classification with EANet (External Attention Transformer)
 Author: [ZhiYong Chang](https://github.com/czy00000)
+Converted to Keras Core: [Muhammad Anas Raza](https://anasrz.com)
 Date created: 2021/10/19
-Last modified: 2021/10/19
+Last modified: 2023/07/18
 Description: Image classification with a Transformer that leverages external attention.
 Accelerator: GPU
 """
@@ -18,25 +19,18 @@
 shared memories, which can be implemented easily by simply using two cascaded
 linear layers and two normalization layers. It conveniently replaces self-attention
 as used in existing architectures. External attention has linear complexity, as it only
 implicitly considers the correlations between all samples.
-
-This example requires TensorFlow 2.5 or higher, as well as
-[TensorFlow Addons](https://www.tensorflow.org/addons/overview) package,
-which can be installed using the following command:
-
-```python
-pip install -U tensorflow-addons
-```
 """
 
 """
 ## Setup
 """
 
+import keras_core as keras
+from keras_core import layers
+from keras_core import ops
 import numpy as np
 import tensorflow as tf
-from tensorflow import keras
-from tensorflow.keras import layers
-import tensorflow_addons as tfa
+
 import matplotlib.pyplot as plt
 
@@ -144,21 +138,21 @@ def external_attention(
 
     x = layers.Dense(dim * dim_coefficient)(x)
     # create tensor [batch_size, num_patches, num_heads, dim*dim_coefficient//num_heads]
-    x = tf.reshape(
-        x, shape=(-1, num_patch, num_heads, dim * dim_coefficient // num_heads)
+    x = ops.reshape(
+        x, (-1, num_patch, num_heads, dim * dim_coefficient // num_heads)
     )
-    x = tf.transpose(x, perm=[0, 2, 1, 3])
+    x = ops.transpose(x, axes=[0, 2, 1, 3])
     # a linear layer M_k
     attn = layers.Dense(dim // dim_coefficient)(x)
     # normalize attention map
     attn = layers.Softmax(axis=2)(attn)
     # dobule-normalization
-    attn = attn / (1e-9 + tf.reduce_sum(attn, axis=-1, keepdims=True))
+    attn = ops.divide(attn, ops.convert_to_tensor(1e-9) + ops.sum(attn, axis=-1, keepdims=True))
     attn = layers.Dropout(attention_dropout)(attn)
     # a linear layer M_v
     x = layers.Dense(dim * dim_coefficient // num_heads)(attn)
-    x = tf.transpose(x, perm=[0, 2, 1, 3])
-    x = tf.reshape(x, [-1, num_patch, dim * dim_coefficient])
+    x = ops.transpose(x, axes=[0, 2, 1, 3])
+    x = ops.reshape(x, [-1, num_patch, dim * dim_coefficient])
     # a linear layer to project original dim
     x = layers.Dense(dim)(x)
     x = layers.Dropout(projection_dropout)(x)
@@ -171,7 +165,7 @@ def external_attention(
 
 
 def mlp(x, embedding_dim, mlp_dim, drop_rate=0.2):
-    x = layers.Dense(mlp_dim, activation=tf.nn.gelu)(x)
+    x = layers.Dense(mlp_dim, activation=ops.gelu)(x)
     x = layers.Dropout(drop_rate)(x)
     x = layers.Dense(embedding_dim)(x)
     x = layers.Dropout(drop_rate)(x)
@@ -272,7 +266,7 @@ model = get_model(attention_type="external_attention")
 
 model.compile(
     loss=keras.losses.CategoricalCrossentropy(label_smoothing=label_smoothing),
-    optimizer=tfa.optimizers.AdamW(
+    optimizer=keras.optimizers.AdamW(
         learning_rate=learning_rate, weight_decay=weight_decay
     ),
     metrics=[
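
As a quick sanity check on the converted code paths (not part of the diff itself), the sketch below rebuilds just the external-attention block with `keras_core.ops` calls matching those introduced above, confirms the output shape, and compiles with `keras.optimizers.AdamW`, the built-in replacement for `tfa.optimizers.AdamW`. The hyperparameter values (`num_patch`, `dim`, `num_heads`, `dim_coefficient`, learning rate, weight decay) and the `heads` helper name are illustrative assumptions, not values taken from the example.

```python
# Minimal sketch: verify that the keras_core ops used in the converted
# external_attention block produce the expected shapes.
# Hyperparameters below are illustrative assumptions.
import keras_core as keras
from keras_core import layers, ops

num_patch, dim, num_heads, dim_coefficient = 64, 64, 4, 4
heads = num_heads * dim_coefficient  # expanded head count, as in the example

inputs = keras.Input(shape=(num_patch, dim))
x = layers.Dense(dim * dim_coefficient)(inputs)
# [batch, num_patch, heads, dim * dim_coefficient // heads]
x = ops.reshape(x, (-1, num_patch, heads, dim * dim_coefficient // heads))
x = ops.transpose(x, axes=[0, 2, 1, 3])
attn = layers.Dense(dim // dim_coefficient)(x)  # linear layer M_k
attn = layers.Softmax(axis=2)(attn)
# double-normalization, written with ops.divide / ops.sum as in the diff
attn = ops.divide(
    attn, ops.convert_to_tensor(1e-9) + ops.sum(attn, axis=-1, keepdims=True)
)
x = layers.Dense(dim * dim_coefficient // heads)(attn)  # linear layer M_v
x = ops.transpose(x, axes=[0, 2, 1, 3])
x = ops.reshape(x, [-1, num_patch, dim * dim_coefficient])
outputs = layers.Dense(dim)(x)  # project back to the original dim

model = keras.Model(inputs, outputs)
print(model.outputs[0].shape)  # expected: (None, 64, 64)

# keras.optimizers.AdamW replaces tfa.optimizers.AdamW; the learning rate
# and weight decay here are placeholder values.
model.compile(
    loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
    optimizer=keras.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-4),
)
```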