diff --git a/eanet.py b/eanet_keras_core.py
index 5eda90f..6aaf0f9 100644
--- a/eanet.py
+++ b/eanet_keras_core.py
@@ -1,8 +1,9 @@
 """
 Title: Image classification with EANet (External Attention Transformer)
 Author: [ZhiYong Chang](https://github.com/czy00000)
+Converted to Keras Core: [Muhammad Anas Raza](https://anasrz.com)
 Date created: 2021/10/19
-Last modified: 2021/10/19
+Last modified: 2023/07/18
 Description: Image classification with a Transformer that leverages external attention.
 Accelerator: GPU
 """
@@ -18,25 +19,18 @@
 shared memories, which can be implemented easily by simply using two cascaded
 linear layers and two normalization layers. It conveniently replaces self-attention
 as used in existing architectures. External attention has linear complexity, as it only
 implicitly considers the correlations between all samples.
-
-This example requires TensorFlow 2.5 or higher, as well as
-[TensorFlow Addons](https://www.tensorflow.org/addons/overview) package,
-which can be installed using the following command:
-
-```python
-pip install -U tensorflow-addons
-```
 """
 
 """
 ## Setup
 """
 
+import keras_core as keras
+from keras_core import layers
+from keras_core import ops
 import numpy as np
 import tensorflow as tf
-from tensorflow import keras
-from tensorflow.keras import layers
-import tensorflow_addons as tfa
+
 import matplotlib.pyplot as plt
 
@@ -144,21 +138,21 @@ def external_attention(
 
     x = layers.Dense(dim * dim_coefficient)(x)
     # create tensor [batch_size, num_patches, num_heads, dim*dim_coefficient//num_heads]
-    x = tf.reshape(
-        x, shape=(-1, num_patch, num_heads, dim * dim_coefficient // num_heads)
+    x = ops.reshape(
+        x, (-1, num_patch, num_heads, dim * dim_coefficient // num_heads)
     )
-    x = tf.transpose(x, perm=[0, 2, 1, 3])
+    x = ops.transpose(x, axes=[0, 2, 1, 3])
     # a linear layer M_k
     attn = layers.Dense(dim // dim_coefficient)(x)
     # normalize attention map
     attn = layers.Softmax(axis=2)(attn)
     # dobule-normalization
-    attn = attn / (1e-9 + tf.reduce_sum(attn, axis=-1, keepdims=True))
+    attn = ops.divide(attn, ops.convert_to_tensor(1e-9) + ops.sum(attn, axis=-1, keepdims=True))
     attn = layers.Dropout(attention_dropout)(attn)
     # a linear layer M_v
     x = layers.Dense(dim * dim_coefficient // num_heads)(attn)
-    x = tf.transpose(x, perm=[0, 2, 1, 3])
-    x = tf.reshape(x, [-1, num_patch, dim * dim_coefficient])
+    x = ops.transpose(x, axes=[0, 2, 1, 3])
+    x = ops.reshape(x, [-1, num_patch, dim * dim_coefficient])
     # a linear layer to project original dim
     x = layers.Dense(dim)(x)
     x = layers.Dropout(projection_dropout)(x)
@@ -171,7 +165,7 @@ def external_attention(
 
 
 def mlp(x, embedding_dim, mlp_dim, drop_rate=0.2):
-    x = layers.Dense(mlp_dim, activation=tf.nn.gelu)(x)
+    x = layers.Dense(mlp_dim, activation=ops.gelu)(x)
     x = layers.Dropout(drop_rate)(x)
     x = layers.Dense(embedding_dim)(x)
     x = layers.Dropout(drop_rate)(x)
@@ -272,7 +266,7 @@ model = get_model(attention_type="external_attention")
 
 model.compile(
     loss=keras.losses.CategoricalCrossentropy(label_smoothing=label_smoothing),
-    optimizer=tfa.optimizers.AdamW(
+    optimizer=keras.optimizers.AdamW(
         learning_rate=learning_rate, weight_decay=weight_decay
     ),
     metrics=[
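
As a quick sanity check on the converted code paths (not part of the diff itself), the sketch below rebuilds just the external-attention block with `keras_core.ops` calls matching those introduced above, confirms the output shape, and compiles with `keras.optimizers.AdamW`, the built-in replacement for `tfa.optimizers.AdamW`. The hyperparameter values (`num_patch`, `dim`, `num_heads`, `dim_coefficient`, learning rate, weight decay) and the `heads` helper name are illustrative assumptions, not values taken from the example.

```python
# Minimal sketch: verify that the keras_core ops used in the converted
# external_attention block produce the expected shapes.
# Hyperparameters below are illustrative assumptions.
import keras_core as keras
from keras_core import layers, ops

num_patch, dim, num_heads, dim_coefficient = 64, 64, 4, 4
heads = num_heads * dim_coefficient  # expanded head count, as in the example

inputs = keras.Input(shape=(num_patch, dim))
x = layers.Dense(dim * dim_coefficient)(inputs)
# [batch, num_patch, heads, dim * dim_coefficient // heads]
x = ops.reshape(x, (-1, num_patch, heads, dim * dim_coefficient // heads))
x = ops.transpose(x, axes=[0, 2, 1, 3])
attn = layers.Dense(dim // dim_coefficient)(x)  # linear layer M_k
attn = layers.Softmax(axis=2)(attn)
# double-normalization, written with ops.divide / ops.sum as in the diff
attn = ops.divide(
    attn, ops.convert_to_tensor(1e-9) + ops.sum(attn, axis=-1, keepdims=True)
)
x = layers.Dense(dim * dim_coefficient // heads)(attn)  # linear layer M_v
x = ops.transpose(x, axes=[0, 2, 1, 3])
x = ops.reshape(x, [-1, num_patch, dim * dim_coefficient])
outputs = layers.Dense(dim)(x)  # project back to the original dim

model = keras.Model(inputs, outputs)
print(model.outputs[0].shape)  # expected: (None, 64, 64)

# keras.optimizers.AdamW replaces tfa.optimizers.AdamW; the learning rate
# and weight decay here are placeholder values.
model.compile(
    loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
    optimizer=keras.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-4),
)
```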