
Commit

Merge pull request #10 from tensorops/dev
Dev
soran-ghaderi authored Sep 2, 2022
2 parents dfed348 + 4de1ee5 commit 51db09f
Showing 18 changed files with 219 additions and 154 deletions.
2 changes: 1 addition & 1 deletion CODE_OF_CONDUCT.md
@@ -106,7 +106,7 @@ Violating these terms may lead to a permanent ban.
 ### 4. Permanent Ban
 
 **Community Impact**: Demonstrating a pattern of violation of community
-standards, including sustained inappropriate behavior, harassment of an
+standards, including sustained inappropriate behavior, harassment of an
 individual, or aggression toward or disparagement of classes of individuals.
 
 **Consequence**: A permanent ban from any sort of public interaction within
11 changes: 7 additions & 4 deletions README.md
@@ -35,6 +35,7 @@
 - [x] Support CPU/GPU
 - [x] Vectorized operations
 - [x] Standard API
+
 </div>
 <h2>If you found it helpful, please give us a <span>:star:</span></h2>
 
@@ -45,10 +46,12 @@
 
 <div class="footer"><pre>Copyright &copy; 2021-2022 <b>TensorOps</b> Developers
 
-<a href="https://soran-ghaderi.github.io/">Soran Ghaderi</a> (soran.gdr.cs@gmail.com)
-follow me on <a href="https://github.com/soran-ghaderi"><img alt="Github" src="https://img.shields.io/badge/GitHub-100000?&logo=github&logoColor=white"></a> <a href="https://twitter.com/soranghadri"><img alt="Twitter" src="https://img.shields.io/badge/Twitter-1DA1F2?&logo=twitter&logoColor=white"></a> <a href="https://www.linkedin.com/in/soran-ghaderi/"><img alt="Linkedin" src="https://img.shields.io/badge/LinkedIn-0077B5?&logo=linkedin&logoColor=white"></a>
+<a href="https://soran-ghaderi.github.io/">Soran Ghaderi</a> (soran.gdr.cs@gmail.com)
+follow me
+on <a href="https://github.com/soran-ghaderi"><img alt="Github" src="https://img.shields.io/badge/GitHub-100000?&logo=github&logoColor=white"></a> <a href="https://twitter.com/soranghadri"><img alt="Twitter" src="https://img.shields.io/badge/Twitter-1DA1F2?&logo=twitter&logoColor=white"></a> <a href="https://www.linkedin.com/in/soran-ghaderi/"><img alt="Linkedin" src="https://img.shields.io/badge/LinkedIn-0077B5?&logo=linkedin&logoColor=white"></a>
 <br>
-<a href="https://uk.linkedin.com/in/taleb-zarhesh">Taleb Zarhesh</a> (taleb.zarhesh@gmail.com)
-follow me on <a href="https://github.com/sigma1326"><img alt="Github" src="https://img.shields.io/badge/GitHub-100000?&logo=github&logoColor=white"></a> <a href="https://twitter.com/taleb__z"><img alt="Twitter" src="https://img.shields.io/badge/Twitter-1DA1F2?&logo=twitter&logoColor=white"></a> <a href="https://www.linkedin.com/in/taleb-zarhesh/"><img alt="Linkedin" src="https://img.shields.io/badge/LinkedIn-0077B5?&logo=linkedin&logoColor=white"></a>
+<a href="https://uk.linkedin.com/in/taleb-zarhesh">Taleb Zarhesh</a> (taleb.zarhesh@gmail.com)
+follow me
+on <a href="https://github.com/sigma1326"><img alt="Github" src="https://img.shields.io/badge/GitHub-100000?&logo=github&logoColor=white"></a> <a href="https://twitter.com/taleb__z"><img alt="Twitter" src="https://img.shields.io/badge/Twitter-1DA1F2?&logo=twitter&logoColor=white"></a> <a href="https://www.linkedin.com/in/taleb-zarhesh/"><img alt="Linkedin" src="https://img.shields.io/badge/LinkedIn-0077B5?&logo=linkedin&logoColor=white"></a>
 </pre>
 </div>
2 changes: 1 addition & 1 deletion __init__.py
@@ -1 +1 @@
-from utils import use_device
+
50 changes: 24 additions & 26 deletions data_loader.py
@@ -1,12 +1,10 @@
 import collections
 from abc import ABC
-from typing import Optional, Tuple, List
 import hashlib
 import os
 import tarfile
 import zipfile
+from typing import Optional, Tuple, List
 
-import numpy as np
-import requests
 import tensorflow as tf
 
@@ -27,7 +25,7 @@ def val_dataloader(self):
         return self.get_dataloader(train=False)
 
     def get_tensorloader(
-            self, tensors: List[tf.Tensor], train: bool, indices: int = slice(0, None)
+        self, tensors: List[tf.Tensor], train: bool, indices: int = slice(0, None)
     ):
         """Prepare tensors for training
@@ -56,7 +54,7 @@ class Vocab:
     """Vocabulary for text"""
 
     def __init__(
-            self, tokens: list = [], min_freq: int = 0, reserved_tokens: Optional[list] = []
+        self, tokens: list = [], min_freq: int = 0, reserved_tokens: Optional[list] = []
     ):
         """Initialize the Vocab class
@@ -74,13 +72,13 @@ def __init__(
         self.token_freqs = sorted(counter.items(), key=lambda x: x[1], reverse=True)
         # The list of unique tokens
         self.idx_to_token = list(
-                sorted(
-                    set(
-                        ["<unk>"]
-                        + reserved_tokens
-                        + [token for token, freq in self.token_freqs if freq >= min_freq]
-                    )
-                )
+            sorted(
+                set(
+                    ["<unk>"]
+                    + reserved_tokens
+                    + [token for token, freq in self.token_freqs if freq >= min_freq]
+                )
+            )
         )
         self.token_to_idx = {token: idx for idx, token in enumerate(self.idx_to_token)}

@@ -122,12 +120,12 @@ class BaseDataset(DataModule):
     """Base dataset class for downloading and processing."""
 
     def __init__(
-            self,
-            batch_size,
-            num_steps=9,
-            num_train=512,
-            num_val=128,
-            url="http://d2l-data.s3-accelerate.amazonaws.com/",
+        self,
+        batch_size,
+        num_steps=9,
+        num_train=512,
+        num_val=128,
+        url="http://d2l-data.s3-accelerate.amazonaws.com/",
     ):
         """Initialize the class
@@ -146,7 +144,7 @@ def __init__(
         # self.save_hyperparameters()
         self.url = url
         self.arrays, self.src_vocab, self.tgt_vocab = self._build_arrays(
-                self._download()
+            self._download()
         )
 
     @staticmethod
@@ -207,11 +205,11 @@ def extract(filename, folder: Optional[str] = None):
 
     def _download(self):
         self.extract(
-                self.download(
-                    self.url + "fra-eng.zip",
-                    self.data_directory,
-                    "94646ad1522d915e7b0f9296181140edcf86a4f5",
-                )
+            self.download(
+                self.url + "fra-eng.zip",
+                self.data_directory,
+                "94646ad1522d915e7b0f9296181140edcf86a4f5",
+            )
         )
         with open(self.data_directory + "/fra-eng/fra.txt", encoding="utf-8") as f:
             return f.read()
@@ -290,7 +288,7 @@ def _build_array(sentences, vocab, is_tgt=False):
             return array, vocab, valid_len
 
         src, tgt = self._tokenize(
-                self._preprocess(raw_text), self.num_train + self.num_val
+            self._preprocess(raw_text), self.num_train + self.num_val
         )
         src_array, src_vocab, src_valid_len = _build_array(src, src_vocab)
         tgt_array, tgt_vocab, _ = _build_array(tgt, tgt_vocab, True)
@@ -328,7 +326,7 @@ def build(self, src_sentences, tgt_sentences):
         arrays : source and target arrays
         """
         raw_text = "\n".join(
-                [src + "\t" + tgt for src, tgt in zip(src_sentences, tgt_sentences)]
+            [src + "\t" + tgt for src, tgt in zip(src_sentences, tgt_sentences)]
         )
         arrays, _, _ = self._build_arrays(raw_text, self.src_vocab, self.tgt_vocab)
         return arrays
@@ -354,5 +352,5 @@ def __init__(self, batch_size, num_steps=9, num_train=512, num_val=128):
         self.num_val = num_val
         # self.save_hyperparameters()
         self.arrays, self.src_vocab, self.tgt_vocab = self._build_arrays(
-                self._download()
+            self._download()
         )
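
A minimal usage sketch of the Vocab class diffed above, assuming data_loader.py is importable from the repository root; it relies only on the constructor signature and the idx_to_token / token_to_idx attributes visible in this diff, not on any API beyond it.

```python
# Sketch, not repository code: build a vocabulary from a flat token list.
from data_loader import Vocab

corpus = "the quick brown fox jumps over the lazy dog the fox".split()
vocab = Vocab(tokens=corpus, min_freq=1, reserved_tokens=["<pad>", "<bos>", "<eos>"])

# idx_to_token holds "<unk>", the reserved tokens, and every corpus token
# with frequency >= min_freq, in sorted order; token_to_idx is its inverse.
print(len(vocab.idx_to_token))
print(vocab.token_to_idx["the"])
```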
28 changes: 14 additions & 14 deletions examples/eng2fr_translation.py
@@ -12,22 +12,22 @@
 data = BaseDataset(batch_size=128)
 norm_shape = [2]
 encoder = TransformerEncoder(
-        len(data.src_vocab),
-        depth,
-        norm_shape,
-        ffn_num_hiddens,
-        num_heads,
-        n_blocks,
-        dropout,
+    len(data.src_vocab),
+    depth,
+    norm_shape,
+    ffn_num_hiddens,
+    num_heads,
+    n_blocks,
+    dropout,
 )
 decoder = TransformerDecoder(
-        len(data.tgt_vocab),
-        depth,
-        norm_shape,
-        ffn_num_hiddens,
-        num_heads,
-        n_blocks,
-        dropout,
+    len(data.tgt_vocab),
+    depth,
+    norm_shape,
+    ffn_num_hiddens,
+    num_heads,
+    n_blocks,
+    dropout,
 )
 model = Transformer(encoder, decoder, tgt_pad=data.tgt_vocab["<pad>"], lr=0.001)
 trainer = Trainer(max_epochs=2, gradient_clip_val=1)
18 changes: 9 additions & 9 deletions layers/dot_product_attention.py
@@ -18,22 +18,22 @@ def __init__(self, dropout, num_heads=8):
     def call(self, queries, keys, values, valid_lens=None, window_mask=None, **kwargs):
         d = queries.shape[-1]
         scores = tf.matmul(queries, keys, transpose_b=True) / tf.math.sqrt(
-                tf.cast(d, dtype=tf.float32)
+            tf.cast(d, dtype=tf.float32)
         )
         if window_mask is not None:  # To be covered later
             num_windows = window_mask.shape[0]
             n, num_queries, num_kv_pairs = scores.shape
             # Shape of window_mask: (num_windows, no. of queries,
             # no. of key-value pairs)
             scores = tf.reshape(
-                    scores,
-                    (
-                        n // (num_windows * self.num_heads),
-                        num_windows,
-                        self.num_heads,
-                        num_queries,
-                        num_kv_pairs,
-                    ),
+                scores,
+                (
+                    n // (num_windows * self.num_heads),
+                    num_windows,
+                    self.num_heads,
+                    num_queries,
+                    num_kv_pairs,
+                ),
             ) + tf.expand_dims(tf.expand_dims(window_mask, 1), 0)
             scores = tf.reshape(scores, (n, num_queries, num_kv_pairs))
         self.attention_weights = masked_softmax(scores, valid_lens)
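
The hunk above only re-indents the scaled dot-product computation. As a standalone reference, here is a sketch (not repository code) of the core operation, output = softmax(QKᵀ / √d)V, with the valid_lens masking and window-mask branch omitted.

```python
# Sketch of the core of DotProductAttention.call, without masking.
import tensorflow as tf

batch, num_queries, num_kv, d = 2, 4, 6, 8
queries = tf.random.normal((batch, num_queries, d))
keys = tf.random.normal((batch, num_kv, d))
values = tf.random.normal((batch, num_kv, d))

# Same scaling as the layer: divide scores by sqrt(d) before the softmax.
scores = tf.matmul(queries, keys, transpose_b=True) / tf.math.sqrt(
    tf.cast(d, dtype=tf.float32)
)
weights = tf.nn.softmax(scores, axis=-1)  # the layer uses masked_softmax here
output = tf.matmul(weights, values)
print(output.shape)  # (2, 4, 8)
```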
14 changes: 7 additions & 7 deletions layers/multihead_attention.py
@@ -8,12 +8,12 @@ class MultiHeadAttention(tf.keras.layers.Layer):
     """Multi-head attention."""
 
     def __init__(
-            self,
-            d_model,
-            num_heads,
-            dropout,
-            bias=False,
-            **kwargs,
+        self,
+        d_model,
+        num_heads,
+        dropout,
+        bias=False,
+        **kwargs,
     ):
         super(MultiHeadAttention, self).__init__()
         self.num_heads = num_heads
@@ -84,7 +84,7 @@ def call(self, queries, values, keys, valid_lens, window_mask=None, **kwargs):
         # Shape of output: (batch_size * num_heads, no. of queries,
         # depth / num_heads)
         output = self.attention(
-                queries, keys, values, valid_lens, window_mask, **kwargs
+            queries, keys, values, valid_lens, window_mask, **kwargs
        )
 
         # Shape of output_concat: (batch_size, no. of queries, depth)
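
Per the comments above, self.attention receives inputs already reshaped to (batch_size * num_heads, ..., depth / num_heads). That reshape is not part of this diff; the sketch below (an assumption, not repository code) shows the standard head-splitting transform such layers use.

```python
# Sketch of the usual head split: fold the head dimension into the batch.
import tensorflow as tf

def split_heads(X, num_heads):
    """(batch, seq_len, d_model) -> (batch * num_heads, seq_len, d_model // num_heads)."""
    batch, seq_len, d_model = X.shape
    X = tf.reshape(X, (batch, seq_len, num_heads, d_model // num_heads))
    X = tf.transpose(X, (0, 2, 1, 3))  # (batch, heads, seq_len, head_depth)
    return tf.reshape(X, (batch * num_heads, seq_len, d_model // num_heads))

X = tf.random.normal((2, 5, 16))
print(split_heads(X, num_heads=8).shape)  # (16, 5, 2)
```

Folding heads into the batch dimension lets one batched matmul compute attention for all heads at once; the inverse reshape restores (batch_size, no. of queries, depth) for output_concat.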
6 changes: 3 additions & 3 deletions layers/positional_encoding.py
@@ -11,14 +11,14 @@ def __init__(self, num_hiddens, dropout, max_len=1000):
         self.P = np.zeros((1, max_len, num_hiddens))
         print("P.shape", self.P.shape)
         X = np.arange(max_len, dtype=np.float32).reshape(-1, 1) / np.power(
-                10000, np.arange(0, num_hiddens, 2, dtype=np.float32) / num_hiddens
+            10000, np.arange(0, num_hiddens, 2, dtype=np.float32) / num_hiddens
         )
 
         self.P[:, :, 0::2] = tf.sin(
-                X
+            X
         )  # x[low::stride] -> positions: 0, 2, 4, ... of all rows and columns
         self.P[:, :, 1::2] = tf.cos(
-                X
+            X
         )  # x[low::stride] -> positions: 1, 3, 5, ... of all rows and columns
 
     def call(self, X, **kwargs):
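
The table built in __init__ above is the standard sinusoidal encoding: P[0, pos, 2i] = sin(pos / 10000^(2i/d)) and P[0, pos, 2i+1] = cos(pos / 10000^(2i/d)). A NumPy-only recap mirroring the diffed lines (a sketch, assuming num_hiddens is even as the even/odd slicing requires):

```python
# Recap of the positional-encoding table construction, NumPy only.
import numpy as np

max_len, num_hiddens = 1000, 32  # num_hiddens assumed even
P = np.zeros((1, max_len, num_hiddens))
X = np.arange(max_len, dtype=np.float32).reshape(-1, 1) / np.power(
    10000, np.arange(0, num_hiddens, 2, dtype=np.float32) / num_hiddens
)
P[:, :, 0::2] = np.sin(X)  # even feature indices
P[:, :, 1::2] = np.cos(X)  # odd feature indices
print(P.shape)  # (1, 1000, 32)
```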
32 changes: 16 additions & 16 deletions layers/transformer_decoder.py
@@ -8,14 +8,14 @@ class TransformerDecoder(tf.keras.layers.Layer):
     """Transformer decoder that encompasses one or more TransformerDecoderBlock blocks."""
 
     def __init__(
-            self,
-            vocab_size,
-            depth,
-            norm_shape,
-            ffn_num_hiddens,
-            num_heads,
-            n_blocks,
-            dropout,
+        self,
+        vocab_size,
+        depth,
+        norm_shape,
+        ffn_num_hiddens,
+        num_heads,
+        n_blocks,
+        dropout,
     ):
         super().__init__()
         self.depth = depth
@@ -24,12 +24,12 @@ def __init__(
         self.pos_encoding = PositionalEncoding(depth, dropout)
         self.blocks = [
             TransformerDecoderBlock(
-                    depth,
-                    norm_shape,
-                    ffn_num_hiddens,
-                    num_heads,
-                    dropout,
-                    i,
+                depth,
+                norm_shape,
+                ffn_num_hiddens,
+                num_heads,
+                dropout,
+                i,
             )
             for i in range(n_blocks)
         ]
@@ -40,8 +40,8 @@ def init_state(self, enc_outputs, enc_valid_lens):
 
     def call(self, X, state, **kwargs):
         X = self.pos_encoding(
-                self.embedding(X) * tf.math.sqrt(tf.cast(self.depth, dtype=tf.float32)),
-                **kwargs,
+            self.embedding(X) * tf.math.sqrt(tf.cast(self.depth, dtype=tf.float32)),
+            **kwargs,
         )
         # 2 attention layers in decoder
         self._attention_weights = [[None] * len(self.blocks) for _ in range(2)]
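
One detail in call above: the embedding output is multiplied by sqrt(depth) before the positional encoding is added, so the learned token vectors (small at initialization) are not drowned out by the sin/cos values, which lie in [-1, 1]. A standalone illustration of just the scaling step, using a plain Keras Embedding (a sketch, not repository code):

```python
# Sketch: scale embeddings by sqrt(depth) before adding positional encodings.
import tensorflow as tf

depth, vocab_size = 64, 1000
embedding = tf.keras.layers.Embedding(vocab_size, depth)
tokens = tf.constant([[3, 17, 256]])  # (batch_size=1, num_steps=3)
scaled = embedding(tokens) * tf.math.sqrt(tf.cast(depth, dtype=tf.float32))
print(scaled.shape)  # (1, 3, 64)
```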
20 changes: 10 additions & 10 deletions layers/transformer_decoder_block.py
@@ -9,13 +9,13 @@ class TransformerDecoderBlock(tf.keras.layers.Layer):
     """Transformer decoder block."""
 
     def __init__(
-            self,
-            num_hiddens,
-            norm_shape,
-            ffn_num_hiddens,
-            num_heads,
-            dropout,
-            i,
+        self,
+        num_hiddens,
+        norm_shape,
+        ffn_num_hiddens,
+        num_heads,
+        dropout,
+        i,
     ):
         super().__init__()
         self.i = i
@@ -43,9 +43,9 @@ def call(self, X, state, **kwargs):
             # Shape of dec_valid_lens: (batch_size, num_steps), where every
             # row is [1, 2, ..., num_steps]
             dec_valid_lens = tf.repeat(
-                    tf.reshape(tf.range(1, num_steps + 1), shape=(-1, num_steps)),
-                    repeats=batch_size,
-                    axis=0,
+                tf.reshape(tf.range(1, num_steps + 1), shape=(-1, num_steps)),
+                repeats=batch_size,
+                axis=0,
             )
         else:
             dec_valid_lens = None
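
The dec_valid_lens construction above encodes the causal mask as per-position valid lengths rather than a boolean matrix: during training, query position i may attend to the first i + 1 key positions only. A standalone recap (not repository code):

```python
# Recap: causal mask expressed as valid lengths, as in the block above.
import tensorflow as tf

batch_size, num_steps = 2, 5
dec_valid_lens = tf.repeat(
    tf.reshape(tf.range(1, num_steps + 1), shape=(-1, num_steps)),
    repeats=batch_size,
    axis=0,
)
print(dec_valid_lens.numpy())
# [[1 2 3 4 5]
#  [1 2 3 4 5]]
```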