
Commit

Merge pull request #10 from tensorops/dev
Dev
soran-ghaderi authored Sep 2, 2022
2 parents dfed348 + 4de1ee5 commit 51db09f
Showing 18 changed files with 219 additions and 154 deletions.
2 changes: 1 addition & 1 deletion CODE_OF_CONDUCT.md
@@ -106,7 +106,7 @@ Violating these terms may lead to a permanent ban.
 ### 4. Permanent Ban
 
 **Community Impact**: Demonstrating a pattern of violation of community
-standards, including sustained inappropriate behavior, harassment of an
+standards, including sustained inappropriate behavior, harassment of an
 individual, or aggression toward or disparagement of classes of individuals.
 
 **Consequence**: A permanent ban from any sort of public interaction within
11 changes: 7 additions & 4 deletions README.md
@@ -35,6 +35,7 @@
 - [x] Support CPU/GPU
 - [x] Vectorized operations
 - [x] Standard API
+
 </div>
 <h2>If you found it helpful, please give us a <span>:star:</span></h2>
 
@@ -45,10 +46,12 @@
 
 <div class="footer"><pre>Copyright &copy; 2021-2022 <b>TensorOps</b> Developers
 
-<a href="https://soran-ghaderi.github.io/">Soran Ghaderi</a> (soran.gdr.cs@gmail.com)
-follow me on <a href="https://github.com/soran-ghaderi"><img alt="Github" src="https://img.shields.io/badge/GitHub-100000?&logo=github&logoColor=white"></a> <a href="https://twitter.com/soranghadri"><img alt="Twitter" src="https://img.shields.io/badge/Twitter-1DA1F2?&logo=twitter&logoColor=white"></a> <a href="https://www.linkedin.com/in/soran-ghaderi/"><img alt="Linkedin" src="https://img.shields.io/badge/LinkedIn-0077B5?&logo=linkedin&logoColor=white"></a>
+<a href="https://soran-ghaderi.github.io/">Soran Ghaderi</a> (soran.gdr.cs@gmail.com)
+follow me
+on <a href="https://github.com/soran-ghaderi"><img alt="Github" src="https://img.shields.io/badge/GitHub-100000?&logo=github&logoColor=white"></a> <a href="https://twitter.com/soranghadri"><img alt="Twitter" src="https://img.shields.io/badge/Twitter-1DA1F2?&logo=twitter&logoColor=white"></a> <a href="https://www.linkedin.com/in/soran-ghaderi/"><img alt="Linkedin" src="https://img.shields.io/badge/LinkedIn-0077B5?&logo=linkedin&logoColor=white"></a>
 <br>
-<a href="https://uk.linkedin.com/in/taleb-zarhesh">Taleb Zarhesh</a> (taleb.zarhesh@gmail.com)
-follow me on <a href="https://github.com/sigma1326"><img alt="Github" src="https://img.shields.io/badge/GitHub-100000?&logo=github&logoColor=white"></a> <a href="https://twitter.com/taleb__z"><img alt="Twitter" src="https://img.shields.io/badge/Twitter-1DA1F2?&logo=twitter&logoColor=white"></a> <a href="https://www.linkedin.com/in/taleb-zarhesh/"><img alt="Linkedin" src="https://img.shields.io/badge/LinkedIn-0077B5?&logo=linkedin&logoColor=white"></a>
+<a href="https://uk.linkedin.com/in/taleb-zarhesh">Taleb Zarhesh</a> (taleb.zarhesh@gmail.com)
+follow me
+on <a href="https://github.com/sigma1326"><img alt="Github" src="https://img.shields.io/badge/GitHub-100000?&logo=github&logoColor=white"></a> <a href="https://twitter.com/taleb__z"><img alt="Twitter" src="https://img.shields.io/badge/Twitter-1DA1F2?&logo=twitter&logoColor=white"></a> <a href="https://www.linkedin.com/in/taleb-zarhesh/"><img alt="Linkedin" src="https://img.shields.io/badge/LinkedIn-0077B5?&logo=linkedin&logoColor=white"></a>
 </pre>
 </div>
2 changes: 1 addition & 1 deletion __init__.py
@@ -1 +1 @@
-from utils import use_device
+
50 changes: 24 additions & 26 deletions data_loader.py
@@ -1,12 +1,10 @@
 import collections
 from abc import ABC
-from typing import Optional, Tuple, List
 import hashlib
 import os
 import tarfile
 import zipfile
+from typing import Optional, Tuple, List
 
-import numpy as np
-import requests
 import tensorflow as tf
 
@@ -27,7 +25,7 @@ def val_dataloader(self):
         return self.get_dataloader(train=False)
 
     def get_tensorloader(
-            self, tensors: List[tf.Tensor], train: bool, indices: int = slice(0, None)
+        self, tensors: List[tf.Tensor], train: bool, indices: int = slice(0, None)
     ):
         """Prepare tensors for training
@@ -56,7 +54,7 @@ class Vocab:
     """Vocabulary for text"""
 
     def __init__(
-            self, tokens: list = [], min_freq: int = 0, reserved_tokens: Optional[list] = []
+        self, tokens: list = [], min_freq: int = 0, reserved_tokens: Optional[list] = []
     ):
         """Initialize the Vocab class
@@ -74,13 +72,13 @@ def __init__(
         self.token_freqs = sorted(counter.items(), key=lambda x: x[1], reverse=True)
         # The list of unique tokens
         self.idx_to_token = list(
-                sorted(
-                    set(
-                        ["<unk>"]
-                        + reserved_tokens
-                        + [token for token, freq in self.token_freqs if freq >= min_freq]
-                    )
-                )
+            sorted(
+                set(
+                    ["<unk>"]
+                    + reserved_tokens
+                    + [token for token, freq in self.token_freqs if freq >= min_freq]
+                )
+            )
         )
         self.token_to_idx = {token: idx for idx, token in enumerate(self.idx_to_token)}

@@ -122,12 +120,12 @@ class BaseDataset(DataModule):
     """Base dataset class for downloading and processing."""
 
     def __init__(
-            self,
-            batch_size,
-            num_steps=9,
-            num_train=512,
-            num_val=128,
-            url="http://d2l-data.s3-accelerate.amazonaws.com/",
+        self,
+        batch_size,
+        num_steps=9,
+        num_train=512,
+        num_val=128,
+        url="http://d2l-data.s3-accelerate.amazonaws.com/",
     ):
         """Initialize the class
@@ -146,7 +144,7 @@ def __init__(
         # self.save_hyperparameters()
         self.url = url
         self.arrays, self.src_vocab, self.tgt_vocab = self._build_arrays(
-                self._download()
+            self._download()
         )
 
     @staticmethod
@@ -207,11 +205,11 @@ def extract(filename, folder: Optional[str] = None):
 
     def _download(self):
         self.extract(
-                self.download(
-                    self.url + "fra-eng.zip",
-                    self.data_directory,
-                    "94646ad1522d915e7b0f9296181140edcf86a4f5",
-                )
+            self.download(
+                self.url + "fra-eng.zip",
+                self.data_directory,
+                "94646ad1522d915e7b0f9296181140edcf86a4f5",
+            )
         )
         with open(self.data_directory + "/fra-eng/fra.txt", encoding="utf-8") as f:
             return f.read()
@@ -290,7 +288,7 @@ def _build_array(sentences, vocab, is_tgt=False):
             return array, vocab, valid_len
 
         src, tgt = self._tokenize(
-                self._preprocess(raw_text), self.num_train + self.num_val
+            self._preprocess(raw_text), self.num_train + self.num_val
         )
         src_array, src_vocab, src_valid_len = _build_array(src, src_vocab)
         tgt_array, tgt_vocab, _ = _build_array(tgt, tgt_vocab, True)
@@ -328,7 +326,7 @@ def build(self, src_sentences, tgt_sentences):
         arrays : source and target arrays
         """
         raw_text = "\n".join(
-                [src + "\t" + tgt for src, tgt in zip(src_sentences, tgt_sentences)]
+            [src + "\t" + tgt for src, tgt in zip(src_sentences, tgt_sentences)]
         )
         arrays, _, _ = self._build_arrays(raw_text, self.src_vocab, self.tgt_vocab)
         return arrays
@@ -354,5 +352,5 @@ def __init__(self, batch_size, num_steps=9, num_train=512, num_val=128):
         self.num_val = num_val
         # self.save_hyperparameters()
         self.arrays, self.src_vocab, self.tgt_vocab = self._build_arrays(
-                self._download()
+            self._download()
         )
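
A minimal usage sketch of the Vocab class diffed above, assuming data_loader.py is importable from the repository root; it relies only on the constructor signature and the idx_to_token / token_to_idx attributes visible in this diff, not on any API beyond it.

```python
# Sketch, not repository code: build a vocabulary from a flat token list.
from data_loader import Vocab

corpus = "the quick brown fox jumps over the lazy dog the fox".split()
vocab = Vocab(tokens=corpus, min_freq=1, reserved_tokens=["<pad>", "<bos>", "<eos>"])

# idx_to_token holds "<unk>", the reserved tokens, and every corpus token
# with frequency >= min_freq, in sorted order; token_to_idx is its inverse.
print(len(vocab.idx_to_token))
print(vocab.token_to_idx["the"])
```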
28 changes: 14 additions & 14 deletions examples/eng2fr_translation.py
@@ -12,22 +12,22 @@
 data = BaseDataset(batch_size=128)
 norm_shape = [2]
 encoder = TransformerEncoder(
-        len(data.src_vocab),
-        depth,
-        norm_shape,
-        ffn_num_hiddens,
-        num_heads,
-        n_blocks,
-        dropout,
+    len(data.src_vocab),
+    depth,
+    norm_shape,
+    ffn_num_hiddens,
+    num_heads,
+    n_blocks,
+    dropout,
 )
 decoder = TransformerDecoder(
-        len(data.tgt_vocab),
-        depth,
-        norm_shape,
-        ffn_num_hiddens,
-        num_heads,
-        n_blocks,
-        dropout,
+    len(data.tgt_vocab),
+    depth,
+    norm_shape,
+    ffn_num_hiddens,
+    num_heads,
+    n_blocks,
+    dropout,
 )
 model = Transformer(encoder, decoder, tgt_pad=data.tgt_vocab["<pad>"], lr=0.001)
 trainer = Trainer(max_epochs=2, gradient_clip_val=1)
18 changes: 9 additions & 9 deletions layers/dot_product_attention.py
@@ -18,22 +18,22 @@ def __init__(self, dropout, num_heads=8):
     def call(self, queries, keys, values, valid_lens=None, window_mask=None, **kwargs):
         d = queries.shape[-1]
         scores = tf.matmul(queries, keys, transpose_b=True) / tf.math.sqrt(
-                tf.cast(d, dtype=tf.float32)
+            tf.cast(d, dtype=tf.float32)
         )
         if window_mask is not None:  # To be covered later
             num_windows = window_mask.shape[0]
             n, num_queries, num_kv_pairs = scores.shape
             # Shape of window_mask: (num_windows, no. of queries,
             # no. of key-value pairs)
             scores = tf.reshape(
-                    scores,
-                    (
-                        n // (num_windows * self.num_heads),
-                        num_windows,
-                        self.num_heads,
-                        num_queries,
-                        num_kv_pairs,
-                    ),
+                scores,
+                (
+                    n // (num_windows * self.num_heads),
+                    num_windows,
+                    self.num_heads,
+                    num_queries,
+                    num_kv_pairs,
+                ),
             ) + tf.expand_dims(tf.expand_dims(window_mask, 1), 0)
             scores = tf.reshape(scores, (n, num_queries, num_kv_pairs))
         self.attention_weights = masked_softmax(scores, valid_lens)
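
The hunk above only re-indents the scaled dot-product computation. As a standalone reference, here is a sketch (not repository code) of the core operation, output = softmax(QKᵀ / √d)V, with the valid_lens masking and window-mask branch omitted.

```python
# Sketch of the core of DotProductAttention.call, without masking.
import tensorflow as tf

batch, num_queries, num_kv, d = 2, 4, 6, 8
queries = tf.random.normal((batch, num_queries, d))
keys = tf.random.normal((batch, num_kv, d))
values = tf.random.normal((batch, num_kv, d))

# Same scaling as the layer: divide scores by sqrt(d) before the softmax.
scores = tf.matmul(queries, keys, transpose_b=True) / tf.math.sqrt(
    tf.cast(d, dtype=tf.float32)
)
weights = tf.nn.softmax(scores, axis=-1)  # the layer uses masked_softmax here
output = tf.matmul(weights, values)
print(output.shape)  # (2, 4, 8)
```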
14 changes: 7 additions & 7 deletions layers/multihead_attention.py
@@ -8,12 +8,12 @@ class MultiHeadAttention(tf.keras.layers.Layer):
     """Multi-head attention."""
 
     def __init__(
-            self,
-            d_model,
-            num_heads,
-            dropout,
-            bias=False,
-            **kwargs,
+        self,
+        d_model,
+        num_heads,
+        dropout,
+        bias=False,
+        **kwargs,
     ):
         super(MultiHeadAttention, self).__init__()
         self.num_heads = num_heads
@@ -84,7 +84,7 @@ def call(self, queries, values, keys, valid_lens, window_mask=None, **kwargs):
         # Shape of output: (batch_size * num_heads, no. of queries,
         # depth / num_heads)
         output = self.attention(
-                queries, keys, values, valid_lens, window_mask, **kwargs
+            queries, keys, values, valid_lens, window_mask, **kwargs
        )
 
         # Shape of output_concat: (batch_size, no. of queries, depth)
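
Per the comments above, self.attention receives inputs already reshaped to (batch_size * num_heads, ..., depth / num_heads). That reshape is not part of this diff; the sketch below (an assumption, not repository code) shows the standard head-splitting transform such layers use.

```python
# Sketch of the usual head split: fold the head dimension into the batch.
import tensorflow as tf

def split_heads(X, num_heads):
    """(batch, seq_len, d_model) -> (batch * num_heads, seq_len, d_model // num_heads)."""
    batch, seq_len, d_model = X.shape
    X = tf.reshape(X, (batch, seq_len, num_heads, d_model // num_heads))
    X = tf.transpose(X, (0, 2, 1, 3))  # (batch, heads, seq_len, head_depth)
    return tf.reshape(X, (batch * num_heads, seq_len, d_model // num_heads))

X = tf.random.normal((2, 5, 16))
print(split_heads(X, num_heads=8).shape)  # (16, 5, 2)
```

Folding heads into the batch dimension lets one batched matmul compute attention for all heads at once; the inverse reshape restores (batch_size, no. of queries, depth) for output_concat.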
6 changes: 3 additions & 3 deletions layers/positional_encoding.py
@@ -11,14 +11,14 @@ def __init__(self, num_hiddens, dropout, max_len=1000):
         self.P = np.zeros((1, max_len, num_hiddens))
         print("P.shape", self.P.shape)
         X = np.arange(max_len, dtype=np.float32).reshape(-1, 1) / np.power(
-                10000, np.arange(0, num_hiddens, 2, dtype=np.float32) / num_hiddens
+            10000, np.arange(0, num_hiddens, 2, dtype=np.float32) / num_hiddens
         )
 
         self.P[:, :, 0::2] = tf.sin(
-                X
+            X
         )  # x[low::stride] -> positions: 0, 2, 4, ... of all rows and columns
         self.P[:, :, 1::2] = tf.cos(
-                X
+            X
         )  # x[low::stride] -> positions: 1, 3, 5, ... of all rows and columns
 
     def call(self, X, **kwargs):
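
The table built in __init__ above is the standard sinusoidal encoding: P[0, pos, 2i] = sin(pos / 10000^(2i/d)) and P[0, pos, 2i+1] = cos(pos / 10000^(2i/d)). A NumPy-only recap mirroring the diffed lines (a sketch, assuming num_hiddens is even as the even/odd slicing requires):

```python
# Recap of the positional-encoding table construction, NumPy only.
import numpy as np

max_len, num_hiddens = 1000, 32  # num_hiddens assumed even
P = np.zeros((1, max_len, num_hiddens))
X = np.arange(max_len, dtype=np.float32).reshape(-1, 1) / np.power(
    10000, np.arange(0, num_hiddens, 2, dtype=np.float32) / num_hiddens
)
P[:, :, 0::2] = np.sin(X)  # even feature indices
P[:, :, 1::2] = np.cos(X)  # odd feature indices
print(P.shape)  # (1, 1000, 32)
```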
32 changes: 16 additions & 16 deletions layers/transformer_decoder.py
@@ -8,14 +8,14 @@ class TransformerDecoder(tf.keras.layers.Layer):
     """Transformer decoder that encompasses one or more TransformerDecoderBlock blocks."""
 
     def __init__(
-            self,
-            vocab_size,
-            depth,
-            norm_shape,
-            ffn_num_hiddens,
-            num_heads,
-            n_blocks,
-            dropout,
+        self,
+        vocab_size,
+        depth,
+        norm_shape,
+        ffn_num_hiddens,
+        num_heads,
+        n_blocks,
+        dropout,
     ):
         super().__init__()
         self.depth = depth
@@ -24,12 +24,12 @@ def __init__(
         self.pos_encoding = PositionalEncoding(depth, dropout)
         self.blocks = [
             TransformerDecoderBlock(
-                    depth,
-                    norm_shape,
-                    ffn_num_hiddens,
-                    num_heads,
-                    dropout,
-                    i,
+                depth,
+                norm_shape,
+                ffn_num_hiddens,
+                num_heads,
+                dropout,
+                i,
             )
             for i in range(n_blocks)
         ]
@@ -40,8 +40,8 @@ def init_state(self, enc_outputs, enc_valid_lens):
 
     def call(self, X, state, **kwargs):
         X = self.pos_encoding(
-                self.embedding(X) * tf.math.sqrt(tf.cast(self.depth, dtype=tf.float32)),
-                **kwargs,
+            self.embedding(X) * tf.math.sqrt(tf.cast(self.depth, dtype=tf.float32)),
+            **kwargs,
         )
         # 2 attention layers in decoder
         self._attention_weights = [[None] * len(self.blocks) for _ in range(2)]
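
One detail in call above: the embedding output is multiplied by sqrt(depth) before the positional encoding is added, so the learned token vectors (small at initialization) are not drowned out by the sin/cos values, which lie in [-1, 1]. A standalone illustration of just the scaling step, using a plain Keras Embedding (a sketch, not repository code):

```python
# Sketch: scale embeddings by sqrt(depth) before adding positional encodings.
import tensorflow as tf

depth, vocab_size = 64, 1000
embedding = tf.keras.layers.Embedding(vocab_size, depth)
tokens = tf.constant([[3, 17, 256]])  # (batch_size=1, num_steps=3)
scaled = embedding(tokens) * tf.math.sqrt(tf.cast(depth, dtype=tf.float32))
print(scaled.shape)  # (1, 3, 64)
```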
20 changes: 10 additions & 10 deletions layers/transformer_decoder_block.py
@@ -9,13 +9,13 @@ class TransformerDecoderBlock(tf.keras.layers.Layer):
     """Transformer decoder block."""
 
     def __init__(
-            self,
-            num_hiddens,
-            norm_shape,
-            ffn_num_hiddens,
-            num_heads,
-            dropout,
-            i,
+        self,
+        num_hiddens,
+        norm_shape,
+        ffn_num_hiddens,
+        num_heads,
+        dropout,
+        i,
     ):
         super().__init__()
         self.i = i
@@ -43,9 +43,9 @@ def call(self, X, state, **kwargs):
             # Shape of dec_valid_lens: (batch_size, num_steps), where every
             # row is [1, 2, ..., num_steps]
             dec_valid_lens = tf.repeat(
-                    tf.reshape(tf.range(1, num_steps + 1), shape=(-1, num_steps)),
-                    repeats=batch_size,
-                    axis=0,
+                tf.reshape(tf.range(1, num_steps + 1), shape=(-1, num_steps)),
+                repeats=batch_size,
+                axis=0,
             )
         else:
             dec_valid_lens = None
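
The dec_valid_lens construction above encodes the causal mask as per-position valid lengths rather than a boolean matrix: during training, query position i may attend to the first i + 1 key positions only. A standalone recap (not repository code):

```python
# Recap: causal mask expressed as valid lengths, as in the block above.
import tensorflow as tf

batch_size, num_steps = 2, 5
dec_valid_lens = tf.repeat(
    tf.reshape(tf.range(1, num_steps + 1), shape=(-1, num_steps)),
    repeats=batch_size,
    axis=0,
)
print(dec_valid_lens.numpy())
# [[1 2 3 4 5]
#  [1 2 3 4 5]]
```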