Merge branch 'master' into informer

LongxingTan · Oct 11, 2023 · c3b2e11 · c3b2e11
2 parents 38ec583 + 4b179e8
commit c3b2e11
Show file tree

Hide file tree

Showing 31 changed files with 405 additions and 276 deletions.
diff --git a/Makefile b/Makefile
@@ -5,7 +5,7 @@ check_dirs := tfts examples tests
 # run checks on all files and potentially modifies some of them
 
 style:
-	black --preview $(check_dirs)
+	black $(check_dirs)
 	isort $(check_dirs)
 	flake8
 	pre-commit run --all-files

diff --git a/README.md b/README.md
@@ -52,6 +52,7 @@ $ pip install tfts
 **Basic usage**
 
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1LHdbrXmQGBSQuNTsbbM5-lAk5WENWF-Q?usp=sharing)
+[![Open in Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://www.kaggle.com/code/tanlongxing/tensorflow-time-series-starter-tfts/notebook)
 
 ``` python
 import matplotlib.pyplot as plt
@@ -112,6 +113,7 @@ x_train = (
     np.random.rand(1, predict_length, n_decoder_feature),  # decoder_feature: (batch, predict_length, decoder_features)
 )
 y_train = np.random.rand(1, predict_length, 1)  # target: (batch, predict_length, 1)
+
 x_valid = (
     np.random.rand(1, train_length, 1),
     np.random.rand(1, train_length, n_encoder_feature),

diff --git a/docs/source/quick-start.rst b/docs/source/quick-start.rst
@@ -76,18 +76,19 @@ Train your first model
 ---------------------------
 
 1. Prepare the data
-~~~~~~~~~~~~~~~~~~~~~~~
-
+~~~~~~~~~~~~~~~~~~~~~~~~
 After you prepare the raw data, you may need to preprocess it.
 
 
 2. Train the model
-~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~
+
 
 3. Evaluate the model
 ~~~~~~~~~~~~~~~~~~~~~~~
 
 4. Serve the model
 ~~~~~~~~~~~~~~~~~~~
 
+
 .. currentmodule:: tfts
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,6 @@
 tensorflow>=2.3.1
-optuna>=2.0
-pandas>=1.0
+optuna>=2.3
+pandas>=1.2
 scikit-learn>0.23
 joblib
 matplotlib
diff --git a/tests/test_layers/test_cnn_layer.py b/tests/test_layers/test_cnn_layer.py
@@ -2,7 +2,7 @@
 
 import tensorflow as tf
 
-from tfts.layers.cnn_layer import ConvAttTemp, ConvTemp
+from tfts.layers.cnn_layer import ConvTemp
 
 
 class CNNLayerTest(unittest.TestCase):

diff --git a/tests/test_models/test_seq2seq.py b/tests/test_models/test_seq2seq.py
@@ -52,5 +52,71 @@ def test_model(self):
         y = model(x)
         self.assertEqual(y.shape, (2, predict_sequence_length, 1), "incorrect output shape")
 
+    def test_model_gru_attn(self):
+        predict_sequence_length = 8
+        custom_model_params = {
+            "rnn_type": "gru",
+            "bi_direction": False,
+            "rnn_size": 64,
+            "dense_size": 64,
+            "num_stacked_layers": 1,
+            "scheduler_sampling": 0,  # teacher forcing
+            "use_attention": True,
+            "attention_sizes": 64,
+            "attention_heads": 2,
+            "attention_dropout": 0,
+            "skip_connect_circle": False,
+            "skip_connect_mean": False,
+        }
+        model = Seq2seq(predict_sequence_length=predict_sequence_length, custom_model_params=custom_model_params)
+
+        x = tf.random.normal([2, 16, 3])
+        y = model(x)
+        self.assertEqual(y.shape, (2, predict_sequence_length, 1), "incorrect output shape")
+
+    def test_model_lstm(self):
+        predict_sequence_length = 8
+        custom_model_params = {
+            "rnn_type": "lstm",
+            "bi_direction": False,
+            "rnn_size": 64,
+            "dense_size": 64,
+            "num_stacked_layers": 1,
+            "scheduler_sampling": 0,  # teacher forcing
+            "use_attention": False,
+            "attention_sizes": 64,
+            "attention_heads": 2,
+            "attention_dropout": 0,
+            "skip_connect_circle": False,
+            "skip_connect_mean": False,
+        }
+        model = Seq2seq(predict_sequence_length=predict_sequence_length, custom_model_params=custom_model_params)
+
+        x = tf.random.normal([2, 16, 3])
+        y = model(x)
+        self.assertEqual(y.shape, (2, predict_sequence_length, 1), "incorrect output shape")
+
+    def test_model_lstm_attn(self):
+        predict_sequence_length = 8
+        custom_model_params = {
+            "rnn_type": "lstm",
+            "bi_direction": False,
+            "rnn_size": 64,
+            "dense_size": 64,
+            "num_stacked_layers": 1,
+            "scheduler_sampling": 0,  # teacher forcing
+            "use_attention": True,
+            "attention_sizes": 64,
+            "attention_heads": 2,
+            "attention_dropout": 0,
+            "skip_connect_circle": False,
+            "skip_connect_mean": False,
+        }
+        model = Seq2seq(predict_sequence_length=predict_sequence_length, custom_model_params=custom_model_params)
+
+        x = tf.random.normal([2, 16, 3])
+        y = model(x)
+        self.assertEqual(y.shape, (2, predict_sequence_length, 1), "incorrect output shape")
+
     def test_train(self):
         pass
diff --git a/tfts/datasets/get_data.py b/tfts/datasets/get_data.py
@@ -2,6 +2,7 @@
 
 import logging
 import random
+from typing import List, Optional, Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -11,7 +12,9 @@
 )
 
 
-def get_data(name: str = "sine", train_length: int = 24, predict_length: int = 8, test_size: float = 0.1):
+def get_data(
+    name: str = "sine", train_length: int = 24, predict_length: int = 8, test_size: float = 0.1
+) -> Union[Tuple[np.ndarray, np.ndarray], Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]], None]:
     assert (test_size >= 0) & (test_size <= 1), "test_size is the ratio of test dataset"
     if name == "sine":
         return get_sine(train_length, predict_length, test_size=test_size)
@@ -23,7 +26,9 @@ def get_data(name: str = "sine", train_length: int = 24, predict_length: int = 8
         raise ValueError("unsupported data of {} yet, try 'sine', 'airpassengers'".format(name))
 
 
-def get_sine(train_sequence_length: int = 24, predict_sequence_length: int = 8, test_size: float = 0.2, n_examples=100):
+def get_sine(
+    train_sequence_length: int = 24, predict_sequence_length: int = 8, test_size: float = 0.2, n_examples: int = 100
+) -> Union[Tuple[np.ndarray, np.ndarray], Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]]:
     """
     Generate synthetic sine wave data.
 
@@ -36,8 +41,8 @@ def get_sine(train_sequence_length: int = 24, predict_sequence_length: int = 8,
     Returns:
     (tuple): Two tuples of numpy arrays containing training and validation data.
     """
-    x = []
-    y = []
+    x: List[np.ndarray] = []
+    y: List[np.ndarray] = []
     for _ in range(n_examples):
         rand = random.random() * 2 * np.pi
         sig1 = np.sin(np.linspace(rand, 3.0 * np.pi + rand, train_sequence_length + predict_sequence_length))
@@ -54,18 +59,18 @@ def get_sine(train_sequence_length: int = 24, predict_sequence_length: int = 8,
         x.append(x_.T)
         y.append(y_.T)
 
-    x = np.array(x)[:, :, 0:1]
-    y = np.array(y)[:, :, 0:1]
-    logging.info("Load sine data", x.shape, y.shape)
+    x_array = np.array(x)[:, :, 0:1]
+    y_array = np.array(y)[:, :, 0:1]
+    logging.info("Load sine data: x %s, y %s", x_array.shape, y_array.shape)
 
     if test_size > 0:
         slice = int(n_examples * (1 - test_size))
-        x_train = x[:slice]
-        y_train = y[:slice]
-        x_valid = x[slice:]
-        y_valid = y[slice:]
+        x_train = x_array[:slice]
+        y_train = y_array[:slice]
+        x_valid = x_array[slice:]
+        y_valid = y_array[slice:]
         return (x_train, y_train), (x_valid, y_valid)
-    return x, y
+    return x_array, y_array
 
 
 def get_air_passengers(train_sequence_length: int = 24, predict_sequence_length: int = 8, test_size: float = 0.2):
@@ -86,25 +91,26 @@ def get_air_passengers(train_sequence_length: int = 24, predict_sequence_length:
     v = df.iloc[:, 1:2].values
     v = (v - np.max(v)) / (np.max(v) - np.min(v))  # MinMaxScaler
 
-    x, y = [], []
+    x: List[np.ndarray] = []
+    y: List[np.ndarray] = []
     for seq in range(1, train_sequence_length + 1):
         x_roll = np.roll(v, seq, axis=0)
         x.append(x_roll)
-    x = np.stack(x, axis=1)
-    x = x[train_sequence_length:-predict_sequence_length, ::-1, :]
+    x_array = np.stack(x, axis=1)
+    x_array = x_array[train_sequence_length:-predict_sequence_length, ::-1, :]
 
     for seq in range(predict_sequence_length):
         y_roll = np.roll(v, -seq)
         y.append(y_roll)
-    y = np.stack(y, axis=1)
-    y = y[train_sequence_length:-predict_sequence_length]
-    logging.info("Load air passenger data", x.shape, y.shape)
+    y_array = np.stack(y, axis=1)
+    y_array = y_array[train_sequence_length:-predict_sequence_length]
+    logging.info("Load air passenger data: x %s, y %s", x_array.shape, y_array.shape)
 
     if test_size > 0:
-        slice = int(len(x) * (1 - test_size))
-        x_train = x[:slice]
-        y_train = y[:slice]
-        x_valid = x[slice:]
-        y_valid = y[slice:]
+        slice = int(len(x_array) * (1 - test_size))
+        x_train = x_array[:slice]
+        y_train = y_array[:slice]
+        x_valid = x_array[slice:]
+        y_valid = y_array[slice:]
         return (x_train, y_train), (x_valid, y_valid)
-    return x, y
+    return x_array, y_array
diff --git a/tfts/layers/attention_layer.py b/tfts/layers/attention_layer.py
@@ -15,7 +15,7 @@
 class FullAttention(tf.keras.layers.Layer):
     """Multi-head attention layer"""
 
-    def __init__(self, hidden_size: int, num_heads: int, attention_dropout: float = 0.0):
+    def __init__(self, hidden_size: int, num_heads: int, attention_dropout: float = 0.0) -> None:
         """Initialize the layer.
 
         Parameters:
@@ -36,7 +36,7 @@ def __init__(self, hidden_size: int, num_heads: int, attention_dropout: float =
         self.num_heads = num_heads
         self.attention_dropout = attention_dropout
 
-    def build(self, input_shape):
+    def build(self, input_shape: Tuple[Optional[int], ...]) -> None:
         self.dense_q = Dense(self.hidden_size, use_bias=False)
         self.dense_k = Dense(self.hidden_size, use_bias=False)
         self.dense_v = Dense(self.hidden_size, use_bias=False)
@@ -94,14 +94,16 @@ def get_config(self):
 
 
 class SelfAttention(tf.keras.layers.Layer):
-    def __init__(self, hidden_size: int, num_heads: int, attention_dropout: float = 0.0, **kwargs):
+    def __init__(
+        self, hidden_size: int, num_heads: int, attention_dropout: float = 0.0, **kwargs: Any
+    ) -> None:
         super(SelfAttention, self).__init__()
         self.attention = FullAttention(hidden_size, num_heads, attention_dropout=attention_dropout)
 
-    def build(self, input_shape):
+    def build(self, input_shape: Tuple[Optional[int], ...]) -> None:
         super(SelfAttention, self).build(input_shape)
 
-    def call(self, x, mask=None):
+    def call(self, x: tf.Tensor, mask: Optional[tf.Tensor] = None):
         """_summary_
 
         Parameters
@@ -132,7 +134,7 @@ def __init__(self, hidden_size: int = 128, num_heads: int = 1, attention_dropout
         self.factor = 5
         self.scale = None
 
-    def build(self, input_shape):
+    def build(self, input_shape: Tuple[Optional[int], ...]) -> None:
         self.dense_q = Dense(self.hidden_size, use_bias=False)
         self.dense_k = Dense(self.hidden_size, use_bias=False)
         self.dense_v = Dense(self.hidden_size, use_bias=False)
@@ -239,7 +241,7 @@ class SparseAttention(tf.keras.layers.Layer):
     def __init__(self, hidden_size: int, num_heads: int, attention_dropout: float = 0.0, **kwargs):
         super().__init__()
 
-    def build(self, input_shape):
+    def build(self, input_shape: Tuple[Optional[int], ...]):
         super().build(input_shape)
 
     def call(self, x, mask=None):
@@ -260,10 +262,10 @@ def get_config(self):
 
 
 class FastAttention(tf.keras.layers.Layer):
-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs) -> None:
         super().__init__()
 
-    def build(self, input_shape):
+    def build(self, input_shape: Tuple[Optional[int], ...]) -> None:
         super().build(input_shape)
 
     def call(self, x, mask=None):

diff --git a/tfts/layers/autoformer_layer.py b/tfts/layers/autoformer_layer.py
@@ -3,6 +3,7 @@
 """Layer for :py:class:`~tfts.models.autoformer`"""
 
 import math
+from typing import Any, Callable, Dict, Optional, Tuple, Type, Union
 
 import tensorflow as tf
 from tensorflow.keras.layers import AveragePooling1D, Conv1D, Dense, Dropout
@@ -14,7 +15,7 @@ def __init__(self, kernel_size: int) -> None:
         self.kernel_size = kernel_size
         self.moving_avg = AveragePooling1D(pool_size=kernel_size, strides=1, padding="same")
 
-    def call(self, x):
+    def call(self, x: tf.Tensor):
         """
         Perform time-series decomposition on the input tensor.
 
@@ -51,12 +52,13 @@ def __init__(self, d_model: int, num_heads: int, attention_dropout: float = 0.0)
         self.depth = d_model // num_heads
         self.attention_dropout = attention_dropout
 
-    def build(self, input_shape):
+    def build(self, input_shape: Tuple[Optional[int], ...]):
         self.wq = Dense(self.d_model, name="q")
         self.wk = Dense(self.d_model, name="k")
         self.wv = Dense(self.d_model, name="v")
         self.drop = Dropout(self.attention_dropout)
         self.dense = Dense(self.d_model, name="project")
+        super().build(input_shape)
 
     def time_delay_agg(self, q, k, v):  # TODO: v not used in process
         """Compute time-delayed autocorrelation between queries and keys.