diff --git a/src/Dockerfile b/src/Dockerfile new file mode 100755 index 0000000..1e4053b --- /dev/null +++ b/src/Dockerfile @@ -0,0 +1,3 @@ +FROM pytorch/pytorch:1.8.1-cuda11.1-cudnn8-runtime +RUN pip install pytorch-lightning==1.3.8 yacs==0.1.8 pandas==1.3.0 numpy==1.21 iopath==0.1.9 --no-cache-dir + diff --git a/src/checkpoints/README.md b/src/checkpoints/README.md new file mode 100644 index 0000000..8d1c8b6 --- /dev/null +++ b/src/checkpoints/README.md @@ -0,0 +1 @@ + diff --git a/src/conf/eev.yaml b/src/conf/eev.yaml new file mode 100755 index 0000000..e5bc950 --- /dev/null +++ b/src/conf/eev.yaml @@ -0,0 +1,31 @@ +MODEL: + TEMPORAL_TYPE: 'tcn' + USE_POSITION: True + FC_HIDDEN: 128 + FEATURES: ['effb0', ] +TCN: + NUM_STACK: 2 + NUM_CHANNELS: 512 + DILATIONS: 4 + K_SIZE: 3 + DROPOUT: 0.3 + NORM: False +OPTIM: + BASE_LR: 0.005 + MAX_EPOCH: 20 + LR_POLICY: 'none' + USE_SWA: True +TRAIN: + ACCUM_GRAD_BATCHES: 32 +TEST: + WEIGHTS: '' +DATA_LOADER: + DATA_DIR: '/mnt/sXProject/EvokedExpression/' + NUM_WORKERS: 20 + PIN_MEMORY: False + +VERBOSE: True +OUT_DIR: './tmp' +RNG_SEED: 1 +FAST_DEV_RUN: 2 +LOGGER: "TensorBoard" diff --git a/src/conf/eev_audio.yaml b/src/conf/eev_audio.yaml new file mode 100755 index 0000000..1aec492 --- /dev/null +++ b/src/conf/eev_audio.yaml @@ -0,0 +1,36 @@ +MODEL: + TEMPORAL_TYPE: 'tcn' + USE_POSITION: True + FC_HIDDEN: 128 + FEATURES: ['audio', ] +TCN: + NUM_STACK: 2 + NUM_CHANNELS: 512 + DILATIONS: 4 + K_SIZE: 3 + DROPOUT: 0.3 + NORM: False +LSTM: + HIDDEN_SIZE: 512 + NUM_LAYERS: 2 + BIDIREC: False + DROPOUT: 0.3 +OPTIM: + BASE_LR: 0.005 + MAX_EPOCH: 20 + LR_POLICY: 'none' + USE_SWA: True +TRAIN: + ACCUM_GRAD_BATCHES: 32 +TEST: + WEIGHTS: '' +DATA_LOADER: + DATA_DIR: '/mnt/sXProject/EvokedExpression/dataset/eev2021/' + NUM_WORKERS: 20 + PIN_MEMORY: False + +VERBOSE: True +OUT_DIR: './tmp' +RNG_SEED: 1 +FAST_DEV_RUN: 2 +LOGGER: "TensorBoard" diff --git a/src/conf/eev_audio_mediaeval18.yaml b/src/conf/eev_audio_mediaeval18.yaml new file mode 100755 index 0000000..2c5571c --- /dev/null +++ b/src/conf/eev_audio_mediaeval18.yaml @@ -0,0 +1,37 @@ +MODEL: + TEMPORAL_TYPE: 'tcn' + USE_POSITION: True + FC_HIDDEN: 128 + FEATURES: ['audio', ] +TCN: + NUM_STACK: 2 + NUM_CHANNELS: 512 + DILATIONS: 4 + K_SIZE: 3 + DROPOUT: 0.3 + NORM: False +LSTM: + HIDDEN_SIZE: 512 + NUM_LAYERS: 2 + BIDIREC: False + DROPOUT: 0.3 +OPTIM: + BASE_LR: 0.005 + MAX_EPOCH: 20 + LR_POLICY: 'none' + USE_SWA: True +TRAIN: + ACCUM_GRAD_BATCHES: 32 +TEST: + WEIGHTS: '' +DATA_LOADER: + DATA_DIR: '/mnt/sXProject/EvokedExpression/dataset/mediaeval18/' + NUM_WORKERS: 20 + PIN_MEMORY: False + +DATA_NAME: 'mediaeval18' +VERBOSE: True +OUT_DIR: './tmp' +RNG_SEED: 1 +FAST_DEV_RUN: 2 +LOGGER: "TensorBoard" diff --git a/src/conf/eev_effb0.yaml b/src/conf/eev_effb0.yaml new file mode 100755 index 0000000..18ade24 --- /dev/null +++ b/src/conf/eev_effb0.yaml @@ -0,0 +1,36 @@ +MODEL: + TEMPORAL_TYPE: 'tcn' + USE_POSITION: True + FC_HIDDEN: 128 + FEATURES: ['effb0', ] +TCN: + NUM_STACK: 2 + NUM_CHANNELS: 512 + DILATIONS: 4 + K_SIZE: 3 + DROPOUT: 0.3 + NORM: False +LSTM: + HIDDEN_SIZE: 512 + NUM_LAYERS: 2 + BIDIREC: False + DROPOUT: 0.3 +OPTIM: + BASE_LR: 0.005 + MAX_EPOCH: 20 + LR_POLICY: 'none' + USE_SWA: True +TRAIN: + ACCUM_GRAD_BATCHES: 32 +TEST: + WEIGHTS: '' +DATA_LOADER: + DATA_DIR: '/mnt/sXProject/EvokedExpression/dataset/eev2021/' + NUM_WORKERS: 20 + PIN_MEMORY: False + +VERBOSE: True +OUT_DIR: './tmp' +RNG_SEED: 1 +FAST_DEV_RUN: 2 +LOGGER: "TensorBoard" diff --git a/src/conf/eev_effb0_mediaeval18.yaml 
b/src/conf/eev_effb0_mediaeval18.yaml new file mode 100755 index 0000000..427c217 --- /dev/null +++ b/src/conf/eev_effb0_mediaeval18.yaml @@ -0,0 +1,37 @@ +MODEL: + TEMPORAL_TYPE: 'tcn' + USE_POSITION: True + FC_HIDDEN: 128 + FEATURES: ['effb0', ] +TCN: + NUM_STACK: 2 + NUM_CHANNELS: 512 + DILATIONS: 4 + K_SIZE: 3 + DROPOUT: 0.3 + NORM: False +LSTM: + HIDDEN_SIZE: 512 + NUM_LAYERS: 2 + BIDIREC: False + DROPOUT: 0.3 +OPTIM: + BASE_LR: 0.005 + MAX_EPOCH: 20 + LR_POLICY: 'none' + USE_SWA: True +TRAIN: + ACCUM_GRAD_BATCHES: 32 +TEST: + WEIGHTS: '' +DATA_LOADER: + DATA_DIR: '/mnt/sXProject/EvokedExpression/dataset/mediaeval18/' + NUM_WORKERS: 20 + PIN_MEMORY: False + +DATA_NAME: 'mediaeval18' +VERBOSE: True +OUT_DIR: './tmp' +RNG_SEED: 1 +FAST_DEV_RUN: 2 +LOGGER: "TensorBoard" diff --git a/src/conf/eev_multi_mediaeval18.yaml b/src/conf/eev_multi_mediaeval18.yaml new file mode 100755 index 0000000..41a8e0d --- /dev/null +++ b/src/conf/eev_multi_mediaeval18.yaml @@ -0,0 +1,37 @@ +MODEL: + TEMPORAL_TYPE: 'tcn' + USE_POSITION: True + FC_HIDDEN: 128 + FEATURES: ['audio', 'effb0'] +TCN: + NUM_STACK: 2 + NUM_CHANNELS: 512 + DILATIONS: 4 + K_SIZE: 3 + DROPOUT: 0.3 + NORM: False +LSTM: + HIDDEN_SIZE: 512 + NUM_LAYERS: 2 + BIDIREC: False + DROPOUT: 0.3 +OPTIM: + BASE_LR: 0.005 + MAX_EPOCH: 20 + LR_POLICY: 'none' + USE_SWA: True +TRAIN: + ACCUM_GRAD_BATCHES: 32 +TEST: + WEIGHTS: '' +DATA_LOADER: + DATA_DIR: '/mnt/sXProject/EvokedExpression/dataset/mediaeval18/' + NUM_WORKERS: 20 + PIN_MEMORY: False + +DATA_NAME: 'mediaeval18' +VERBOSE: True +OUT_DIR: './tmp' +RNG_SEED: 1 +FAST_DEV_RUN: 2 +LOGGER: "TensorBoard" diff --git a/src/core/__init__.py b/src/core/__init__.py new file mode 100755 index 0000000..0ad619c --- /dev/null +++ b/src/core/__init__.py @@ -0,0 +1,2 @@ +from .models import EEVModel +from .eev_data import EEVDataModule \ No newline at end of file diff --git a/src/core/config.py b/src/core/config.py new file mode 100755 index 0000000..2f122af --- /dev/null +++ b/src/core/config.py @@ -0,0 +1,276 @@ +""" +Original source: https://github.com/facebookresearch/pycls/blob/master/pycls/core/config.py +Latest commit 2c152a6 on May 6, 2021 +""" + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +"""Configuration file (powered by YACS).""" + +import argparse +import os +import sys + +from .io import cache_url, pathmgr +from yacs.config import CfgNode + +# Global config object (example usage: from core.config import cfg) +_C = CfgNode() +cfg = _C + +# ---------------------------------- Model options ----------------------------------- # +_C.MODEL = CfgNode() + +# Number of classes +_C.MODEL.NUM_CLASSES = 15 + +# Loss function (see pycls/models/loss.py for options) +_C.MODEL.LOSS_FUN = "mse" + +# Number of hidden units in last layers +_C.MODEL.FC_HIDDEN = 32 + +# Temporal model type +_C.MODEL.TEMPORAL_TYPE = 'tcn' # tcn or lstm + +# Use position or not +_C.MODEL.USE_POSITION = True + +# List of pre-trained features +_C.MODEL.FEATURES = ['effb0'] +# ------------------------------- TCN options ------------------------------- # +_C.TCN = CfgNode() + +_C.TCN.NUM_CHANNELS = 512 +# TCN channels +_C.TCN.NUM_STACK = 2 + +# TCN Dilations +_C.TCN.DILATIONS = 4 + +# TCN Kernel size +_C.TCN.K_SIZE = 3 + +# TCN Dropout +_C.TCN.DROPOUT = 0. 
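The YAML files under src/conf/ override these YACS defaults rather than replacing them. A minimal sketch of that override flow (the file path and the override list are examples, not taken from this diff):

    from core.config import cfg

    # Values from the YAML replace matching defaults; unknown keys raise an error.
    cfg.merge_from_file('conf/eev.yaml')
    # KEY VALUE pairs, the same form that load_cfg_fom_args() accepts on the command line.
    cfg.merge_from_list(['OPTIM.BASE_LR', 0.01, 'TCN.NUM_STACK', 3])
    cfg.freeze()
    print(cfg.MODEL.FEATURES, cfg.TCN.NUM_CHANNELS)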
+ +# Use WeightNorm in TCN or not +_C.TCN.NORM = True + +# Number of temporal module (head) +_C.TCN.NUM_HEAD = 1 + +# ------------------------------- LSTM options ------------------------------- # +_C.LSTM = CfgNode() + +# LSTM HIDDEN_SIZE +_C.LSTM.HIDDEN_SIZE = 64 + +# LSTM Num layers +_C.LSTM.NUM_LAYERS = 4 + +# LSTM Bidirectional or not +_C.LSTM.BIDIREC = False + +# LSTM Dropout +_C.LSTM.DROPOUT = 0. + +# -------------------------------- Optimizer options --------------------------------- # +_C.OPTIM = CfgNode() + +# Learning rate ranges from BASE_LR to MIN_LR*BASE_LR according to the LR_POLICY +_C.OPTIM.BASE_LR = 0.1 +_C.OPTIM.MIN_LR = 0.0 + +# Learning rate policy select from {'cos', 'exp', 'lin', 'steps'} +_C.OPTIM.LR_POLICY = "cos" + +# Steps for 'steps' policy (in epochs) +_C.OPTIM.STEPS = [] + +# Learning rate multiplier for 'steps' policy +_C.OPTIM.LR_MULT = 0.1 + +# Maximal number of epochs +_C.OPTIM.MAX_EPOCH = 200 + +# Momentum +_C.OPTIM.MOMENTUM = 0.9 + +# Momentum dampening +_C.OPTIM.DAMPENING = 0.0 + +# Nesterov momentum +_C.OPTIM.NESTEROV = True + +# L2 regularization +_C.OPTIM.WEIGHT_DECAY = 5e-4 + +# Start the warm up from OPTIM.BASE_LR * OPTIM.WARMUP_FACTOR +_C.OPTIM.WARMUP_FACTOR = 0.1 + +# Gradually warm up the OPTIM.BASE_LR over this number of epochs +_C.OPTIM.WARMUP_EPOCHS = 0 + +# Exponential Moving Average (EMA) update value +_C.OPTIM.EMA_ALPHA = 1e-5 + +# Iteration frequency with which to update EMA weights +_C.OPTIM.EMA_UPDATE_PERIOD = 32 + +# Use swa or not +_C.OPTIM.USE_SWA = True +# --------------------------------- Training options --------------------------------- # +_C.TRAIN = CfgNode() + +# Dataset and split +_C.TRAIN.DATASET = "" +_C.TRAIN.SPLIT = "train" + +# Total mini-batch size +_C.TRAIN.BATCH_SIZE = 1 + +# If True train using mixed precision +_C.TRAIN.MIXED_PRECISION = False + +# Accumulated gradients runs K small batches of size N before doing a backwards pass +_C.TRAIN.ACCUM_GRAD_BATCHES = 1 + +# Resume training from the latest checkpoint in the output directory +_C.TRAIN.AUTO_RESUME = True + +_C.TRAIN.DROP_PERC = 0.3 + +# Weights to start training from +_C.TRAIN.WEIGHTS = "" +# --------------------------------- Testing options ---------------------------------- # +_C.TEST = CfgNode() + +# Dataset and split +_C.TEST.DATASET = "" +_C.TEST.SPLIT = "val" + +# Total mini-batch size +_C.TEST.BATCH_SIZE = 1 + +# Weights to use for testing +_C.TEST.WEIGHTS = "" + +# ------------------------------- Data loader options -------------------------------- # +_C.DATA_LOADER = CfgNode() + +# Number of data loader workers per process +_C.DATA_LOADER.NUM_WORKERS = 8 + +# Load data to pinned host memory +_C.DATA_LOADER.PIN_MEMORY = False + +# ROOT of DATASET +_C.DATA_LOADER.DATA_DIR = '/mnt/XProject/EvokedExpression/dataset' + +_C.DATA_LOADER.EMO_INDEX = -1 + + +_C.DATA_NAME = 'eev' +# ---------------------------------- CUDNN options ----------------------------------- # +_C.CUDNN = CfgNode() + +# Perform benchmarking to select fastest CUDNN algorithms (best for fixed input sizes) +_C.CUDNN.BENCHMARK = True + +# ----------------------------------- Misc options ----------------------------------- # +# Optional description of a config +_C.DESC = "" + +# If True output additional info to log +_C.VERBOSE = True + +# Number of GPUs to use (applies to both training and testing) +_C.NUM_GPUS = 1 + +# Output directory +_C.OUT_DIR = "./tmp" + +# Config destination (in OUT_DIR) +_C.CFG_DEST = "config.yaml" + +# Note that non-determinism is still be present due to 
non-deterministic GPU ops +_C.RNG_SEED = 1 + +# Log destination ('stdout' or 'file') +_C.LOG_DEST = "stdout" + +# Log period in iters +_C.LOG_PERIOD = 10 + +# Logger (wandb or TensorBoard) +_C.LOGGER = "TensorBoard" + +# Models weights referred to by URL are downloaded to this local cache +_C.DOWNLOAD_CACHE = "/tmp/pycls-download-cache" + +# Fast dev run, > 0 run fast dev only for check training/validation logic +_C.FAST_DEV_RUN = 0 +# ---------------------------------- Default config ---------------------------------- # +_CFG_DEFAULT = _C.clone() +_CFG_DEFAULT.freeze() + + +def assert_and_infer_cfg(cache_urls=True): + """Checks config values invariants.""" + err_str = "The first lr step must start at 0" + assert not _C.OPTIM.STEPS or _C.OPTIM.STEPS[0] == 0, err_str + data_splits = ["train", "val", "test"] + err_str = "Data split '{}' not supported" + assert _C.TRAIN.SPLIT in data_splits, err_str.format(_C.TRAIN.SPLIT) + assert _C.TEST.SPLIT in data_splits, err_str.format(_C.TEST.SPLIT) + err_str = "Mini-batch size should be a multiple of NUM_GPUS." + assert _C.TRAIN.BATCH_SIZE % _C.NUM_GPUS == 0, err_str + assert _C.TEST.BATCH_SIZE % _C.NUM_GPUS == 0, err_str + err_str = "Log destination '{}' not supported" + assert _C.LOG_DEST in ["stdout", "file"], err_str.format(_C.LOG_DEST) + if cache_urls: + cache_cfg_urls() + + +def cache_cfg_urls(): + """Download URLs in config, cache them, and rewrite cfg to use cached file.""" + _C.TRAIN.WEIGHTS = cache_url(_C.TRAIN.WEIGHTS, _C.DOWNLOAD_CACHE) + _C.TEST.WEIGHTS = cache_url(_C.TEST.WEIGHTS, _C.DOWNLOAD_CACHE) + + +def dump_cfg(): + """Dumps the config to the output directory.""" + cfg_file = os.path.join(_C.OUT_DIR, '{}_{}{}'.format(_C.CFG_DEST[:-5], '-'.join(_C.MODEL.FEATURES), _C.CFG_DEST[-5:])) + with pathmgr.open(cfg_file, "w") as f: + _C.dump(stream=f) + return cfg_file + + +def load_cfg(cfg_file): + """Loads config from specified file.""" + with pathmgr.open(cfg_file, "r") as f: + _C.merge_from_other_cfg(_C.load_cfg(f)) + + +def reset_cfg(): + """Reset config to initial state.""" + _C.merge_from_other_cfg(_CFG_DEFAULT) + + +def load_cfg_fom_args(description="Config file options."): + """Load config from command line arguments and set any specified options.""" + parser = argparse.ArgumentParser(description=description) + help_s = "Config file location" + parser.add_argument("--cfg", dest="cfg_file", help=help_s, required=True, type=str) + help_s = "See pycls/core/config.py for all options" + parser.add_argument("opts", help=help_s, default=None, nargs=argparse.REMAINDER) + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + args = parser.parse_args() + load_cfg(args.cfg_file) + _C.merge_from_list(args.opts) diff --git a/src/core/eev_data.py b/src/core/eev_data.py new file mode 100755 index 0000000..64a5f8c --- /dev/null +++ b/src/core/eev_data.py @@ -0,0 +1,171 @@ +""" +Author: HuynhVanThong +Department of AI Convergence, Chonnam Natl. Univ. 
+""" +import sys +from copy import deepcopy + +import pytorch_lightning as pl +from torch.utils import data +import numpy as np +import pandas as pd +import torch +from torchvision import transforms +from core.config import cfg + + +class ToTensor(object): + """ Convert ndarrays in sample to Tensors""" + + def __call__(self, sample): + transforms_sample = {} + for sp_key in sample.keys(): + if sp_key == 'file_id': + transforms_sample[sp_key] = sample[sp_key] + elif sp_key == 'timestamps': + transforms_sample[sp_key] = torch.from_numpy(sample[sp_key]).type(torch.LongTensor), + else: + transforms_sample[sp_key] = torch.from_numpy(sample[sp_key]).type(torch.FloatTensor) + + return transforms_sample + + +class TimeDropout(object): + """ Timestamp dropout """ + def __init__(self, drop_perc=0.3): + self.drop_perc = drop_perc + + def __call__(self, sample): + random_drop_perc = np.random.rand() + mask = np.ones(sample['scores'].shape[0], dtype=np.bool) + + if random_drop_perc <= self.drop_perc: + drop_index = np.random.choice(len(mask), size=int(random_drop_perc * len(mask)), replace=False) + mask[drop_index] = False + + for sp_key in sample.keys(): + if sp_key != 'file_id': + sample[sp_key] = sample[sp_key][mask] if sample[sp_key].ndim == 1 else sample[sp_key][mask, :] + + return sample + +class EEVDataset(data.Dataset): + def __init__(self, root_path='/mnt/Work/Dataset/EEV/', split='train', features=('resnet',), emotion_index=-1, + transforms=None, save_pt=False, use_position=True, dataset='eev', drop_perc=0.3): + + self.dataset_name = dataset + self.save_pt = save_pt + self.features = features + self.emotion_index = emotion_index + self.root_path = root_path + self.root_path_npy = self.root_path + self.split = split + self.transforms = transforms + self.use_position = use_position + self.drop_perc = drop_perc + if not self.use_position: + print('Do not use position encoding.') + if split not in ['train', 'val', 'test']: + raise ValueError('Do not support {} split for EEV dataset'.format(split)) + + if self.dataset_name == 'eev': + data_csv = pd.read_csv('{}/features_v2/{}.csv'.format(self.root_path, self.split)) + + id_header = 'Video ID' if split == 'test' else 'YouTube ID' + + excluded_ids = np.loadtxt('excluded_files.txt', dtype=str) + self.video_ids = list(set(data_csv[id_header].unique()) - set(excluded_ids)) + else: + video_ids = np.loadtxt('{}features_v2/{}.txt'.format(self.root_path, self.split), dtype=str) + self.video_ids = [x.replace('.mp4', '') for x in video_ids] + self.video_feats = [] + for vid_id in self.video_ids: + sample = torch.load('{}features_v2/{}/{}.pt'.format(self.root_path_npy, self.split, vid_id)) + self.video_feats.append(sample) + + def __len__(self): + return len(self.video_ids) + + def __getitem__(self, index): + current_id = self.video_ids[index] + if 'mediaeval' in self.dataset_name: + sample = deepcopy(self.video_feats[index]) + else: + sample = torch.load('{}features_v2/{}/{}.pt'.format(self.root_path_npy, self.split, current_id)) + + # Check and do drop positions + if self.split in ['train', ]: # 'train', 'val' + if self.dataset_name == 'eev': + mask = np.sum(sample['scores'], axis=-1) > 1e-6 + else: + mask = np.ones(sample['scores'].shape[0], dtype=np.bool) + else: + mask = np.ones(sample['scores'].shape[0], dtype=np.bool) + + if self.emotion_index > -1: + scores = np.reshape(sample['scores'][mask, self.emotion_index], (-1, 1)) + else: + smooth_scores = np.zeros_like(sample['scores'][mask, :]) + + scores = sample['scores'][mask, :] + smooth_scores + + 
use_sample = {} + if self.use_position: + norm_eff = 1e6 if self.dataset_name == 'eev' else 1e0 + position_info = sample['timestamps'][mask].reshape(-1, 1) / norm_eff + for feat_indx in self.features: + use_sample[feat_indx] = np.hstack([sample[feat_indx][mask, :], position_info]) + else: + for feat_indx in self.features: + use_sample[feat_indx] = sample[feat_indx][mask, :] + + use_sample.update({'timestamps': sample['timestamps'][mask], 'scores': scores, 'file_id': sample['file_id']}) + + if self.transforms is not None: + use_sample = self.transforms(use_sample) + + return use_sample + + +class EEVDataModule(pl.LightningDataModule): + def __init__(self, data_dir, features, dataset_name='eev', emotion_index=-1, drop_perc=0.3): + super(EEVDataModule, self).__init__() + self.data_dir = data_dir + self.features = features + self.transforms = transforms.Compose([ToTensor()]) + self.use_position = cfg.MODEL.USE_POSITION + self.dataset_name = dataset_name + self.emotion_index = emotion_index + self.drop_perc = drop_perc + + def setup(self, stage=None): + if stage == 'fit' or stage is None: + if self.drop_perc > 0.: + train_transforms = transforms.Compose([TimeDropout(drop_perc=self.drop_perc), ToTensor()]) + else: + train_transforms = self.transforms + + self.train_set = EEVDataset(self.data_dir, split='train', features=self.features, + transforms=train_transforms, + use_position=self.use_position, dataset=self.dataset_name, + emotion_index=self.emotion_index) + self.val_set = EEVDataset(self.data_dir, split='val', features=self.features, transforms=self.transforms, + use_position=self.use_position, dataset=self.dataset_name, + emotion_index=self.emotion_index) + + if stage == 'test' or stage is None: + self.test_set = EEVDataset(self.data_dir, split='test', features=self.features, transforms=self.transforms, + use_position=self.use_position, dataset=self.dataset_name, + emotion_index=self.emotion_index) + + def train_dataloader(self): + return data.DataLoader(self.train_set, batch_size=cfg.TRAIN.BATCH_SIZE, num_workers=cfg.DATA_LOADER.NUM_WORKERS, + shuffle=True, prefetch_factor=2) + + def val_dataloader(self): + return data.DataLoader(self.val_set, batch_size=cfg.TEST.BATCH_SIZE, num_workers=cfg.DATA_LOADER.NUM_WORKERS, + shuffle=False, prefetch_factor=2) + + def test_dataloader(self): + return data.DataLoader(self.test_set, batch_size=cfg.TEST.BATCH_SIZE, num_workers=cfg.DATA_LOADER.NUM_WORKERS, + shuffle=False, prefetch_factor=2) diff --git a/src/core/io.py b/src/core/io.py new file mode 100755 index 0000000..efb584e --- /dev/null +++ b/src/core/io.py @@ -0,0 +1,83 @@ +""" +Original source: https://github.com/facebookresearch/pycls/blob/master/pycls/core/io.py +Latest commit 4fab913 on Mar 18, 2021 +""" + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +"""IO utilities (adapted from Detectron)""" + +import logging +import os +import re +import sys +from urllib import request as urlrequest + +from iopath.common.file_io import PathManagerFactory + +# instantiate global path manager for pycls +pathmgr = PathManagerFactory.get() + +logger = logging.getLogger(__name__) + +_PYCLS_BASE_URL = "https://dl.fbaipublicfiles.com/pycls" + + +def cache_url(url_or_file, cache_dir, base_url=_PYCLS_BASE_URL): + """Download the file specified by the URL to the cache_dir and return the path to + the cached file. If the argument is not a URL, simply return it as is. 
+ """ + is_url = re.match(r"^(?:http)s?://", url_or_file, re.IGNORECASE) is not None + if not is_url: + return url_or_file + url = url_or_file + assert url.startswith(base_url), "url must start with: {}".format(base_url) + cache_file_path = url.replace(base_url, cache_dir) + if pathmgr.exists(cache_file_path): + return cache_file_path + cache_file_dir = os.path.dirname(cache_file_path) + if not pathmgr.exists(cache_file_dir): + pathmgr.mkdirs(cache_file_dir) + logger.info("Downloading remote file {} to {}".format(url, cache_file_path)) + download_url(url, cache_file_path) + return cache_file_path + + +def _progress_bar(count, total): + """Report download progress. Credit: + https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113 + """ + bar_len = 60 + filled_len = int(round(bar_len * count / float(total))) + percents = round(100.0 * count / float(total), 1) + bar = "=" * filled_len + "-" * (bar_len - filled_len) + sys.stdout.write( + " [{}] {}% of {:.1f}MB file \r".format(bar, percents, total / 1024 / 1024) + ) + sys.stdout.flush() + if count >= total: + sys.stdout.write("\n") + + +def download_url(url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar): + """Download url and write it to dst_file_path. Credit: + https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook + """ + req = urlrequest.Request(url) + response = urlrequest.urlopen(req) + total_size = response.info().get("Content-Length").strip() + total_size = int(total_size) + bytes_so_far = 0 + with pathmgr.open(dst_file_path, "wb") as f: + while 1: + chunk = response.read(chunk_size) + bytes_so_far += len(chunk) + if not chunk: + break + if progress_hook: + progress_hook(bytes_so_far, total_size) + f.write(chunk) + return bytes_so_far diff --git a/src/core/loss.py b/src/core/loss.py new file mode 100755 index 0000000..508c80f --- /dev/null +++ b/src/core/loss.py @@ -0,0 +1,44 @@ +""" +Author: HuynhVanThong +Department of AI Convergence, Chonnam Natl. Univ. +""" +import sys + +import torch + + +def EEVMSELoss(targets, preds, scale_factor=1.0): + mse_exp = torch.squeeze( + torch.mean((targets * scale_factor - preds) ** 2, dim=1)) + + # if torch.isnan(mse_exp): + # print('Check MSE: ', targets, preds, mse_exp) + # sys.exit(0) + return torch.mean(mse_exp) + + +def EEVPearsonLoss(targets, preds, scale_factor=1.0, ): + x_mean = torch.mean(targets * scale_factor, dim=1, keepdim=True) + xhat_mean = torch.mean(preds, dim=1, keepdim=True) + + numerator = torch.sum(torch.mul(targets * scale_factor - x_mean, preds - xhat_mean), dim=1) + denominator = torch.sqrt( + torch.sum((targets * scale_factor - x_mean) ** 2, dim=1) * torch.sum((preds - xhat_mean) ** 2, dim=1)) + + pearsonr_score = numerator / denominator + + return 1.0 - torch.mean(pearsonr_score) + + +def EEVMSEPCCLoss(targets, preds, scale_factor=1.0, alpha=0.5): + pcc_loss = EEVPearsonLoss(targets, preds, scale_factor) + mse_loss = EEVMSELoss(targets, preds, scale_factor) + # print('PCC loss: ', pcc_loss, torch.isnan(pcc_loss)) + # if torch.isnan(pcc_loss) or torch.isinf(pcc_loss): + # print('Loss is NAN ', pcc_loss, mse_loss) + # sys.exit(0) + # pcc_loss = 2 + # alpha = 0.0 + + loss = alpha * pcc_loss + (1 - alpha) * mse_loss + return loss diff --git a/src/core/metrics.py b/src/core/metrics.py new file mode 100755 index 0000000..326d76a --- /dev/null +++ b/src/core/metrics.py @@ -0,0 +1,42 @@ +""" +Author: HuynhVanThong +Department of AI Convergence, Chonnam Natl. Univ. 
+""" +import sys + +import torchmetrics +import torch + + +class EEVPearsonr(torchmetrics.Metric): + def __init__(self, dist_sync_on_step=False): + super(EEVPearsonr, self).__init__(dist_sync_on_step=dist_sync_on_step) + self.add_state("sum", default=torch.tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum") + + def get_score(self, x, x_hat): + # print(x.shape, x_hat.shape) + # sys.exit(0) + x_mean = torch.mean(x, dim=0, keepdim=True) + x_hat_mean = torch.mean(x_hat, dim=0, keepdim=True) + + numerator = torch.sum(torch.mul(x - x_mean, x_hat - x_hat_mean), dim=0) + denominator = torch.sqrt(torch.sum((x - x_mean) ** 2, dim=0) * torch.sum((x_hat - x_hat_mean) ** 2, dim=0)) + pearsonr_score = numerator / denominator + pearsonr_score[pearsonr_score != pearsonr_score] = -1 + return torch.mean(pearsonr_score) + + def update(self, preds, target): + # Update metric states + update_scores = 0. + + # update_scores = update_scores + self.get_score(target[0, :, :], preds[0, :, :]) + for idx in range(target.shape[0]): + update_scores = update_scores + self.get_score(target[idx, :,], preds[idx, :,]) + + self.sum += update_scores + + self.total = self.total + target.shape[0] + + def compute(self): + return self.sum / self.total diff --git a/src/core/models.py b/src/core/models.py new file mode 100755 index 0000000..636e287 --- /dev/null +++ b/src/core/models.py @@ -0,0 +1,373 @@ +""" +Author: HuynhVanThong +Department of AI Convergence, Chonnam Natl. Univ. +""" +import itertools +import math +import sys + +import torch +from torch import nn +import torch.nn.functional as F +import pytorch_lightning as pl + +from .tcn import TemporalConvNet +from .loss import EEVMSELoss, EEVPearsonLoss, EEVMSEPCCLoss +from .metrics import EEVPearsonr +import os +from collections import ChainMap +import pandas as pd +import numpy as np +from .config import cfg +from functools import partial + + +class EEVModel(pl.LightningModule): + @staticmethod + def get_params(): + if cfg.MODEL.TEMPORAL_TYPE == 'tcn': + # get tcn params + return { + "num_channels": cfg.TCN.NUM_CHANNELS, + "num_stack": cfg.TCN.NUM_STACK, + "dilation": cfg.TCN.DILATIONS, + "kernel_size": cfg.TCN.K_SIZE, + "dropout": cfg.TCN.DROPOUT, + "use_norm": cfg.TCN.NORM, + "fc_head": cfg.MODEL.FC_HIDDEN, + "learning_rate": cfg.OPTIM.BASE_LR + } + elif cfg.MODEL.TEMPORAL_TYPE == 'lstm': + # get lstm params + return { + "num_hidden": cfg.LSTM.HIDDEN_SIZE, + "num_layers": cfg.LSTM.NUM_LAYERS, + "bidirec": cfg.LSTM.BIDIREC, + "dropout": cfg.LSTM.DROPOUT, + "fc_head": cfg.MODEL.FC_HIDDEN, + "learning_rate": cfg.OPTIM.BASE_LR + } + else: + raise ValueError("Do not support temporal type of {}".format(cfg.MODEL.TEMPORAL_TYPE)) + + num_features = {'resnet': 2048, 'audio': 2048, 'effb0': 1280} + + def __init__(self, params, num_outputs=15, features=('resnet50',), result_dir='', dataset_name='eev', + emotion_index=-1): + super(EEVModel, self).__init__() + self.emotion_index = emotion_index + self.dataset_name = dataset_name + self.result_dir = result_dir + self.use_position = cfg.MODEL.USE_POSITION + self.num_outputs = num_outputs + + self.features = features + + for feat_idx in self.features: + cur_num_features = self.num_features[feat_idx] + cfg.MODEL.USE_POSITION + if cfg.MODEL.TEMPORAL_TYPE == 'tcn': + cur_temporal, num_temporal_out, fc_head = self.get_tcn_layers(params, cur_num_features) + else: + cur_temporal, num_temporal_out, fc_head = self.get_lstm_layers(params, cur_num_features) + + 
self.add_module('temporal_{}'.format(feat_idx), cur_temporal) + + cur_regression = nn.Sequential(nn.Linear(num_temporal_out, fc_head, bias=False), nn.ReLU(), + nn.Linear(fc_head, num_outputs, bias=False)) + self.add_module('regression_{}'.format(feat_idx), cur_regression) + + if len(self.features) > 1: + # Add some layer for fusion module at uni-modal level + pass + + if len(self.features) > 1: + # Add some layer for fusion module at uni-modal level + self.fusion_layer = nn.Sequential(nn.Linear(len(self.features), 1, bias=False), + ) + + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + + self.pearsonr = EEVPearsonr() + + self.scale_factor = 10.0 if self.dataset_name == 'eev' else 10.0 + + if self.dataset_name == 'eev': + self.loss_func = EEVMSELoss + else: + self.loss_func = partial(EEVMSEPCCLoss, alpha=0.) + + def get_lstm_layers(self, params, embed_dim): + if params is None: + params = self.get_params() + + vs = ["num_hidden", "num_layers", "bidirec", 'dropout', "fc_head", "learning_rate"] + num_hidden, num_layers, bidirec, dropout, fc_head, learning_rate = [params[v] for v in vs] + temporal_layers = nn.LSTM(input_size=embed_dim, num_layers=num_layers, hidden_size=num_hidden, dropout=dropout, + bidirectional=bidirec, batch_first=True) + self.learning_rate = learning_rate + return temporal_layers, num_hidden * (1 + bidirec), fc_head + + def get_tcn_layers(self, params, tcn_in): + if params is None: + params = self.get_params() + + vs = ["num_channels", "num_stack", "dilation", "kernel_size", "dropout", "use_norm", "fc_head", "learning_rate"] + num_channels, num_stack, dilation, kernel_size, dropout, use_norm, fc_head, learning_rate = [params[v] for v in + vs] + + # input of TCN should have dimension (N, C, L) + if num_stack == 1: + temporal_layers = TemporalConvNet(tcn_in, (num_channels,) * dilation, kernel_size, dropout, + use_norm=use_norm) + else: + list_layers = [] + for idx in range(num_stack): + tcn_in_index = tcn_in if idx == 0 else num_channels + list_layers.append( + TemporalConvNet(tcn_in_index, (num_channels,) * dilation, kernel_size, dropout, use_norm=use_norm)) + temporal_layers = nn.Sequential(*list_layers) + + self.learning_rate = learning_rate + return temporal_layers, num_channels, fc_head + + def forwardx(self, x, feat_idx): + # Input has size batch_size x sequence_length x num_channels (N x L x C) + + if cfg.MODEL.TEMPORAL_TYPE == 'tcn': + # Transform to (N, C, L) first + x = x.permute(0, 2, 1) + x = self._modules['temporal_{}'.format(feat_idx)](x) + # Transform back to (N, L, C) + x = x.permute(0, 2, 1) + else: + x, _ = self._modules['temporal_{}'.format(feat_idx)](x) + x = self._modules['regression_{}'.format(feat_idx)](x) + + if len(self.features) > 1: + # return something for fusion + return x + pass + else: + return x + + def forward(self, batch): + pred_scores = [] + for feat_idx in self.features: + if len(self.features) == 1: + # if len(batch[feat_idx].shape) > 3: + # print(batch[feat_idx].shape, batch['file_id']) + # pass + pred_scores = self.forwardx(batch[feat_idx], feat_idx) # 1 x k x 15 + # if self.use_position and self.dataset_name != 'eev': + # pred_scores = pred_scores / 1e0 + # TODO: Moving average smoothing + # pred_scores = pred_scores.permute(0, 2, 1) + # w_size = pred_scores.shape[2] // 4 + # if w_size % 2 == 0: + # w_size -= 1 
+ # pad1d = (w_size-1) // 2 + # w_f = torch.ones((pred_scores.shape[1], pred_scores.shape[1], w_size), device=pred_scores.device) + # pred_scores = F.conv1d(pred_scores, w_f, padding=pad1d) / w_size + # pred_scores = pred_scores.permute(0, 2, 1) + + else: + + feat_scores = self.forwardx(batch[feat_idx], feat_idx) + + # TODO: Moving average smoothing + # Do something for fusion + pred_scores.append(feat_scores) + + if len(self.features) > 1: + # Do something for fusion and return final score on pred_scores + pred_scores = torch.stack(pred_scores, dim=-1) + # print(pred_scores.shape) + pred_scores = self.fusion_layer(pred_scores) + pred_scores = torch.squeeze(pred_scores, dim=-1) + # print(pred_scores.shape) + # sys.exit(0) + pass + + return pred_scores + + def training_step(self, batch, batch_idx): + + out = self._shared_eval(batch, batch_idx) + + scores = batch['scores'] + + loss = self.loss_func(scores, out, scale_factor=self.scale_factor) + self.pearsonr.update(preds=out / self.scale_factor, target=scores) + + return {'loss': loss} + + def validation_step(self, batch, batch_idx): + out = self._shared_eval(batch, batch_idx) + scores = batch['scores'] + + loss = self.loss_func(scores, out, scale_factor=self.scale_factor) + self.pearsonr.update(preds=out / self.scale_factor, target=scores) + + return {'val_loss': loss, + 'file_id': (out.data.cpu().numpy()[0, :, :] / self.scale_factor, batch['timestamps'], batch['scores'])} + + def predict(self, batch, batch_idx, dataloader_idx=None): + out = self._shared_eval(batch, batch_idx) + + # return {'file_id': (out.data.cpu().numpy()[0, :, :] / self.scale_factor, batch['timestamps'], batch['scores'])} + return {batch['file_id'][0]: out.data.cpu().numpy()[0, :, :] / self.scale_factor} + + def _shared_eval(self, batch, batch_idx): + out = self(batch) + return out + + def test_step(self, batch, batch_idx): + out = self._shared_eval(batch, batch_idx) + + if torch.sum(torch.abs(batch['scores'])) > 0: + self.pearsonr.update(preds=out / self.scale_factor, target=batch['scores']) + + return {batch['file_id'][0]: out.data.cpu().numpy()[0, :, :] / self.scale_factor} + + def configure_optimizers(self): + optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate) + + if cfg.OPTIM.LR_POLICY == 'none': + return optimizer + else: + # Return lr scheduler policy, not implemented yet + raise ValueError('Return lr scheduler policy, not implemented yet') + + def training_epoch_end(self, training_step_outputs): + train_pearsonr = self.pearsonr.compute() + loss_mean = torch.tensor([x['loss'] for x in training_step_outputs]).mean() / (self.scale_factor**2) # .data.cpu().numpy() + # self.log('loss', loss_mean.item(), prog_bar=True, logger=True, on_epoch=True, on_step=False) + # self.log('pearsonr', train_pearsonr.detach().cpu().item(), prog_bar=True, logger=True, on_epoch=True, + # on_step=False) + self.log('loss', loss_mean, prog_bar=True, logger=True, on_epoch=True, on_step=False) + self.log('pearsonr', train_pearsonr, prog_bar=True, logger=True, on_epoch=True, + on_step=False) + # print('Step {}. 
Learning rate: {}'.format(self.trainer.global_step, self.current_lr)) + + self.pearsonr.reset() + + # def training_epoch_end(self): + # self.pearsonr.reset() + + def validation_epoch_end(self, validation_step_outputs): + val_pearsonr = self.pearsonr.compute() + loss_mean = torch.tensor([x['val_loss'] for x in validation_step_outputs]).mean() / (self.scale_factor**2) # .data.cpu().numpy() + # self.log('val_pearsonr', val_pearsonr.detach().cpu().item(), prog_bar=True, logger=True, on_epoch=True) + # self.log('val_loss', loss_mean.item() / self.scale_factor , prog_bar=True, logger=True, on_epoch=True, on_step=False) + + self.log('val_pearsonr', val_pearsonr, prog_bar=True, logger=True, on_epoch=True) + self.log('val_loss', loss_mean, prog_bar=True, logger=True, on_epoch=True, + on_step=False) + + print_str = 'Epoch: {:5d} | Val-PCC {:10.5f} | Loss{:10.5f}'.format(self.current_epoch, + val_pearsonr.detach().cpu().item(), + loss_mean.item()) + print(print_str) + + with open(os.path.join(self.logger.log_dir, 'run_logs.txt'), 'a') as flog: + flog.write(print_str) + flog.write('\n') + + self.pearsonr.reset() + + # self.loss_func = partial(EEVMSEPCCLoss, alpha=0.5 - 0.5*(self.current_epoch / self.trainer.max_epochs)) + + def test_epoch_end(self, test_step_outputs): + if self.pearsonr.total > 0: + print('Test PCC scores: ', self.pearsonr.compute()) + self.pearsonr.reset() + + if isinstance(test_step_outputs[0], list): + test_results = list(itertools.chain.from_iterable(test_step_outputs)) + else: + test_results = test_step_outputs + test_write = dict(ChainMap(*test_results)) + if self.result_dir == '': + self.result_dir = self.logger.log_dir + + print('Test end, saving to {}'.format(os.path.join(self.result_dir, 'test_results.pt'))) + torch.save(test_write, os.path.join(self.result_dir, 'test_results.pt')) + + if self.dataset_name == 'eev': + self.test2csv(test_write) + elif self.dataset_name == 'mediaeval18': + write_path = os.path.join(self.result_dir, 'test_results') + os.makedirs(write_path, exist_ok=True) + for vid in test_write.keys(): + write_data = np.hstack([np.arange(test_write[vid].shape[0]).reshape(-1, 1), test_write[vid]]) + if self.emotion_index == -1: + pd.DataFrame(write_data, columns=['Time', 'Valence', 'Arousal']).to_csv( + os.path.join(write_path, vid + '.txt'), sep='\t', index=False) + else: + emotion_name = 'Valence' if self.emotion_index == 0 else 'Arousal' + pd.DataFrame(write_data, columns=['Time', emotion_name]).to_csv( + os.path.join(write_path, vid + '.txt'), sep='\t', index=False) + else: + raise ValueError('Do not support {} dataset'.format(self.dataset_name)) + + self.pearsonr.reset() + + def test2csv(self, test_prediction): + dataset_root_path = '/mnt/sXProject/EvokedExpression/' + emotions = ['amusement', 'anger', 'awe', 'concentration', + 'confusion', 'contempt', 'contentment', 'disappointment', 'doubt', 'elation', 'interest', + 'pain', 'sadness', 'surprise', 'triumph'] + + test_csv = pd.read_csv('{}/eev/{}.csv'.format(dataset_root_path, 'test')) + val_csv = pd.read_csv('{}/eev/{}.csv'.format(dataset_root_path, 'val')) + + list_ids = test_csv['Video ID'].unique() + if list_ids[0] not in test_prediction: + print('Use val set') + use_set_csv = val_csv + use_key = 'YouTube ID' + list_ids = val_csv['YouTube ID'].unique() + else: + use_set_csv = test_csv + use_key = 'Video ID' + + list_scores = [] + # cnt = 0 + for id in list_ids: + current_id_times = use_set_csv.loc[use_set_csv[use_key] == id].values[:, :2] # k x 2 + if id not in test_prediction: + current_scores = 
np.zeros((current_id_times.shape[0], self.num_outputs)) + else: + current_scores = test_prediction[id] # k x 15 + + current_data = np.hstack([current_id_times, current_scores]) + list_scores.append(current_data) + # cnt += 1 + # if cnt > 4: + # break + if self.num_outputs == 1: + columns_name = ['Video ID', 'Timestamp (milliseconds)', emotions[self.emotion_index]] + else: + columns_name = ['Video ID', 'Timestamp (milliseconds)', ] + emotions + + if isinstance(self.features, (list, tuple)): + postfix = '_'.join(self.features) + else: + postfix = self.features + + idx = 0 + while os.path.isfile('{}/{}.csv'.format(self.result_dir, 'test_results_{}_{}'.format(postfix, idx))): + idx += 1 + + list_scores = np.vstack(list_scores) + pd.DataFrame(data=list_scores, + columns=columns_name, ).to_csv( + '{}/{}.csv'.format(self.result_dir, 'test_results_{}_{}'.format(postfix, idx)), index=False) diff --git a/src/tcn.py b/src/core/tcn.py old mode 100644 new mode 100755 similarity index 95% rename from src/tcn.py rename to src/core/tcn.py index ca9612a..93c9aeb --- a/src/tcn.py +++ b/src/core/tcn.py @@ -1,3 +1,6 @@ +""" +Original source https://github.com/locuslab/TCN/blob/master/TCN/tcn.py +""" import torch import torch.nn as nn from torch.nn.utils import weight_norm @@ -82,9 +85,9 @@ def __init__(self, num_inputs, num_channels, kernel_size=3, dropout=0.2, use_nor in_channels = num_inputs if i == 0 else num_channels[i - 1] out_channels = num_channels[i] layers += [TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size, - padding=(kernel_size - 1) * dilation_size, dropout=dropout, use_norm=False)] + padding=(kernel_size - 1) * dilation_size, dropout=dropout, use_norm=use_norm)] self.network = nn.Sequential(*layers) def forward(self, x): - return self.network(x) + return self.network(x) \ No newline at end of file diff --git a/src/feats/Dockerfile b/src/feats/Dockerfile new file mode 100755 index 0000000..8f688e7 --- /dev/null +++ b/src/feats/Dockerfile @@ -0,0 +1,9 @@ +FROM nvcr.io/nvidia/tensorflow:21.03-tf2-py3 +RUN apt-get update && apt-get -y install apt-utils gcc libpq-dev libsndfile-dev libgl1-mesa-glx ffmpeg graphviz \ + && rm -rf /var/lib/apt/lists/* +RUN pip install --no-cache-dir librosa pandas opencv-python tabulate moviepy pydot \ + --index-url=http://ftp.daumkakao.com/pypi/simple \ + --trusted-host=ftp.daumkakao.com +RUN pip install --no-cache-dir tensorflow-hub tensorflow-io==0.17.0 tensorflow-addons --no-deps \ + --index-url=http://ftp.daumkakao.com/pypi/simple \ + --trusted-host=ftp.daumkakao.com diff --git a/src/feats/feature_extractor.py b/src/feats/feature_extractor.py new file mode 100755 index 0000000..30d8196 --- /dev/null +++ b/src/feats/feature_extractor.py @@ -0,0 +1,286 @@ +""" +Author: Huynh Van Thong +Department of AI Convergence, Chonnam Natl. Univ. 
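Usage sketch (paths are placeholders; the flags come from the argparse block at the
bottom of this file):

    python feature_extractor.py --dataset eev --dataset_root /path/to/EEV/ --visual
    python feature_extractor.py --dataset mediaeval18 --dataset_root /path/to/mediaeval18/

Omitting --visual extracts the TRILL-distilled audio embeddings instead of the
image-backbone features.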
+""" +import gc +import os +import sys + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +import tensorflow as tf +from tensorflow.keras import layers +import tensorflow_io as tfio +import tensorflow_hub as hub +import librosa +import pandas as pd +import cv2 +import numpy as np +import pathlib +import argparse + +DEFAULT_SR = 16000 + +model_link_dict = {'efficientnet-b0': ("https://tfhub.dev/tensorflow/efficientnet/b0/feature-vector/1", 224), + 'resnetv2-50': ("https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4", 224)} + + +def create_feature_extractor_model(modelname='efficientnet-b0'): + target_size = model_link_dict[modelname][1] + target_link = model_link_dict[modelname][0] + + model = tf.keras.Sequential([ # layers.experimental.preprocessing.Resizing(target_size, target_size), + layers.experimental.preprocessing.Rescaling(1. / 255), + hub.KerasLayer(target_link, trainable=False)]) + model.build([None, 224, 224, 3]) + return model + + +def resize_pad(img, target=224): + h, w = img.shape[:2] + if h < w: + pad_h = w - h + pad_w = 0 + else: + pad_w = h - w + pad_h = 0 + + img = np.pad(img, ((pad_h // 2, pad_h - pad_h // 2), (pad_w // 2, pad_w - pad_w // 2), (0, 0)), + mode='constant') + if img.shape[0] != img.shape[1] or img.shape[2] != 3: + print('Error in here, please stop ', img.shape) + sys.exit(0) + img = cv2.resize(img, (target, target)) + return img + + +def read_video(path, num_segments=-1, unlabelled_rows=None, get_audio=False): + print(path) + if not get_audio: + cap = cv2.VideoCapture(path) + vd_frames = [] + while True: + ret, frame = cap.read() + if not ret: + break + + frame = cv2.cvtColor(resize_pad(frame, 224), cv2.COLOR_BGR2RGB) + vd_frames.append(frame) + cap.release() + + vd_frames = np.array(vd_frames) + if num_segments > 0: + segment_len = int(vd_frames.shape[0] / num_segments) + use_indexes = np.linspace(segment_len // 2, vd_frames.shape[0], num=num_segments, dtype=int, endpoint=False) + vd_frames = vd_frames[use_indexes, :, :, :] + # if unlabelled_rows is not None: + # vd_frames = vd_frames[np.logical_not(unlabelled_rows), :, :, :] + + # h, w = vd_frames.shape[1: 3] + # + # + # vd_frames = np.pad(vd_frames, ((0, 0), (pad_h // 2, pad_h - pad_h // 2), (pad_w // 2, pad_w - pad_w // 2), (0, 0)), + # mode='constant') + return vd_frames + + else: + audio, sr = librosa.load(path) + if sr != DEFAULT_SR: + audio = librosa.resample(audio, sr, DEFAULT_SR) + if num_segments > 0 and audio.shape[0] % num_segments > 0: + num_pad = num_segments - (audio.shape[0] % num_segments) + audio = np.pad(audio, ((0, num_pad)), mode='constant') + + audio = audio.reshape(num_segments, -1) + # if unlabelled_rows is not None: + # audio = audio[np.logical_not(unlabelled_rows), :] + + return audio + + +def mediaeval_feature_extractor(split, visual=True): + with open(os.path.join(dataset_root_path, '{}.txt'.format(split)), 'r') as fd: + list_files = fd.readlines() + + list_files = [x.replace('\n', '') for x in list_files] + model_created = {} + prev_shape = {} + + if visual: + use_model = ['resnetv2-50', 'efficientnet-b0'] # list(model_link_dict.keys()) # + for model_id in use_model: + model_created[model_id] = create_feature_extractor_model(model_id) + prev_shape[model_id] = None + else: + module = tf.keras.Sequential([hub.KerasLayer('https://tfhub.dev/google/nonsemantic-speech-benchmark/trill-distilled/3', + arguments={'sample_rate': tf.constant(DEFAULT_SR, tf.int32)}, + trainable=False, output_key='embedding', + output_shape=[None, 2048])]) + + folder_write = 
pathlib.Path('{}/dataset/features_v2/{}'.format(dataset_root_path, split)) + folder_write.mkdir(parents=True, exist_ok=True) + + count = 0 + for vid in list_files: + count += 1 + if count % 10 == 0 : + print(count, '**', len(list_files)) + + video_path = os.path.join(dataset_root_path, 'raw/{}/{}'.format(split, vid)) + video_anno_path = video_path.replace('.mp4', '_Valence-Arousal.txt') + video_anno = pd.read_csv(video_anno_path, sep='\t').values + + feature_dict = {} + + if visual: + vd_frames = read_video(video_path, num_segments=video_anno.shape[0]) + + feature_dict.update({ + 'file_id': vid, + 'timestamps': video_anno[:, 0].astype(np.int64), + 'scores': video_anno[:, 1:]}) + + for model_id in use_model: + feature_dict['feature'] = model_created[model_id].predict(vd_frames, batch_size=128) + np.save('{}/{}_{}.npy'.format(folder_write.__str__(), vid, model_id), feature_dict) + _ = feature_dict.pop('feature') + + del vd_frames + else: + audio = read_video(video_path, num_segments=video_anno.shape[0], get_audio=True) + # Audio embedding extraction + # audio_emb = module(samples=audio, sample_rate=DEFAULT_SR)['embedding'] + audio_emb = module.predict(audio, batch_size=2048) + + # `emb` is a [batch_size, time, feature_dim] Tensor. In EvokedExpression, time=1 + # audio_emb.shape.assert_is_compatible_with([None, 512]) + audio_emb = np.squeeze(audio_emb) + + feature_dict['feature'] = audio_emb + + np.save('{}/{}_{}.npy'.format(folder_write.__str__(), vid, 'audio'), feature_dict) + + del audio_emb + del feature_dict + del audio + + print('Number of files: ', len(list_files)) + tf.keras.backend.clear_session() + pass + +def eev_feature_extractor(split, visual=True): + data_csv = pd.read_csv('{}/eev/{}.csv'.format(dataset_root_path, split)) + id_header = 'Video ID' if split == 'test' else 'YouTube ID' + video_ids = data_csv[id_header].unique() + + if not visual: + module = tf.keras.Sequential([hub.KerasLayer('https://tfhub.dev/google/nonsemantic-speech-benchmark/trill-distilled/3', + arguments={'sample_rate': tf.constant(DEFAULT_SR, tf.int32)}, + trainable=False, output_key='embedding', + output_shape=[None, 2048])]) + else: + model_created = {} + prev_shape = {} + use_model = ['resnetv2-50', 'efficientnet-b0'] # list(model_link_dict.keys()) # + for model_id in use_model: + model_created[model_id] = create_feature_extractor_model(model_id) + prev_shape[model_id] = None + + count = 0 + num_vids = len(video_ids) + excluded_ids = np.loadtxt('excluded_files.txt', dtype=str) + is_continue=True + for vid in video_ids: + gc.collect() + if count % 10 == 0: + print(count, "/", num_vids) + count += 1 + + if vid in excluded_ids: + continue + folder_write = pathlib.Path('{}/dataset/features_v2/{}'.format(dataset_root_path, split)) + folder_write.mkdir(parents=True, exist_ok=True) + if os.path.isfile('{}/{}_{}.npy'.format(folder_write.__str__(), vid, 'audio')): + tmp = np.load('{}/{}_{}.npy'.format(folder_write.__str__(), vid, 'audio'), allow_pickle=True) + if tmp.item()['feature'].shape[-1] != 2048: + print(tmp.item()['feature'].shape) + else: + continue + + feature_dict = {} + current_frames = data_csv.loc[data_csv[id_header] == vid] + if split in ['train', 'val']: + scores = current_frames.values[:, 2:].astype(np.float32) + unlabelled_rows = np.sum(scores, axis=-1) <= 1e-6 + else: + scores = -1 * np.ones((current_frames.shape[0], 15)).astype(np.float32) + unlabelled_rows = np.zeros(current_frames.shape[0], dtype=np.bool) + + unlabelled_rows = np.zeros(current_frames.shape[0], dtype=np.bool) + if visual: + 
try: + vd_frames = read_video("{}/dataset/{}/{}.mp4".format(dataset_root_path, split, vid), + num_segments=current_frames.shape[0], unlabelled_rows=unlabelled_rows) + feature_dict.update({ + 'file_id': current_frames.values[:, 0][np.logical_not(unlabelled_rows)].astype(np.str), + 'timestamps': current_frames.values[:, 1][np.logical_not(unlabelled_rows)].astype(np.int64), + 'scores': scores[np.logical_not(unlabelled_rows), :]}) + + for model_id in use_model: + feature_dict['feature'] = model_created[model_id].predict(vd_frames, batch_size=128) + np.save('{}/{}_{}.npy'.format(folder_write.__str__(), vid, model_id), feature_dict) + _ = feature_dict.pop('feature') + except: + with open('excluded_files_v3.txt', 'a') as fd: + fd.write('{} {}\n'.format(split, vid)) + break + continue + + del vd_frames + + else: + audio = read_video("{}/dataset/{}/{}.mp4".format(dataset_root_path, split, vid), + num_segments=current_frames.shape[0], unlabelled_rows=unlabelled_rows, get_audio=True) + # Audio embedding extraction + # audio_emb = module(samples=audio, sample_rate=DEFAULT_SR)['embedding'] + audio_emb = module.predict(audio, batch_size=2048) + + # `emb` is a [batch_size, time, feature_dim] Tensor. In EvokedExpression, time=1 + # audio_emb.shape.assert_is_compatible_with([None, 512]) + audio_emb = np.squeeze(audio_emb) + + feature_dict['feature'] = audio_emb + + np.save('{}/{}_{}.npy'.format(folder_write.__str__(), vid, 'audio'), feature_dict) + + del audio_emb + del feature_dict + del audio + scores = None + unlabelled_rows = None + + tf.keras.backend.clear_session() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Video feature extractor') + parser.add_argument('--dataset', type=str, default='eev', help='Dataset eev or mediaeval18 (default: eev)') + parser.add_argument('--dataset_root', type=str, default='/mnt/Work/Dataset/EEV/', help='Dataset root path (default: /mnt/Work/Dataset/EEV/)') + parser.add_argument('--visual', action='store_true', help='Extract visual or audio (default: audio)') + + args = parser.parse_args() + + print(args.dataset, args.dataset_root, args.visual) + + dataset_root_path = args.dataset_root + + physical_devices = tf.config.list_physical_devices('GPU') + if len(physical_devices) > 0: + tf.config.experimental.set_memory_growth(physical_devices[0], True) + + for split in ['train', 'val', 'test']: + if args.dataset == 'eev': + eev_feature_extractor(split, args.visual) + elif args.dataset == 'mediaeval18': + mediaeval_feature_extractor(split, args.visual) + else: + raise ValueError('Do not support {} dataset'.format(args.dataset)) diff --git a/src/feature_extractor.py b/src/feature_extractor.py deleted file mode 100644 index eb2a48a..0000000 --- a/src/feature_extractor.py +++ /dev/null @@ -1,210 +0,0 @@ -""" -Author: Huynh Van Thong -Department of AI Convergence, Chonnam Natl. Univ. 
-""" -import gc -import os -import sys - -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' -import tensorflow as tf -from tensorflow.keras import layers -import tensorflow_io as tfio -import tensorflow_hub as hub -import librosa -import pandas as pd -import cv2 -import numpy as np -import pathlib - -dataset_root_path = '/mnt/Work/Dataset/EEV/' -DEFAULT_SR = 16000 - -model_link_dict = {'efficientnet-b0': ("https://tfhub.dev/tensorflow/efficientnet/b0/feature-vector/1", 224), - 'efficientnet-b1': ("https://tfhub.dev/tensorflow/efficientnet/b1/feature-vector/1", 240), - 'efficientnet-b2': ("https://tfhub.dev/tensorflow/efficientnet/b2/feature-vector/1", 260), - 'efficientnet-b3': ("https://tfhub.dev/tensorflow/efficientnet/b3/feature-vector/1", 300), - 'resnetv2-50': ("https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4", 224)} - - -def create_feature_extractor_model(modelname='efficientnet-b0'): - target_size = model_link_dict[modelname][1] - target_link = model_link_dict[modelname][0] - - model = tf.keras.Sequential([ # layers.experimental.preprocessing.Resizing(target_size, target_size), - layers.experimental.preprocessing.Rescaling(1. / 255), - hub.KerasLayer(target_link, trainable=False)]) - model.build([None, 224, 224, 3]) - return model - - -def resize_pad(img, target=224): - h, w = img.shape[:2] - if h < w: - pad_h = w - h - pad_w = 0 - else: - pad_w = h - w - pad_h = 0 - - img = np.pad(img, ((pad_h // 2, pad_h - pad_h // 2), (pad_w // 2, pad_w - pad_w // 2), (0, 0)), - mode='constant') - if img.shape[0] != img.shape[1] or img.shape[2] != 3: - print('Error in here, please stop ', img.shape) - sys.exit(0) - img = cv2.resize(img, (target, target)) - return img - - -def read_video(path, num_segments=-1, unlabelled_rows=None, get_audio=False): - print(path) - if not get_audio: - cap = cv2.VideoCapture(path) - vd_frames = [] - while True: - ret, frame = cap.read() - if not ret: - break - - frame = cv2.cvtColor(resize_pad(frame, 224), cv2.COLOR_BGR2RGB) - vd_frames.append(frame) - cap.release() - - vd_frames = np.array(vd_frames) - if num_segments > 0: - segment_len = int(vd_frames.shape[0] / num_segments) - use_indexes = np.linspace(segment_len // 2, vd_frames.shape[0], num=num_segments, dtype=int, endpoint=False) - vd_frames = vd_frames[use_indexes, :, :, :] - # if unlabelled_rows is not None: - # vd_frames = vd_frames[np.logical_not(unlabelled_rows), :, :, :] - - # h, w = vd_frames.shape[1: 3] - # - # - # vd_frames = np.pad(vd_frames, ((0, 0), (pad_h // 2, pad_h - pad_h // 2), (pad_w // 2, pad_w - pad_w // 2), (0, 0)), - # mode='constant') - return vd_frames - - else: - audio, sr = librosa.load(path) - if sr != DEFAULT_SR: - audio = librosa.resample(audio, sr, DEFAULT_SR) - if num_segments > 0 and audio.shape[0] % num_segments > 0: - num_pad = num_segments - (audio.shape[0] % num_segments) - audio = np.pad(audio, ((0, num_pad)), mode='constant') - - audio = audio.reshape(num_segments, -1) - # if unlabelled_rows is not None: - # audio = audio[np.logical_not(unlabelled_rows), :] - - return audio - - -def feature_extractor(split): - data_csv = pd.read_csv('{}/eev/{}.csv'.format(dataset_root_path, split)) - id_header = 'Video ID' if split == 'test' else 'YouTube ID' - video_ids = data_csv[id_header].unique() - - module = tf.keras.Sequential([hub.KerasLayer('https://tfhub.dev/google/nonsemantic-speech-benchmark/trill-distilled/3', - arguments={'sample_rate': tf.constant(DEFAULT_SR, tf.int32)}, - trainable=False, output_key='embedding', - output_shape=[None, 2048])]) - - model_created = {} 
- prev_shape = {} - use_model = ['resnetv2-50', 'efficientnet-b0'] # list(model_link_dict.keys()) # - for model_id in use_model: - model_created[model_id] = create_feature_extractor_model(model_id) - prev_shape[model_id] = None - - count = 0 - num_vids = len(video_ids) - excluded_ids = np.loadtxt('excluded_files.txt', dtype=str) - is_continue=True - for vid in video_ids: - gc.collect() - if count % 10 == 0: - print(count, "/", num_vids) - count += 1 - # if vid not in ['zy6jKmYv0LM', 'zwmJl7OXvg0']: - # if is_continue: - # continue - # else: - # is_continue = False - - if vid in excluded_ids: - continue - folder_write = pathlib.Path('{}/dataset/features_v2/{}'.format(dataset_root_path, split)) - folder_write.mkdir(parents=True, exist_ok=True) - if os.path.isfile('{}/{}_{}.npy'.format(folder_write.__str__(), vid, 'audio')): - tmp = np.load('{}/{}_{}.npy'.format(folder_write.__str__(), vid, 'audio'), allow_pickle=True) - if tmp.item()['feature'].shape[-1] != 2048: - print(tmp.item()['feature'].shape) - else: - continue - # tmp2 = np.load('{}/{}_{}.npy'.format(folder_write.__str__(), vid, 'resnetv2-50'), allow_pickle=True) - # continue - # else: - # continue - - feature_dict = {} - current_frames = data_csv.loc[data_csv[id_header] == vid] - if split in ['train', 'val']: - scores = current_frames.values[:, 2:].astype(np.float32) - unlabelled_rows = np.sum(scores, axis=-1) <= 1e-6 - else: - scores = -1 * np.ones((current_frames.shape[0], 15)).astype(np.float32) - unlabelled_rows = np.zeros(current_frames.shape[0], dtype=np.bool) - - unlabelled_rows = np.zeros(current_frames.shape[0], dtype=np.bool) - # try: - # vd_frames = read_video("{}/dataset/{}/{}.mp4".format(dataset_root_path, split, vid), - # num_segments=current_frames.shape[0], unlabelled_rows=unlabelled_rows) - # feature_dict.update({ - # 'file_id': current_frames.values[:, 0][np.logical_not(unlabelled_rows)].astype(np.str), - # 'timestamps': current_frames.values[:, 1][np.logical_not(unlabelled_rows)].astype(np.int64), - # 'scores': scores[np.logical_not(unlabelled_rows), :]}) - # - # for model_id in use_model: - # feature_dict['feature'] = model_created[model_id].predict(vd_frames, batch_size=128) - # np.save('{}/{}_{}.npy'.format(folder_write.__str__(), vid, model_id), feature_dict) - # _ = feature_dict.pop('feature') - # except: - # with open('excluded_files_v3.txt', 'a') as fd: - # fd.write('{} {}\n'.format(split, vid)) - # break - # continue - # - # del vd_frames - - audio = read_video("{}/dataset/{}/{}.mp4".format(dataset_root_path, split, vid), - num_segments=current_frames.shape[0], unlabelled_rows=unlabelled_rows, get_audio=True) - # Audio embedding extraction - # audio_emb = module(samples=audio, sample_rate=DEFAULT_SR)['embedding'] - audio_emb = module.predict(audio, batch_size=2048) - - # `emb` is a [batch_size, time, feature_dim] Tensor. 
In EvokedExpression, time=1 - # audio_emb.shape.assert_is_compatible_with([None, 512]) - audio_emb = np.squeeze(audio_emb) - - feature_dict['feature'] = audio_emb - np.save('{}/{}_{}.npy'.format(folder_write.__str__(), vid, 'audio'), feature_dict) - # print("Writing to tfrecords") - - # count += 1 - - del audio_emb - del feature_dict - del audio - scores = None - unlabelled_rows = None - - tf.keras.backend.clear_session() - - -if __name__ == '__main__': - physical_devices = tf.config.list_physical_devices('GPU') - tf.config.experimental.set_memory_growth(physical_devices[0], True) - for split in ['train', 'val', 'test']: - feature_extractor(split) - pass diff --git a/src/main.py b/src/main.py old mode 100644 new mode 100755 index 89fe7f9..188d9f6 --- a/src/main.py +++ b/src/main.py @@ -2,109 +2,113 @@ Author: Huynh Van Thong Department of AI Convergence, Chonnam Natl. Univ. """ -import argparse + +import os.path as osp import glob -import os.path -import pathlib import shutil -import sys +import time import torch from pytorch_lightning.callbacks import ModelCheckpoint -from torchvision import transforms -from torch.utils.data import DataLoader -from utils import EEVdataset, ToTensor, eev_collatefn import pytorch_lightning as pl -from models import EEVModel from pytorch_lightning.loggers import TensorBoardLogger - -def get_dataloader(emotion_index=-1, feature='resnet'): - loaders = {} - for split in ['train', 'val', 'test']: - current_split = EEVdataset(root_path='/mnt/sXProject/EvokedExpression/', split=split, - feature=feature, emotion_index=emotion_index, - transforms=transforms.Compose([ToTensor()])) - shuffle = (split == 'train') - loaders[split] = DataLoader(current_split, batch_size=1, shuffle=shuffle, num_workers=20, # pin_memory=True, - prefetch_factor=2, collate_fn=None) - # for b in loaders[split]: - # # print(b['effb0'].shape, b['resnet'].shape, b['audio'].shape, b['mask'].shape) - # tmp = b['audio'] - # # # if tmp.shape[1] == 1: - # # # print(split, ' ', b['file_id']) - # # # - # sys.exit(0) - return loaders +from core import EEVModel, EEVDataModule, config +from core.config import cfg +from core.io import pathmgr def copyfiles(source_dir, dest_dir, ext='*.py'): - files = glob.iglob(os.path.join(source_dir, ext)) + # Copy source files or compress to zip + files = glob.iglob(osp.join(source_dir, ext)) for file in files: - if os.path.isfile(file): + if osp.isfile(file): shutil.copy2(file, dest_dir) + if osp.isdir(osp.join(source_dir, 'core')) and not osp.isdir(osp.join(dest_dir, 'core')): + shutil.copytree(osp.join(source_dir, 'core'), osp.join(dest_dir, 'core'), copy_function=shutil.copy2) if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Evoked Expression') - parser.add_argument('--dir', type=str, default='./trial', help="Training directory") - parser.add_argument('--lr_init', type=float, default=1e-3, help="Initial learning rate") - parser.add_argument('--batch_size', type=int, default=32, help="Batch size") - parser.add_argument('--seed', type=int, default=1, help="Random seed") - parser.add_argument('--epoch', type=int, default=5, help="Number of epochs") - parser.add_argument('--opt', type=str, default='sgd', help="Optimizer") - parser.add_argument('--feature', type=str, default='resnet', help="Feature type (audio, resnet, effb0") - parser.add_argument('--emotion', type=int, default=0, help="Emotion index to be learned (0-14) or all (-1)") - - args = parser.parse_args() - - if args.feature not in ['resnet', 'audio', 'effb0']: - raise ValueError('Do 
not support {} at this time.'.format(args.feature)) - if args.emotion not in range(-1, 15): - raise ValueError('Do not support emotion {} at this time.'.format(args.emotion)) - - tcn_in = {'resnet': 2048, 'audio': 2048, 'effb0': 1280} - if args.emotion == -1: - num_outputs = 15 - emotion_index = -1 + config.load_cfg_fom_args("EEV 2021 Challenges") + config.assert_and_infer_cfg() + cfg.freeze() + + pl.seed_everything(2) # cfg.RNG_SEED + + # torch.backends.cudnn.benchmark = cfg.CUDNN.BENCHMARK + st_time = time.time() + # Ensure that the output dir exists + pathmgr.mkdirs(cfg.OUT_DIR) + cfg_file = config.dump_cfg() + copyfiles(source_dir='./', dest_dir=cfg.OUT_DIR) + copyfiles(source_dir='./', dest_dir=cfg.OUT_DIR, ext='.sh') + copyfiles(source_dir='./', dest_dir=cfg.OUT_DIR, ext='.txt') + + if cfg.LOGGER == 'TensorBoard': + logger = TensorBoardLogger(cfg.OUT_DIR, name='{}_emo'.format('full'), version='_'.join(cfg.MODEL.FEATURES)) else: + raise ValueError('Do not implement with {} logger yet.'.format(cfg.LOGGER)) + + params = None # Default is None + num_outputs = 15 if cfg.DATA_NAME == 'eev' else 2 # TODO + if cfg.DATA_LOADER.EMO_INDEX > -1: num_outputs = 1 - emotion_index = args.emotion - - pl.seed_everything(args.seed) - - emotions = ['amusement', 'anger', 'awe', 'concentration', 'confusion', 'contempt', 'contentment', 'disappointment', - 'doubt', 'elation', 'interest', 'pain', 'sadness', 'surprise', 'triumph'] - - if num_outputs == 15: - save_dir = args.dir # os.path.join(args.dir, 'emo_{}'.format(emotions[emo_ind])) - pathlib.Path(save_dir).mkdir(exist_ok=True, parents=True) - logger = TensorBoardLogger(save_dir, name='{}_emo'.format('full'), version=args.feature) - copyfiles('./', save_dir, ext='*.txt') - copyfiles('./', save_dir, ext='*.sh') - copyfiles('./', save_dir, ext='*.py') - - if args.feature == 'effb0': - tcn_channels = (512, ) - elif args.feature == 'audio': - tcn_channels = (512, 512, ) - else: - tcn_channels = (128, ) - - model = EEVModel(num_outputs=15, tcn_in=tcn_in[args.feature] + 0, tcn_channels=tcn_channels, tcn_kernel_size=3, - dropout=0.3, mtloss=False, - opt=args.opt, lr=args.lr_init, use_norm=True, features_dropout=0., temporal_size=-1, - num_dilations=4, features=args.feature, emotion_index=-1, warmup_steps=200, - accum_grad=args.batch_size) - fast_dev_run = False - loaders = get_dataloader(emotion_index=-1, feature=args.feature) - checkpoint_callback = ModelCheckpoint(monitor='val_loss', mode="min", save_top_k=1, save_last=True) - trainer = pl.Trainer(gpus=1, accumulate_grad_batches=args.batch_size, max_epochs=args.epoch, - fast_dev_run=fast_dev_run, - deterministic=True, callbacks=checkpoint_callback, num_sanity_val_steps=0, - progress_bar_refresh_rate=0, logger=logger) - trainer.fit(model, loaders['train'], loaders['val']) - print('Best model scores: ', checkpoint_callback.best_model_score) + + if cfg.TEST.WEIGHTS != '': + result_dir = cfg.OUT_DIR + else: + result_dir = '' + + eev_model = EEVModel(params=params, num_outputs=num_outputs, features=cfg.MODEL.FEATURES, result_dir=result_dir, + dataset_name=cfg.DATA_NAME, emotion_index=cfg.DATA_LOADER.EMO_INDEX) + eev_data = EEVDataModule(cfg.DATA_LOADER.DATA_DIR, features=cfg.MODEL.FEATURES, dataset_name=cfg.DATA_NAME, + emotion_index=cfg.DATA_LOADER.EMO_INDEX, drop_perc=cfg.TRAIN.DROP_PERC) + + fast_dev_run = cfg.FAST_DEV_RUN + max_epochs = cfg.OPTIM.MAX_EPOCH if cfg.TEST.WEIGHTS == '' else 1 + if cfg.DATA_NAME == 'eev': + check_val_every_n_epoch = 1 + elif 'mediaeval' in cfg.DATA_NAME: + check_val_every_n_epoch = 
cfg.OPTIM.MAX_EPOCH + else: + check_val_every_n_epoch = 1 + + ckpt_callbacks = ModelCheckpoint(monitor='val_loss', mode="min", save_top_k=1, save_last=True) + trainer = pl.Trainer(gpus=1, fast_dev_run=fast_dev_run, accumulate_grad_batches=cfg.TRAIN.ACCUM_GRAD_BATCHES, + max_epochs=max_epochs, deterministic=True, callbacks=ckpt_callbacks, + num_sanity_val_steps=0, progress_bar_refresh_rate=0, logger=logger, + stochastic_weight_avg=cfg.OPTIM.USE_SWA, weights_summary=None, + check_val_every_n_epoch=check_val_every_n_epoch, gradient_clip_val=10. if num_outputs<15 else 0) + + if cfg.TEST.WEIGHTS == '': + trainer.fit(eev_model, datamodule=eev_data) if not fast_dev_run: - ckpt_path = None # checkpoint_callback.best_model_path - trainer.test(test_dataloaders=loaders['test'], ckpt_path=ckpt_path) + print('Best scores: ', ckpt_callbacks.best_model_score) + + ckpt_path = None # None # ckpt_callbacks.best_model_path + print('Generate test predictions 1') + trainer.test(datamodule=eev_data, ckpt_path=ckpt_path) + + if cfg.OPTIM.USE_SWA: + ckpt_path = ckpt_callbacks.last_model_path.replace('last', 'swa_last') + trainer.save_checkpoint(ckpt_path) + else: + ckpt_path = ckpt_callbacks.last_model_path + + else: + # Do for testing + eev_data.setup() + # Load pre-trained weights + pretrained_weights = torch.load(cfg.TEST.WEIGHTS)['state_dict'] + eev_model.load_state_dict(pretrained_weights, strict=True) + + # trainer.setup(eev_model, stage='test') + print('Do testing ', cfg.TEST.WEIGHTS) + + print('Generate validation prediction') + trainer.test(model=eev_model, test_dataloaders=eev_data.val_dataloader(), ckpt_path=None) + # trainer.test(datamodule=eev_data, ckpt_path=cfg.TEST.WEIGHTS) + print('Generate testing prediction') + trainer.test(test_dataloaders=eev_data.test_dataloader(), ckpt_path=cfg.TEST.WEIGHTS) + + print('Finished. 
Total time: {} minutes.'.format((time.time() - st_time) / 60)) diff --git a/src/models.py b/src/models.py deleted file mode 100644 index c71dea7..0000000 --- a/src/models.py +++ /dev/null @@ -1,215 +0,0 @@ -import itertools -import math - -import torch -from torch import nn -import torch.nn.functional as F -from torch.utils.data import ConcatDataset, DataLoader -import pytorch_lightning as pl -from tcn import TemporalConvNet -from utils import EEVMSELoss, EEVPersonr, EEVPearsonLoss -from scipy import stats -import os -from collections import ChainMap -import pandas as pd -import numpy as np - - -class EEVModel(pl.LightningModule): - def __init__(self, num_outputs=15, tcn_in=2048, tcn_channels=(512, 512), num_dilations=4, tcn_kernel_size=3, - dropout=0.2, - mtloss=False, opt=None, lr=1e-3, use_norm=False, features_dropout=0., temporal_size=-1, - num_last_regress=128, features='resnet', emotion_index=-1, warmup_steps=500, accum_grad=1): - super(EEVModel, self).__init__() - self.accum_grad = accum_grad - self.warmup_steps = warmup_steps - self.learning_rate = lr - self.args = {'opt': opt, 'lr_init': lr} - self.num_outputs = num_outputs - self.emotion_index = emotion_index - self.temporal_size = temporal_size - self.num_stacks_tcn = len(tcn_channels) - - if features_dropout > 0: - self._dropout = nn.Dropout(p=features_dropout) - else: - self._dropout = None - - self.features = features - - self._temporal = self.get_temporal_layers(tcn_in, tcn_channels, num_dilations, tcn_kernel_size, dropout, - use_norm) - self._regression = nn.Sequential(nn.Linear(tcn_channels[-1], num_last_regress, bias=False), nn.ReLU(), - nn.Linear(num_last_regress, num_outputs, bias=False)) - # weight initialization - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out') - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - - self.pearsonr = EEVPersonr() - self.loss_func = EEVMSELoss - self.scale_factor = 1.0 - # self.automatic_optimization = False - self.current_lr = None - - def get_temporal_layers(self, tcn_in, tcn_channels, num_dilations, tcn_kernel_size, dropout, use_norm): - # input of TCN should have dimension (N, C, L) - if self.num_stacks_tcn == 1: - temporal_layers = TemporalConvNet(tcn_in, (tcn_channels[0],) * num_dilations, tcn_kernel_size, dropout, - use_norm=use_norm) - else: - list_layers = [] - for idx in range(self.num_stacks_tcn): - tcn_in_index = tcn_in if idx == 0 else tcn_channels[idx - 1] - list_layers.append( - TemporalConvNet(tcn_in_index, (tcn_channels[idx],) * num_dilations, tcn_kernel_size, dropout, - use_norm=use_norm)) - temporal_layers = nn.Sequential(*list_layers) - - return temporal_layers - - def forwardx(self, x, temporal_module, regression_module, feat_dropout): - # Input has size batch_size x sequence_length x num_channels (N x L x C) - - # print("Before: ", x.shape) - if self.temporal_size > 0: - # Resize to L / temporal_size x temporal_size C - L_size = x.shape[1] - if L_size % self.temporal_size == 0: - n_pad = 0 - x = torch.reshape(x, (L_size // self.temporal_size, self.temporal_size, -1)) - else: - n_pad = self.temporal_size - L_size % self.temporal_size - x = F.pad(x, (0, 0, n_pad, 0), "constant", 0) - x = torch.reshape(x, (L_size // self.temporal_size + 1, self.temporal_size, -1)) - else: - n_pad = 0 - - if feat_dropout is not None: - x = feat_dropout(x) - - # Transform to (N, C, L) first - x = x.permute(0, 2, 1) - x = temporal_module(x) - # 
Transform back to (N, L, C) - - x = x.permute(0, 2, 1) - x = regression_module(x) - - if self.temporal_size > 0: - x = torch.reshape(x, (1, -1, self.num_outputs)) - if n_pad > 0: - end_index = x.shape[1] - n_pad - x = x[:, n_pad:, :] - - return x - - def forward(self, x): - pred_scores = self.forwardx(x, self._temporal, self._regression, - self._dropout) # 1 x k x 15 - - return pred_scores - - def training_step(self, batch, batch_idx): - - out = self._shared_eval(batch, batch_idx) - - scores = batch['scores'] - - loss = self.loss_func(scores, out, scale_factor=self.scale_factor) - self.pearsonr.update(preds=out / self.scale_factor, target=scores) - - return {'loss': loss} - - def validation_step(self, batch, batch_idx): - out = self._shared_eval(batch, batch_idx) - scores = batch['scores'] - - loss = self.loss_func(scores, out, scale_factor=self.scale_factor) - self.pearsonr.update(preds=out / self.scale_factor, target=scores) - - return {'val_loss': loss, - 'file_id': (out.data.cpu().numpy()[0, :, :] / self.scale_factor, batch['timestamps'], batch['scores'])} - - def _shared_eval(self, batch, batch_idx): - out = self(batch['feature']) - return out - - def test_step(self, batch, batch_idx): - out = self._shared_eval(batch, batch_idx) - - return {batch['file_id'][0]: out.data.cpu().numpy()[0, :, :] / self.scale_factor} - - def configure_optimizers(self): - if self.args['opt'] == 'adam': - optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate) - else: - optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate) - - return optimizer - - def training_epoch_end(self, training_step_outputs): - train_pearsonr = self.pearsonr.compute() - loss_mean = torch.tensor([x['loss'] for x in training_step_outputs]).mean() # .data.cpu().numpy() - self.log('loss', loss_mean, prog_bar=True, logger=True, on_epoch=True, on_step=False) - self.log('pearsonr', train_pearsonr, prog_bar=True, logger=True, on_epoch=True, on_step=False) - print('Step {}. 
Learning rate: {}'.format(self.trainer.global_step, self.current_lr)) - self.pearsonr.reset() - - def validation_epoch_end(self, validation_step_outputs): - val_pearsonr = self.pearsonr.compute() - loss_mean = torch.tensor([x['val_loss'] for x in validation_step_outputs]).mean() # .data.cpu().numpy() - self.log('val_pearsonr', val_pearsonr, prog_bar=True, logger=True, on_epoch=True) - self.log('val_loss', loss_mean, prog_bar=True, logger=True, on_epoch=True, on_step=False) - print(self.current_epoch, val_pearsonr, loss_mean) - self.pearsonr.reset() - - def test_epoch_end(self, test_step_outputs): - if isinstance(test_step_outputs[0], list): - test_results = list(itertools.chain.from_iterable(test_step_outputs)) - else: - test_results = test_step_outputs - test_write = dict(ChainMap(*test_results)) - torch.save(test_write, os.path.join(self.logger.log_dir, 'test_results.pt')) - self.test2csv(test_write) - - self.pearsonr.reset() - - def test2csv(self, test_prediction): - dataset_root_path = '/mnt/sXProject/EvokedExpression/' - emotions = ['amusement', 'anger', 'awe', 'concentration', - 'confusion', 'contempt', 'contentment', 'disappointment', 'doubt', 'elation', 'interest', - 'pain', 'sadness', 'surprise', 'triumph'] - result_dir = self.logger.log_dir - test_csv = pd.read_csv('{}/eev/{}.csv'.format(dataset_root_path, 'test')) - - list_ids = test_csv['Video ID'].unique() - - list_scores = [] - # cnt = 0 - for id in list_ids: - current_id_times = test_csv.loc[test_csv['Video ID'] == id].values # k x 2 - if id not in test_prediction: - current_scores = np.zeros((current_id_times.shape[0], self.num_outputs)) - else: - current_scores = test_prediction[id] # k x 15 - - current_data = np.hstack([current_id_times, current_scores]) - list_scores.append(current_data) - # cnt += 1 - # if cnt > 4: - # break - if self.num_outputs == 1: - columns_name = ['Video ID', 'Timestamp (milliseconds)', emotions[self.emotion_index]] - else: - columns_name = ['Video ID', 'Timestamp (milliseconds)', ] + emotions - - list_scores = np.vstack(list_scores) - pd.DataFrame(data=list_scores, - columns=columns_name, ).to_csv( - '{}/{}.csv'.format(result_dir, 'test_results_{}'.format(self.features)), index=False) diff --git a/src/run_dockerfile.sh b/src/run_dockerfile.sh new file mode 100755 index 0000000..e080cf3 --- /dev/null +++ b/src/run_dockerfile.sh @@ -0,0 +1,21 @@ +# Run EEV +docker run --gpus all --ipc=host -it --rm \ + --user $UID:$GID \ + --volume="/etc/group:/etc/group:ro" \ + --volume="/etc/passwd:/etc/passwd:ro" \ + --volume="/etc/shadow:/etc/shadow:ro" \ + -v /home/hvthong/sXProject/EvokedExpression/dataset/eev2021:/mnt/sXProject/EvokedExpression \ + -v /mnt/XProject/EvokedExpression:/mnt/XProject/EvokedExpression \ + -w /mnt/XProject/EvokedExpression \ + eev:pytorch1.8.1 bash testing.sh + +# Run MediaEval +#docker run --gpus all --ipc=host -it --rm \ +# --user $UID:$GID \ +# --volume="/etc/group:/etc/group:ro" \ +# --volume="/etc/passwd:/etc/passwd:ro" \ +# --volume="/etc/shadow:/etc/shadow:ro" \ +# -v /home/hvthong/sXProject/EvokedExpression/dataset:/mnt/sXProject/EvokedExpression/dataset \ +# -v /mnt/XProject/EvokedExpression:/mnt/XProject/EvokedExpression \ +# -w /mnt/XProject/EvokedExpression \ +# eev:pytorch1.8.1 bash scripts/mediaeval18_train.sh diff --git a/src/scripts/eev_train.sh b/src/scripts/eev_train.sh new file mode 100755 index 0000000..95934f8 --- /dev/null +++ b/src/scripts/eev_train.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +trap "exit" INT + +lr_init=0.005 +batch_size=32 +dropout=0.0 +max_epoch=20 
+#test_weights='' #'train_logs/best_checkpoints/train_logs_v9/effb0/checkpoints/epoch=19-step=1859.ckpt' + +train_dir='/home/hvarch/media/Work/Dataset/EvokedExpression/train_logs/check_train_v5_noTimePos_noDropout/audio' +#train_dir='./train_logs/lstm_withTimePos/audio' +python main.py --cfg conf/eev_audio.yaml \ + FAST_DEV_RUN 0 \ + OUT_DIR $train_dir \ + OPTIM.MAX_EPOCH $max_epoch \ + OPTIM.BASE_LR $lr_init \ + OPTIM.USE_SWA True \ + TRAIN.ACCUM_GRAD_BATCHES $batch_size \ + TCN.DROPOUT $dropout \ + MODEL.USE_POSITION False + +sleep 1 +train_dir='/home/hvarch/media/Work/Dataset/EvokedExpression/train_logs/check_train_v5_noTimePos_noDropout/effb0' +python main.py --cfg conf/eev_effb0.yaml \ + FAST_DEV_RUN 0 \ + OUT_DIR $train_dir \ + OPTIM.MAX_EPOCH $max_epoch \ + OPTIM.BASE_LR $lr_init \ + OPTIM.USE_SWA True \ + TRAIN.ACCUM_GRAD_BATCHES $batch_size \ + TCN.DROPOUT $dropout \ + MODEL.USE_POSITION False + +echo "Use position" +train_dir='/home/hvarch/media/Work/Dataset/EvokedExpression/train_logs/check_train_v5_withTimePos_noDropout/audio' +#train_dir='./train_logs/lstm_withTimePos/audio' +python main.py --cfg conf/eev_audio.yaml \ + FAST_DEV_RUN 0 \ + OUT_DIR $train_dir \ + OPTIM.MAX_EPOCH $max_epoch \ + OPTIM.BASE_LR $lr_init \ + OPTIM.USE_SWA True \ + TRAIN.ACCUM_GRAD_BATCHES $batch_size \ + TCN.DROPOUT $dropout \ + MODEL.USE_POSITION True + +sleep 1 +train_dir='/home/hvarch/media/Work/Dataset/EvokedExpression/train_logs/check_train_v5_withTimePos_noDropout/effb0' +python main.py --cfg conf/eev_effb0.yaml \ + FAST_DEV_RUN 0 \ + OUT_DIR $train_dir \ + OPTIM.MAX_EPOCH $max_epoch \ + OPTIM.BASE_LR $lr_init \ + OPTIM.USE_SWA True \ + TRAIN.ACCUM_GRAD_BATCHES $batch_size \ + TCN.DROPOUT $dropout \ + MODEL.USE_POSITION True diff --git a/src/scripts/mediaeval18_train.sh b/src/scripts/mediaeval18_train.sh new file mode 100755 index 0000000..88470d0 --- /dev/null +++ b/src/scripts/mediaeval18_train.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +trap "exit" INT + +lr_init=0.005 +batch_size=32 +dropout=0.3 +max_epoch=20 + +run_ver="v17tmp" + +for train_drop_perc in 0.0 0.5 +do + for use_position in 'True' 'False' + do + if [ "$use_position" = 'True' ] && [ "$train_drop_perc" = '0.0' ]; then + continue + fi + if [ "$use_position" = 'False' ]; then + continue + fi + for feat in 'audio' 'effb0' + do + for emo in 'valence' 'arousal' 'full' + do + if [ "$emo" = "valence" ]; then + emo_index=0 + continue + elif [ "$emo" = "arousal" ]; then + emo_index=1 + continue + else + emo_index=-1 + fi + + if [ "$use_position" = 'True' ]; then + prefix='time_pos_' + else + prefix='no_time_pos_' + fi + + if [ "$use_position" = 'False' ] && [ "$train_drop_perc" = '0.5' ]; then + postfix='_time_dropout_aug' + fi + + train_dir='train_logs_mediaeval18_v2/'$prefix'epochs_full_'$run_ver$postfix'/'$feat'_'$emo + echo $train_dir + sleep 3 + python -W ignore main.py --cfg conf/eev_${feat}_mediaeval18.yaml \ + FAST_DEV_RUN 0 \ + OUT_DIR $train_dir \ + OPTIM.MAX_EPOCH $max_epoch \ + OPTIM.BASE_LR $lr_init \ + OPTIM.USE_SWA True \ + TRAIN.ACCUM_GRAD_BATCHES $batch_size \ + TRAIN.DROP_PERC $train_drop_perc \ + TCN.DROPOUT $dropout \ + DATA_LOADER.EMO_INDEX $emo_index \ + DATA_LOADER.NUM_WORKERS 16 \ + MODEL.USE_POSITION $use_position + done + done + done +done diff --git a/src/scripts/testing.sh b/src/scripts/testing.sh new file mode 100755 index 0000000..5bb2331 --- /dev/null +++ b/src/scripts/testing.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +trap "exit" INT + + +test_weights=PATH_TO_CKPT_FILE # e.g. 
'./train_logs/audio/checkpoints/swa_last.ckpt' +test_dir=PATH_TO_OUT_DIR # folder to write output, e.g. './train_logs/tmp/testing1' +config_path=PATH_TO_CONFIG_FILE # e.g., ./train_logs/audio/config_audio.yaml + +python main.py --cfg $config_path \ + OUT_DIR $test_dir \ + TEST.WEIGHTS $test_weights diff --git a/src/train.sh b/src/train.sh deleted file mode 100644 index e50a6e8..0000000 --- a/src/train.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -trap "exit" INT - -train_dir=./train_logs/ablation_bestAudioEff_notime -lr_val=5e-3 -num_epoch=20 -batch_size=32 - -feature_name=audio -echo $feature_name -sleep 1 -python -W ignore main.py --epoch $num_epoch --dir $train_dir --emotion -1 --lr_init $lr_val --feature $feature_name --batch_size $batch_size - -feature_name=effb0 -echo $feature_name -sleep 1 -python -W ignore main.py --epoch $num_epoch --dir $train_dir --emotion -1 --lr_init $lr_val --feature $feature_name --batch_size $batch_size - diff --git a/src/utils.py b/src/utils.py deleted file mode 100644 index 4d193cb..0000000 --- a/src/utils.py +++ /dev/null @@ -1,207 +0,0 @@ -""" -Author: Huynh Van Thong -Department of AI Convergence, Chonnam Natl. Univ. -""" - -import os -import sys - -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' -import pandas as pd -from torch.utils import data -import numpy as np -import torch -from torchvision import transforms -import torch.nn.functional as F -import torchmetrics -from torch.nn.utils.rnn import pad_sequence - -dataset_root_path = '/mnt/Work/Dataset/EEV/' - - -class ToTensor(object): - """ Convert ndarrays in sample to Tensors""" - - def __call__(self, sample): - feature = sample['feature'] - scores = sample['scores'] - - return {'feature': torch.from_numpy(feature).type(torch.FloatTensor), - 'timestamps': torch.from_numpy(sample['timestamps']).type(torch.LongTensor), - 'scores': torch.from_numpy(scores).type(torch.FloatTensor), - 'file_id': sample['file_id']} - - -def eev_collatefn(batch): - batch_sample = {'audio': [], 'resnet': [], 'timestamps': [], 'scores': [], 'file_id': [], 'length': []} - max_dim = -1 - for item in batch: - max_dim = max(item['resnet'].shape[0], max_dim) - for item in batch: - current_len = -1 - for ky in item.keys(): - current_val = item[ky] - if ky == 'file_id': - batch_sample[ky].append(current_val) - else: - batch_sample[ky].append(current_val) - current_len = current_val.shape[0] - batch_sample['length'].append(current_len) - for ky in batch_sample: - if ky in ['file_id', 'length']: - pass - # batch_sample[ky] = torch.stack(batch_sample[ky], dim=0) - else: - batch_sample[ky] = pad_sequence(batch_sample[ky], batch_first=True, padding_value=0) - return batch_sample - - -class EEVdataset(data.Dataset): - def __init__(self, root_path='/mnt/Work/Dataset/EEV/', split='train', feature='resnet', emotion_index=-1, - transforms=None, save_pt=False): - self.save_pt = save_pt - self.feature = feature - self.emotion_index = emotion_index - self.root_path = root_path - self.root_path_npy = self.root_path # '.' 
- self.split = split - self.transforms = transforms - if split not in ['train', 'val', 'test']: - raise ValueError('Do not support {} split for EEV dataset'.format(split)) - data_csv = pd.read_csv('{}/eev/{}.csv'.format(self.root_path, self.split)) - - id_header = 'Video ID' if split == 'test' else 'YouTube ID' - - excluded_ids = np.loadtxt('excluded_files.txt', dtype=str) - excluded_ids_single = np.loadtxt('check_1.txt', dtype=str) # - set(excluded_ids_single) - self.video_ids = list(set(data_csv[id_header].unique()) - set(excluded_ids)) - - def __len__(self): - return len(self.video_ids) - - def __getitem__(self, index): - current_id = self.video_ids[index] - if self.save_pt: - resnet_npy = np.load( - '{}/dataset/features_v2/{}/{}_{}.npy'.format(self.root_path_npy, self.split, current_id, 'resnetv2-50'), - allow_pickle=True) - - audio_npy = np.load( - '{}/dataset/features_v2/{}/{}_{}.npy'.format(self.root_path_npy, self.split, current_id, 'audio'), - allow_pickle=True) - - effb0_npy = np.load( - '{}/dataset/features_v2/{}/{}_{}.npy'.format(self.root_path_npy, self.split, current_id, 'efficientnet-b0'), - allow_pickle=True) - - audio_features = audio_npy.item()['feature'] - resnet_features = resnet_npy.item()['feature'] - effb0_features = effb0_npy.item()['feature'] - timestamps = resnet_npy.item()['timestamps'] - scores = resnet_npy.item()['scores'] - mask = np.sum(scores, axis=-1) > 0 - file_id = resnet_npy.item()['file_id'][0] - - assert mask.shape[0] == scores.shape[0] - assert audio_features.shape[0] == resnet_features.shape[0] - if audio_features.ndim == 1: - audio_features = audio_features.reshape(1, -1) - print(current_id) - if audio_features.shape[-1] != 2048: - print('Check: ', current_id) - - sample = {'resnet': resnet_features, 'audio': audio_features, 'effb0': effb0_features, 'timestamps': timestamps, - 'scores': scores, 'mask': mask, - 'file_id': file_id} - torch.save(sample, '{}dataset/features_pt/{}/{}.pt'.format(self.root_path_npy, self.split, current_id)) - return sample - else: - sample = torch.load('{}dataset/features_v2/{}/{}.pt'.format(self.root_path_npy, self.split, current_id)) - if self.split in ['train', 'val']: - mask = np.sum(sample['scores'], axis=-1) > 1e-6 - else: - mask = np.ones(sample['scores'].shape[0], dtype=np.bool) - - num_timestamps = np.sum(mask) - if self.emotion_index > -1: - scores = np.reshape(sample['scores'][mask, self.emotion_index], (-1, 1)) - else: - smooth_scores = np.zeros_like(sample['scores'][mask, :]) - # smooth_scores[:num_timestamps//2, :] = smooth_scores[:num_timestamps//2, :] + 1e-8 - scores = sample['scores'][mask, :] + smooth_scores - # if scores.shape[0] < 2: - # print('Check ', current_id) - position_info = sample['timestamps'][mask].reshape(-1, 1) / 1e6 - features = sample[self.feature][mask, :] #np.hstack([sample[self.feature][mask, :], position_info]) - use_sample = {'feature': features, 'timestamps': sample['timestamps'][mask], - 'scores': scores, 'file_id': sample['file_id']} - - if self.transforms is not None: - use_sample = self.transforms(use_sample) - - return use_sample - - -class EEVPersonr(torchmetrics.Metric): - def __init__(self, dist_sync_on_step=False): - super(EEVPersonr, self).__init__(dist_sync_on_step=dist_sync_on_step) - self.add_state("sum", default=torch.tensor(0.0), dist_reduce_fx="sum") - self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum") - - def get_score(self, x, x_hat): - x_mean = torch.mean(x, dim=0, keepdim=True) - x_hat_mean = torch.mean(x_hat, dim=0, keepdim=True) - - 
numerator = torch.sum(torch.mul(x - x_mean, x_hat - x_hat_mean), dim=0) - denominator = torch.sqrt(torch.sum((x - x_mean) ** 2, dim=0) * torch.sum((x_hat - x_hat_mean) ** 2, dim=0)) - pearsonr_score = numerator / denominator - pearsonr_score[pearsonr_score != pearsonr_score] = -1 - return torch.mean(pearsonr_score) - - def update(self, preds, target): - # Update metric states - update_scores = 0. - # print(target.shape, preds.shape) - update_scores = update_scores + self.get_score(target[0, :, :], preds[0, :, :]) - - self.sum += update_scores - - self.total = self.total + 1 - - def compute(self): - return self.sum / self.total - - -def EEVMSELoss(targets, preds, scale_factor=1.0): - mse_exp = torch.squeeze( - torch.mean((targets * scale_factor - preds) ** 2, dim=1)) - return torch.mean(mse_exp) - - -def EEVPearsonLoss(targets, preds, scale_factor=1.0, ): - x_mean = torch.mean(targets * scale_factor, dim=1, keepdim=True) - xhat_mean = torch.mean(preds, dim=1, keepdim=True) - - numerator = torch.sum(torch.mul(targets * scale_factor - x_mean, preds - xhat_mean), dim=1) - denominator = torch.sqrt( - torch.sum((targets * scale_factor - x_mean) ** 2, dim=1) * torch.sum((preds - xhat_mean) ** 2, dim=1)) - - pearsonr_score = numerator / denominator - if torch.sum(torch.isnan(pearsonr_score)) > 0: - print(numerator, denominator) - print('Stop here') - sys.exit(0) - return 1.0 - torch.mean(pearsonr_score) - - -if __name__ == '__main__': - tmp = EEVdataset(split='test', transforms=transforms.Compose([ToTensor()])) - tmp_loader = data.DataLoader(tmp, batch_size=1) - - max_size = 0 - for i, b in enumerate(tmp_loader): - max_size = max(b['resnet'].shape[1], max_size) - print(i, max_size) - - print(max_size) - pass
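The removed feature-extraction script maps a variable-length clip onto one frame per labelled timestamp: read_video() splits the decoded frames into num_segments equal windows and keeps the centre frame of each. A minimal sketch of that index selection, with an illustrative helper name and made-up frame counts:

import numpy as np

def centre_frame_indexes(num_frames, num_segments):
    # Mirror the removed read_video(): one index per segment, starting from
    # the middle of the first window, evenly spaced over the clip.
    segment_len = num_frames // num_segments
    return np.linspace(segment_len // 2, num_frames, num=num_segments,
                       dtype=int, endpoint=False)

# A clip decoded to 300 frames, labelled at 60 timestamps -> 60 roughly evenly spaced indexes
print(centre_frame_indexes(300, 60))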
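For the audio branch, the same script zero-pads the 16 kHz waveform so it reshapes cleanly into one row per labelled timestamp before the TRILL-distilled hub module embeds each row. A sketch of that chunking step on synthetic audio:

import numpy as np

def chunk_audio(audio, num_segments):
    # Pad the tail with zeros so the 1-D waveform reshapes into
    # (num_segments, samples_per_segment), as in the removed script.
    remainder = audio.shape[0] % num_segments
    if remainder > 0:
        audio = np.pad(audio, (0, num_segments - remainder), mode='constant')
    return audio.reshape(num_segments, -1)

waveform = np.random.randn(16000 * 10 + 7).astype(np.float32)  # ~10 s at 16 kHz
print(chunk_audio(waveform, 60).shape)  # (60, 2667)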
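The new main.py drives everything from a YACS config: --cfg points at one of the YAML files in conf/, and any trailing KEY VALUE pairs on the command line override it, which is how the shell scripts set OPTIM.BASE_LR, OUT_DIR and so on per run. A self-contained sketch of that override pattern using a small stand-in config rather than the project's core/config.py:

from yacs.config import CfgNode

cfg = CfgNode()
cfg.OUT_DIR = './tmp'
cfg.OPTIM = CfgNode()
cfg.OPTIM.BASE_LR = 0.005
cfg.OPTIM.MAX_EPOCH = 20

# Roughly what "--cfg conf/eev_audio.yaml OPTIM.BASE_LR 0.001 OUT_DIR ./run1" does:
# cfg.merge_from_file('conf/eev_audio.yaml')   # YAML file overrides the defaults
cfg.merge_from_list(['OPTIM.BASE_LR', 0.001, 'OUT_DIR', './run1'])  # CLI pairs override YAML
cfg.freeze()                                   # later assignments now raise
print(cfg.OPTIM.BASE_LR, cfg.OUT_DIR)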
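In the deleted models.py, forwardx() permutes the features from (batch, length, channels) to (batch, channels, length) because the TemporalConvNet is built from 1-D convolutions, then permutes back so the regression head can run per timestep. A minimal shape-handling sketch with a plain Conv1d standing in for the TCN stack:

import torch
from torch import nn

class TinyTemporalHead(nn.Module):
    # Stand-in for TemporalConvNet + the two-layer regression head:
    # Conv1d expects (N, C, L), the Linear head expects (N, L, C).
    def __init__(self, in_ch=2048, hidden=512, num_outputs=15):
        super().__init__()
        self.temporal = nn.Conv1d(in_ch, hidden, kernel_size=3, padding=1)
        self.regress = nn.Sequential(nn.Linear(hidden, 128, bias=False), nn.ReLU(),
                                     nn.Linear(128, num_outputs, bias=False))

    def forward(self, x):                        # x: (N, L, C)
        x = self.temporal(x.permute(0, 2, 1))    # -> (N, hidden, L)
        return self.regress(x.permute(0, 2, 1))  # -> (N, L, num_outputs)

feats = torch.randn(1, 600, 2048)                # one video, 600 timestamps
print(TinyTemporalHead()(feats).shape)           # torch.Size([1, 600, 15])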
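The removed EEVPersonr metric and EEVPearsonLoss both rely on the column-wise Pearson correlation, averaged over the emotion columns, with NaN columns (constant signals) mapped to -1. A small sketch of that computation, spot-checked against scipy on made-up curves:

import torch
from scipy import stats

def mean_columnwise_pearsonr(x, x_hat):
    # x, x_hat: (timesteps, emotions); same formula as the removed get_score().
    x_mean = x.mean(dim=0, keepdim=True)
    x_hat_mean = x_hat.mean(dim=0, keepdim=True)
    num = ((x - x_mean) * (x_hat - x_hat_mean)).sum(dim=0)
    den = torch.sqrt(((x - x_mean) ** 2).sum(dim=0) * ((x_hat - x_hat_mean) ** 2).sum(dim=0))
    r = num / den
    r[r != r] = -1.0   # NaN (zero-variance column) -> -1, as in the original
    return r.mean()

t = torch.linspace(0, 1, 100)
target = torch.stack([t, 1 - t], dim=1)
pred = torch.stack([t + 0.1 * torch.randn(100), 0.5 * t], dim=1)
print(mean_columnwise_pearsonr(target, pred))
print(stats.pearsonr(target[:, 0].numpy(), pred[:, 0].numpy())[0])  # column 0 only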
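Finally, test2csv() in the deleted models.py writes the challenge submission by stacking, per video, the Video ID and Timestamp columns from test.csv next to the 15 predicted emotion columns, falling back to zeros for videos without a prediction. A sketch of that assembly with two fabricated videos (column names follow the patch, the data is made up):

import numpy as np
import pandas as pd

emotions = ['amusement', 'anger', 'awe', 'concentration', 'confusion', 'contempt',
            'contentment', 'disappointment', 'doubt', 'elation', 'interest',
            'pain', 'sadness', 'surprise', 'triumph']

test_csv = pd.DataFrame({'Video ID': ['vid_a'] * 3 + ['vid_b'] * 2,
                         'Timestamp (milliseconds)': [0, 166, 333, 0, 166]})
predictions = {'vid_a': np.random.rand(3, 15)}   # vid_b deliberately missing

rows = []
for vid in test_csv['Video ID'].unique():
    id_times = test_csv.loc[test_csv['Video ID'] == vid].values      # (k, 2)
    scores = predictions.get(vid, np.zeros((id_times.shape[0], 15))) # (k, 15) or zeros
    rows.append(np.hstack([id_times, scores]))

submission = pd.DataFrame(np.vstack(rows),
                          columns=['Video ID', 'Timestamp (milliseconds)'] + emotions)
print(submission.head())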