From 45e284c722d3bc5e2fc2b257409a27cfd39886da Mon Sep 17 00:00:00 2001
From: Muammar El Khatib <muammarelkhatib@gmail.com>
Date: Fri, 3 Jan 2020 11:29:58 -0800
Subject: [PATCH] General improvements.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- ml4chem.data.visualization: Added kwargs to plot_atomic_features().
- ml4chem.features:
    * New base class to build AtomisticFeatures.
    * .gaussian now supports conversion to pandas DataFrames.
- ml4chem.models: docstrings for base module.
- Code formatted with Black.
---
 examples/autoencoder/cu_inference.py |  4 +---
 ml4chem/data/visualization.py        |  6 ++++--
 ml4chem/features/base.py             | 23 +++++++++++++++++++++++
 ml4chem/features/gaussian.py         | 27 +++++++++++++++++++--------
 ml4chem/models/base.py               |  4 ++++
 ml4chem/optim/handler.py             |  3 +--
 6 files changed, 52 insertions(+), 15 deletions(-)
 create mode 100644 ml4chem/features/base.py

diff --git a/examples/autoencoder/cu_inference.py b/examples/autoencoder/cu_inference.py
index f324134..234cbea 100644
--- a/examples/autoencoder/cu_inference.py
+++ b/examples/autoencoder/cu_inference.py
@@ -50,9 +50,7 @@ def autoencode():
         save_preprocessor="latent_space_min_max.scaler",
     )
 
-    features = features.calculate(
-        images, purpose=purpose, data=data_handler, svm=True
-    )
+    features = features.calculate(images, purpose=purpose, data=data_handler, svm=True)
 
     latent_svm = []
     for e in list(features.values()):
diff --git a/ml4chem/data/visualization.py b/ml4chem/data/visualization.py
index 82827fb..9ce7a47 100644
--- a/ml4chem/data/visualization.py
+++ b/ml4chem/data/visualization.py
@@ -174,7 +174,9 @@ def read_log(logfile, metric="loss", refresh=None):
         plt.show(block=True)
 
 
-def plot_atomic_features(latent_space, method="PCA", dimensions=2, backend="seaborn"):
+def plot_atomic_features(
+    latent_space, method="PCA", dimensions=2, backend="seaborn", **kwargs
+):
     """Plot high dimensional atomic feature vectors
 
     This function can take a feature space dictionary, or a database file
@@ -198,7 +200,7 @@ def plot_atomic_features(latent_space, method="PCA", dimensions=2, backend="seab
     """
     method = method.lower()
     backend = backend.lower()
-    dot_size = 4.0
+    dot_size = kwargs.get("dot_size", 4.0)  # 4.0 was the previous fixed size
 
     supported_methods = ["pca", "tsne"]
 
diff --git a/ml4chem/features/base.py b/ml4chem/features/base.py
new file mode 100644
index 0000000..eee7e8b
--- /dev/null
+++ b/ml4chem/features/base.py
@@ -0,0 +1,23 @@
+from abc import ABC, abstractmethod
+
+
+class AtomisticFeatures(ABC):
+    @abstractmethod
+    def name(cls):
+        """Return name of the class"""
+        pass
+
+    @abstractmethod
+    def __init__(self, **kwargs):
+        """Arguments needed to instantiate Features"""
+        pass
+
+    @abstractmethod
+    def calculate(self, **kwargs):
+        """Calculate features"""
+        pass
+
+    @abstractmethod
+    def to_pandas(self):
+        """Convert features to pandas DataFrame"""
+        pass
diff --git a/ml4chem/features/gaussian.py b/ml4chem/features/gaussian.py
index 8702010..2b05b00 100644
--- a/ml4chem/features/gaussian.py
+++ b/ml4chem/features/gaussian.py
@@ -4,9 +4,11 @@
 import time
 import torch
 import numpy as np
+import pandas as pd
 from ase.data import atomic_numbers
 from collections import OrderedDict
-from .cutoff import Cosine
+from ml4chem.features.cutoff import Cosine
+from ml4chem.features.base import AtomisticFeatures
 from ml4chem.data.serialization import dump, load
 from ml4chem.data.preprocessing import Preprocessing
 from ml4chem.utils import get_chunks, get_neighborlist, convert_elapsed_time
@@ -14,7 +16,7 @@
 logger = logging.getLogger()
 
 
-class Gaussian(object):
+class Gaussian(AtomisticFeatures):
     """Behler-Parrinello symmetry functions
     This class builds local chemical environments for atoms based on the
     Behler-Parrinello Gaussian type symmetry functions. It is modular enough
@@ -423,9 +425,8 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):
             " seconds.".format(h, m, s)
         )
 
-
         if svm and purpose == "training":
-            client.restart()    # Reclaims memory aggressively
+            client.restart()  # Reclaims memory aggressively
             preprocessor.save_to_file(preprocessor, self.save_preprocessor)
 
             if self.filename is not None:
@@ -433,18 +434,28 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):
                 data = {"feature_space": feature_space}
                 data.update({"reference_space": reference_space})
                 dump(data, filename=self.filename)
-            return feature_space
+            # Always cache on the instance, even when nothing is dumped to disk.
+            self.feature_space = feature_space
+            self.reference_space = reference_space
+            return self.feature_space, self.reference_space
 
         elif svm is False and purpose == "training":
-            client.restart()    # Reclaims memory aggressively
+            client.restart()  # Reclaims memory aggressively
             preprocessor.save_to_file(preprocessor, self.save_preprocessor)
 
             if self.filename is not None:
                 logger.info("features saved to {}.".format(self.filename))
                 dump(feature_space, filename=self.filename)
-            return feature_space
+            # Always cache on the instance; to_pandas() reads self.feature_space.
+            self.feature_space = feature_space
+            return self.feature_space
         else:
-            return feature_space
+            self.feature_space = feature_space
+            return self.feature_space
+
+    def to_pandas(self):
+        """Convert features to pandas DataFrame"""
+        return pd.DataFrame.from_dict(self.feature_space, orient="index")
 
     def stack_features(self, indices, stacked_features):
         """Stack features """
diff --git a/ml4chem/models/base.py b/ml4chem/models/base.py
index e7a7674..0a5ad42 100644
--- a/ml4chem/models/base.py
+++ b/ml4chem/models/base.py
@@ -4,16 +4,20 @@
 class DeepLearningModel(ABC):
     @abstractmethod
     def name(cls):
+        """Return name of the class"""
         pass
 
     @abstractmethod
     def __init__(self, **kwargs):
+        """Arguments needed to instantiate the model"""
         pass
 
     @abstractmethod
     def prepare_model(self, **kwargs):
+        """Prepare model for training or inference"""
         pass
 
     @abstractmethod
     def forward(self, X):
+        """Forward propagation pass"""
         pass
diff --git a/ml4chem/optim/handler.py b/ml4chem/optim/handler.py
index 589d733..5c0a921 100644
--- a/ml4chem/optim/handler.py
+++ b/ml4chem/optim/handler.py
@@ -155,7 +155,6 @@ def get_lr_scheduler(optimizer, lr_scheduler):
         scheduler = torch.optim.lr_scheduler.StepLR(optimizer, **kwargs)
         name = "StepLR"
 
-
     logger.info("Learning Rate Scheduler")
     logger.info("-----------------------")
     logger.info("    - Name: {}.".format(name))
@@ -179,4 +178,4 @@ def get_lr(optimizer):
         Current learning rate.
     """
     for param_group in optimizer.param_groups:
-        return param_group['lr']
+        return param_group["lr"]