From 45e284c722d3bc5e2fc2b257409a27cfd39886da Mon Sep 17 00:00:00 2001 From: Muammar El Khatib Date: Fri, 3 Jan 2020 11:29:58 -0800 Subject: [PATCH] General improvements. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ml4chem.data.visualization: Added kwargs to plot_atomic_features(). - ml4chem.features: * New base class to build AtomisticFeatures. * .gaussian now supports conversion to pandas DataFrames. - ml4chem.models: docstrings for base module. - Black cleaned. --- examples/autoencoder/cu_inference.py | 4 +--- ml4chem/data/visualization.py | 6 ++++-- ml4chem/features/base.py | 23 +++++++++++++++++++++++ ml4chem/features/gaussian.py | 27 +++++++++++++++++++-------- ml4chem/models/base.py | 4 ++++ ml4chem/optim/handler.py | 3 +-- 6 files changed, 52 insertions(+), 15 deletions(-) create mode 100644 ml4chem/features/base.py diff --git a/examples/autoencoder/cu_inference.py b/examples/autoencoder/cu_inference.py index f324134..234cbea 100644 --- a/examples/autoencoder/cu_inference.py +++ b/examples/autoencoder/cu_inference.py @@ -50,9 +50,7 @@ def autoencode(): save_preprocessor="latent_space_min_max.scaler", ) - features = features.calculate( - images, purpose=purpose, data=data_handler, svm=True - ) + features = features.calculate(images, purpose=purpose, data=data_handler, svm=True) latent_svm = [] for e in list(features.values()): diff --git a/ml4chem/data/visualization.py b/ml4chem/data/visualization.py index 82827fb..9ce7a47 100644 --- a/ml4chem/data/visualization.py +++ b/ml4chem/data/visualization.py @@ -174,7 +174,9 @@ def read_log(logfile, metric="loss", refresh=None): plt.show(block=True) -def plot_atomic_features(latent_space, method="PCA", dimensions=2, backend="seaborn"): +def plot_atomic_features( + latent_space, method="PCA", dimensions=2, backend="seaborn", **kwargs +): """Plot high dimensional atomic feature vectors This function can take a feature space dictionary, or a database file @@ -198,7 
+200,7 @@ def plot_atomic_features(latent_space, method="PCA", dimensions=2, backend="seab """ method = method.lower() backend = backend.lower() - dot_size = 4.0 + dot_size = kwargs["dot_size"] supported_methods = ["pca", "tsne"] diff --git a/ml4chem/features/base.py b/ml4chem/features/base.py new file mode 100644 index 0000000..eee7e8b --- /dev/null +++ b/ml4chem/features/base.py @@ -0,0 +1,23 @@ +from abc import ABC, abstractmethod + + +class AtomisticFeatures(ABC): + @abstractmethod + def name(cls): + """Return name of the class""" + pass + + @abstractmethod + def __init__(self, **kwargs): + """Arguments needed to instantiate Features""" + pass + + @abstractmethod + def calculate(self, **kwargs): + """Calculate features""" + pass + + @abstractmethod + def to_pandas(self): + """Convert features to pandas DataFrame""" + pass diff --git a/ml4chem/features/gaussian.py b/ml4chem/features/gaussian.py index 8702010..2b05b00 100644 --- a/ml4chem/features/gaussian.py +++ b/ml4chem/features/gaussian.py @@ -4,9 +4,11 @@ import time import torch import numpy as np +import pandas as pd from ase.data import atomic_numbers from collections import OrderedDict -from .cutoff import Cosine +from ml4chem.features.cutoff import Cosine +from ml4chem.features.base import AtomisticFeatures from ml4chem.data.serialization import dump, load from ml4chem.data.preprocessing import Preprocessing from ml4chem.utils import get_chunks, get_neighborlist, convert_elapsed_time @@ -14,7 +16,7 @@ logger = logging.getLogger() -class Gaussian(object): +class Gaussian(AtomisticFeatures): """Behler-Parrinello symmetry functions This class builds local chemical environments for atoms based on the Behler-Parrinello Gaussian type symmetry functions. 
It is modular enough @@ -423,9 +425,8 @@ def calculate(self, images=None, purpose="training", data=None, svm=False): " seconds.".format(h, m, s) ) - if svm and purpose == "training": - client.restart() # Reclaims memory aggressively + client.restart() # Reclaims memory aggressively preprocessor.save_to_file(preprocessor, self.save_preprocessor) if self.filename is not None: @@ -433,18 +434,28 @@ def calculate(self, images=None, purpose="training", data=None, svm=False): data = {"feature_space": feature_space} data.update({"reference_space": reference_space}) dump(data, filename=self.filename) - return feature_space, reference_space + self.feature_space = feature_space + self.reference_space = reference_space + + return self.feature_space, self.reference_space elif svm is False and purpose == "training": - client.restart() # Reclaims memory aggressively + client.restart() # Reclaims memory aggressively preprocessor.save_to_file(preprocessor, self.save_preprocessor) if self.filename is not None: logger.info("features saved to {}.".format(self.filename)) dump(feature_space, filename=self.filename) - return feature_space + self.feature_space = feature_space + + return self.feature_space else: - return feature_space + self.feature_space = feature_space + return self.feature_space + + def to_pandas(self): + """Convert features to pandas DataFrame""" + return pd.DataFrame.from_dict(self.feature_space, orient="index") def stack_features(self, indices, stacked_features): """Stack features """ diff --git a/ml4chem/models/base.py b/ml4chem/models/base.py index e7a7674..0a5ad42 100644 --- a/ml4chem/models/base.py +++ b/ml4chem/models/base.py @@ -4,16 +4,20 @@ class DeepLearningModel(ABC): @abstractmethod def name(cls): + """Return name of the class""" pass @abstractmethod def __init__(self, **kwargs): + """Arguments needed to instantiate the model""" pass @abstractmethod def prepare_model(self, **kwargs): + """Prepare model for training or inference""" pass @abstractmethod def 
forward(self, X): + """Forward propagation pass""" pass diff --git a/ml4chem/optim/handler.py b/ml4chem/optim/handler.py index 589d733..5c0a921 100644 --- a/ml4chem/optim/handler.py +++ b/ml4chem/optim/handler.py @@ -155,7 +155,6 @@ def get_lr_scheduler(optimizer, lr_scheduler): scheduler = torch.optim.lr_scheduler.StepLR(optimizer, **kwargs) name = "StepLR" - logger.info("Learning Rate Scheduler") logger.info("-----------------------") logger.info(" - Name: {}.".format(name)) @@ -179,4 +178,4 @@ def get_lr(optimizer): Current learning rate. """ for param_group in optimizer.param_groups: - return param_group['lr'] + return param_group["lr"]