Skip to content

Commit

Permalink
General improvements.
Browse files Browse the repository at this point in the history
- ml4chem.data.visualization: Added kwargs to plot_atomic_features().
- ml4chem.features:
    * New base class to build AtomisticFeatures.
    * .gaussian now supports conversion to pandas DataFrames.
- ml4chem.models: docstrings for base module.
- Black cleaned.
  • Loading branch information
muammar committed Jan 3, 2020
1 parent d5f2cae commit 45e284c
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 15 deletions.
4 changes: 1 addition & 3 deletions examples/autoencoder/cu_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,7 @@ def autoencode():
save_preprocessor="latent_space_min_max.scaler",
)

features = features.calculate(
images, purpose=purpose, data=data_handler, svm=True
)
features = features.calculate(images, purpose=purpose, data=data_handler, svm=True)

latent_svm = []
for e in list(features.values()):
Expand Down
6 changes: 4 additions & 2 deletions ml4chem/data/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,9 @@ def read_log(logfile, metric="loss", refresh=None):
plt.show(block=True)


def plot_atomic_features(latent_space, method="PCA", dimensions=2, backend="seaborn"):
def plot_atomic_features(
latent_space, method="PCA", dimensions=2, backend="seaborn", **kwargs
):
"""Plot high dimensional atomic feature vectors
This function can take a feature space dictionary, or a database file
Expand All @@ -198,7 +200,7 @@ def plot_atomic_features(latent_space, method="PCA", dimensions=2, backend="seab
"""
method = method.lower()
backend = backend.lower()
dot_size = 4.0
dot_size = kwargs["dot_size"]

supported_methods = ["pca", "tsne"]

Expand Down
23 changes: 23 additions & 0 deletions ml4chem/features/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from abc import ABC, abstractmethod


class AtomisticFeatures(ABC):
    """Abstract interface that every atomistic featurizer must implement.

    Concrete feature generators (e.g. Behler-Parrinello Gaussian symmetry
    functions) subclass this so that models and tooling can rely on a
    uniform ``name`` / ``__init__`` / ``calculate`` / ``to_pandas`` contract.
    """

    # NOTE(review): declared with a ``cls`` parameter but not decorated with
    # @classmethod — confirm whether implementers are expected to add the
    # decorator themselves.
    @abstractmethod
    def name(cls):
        """Return name of the class"""
        pass

    @abstractmethod
    def __init__(self, **kwargs):
        """Arguments needed to instantiate Features"""
        pass

    @abstractmethod
    def calculate(self, **kwargs):
        """Calculate features"""
        pass

    @abstractmethod
    def to_pandas(self):
        """Convert features to pandas DataFrame"""
        pass
27 changes: 19 additions & 8 deletions ml4chem/features/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,19 @@
import time
import torch
import numpy as np
import pandas as pd
from ase.data import atomic_numbers
from collections import OrderedDict
from .cutoff import Cosine
from ml4chem.features.cutoff import Cosine
from ml4chem.features.base import AtomisticFeatures
from ml4chem.data.serialization import dump, load
from ml4chem.data.preprocessing import Preprocessing
from ml4chem.utils import get_chunks, get_neighborlist, convert_elapsed_time

logger = logging.getLogger()


class Gaussian(object):
class Gaussian(AtomisticFeatures):
"""Behler-Parrinello symmetry functions
This class builds local chemical environments for atoms based on the
Behler-Parrinello Gaussian type symmetry functions. It is modular enough
Expand Down Expand Up @@ -423,28 +425,37 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):
" seconds.".format(h, m, s)
)


if svm and purpose == "training":
client.restart() # Reclaims memory aggressively
client.restart() # Reclaims memory aggressively
preprocessor.save_to_file(preprocessor, self.save_preprocessor)

if self.filename is not None:
logger.info("features saved to {}.".format(self.filename))
data = {"feature_space": feature_space}
data.update({"reference_space": reference_space})
dump(data, filename=self.filename)
return feature_space, reference_space
self.feature_space = feature_space
self.reference_space = reference_space

return self.feature_space, self.reference_space

elif svm is False and purpose == "training":
client.restart() # Reclaims memory aggressively
client.restart() # Reclaims memory aggressively
preprocessor.save_to_file(preprocessor, self.save_preprocessor)

if self.filename is not None:
logger.info("features saved to {}.".format(self.filename))
dump(feature_space, filename=self.filename)
return feature_space
self.feature_space = feature_space

return self.feature_space
else:
return feature_space
self.feature_space = feature_space
return self.feature_space

def to_pandas(self):
    """Return the computed feature space as a pandas DataFrame.

    Each key of ``self.feature_space`` becomes one row of the frame
    (``orient="index"``), so ``calculate`` must have been run first to
    populate ``self.feature_space``.
    """
    frame = pd.DataFrame.from_dict(self.feature_space, orient="index")
    return frame

def stack_features(self, indices, stacked_features):
"""Stack features """
Expand Down
4 changes: 4 additions & 0 deletions ml4chem/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@
class DeepLearningModel(ABC):
    """Abstract interface that every ml4chem deep-learning model implements.

    Subclasses provide a ``name`` / ``__init__`` / ``prepare_model`` /
    ``forward`` contract so trainers and inference code can handle any
    model uniformly.
    """

    # NOTE(review): declared with a ``cls`` parameter but not decorated with
    # @classmethod — confirm whether implementers are expected to add the
    # decorator themselves.
    @abstractmethod
    def name(cls):
        """Return name of the class"""
        pass

    @abstractmethod
    def __init__(self, **kwargs):
        """Arguments needed to instantiate the model"""
        pass

    @abstractmethod
    def prepare_model(self, **kwargs):
        """Prepare model for training or inference"""
        pass

    @abstractmethod
    def forward(self, X):
        """Forward propagation pass"""
        pass
3 changes: 1 addition & 2 deletions ml4chem/optim/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,6 @@ def get_lr_scheduler(optimizer, lr_scheduler):
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, **kwargs)
name = "StepLR"


logger.info("Learning Rate Scheduler")
logger.info("-----------------------")
logger.info(" - Name: {}.".format(name))
Expand All @@ -179,4 +178,4 @@ def get_lr(optimizer):
Current learning rate.
"""
for param_group in optimizer.param_groups:
return param_group['lr']
return param_group["lr"]

0 comments on commit 45e284c

Please sign in to comment.