Skip to content

Commit

Permalink
Preparing new release.
Browse files Browse the repository at this point in the history
- Most of the modules now log the date and time at which they were accessed.
- All features now provide a .to_pandas() method.
- Code reformatted with Black.
  • Loading branch information
muammar committed Jan 7, 2020
1 parent 45e284c commit a041d4c
Show file tree
Hide file tree
Showing 9 changed files with 43 additions and 8 deletions.
2 changes: 1 addition & 1 deletion ml4chem/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@


__all__ = ["Potentials"]
__version__ = "0.0.5-dev"
__version__ = "0.0.5"
3 changes: 3 additions & 0 deletions ml4chem/data/handler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from collections import OrderedDict
from ml4chem.utils import get_hash
import datetime
import logging

logger = logging.getLogger()
Expand Down Expand Up @@ -32,6 +33,8 @@ def __init__(self, images, purpose=None):
self.unique_element_symbols = None
logger.info("Data")
logger.info("====")
now = datetime.datetime.now()
logger.info("Module accessed on {}.".format(now.strftime("%Y-%m-%d %H:%M:%S")))

if self.is_valid_structure(images) is False:
logger.warning("Data structure is not compatible with ML4Chem.")
Expand Down
9 changes: 8 additions & 1 deletion ml4chem/features/autoencoders.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
import json
import logging
import pandas as pd
import torch
from collections import OrderedDict
from ml4chem.data.preprocessing import Preprocessing
from ml4chem.features.base import AtomisticFeatures
from ml4chem.utils import dynamic_import

# Starting logger object
logger = logging.getLogger()


class LatentFeatures(object):
class LatentFeatures(AtomisticFeatures):
"""Extraction of features using AutoEncoder model class.
The latent space represents a feature space from the inputs that an
Expand Down Expand Up @@ -176,6 +178,7 @@ def calculate(self, images, purpose="training", data=None, svm=False):
feature_space, svm=svm, purpose=purpose
)

self.feature_space = latent_space
return latent_space

def load_encoder(self, encoder, **kwargs):
Expand Down Expand Up @@ -217,3 +220,7 @@ def load_encoder(self, encoder, **kwargs):
autoencoder.load_state_dict(torch.load(model_path), strict=True)

return autoencoder.eval()

def to_pandas(self):
"""Convert features to pandas DataFrame"""
return pd.DataFrame.from_dict(self.feature_space, orient="index")
16 changes: 13 additions & 3 deletions ml4chem/features/cartesian.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import dask
import datetime
import logging
import os
import time
import torch
import numpy as np
import pandas as pd
from collections import OrderedDict
from ml4chem.features.base import AtomisticFeatures
from ml4chem.data.preprocessing import Preprocessing
from ml4chem.data.serialization import dump, load
from ml4chem.utils import convert_elapsed_time
Expand All @@ -13,7 +16,7 @@
logger = logging.getLogger()


class Cartesian(object):
class Cartesian(AtomisticFeatures):
"""Cartesian Coordinates
Cartesian coordinates are features, too (not very useful ones though). This
Expand Down Expand Up @@ -47,7 +50,7 @@ def __init__(
self,
scheduler="distributed",
filename="cartesians.db",
preprocessor=("Normalizer",),
preprocessor=("Normalizer", None),
save_preprocessor="ml4chem",
overwrite=True,
):
Expand Down Expand Up @@ -83,6 +86,8 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):
logger.info(" ")
logger.info("Featurization")
logger.info("=============")
now = datetime.datetime.now()
logger.info("Module accessed on {}.".format(now.strftime("%Y-%m-%d %H:%M:%S")))

if os.path.isfile(self.filename) and self.overwrite is False:
logger.warning("Loading features from {}.".format(self.filename))
Expand Down Expand Up @@ -257,7 +262,12 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):
else:
dump(feature_space, filename=self.filename)

return feature_space
self.feature_space = feature_space
return self.feature_space

def to_pandas(self):
"""Convert features to pandas DataFrame"""
return pd.DataFrame.from_dict(self.feature_space, orient="index")

@dask.delayed
def get_atomic_features(self, atom, svm=False):
Expand Down
3 changes: 3 additions & 0 deletions ml4chem/features/gaussian.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import dask
import datetime
import logging
import os
import time
Expand Down Expand Up @@ -186,6 +187,8 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):
logger.info(" ")
logger.info("Featurization")
logger.info("=============")
now = datetime.datetime.now()
logger.info("Module accessed on {}.".format(now.strftime("%Y-%m-%d %H:%M:%S")))

# FIXME the block below should become a function.
if os.path.isfile(self.filename) and self.overwrite is False:
Expand Down
4 changes: 3 additions & 1 deletion ml4chem/models/autoencoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,9 @@ def prepare_model(
)

if self.name() == "VAE":
logger.info("Variant: {}.".format(self.variant))
logger.info(
"Variant: {}. One for all: {}.".format(self.variant, self.one_for_all)
)

try:
unique_element_symbols = data.unique_element_symbols[purpose]
Expand Down
8 changes: 6 additions & 2 deletions ml4chem/models/kernelridge.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import dask
import datetime
import logging
import time
import numpy as np
Expand Down Expand Up @@ -180,8 +181,11 @@ def prepare_model(
"""
if purpose == "training":
logger.info(" ")
logger.info("Model Training")
logger.info("==============")
logger.info("Model")
logger.info("=====")
logger.info(
"Module accessed on {}.".format(now.strftime("%Y-%m-%d %H:%M:%S"))
)
logger.info("Model name: {}.".format(self.name()))
logger.info("Kernel parameters:")
logger.info(" - Kernel function: {}.".format(self.kernel))
Expand Down
2 changes: 2 additions & 0 deletions ml4chem/models/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ def train(
logger.info(" ")
logging.info("Model Merger")
logging.info("============")
now = datetime.datetime.now()
logger.info("Module accessed on {}.".format(now.strftime("%Y-%m-%d %H:%M:%S")))
logging.info("Merging the following models:")

for model in self.models:
Expand Down
4 changes: 4 additions & 0 deletions ml4chem/models/neuralnetwork.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ def prepare_model(self, input_dimension, data=None, purpose="training"):
logger.info(" ")
logger.info("Model")
logger.info("=====")
now = datetime.datetime.now()
logger.info(
"Module accessed on {}.".format(now.strftime("%Y-%m-%d %H:%M:%S"))
)
logger.info("Model name: {}.".format(self.name()))
logger.info("Number of hidden-layers: {}".format(hl))
logger.info(
Expand Down

0 comments on commit a041d4c

Please sign in to comment.