Training progress is now printed to the logging file
- This is another step toward #10.
- Beautified the code formatting with black.
muammar committed Jun 20, 2019
1 parent 420dad7 commit 1f03c61
Showing 14 changed files with 297 additions and 248 deletions.
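
In short, the example scripts now configure Python's logging module to write progress to a file rather than only to the console. A minimal sketch of the pattern, as it appears in examples/autoencoder/cu_training.py below:

import logging

# File name and format taken from cu_training.py in this commit.
logging.basicConfig(
    filename="cu_training.log",
    level=logging.INFO,
    format="%(filename)s:%(lineno)s %(levelname)s:%(message)s",
)
logging.info("Training progress is appended to cu_training.log")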
85 changes: 43 additions & 42 deletions docs/source/conf.py
@@ -15,29 +15,30 @@
import os
import sys
from unittest.mock import MagicMock
sys.path.insert(0, os.path.abspath('../../'))

#class Mock(MagicMock):
sys.path.insert(0, os.path.abspath("../../"))

# class Mock(MagicMock):
# @classmethod
# def __getattr__(cls, name):
# return MagicMock()
#
#MOCK_MODULES = ['ase', 'ase.calculators', 'ase.calculators.calculator',
# MOCK_MODULES = ['ase', 'ase.calculators', 'ase.calculators.calculator',
# 'ase.neighborlist' 'torch']
#
#sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
# sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
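
A side note on the commented-out mock block above: if it is ever re-enabled, the MOCK_MODULES list is missing a comma, so 'ase.neighborlist' 'torch' concatenates into the single bogus name 'ase.neighborlisttorch'. A sketch of the same idea with the comma fixed, using Sphinx's built-in autodoc_mock_imports option instead of a custom Mock class (assuming these five modules are the ones to stub):

# Corrected module list; Sphinx mocks these imports during autodoc runs.
autodoc_mock_imports = [
    "ase",
    "ase.calculators",
    "ase.calculators.calculator",
    "ase.neighborlist",
    "torch",
]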


# -- Project information -----------------------------------------------------

project = 'ML4Chem'
copyright = '2019, Muammar El Khatib'
author = 'Muammar El Khatib'
project = "ML4Chem"
copyright = "2019, Muammar El Khatib"
author = "Muammar El Khatib"

# The short X.Y version
version = ''
version = ""
# The full version, including alpha/beta/rc tags
release = ''
release = ""


# -- General configuration ---------------------------------------------------
@@ -50,30 +51,30 @@
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.napoleon',
'sphinx.ext.doctest',
'sphinx.ext.intersphinx',
'sphinx.ext.todo',
'sphinx.ext.coverage',
'sphinx.ext.mathjax',
'sphinx.ext.ifconfig',
'sphinx.ext.viewcode',
"sphinx.ext.autodoc",
"sphinx.ext.napoleon",
"sphinx.ext.doctest",
"sphinx.ext.intersphinx",
"sphinx.ext.todo",
"sphinx.ext.coverage",
"sphinx.ext.mathjax",
"sphinx.ext.ifconfig",
"sphinx.ext.viewcode",
#'recommonmark',
'm2r',
'sphinx.ext.githubpages'
"m2r",
"sphinx.ext.githubpages",
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = ['.rst', '.md']
source_suffix = [".rst", ".md"]

# The master toctree document.
master_doc = 'index'
master_doc = "index"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@@ -88,15 +89,15 @@
exclude_patterns = []

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'tango'
pygments_style = "tango"


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
html_theme = "sphinx_rtd_theme"

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
@@ -107,7 +108,7 @@
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ["_static"]

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
@@ -118,14 +119,15 @@
# 'searchbox.html']``.
#
# html_sidebars = {}
html_sidebars = {'**': ['globaltoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html']}

html_sidebars = {
"**": ["globaltoc.html", "relations.html", "sourcelink.html", "searchbox.html"]
}


# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'ML4Chemdoc'
htmlhelp_basename = "ML4Chemdoc"


# -- Options for LaTeX output ------------------------------------------------
@@ -134,15 +136,12 @@
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',

# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
@@ -152,19 +151,15 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'ML4Chem.tex', 'ML4Chem Documentation',
'Muammar El Khatib', 'manual'),
(master_doc, "ML4Chem.tex", "ML4Chem Documentation", "Muammar El Khatib", "manual")
]


# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'ml4chem', 'ML4Chem Documentation',
[author], 1)
]
man_pages = [(master_doc, "ml4chem", "ML4Chem Documentation", [author], 1)]


# -- Options for Texinfo output ----------------------------------------------
@@ -173,9 +168,15 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'ML4Chem', 'ML4Chem Documentation',
author, 'ML4Chem', 'One line description of project.',
'Miscellaneous'),
(
master_doc,
"ML4Chem",
"ML4Chem Documentation",
author,
"ML4Chem",
"One line description of project.",
"Miscellaneous",
)
]


@@ -194,4 +195,4 @@
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']
epub_exclude_files = ["search.html"]
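
With this conf.py in place, the HTML documentation can be built programmatically as well as from the command line; a sketch equivalent to running sphinx-build, with the output path assumed from the repository layout:

from sphinx.cmd.build import build_main

# Equivalent to: sphinx-build -b html docs/source docs/build/html
# (output path assumed; requires sphinx_rtd_theme and m2r to be installed).
build_main(["-b", "html", "docs/source", "docs/build/html"])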
57 changes: 34 additions & 23 deletions examples/autoencoder/cu_inference.py
@@ -1,6 +1,7 @@
import logging
import sys
sys.path.append('../../')

sys.path.append("../../")
from ase.io import Trajectory
from dask.distributed import Client, LocalCluster
from ml4chem.data.handler import DataSet
@@ -13,39 +14,49 @@

def autoencode():
# Load the images with ASE
latent_space = load('cu_training.latent')
print('Latent space from file')
latent_space = load("cu_training.latent")
print("Latent space from file")
print(latent_space)

images = Trajectory('cu_training.traj')
purpose = 'training'
images = Trajectory("cu_training.traj")
purpose = "training"

# Arguments for fingerprinting the images
normalized = True

data_handler = DataSet(images, purpose=purpose)
images, energies = data_handler.get_images(purpose=purpose)

fingerprints = ('Gaussian', {'cutoff': 6.5, 'normalized': normalized,
'save_preprocessor': 'inference.scaler'})
encoder = {'model': 'model.ml4c',
'params': 'model.params'}
preprocessor = ('MinMaxScaler', {'feature_range': (-1, 1)})

fingerprints = LatentFeatures(features=fingerprints, encoder=encoder,
preprocessor=preprocessor,
save_preprocessor='latent_space_min_max.scaler')
fingerprints = fingerprints.calculate_features(images, purpose=purpose,
data=data_handler,
svm=False)

print('Latent space from LatentFeatures class')
fingerprints = (
"Gaussian",
{
"cutoff": 6.5,
"normalized": normalized,
"save_preprocessor": "inference.scaler",
},
)
encoder = {"model": "model.ml4c", "params": "model.params"}
preprocessor = ("MinMaxScaler", {"feature_range": (-1, 1)})

fingerprints = LatentFeatures(
features=fingerprints,
encoder=encoder,
preprocessor=preprocessor,
save_preprocessor="latent_space_min_max.scaler",
)
fingerprints = fingerprints.calculate_features(
images, purpose=purpose, data=data_handler, svm=False
)

print("Latent space from LatentFeatures class")
print(fingerprints)

if __name__ == '__main__':
#logging.basicConfig(filename='cu_inference.log', level=logging.INFO,
logging.basicConfig(level=logging.INFO,
format='%(filename)s:%(lineno)s %(levelname)s:%(message)s')

if __name__ == "__main__":
# logging.basicConfig(filename='cu_inference.log', level=logging.INFO,
logging.basicConfig(
level=logging.INFO, format="%(filename)s:%(lineno)s %(levelname)s:%(message)s"
)
cluster = LocalCluster()
client = Client(cluster, asyncronous=True)
autoencode()
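
One caveat in the Dask setup shared by these scripts: the keyword is misspelled ("asyncronous" rather than "asynchronous"), and a blocking script like this one should not request an asynchronous client in any case. A cleaned-up sketch of the presumed intent:

from dask.distributed import Client, LocalCluster

# A synchronous client is what a blocking script needs;
# asynchronous=True is only meant for code running inside an async event loop.
cluster = LocalCluster()
client = Client(cluster)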
62 changes: 36 additions & 26 deletions examples/autoencoder/cu_training.py
@@ -1,6 +1,7 @@
import logging
import sys
sys.path.append('../../')

sys.path.append("../../")
from ase.io import Trajectory
from dask.distributed import Client, LocalCluster
from ml4chem import Potentials
@@ -12,8 +13,8 @@

def autoencode():
# Load the images with ASE
images = Trajectory('cu_training.traj')
purpose = 'training'
images = Trajectory("cu_training.traj")
purpose = "training"

# Arguments for fingerprinting the images
normalized = True
@@ -27,55 +28,64 @@ def autoencode():
"""
Let's create the targets of the model
"""
fingerprints = Gaussian(cutoff=6.5, normalized=normalized,
save_preprocessor='cu_training.scaler')
fingerprints = Gaussian(
cutoff=6.5, normalized=normalized, save_preprocessor="cu_training.scaler"
)

targets = fingerprints.calculate_features(training_set,
data=data_handler,
purpose=purpose,
svm=False)
targets = fingerprints.calculate_features(
training_set, data=data_handler, purpose=purpose, svm=False
)
output_dimension = len(list(targets.values())[0][0][1])
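    # Presumably: first image -> first atom -> its feature vector; that vector's
    # length sets the autoencoder's input and output width (inferred from the
    # indexing above, not documented in this diff).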

"""
Building AutoEncoder
"""
# Arguments for building the model
hiddenlayers = {'encoder': (20, 10, 4),
'decoder': (4, 10, 20)}
activation = 'tanh'
autoencoder = AutoEncoder(hiddenlayers=hiddenlayers,
activation=activation)
hiddenlayers = {"encoder": (20, 10, 4), "decoder": (4, 10, 20)}
activation = "tanh"
autoencoder = AutoEncoder(hiddenlayers=hiddenlayers, activation=activation)

data_handler.get_unique_element_symbols(images, purpose=purpose)
autoencoder.prepare_model(output_dimension, output_dimension,
data=data_handler)
autoencoder.prepare_model(output_dimension, output_dimension, data=data_handler)
# Arguments for training the potential
convergence = {'rmse': 5e-2}
convergence = {"rmse": 5e-2}
epochs = 2000
lr = 1e-0
weight_decay = 0
regularization = None

opt_kwars = {'lr': lr}
optimizer = ('lbfgs', opt_kwars)
opt_kwars = {"lr": lr}
optimizer = ("lbfgs", opt_kwars)

inputs = targets
train(inputs, targets, model=autoencoder, data=data_handler,
optimizer=optimizer, regularization=regularization, epochs=epochs,
convergence=convergence, lossfxn=None, device='cpu')
train(
inputs,
targets,
model=autoencoder,
data=data_handler,
optimizer=optimizer,
regularization=regularization,
epochs=epochs,
convergence=convergence,
lossfxn=None,
device="cpu",
)

latent_space = autoencoder.get_latent_space(targets, svm=True)

dump(latent_space, filename='cu_training.latent')
dump(latent_space, filename="cu_training.latent")

Potentials.save(autoencoder)

return latent_space, energy_targets, data_handler


if __name__ == '__main__':
logging.basicConfig(filename='cu_training.log', level=logging.INFO,
format='%(filename)s:%(lineno)s %(levelname)s:%(message)s')
if __name__ == "__main__":
logging.basicConfig(
filename="cu_training.log",
level=logging.INFO,
format="%(filename)s:%(lineno)s %(levelname)s:%(message)s",
)
cluster = LocalCluster()
client = Client(cluster, asyncronous=True)
inputs, outputs, data_handler = autoencode()
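
For reference, the ("lbfgs", opt_kwars) tuple above presumably maps onto PyTorch's L-BFGS optimizer. A hedged sketch of the equivalent direct call, assuming ml4chem forwards the keyword arguments unchanged (the Linear module is a hypothetical stand-in for the autoencoder's parameters):

import torch

model = torch.nn.Linear(20, 4)  # hypothetical stand-in, not ml4chem's model
# Assumed equivalent of optimizer = ("lbfgs", {"lr": 1e-0}) above:
optimizer = torch.optim.LBFGS(model.parameters(), lr=1e-0)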