-
Notifications
You must be signed in to change notification settings - Fork 51
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #72 from vtlim/master
NAMD parser for FEP files Close #7
- Loading branch information
Showing
5 changed files
with
169 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -110,4 +110,5 @@ See the documentation for the package you are using for more details on parser u | |
|
||
gmx | ||
amber | ||
namd | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
NAMD parsing | ||
============= | ||
.. automodule:: alchemlyb.parsing.namd | ||
|
||
The parsers featured in this module are constructed to properly parse `NAMD`_ ``.fepout`` output files containing derivatives of the Hamiltonian and FEP (BAR) data. | ||
See the NAMD documentation for the `theoretical backdrop <https://www.ks.uiuc.edu/Research/namd/2.13/ug/node60.html>`_ and `implementation details <https://www.ks.uiuc.edu/Research/namd/2.13/ug/node61.html>`_. | ||
|
||
If you wish to use BAR on FEP data, be sure to provide the ``.fepout`` file from both the forward and reverse transformations. | ||
|
||
After calling :meth:`~alchemlyb.parsing.namd.extract_u_nk` on the forward and reverse work values, these dataframes can be combined into one: | ||
|
||
.. code-block:: python | ||
# replace zeroes in initial dataframe with nan | ||
u_nk_fwd.replace(0, np.nan, inplace=True) | ||
# replace the nan values with the reverse dataframe -- | ||
# this should not overwrite any of the fwd work values | ||
u_nk_fwd[u_nk_fwd.isnull()] = u_nk_rev | ||
# replace remaining nan values back to zero | ||
u_nk_fwd.replace(np.nan, 0, inplace=True) | ||
# sort final dataframe by `fep-lambda` (as opposed to `timestep`) | ||
u_nk = u_nk_fwd.sort_index(level=u_nk_fwd.index.names[1:]) | ||
The ``fep-lambda`` index states at which lambda this particular frame was sampled, whereas the columns are the evaluations of the Hamiltonian (or the potential energy U) at other lambdas (sometimes called "foreign lambdas"). | ||
|
||
.. _`NAMD`: http://www.ks.uiuc.edu/Research/namd/ | ||
|
||
|
||
API Reference | ||
------------- | ||
This submodule includes these parsing functions: | ||
|
||
.. autofunction:: alchemlyb.parsing.namd.extract_u_nk |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
"""Parsers for extracting alchemical data from `NAMD <http://www.ks.uiuc.edu/Research/namd/>`_ output files. | ||
""" | ||
import pandas as pd | ||
import numpy as np | ||
from .util import anyopen | ||
|
||
def extract_u_nk(fep_file): | ||
"""Return reduced potentials `u_nk` from NAMD fepout file. | ||
Parameters | ||
---------- | ||
fep_file : str | ||
Path to fepout file to extract data from. | ||
Returns | ||
------- | ||
u_nk : DataFrame | ||
Potential energy for each alchemical state (k) for each frame (n). | ||
""" | ||
# lists to get timesteps and work values of each window | ||
win_ts = [] | ||
win_de = [] | ||
|
||
# create dataframe for results | ||
u_nk = pd.DataFrame(columns=['timestep','fep-lambda']) | ||
|
||
# boolean flag to parse data after equil time | ||
parsing = False | ||
|
||
# open and get data from fep file. | ||
with anyopen(fep_file, 'r') as f: | ||
data = f.readlines() | ||
|
||
for line in data: | ||
l = line.strip().split() | ||
|
||
# this line marks end of window; dump data into dataframe | ||
if '#Free' in l: | ||
|
||
# convert last window's work and timestep values to np arrays | ||
win_de_arr = np.asarray(win_de) | ||
win_ts_arr = np.asarray(win_ts) | ||
|
||
# extract lambda values for finished window | ||
# lambda1 = sampling lambda (row), lambda2 = evaluated lambda (col) | ||
lambda1 = "{0:.2f}".format(float(l[7])) | ||
lambda2 = "{0:.2f}".format(float(l[8])) | ||
|
||
# create dataframe of timestep and work values | ||
# this window's data goes in row LAMBDA1 and column LAMBDA2 | ||
tempDF = pd.DataFrame({ | ||
'timestep':win_ts_arr, | ||
'fep-lambda': np.full(len(win_de_arr),lambda1), | ||
lambda1:0, | ||
lambda2:win_de_arr}) | ||
|
||
# join the new window's df to existing df | ||
u_nk = pd.concat([u_nk, tempDF]) | ||
u_nk.fillna(0, inplace=True) | ||
|
||
# reset values for next window of fepout file | ||
win_de = [] | ||
win_ts = [] | ||
parsing = False | ||
|
||
# append work value from 'dE' column of fepout file | ||
if parsing: | ||
win_de.append(float(l[6])) | ||
win_ts.append(float(l[1])) | ||
|
||
# turn parsing on after line 'STARTING COLLECTION OF ENSEMBLE AVERAGE' | ||
if '#STARTING' in l: | ||
parsing = True | ||
|
||
# create last dataframe for fep-lambda at last LAMBDA2 | ||
tempDF = pd.DataFrame({ | ||
'timestep':win_ts_arr, | ||
'fep-lambda': lambda2}) | ||
u_nk = pd.concat([u_nk, tempDF]) | ||
u_nk.fillna(0, inplace=True) | ||
|
||
u_nk.set_index(['timestep','fep-lambda'], inplace=True) | ||
|
||
return u_nk | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
"""NAMD parser tests. | ||
""" | ||
|
||
from alchemlyb.parsing.namd import extract_u_nk | ||
from alchemtest.namd import load_tyr2ala | ||
from numpy.testing import assert_almost_equal | ||
|
||
|
||
def test_u_nk(): | ||
"""Test that u_nk has the correct form when extracted from files. | ||
""" | ||
dataset = load_tyr2ala() | ||
|
||
for direction in dataset['data']: | ||
for filename in dataset['data'][direction]: | ||
u_nk = extract_u_nk(filename) | ||
|
||
assert u_nk.index.names == ['timestep', 'fep-lambda'] | ||
if direction == 'forward': | ||
assert u_nk.shape == (21021, 21) | ||
elif direction == 'backward': | ||
assert u_nk.shape == (21021, 21) | ||
|
||
def test_bar_namd(): | ||
"""Test BAR calculation on NAMD data. | ||
""" | ||
from alchemlyb.estimators import BAR | ||
import numpy as np | ||
|
||
# load data | ||
dataset = load_tyr2ala() | ||
u_nk1 = extract_u_nk(dataset['data']['forward'][0]) | ||
u_nk2 = extract_u_nk(dataset['data']['backward'][0]) | ||
|
||
# combine dataframes of fwd and rev directions | ||
u_nk1.replace(0, np.nan, inplace=True) | ||
u_nk1[u_nk1.isnull()] = u_nk2 | ||
u_nk1.replace(np.nan, 0, inplace=True) | ||
u_nk = u_nk1.sort_index(level=u_nk1.index.names[1:]) | ||
|
||
# after loading BOTH fwd and rev data, do BAR calculation | ||
bar = BAR() | ||
bar.fit(u_nk) | ||
dg = (bar.delta_f_.iloc[0].iloc[-1]) | ||
assert_almost_equal(dg, 6.03126982925, decimal=4) |