Merge pull request #72 from vtlim/master

NAMD parser for FEP files Close #7
alchemistry · Mar 21, 2019 · 4ed712e · 4ed712e
2 parents 9857cd6 + d668819
commit 4ed712e
Show file tree

Hide file tree

Showing 5 changed files with 169 additions and 2 deletions.
diff --git a/docs/parsing.rst b/docs/parsing.rst
@@ -110,4 +110,5 @@ See the documentation for the package you are using for more details on parser u
 
     gmx
     amber
+    namd
 
diff --git a/docs/parsing/alchemlyb.parsing.namd.rst b/docs/parsing/alchemlyb.parsing.namd.rst
@@ -0,0 +1,33 @@
+NAMD parsing
+=============
+.. automodule:: alchemlyb.parsing.namd
+
+The parsers featured in this module are constructed to properly parse `NAMD`_ ``.fepout`` output files containing derivatives of the Hamiltonian and FEP (BAR) data.
+See the NAMD documentation for the `theoretical backdrop <https://www.ks.uiuc.edu/Research/namd/2.13/ug/node60.html>`_ and `implementation details <https://www.ks.uiuc.edu/Research/namd/2.13/ug/node61.html>`_.
+
+If you wish to use BAR on FEP data, be sure to provide the ``.fepout`` file from both the forward and reverse transformations.
+
+After calling :meth:`~alchemlyb.parsing.namd.extract_u_nk` on the forward and reverse work values, these dataframes can be combined into one:
+
+.. code-block:: python
+
+    # replace zeroes in initial dataframe with nan
+    u_nk_fwd.replace(0, np.nan, inplace=True)
+    # replace the nan values with the reverse dataframe --
+    # this should not overwrite any of the fwd work values 
+    u_nk_fwd[u_nk_fwd.isnull()] = u_nk_rev
+    # replace remaining nan values back to zero
+    u_nk_fwd.replace(np.nan, 0, inplace=True)
+    # sort final dataframe by `fep-lambda` (as opposed to `timestep`)
+    u_nk = u_nk_fwd.sort_index(level=u_nk_fwd.index.names[1:])
+
+The ``fep-lambda`` index states at which lambda this particular frame was sampled, whereas the columns are the evaluations of the Hamiltonian (or the potential energy U) at other lambdas (sometimes called "foreign lambdas").
+
+.. _`NAMD`: http://www.ks.uiuc.edu/Research/namd/
+
+
+API Reference
+-------------
+This submodule includes these parsing functions:
+
+.. autofunction:: alchemlyb.parsing.namd.extract_u_nk
diff --git a/src/alchemlyb/estimators/bar_.py b/src/alchemlyb/estimators/bar_.py
@@ -18,7 +18,7 @@ class BAR(BaseEstimator):
     relative_tolerance : float, optional
         Set to determine the relative tolerance convergence criteria.
 
-    method : str, optional, defualt='false-position'
+    method : str, optional, default='false-position'
         choice of method to solve BAR nonlinear equations,
         one of 'self-consistent-iteration' or 'false-position' (default: 'false-position')
 
@@ -57,7 +57,7 @@ def fit(self, u_nk):
 
         Parameters
         ----------
-        u_nk : DataFrame 
+        u_nk : DataFrame
             u_nk[n,k] is the reduced potential energy of uncorrelated
             configuration n evaluated at state k.
 

diff --git a/src/alchemlyb/parsing/namd.py b/src/alchemlyb/parsing/namd.py
@@ -0,0 +1,87 @@
+"""Parsers for extracting alchemical data from `NAMD <http://www.ks.uiuc.edu/Research/namd/>`_ output files.
+
+"""
+import pandas as pd
+import numpy as np
+from .util import anyopen
+
+def extract_u_nk(fep_file):
+    """Return reduced potentials `u_nk` from NAMD fepout file.
+
+    Parameters
+    ----------
+    fep_file : str
+        Path to fepout file to extract data from.
+
+    Returns
+    -------
+    u_nk : DataFrame
+        Potential energy for each alchemical state (k) for each frame (n).
+
+    """
+    # lists to get timesteps and work values of each window
+    win_ts = []
+    win_de = []
+
+    # create dataframe for results
+    u_nk = pd.DataFrame(columns=['timestep','fep-lambda'])
+
+    # boolean flag to parse data after equil time
+    parsing = False
+
+    # open and get data from fep file.
+    with anyopen(fep_file, 'r') as f:
+        data = f.readlines()
+
+    for line in data:
+        l = line.strip().split()
+
+        # this line marks end of window; dump data into dataframe
+        if '#Free' in l:
+
+            # convert last window's work and timestep values to np arrays
+            win_de_arr = np.asarray(win_de)
+            win_ts_arr = np.asarray(win_ts)
+
+            # extract lambda values for finished window
+            # lambda1 = sampling lambda (row), lambda2 = evaluated lambda (col)
+            lambda1 = "{0:.2f}".format(float(l[7]))
+            lambda2 = "{0:.2f}".format(float(l[8]))
+
+            # create dataframe of timestep and work values
+            # this window's data goes in row LAMBDA1 and column LAMBDA2
+            tempDF = pd.DataFrame({
+                'timestep':win_ts_arr,
+                'fep-lambda': np.full(len(win_de_arr),lambda1),
+                lambda1:0,
+                lambda2:win_de_arr})
+
+            # join the new window's df to existing df
+            u_nk = pd.concat([u_nk, tempDF])
+            u_nk.fillna(0, inplace=True)
+
+            # reset values for next window of fepout file
+            win_de = []
+            win_ts = []
+            parsing = False
+
+        # append work value from 'dE' column of fepout file
+        if parsing:
+            win_de.append(float(l[6]))
+            win_ts.append(float(l[1]))
+
+        # turn parsing on after line 'STARTING COLLECTION OF ENSEMBLE AVERAGE'
+        if '#STARTING' in l:
+            parsing = True
+
+    # create last dataframe for fep-lambda at last LAMBDA2
+    tempDF = pd.DataFrame({
+        'timestep':win_ts_arr,
+        'fep-lambda': lambda2})
+    u_nk = pd.concat([u_nk, tempDF])
+    u_nk.fillna(0, inplace=True)
+
+    u_nk.set_index(['timestep','fep-lambda'], inplace=True)
+
+    return u_nk
+
diff --git a/src/alchemlyb/tests/parsing/test_namd.py b/src/alchemlyb/tests/parsing/test_namd.py
@@ -0,0 +1,46 @@
+"""NAMD parser tests.
+
+"""
+
+from alchemlyb.parsing.namd import extract_u_nk
+from alchemtest.namd import load_tyr2ala
+from numpy.testing import assert_almost_equal
+
+
+def test_u_nk():
+    """Test that u_nk has the correct form when extracted from files.
+    """
+    dataset = load_tyr2ala()
+
+    for direction in dataset['data']:
+        for filename in dataset['data'][direction]:
+            u_nk = extract_u_nk(filename)
+
+            assert u_nk.index.names == ['timestep', 'fep-lambda']
+            if direction == 'forward':
+                assert u_nk.shape == (21021, 21)
+            elif direction == 'backward':
+                assert u_nk.shape == (21021, 21)
+
+def test_bar_namd():
+    """Test BAR calculation on NAMD data.
+    """
+    from alchemlyb.estimators import BAR
+    import numpy as np
+
+    # load data
+    dataset = load_tyr2ala()
+    u_nk1 = extract_u_nk(dataset['data']['forward'][0])
+    u_nk2 = extract_u_nk(dataset['data']['backward'][0])
+
+    # combine dataframes of fwd and rev directions
+    u_nk1.replace(0, np.nan, inplace=True)
+    u_nk1[u_nk1.isnull()] = u_nk2
+    u_nk1.replace(np.nan, 0, inplace=True)
+    u_nk = u_nk1.sort_index(level=u_nk1.index.names[1:])
+
+    # after loading BOTH fwd and rev data, do BAR calculation
+    bar = BAR()
+    bar.fit(u_nk)
+    dg = (bar.delta_f_.iloc[0].iloc[-1])
+    assert_almost_equal(dg, 6.03126982925, decimal=4)