From e8e5416368c4de4976052d3ac8c7ec07eca41f6b Mon Sep 17 00:00:00 2001
From: Victoria Lim <lim.t.victoria@gmail.com>
Date: Thu, 21 Feb 2019 13:48:46 -0800
Subject: [PATCH 1/3] namd fep parser

---
 src/alchemlyb/estimators/bar_.py         |   4 +-
 src/alchemlyb/parsing/namd.py            | 104 +++++++++++++++++++++++
 src/alchemlyb/tests/parsing/test_namd.py |  46 ++++++++++
 3 files changed, 152 insertions(+), 2 deletions(-)
 create mode 100644 src/alchemlyb/parsing/namd.py
 create mode 100644 src/alchemlyb/tests/parsing/test_namd.py

diff --git a/src/alchemlyb/estimators/bar_.py b/src/alchemlyb/estimators/bar_.py
index bcbe44fb..a701bdc9 100644
--- a/src/alchemlyb/estimators/bar_.py
+++ b/src/alchemlyb/estimators/bar_.py
@@ -18,7 +18,7 @@ class BAR(BaseEstimator):
     relative_tolerance : float, optional
         Set to determine the relative tolerance convergence criteria.
 
-    method : str, optional, defualt='false-position'
+    method : str, optional, default='false-position'
         choice of method to solve BAR nonlinear equations,
         one of 'self-consistent-iteration' or 'false-position' (default: 'false-position')
 
@@ -57,7 +57,7 @@ def fit(self, u_nk):
 
         Parameters
         ----------
-        u_nk : DataFrame 
+        u_nk : DataFrame
             u_nk[n,k] is the reduced potential energy of uncorrelated
             configuration n evaluated at state k.
 
diff --git a/src/alchemlyb/parsing/namd.py b/src/alchemlyb/parsing/namd.py
new file mode 100644
index 00000000..68ab12c4
--- /dev/null
+++ b/src/alchemlyb/parsing/namd.py
@@ -0,0 +1,104 @@
+"""Parsers for extracting alchemical data from `Namd <http://www.ks.uiuc.edu/Research/namd/>`_ output files.
+
+"""
+import pandas as pd
+import numpy as np
+from .util import anyopen
+
+def extract_u_nk(fep_file):
+    """Return reduced potentials `u_nk` from NAMD fepout file.
+
+    Parameters
+    ----------
+    fepout : str
+        Path to fepout file to extract data from.
+
+    Returns
+    -------
+    u_nk : DataFrame
+        Potential energy for each alchemical state (k) for each frame (n).
+
+    """
+    # lists to get timesteps and work values of each window
+    win_ts = []
+    win_de = []
+
+    # create dataframe for results
+    df = pd.DataFrame(columns=['timestep','fep-lambda'])
+
+    # boolean flag to parse data after equil time
+    parsing = False
+
+    # open and get data from fep file.
+    with anyopen(fep_file, 'r') as f:
+        data = f.readlines()
+
+    for line in data:
+        l = line.strip().split()
+
+        # this line marks end of window; dump data into dataframe
+        if '#Free' in l:
+
+            # convert last window's work and timestep values to np arrays
+            win_de_arr = np.asarray(win_de)
+            win_ts_arr = np.asarray(win_ts)
+
+            # extract lambda values for finished window
+            # lambda1 = sampling lambda (row), lambda2 = evaluated lambda (col)
+            lambda1 = "{0:.2f}".format(float(l[7]))
+            lambda2 = "{0:.2f}".format(float(l[8]))
+
+            # create dataframe of timestep and work values
+            # this window's data goes in row LAMBDA1 and column LAMBDA2
+            tempDF = pd.DataFrame({
+                'timestep':win_ts_arr,
+                'fep-lambda': np.full(len(win_de_arr),lambda1),
+                lambda1:0,
+                lambda2:win_de_arr})
+
+            # join the new window's df to existing df
+            df = pd.concat([df, tempDF])
+            df.fillna(0, inplace=True)
+
+            # reset values for next window of fepout file
+            win_de = []
+            win_ts = []
+            parsing = False
+
+        # append work value from 'dE' column of fepout file
+        if parsing:
+            win_de.append(float(l[6]))
+            win_ts.append(float(l[1]))
+
+        # turn parsing on after line 'STARTING COLLECTION OF ENSEMBLE AVERAGE'
+        if '#STARTING' in l:
+            parsing = True
+
+    # create last dataframe for fep-lambda at last LAMBDA2
+    tempDF = pd.DataFrame({
+        'timestep':win_ts_arr,
+        'fep-lambda': lambda2})
+    df = pd.concat([df, tempDF])
+    df.fillna(0, inplace=True)
+
+    df.set_index(['timestep','fep-lambda'], inplace=True)
+    return df
+
+
+
+
+def extract_dHdl(fepout):
+    """Return gradients `dH/dl` from a NAMD TI fepout file.
+
+    Parameters
+    ----------
+    xvg : str
+        Path to fepout file to extract data from.
+
+    Returns
+    -------
+    dH/dl : Series
+        dH/dl as a function of time.
+
+    """
+    pass
diff --git a/src/alchemlyb/tests/parsing/test_namd.py b/src/alchemlyb/tests/parsing/test_namd.py
new file mode 100644
index 00000000..693f67cd
--- /dev/null
+++ b/src/alchemlyb/tests/parsing/test_namd.py
@@ -0,0 +1,46 @@
+"""NAMD parser tests.
+
+"""
+
+from alchemlyb.parsing.namd import extract_u_nk
+from alchemtest.namd import load_tyr2ala
+from numpy.testing import assert_almost_equal
+
+
+def test_u_nk():
+    """Test that u_nk has the correct form when extracted from files.
+    """
+    dataset = load_tyr2ala()
+
+    for direction in dataset['data']:
+        for filename in dataset['data'][direction]:
+            u_nk = extract_u_nk(filename)
+
+            assert u_nk.index.names == ['timestep', 'fep-lambda']
+            if direction == 'forward':
+                assert u_nk.shape == (21021, 21)
+            elif direction == 'backward':
+                assert u_nk.shape == (21021, 21)
+
+def test_bar_namd():
+    """Test BAR calculation on NAMD data.
+    """
+    from alchemlyb.estimators import BAR
+    import numpy as np
+
+    # load data
+    dataset = load_tyr2ala()
+    u_nk1 = extract_u_nk(dataset['data']['forward'][0])
+    u_nk2 = extract_u_nk(dataset['data']['backward'][0])
+
+    # combine dataframes of fwd and rev directions
+    u_nk1.replace(0, np.nan, inplace=True)
+    u_nk1[u_nk1.isnull()] = u_nk2
+    u_nk1.replace(np.nan, 0, inplace=True)
+    u_nk = u_nk1.sort_index(level=u_nk1.index.names[1:])
+
+    # after loading BOTH fwd and rev data, do BAR calculation
+    bar = BAR()
+    bar.fit(u_nk)
+    dg = (bar.delta_f_.iloc[0].iloc[-1])
+    assert_almost_equal(dg, 6.03126982925, decimal=4)

From 32d3b548e0f7e442f98b9c9bd915641593e24988 Mon Sep 17 00:00:00 2001
From: Victoria Lim <lim.t.victoria@gmail.com>
Date: Wed, 20 Mar 2019 16:56:16 -0700
Subject: [PATCH 2/3] integrate code into docs

---
 docs/parsing.rst                        |  1 +
 docs/parsing/alchemlyb.parsing.namd.rst | 33 +++++++++++++++++++++++++
 2 files changed, 34 insertions(+)
 create mode 100644 docs/parsing/alchemlyb.parsing.namd.rst

diff --git a/docs/parsing.rst b/docs/parsing.rst
index bb123ed5..0fcf060c 100644
--- a/docs/parsing.rst
+++ b/docs/parsing.rst
@@ -110,4 +110,5 @@ See the documentation for the package you are using for more details on parser u
 
     gmx
     amber
+    namd
     
diff --git a/docs/parsing/alchemlyb.parsing.namd.rst b/docs/parsing/alchemlyb.parsing.namd.rst
new file mode 100644
index 00000000..9e71e519
--- /dev/null
+++ b/docs/parsing/alchemlyb.parsing.namd.rst
@@ -0,0 +1,33 @@
+NAMD parsing
+=============
+.. automodule:: alchemlyb.parsing.namd
+
+The parsers featured in this module are constructed to properly parse `NAMD`_ ``.fepout`` output files containing derivatives of the Hamiltonian and FEP (BAR) data.
+See the NAMD documentation for the `theoretical backdrop <https://www.ks.uiuc.edu/Research/namd/2.13/ug/node60.html>`_ and `implementation details <https://www.ks.uiuc.edu/Research/namd/2.13/ug/node61.html>`_.
+
+If you wish to use BAR on FEP data, be sure to provide the ``.fepout`` file from both the forward and reverse transformations.
+
+After calling :meth:`~alchemlyb.parsing.namd.extract_u_nk` on the forward and reverse work values, these dataframes can be combined into one:
+
+.. code-block:: python
+
+    # replace zeroes in initial dataframe with nan
+    u_nk_fwd.replace(0, np.nan, inplace=True)
+    # replace the nan values with the reverse dataframe --
+    # this should not overwrite any of the fwd work values 
+    u_nk_fwd[u_nk_fwd.isnull()] = u_nk_rev
+    # replace remaining nan values back to zero
+    u_nk_fwd.replace(np.nan, 0, inplace=True)
+    # sort final dataframe by `fep-lambda` (as opposed to `timestep`)
+    u_nk = u_nk_fwd.sort_index(level=u_nk_fwd.index.names[1:])
+
+The ``fep-lambda`` index states at which lambda this particular frame was sampled, whereas the columns are the evaluations of the Hamiltonian (or the potential energy U) at other lambdas (sometimes called "foreign lambdas").
+
+.. _`NAMD`: http://www.ks.uiuc.edu/Research/namd/
+
+
+API Reference
+-------------
+This submodule includes these parsing functions:
+
+.. autofunction:: alchemlyb.parsing.namd.extract_u_nk

From d668819b9ec8c574d3cfd0b7dc6abc048b2587fc Mon Sep 17 00:00:00 2001
From: Victoria Lim <lim.t.victoria@gmail.com>
Date: Wed, 20 Mar 2019 16:56:51 -0700
Subject: [PATCH 3/3] update documentation; remove placeholder function

---
 src/alchemlyb/parsing/namd.py | 35 +++++++++--------------------------
 1 file changed, 9 insertions(+), 26 deletions(-)

diff --git a/src/alchemlyb/parsing/namd.py b/src/alchemlyb/parsing/namd.py
index 68ab12c4..643e5d81 100644
--- a/src/alchemlyb/parsing/namd.py
+++ b/src/alchemlyb/parsing/namd.py
@@ -1,4 +1,4 @@
-"""Parsers for extracting alchemical data from `Namd <http://www.ks.uiuc.edu/Research/namd/>`_ output files.
+"""Parsers for extracting alchemical data from `NAMD <http://www.ks.uiuc.edu/Research/namd/>`_ output files.
 
 """
 import pandas as pd
@@ -10,7 +10,7 @@ def extract_u_nk(fep_file):
 
     Parameters
     ----------
-    fepout : str
+    fep_file : str
         Path to fepout file to extract data from.
 
     Returns
@@ -24,7 +24,7 @@ def extract_u_nk(fep_file):
     win_de = []
 
     # create dataframe for results
-    df = pd.DataFrame(columns=['timestep','fep-lambda'])
+    u_nk = pd.DataFrame(columns=['timestep','fep-lambda'])
 
     # boolean flag to parse data after equil time
     parsing = False
@@ -57,8 +57,8 @@ def extract_u_nk(fep_file):
                 lambda2:win_de_arr})
 
             # join the new window's df to existing df
-            df = pd.concat([df, tempDF])
-            df.fillna(0, inplace=True)
+            u_nk = pd.concat([u_nk, tempDF])
+            u_nk.fillna(0, inplace=True)
 
             # reset values for next window of fepout file
             win_de = []
@@ -78,27 +78,10 @@ def extract_u_nk(fep_file):
     tempDF = pd.DataFrame({
         'timestep':win_ts_arr,
         'fep-lambda': lambda2})
-    df = pd.concat([df, tempDF])
-    df.fillna(0, inplace=True)
+    u_nk = pd.concat([u_nk, tempDF])
+    u_nk.fillna(0, inplace=True)
 
-    df.set_index(['timestep','fep-lambda'], inplace=True)
-    return df
+    u_nk.set_index(['timestep','fep-lambda'], inplace=True)
 
+    return u_nk
 
-
-
-def extract_dHdl(fepout):
-    """Return gradients `dH/dl` from a NAMD TI fepout file.
-
-    Parameters
-    ----------
-    xvg : str
-        Path to fepout file to extract data from.
-
-    Returns
-    -------
-    dH/dl : Series
-        dH/dl as a function of time.
-
-    """
-    pass