Add tests

This adds a number of tests for slicing() and statistical_inefficiency():
* Test that slicing() respects upper and lower time bounds (currently passes)
* Test that statistical_inefficiency() respects upper and lower time bounds when it is used without a series to subsample (currently passes)
* Test that statistical_inefficiency() respects upper and lower time bounds when it is used with a series to subsample (currently fails)
* Test that first using slicing() on the data frame, then statistical_inefficiency() without time bounds, yields the same results as a single call to statistical_inefficiency() with time bounds (currently fails)

Refs alchemistry#198
ptmerz · Jun 22, 2022 · 888602b · 888602b
1 parent 9153bbd
commit 888602b
Showing 1 changed file with 114 additions and 0 deletions.
diff --git a/src/alchemlyb/tests/test_preprocessing.py b/src/alchemlyb/tests/test_preprocessing.py
@@ -68,6 +68,29 @@ def slicer(self, *args, **kwargs):
     def test_basic_slicing(self, data, size):
         assert len(self.slicer(data, lower=1000, upper=34000, step=5)) == size
 
+    @pytest.mark.parametrize(('data', 'lower', 'upper'),
+                             [
+                                 (gmx_benzene_dHdl(), 1000, 34000),
+                                 (gmx_benzene_u_nk(), 1000, 34000),
+                             ])
+    def test_lower_and_upper_bound(self, data, lower, upper):
+        """
+        Test that the lower and upper time is respected
+        """
+        original_length = len(data)
+        # Check that the input data is appropriate for the test
+        assert any(data.reset_index()['time'] < lower)
+        assert any(data.reset_index()['time'] > upper)
+
+        # Slice data, and check that we don't observe times outside
+        # the prescribed range
+        sliced = self.slicer(data, lower=lower, upper=upper, step=5)
+        assert all(sliced.reset_index()['time'] >= lower)
+        assert all(sliced.reset_index()['time'] <= upper)
+
+        # Make sure we didn't change input data
+        assert len(data) == original_length
+
     @pytest.mark.parametrize('data', [gmx_benzene_dHdl(),
                                       gmx_benzene_u_nk()])
     def test_disordered_exception(self, data):
@@ -213,6 +236,97 @@ def test_raise_ValueError_for_mismatched_data(self, series):
         with pytest.raises(ValueError):
             self.slicer(data, series=series)
 
+    @pytest.mark.parametrize(('data', 'lower', 'upper'),
+                             [
+                                 (gmx_benzene_dHdl(), 1000, 34000),
+                                 (gmx_benzene_u_nk(), 1000, 34000),
+                             ])
+    def test_lower_and_upper_bound_slicer(self, data, lower, upper):
+        """
+        Test that the lower and upper time is respected when using statistical_inefficiency
+        without a series. In this case, statistical_inefficiency should behave like slicing
+        """
+        original_length = len(data)
+        # Check that the input data is appropriate for the test
+        assert any(data.reset_index()['time'] < lower)
+        assert any(data.reset_index()['time'] > upper)
+
+        # Slice data, and check that we don't observe times outside
+        # the prescribed range
+        sliced = self.slicer(data,
+                             series=None,
+                             lower=lower,
+                             upper=upper,
+                             step=5)
+        assert all(sliced.reset_index()['time'] >= lower)
+        assert all(sliced.reset_index()['time'] <= upper)
+
+        # Make sure we didn't change input data
+        assert len(data) == original_length
+
+    @pytest.mark.parametrize(('data', 'lower', 'upper'),
+                             [
+                                 (gmx_benzene_dHdl(), 1000, 34000),
+                                 (gmx_benzene_u_nk(), 1000, 34000),
+                             ])
+    def test_lower_and_upper_bound_inefficiency(self, data, lower, upper):
+        """
+        Test that the lower and upper time is respected when using statistical_inefficiency
+        with a series. In this case, statistical_inefficiency should slice the series, then
+        subsample the data frame.
+        """
+        original_length = len(data)
+        # Check that the input data is appropriate for the test
+        assert any(data.reset_index()['time'] < lower)
+        assert any(data.reset_index()['time'] > upper)
+
+        # Subsample data, and check that we don't observe times outside
+        # the prescribed range
+        sliced = self.slicer(data,
+                             series=data.sum(axis=1),
+                             lower=lower,
+                             upper=upper,
+                             step=5)
+        assert all(sliced.reset_index()['time'] >= lower)
+        assert all(sliced.reset_index()['time'] <= upper)
+        # Make sure we didn't change input data
+        assert len(data) == original_length
+
+    @pytest.mark.parametrize(('data', 'lower', 'upper', 'conservative'),
+                             [
+                                 (gmx_benzene_dHdl(), 1000, 34000, True),
+                                 (gmx_benzene_u_nk(), 1000, 34000, True),
+                                 (gmx_benzene_dHdl(), 1000, 34000, False),
+                                 (gmx_benzene_u_nk(), 1000, 34000, False),
+                             ])
+    def test_slicing_inefficiency_equivalence(self, data, lower, upper, conservative):
+        """
+        Test that first slicing the data frame, then subsampling is equivalent to
+        subsampling with lower / upper bounds set
+        """
+        original_length = len(data)
+        # Check that the input data is appropriate for the test
+        assert any(data.reset_index()['time'] < lower)
+        assert any(data.reset_index()['time'] > upper)
+
+        # Slice dataframe, then subsample it based on the sum of its components
+        sliced_data = slicing(data, lower=lower, upper=upper)
+        subsampled_sliced_data = self.slicer(sliced_data,
+                                             series=sliced_data.sum(axis=1),
+                                             conservative=conservative)
+
+        # Make sure we didn't change input data
+        assert len(data) == original_length
+        # Subsample the dataframe based on the sum of its components while
+        # also specifying the slicing range
+        subsampled_data = self.slicer(data,
+                                      series=data.sum(axis=1),
+                                      lower=lower,
+                                      upper=upper,
+                                      conservative=conservative)
+
+        assert (subsampled_sliced_data == subsampled_data).all(axis=None)
+
 
 class TestEquilibriumDetection(TestSlicing, CorrelatedPreprocessors):