Merge pull request #18 from LRydin/dev

Release v0.4!
LRydin · Jan 19, 2021 · 56ab110 · 56ab110
2 parents 2f6dc91 + 8f471a8
commit 56ab110
Show file tree

Hide file tree

Showing 24 changed files with 314 additions and 72 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,10 +1,17 @@
 language: python
 python:
-#  - "3.4"
-#  - "3.5"
+  - "3.4"
+  - "3.5"
   - "3.6"
   - "3.7"
-#  - "nightly"
+  - "3.8"
+  - "3.9"
+  - "nightly"
+
+arch:
+  - amd64
+  - arm64
+
 # command to install dependencies
 
 before_install:
@@ -14,9 +21,25 @@ install:
   - pip install coverage
   - pip install -r requirements.txt
 
-# command to run tests
+jobs:
+  include:
+    - stage: "Extra packages"
+      name: "Testing extra packages"
+      python: 3.6
+      install:
+        - pip install coverage
+        - pip install -r requirements.txt
+        - pip install EMD-signal
+      script:
+        - coverage run -m pytest test/test_EMD.py
+        - coverage run -m pytest test/test_MFDFA_extras.py
+  allow_failures:
+    - python: "nightly"
+
+
 script:
-  coverage run -m pytest test/
+  - coverage run -m pytest test/test_fgn.py
+  - coverage run -m pytest test/test_MFDFA.py
 
 after_success:
   bash <(curl -s https://codecov.io/bash)

diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2019 Rydin
+Copyright (c) 2019-2021 Rydin
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/MFDFA/MFDFA.py b/MFDFA/MFDFA.py
@@ -11,9 +11,9 @@
 
 def MFDFA(timeseries: np.ndarray, lag: np.ndarray=None, order: int=1,
           q: np.ndarray=2, stat: bool=False, modified: bool=False,
-          extensions: dict={"EMD":False, "eDFA":False}) -> np.ndarray:
+          extensions: dict={'EMD':False, 'eDFA':False, 'window':False}) -> np.ndarray:
     """
-    Multi-Fractal Detrended Fluctuation Analysis of timeseries. MFDFA generates
+    Multifractal Detrended Fluctuation Analysis of timeseries. MFDFA generates
     a fluctuation function F²(q,s), with s the segment size and q the q-powers,
     Take a timeseries Xₜ, find the integral Yₜ = cumsum(Xₜ), and segment the
     timeseries into Nₛ segments of size s.
@@ -86,6 +86,11 @@ def MFDFA(timeseries: np.ndarray, lag: np.ndarray=None, order: int=1,
      - ``eDFA``: bool (default ``False``)
         A method to evaluate the strength of multifractality. Calls function
         `eDFA()`.
+     - ``window``: bool (default ``False``)
+        A moving window for smaller timeseries. Set ``window`` as int > 0 with
+        the number of steps the window shoud move over the data. ``window = 1``
+        will move window by ``1`` step. Since the timeseries is segmented at
+        each lag lenght, any window choise > lag is only segmented once.
 
     Returns
     -------
@@ -117,9 +122,19 @@ def MFDFA(timeseries: np.ndarray, lag: np.ndarray=None, order: int=1,
         assert timeseries.shape[1] == 1, "Timeseries needs to be 1 dimensional"
 
     timeseries = timeseries.reshape(-1,1)
+
     # Size of array
     N = timeseries.shape[0]
 
+    # Assert if window is given, that it is int and > 0
+    window = False
+    if 'window' in extensions:
+        if extensions['window'] != False:
+            window = extensions['window']
+            assert isinstance(window, int), "'window' is not integer"
+            assert window > 0, "'window' is not > 0"
+
+
     # Fractal powers as floats
     q = np.asarray_chkfinite(q, dtype = float)
 
@@ -129,7 +144,7 @@ def MFDFA(timeseries: np.ndarray, lag: np.ndarray=None, order: int=1,
     # Reshape q to perform np.float_power
     q = q.reshape(-1, 1)
 
-    # x-axis
+    # x-axis needed for polyfit
     X = np.linspace(1, lag.max(), lag.max())
 
     # "Profile" of the series
@@ -145,7 +160,7 @@ def MFDFA(timeseries: np.ndarray, lag: np.ndarray=None, order: int=1,
     if stat == True:
         f_std = np.empty((0, q.size))
 
-
+    # Check which extensions are requested
     if ('eDFA', True) in extensions.items():
         f_eDFA = np.empty((0, q.size))
 
@@ -155,7 +170,7 @@ def MFDFA(timeseries: np.ndarray, lag: np.ndarray=None, order: int=1,
             assert isinstance(extensions['EMD'], list), 'list IMFs to detrend'
 
             # Detrending of the timeseries using EMD with given IMFs in a list
-            Y = detrendedtimeseries(Y, extensions["EMD"])
+            Y = detrendedtimeseries(Y, extensions['EMD'])
 
             # Force order = 0 since the data is detrended with EMD, i.e., no
             # need to do polynomial fittings anymore
@@ -164,32 +179,63 @@ def MFDFA(timeseries: np.ndarray, lag: np.ndarray=None, order: int=1,
     # Loop over elements in lag
     # Notice that given one has to split the timeseries into different segments
     # of length lag(), some elements at the end of the array might be
-    # missing. The same procedure is run in reverse, where elements at the
-    # begining of the series are discarded instead.
+    # missing. The same procedure is run in reverse—if not using an moving
+    # window—where elements at the begining of the series are discarded instead.
     for i in lag:
 
-        # Reshape into (N/lag, lag)
-        Y_ = Y[:N - N % i].reshape((N - N % i) // i, i)
-        Y_r = Y[N % i:].reshape((N - N % i) // i, i)
-
-        # If order = 0 one gets simply a Fluctuation Analysis (FA), or one is
-        # using the EMD setting, and the data is detrended.
-        if order == 0:
-            # Skip detrending
-            F = np.append( np.var(Y_, axis=1), np.var(Y_r, axis=1) )
-
-        else:
-            # Perform a polynomial fit to each segments
-            p = polyfit(X[:i], Y_.T, order)
-            p_r = polyfit(X[:i], Y_r.T, order)
-
-            # Subtract the trend from the fit and calculate the variance
-            F = np.append(
-                np.var(Y_ - polyval(X[:i], p), axis = 1),
-                np.var(Y_r - polyval(X[:i], p_r), axis = 1)
-            )
-
-        # Caculate the Multi-Fractal (Non)-Detrended Fluctuation Analysis
+        # Standard option
+        if window == False:
+            # Reshape into (N/lag, lag)
+            Y_ = Y[:N - N % i].reshape((N - N % i) // i, i)
+            Y_r = Y[N % i:].reshape((N - N % i) // i, i)
+
+            # If order = 0 one gets simply Fluctuation Analysis (FA), or if one is
+            # using the EMD setting the data is detrended and no polynomial fitting
+            # is needed.
+            if order == 0:
+                # Skip detrending
+                F = np.append(np.var(Y_, axis=1), np.var(Y_r, axis=1))
+
+            else:
+                # Perform a polynomial fit to each segments
+                p = polyfit(X[:i], Y_.T, order)
+                p_r = polyfit(X[:i], Y_r.T, order)
+
+                # Subtract the trend from the fit and calculate the variance
+                F = np.append(
+                    np.var(Y_ - polyval(X[:i], p), axis = 1),
+                    np.var(Y_r - polyval(X[:i], p_r), axis = 1)
+                )
+
+        # For short timeseries, using a moving window instead of segmenting the
+        # timeseries. Notice the number of operations is considerably larger
+        # depending on the moving window displacement.
+        if window != False:
+
+            F = np.empty(0)
+            for j in range(0, i-1, window):
+
+                # subtract j points as the moving window shortens the data
+                N_0 = N - j
+
+                # Reshape into (N_0/lag, lag)
+                Y_ = Y[j:N - N_0 % i].reshape((N - N_0 % i) // i, i)
+
+                # If order = 0 one gets simply Fluctuation Analysis (FA), or if one
+                # is using the EMD setting the data is detrended and no polynomial
+                # fitting is needed.
+                if order == 0:
+                    # Skip detrending
+                    F = np.append(F, np.var(Y_, axis=1))
+
+                else:
+                    # Perform a polynomial fit to each segments
+                    p = polyfit(X[:i], Y_.T, order)
+
+                    # Subtract the trend from the fit and calculate the variance
+                    F = np.append(F, np.var(Y_ - polyval(X[:i], p), axis = 1))
+
+        # Caculate the Multifractal (Non)-Detrended Fluctuation Analysis
         f = np.append(f,
               np.float_power(
                 np.mean(np.float_power(F, q / 2), axis = 1),

diff --git a/MFDFA/emddetrender.py b/MFDFA/emddetrender.py
@@ -2,9 +2,11 @@
 # Empirical Mode Decomposition algorithm', https://github.com/laszukdawid/PyEMD,
 # licenced under the Apache 2.0 Licencing.
 
-from PyEMD import EMD
+# from PyEMD import EMD
 import numpy as np
 
+# Import of PyEMD is called inside the function
+
 def detrendedtimeseries(timeseries: np.ndarray, modes: list):
     """
     The function calculates the Intrinsic Mode Functions (IMFs) of a given
@@ -50,6 +52,7 @@ def detrendedtimeseries(timeseries: np.ndarray, modes: list):
 
     return detrendedTimeseries
 
+
 def IMFs(timeseries: np.ndarray):
     """
     Extract the Intrinsic Mode Functions (IMFs) of a given timeseries.
@@ -62,14 +65,19 @@ def IMFs(timeseries: np.ndarray):
     Notes
     -----
     .. versionadded:: 0.3
-    
+
     Returns
     -------
     IMFs: np.ndarray
         The Intrinsic Mode Functions (IMFs) of the Empirical Mode Decomposition.
         These are of shape ``(..., timeseries.size)``, with the first dimension
         varying depending on the data. Last entry is the residuals.
     """
+
+    # Check if EMD-signal is installed
+    missing_library()
+    from PyEMD import EMD
+
     # Initiate pyEMD's EMD function
     emd = EMD()
 
@@ -78,3 +86,13 @@ def IMFs(timeseries: np.ndarray):
 
     # Returns the IMFs as a (..., timeseries.size) numpy array.
     return IMFs
+
+
+def missing_library():
+    try:
+        import PyEMD.EMD as _EMD
+    except ImportError:
+        raise ImportError(("PyEMD is required to do Empirical Mode "
+                           "decomposition. Please install PyEMD with 'pip "
+                           "install EMD-signal'.")
+                         )
diff --git a/README.md b/README.md
@@ -7,17 +7,12 @@
 [![Documentation Status](https://readthedocs.org/projects/mfdfa/badge/?version=latest)](https://mfdfa.readthedocs.io/en/latest/?badge=latest)
 
 
-### Preparation for version 0.4
-I have been deliberating over the presence of the Empirical Mode Decomposition [EMD](https://en.wikipedia.org/wiki/Hilbert%E2%80%93Huang_transform) in v0.3, i.e., the [PyEMD](https://github.com/laszukdawid/PyEMD) by [Dawid Laszuk](https://github.com/laszukdawid).
-The EMD package is phenomenal, but comprises an overhead of ~700 kb (and usually some additional packages).
-My plan now is to create a separate repository (likely to be named MFDFA-EMD) which adds the EMD functionality included in v0.3.
-
-The cause is simple: the original MFDFA is meant to be lightweight, i.e., depend only of `numpy`, so that one day maybe someone can make a parallel-isable version of it.
-
 # MFDFA
 Multifractal Detrended Fluctuation Analysis `MFDFA` is a model-independent method to uncover the self-similarity of a stochastic process or auto-regressive model.
 `DFA` was first developed by Peng *et al.*<sup>1</sup> and later extended to study multifractality `MFDFA` by Kandelhardt *et al.*<sup>2</sup>.
 
+In the latest release there is as well added a moving window system, especially useful for short timeseries, a recent extension to DFA called *extended DFA*, and the extra feature of Empirical Mode Decomposition as detrending method.
+
 # Installation
 To install MFDFA you can simply use
 
@@ -36,6 +31,8 @@ There is an added library `fgn` to generate fractional Gaussian noise.
 # Employing the `MFDFA` library
 
 ## An exemplary one-dimensional fractional Ornstein–Uhlenbeck process
+The rationale here is simple: Numerically integrate a stochastic process in which we know exactly the fractal properties, characterised by the Hurst coefficient, and recover this with MFDFA.
+We will use a fractional Ornstein–Uhlenbeck, a commonly employ stochastic process with mean-reverting properties.
 For a more detailed explanation on how to integrate an Ornstein–Uhlenbeck process, see the [kramersmoyal's package](https://github.com/LRydin/KramersMoyal#a-one-dimensional-stochastic-process).
 You can also follow the [fOU.ipynb](/examples/fOU.ipynb)
 
@@ -77,7 +74,7 @@ To now utilise the `MFDFA`, we take this exemplary process and run the (multifra
 ```python
 # Select a band of lags, which usually ranges from
 # very small segments of data, to very long ones, as
-lag = np.logspace(0.7, 4, 30).astype(int)
+lag = np.unique(np.logspace(0.5, 3, 100).astype(int))
 # Notice these must be ints, since these will segment
 # the data into chucks of lag size
 
@@ -105,15 +102,14 @@ np.polyfit(np.log(lag[:15]), np.log(dfa[:15]),1)[0]
 # Now what you should obtain is: slope = H + 1
 ```
 
-<img src="/other/fig1.png" title="MFDFA of a fractional Ornstein–Uhlenbeck process" height="250"/>
+<img src="docs/_static/fig1.png" title="MFDFA of a fractional Ornstein–Uhlenbeck process" height="250"/>
 
 
 ## Uncovering multifractality in stochastic processes
-`MFDFA`, as an extension to `DFA`, was developed to uncover if a given process is mono or multi fractal.
-Let `Xₜ` be a multi fractal stochastic process. This mean `Xₜ` scales with some function alpha(t) as `Xcₜ = |c|alpha(t) Xₜ`.
-With the help of taking different powers variations of the `DFA`, one we can distinguish monofractal and multifractal processes.
+You can find more about multifractality in the [documentation](https://mfdfa.readthedocs.io/en/latest/1dLevy.html).
 
 # Changelog
+- Version 0.4 - EMD is now optional. Restored back compatibility: py3.3 to py3.9. For EMD py3.6 or larger is needed.
 - Version 0.3 - Adding EMD detrending. First release. PyPI code.
 - Version 0.2 - Removed experimental features. Added documentation
 - Version 0.1 - Uploaded initial working code
@@ -128,11 +124,11 @@ This package abides to a [Conduct of Fairness](contributions.md).
 This library has been submitted for publication at [The Journal of Open Source Software](https://joss.theoj.org/) in December 2019. It was rejected. The review process can be found [here on GitHub](https://github.com/openjournals/joss-reviews/issues/1966). The plan is to extend the library and find another publisher.
 
 ### History
-This project was started in 2019 at the [Faculty of Mathematics, University of Oslo](https://www.mn.uio.no/math/english/research/groups/risk-stochastics/) in the Risk and Stochastics section by Leonardo Rydin Gorjão and is supported by Dirk Witthaut and the [Institute of Energy and Climate Research Systems Analysis and Technology Evaluation](https://www.fz-juelich.de/iek/iek-ste/EN/Home/home_node.html). I'm very thankful to all the folk in Section 3 in the Faculty of Mathematics, University of Oslo, for helping me getting around the world of stochastic processes: Dennis, Anton, Michele, Fabian, Marc, Prof. Benth and Prof. di Nunno. In April 2020 Galib Hassan joined in extending `MFDFA`, particularly the implementation of EMD.
+This project was started in 2019 at the [Faculty of Mathematics, University of Oslo](https://www.mn.uio.no/math/english/research/groups/risk-stochastics/) in the Risk and Stochastics section by Leonardo Rydin Gorjão and is supported by Dirk Witthaut and the [Institute of Energy and Climate Research Systems Analysis and Technology Evaluation](https://www.fz-juelich.de/iek/iek-ste/EN/Home/home_node.html). I'm very thankful to all the folk in Section 3 in the Faculty of Mathematics, University of Oslo, for helping me getting around the world of stochastic processes: Dennis, Anton, Michele, Fabian, Marc, Prof. Benth and Prof. di Nunno. In April 2020 Galib Hassan joined in extending `MFDFA`, particularly the implementation of `EMD`.
 
 
 ### Funding
-Helmholtz Association Initiative *Energy System 2050 - A Contribution of the Research Field Energy* and the grant No. VH-NG-1025, *STORM - Stochastics for Time-Space Risk Models* project of the Research Council of Norway (RCN) No. 274410, and the *E-ON Stipendienfonds*.
+Helmholtz Association Initiative *Energy System 2050 - A Contribution of the Research Field Energy* and the grant No. VH-NG-1025; *STORM - Stochastics for Time-Space Risk Models* project of the Research Council of Norway (RCN) No. 274410, and the *E-ON Stipendienfonds*.
 
 ### References
 <sup>1</sup>Peng, C.-K., Buldyrev, S. V., Havlin, S., Simons, M., Stanley, H. E., & Goldberger, A. L. (1994). *Mosaic organization of DNA nucleotides*. [Physical Review E, 49(2), 1685–1689](https://doi.org/10.1103/PhysRevE.49.1685)\

diff --git a/contributions.md b/contributions.md
@@ -1,5 +1,5 @@
 # Contributions
-`MDFA` has lived so far out of collaborations. It welcomes reviews and ideas from everyone. If you want to share your ideas or report a bug, open an [issue](https://github.com/LRydin/KramersMoyal/issues) here on GitHub, or contact me directly: [leonardo.rydin"at"gmail.com](mailto:leonardo.rydin@gmail.com)
+`MFDFA` has lived so far out of collaborations. It welcomes reviews and ideas from everyone. If you want to share your ideas or report a bug, open an [issue](https://github.com/LRydin/KramersMoyal/issues) here on GitHub, or contact me directly: [leonardo.rydin"at"gmail.com](mailto:leonardo.rydin@gmail.com)
 
 I'm are happy to have you help me in furthering this project, helping me improve the code and documentation, make it more user-friendly, broadening its applicability, or devising other methodologies of implementation.
 

diff --git a/docs/1dLevy.rst b/docs/1dLevy.rst
@@ -0,0 +1,47 @@
+Multifractality in one dimensional distributions
+================================================
+
+To show how multifractality can be studied, let us take a sample of random numbers of a symmetric `Lévy distribution <https://en.wikipedia.org/wiki/L%C3%A9vy_distribution>`_.
+
+
+Univariate random numbers from a Lévy stable distribution
+---------------------------------------------------------
+
+To obtain a sample of random numbers of Lévy stable distributions, use `scipy`'s `levy_stable`. In particular, take an :math:`\alpha`-stable distribution, with :math:`\alpha=1.5`
+
+.. code:: python
+
+   # Imports
+   from MFDFA import MFDFA
+   from scipy.stats import levy_stable
+
+   # Generate 100000 points
+   alpha = 1.5
+   X = levy_stable.rvs(alpha=alpha, beta = 0, size=10000)
+
+
+For :code:`MFDFA` to detect the multifractal spectrum of the data, we need to vary the parameter :math:`q\in[-10,10]` and exclude :math:`0`. Let us also use a quadratic polynomial fitting by setting :code:`order=2`
+
+.. code:: python
+
+   # Select a band of lags, which are ints
+   lag = np.unique(np.logspace(0.5, 3, 100).astype(int))
+
+   # Select a list of powers q
+   q_list = np.linspace(-10,10,41)
+   q_list = q_list[q_list!=0.0]
+
+   # The order of the polynomial fitting
+   order = 2
+
+   # Obtain the (MF)DFA as
+   lag, dfa = MFDFA(y, lag = lag, q = q_list, order = order)
+
+
+Again, we plot this in a double logarithmic scale, but now we include 6 curves, from 6 selected :math:`q={-10,-5-2,2,5,10}`. Include as well are the theoretical curves for :math:`q=-10`, with a slope of :math:`1/\alpha=1/1.5` and :math:`q=10`, with a slope of :math:`1/q=1/10`
+
+
+.. image:: /_static/fig2.png
+  :height: 450
+  :align: center
+  :alt: Plot of the MFDFA of a Lévy stable distribution for a few q values.