From 9b8259bc0854278ce303077f560d4942ae4e8a6a Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Sat, 18 Dec 2021 14:22:39 -0500 Subject: [PATCH] Convert docstrings to numpydoc convention. (#72) * Start converting docstrings to numpydoc convention. * Mark a few more to change. * Add linting job. * Docstrings! * Fix kwargs. * Update core.py * Some docstrings for tests. * More work. * More docstrings. * More docstrings. * Fix up examples. * Fix up docstrings more. --- .github/workflows/testing.yml | 23 + docs/Makefile | 5 + docs/api.rst | 5 +- docs/conf.py | 4 +- examples/01_basic_io/plot_create_dataset.py | 5 +- .../plot_meta-analysis_walkthrough.py | 8 +- pymare/__init__.py | 3 +- pymare/core.py | 153 ++++--- pymare/effectsize/__init__.py | 1 + pymare/effectsize/base.py | 416 ++++++++++-------- pymare/effectsize/expressions.py | 45 +- pymare/estimators/__init__.py | 1 + pymare/estimators/combination.py | 130 +++--- pymare/estimators/estimators.py | 302 +++++++------ pymare/results.py | 180 +++++--- pymare/stats.py | 113 +++-- pymare/tests/conftest.py | 9 +- pymare/tests/test_combination_tests.py | 8 +- pymare/tests/test_core.py | 5 + ...ffect_sizes.py => test_effectsize_base.py} | 21 +- ...ions.py => test_effectsize_expressions.py} | 5 +- pymare/tests/test_estimators.py | 22 +- pymare/tests/test_results.py | 35 +- pymare/tests/test_stan_estimators.py | 2 + pymare/tests/test_stats.py | 3 + 25 files changed, 906 insertions(+), 598 deletions(-) rename pymare/tests/{test_effect_sizes.py => test_effectsize_base.py} (90%) rename pymare/tests/{test_expressions.py => test_effectsize_expressions.py} (91%) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index d2c9042..35d71ea 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -59,3 +59,26 @@ jobs: - name: 'Upload coverage to CodeCov' uses: codecov/codecov-action@v1 if: success() + + flake8-lint: + runs-on: ubuntu-latest + name: Lint with flake8 + steps: + - name: Check out source repository + uses: actions/checkout@v2 + + - name: Set up Python environment + uses: actions/setup-python@v2 + with: + python-version: "3.7" + + - name: "Install the package" + shell: bash {0} + run: | + python -m pip install --progress-bar off --upgrade pip setuptools wheel + python -m pip install -e .[tests] + + - name: "Run flake8" + shell: bash {0} + run: | + flake8 pymare diff --git a/docs/Makefile b/docs/Makefile index c9d2c6d..6c655b2 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -15,6 +15,11 @@ help: clean: rm -r _build generated auto_examples +html-noplot: + $(SPHINXBUILD) -D plot_gallery=0 -b html $(ALLSPHINXOPTS) $(SOURCEDIR) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + .PHONY: help clean Makefile # Catch-all target: route all unknown targets to Sphinx using the new diff --git a/docs/api.rst b/docs/api.rst index 213d5c6..07e1e3c 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -44,8 +44,9 @@ API estimators.SampleSizeBasedLikelihoodEstimator estimators.StanMetaRegression estimators.Hedges - estimators.Stouffers - estimators.Fishers + estimators.StoufferCombinationTest + estimators.FisherCombinationTest + estimators.estimators.BaseEstimator .. 
_api_results_ref: diff --git a/docs/conf.py b/docs/conf.py index 359dee9..46b3a34 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -191,8 +191,8 @@ def setup(app): """From https://github.com/rtfd/sphinx_rtd_theme/issues/117.""" - app.add_stylesheet("theme_overrides.css") - app.add_stylesheet("pymare.css") + app.add_css_file("theme_overrides.css") + app.add_css_file("pymare.css") app.connect("autodoc-process-docstring", generate_example_rst) # Fix to https://github.com/sphinx-doc/sphinx/issues/7420 # from https://github.com/life4/deal/commit/7f33cbc595ed31519cefdfaaf6f415dada5acd94 diff --git a/examples/01_basic_io/plot_create_dataset.py b/examples/01_basic_io/plot_create_dataset.py index ac6d750..fc4fd59 100644 --- a/examples/01_basic_io/plot_create_dataset.py +++ b/examples/01_basic_io/plot_create_dataset.py @@ -7,16 +7,17 @@ Creating a dataset =================== -In PyMARE, operations are performed on :class:`pymare.core.Dataset` objects. +In PyMARE, operations are performed on :class:`~pymare.core.Dataset` objects. Datasets are very lightweight objects that store the data used for meta-analyses, including study-level estimates (y), variances (v), predictors (X), and sample sizes (n). """ +from pprint import pprint + ############################################################################### # Start with the necessary imports # -------------------------------- import pandas as pd -from pprint import pprint from pymare import core diff --git a/examples/02_meta-analysis/plot_meta-analysis_walkthrough.py b/examples/02_meta-analysis/plot_meta-analysis_walkthrough.py index 4c2c772..48efcf3 100644 --- a/examples/02_meta-analysis/plot_meta-analysis_walkthrough.py +++ b/examples/02_meta-analysis/plot_meta-analysis_walkthrough.py @@ -113,7 +113,7 @@ def var_to_ci(y, v, n): ############################################################################### # Create a Dataset object containing the data # -------------------------------------------- -dset = pymare.Dataset(y=y, X=None, v=v, n=n, add_intercept=True) +dset = pymare.core.Dataset(y=y, X=None, v=v, n=n, add_intercept=True) # Here is a dictionary to house results across models results = {} @@ -123,8 +123,8 @@ def var_to_ci(y, v, n): # ----------------------------------------------------------------------------- # When you have ``z`` or ``p``: # -# - :class:`pymare.estimators.Stouffers` -# - :class:`pymare.estimators.Fishers` +# - :class:`pymare.estimators.StoufferCombinationTest` +# - :class:`pymare.estimators.FisherCombinationTest` # # When you have ``y`` and ``v`` and don't want to estimate between-study variance: # @@ -149,7 +149,7 @@ def var_to_ci(y, v, n): # ````````````````````````````````````````````````````````````````````````````` # The two combination models in PyMARE are Stouffer's and Fisher's Tests. # -# Notice that these models don't use :class:`pymare.core.Dataset` objects. +# Notice that these models don't use :class:`~pymare.core.Dataset` objects. stouff = pymare.estimators.StoufferCombinationTest() stouff.fit(z[:, None]) print("Stouffers") diff --git a/pymare/__init__.py b/pymare/__init__.py index 8007665..e91cc9b 100644 --- a/pymare/__init__.py +++ b/pymare/__init__.py @@ -1,4 +1,4 @@ -"""PyMARE: Python Meta-Analysis & Regression Engine""" +"""PyMARE: Python Meta-Analysis & Regression Engine.""" from .core import Dataset, meta_regression from .effectsize import OneSampleEffectSizeConverter, TwoSampleEffectSizeConverter @@ -12,3 +12,4 @@ from . 
import _version __version__ = _version.get_versions()["version"] +del _version diff --git a/pymare/core.py b/pymare/core.py index 3e4992b..84b1c99 100644 --- a/pymare/core.py +++ b/pymare/core.py @@ -19,26 +19,39 @@ class Dataset: """Container for input data and arguments to estimators. - Args: - y (array-like, str): 1d array of study-level estimates with length K, - or the name of the column in data containing the y values. - v (array-like, str, optional): 1d array of study-level variances with - length K, or the name of the column in data containing v values. - X (array-like, list, optional): 1d or 2d array containing study-level - predictors (dimensions K x P), or a list of strings giving the - names of the columns in data containing the X values. - n (array-like, str, optional): 1d array of study-level sample sizes - (length K), or the name of the corresponding column in data. - data (pandas.DataFrame, optional): A pandas DataFrame containing y, v, - X, and/or n values. By default, columns are expected to have the - same names as arguments (e.g., the y values will be expected in the - 'y' column). This can be modified by passing strings giving column - names to any of the y, v, X, or n arguments. - X_names ([str], optional): List of length P containing the names of the - predictors. Ignored if data is provided (use X to specify columns). - add_intercept (bool, optional): If True, an intercept column is - automatically added to the predictor matrix. If False, the - predictors matrix is passed as-is to estimators. + Parameters + ---------- + y : None or :obj:`numpy.ndarray` of shape (K,) or :obj:`str`, optional + 1d array of study-level estimates with length K, or the name of the column in data + containing the y values. + Default is None. + v : None or :obj:`numpy.ndarray` of shape (K,) or :obj:`str`, optional + 1d array of study-level variances with length K, or the name of the column in data + containing v values. + Default is None. + X : None or :obj:`numpy.ndarray` of shape (K,[P]) or :obj:`list` of :obj:`str`, optional + 1d or 2d array containing study-level predictors (dimensions K x P), + or a list of strings giving the names of the columns in data containing the X values. + Default is None. + n : None or :obj:`numpy.ndarray` of shape (K,) or :obj:`str`, optional + 1d array of study-level sample sizes (length K), or the name of the corresponding column + in ``data``. + Default is None. + data : None or :obj:`pandas.DataFrame`, optional + A pandas DataFrame containing y, v, X, and/or n values. + By default, columns are expected to have the same names as arguments + (e.g., the y values will be expected in the 'y' column). + This can be modified by passing strings giving column names to any of the ``y``, ``v``, + ``X``, or ``n`` arguments. + Default is None. + X_names : None or :obj:`list` of :obj:`str`, optional + List of length P containing the names of the predictors. + Ignored if ``data`` is provided (use ``X`` to specify columns). + Default is None. + add_intercept : :obj:`bool`, optional + If True, an intercept column is automatically added to the predictor matrix. + If False, the predictors matrix is passed as-is to estimators. + Default is True. """ def __init__( @@ -94,47 +107,65 @@ def meta_regression( alpha=0.05, **kwargs, ): - """Fits the standard meta-regression/meta-analysis model to provided data. - - Args: - y (array-like, str): 1d array of study-level estimates with length K, - or the name of the column in data containing the y values. 
- v (array-like, str, optional): 1d array of study-level variances with - length K, or the name of the column in data containing v values. - X (array-like, list, optional): 1d or 2d array containing study-level - predictors (dimensions K x P), or a list of strings giving the - names of the columns in data containing the X values. - n (array-like, str, optional): 1d array of study-level sample sizes - (length K), or the name of the corresponding column in data. - data (pandas.DataFrame, pymare.Dataset, optional): If a Dataset - instance is passed, the y, v, X, n and associated arguments are - ignored, and data is passed directly to the selected estimator. - If a pandas DataFrame, y, v, X and/or n values are taken from the - DF columns. By default, columns are expected to have the same names - as arguments (e.g., the y values will be expected in the 'y' - column). This can be modified by passing strings giving column - names to any of the y, v, X, or n arguments. - X_names ([str], optional): List of length P containing the names of the - predictors. Ignored if data is provided (use X to specify columns). - add_intercept (bool, optional): If True, an intercept column is - automatically added to the predictor matrix. If False, the - predictors matrix is passed as-is to estimators. - method (str, optional): Name of estimation method. Defaults to 'ML'. - Supported estimators include: - * 'ML': Maximum-likelihood estimator - * 'REML': Restricted maximum-likelihood estimator - * 'DL': DerSimonian-Laird estimator - * 'HE': Hedges estimator - * 'WLS' or 'FE': Weighted least squares (fixed effects only) - * 'Stan': Full Bayesian MCMC estimation via Stan - ci_method (str, optional): Estimation method to use when computing - uncertainty estimates. Currently only 'QP' is supported. Defaults - to 'QP'. Ignored if method == 'Stan'. - alpha (float, optional): Desired alpha level (CIs will have 1 - alpha - coverage). Defaults to 0.05. - kwargs: Optional keyword arguments to pass onto the chosen estimator. - - Returns: + """Fit the standard meta-regression/meta-analysis model to provided data. + + Parameters + ---------- + y : None or :obj:`numpy.ndarray` of shape (K,) or :obj:`str`, optional + 1d array of study-level estimates with length K, or the name of the column in data + containing the y values. + Default is None. + v : None or :obj:`numpy.ndarray` of shape (K,) or :obj:`str`, optional + 1d array of study-level variances with length K, or the name of the column in data + containing v values. + Default is None. + X : None or :obj:`numpy.ndarray` of shape (K,[P]) or :obj:`list` of :obj:`str`, optional + 1d or 2d array containing study-level predictors (dimensions K x P), + or a list of strings giving the names of the columns in data containing the X values. + Default is None. + n : None or :obj:`numpy.ndarray` of shape (K,) or :obj:`str`, optional + 1d array of study-level sample sizes (length K), or the name of the corresponding column + in ``data``. + Default is None. + data : None or :obj:`pandas.DataFrame` or :obj:`~pymare.core.Dataset`, optional + If a Dataset instance is passed, the y, v, X, n and associated arguments are ignored, + and data is passed directly to the selected estimator. + If a pandas DataFrame, y, v, X and/or n values are taken from the DF columns. + By default, columns are expected to have the same names as arguments + (e.g., the y values will be expected in the 'y' column). + This can be modified by passing strings giving column names to any of the y, v, X, or n + arguments. 
+ X_names : None or :obj:`list` of :obj:`str`, optional + List of length P containing the names of the predictors. + Ignored if ``data`` is provided (use ``X`` to specify columns). + Default is None. + add_intercept : :obj:`bool`, optional + If True, an intercept column is automatically added to the predictor matrix. + If False, the predictors matrix is passed as-is to estimators. + Default is True. + method : {"ML", "REML", "DL", "HE", "WLS", "FE", "Stan"}, optional + Name of estimation method. Default is 'ML'. + Supported estimators include: + + - 'ML': Maximum-likelihood estimator + - 'REML': Restricted maximum-likelihood estimator + - 'DL': DerSimonian-Laird estimator + - 'HE': Hedges estimator + - 'WLS' or 'FE': Weighted least squares (fixed effects only) + - 'Stan': Full Bayesian MCMC estimation via Stan + ci_method : {"QP"}, optional + Estimation method to use when computing uncertainty estimates. + Currently only 'QP' is supported. Default is 'QP'. + Ignored if ``method == 'Stan'``. + alpha : :obj:`float`, optional + Desired alpha level (CIs will have 1 - alpha coverage). Default is 0.05. + **kwargs + Optional keyword arguments to pass onto the chosen estimator. + + Returns + ------- + :obj:`~pymare.results.MetaRegressionResults` or \ + :obj:`~pymare.results.BayesianMetaRegressionResults` A MetaRegressionResults or BayesianMetaRegressionResults instance, depending on the specified method ('Stan' will return the latter; all other methods return the former). diff --git a/pymare/effectsize/__init__.py b/pymare/effectsize/__init__.py index fe91405..d0a5424 100644 --- a/pymare/effectsize/__init__.py +++ b/pymare/effectsize/__init__.py @@ -1,3 +1,4 @@ +"""Tools for converting between effect-size measures.""" from .base import ( OneSampleEffectSizeConverter, TwoSampleEffectSizeConverter, diff --git a/pymare/effectsize/base.py b/pymare/effectsize/base.py index 37f2134..ca1e59b 100644 --- a/pymare/effectsize/base.py +++ b/pymare/effectsize/base.py @@ -17,21 +17,26 @@ def solve_system(system, known_vars=None): """Solve and evaluate a system of SymPy equations given known inputs. - Args: - system ([sympy.core.expr.Expr]): A list of SymPy expressions defining - the system to solve. - known_vars (dict, optional): A dictionary of known variables to use - when evaluating the solution. Keys are the names of parameters - (e.g., 'sem', 't'), values are numerical data types (including - numpy arrays). - - Returns: + Parameters + ---------- + system : :obj:`list` of :obj:`sympy.core.expr.Expr` + A list of SymPy expressions defining the system to solve. + known_vars : None or :obj:`dict`, optional + A dictionary of known variables to use + when evaluating the solution. Keys are the names of parameters + (e.g., 'sem', 't'), values are numerical data types (including + numpy arrays). Default is None. + + Returns + ------- + :obj:`dict` A dictionary of newly computed values, where the keys are parameter names and the values are numerical data types. - Notes: - The returned dictionary contains only keys that were not passed in as - input (i.e., already known variables will be ignored). + Notes + ----- + The returned dictionary contains only keys that were not passed in as + input (i.e., already known variables will be ignored). 
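+
+    Examples
+    --------
+    A minimal illustrative sketch (the inputs and printed output here are
+    hypothetical, so the doctest is skipped)::
+
+        >>> from sympy import sqrt, symbols
+        >>> sd, n, sem = symbols("sd n sem")
+        >>> system = [sem - sd / sqrt(n)]
+        >>> solve_system(system, {"sd": 2.0, "n": 16.0})  # doctest: +SKIP
+        {'sem': 0.5}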
""" system = system.copy() @@ -116,6 +121,7 @@ def _validate(self, kwargs): return kwargs def __getattr__(self, key): + """Access an instance attribute.""" if key.startswith("get_"): stat = key.replace("get_", "") return partial(self.get, stat=stat) @@ -123,12 +129,15 @@ def __getattr__(self, key): def update_data(self, incremental=False, **kwargs): """Update instance data. - Args: - incremental (bool): If True, updates data incrementally (i.e., - existing data will be preserved unless they're overwritten by - incoming keys). If False, all existing data is dropped first. - kwargs: Data values or arrays; keys are the names of the - quantities. All inputs to __init__ are valid. + Parameters + ---------- + incremental : :obj:`bool`, optional + If True, updates data incrementally (i.e., existing data will be preserved unless + they're overwritten by incoming keys). If False, all existing data is dropped first. + Default is False. + **kwargs + Data values or arrays; keys are the names of the quantities. + All inputs to ``__init__`` are valid. """ if not incremental: self.known_vars = {} @@ -159,6 +168,7 @@ def _get_system(self, stat): return system def to_dataset(self, measure, **kwargs): + """Convert conversion results to a Dataset.""" measure = measure.lower() y = self.get(measure) v = self.get("v_{}".format(measure), error=False) @@ -171,21 +181,25 @@ def to_dataset(self, measure, **kwargs): def get(self, stat, error=True): """Compute and return values for the specified statistic, if possible. - Args: - stat (str): The name of the quantity to retrieve. - error (bool): Specifies behavior in the event that the requested - quantity cannot be computed. If True (default), raises an - exception. If False, returns None. - - Returns: - A float or ndarray containing the requested parameter values, if - successfully computed. - - Notes: - All values computed via get() are internally cached. Do not try to - update the instance's known values directly; any change to input - data require either initialization of a new instance, or a call to - update_data(). + Parameters + ---------- + stat : :obj:`str` + The name of the quantity to retrieve. + error : :obj:`bool`, optional + Specifies behavior in the event that the requested quantity cannot be computed. + If True (default), raises an exception. If False, returns None. + + Returns + ------- + :obj:`float` or :obj:`numpy.ndarray` + A float or ndarray containing the requested parameter values, if successfully computed. + + Notes + ----- + All values computed via get() are internally cached. Do not try to + update the instance's known values directly; any change to input + data require either initialization of a new instance, or a call to + update_data(). """ stat = stat.lower() @@ -210,24 +224,33 @@ def get(self, stat, error=True): class OneSampleEffectSizeConverter(EffectSizeConverter): """Effect size converter for metric involving a single group/set of scores. - Args: - data (DataFrame): Optional pandas DataFrame to extract variables from. - Column names must match the controlled names listed below for - kwargs. If additional kwargs are provided, they will take - precedence over the values in the data frame. - m (array-like): Means or other continuous estimates - sd (array-like): Standard deviations - n (array-like): Sample sizes - r (array-like): Correlation coefficients - **kwargs: Optional keyword arguments providing additional inputs. All - values must be floats, 1d ndarrays, or any iterable that can be - converted to an ndarray. 
All variables must have the same length. - - Notes: - All input variables are assumed to reflect study- or analysis-level - summaries, and are _not_ individual data points. E.g., do not pass in - a vector of point estimates as `m` and a scalar for the SDs `sd`. - The lengths of all inputs must match. + Parameters + ---------- + data : None or :obj:`pandas.DataFrame`, optional + Optional pandas DataFrame to extract variables from. + Column names must match the controlled names listed below for + kwargs. If additional kwargs are provided, they will take + precedence over the values in the data frame. + Default is None. + m : None or :obj:`numpy.ndarray`, optional + Means or other continuous estimates + sd : None or :obj:`numpy.ndarray`, optional + Standard deviations + n : None or :obj:`numpy.ndarray`, optional + Sample sizes + r : None or :obj:`numpy.ndarray`, optional + Correlation coefficients + **kwargs + Optional keyword arguments providing additional inputs. All + values must be floats, 1d ndarrays, or any iterable that can be + converted to an ndarray. All variables must have the same length. + + Notes + ----- + All input variables are assumed to reflect study- or analysis-level + summaries, and are _not_ individual data points. E.g., do not pass in + a vector of point estimates as `m` and a scalar for the SDs `sd`. + The lengths of all inputs must match. """ _type = 1 @@ -238,29 +261,34 @@ def __init__(self, data=None, m=None, sd=None, n=None, r=None, **kwargs): def to_dataset(self, measure="RM", **kwargs): """Get a Pymare Dataset with y and v mapped to the specified measure. - Args: - measure (str): The measure to map to the Dataset's y and v - attributes (where y is the desired measure, and v is its - variance). Valid values include: - * 'RM': Raw mean of the group. - * 'SM': Standardized mean. This is often called Hedges g. - (one-sample), or equivalently, Cohen's one-sample d with - a bias correction applied. - * 'D': Cohen's d. Note that no bias correction is applied - (use 'SM' instead). - * 'R': Raw correlation coefficient. - * 'ZR': Fisher z-transformed correlation coefficient. - kwargs: Optional keyword arguments to pass onto the Dataset - initializer. Provides a way of supplementing the generated y - and v arrays with additional arguments (e.g., X, X_names, n). - See pymare.Dataset docs for details. - - Returns: - A pymare.Dataset instance. - - Notes: - Measures 'RM', 'SM', and 'D' require m, sd, and n as inputs. - Measures 'R' and 'ZR' require r and n as inputs. + Parameters + ---------- + measure : {"RM", "SM", "D", "R", "ZR"}, optional + The measure to map to the Dataset's y and v attributes + (where y is the desired measure, and v is its variance). Valid values include: + + - 'RM': Raw mean of the group. This is the default. + - 'SM': Standardized mean. This is often called Hedges' g + (one-sample), or equivalently, Cohen's one-sample d with + a bias correction applied. + - 'D': Cohen's d. Note that no bias correction is applied + (use 'SM' instead). + - 'R': Raw correlation coefficient. + - 'ZR': Fisher z-transformed correlation coefficient. + **kwargs + Optional keyword arguments to pass onto the Dataset + initializer. Provides a way of supplementing the generated y + and v arrays with additional arguments (e.g., X, X_names, n). + See pymare.Dataset docs for details. + + Returns + ------- + :obj:`~pymare.core.Dataset` + + Notes + ----- + Measures 'RM', 'SM', and 'D' require m, sd, and n as inputs. + Measures 'R' and 'ZR' require r and n as inputs.
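+
+        Examples
+        --------
+        A rough usage sketch (the numbers below are hypothetical, so the
+        doctest is skipped)::
+
+            >>> import numpy as np
+            >>> conv = OneSampleEffectSizeConverter(
+            ...     m=np.array([2.5, 3.1]),
+            ...     sd=np.array([1.2, 1.5]),
+            ...     n=np.array([24, 30]),
+            ... )
+            >>> dset = conv.to_dataset(measure="SM")  # doctest: +SKIP
+            >>> dset.y, dset.v  # doctest: +SKIP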
""" return super().to_dataset(measure, **kwargs) @@ -268,31 +296,41 @@ def to_dataset(self, measure="RM", **kwargs): class TwoSampleEffectSizeConverter(EffectSizeConverter): """Effect size converter for two-sample comparisons. - Args: - data (DataFrame): Optional pandas DataFrame to extract variables from. - Column names must match the controlled names listed below for - kwargs. If additional kwargs are provided, they will take - precedence over the values in the data frame. - m1 (array-like): Means for group 1 - m2 (array-like): Means for group 2 - sd1 (array-like): Standard deviations for group 1 - sd2 (array-like): Standard deviations for group 2 - n1 (array-like): Sample sizes for group 1 - n2 (array-like): Sample sizes for group 2 - **kwargs: Optional keyword arguments providing additional inputs. All - values must be floats, 1d ndarrays, or any iterable that can be - converted to an ndarray. - - Notes: - All input variables are assumed to reflect study- or analysis-level - summaries, and are _not_ individual data points. E.g., do not pass in - a vector of point estimates as `m1` and a scalar for the SDs `sd1`. - The lengths of all inputs must match. All variables must be passed in - as pairs (e.g., if m1 is provided, m2 must also be provided). - - When using the TwoSampleEffectSizeConverter, it is assumed that the - variable pairs are from independent samples. Paired-sampled comparisons - are not currently supported. + Parameters + ---------- + data : None or :obj:`pandas.DataFrame`, optional + Optional pandas DataFrame to extract variables from. + Column names must match the controlled names listed below for + kwargs. If additional kwargs are provided, they will take + precedence over the values in the data frame. + m1 : None or :obj:`numpy.ndarray`, optional + Means for group 1 + m2 : None or :obj:`numpy.ndarray`, optional + Means for group 2 + sd1 : None or :obj:`numpy.ndarray`, optional + Standard deviations for group 1 + sd2 : None or :obj:`numpy.ndarray`, optional + Standard deviations for group 2 + n1 : None or :obj:`numpy.ndarray`, optional + Sample sizes for group 1 + n2 : None or :obj:`numpy.ndarray`, optional + Sample sizes for group 2 + **kwargs + Optional keyword arguments providing additional inputs. All + values must be floats, 1d ndarrays, or any iterable that can be + converted to an ndarray. + + Notes + ----- + All input variables are assumed to reflect study- or analysis-level + summaries, and are _not_ individual data points. E.g., do not pass in + a vector of point estimates as `m1` and a scalar for the SDs `sd1`. + The lengths of all inputs must match. All variables must be passed in + as pairs (e.g., if m1 is provided, m2 must also be provided). + + When using the TwoSampleEffectSizeConverter, it is assumed that the + variable pairs are from independent samples. Paired-sampled comparisons + are not currently supported. """ _type = 2 @@ -319,27 +357,34 @@ def _validate(self, kwargs): def to_dataset(self, measure="SMD", **kwargs): """Get a Pymare Dataset with y and v mapped to the specified measure. - Args: - measure (str): The measure to map to the Dataset's y and v - attributes (where y is the desired measure, and v is its - variance). Valid values include: - * 'RMD': Raw mean difference between groups. - * 'SMD': Standardized mean difference between groups. This - is often called Hedges g, or equivalently, Cohen's d with - a bias correction applied. - * 'D': Cohen's d. Note that no bias correction is applied - (use 'SMD' instead). 
- kwargs: Optional keyword arguments to pass onto the Dataset - initializer. Provides a way of supplementing the generated y - and v arrays with additional arguments (e.g., X, X_names, n). - See pymare.Dataset docs for details. - - Returns: - A pymare.Dataset instance. - - Notes: - All measures require that m1, m2, sd1, sd2, n1, and n2 be passed in - as inputs (or be solvable from the passed inputs). + Parameters + ---------- + measure : {"SMD", "RMD", "D"}, optional + The measure to map to the Dataset's y and v + attributes (where y is the desired measure, and v is its + variance). Valid values include: + + - 'SMD': Standardized mean difference between groups. This + is often called Hedges g, or equivalently, Cohen's d with + a bias correction applied. + This is the default. + - 'RMD': Raw mean difference between groups. + - 'D': Cohen's d. Note that no bias correction is applied + (use 'SMD' instead). + **kwargs + Optional keyword arguments to pass onto the Dataset + initializer. Provides a way of supplementing the generated y + and v arrays with additional arguments (e.g., X, X_names, n). + See pymare.Dataset docs for details. + + Returns + ------- + :obj:`~pymare.core.Dataset` + + Notes + ----- + All measures require that m1, m2, sd1, sd2, n1, and n2 be passed in + as inputs (or be solvable from the passed inputs). """ return super().to_dataset(measure, **kwargs) @@ -361,76 +406,95 @@ def compute_measure( n2=None, **dataset_kwargs, ): - """Wrapper that auto-detects and applies the right converter class. + """Auto-detect and apply the right converter class. - Args: - measure (str): The desired output effect size measure. Valid values are - listed below, with the required named inputs in parentheses: - * 'RM' (m, sd, n): Raw mean of the group. - * 'SM' (m, sd, n): Standardized mean. This is often called Hedges + Parameters + ---------- + measure : {"RM", "SM", "R", "ZR", "RMD", "SMD", "D"} + The desired output effect size measure. Valid values are + listed below, with the required named inputs in parentheses: + + - 'RM' (m, sd, n): Raw mean of the group. + - 'SM' (m, sd, n): Standardized mean. This is often called Hedges g. (one-sample), or equivalently, Cohen's one-sample d with a bias correction applied. - * 'R' (r, n): Raw correlation coefficient. - * 'ZR' (r, n): Fisher z-transformed correlation coefficient. - * 'RMD' (m1, m2, sd1, sd2, n1, n2): Raw mean difference between + - 'R' (r, n): Raw correlation coefficient. + - 'ZR' (r, n): Fisher z-transformed correlation coefficient. + - 'RMD' (m1, m2, sd1, sd2, n1, n2): Raw mean difference between groups. - * 'SMD' (m1, m2, sd1, sd2, n1, n2): Standardized mean difference + - 'SMD' (m1, m2, sd1, sd2, n1, n2): Standardized mean difference between groups. This is often called Hedges g, or equivalently, Cohen's d with a bias correction applied. - * 'D' (m, sd, n, or m1, m2, sd1, sd2, n1, n2): Cohen's d. No bias + - 'D' (m, sd, n, or m1, m2, sd1, sd2, n1, n2): Cohen's d. No bias correction is applied (for that, use 'SM' or 'SMD' instead). Note that 'D' can be either one-sample or two-sample. This is specified in type, or (if type=='infer'), inferred from the passed arguments. - data (DataFrame, optional): A pandas DataFrame to extract variables - from. Column names must match the names of other args ('m', 'sd', - 'n2', etc.). If both a DataFrame and keyword arguments are - provided, the two will be merged, with variables passed as separate - arguments taking precedence over DataFrame columns in the event of - a clash. 
- comparison (str, int, optional): The type of originating comparison. - This is currently unnecessary, as the type can be deterministically - inferred from the input arguments and measure, but may become - necessary in future, and provides a way of imposing constraints on - code embedded in larger pipelines. Valid values: - 'infer' (*default): Infer the type of comparison from the input - arguments. Currently - 1: One-group comparison. Must be accompanied by some/all of the + data : None or :obj:`pandas.DataFrame`, optional + A pandas DataFrame to extract variables + from. Column names must match the names of other args ('m', 'sd', + 'n2', etc.). If both a DataFrame and keyword arguments are + provided, the two will be merged, with variables passed as separate + arguments taking precedence over DataFrame columns in the event of + a clash. + comparison : {"infer", 1, 2}, optional + The type of originating comparison. + This is currently unnecessary, as the type can be deterministically + inferred from the input arguments and measure, but may become + necessary in future, and provides a way of imposing constraints on + code embedded in larger pipelines. Valid values: + + - 'infer' (default): Infer the type of comparison from the input + arguments. + - 1: One-group comparison. Must be accompanied by some/all of the following named variables: m, sd, n, r. - 2: Two-group comparison. Independent samples are assumed. Must be + - 2: Two-group comparison. Independent samples are assumed. Must be accompanied by some/all of the following named variables: m1, m2, sd1, sd2, n1, n2. - return_type (str, optional): Controls what gets returned. Valid values: - 'tuple': A 2-tuple, where the first element is a 1-d array - containing the computed estimates (i.e., y), and the second - element is a 1-d array containing the associated sampling - variances. - 'dict': A dictionary with keys 'y' and 'v' that map to the arrays - described for 'tuple'. - 'dataset': A pymare Dataset instance, with y and v attributes set - to the corresponding arrays. Note that additional keyword - arguments can be passed onto the Dataset init via kwargs. - 'converter': The EffectSizeConverter class internally initialized - to handle the desired computation. The target measures will - have already been computed (and hence, cached), and can be - retrieved via get_('{measure}') and get_('v_{measure}') - m (array-like): Means or other estimates in single-group case - sd (array-like): Standard deviations in single-group case - n (array-like): Sample sizes in single-group case - r (array-like): Correlation coefficients - m1 (array-like): Means for group 1 - m2 (array-like): Means for group 2 - sd1 (array-like): Standard deviations for group 1 - sd2 (array-like): Standard deviations for group 2 - n1 (array-like): Sample sizes for group 1 - n2 (array-like): Sample sizes for group 2 - dataset_kwargs (optional): Optional keyword arguments passed on to the - Dataset initializer. Ignored unless return_type == 'dataset'. - - Returns: - A tuple, dict, or pymare.Dataset, depending on `return_type`. + return_type : {"tuple", "dict", "dataset", "converter"}, optional + Controls what gets returned. Valid values: + + - 'tuple': A 2-tuple, where the first element is a 1-d array + containing the computed estimates (i.e., y), and the second + element is a 1-d array containing the associated sampling + variances. + - 'dict': A dictionary with keys 'y' and 'v' that map to the arrays + described for 'tuple'. 
+ - 'dataset': A pymare Dataset instance, with y and v attributes set + to the corresponding arrays. Note that additional keyword + arguments can be passed onto the Dataset init via kwargs. + - 'converter': The EffectSizeConverter class internally initialized + to handle the desired computation. The target measures will + have already been computed (and hence, cached), and can be + retrieved via ``get_{measure}()`` and ``get_v_{measure}()``. + m : None or :obj:`numpy.ndarray`, optional + Means or other estimates in single-group case + sd : None or :obj:`numpy.ndarray`, optional + Standard deviations in single-group case + n : None or :obj:`numpy.ndarray`, optional + Sample sizes in single-group case + r : None or :obj:`numpy.ndarray`, optional + Correlation coefficients + m1 : None or :obj:`numpy.ndarray`, optional + Means for group 1 + m2 : None or :obj:`numpy.ndarray`, optional + Means for group 2 + sd1 : None or :obj:`numpy.ndarray`, optional + Standard deviations for group 1 + sd2 : None or :obj:`numpy.ndarray`, optional + Standard deviations for group 2 + n1 : None or :obj:`numpy.ndarray`, optional + Sample sizes for group 1 + n2 : None or :obj:`numpy.ndarray`, optional + Sample sizes for group 2 + **dataset_kwargs + Optional keyword arguments passed on to the Dataset initializer. + Ignored unless return_type == 'dataset'. + + Returns + ------- + :obj:`tuple` or :obj:`dict` or :obj:`~pymare.core.Dataset`, depending on ``return_type``. """ - var_args = dict(m=m, sd=sd, n=n, r=r, m1=m1, m2=m2, sd1=sd1, sd2=sd2, n1=n1, n2=n2) var_args = {k: v for k, v in var_args.items() if v is not None} diff --git a/pymare/effectsize/expressions.py b/pymare/effectsize/expressions.py index ce66428..8ccb4e3 100644 --- a/pymare/effectsize/expressions.py +++ b/pymare/effectsize/expressions.py @@ -12,13 +12,17 @@ class Expression: - """Represents a single statistical expression. - - Args: - expr (str): String representation of the mathematical expression. - description (str, optional): Optional text description of expression. - type (int, optional): Indicates whether the expression applies - in the one-sample case (1), two-sample case (2), or both (0). + """Represent a single statistical expression. + + Parameters + ---------- + expr : :obj:`str` + String representation of the mathematical expression. + description : :obj:`str`, optional + Optional text description of expression. + type : :obj:`int`, optional + Indicates whether the expression applies in the one-sample case (1), two-sample case (2), + or both (0). """ def __init__(self, expression, description=None, type=0): @@ -48,19 +52,23 @@ def _load_expressions(): def select_expressions(target, known_vars, type=1): - """Select a ~minimal system of expressions needed to solve for the target. - - Args: - target (str): The named statistic to solve for ('t', 'd', 'g', etc.). - known_vars (set): A set of strings giving the names of the known - variables. - type (int): Restricts the system to expressions that apply in - the one-sample case (1), two-sample case (2), or both (None). - - Returns: + """Select a minimal system of expressions needed to solve for the target. + + Parameters + ---------- + target : :obj:`str` + The named statistic to solve for ('t', 'd', 'g', etc.). + known_vars : :obj:`set` of :obj:`str` + A set of strings giving the names of the known variables. + type : :obj:`int`, optional + Restrict the system to expressions that apply in the one-sample case (1), + two-sample case (2), or both (None).
+ + Returns + ------- + :obj:`list` of :obj:`~pymare.effectsize.Expression` or None A list of Expression instances, or None if there is no solution. """ - exp_dict = defaultdict(list) exprs = one_sample_expressions if type == 1 else two_sample_expressions @@ -80,7 +88,6 @@ def select_expressions(target, known_vars, type=1): def df_search(sym, exprs, known, visited): """Recursively select expressions needed to solve for sym.""" - results = [] for exp in exp_dict[sym]: diff --git a/pymare/estimators/__init__.py b/pymare/estimators/__init__.py index a6e0166..c964654 100644 --- a/pymare/estimators/__init__.py +++ b/pymare/estimators/__init__.py @@ -1,3 +1,4 @@ +"""Estimators for meta-analyses and meta-regressions.""" from .combination import FisherCombinationTest, StoufferCombinationTest from .estimators import ( DerSimonianLaird, diff --git a/pymare/estimators/combination.py b/pymare/estimators/combination.py index 157abba..d9f05db 100644 --- a/pymare/estimators/combination.py +++ b/pymare/estimators/combination.py @@ -1,3 +1,4 @@ +"""Estimators for combination (p/z) tests.""" import warnings from abc import abstractmethod @@ -27,12 +28,14 @@ def __init__(self, mode="directed"): @abstractmethod def p_value(self, z, *args, **kwargs): + """Calculate p-values.""" pass def _z_to_p(self, z): return ss.norm.sf(z) def fit(self, z, *args, **kwargs): + """Fit the estimator to z-values.""" if self.mode == "concordant": ose = self.__class__(mode="directed") p1 = ose.p_value(z, *args, **kwargs) @@ -46,6 +49,7 @@ def fit(self, z, *args, **kwargs): return self def summary(self): + """Generate a summary of the estimator results.""" if not hasattr(self, "params_"): name = self.__class__.__name__ raise ValueError( @@ -61,44 +65,51 @@ class StoufferCombinationTest(CombinationTest): Takes a set of independent z-scores and combines them via Stouffer's method to produce a fixed-effect estimate of the combined effect. - Args: - mode (str): The type of test to perform-- i.e., what null hypothesis to - reject. See Winkler et al. (2016) for details. Valid options are: - * 'directed': tests a directional hypothesis--i.e., that the - observed value is consistently greater than 0 in the input - studies. - * 'undirected': tests an undirected hypothesis--i.e., that the - observed value differs from 0 in the input studies, but - allowing the direction of the deviation to vary by study. - * 'concordant': equivalent to two directed tests, one for each - sign, with correction for 2 tests. - - Notes: - (1) All input z-scores are assumed to correspond to one-sided p-values. - Do NOT pass in z-scores that have been directly converted from - two-tailed p-values, as these do not preserve directional - information. - (2) The 'directed' and 'undirected' modes are NOT the same as - one-tailed and two-tailed tests. In general, users who want to test - directed hypotheses should use the 'directed' mode, and users who - want to test for consistent effects in either the positive or - negative direction should use the 'concordant' mode. The - 'undirected' mode tests a fairly uncommon null that doesn't - constrain the sign of effects to be consistent across studies - (one can think of it as a test of extremity). In the vast majority - of meta-analysis applications, this mode is not appropriate, and - users should instead opt for 'directed' or 'concordant'. - (3) This estimator does not support meta-regression; any moderators - passed in to fit() as the X array will be ignored. 
+ Parameters + ---------- + mode : {"directed", "undirected", "concordant"}, optional + The type of test to perform-- i.e., what null hypothesis to + reject. See Winkler et al. (2016) for details. + Valid options are: + + - 'directed': tests a directional hypothesis--i.e., that the + observed value is consistently greater than 0 in the input + studies. This is the default. + - 'undirected': tests an undirected hypothesis--i.e., that the + observed value differs from 0 in the input studies, but + allowing the direction of the deviation to vary by study. + - 'concordant': equivalent to two directed tests, one for each + sign, with correction for 2 tests. + + Notes + ----- + (1) All input z-scores are assumed to correspond to one-sided p-values. + Do NOT pass in z-scores that have been directly converted from + two-tailed p-values, as these do not preserve directional + information. + (2) The 'directed' and 'undirected' modes are NOT the same as + one-tailed and two-tailed tests. In general, users who want to test + directed hypotheses should use the 'directed' mode, and users who + want to test for consistent effects in either the positive or + negative direction should use the 'concordant' mode. The + 'undirected' mode tests a fairly uncommon null that doesn't + constrain the sign of effects to be consistent across studies + (one can think of it as a test of extremity). In the vast majority + of meta-analysis applications, this mode is not appropriate, and + users should instead opt for 'directed' or 'concordant'. + (3) This estimator does not support meta-regression; any moderators + passed in to fit() as the X array will be ignored. """ # Maps Dataset attributes onto fit() args; see BaseEstimator for details. _dataset_attr_map = {"z": "y", "w": "v"} def fit(self, z, w=None): + """Fit the estimator to z-values, optionally with weights.""" return super().fit(z, w=w) def p_value(self, z, w=None): + """Calculate p-values.""" if w is None: w = np.ones_like(z) cz = (z * w).sum(0) / np.sqrt((w ** 2).sum(0)) @@ -111,41 +122,44 @@ class FisherCombinationTest(CombinationTest): Takes a set of independent z-scores and combines them via Fisher's method to produce a fixed-effect estimate of the combined effect. - Args: - mode (str): The type of test to perform-- i.e., what null hypothesis to - reject. See Winkler et al. (2016) for details. Valid options are: - * 'directed': tests a directional hypothesis--i.e., that the - observed value is consistently greater than 0 in the input - studies. - * 'undirected': tests an undirected hypothesis--i.e., that the - observed value differs from 0 in the input studies, but - allowing the direction of the deviation to vary by study. - * 'concordant': equivalent to two directed tests, one for each - sign, with correction for 2 tests. - - Notes: - (1) All input z-scores are assumed to correspond to one-sided p-values. - Do NOT pass in z-scores that have been directly converted from - two-tailed p-values, as these do not preserve directional - information. - (2) The 'directed' and 'undirected' modes are NOT the same as - one-tailed and two-tailed tests. In general, users who want to test - directed hypotheses should use the 'directed' mode, and users who - want to test for consistent effects in either the positive or - negative direction should use the 'concordant' mode. The - 'undirected' mode tests a fairly uncommon null that doesn't - constrain the sign of effects to be consistent across studies - (one can think of it as a test of extremity). 
In the vast majority - of meta-analysis applications, this mode is not appropriate, and - users should instead opt for 'directed' or 'concordant'. - (3) This estimator does not support meta-regression; any moderators - passed in to fit() as the X array will be ignored. + Parameters + ---------- + mode : {"directed", "undirected", "concordant"}, optional + The type of test to perform-- i.e., what null hypothesis to + reject. See Winkler et al. (2016) for details. Valid options are: + + - 'directed': tests a directional hypothesis--i.e., that the + observed value is consistently greater than 0 in the input + studies. This is the default. + - 'undirected': tests an undirected hypothesis--i.e., that the + observed value differs from 0 in the input studies, but + allowing the direction of the deviation to vary by study. + - 'concordant': equivalent to two directed tests, one for each + sign, with correction for 2 tests. + + Notes + ----- + (1) All input z-scores are assumed to correspond to one-sided p-values. + Do NOT pass in z-scores that have been directly converted from + two-tailed p-values, as these do not preserve directional + information. + (2) The 'directed' and 'undirected' modes are NOT the same as + one-tailed and two-tailed tests. In general, users who want to test + directed hypotheses should use the 'directed' mode, and users who + want to test for consistent effects in either the positive or + negative direction should use the 'concordant' mode. The + 'undirected' mode tests a fairly uncommon null that doesn't + constrain the sign of effects to be consistent across studies + (one can think of it as a test of extremity). In the vast majority + of meta-analysis applications, this mode is not appropriate, and + users should instead opt for 'directed' or 'concordant'. + (3) This estimator does not support meta-regression; any moderators + passed in to fit() as the X array will be ignored. """ # Maps Dataset attributes onto fit() args; see BaseEstimator for details. _dataset_attr_map = {"z": "y"} def p_value(self, z): + """Calculate p-values.""" p = self._z_to_p(z) chi2 = -2 * np.log(p).sum(0) return ss.chi2.sf(chi2, 2 * z.shape[0]) diff --git a/pymare/estimators/estimators.py b/pymare/estimators/estimators.py index 2eea4ed..e76a550 100644 --- a/pymare/estimators/estimators.py +++ b/pymare/estimators/estimators.py @@ -14,8 +14,11 @@ @wrapt.decorator def _loopable(wrapped, instance, args, kwargs): - # Decorator for fit() method of Estimator classes to handle naive looping - # over the 2nd dimension of y/v/n inputs, and reconstruction of outputs. + """Decorate fit() method of Estimator classes. + + Designed to handle naive looping over the 2nd dimension of y/v/n inputs, and reconstruction of + outputs. + """ n_iter = kwargs["y"].shape[1] if n_iter > 10: warn( @@ -48,6 +51,7 @@ class BaseEstimator(metaclass=ABCMeta): + """A base class for Estimators.""" # A class-level mapping from Dataset attributes to fit() arguments. Used by # fit_dataset() for estimators that take non-standard arguments (e.g., 'z' @@ -58,18 +62,23 @@ @abstractmethod def fit(self, *args, **kwargs): + """Fit the estimator to data.""" pass def fit_dataset(self, dataset, *args, **kwargs): - """Applies the current estimator to the passed Dataset container. + """Apply the current estimator to the passed Dataset container. A convenience interface that wraps fit() and automatically aligns the variables held in a Dataset with the required arguments.
- Args: - dataset (Dataset): A PyMARE Dataset instance holding the data. - args, kwargs: optional positional and keyword arguments to pass - onto the fit() method. + Parameters + ---------- + dataset : :obj:`~pymare.core.Dataset` + A PyMARE Dataset instance holding the data. + *args + Optional positional arguments to pass onto the estimator's :meth:`fit` method. + **kwargs + Optional keyword arguments to pass onto the estimator's :meth:`fit` method. """ all_kwargs = {} spec = getfullargspec(self.fit) @@ -93,16 +102,21 @@ def get_v(self, dataset): """Get the variances, or an estimate thereof, from the given Dataset. - Args: - dataset (Dataset): The dataset to use to retrieve/estimate v. - - Returns: - A 2-d NDArray. - - Notes: - This is equivalent to directly accessing `dataset.v` when variances - are present, but affords a way of estimating v from sample size (n) - for any estimator that implicitly estimates a sigma^2 parameter. + Parameters + ---------- + dataset : :obj:`~pymare.core.Dataset` + The dataset to use to retrieve/estimate v. + + Returns + ------- + :obj:`numpy.ndarray` + 2-dimensional array of variances/variance estimates. + + Notes + ----- + This is equivalent to directly accessing `dataset.v` when variances are present, + but affords a way of estimating v from sample size (n) for any estimator that implicitly + estimates a sigma^2 parameter. """ if dataset.v is not None: return dataset.v @@ -122,6 +136,7 @@ return self.params_["sigma2"] / dataset.n def summary(self): + """Generate a MetaRegressionResults object for the fitted estimator.""" if not hasattr(self, "params_"): name = self.__class__.__name__ raise ValueError( @@ -139,28 +154,34 @@ class WeightedLeastSquares(BaseEstimator): known/assumed between-study variance tau^2. When tau^2 = 0 (default), the model is the standard inverse-weighted fixed-effects meta-regression. - References: - Brockwell, S. E., & Gordon, I. R. (2001). A comparison of statistical - methods for meta-analysis. Statistics in Medicine, 20(6), 825–840. - https://doi.org/10.1002/sim.650 - - Args: - tau2 (float or 1-D array, optional): Assumed/known value of tau^2. Must - be >= 0. Defaults to 0. - - Notes: - This estimator accepts 2-D inputs for y and v--i.e., it can produce - estimates simultaneously for multiple independent sets of y/v values - (use the 2nd dimension for the parallel iterates). The X matrix must be - identical for all iterates. If no v argument is passed to fit(), unit - weights will be used, resulting in the ordinary least-squares (OLS) - solution. + Parameters + ---------- + tau2 : :obj:`float` or :obj:`numpy.ndarray` of shape (d,), optional + Assumed/known value of tau^2. Must be >= 0. + If an array, must have ``d`` elements, where ``d`` refers to the number of datasets. + Defaults to 0. + + Notes + ----- + This estimator accepts 2-D inputs for y and v--i.e., it can produce + estimates simultaneously for multiple independent sets of y/v values + (use the 2nd dimension for the parallel iterates). The X matrix must be + identical for all iterates. If no v argument is passed to fit(), unit + weights will be used, resulting in the ordinary least-squares (OLS) + solution. + + References + ---------- + Brockwell, S. E., & Gordon, I. R. (2001). A comparison of statistical + methods for meta-analysis. Statistics in Medicine, 20(6), 825-840.
+ https://doi.org/10.1002/sim.650 """ def __init__(self, tau2=0.0): self.tau2 = tau2 def fit(self, y, X, v=None): + """Fit the estimator to data.""" if v is None: v = np.ones_like(y) beta, inv_cov = weighted_least_squares(y, v, X, self.tau2, return_cov=True) @@ -174,22 +195,24 @@ class DerSimonianLaird(BaseEstimator): Estimates the between-subject variance tau^2 using the DerSimonian-Laird (1986) method-of-moments approach. - References: - DerSimonian, R., & Laird, N. (1986). Meta-analysis in clinical trials. - Controlled clinical trials, 7(3), 177-188. - Kosmidis, I., Guolo, A., & Varin, C. (2017). Improving the accuracy of - likelihood-based inference in meta-analysis and meta-regression. - Biometrika, 104(2), 489–496. https://doi.org/10.1093/biomet/asx001 - - Notes: - This estimator accepts 2-D inputs for y and v--i.e., it can produce - estimates simultaneously for multiple independent sets of y/v values - (use the 2nd dimension for the parallel iterates). The X matrix must be - identical for all iterates. + Notes + ----- + This estimator accepts 2-D inputs for y and v--i.e., it can produce + estimates simultaneously for multiple independent sets of y/v values + (use the 2nd dimension for the parallel iterates). The X matrix must be + identical for all iterates. + + References + ---------- + DerSimonian, R., & Laird, N. (1986). Meta-analysis in clinical trials. + Controlled clinical trials, 7(3), 177-188. + Kosmidis, I., Guolo, A., & Varin, C. (2017). Improving the accuracy of + likelihood-based inference in meta-analysis and meta-regression. + Biometrika, 104(2), 489-496. https://doi.org/10.1093/biomet/asx001 """ def fit(self, y, v, X): - + """Fit the estimator to data.""" y = ensure_2d(y) v = ensure_2d(v) @@ -220,20 +243,22 @@ def fit(self, y, v, X): class Hedges(BaseEstimator): """Hedges meta-regression estimator. - Estimates the between-subject variance tau^2 using the Hedges & Olkin - (1985) approach. + Estimates the between-subject variance tau^2 using the Hedges & Olkin (1985) approach. - References: - Hedges LV, Olkin I. 1985. Statistical Methods for Meta‐Analysis. + Notes + ----- + This estimator accepts 2-D inputs for y and v--i.e., it can produce + estimates simultaneously for multiple independent sets of y/v values + (use the 2nd dimension for the parallel iterates). The X matrix must be + identical for all iterates. - Notes: - This estimator accepts 2-D inputs for y and v--i.e., it can produce - estimates simultaneously for multiple independent sets of y/v values - (use the 2nd dimension for the parallel iterates). The X matrix must be - identical for all iterates. + References + ---------- + Hedges LV, Olkin I. 1985. Statistical Methods for Meta-Analysis. """ def fit(self, y, v, X): + """Fit the estimator to data.""" k, p = X.shape[:2] _unit_v = np.ones_like(y) beta, inv_cov = weighted_least_squares(y, _unit_v, X, return_cov=True) @@ -252,24 +277,28 @@ class VarianceBasedLikelihoodEstimator(BaseEstimator): Iteratively estimates the between-subject variance tau^2 and fixed effect coefficients using the specified likelihood-based estimator (ML or REML). - Args: - method (str, optional): The estimation method to use. Either 'ML' (for - maximum-likelihood) or 'REML' (restricted maximum-likelihood). - Defaults to 'ML'. - kwargs (dict, optional): Keyword arguments to pass to the SciPy - minimizer. - - Notes: - The ML and REML solutions are obtained via SciPy's scalar function - minimizer (scipy.optimize.minimize). Parameters to minimize() can be - passed in as keyword arguments. 
- - References: - DerSimonian, R., & Laird, N. (1986). Meta-analysis in clinical trials. - Controlled clinical trials, 7(3), 177-188. - Kosmidis, I., Guolo, A., & Varin, C. (2017). Improving the accuracy of - likelihood-based inference in meta-analysis and meta-regression. - Biometrika, 104(2), 489–496. https://doi.org/10.1093/biomet/asx001 + Parameters + ---------- + method : {"ML", "REML"}, optional + The estimation method to use. Either 'ML' (for + maximum-likelihood) or 'REML' (restricted maximum-likelihood). + Defaults to 'ML'. + **kwargs + Keyword arguments to pass to the SciPy minimizer. + + Notes + ----- + The ML and REML solutions are obtained via SciPy's scalar function + minimizer (:func:`scipy.optimize.minimize`). Parameters to ``minimize()`` can be + passed in as keyword arguments. + + References + ---------- + DerSimonian, R., & Laird, N. (1986). Meta-analysis in clinical trials. + Controlled clinical trials, 7(3), 177-188. + Kosmidis, I., Guolo, A., & Varin, C. (2017). Improving the accuracy of + likelihood-based inference in meta-analysis and meta-regression. + Biometrika, 104(2), 489-496. https://doi.org/10.1093/biomet/asx001 """ def __init__(self, method="ml", **kwargs): @@ -281,6 +310,7 @@ def __init__(self, method="ml", **kwargs): @_loopable def fit(self, y, v, X): + """Fit the estimator to data.""" # use D-L estimate for initial values est_DL = DerSimonianLaird().fit(y, v, X).params_ beta = est_DL["fe_params"] @@ -319,30 +349,33 @@ def _reml_nll(self, theta, y, v, X): class SampleSizeBasedLikelihoodEstimator(BaseEstimator): - """Likelihood-based estimator for estimates with known sample sizes but - unknown sampling variances. + """Likelihood-based estimator for data with known sample sizes but unknown sampling variances. Iteratively estimates the between-subject variance tau^2 and fixed effect betas using the specified likelihood-based estimator (ML or REML). - Args: - method (str, optional): The estimation method to use. Either 'ML' (for - maximum-likelihood) or 'REML' (restricted maximum-likelihood). - Defaults to 'ML'. - kwargs (dict, optional): Keyword arguments to pass to the SciPy - minimizer. - - Notes: - Homogeneity of sigma^2 across studies is assumed. The ML and REML - solutions are obtained via SciPy's scalar function minimizer - (scipy.optimize.minimize). Parameters to minimize() can be passed in as - keyword arguments. - - References: - Sangnawakij, P., Böhning, D., Niwitpong, S. A., Adams, S., Stanton, M., - & Holling, H. (2019). Meta-analysis without study-specific variance - information: Heterogeneity case. Statistical Methods in Medical Research, - 28(1), 196–210. https://doi.org/10.1177/0962280217718867 + Parameters + ---------- + method : {"ML", "REML"}, optional + The estimation method to use. Either 'ML' (for + maximum-likelihood) or 'REML' (restricted maximum-likelihood). + Defaults to 'ML'. + **kwargs + Keyword arguments to pass to the SciPy minimizer. + + Notes + ----- + Homogeneity of sigma^2 across studies is assumed. The ML and REML + solutions are obtained via SciPy's scalar function minimizer + (scipy.optimize.minimize). Parameters to minimize() can be passed in as + keyword arguments. + + References + ---------- + Sangnawakij, P., Böhning, D., Niwitpong, S. A., Adams, S., Stanton, M., + & Holling, H. (2019). Meta-analysis without study-specific variance + information: Heterogeneity case. Statistical Methods in Medical Research, + 28(1), 196-210. 
https://doi.org/10.1177/0962280217718867
    """

 def __init__(self, method="ml", **kwargs):
@@ -354,6 +387,7 @@ def __init__(self, method="ml", **kwargs):

     @_loopable
     def fit(self, y, n, X):
+        """Fit the estimator to data."""
         if n.std() < np.sqrt(np.finfo(float).eps):
             raise ValueError(
                 "Sample size-based likelihood estimator cannot "
@@ -410,20 +444,24 @@ def _reml_nll(self, theta, y, n, X):
 class StanMetaRegression(BaseEstimator):
     """Bayesian meta-regression estimator using Stan.

-    Args:
-        sampling_kwargs: Optional keyword arguments to pass on to the MCMC
-            sampler (e.g., `iter` for number of iterations).
-
-    Notes:
-        For most uses, this class should be ignored in favor of the functional
-        stan() estimator. The object-oriented interface is useful primarily
-        when fitting the meta-regression model repeatedly to different data;
-        the separation of .compile() and .fit() steps allows one to compile
-        the model only once.
-
-    Warning:
-        With changes to Stan in version 3, which requires Python 3.7, this class no longer works.
-        We will try to fix it in the future.
+    Parameters
+    ----------
+    **sampling_kwargs
+        Optional keyword arguments to pass on to the MCMC sampler
+        (e.g., `iter` for number of iterations).
+
+    Notes
+    -----
+    For most uses, this class should be ignored in favor of the functional
+    stan() estimator. The object-oriented interface is useful primarily
+    when fitting the meta-regression model repeatedly to different data;
+    the separation of .compile() and .fit() steps allows one to compile
+    the model only once.
+
+    Warnings
+    --------
+    With changes introduced in PyStan version 3, this class no longer works for
+    Python 3.7+. We will try to fix it in the future.
     """

     _result_cls = BayesianMetaRegressionResults
@@ -475,27 +513,34 @@ def compile(self):
     def fit(self, y, v, X, groups=None):
         """Run the Stan sampler and return results.

-        Args:
-            y (ndarray): 1d array of study-level estimates
-            v (ndarray): 1d array of study-level variances
-            X (ndarray): 1d or 2d array containing study-level predictors
-                (including intercept); has dimensions K x P, where K is the
-                number of studies and P is the number of predictor variables.
-            groups ([int], optional): 1d array of integers identifying
-                groups/clusters of observations in the y/v/X inputs. If
-                provided, values must consist of integers in the range of 1..k
-                (inclusive), where k is the number of distinct groups. When
-                None (default), it is assumed that each observation in the
-                inputs is a separate group.
-
-        Returns:
-            A StanFit4Model object (see PyStan documentation for details).
-
-        Notes:
-            This estimator supports (simple) hierarchical models. When multiple
-            estimates are available for at least one of the studies in `y`, the
-            `groups` argument can be used to specify the nesting structure
-            (i.e., which rows in `y`, `v`, and `X` belong to each study).
+        Parameters
+        ----------
+        y : :obj:`numpy.ndarray` of shape (K,)
+            1d array of study-level estimates
+        v : :obj:`numpy.ndarray` of shape (K,)
+            1d array of study-level variances
+        X : :obj:`numpy.ndarray` of shape (K[, P])
+            1d or 2d array containing study-level predictors
+            (including intercept); has dimensions K x P, where K is the
+            number of studies and P is the number of predictor variables.
+        groups : :obj:`list` of :obj:`int`, optional
+            1d array of integers identifying
+            groups/clusters of observations in the y/v/X inputs. If
+            provided, values must consist of integers in the range of 1..k
+            (inclusive), where k is the number of distinct groups.
When
+            None (default), it is assumed that each observation in the
+            inputs is a separate group.
+
+        Returns
+        -------
+        :obj:`pystan.StanFit4Model`
+            A StanFit4Model object (see PyStan documentation for details).
+
+        Notes
+        -----
+        This estimator supports (simple) hierarchical models. When multiple
+        estimates are available for at least one of the studies in `y`, the
+        `groups` argument can be used to specify the nesting structure
+        (i.e., which rows in `y`, `v`, and `X` belong to each study).
         """
         if y.ndim > 1 and y.shape[1] > 1:
             raise ValueError(
@@ -525,6 +570,7 @@ def fit(self, y, v, X, groups=None):
         return self

     def summary(self, ci=95):
+        """Generate a BayesianMetaRegressionResults object from the fitted estimator."""
         if self.result_ is None:
             name = self.__class__.__name__
             raise ValueError(
diff --git a/pymare/results.py b/pymare/results.py
index daeb65f..6ba4017 100644
--- a/pymare/results.py
+++ b/pymare/results.py
@@ -10,28 +10,30 @@
 try:
     import arviz as az
-except:
+except ImportError:
     az = None

-from .stats import q_gen, q_profile
+from pymare.stats import q_gen, q_profile


 class MetaRegressionResults:
     """Container for results generated by PyMARE meta-regression estimators.

-    Args:
-        estimator (`pymare.estimators.BaseEstimator`): The estimator used to
-            produce the results.
-        dataset (`pymare.Dataset`): A Dataset instance containing the inputs
-            to the estimator.
-        fe_params (NDarray): Fixed-effect coefficients. Must be a 2-d numpy
-            array with shape p x d, where p is the number of predictors, and
-            d is the number of parallel datasets (typically 1).
-        fe_cov (NDArray): The p x p inverse covariance (or precision) matrix
-            for the fixed effects.
-        tau2 (NDArray, float, optional): A 1-d array containing the estimated
-            tau^2 value for each parallel dataset (or a float, for a single
-            dataset). May be omitted by fixed-effects estimators.
+    Parameters
+    ----------
+    estimator : :obj:`~pymare.estimators.BaseEstimator`
+        The estimator used to produce the results.
+    dataset : :obj:`~pymare.core.Dataset`
+        A Dataset instance containing the inputs to the estimator.
+    fe_params : :obj:`numpy.ndarray` of shape (p, d)
+        Fixed-effect coefficients. Must be a 2-d numpy array with shape p x d,
+        where p is the number of predictors, and d is the number of parallel datasets
+        (typically 1).
+    fe_cov : :obj:`numpy.ndarray` of shape (p, p)
+        The p x p inverse covariance (or precision) matrix for the fixed effects.
+    tau2 : None or :obj:`numpy.ndarray` of shape (d,) or :obj:`float`, optional
+        A 1-d array containing the estimated tau^2 value for each parallel dataset
+        (or a float, for a single dataset). May be omitted by fixed-effects estimators.
""" def __init__(self, estimator, dataset, fe_params, fe_cov, tau2=None): @@ -44,12 +46,13 @@ def __init__(self, estimator, dataset, fe_params, fe_cov, tau2=None): @property @lru_cache(maxsize=1) def fe_se(self): + """Get fixed-effect standard error.""" cov = np.atleast_3d(self.fe_cov) # 3rd dim is for parallel datasets return np.sqrt(np.diagonal(cov)).T @lru_cache(maxsize=16) def get_fe_stats(self, alpha=0.05): - + """Get fixed-effect statistics.""" beta, se = self.fe_params, self.fe_se z_se = ss.norm.ppf(1 - alpha / 2) z = beta / se @@ -67,6 +70,7 @@ def get_fe_stats(self, alpha=0.05): @lru_cache(maxsize=16) def get_re_stats(self, method="QP", alpha=0.05): + """Get random-effect statistics.""" if method == "QP": n_iters = np.atleast_2d(self.tau2).shape[1] if n_iters > 10: @@ -89,7 +93,7 @@ def get_re_stats(self, method="QP", alpha=0.05): try: q_cis = q_profile(**args) - except Exception as exc: + except Exception: q_cis = {"ci_l": np.nan, "ci_u": np.nan} cis.append(q_cis) @@ -107,6 +111,7 @@ def get_re_stats(self, method="QP", alpha=0.05): @lru_cache(maxsize=16) def get_heterogeneity_stats(self): + """Get heterogeneity statistics.""" v = self.estimator.get_v(self.dataset) q_fe = q_gen(self.dataset.y, v, self.dataset.X, 0) df = self.dataset.y.shape[0] - self.dataset.X.shape[1] @@ -136,22 +141,28 @@ def to_df(self, alpha=0.05): def permutation_test(self, n_perm=1000): """Run permutation test. - Args: - n_perm (int):Number of permutations to generate. The actual number - used may be smaller in the event of an exact test (see below), - but will never be larger. + Parameters + ---------- + n_perm : :obj:`int`, optional + Number of permutations to generate. The actual number used may be smaller in the event + of an exact test (see below), but will never be larger. + Default is 1000. - Returns: + Returns + ------- + :obj:`~pymare.results.PermutationTestResults` An instance of class PermutationTestResults. - Notes: - If the number of possible permutations is smaller than n_perm, an - exact test will be conducted. Otherwise an approximate test will be - conducted by randomly shuffling the outcomes n_perm times (or, for - intercept-only models, by randomly flipping their signs). Note that - for closed-form estimators (e.g., 'DL' and 'HE'), permuted datasets - are estimated in parallel. This means that one can often set very - high n_perm values (e.g., 100k) with little performance degradation. + Notes + ----- + If the number of possible permutations is smaller than n_perm, an exact test will be + conducted. + Otherwise an approximate test will be conducted by randomly shuffling the outcomes n_perm + times (or, for intercept-only models, by randomly flipping their signs). + Note that for closed-form estimators (e.g., 'DL' and 'HE'), permuted datasets are + estimated in parallel. + This means that one can often set very high n_perm values (e.g., 100k) with little + performance degradation. """ n_obs, n_datasets = self.dataset.y.shape has_mods = self.dataset.X.shape[1] > 1 @@ -230,13 +241,16 @@ def permutation_test(self, n_perm=1000): class CombinationTestResults: """Container for results generated by p-value combination methods. - Args: - estimator (`pymare.estimators.BaseEstimator`): The estimator used to - produce the results. - dataset (`pymare.Dataset`): A Dataset instance containing the inputs - to the estimator. - z (NDArray, optional): Array of z-scores. - p (NDArray, optional): Array of right-tailed p-values. 
+ Parameters + ---------- + estimator : :obj:`~pymare.estimators.estimators.BaseEstimator` + The estimator used to produce the results. + dataset : :obj:`~pymare.core.Dataset` + A Dataset instance containing the inputs to the estimator. + z : :obj:`numpy.ndarray`, optional + Array of z-scores. Default is None. + p : :obj:`numpy.ndarray`, optional + Array of right-tailed p-values. Default is None. """ def __init__(self, estimator, dataset, z=None, p=None): @@ -250,6 +264,7 @@ def __init__(self, estimator, dataset, z=None, p=None): @property @lru_cache(maxsize=1) def z(self): + """Z-values.""" if self._z is None: self._z = ss.norm.isf(self.p) return self._z @@ -257,6 +272,7 @@ def z(self): @property @lru_cache(maxsize=1) def p(self): + """P-values.""" if self._p is None: self._p = ss.norm.sf(self.z) return self._p @@ -264,15 +280,20 @@ def p(self): def permutation_test(self, n_perm=1000): """Run permutation test. - Args: - n_perm (int): Number of permutations to generate. The actual number - used may be smaller in the event of an exact test (see below), - but will never be larger. + Parameters + ---------- + n_perm : :obj:`int`, optional + Number of permutations to generate. The actual number used may be smaller in the event + of an exact test (see below), but will never be larger. + Default is 1000. - Returns: + Returns + ------- + :obj:`~pymare.results.PermutationTestResults` An instance of class PermutationTestResults. - Notes: + Notes + ----- If the number of possible permutations is smaller than n_perm, an exact test will be conducted. Otherwise an approximate test will be conducted by randomly shuffling the outcomes n_perm times (or, for @@ -339,12 +360,16 @@ def __init__(self, results, perm_p, n_perm, exact=False): def to_df(self, **kwargs): """Export permutation test results as a pandas DF. - Args: - kwargs: Keyword arguments to pass onto to_df() calls of parent - results class (e.g., in case of MetaRegressionResults class, - `alpha` is available). + Parameters + ---------- + **kwargs + Keyword arguments to pass onto to_df() calls of parent + results class (e.g., in case of MetaRegressionResults class, + `alpha` is available). - Returns: + Returns + ------- + :obj:`pandas.DataFrame` A pandas DataFrame that adds columns to the standard fixed effect result table based on permutation test results. A column is added for every name found in both the parent DF and the params @@ -359,20 +384,21 @@ def to_df(self, **kwargs): class BayesianMetaRegressionResults: """Container for MCMC sampling-based PyMARE meta-regression estimators. - Args: - data (`StanFit4Model` or `InferenceData`): Either a StanFit4Model - instanced returned from PyStan or an ArviZ InferenceData instance. - dataset (`pymare.Dataset`): A Dataset instance containing the inputs - to the estimator. - ci (float, optional): Desired width of highest posterior density (HPD) - interval. Defaults to 95%. + Parameters + ---------- + data : :obj:`pystan.StanFit4Model` or :obj:`arviz.InferenceData` + Either a StanFit4Model instance returned from PyStan or an ArviZ InferenceData instance. + dataset : :obj:`~pymare.core.Dataset` + A Dataset instance containing the inputs to the estimator. + ci : :obj:`float`, optional + Desired width of highest posterior density (HPD) interval. Defaults to 95.0 (95%). """ def __init__(self, data, dataset, ci=95.0): if az is None: raise ValueError( - "ArviZ package must be installed in order to work" - " with the BayesianMetaRegressionResults class." 
+ "ArviZ package must be installed in order to work " + "with the BayesianMetaRegressionResults class." ) if data.__class__.__name__ == "StanFit4Model": data = az.from_pystan(data) @@ -383,12 +409,17 @@ def __init__(self, data, dataset, ci=95.0): def summary(self, include_theta=False, **kwargs): """Summarize the posterior estimates via ArviZ. - Args: - include_theta (bool, optional): Whether or not to include the - estimated group-level means in the summary. Defaults to False. - kwargs: Optional keyword arguments to pass onto ArviZ's summary(). - - Returns: + Parameters + ---------- + include_theta : :obj:`bool`, optional + Whether or not to include the estimated group-level means in the summary. + Defaults to False. + **kwargs + Optional keyword arguments to pass onto ArviZ's summary(). + + Returns + ------- + :obj:`pandas.DataFrame` A pandas DataFrame, unless the `fmt="xarray"` argument is passed in kwargs, in which case an xarray Dataset is returned. """ @@ -401,16 +432,19 @@ def summary(self, include_theta=False, **kwargs): def plot(self, kind="trace", **kwargs): """Generate various plots of the posterior estimates via ArviZ. - Args: - kind (str, optional): The type of ArviZ plot to generate. Can be - any named function of the form "plot_{}" in the ArviZ - namespace (e.g., 'trace', 'forest', 'posterior', etc.). - Defaults to 'trace'. - kwargs: Optional keyword arguments passed onto the corresponding - ArviZ plotting function (see ArviZ docs for details). - - Returns: - A matplotlib or bokeh object, depending on plot kind and kwargs. + Parameters + ---------- + kind : :obj:`str`, optional + The type of ArviZ plot to generate. Can be any named function of the form "plot_{}" in + the ArviZ namespace (e.g., 'trace', 'forest', 'posterior', etc.). + Defaults to 'trace'. + **kwargs + Optional keyword arguments passed onto the corresponding + ArviZ plotting function (see ArviZ docs for details). + + Returns + ------- + A matplotlib or bokeh object, depending on plot kind and kwargs. """ name = "plot_{}".format(kind) plotter = getattr(az, name) diff --git a/pymare/stats.py b/pymare/stats.py index f72537a..126ccab 100644 --- a/pymare/stats.py +++ b/pymare/stats.py @@ -6,20 +6,28 @@ def weighted_least_squares(y, v, X, tau2=0.0, return_cov=False): - """2-D weighted least squares. - - Args: - y (NDArray): 2-d array of estimates (studies x parallel datasets) - v (NDArray): 2-d array of sampling variances - X (NDArray): Fixed effect design matrix - tau2 (float): tau^2 estimate to use for weights - return_cov (bool): Whether or not to return the inverse cov matrix - - Returns: + """Perform 2-D weighted least squares. + + Parameters + ---------- + y : :obj:`numpy.ndarray` + 2-d array of estimates (studies x parallel datasets) + v : :obj:`numpy.ndarray` + 2-d array of sampling variances + X : :obj:`numpy.ndarray` + Fixed effect design matrix + tau2 : :obj:`float`, optional + tau^2 estimate to use for weights + return_cov : :obj:`bool`, optional + Whether or not to return the inverse cov matrix. + Default = False. + + Returns + ------- + params[, cov] If return_cov is True, returns both fixed parameter estimates and the inverse covariance matrix; if False, only the parameter estimates. """ - w = 1.0 / (v + tau2) # Einsum indices: k = studies, p = predictors, i = parallel iterates @@ -52,29 +60,38 @@ def ensure_2d(arr): def q_profile(y, v, X, alpha=0.05): """Get the CI for tau^2 via the Q-Profile method (Viechtbauer, 2007). 
- Args: - y (ndarray): 1d array of study-level estimates - v (ndarray): 1d array of study-level variances - X (ndarray): 1d or 2d array containing study-level predictors - (including intercept); has dimensions K x P, where K is the number - of studies and P is the number of predictor variables. - alpha (float, optional): alpha value defining the coverage of the CIs, - where width(CI) = 1 - alpha. Defaults to 0.05. - - Returns: + Parameters + ---------- + y : :obj:`numpy.ndarray` of shape (K,) + 1d array of study-level estimates + v : :obj:`numpy.ndarray` of shape (K,) + 1d array of study-level variances + X : :obj:`numpy.ndarray` of shape (K[, P]) + 1d or 2d array containing study-level predictors + (including intercept); has dimensions K x P, where K is the number + of studies and P is the number of predictor variables. + alpha : :obj:`float`, optional + alpha value defining the coverage of the CIs, + where width(CI) = 1 - alpha. Defaults to 0.05. + + Returns + ------- + :obj:`dict` A dictionary with keys 'ci_l' and 'ci_u', corresponding to the lower and upper bounds of the tau^2 confidence interval, respectively. - Notes: - Following the Viechtbauer implementation, this method returns the - interval that gives an equal probability mass at both tails (i.e., - P(tau^2 <= lower_bound) == P(tau^2 >= upper_bound) == alpha/2), and - *not* the smallest possible range of tau^2 values that provides the - desired coverage. - - References: - Viechtbauer, W. (2007). Confidence intervals for the amount of - heterogeneity in meta-analysis. Statistics in Medicine, 26(1), 37-52. + Notes + ----- + Following the Viechtbauer implementation, this method returns the + interval that gives an equal probability mass at both tails (i.e., + P(tau^2 <= lower_bound) == P(tau^2 >= upper_bound) == alpha/2), and + *not* the smallest possible range of tau^2 values that provides the + desired coverage. + + References + ---------- + Viechtbauer, W. (2007). Confidence intervals for the amount of + heterogeneity in meta-analysis. Statistics in Medicine, 26(1), 37-52. """ k, p = X.shape df = k - p @@ -95,24 +112,32 @@ def q_profile(y, v, X, alpha=0.05): def q_gen(y, v, X, tau2): - """Generalized form of Cochran's Q-statistic. - - Args: - y (ndarray): 1d array of study-level estimates - v (ndarray): 1d array of study-level variances - X (ndarray): 1d or 2d array containing study-level predictors - (including intercept); has dimensions K x P, where K is the number - of studies and P is the number of predictor variables. - tau2 (float): Between-study variance. Must be >= 0. - - Returns: + """Calculate a generalized form of Cochran's Q-statistic. + + Parameters + ---------- + y : :obj:`numpy.ndarray` + 1d array of study-level estimates + v : :obj:`numpy.ndarray` + 1d array of study-level variances + X : :obj:`numpy.ndarray` + 1d or 2d array containing study-level predictors + (including intercept); has dimensions K x P, where K is the number + of studies and P is the number of predictor variables. + tau2 : :obj:`float` + Between-study variance. Must be >= 0. + + Returns + ------- + :obj:`float` A float giving the value of Cochran's Q-statistic. - References: + References + ---------- Veroniki, A. A., Jackson, D., Viechtbauer, W., Bender, R., Bowden, J., Knapp, G., Kuss, O., Higgins, J. P., Langan, D., & Salanti, G. (2016). Methods to estimate the between-study variance and its uncertainty in - meta-analysis. Research synthesis methods, 7(1), 55–79. + meta-analysis. Research synthesis methods, 7(1), 55-79. 
https://doi.org/10.1002/jrsm.1164 """ if np.any(tau2 < 0): diff --git a/pymare/tests/conftest.py b/pymare/tests/conftest.py index 45edbdb..6a3e988 100644 --- a/pymare/tests/conftest.py +++ b/pymare/tests/conftest.py @@ -1,11 +1,13 @@ -import pytest +"""Data for tests.""" import numpy as np +import pytest from pymare import Dataset @pytest.fixture(scope="package") def variables(): + """Build basic numpy variables.""" y = np.array([[-1, 0.5, 0.5, 0.5, 1, 1, 2, 10]]).T v = np.array([[1, 1, 2.4, 0.5, 1, 1, 1.2, 1.5]]).T X = np.array([1, 1, 2, 2, 4, 4, 2.8, 2.8]) @@ -14,11 +16,13 @@ def variables(): @pytest.fixture(scope="package") def dataset(variables): + """Build a Dataset compiled from the variables fixture.""" return Dataset(*variables, X_names=["my_covariate"]) @pytest.fixture(scope="package") def small_dataset_2d(variables): + """Build a small Dataset with 2D data.""" y = np.array([[1.5, 1.9, 2.2], [4, 2, 1]]).T v = np.array([[1, 0.8, 3], [1, 1.5, 1]]).T return Dataset(y, v) @@ -26,6 +30,7 @@ def small_dataset_2d(variables): @pytest.fixture(scope="package") def dataset_2d(variables): + """Build a larger Dataset with 2D data.""" y, v, X = variables y = np.repeat(y, 3, axis=1) y[:, 1] = np.random.randint(-10, 10, size=len(y)) @@ -36,6 +41,7 @@ def dataset_2d(variables): @pytest.fixture(scope="package") def dataset_n(): + """Build a Dataset with sample sizes, but no variances.""" y = np.array([[-3.0, -0.5, 0.0, -5.01, 0.35, -2.0, -6.0, -4.0, -4.3, -0.1, -1.0]]).T n = ( np.array( @@ -48,6 +54,7 @@ def dataset_n(): @pytest.fixture(scope="package") def vars_with_intercept(): + """Build basic numpy variables with intercepts included in the design matrix.""" y = np.array([[-1, 0.5, 0.5, 0.5, 1, 1, 2, 10]]).T v = np.array([[1, 1, 2.4, 0.5, 1, 1, 1.2, 1.5]]).T X = np.array([np.ones(8), [1, 1, 2, 2, 4, 4, 2.8, 2.8]]).T diff --git a/pymare/tests/test_combination_tests.py b/pymare/tests/test_combination_tests.py index 7d8367d..feacbc9 100644 --- a/pymare/tests/test_combination_tests.py +++ b/pymare/tests/test_combination_tests.py @@ -1,10 +1,10 @@ +"""Tests for pymare.estimators.combination.""" import numpy as np -import scipy.stats as ss import pytest +import scipy.stats as ss -from pymare.estimators import StoufferCombinationTest, FisherCombinationTest from pymare import Dataset - +from pymare.estimators import FisherCombinationTest, StoufferCombinationTest _z1 = np.array([2.1, 0.7, -0.2, 4.1, 3.8])[:, None] _z2 = np.c_[_z1, np.array([-0.6, -1.61, -2.3, -0.8, -4.01])[:, None]] @@ -27,6 +27,7 @@ @pytest.mark.parametrize("Cls,data,mode,expected", _params) def test_combination_test(Cls, data, mode, expected): + """Test CombinationTest Estimators with numpy data.""" results = Cls(mode).fit(data).params_ z = ss.norm.isf(results["p"]) assert np.allclose(z, expected, atol=1e-5) @@ -34,6 +35,7 @@ def test_combination_test(Cls, data, mode, expected): @pytest.mark.parametrize("Cls,data,mode,expected", _params) def test_combination_test_from_dataset(Cls, data, mode, expected): + """Test CombinationTest Estimators with PyMARE Datasets.""" dset = Dataset(y=data) est = Cls(mode).fit_dataset(dset) results = est.summary() diff --git a/pymare/tests/test_core.py b/pymare/tests/test_core.py index 3bff35d..0757f5f 100644 --- a/pymare/tests/test_core.py +++ b/pymare/tests/test_core.py @@ -1,3 +1,4 @@ +"""Tests for pymare.core.""" import numpy as np import pandas as pd @@ -5,6 +6,7 @@ def test_dataset_init(variables): + """Test Dataset creation from numpy arrays.""" dataset = Dataset(*variables, X_names=["bork"]) n = 
len(variables[0])
@@ -17,6 +19,7 @@ def test_dataset_init(variables):

 def test_dataset_init_from_df(variables):
+    """Test Dataset creation from a DataFrame."""
     df = pd.DataFrame({"y": [2, 4, 6], "v_alt": [100, 100, 100], "X1": [5, 2, 1], "X7": [9, 8, 7]})
     dataset = Dataset(v="v_alt", X=["X1", "X7"], data=df)
     assert dataset.X.shape == (3, 3)
@@ -26,6 +29,7 @@ def test_dataset_init_from_df(variables):

 def test_meta_regression_1(variables):
+    """Test meta_regression function."""
     results = meta_regression(*variables, X_names=["my_cov"], method="REML")
     beta, tau2 = results.fe_params, results.tau2
     assert np.allclose(beta.ravel(), [-0.1066, 0.7700], atol=1e-4)
@@ -35,6 +39,7 @@ def test_meta_regression_1(variables):

 def test_meta_regression_2(dataset_n):
+    """Test meta_regression function."""
     y, n = dataset_n.y, dataset_n.n
     df = meta_regression(y=y, n=n).to_df()
     assert df.shape == (1, 7)
diff --git a/pymare/tests/test_effect_sizes.py b/pymare/tests/test_effectsize_base.py
similarity index 90%
rename from pymare/tests/test_effect_sizes.py
rename to pymare/tests/test_effectsize_base.py
index 714607c..45e5f05 100644
--- a/pymare/tests/test_effect_sizes.py
+++ b/pymare/tests/test_effectsize_base.py
@@ -1,19 +1,19 @@
-import pytest
+"""Tests for pymare.effectsize.base."""
 import numpy as np
 import pandas as pd
+import pytest

+from pymare import Dataset
 from pymare.effectsize import (
     OneSampleEffectSizeConverter,
-    solve_system,
-    select_expressions,
-    compute_measure,
     TwoSampleEffectSizeConverter,
+    compute_measure,
 )
-from pymare import Dataset


 @pytest.fixture(scope="module")
 def one_samp_data():
+    """Create one-sample data for tests."""
     return {
         "m": np.array([7, 5, 4]),
         "sd": np.sqrt(np.array([4.2, 1.2, 1.9])),
@@ -24,6 +24,7 @@

 @pytest.fixture(scope="module")
 def two_samp_data():
+    """Create two-sample data for tests."""
     return {
         "m1": np.array([4, 2]),
         "sd1": np.sqrt(np.array([1, 9])),
@@ -35,6 +36,7 @@

 def test_EffectSizeConverter_smoke_test(two_samp_data):
+    """Perform a smoke test on the effect size converters."""
     data = two_samp_data
     esc = OneSampleEffectSizeConverter(m=data["m1"], sd=data["sd1"], n=data["n1"])
     assert set(esc.known_vars.keys()) == {"m", "sd", "n"}
@@ -48,6 +50,7 @@

 def test_esc_implicit_dtype_conversion():
+    """Test effect size conversion with implicit datatype conversion."""
     esc = OneSampleEffectSizeConverter(m=[10, 12, 18])
     assert "m" in esc.known_vars
     assert isinstance(esc.known_vars["m"], np.ndarray)
@@ -55,12 +58,14 @@

 def test_EffectSizeConverter_from_df(two_samp_data):
+    """Test effect size conversion from a DataFrame."""
     df = pd.DataFrame(two_samp_data)
     esc = TwoSampleEffectSizeConverter(df)
     assert np.allclose(esc.get_smd(), np.array([-0.61065, -0.13707]), atol=1e-5)

 def test_EffectSizeConverter_to_dataset(two_samp_data):
+    """Test conversion of effect-size converter outputs to a Dataset."""
     esc = TwoSampleEffectSizeConverter(**two_samp_data)
     X = np.array([1, 2])
     dataset = esc.to_dataset(X=X, X_names=["dummy"])
@@ -69,6 +74,7 @@

 def test_2d_array_conversion():
+    """Test conversion of 2D data."""
     shape = (10, 2)
     data = {
         "m": np.random.randint(10, size=shape),
@@ -88,6 +94,7 @@

 def test_convert_r_and_n_to_rz():
+    """Test Fisher's R-to-Z transform."""
     r = [0.2, 0.16, 0.6]
     n = (68, 165, 17)
     esc = OneSampleEffectSizeConverter(r=r)
@@ -106,6 +113,7
@@ def test_convert_r_and_n_to_rz(): def test_convert_r_to_itself(): + """Test r-to-r conversion.""" r = np.array([0.2, 0.16, 0.6]) n = np.array((68, 165, 17)) esc = OneSampleEffectSizeConverter(r=r) @@ -124,6 +132,7 @@ def test_convert_r_to_itself(): def test_compute_measure(one_samp_data, two_samp_data): + """Test the compute_measure function.""" # Default args base_result = compute_measure("SM", **one_samp_data) assert isinstance(base_result, tuple) @@ -139,7 +148,7 @@ def test_compute_measure(one_samp_data, two_samp_data): compute_measure("SM", comparison=2, **one_samp_data) # Ambiguous comparison type - with pytest.raises(ValueError, match="Requested measure \(D\)"): + with pytest.raises(ValueError, match=r"Requested measure \(D\)"): compute_measure("D", **one_samp_data, **two_samp_data) # Works with explicit comparison type: check for both comparison types diff --git a/pymare/tests/test_expressions.py b/pymare/tests/test_effectsize_expressions.py similarity index 91% rename from pymare/tests/test_expressions.py rename to pymare/tests/test_effectsize_expressions.py index c5fcc7c..b28a675 100644 --- a/pymare/tests/test_expressions.py +++ b/pymare/tests/test_effectsize_expressions.py @@ -1,6 +1,7 @@ +"""Tests for pymare.effectsize.expressions.""" import pytest -from sympy.core.sympify import SympifyError from sympy import Symbol +from sympy.core.sympify import SympifyError from pymare.effectsize.expressions import Expression, select_expressions @@ -10,6 +11,7 @@ def _symbol_set(*args): def test_Expression_init(): + """Test Expression initialization.""" # Fails because SymPy can't parse expression with pytest.raises(SympifyError): Expression('This isn"t 29 a valid + expre55!on!') @@ -26,6 +28,7 @@ def test_Expression_init(): def test_select_expressions(): + """Test select_expressions function.""" exps = select_expressions("sd", {"d", "m"}) assert len(exps) == 1 assert exps[0].symbols == _symbol_set("sd", "d", "m") diff --git a/pymare/tests/test_estimators.py b/pymare/tests/test_estimators.py index 1982cc8..c4531f2 100644 --- a/pymare/tests/test_estimators.py +++ b/pymare/tests/test_estimators.py @@ -1,17 +1,19 @@ +"""Tests for pymare.estimators.estimators.""" import numpy as np import pytest + +from pymare import Dataset from pymare.estimators import ( - WeightedLeastSquares, DerSimonianLaird, - VarianceBasedLikelihoodEstimator, - SampleSizeBasedLikelihoodEstimator, - StanMetaRegression, Hedges, + SampleSizeBasedLikelihoodEstimator, + VarianceBasedLikelihoodEstimator, + WeightedLeastSquares, ) -from pymare import Dataset def test_weighted_least_squares_estimator(dataset): + """Test WeightedLeastSquares estimator.""" # ground truth values are from metafor package in R est = WeightedLeastSquares().fit_dataset(dataset) results = est.summary() @@ -28,6 +30,7 @@ def test_weighted_least_squares_estimator(dataset): def test_dersimonian_laird_estimator(dataset): + """Test DerSimonianLaird estimator.""" # ground truth values are from metafor package in R est = DerSimonianLaird().fit_dataset(dataset) results = est.summary() @@ -37,6 +40,7 @@ def test_dersimonian_laird_estimator(dataset): def test_2d_DL_estimator(dataset_2d): + """Test DerSimonianLaird estimator on 2D Dataset.""" results = DerSimonianLaird().fit_dataset(dataset_2d).summary() beta, tau2 = results.fe_params, results.tau2 assert beta.shape == (2, 3) @@ -53,6 +57,7 @@ def test_2d_DL_estimator(dataset_2d): def test_hedges_estimator(dataset): + """Test Hedges estimator.""" # ground truth values are from metafor package in R, except that 
metafor # always gives negligibly different values for tau2, likely due to # algorithmic differences in the computation. @@ -64,6 +69,7 @@ def test_hedges_estimator(dataset): def test_2d_hedges(dataset_2d): + """Test Hedges estimator on 2D Dataset.""" results = Hedges().fit_dataset(dataset_2d).summary() beta, tau2 = results.fe_params, results.tau2 assert beta.shape == (2, 3) @@ -80,6 +86,7 @@ def test_2d_hedges(dataset_2d): def test_variance_based_maximum_likelihood_estimator(dataset): + """Test VarianceBasedLikelihoodEstimator estimator.""" # ground truth values are from metafor package in R est = VarianceBasedLikelihoodEstimator(method="ML").fit_dataset(dataset) results = est.summary() @@ -89,6 +96,7 @@ def test_variance_based_maximum_likelihood_estimator(dataset): def test_variance_based_restricted_maximum_likelihood_estimator(dataset): + """Test VarianceBasedLikelihoodEstimator estimator with REML.""" # ground truth values are from metafor package in R est = VarianceBasedLikelihoodEstimator(method="REML").fit_dataset(dataset) results = est.summary() @@ -98,6 +106,7 @@ def test_variance_based_restricted_maximum_likelihood_estimator(dataset): def test_sample_size_based_maximum_likelihood_estimator(dataset_n): + """Test SampleSizeBasedLikelihoodEstimator estimator.""" # test values have not been verified for convergence with other packages est = SampleSizeBasedLikelihoodEstimator(method="ML").fit_dataset(dataset_n) results = est.summary() @@ -110,6 +119,7 @@ def test_sample_size_based_maximum_likelihood_estimator(dataset_n): def test_sample_size_based_restricted_maximum_likelihood_estimator(dataset_n): + """Test SampleSizeBasedLikelihoodEstimator REML estimator.""" # test values have not been verified for convergence with other packages est = SampleSizeBasedLikelihoodEstimator(method="REML").fit_dataset(dataset_n) results = est.summary() @@ -122,6 +132,7 @@ def test_sample_size_based_restricted_maximum_likelihood_estimator(dataset_n): def test_2d_looping(dataset_2d): + """Test 2D looping in estimators.""" est = VarianceBasedLikelihoodEstimator().fit_dataset(dataset_2d) results = est.summary() beta, tau2 = results.fe_params, results.tau2 @@ -138,6 +149,7 @@ def test_2d_looping(dataset_2d): def test_2d_loop_warning(dataset_2d): + """Test 2D looping warning on certain estimators.""" est = VarianceBasedLikelihoodEstimator() y = np.random.normal(size=(10, 100)) v = np.random.randint(1, 50, size=(10, 100)) diff --git a/pymare/tests/test_results.py b/pymare/tests/test_results.py index 9f5b3cf..28aaacc 100644 --- a/pymare/tests/test_results.py +++ b/pymare/tests/test_results.py @@ -1,42 +1,40 @@ -import pytest +"""Tests for pymare.results.""" import numpy as np +import pytest from pymare import Dataset -from pymare.results import ( - MetaRegressionResults, - CombinationTestResults, - BayesianMetaRegressionResults, -) from pymare.estimators import ( - WeightedLeastSquares, DerSimonianLaird, - VarianceBasedLikelihoodEstimator, SampleSizeBasedLikelihoodEstimator, - StanMetaRegression, - Hedges, StoufferCombinationTest, - FisherCombinationTest, + VarianceBasedLikelihoodEstimator, + WeightedLeastSquares, ) +from pymare.results import CombinationTestResults, MetaRegressionResults @pytest.fixture def fitted_estimator(dataset): + """Create a fitted Estimator as a fixture.""" est = DerSimonianLaird() return est.fit_dataset(dataset) @pytest.fixture def results(fitted_estimator): + """Create a results object as a fixture.""" return fitted_estimator.summary() @pytest.fixture def results_2d(fitted_estimator, 
dataset_2d):
+    """Create a 2D results object as a fixture."""
     est = VarianceBasedLikelihoodEstimator()
     return est.fit_dataset(dataset_2d).summary()

 def test_meta_regression_results_init_1d(fitted_estimator):
+    """Test MetaRegressionResults from 1D data."""
     est = fitted_estimator
     results = MetaRegressionResults(
         est, est.dataset_, est.params_["fe_params"], est.params_["inv_cov"], est.params_["tau2"]
@@ -48,6 +46,7 @@

 def test_meta_regression_results_init_2d(results_2d):
+    """Test MetaRegressionResults from 2D data."""
     assert isinstance(results_2d, MetaRegressionResults)
     assert results_2d.fe_params.shape == (2, 3)
     assert results_2d.fe_cov.shape == (2, 2, 3)
@@ -55,6 +54,7 @@

 def test_mrr_fe_se(results, results_2d):
+    """Test MetaRegressionResults fixed-effect standard error estimates."""
     se_1d, se_2d = results.fe_se, results_2d.fe_se
     assert se_1d.shape == (2, 1)
     assert se_2d.shape == (2, 3)
@@ -63,6 +63,7 @@

 def test_mrr_get_fe_stats(results):
+    """Test MetaRegressionResults.get_fe_stats."""
     stats = results.get_fe_stats()
     assert isinstance(stats, dict)
     assert set(stats.keys()) == {"est", "se", "ci_l", "ci_u", "z", "p"}
@@ -71,6 +72,7 @@

 def test_mrr_get_re_stats(results_2d):
+    """Test MetaRegressionResults.get_re_stats."""
     stats = results_2d.get_re_stats()
     assert isinstance(stats, dict)
     assert set(stats.keys()) == {"tau^2", "ci_l", "ci_u"}
@@ -82,6 +84,7 @@

 def test_mrr_get_heterogeneity_stats(results_2d):
+    """Test MetaRegressionResults.get_heterogeneity_stats."""
     stats = results_2d.get_heterogeneity_stats()
     assert len(stats["Q"] == 3)
     assert round(stats["Q"][2], 4) == 53.8052
@@ -91,6 +94,7 @@

 def test_mrr_to_df(results):
+    """Test conversion of MetaRegressionResults to DataFrame."""
     df = results.to_df()
     assert df.shape == (2, 7)
     col_names = {"estimate", "p-value", "z-score", "ci_0.025", "ci_0.975", "se", "name"}
@@ -99,10 +103,11 @@

 def test_estimator_summary(dataset):
+    """Test Estimator's summary method."""
     est = WeightedLeastSquares()
     # Fails if we haven't fitted yet
     with pytest.raises(ValueError):
-        results = est.summary()
+        est.summary()

     est.fit_dataset(dataset)
     summary = est.summary()
@@ -110,6 +115,7 @@

 def test_exact_perm_test_2d_no_mods(small_dataset_2d):
+    """Test the exact permutation test on 2D data."""
     results = DerSimonianLaird().fit_dataset(small_dataset_2d).summary()
     pmr = results.permutation_test(1000)
     assert pmr.n_perm == 8
@@ -120,6 +126,7 @@

 def test_approx_perm_test_1d_with_mods(results):
+    """Test the approximate permutation test on 1D data with moderators."""
     pmr = results.permutation_test(1000)
     assert pmr.n_perm == 1000
     assert not pmr.exact
@@ -129,6 +136,7 @@

 def test_exact_perm_test_1d_no_mods():
+    """Test the exact permutation test on 1D data."""
     dataset = Dataset([1, 1, 2, 1.3], [1.5, 1, 2, 4])
     results = DerSimonianLaird().fit_dataset(dataset).summary()
     pmr = results.permutation_test(867)
@@ -140,6 +148,7 @@

 def test_approx_perm_test_with_n_based_estimator(dataset_n):
+    """Test the approximate permutation test on a sample size-based Estimator."""
     results =
SampleSizeBasedLikelihoodEstimator().fit_dataset(dataset_n).summary()
     pmr = results.permutation_test(100)
     assert pmr.n_perm == 100
@@ -150,6 +159,7 @@

 def test_stouffers_perm_test_exact():
+    """Test the exact permutation test on the StoufferCombinationTest estimator."""
     dataset = Dataset([1, 1, 2, 1.3], [1.5, 1, 2, 4])
     results = StoufferCombinationTest().fit_dataset(dataset).summary()
     pmr = results.permutation_test(2000)
@@ -161,6 +171,7 @@

 def test_stouffers_perm_test_approx():
+    """Test the approximate permutation test on the StoufferCombinationTest estimator."""
     y = [2.8, -0.2, -1, 4.5, 1.9, 2.38, 0.6, 1.88, -0.4, 1.5, 3.163, 0.7]
     dataset = Dataset(y)
     results = StoufferCombinationTest().fit_dataset(dataset).summary()
diff --git a/pymare/tests/test_stan_estimators.py b/pymare/tests/test_stan_estimators.py
index 2f99a95..c685ecd 100644
--- a/pymare/tests/test_stan_estimators.py
+++ b/pymare/tests/test_stan_estimators.py
@@ -6,6 +6,7 @@

 @pytest.mark.skip(reason="StanMetaRegression won't work with Python 3.7+.")
 def test_stan_estimator(dataset):
+    """Run smoke test for StanMetaRegression."""
     # no ground truth here, so we use sanity checks and rough bounds
     est = StanMetaRegression(iter=3000).fit_dataset(dataset)
     results = est.summary()
@@ -19,6 +20,7 @@

 @pytest.mark.skip(reason="StanMetaRegression won't work with Python 3.7+.")
 def test_stan_2d_input_failure(dataset_2d):
+    """Test that StanMetaRegression raises an error on 2D input."""
     with pytest.raises(ValueError) as exc:
         StanMetaRegression(iter=500).fit_dataset(dataset_2d)
     assert str(exc.value).startswith("The StanMetaRegression")
diff --git a/pymare/tests/test_stats.py b/pymare/tests/test_stats.py
index da059b1..6815e29 100644
--- a/pymare/tests/test_stats.py
+++ b/pymare/tests/test_stats.py
@@ -1,12 +1,15 @@
+"""Tests for pymare.stats."""
 from pymare.stats import q_gen, q_profile


 def test_q_gen(vars_with_intercept):
+    """Test pymare.stats.q_gen."""
     result = q_gen(*vars_with_intercept, 8)
     assert round(result[0], 4) == 8.0161


 def test_q_profile(vars_with_intercept):
+    """Test pymare.stats.q_profile."""
     bounds = q_profile(*vars_with_intercept, 0.05)
     assert set(bounds.keys()) == {"ci_l", "ci_u"}
     assert round(bounds["ci_l"], 4) == 3.8076
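As an illustrative companion to the stats tests above (not part of this patch), the same vars_with_intercept fixture values can be passed to q_gen and q_profile directly; the printed values follow the assertions in test_q_gen and test_q_profile.

import numpy as np

from pymare.stats import q_gen, q_profile

y = np.array([[-1, 0.5, 0.5, 0.5, 1, 1, 2, 10]]).T
v = np.array([[1, 1, 2.4, 0.5, 1, 1, 1.2, 1.5]]).T
X = np.array([np.ones(8), [1, 1, 2, 2, 4, 4, 2.8, 2.8]]).T  # intercept + one covariate

print(q_gen(y, v, X, 8))         # generalized Q at tau^2 = 8; first element rounds to 8.0161
print(q_profile(y, v, X, 0.05))  # Q-Profile CI for tau^2; 'ci_l' rounds to 3.8076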