From 0a9984060b912c241633d74c9d5a91779a0a71f5 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 8 Jun 2022 16:17:35 -0400 Subject: [PATCH] Enhance Dataset example (#100) * Update plot_create_dataset.py * Enhance datasets example. * Replace "datasets" with "dependent variables". --- examples/01_basic_io/plot_create_dataset.py | 44 ++++++++++++++++++- .../plot_run_meta-analysis.py | 15 +++++++ 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/examples/01_basic_io/plot_create_dataset.py b/examples/01_basic_io/plot_create_dataset.py index 3413c36..880f8f0 100644 --- a/examples/01_basic_io/plot_create_dataset.py +++ b/examples/01_basic_io/plot_create_dataset.py @@ -22,10 +22,24 @@ ############################################################################### # Datasets can be created from arrays # ----------------------------------- +# The simplest way to create a dataset is to pass in arguments as numpy arrays. +# +# ``y`` refers to the study-level estimates, ``v`` to the variances, +# ``X`` to any study-level regressors, and ``n`` to the sample sizes. +# +# Not all Estimators require all of these arguments, so not all need to be +# used in a given Dataset. +y = [2, 4, 6] v = [100, 100, 100] X = [[5, 9], [2, 8], [1, 7]] -y = [2, 4, 6] -dataset = core.Dataset(y=y, v=v, X=X) + +dataset = core.Dataset(y=y, v=v, X=X, X_names=["X1", "X7"]) + +pprint(vars(dataset)) + +############################################################################### +# Datasets have the :meth:`~pymare.core.Dataset.to_df` method. 
+dataset.to_df() ############################################################################### # Datasets can also be created from pandas DataFrames @@ -38,6 +52,32 @@ "X7": [9, 8, 7], } ) + dataset = core.Dataset(v="v_alt", X=["X1", "X7"], data=df, add_intercept=False) pprint(vars(dataset)) + +############################################################################### +# Datasets can also contain multiple dependent variables +# ------------------------------------------------------ +# These variables are analyzed in parallel, but as unrelated variables, +# rather than as potentially correlated ones. +# +# This is particularly useful for image-based neuroimaging meta-analyses. +# For more information about this, see `NiMARE <https://nimare.readthedocs.io>`_. +y = [ + [2, 4, 6], # Estimates for first study's three outcome variables. + [3, 2, 1], # Estimates for second study's three outcome variables. +] +v = [ + [100, 100, 100], # Estimate variances for first study's three outcome variables. + [8, 4, 2], # Estimate variances for second study's three outcome variables. +] +X = [ + [5, 9], # Predictors for first study. Same across all three outcome variables. + [2, 8], # Predictors for second study. Same across all three outcome variables. +] + +dataset = core.Dataset(y=y, v=v, X=X, X_names=["X1", "X7"]) + +pprint(vars(dataset)) diff --git a/examples/02_meta-analysis/plot_run_meta-analysis.py b/examples/02_meta-analysis/plot_run_meta-analysis.py index ecde5a6..aa3b71d 100644 --- a/examples/02_meta-analysis/plot_run_meta-analysis.py +++ b/examples/02_meta-analysis/plot_run_meta-analysis.py @@ -37,6 +37,21 @@ # :meth:`~pymare.estimators.estimators.BaseEstimator.fit_dataset` to fit it to # a :class:`~pymare.core.Dataset`. # +# .. tip:: +# We generally recommend using +# :meth:`~pymare.estimators.estimators.BaseEstimator.fit_dataset` over +# :meth:`~pymare.estimators.estimators.BaseEstimator.fit`.
+# +# There are a number of methods, such as +# :meth:`~pymare.results.MetaRegressionResults.get_heterogeneity_stats` and +# :meth:`~pymare.results.MetaRegressionResults.permutation_test`, +# which only work when the Estimator is fitted to a Dataset. +# +# However, :meth:`~pymare.estimators.estimators.BaseEstimator.fit` requires +# less memory than :meth:`~pymare.estimators.estimators.BaseEstimator.fit_dataset`, +# so it can be useful for large-scale meta-analyses, +# such as neuroimaging image-based meta-analyses. +# # The :meth:`~pymare.estimators.estimators.BaseEstimator.summary` function # will return a :class:`~pymare.results.MetaRegressionResults` object, # which contains the results of the analysis.