Make Multi closure internal loader data class #2111
base: master
Changes from 3 commits
@@ -7,9 +7,11 @@
 ``multiclosure_output.py``

 """

 import numpy as np
 import scipy.linalg as la
 import scipy.special as special
+import dataclasses

 from reportengine import collect
 from validphys.calcutils import calc_chi2

@@ -30,6 +32,31 @@
 SAMPLING_INTERVAL = 5


+@dataclasses.dataclass(frozen=True)
+class MulticlosureInternalLoader:
+    """
+    Parameters
+    ----------
+    closures_th: list
+        list containing validphys.results.ThPredictionsResult objects
+        for each fit
+
+    law_th: ThPredictionsResult object
+        underlying law theory predictions
+
+    covmat: np.array
+        experimental t0 covariance matrix
+
+    sqrt_covmat: np.array
+        cholesky decomposed covariance matrix
+    """
+
+    closures_th: list
+    law_th: ThPredictionsResult
+    covmat: np.array
+    sqrt_covmat: np.array
+
+
 # TODO: deprecate this at some point
 @check_fits_underlying_law_match
 @check_fits_areclosures

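For readers skimming the diff, a minimal standalone sketch of what the frozen dataclass gives consumers of the loader; the toy values below stand in for the real ThPredictionsResult objects and t0 covariance matrix, and the simplified field types are an assumption of the sketch:

```python
import dataclasses

import numpy as np


@dataclasses.dataclass(frozen=True)
class MulticlosureInternalLoader:
    closures_th: list
    law_th: object          # ThPredictionsResult in validphys; simplified here
    covmat: np.ndarray
    sqrt_covmat: np.ndarray


cov = np.diag([2.0, 1.0, 0.5])
loader = MulticlosureInternalLoader(
    closures_th=[],                      # toy: no closure fits
    law_th=None,                         # toy: no underlying-law predictions
    covmat=cov,
    sqrt_covmat=np.linalg.cholesky(cov),
)

# named attribute access replaces positional unpacking of the old 4-tuple
print(loader.sqrt_covmat.shape)

# frozen=True makes instances read-only
try:
    loader.covmat = np.eye(3)
except dataclasses.FrozenInstanceError:
    print("instances are immutable")
```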
@@ -89,12 +116,11 @@ def internal_multiclosure_dataset_loader(

     sqrt_covmat = la.cholesky(t0_covmat_from_systematics, lower=True)
     # TODO: support covmat reg and theory covariance matrix
-    # possibly make this a named tuple
-    return (
-        fits_dataset_predictions,
-        fits_underlying_predictions,
-        t0_covmat_from_systematics,
-        sqrt_covmat,
+    return MulticlosureInternalLoader(
+        closures_th=fits_dataset_predictions,
+        law_th=fits_underlying_predictions,
+        covmat=t0_covmat_from_systematics,
+        sqrt_covmat=sqrt_covmat,
     )

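The loader carries both the t0 covariance matrix and its lower Cholesky factor. As a short illustration (not validphys code) of why the factor is worth caching, a chi2 can be computed with a triangular solve instead of an explicit matrix inverse:

```python
import numpy as np
import scipy.linalg as la

# toy covariance matrix and residual (prediction minus data) vector
cov = np.array([[2.0, 0.3], [0.3, 1.0]])
diff = np.array([0.5, -0.2])

sqrt_cov = la.cholesky(cov, lower=True)  # L with L @ L.T == cov

# chi2 = diff^T cov^{-1} diff, via a triangular solve
x = la.solve_triangular(sqrt_cov, diff, lower=True)
chi2 = x @ x

# agrees with the direct (but less stable) computation
assert np.isclose(chi2, diff @ np.linalg.inv(cov) @ diff)
```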
@@ -114,9 +140,7 @@ def internal_multiclosure_data_loader(

 @check_multifit_replicas
 def fits_dataset_bias_variance(
-    internal_multiclosure_dataset_loader,
-    _internal_max_reps=None,
-    _internal_min_reps=20,
+    internal_multiclosure_dataset_loader, _internal_max_reps=None, _internal_min_reps=20
 ):
     """For a single dataset, calculate the bias and variance for each fit
     and return tuple (bias, variance, n_data), where bias and variance are

@@ -132,7 +156,10 @@ def fits_dataset_bias_variance(
     ``_internal_max_reps``.

     """
-    closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
+    closures_th = internal_multiclosure_dataset_loader.closures_th
+    law_th = internal_multiclosure_dataset_loader.law_th
+    sqrtcov = internal_multiclosure_dataset_loader.sqrt_covmat

     # The dimensions here are (fit, data point, replica)
     reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th])
     # take mean across replicas - since we might have changed no. of reps

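For orientation, a rough standalone sketch of the kind of per-fit bias and variance the docstring describes; this is illustrative only, not the validphys implementation, and the identity covariance keeps each chi2 a plain sum of squares:

```python
import numpy as np

rng = np.random.default_rng(0)
n_fits, n_data, n_reps = 3, 4, 25

law = np.zeros(n_data)                              # underlying-law predictions
reps = rng.normal(size=(n_fits, n_data, n_reps))    # (fit, data point, replica)
centrals = reps.mean(axis=2)                        # central prediction per fit

# with an identity covariance, chi2 reduces to a sum of squares
biases = ((centrals - law) ** 2).sum(axis=1)                               # one number per fit
variances = ((reps - centrals[:, :, None]) ** 2).sum(axis=1).mean(axis=1)  # replica spread per fit

print(biases.mean(), variances.mean(), n_data)
```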
@@ -159,9 +186,7 @@ def expected_dataset_bias_variance(fits_dataset_bias_variance):

 @check_multifit_replicas
 def fits_data_bias_variance(
-    internal_multiclosure_data_loader,
-    _internal_max_reps=None,
-    _internal_min_reps=20,
+    internal_multiclosure_data_loader, _internal_max_reps=None, _internal_min_reps=20

Review comment: Same here

 ):
     """Like `fits_dataset_bias_variance` but for all data"""
     return fits_dataset_bias_variance(

@@ -212,7 +237,10 @@ def dataset_replica_and_central_diff(internal_multiclosure_dataset_loader, diago
     default behaviour.

     """
-    closures_th, law_th, covmat, _ = internal_multiclosure_dataset_loader
+    closures_th = internal_multiclosure_dataset_loader.closures_th
+    law_th = internal_multiclosure_dataset_loader.law_th
+    covmat = internal_multiclosure_dataset_loader.covmat

     replicas = np.asarray([th.error_members for th in closures_th])
     centrals = np.mean(replicas, axis=-1)
     underlying = law_th.central_value

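A small sketch of the array shapes at this point, with random stand-ins for th.error_members and law_th.central_value; the two difference arrays at the end are added only to illustrate the naming:

```python
import numpy as np

rng = np.random.default_rng(2)
n_fits, n_data, n_reps = 3, 4, 10

# stand-ins for th.error_members, one (data point, replica) table per closure fit
error_members = [rng.normal(size=(n_data, n_reps)) for _ in range(n_fits)]

replicas = np.asarray(error_members)    # (fit, data point, replica)
centrals = np.mean(replicas, axis=-1)   # (fit, data point)
underlying = np.zeros(n_data)           # stand-in for law_th.central_value, (data point,)

central_diff = centrals - underlying            # each fit's central prediction vs the law
replica_diff = replicas - centrals[..., None]   # each replica vs its fit's central value
print(central_diff.shape, replica_diff.shape)
```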
@@ -309,11 +337,7 @@ def n_replica_samples(fits_pdf, _internal_max_reps=None, _internal_min_reps=20):
     _internal_max_reps.
     """
     return list(
-        range(
-            _internal_min_reps,
-            _internal_max_reps + SAMPLING_INTERVAL,
-            SAMPLING_INTERVAL,
-        )
+        range(_internal_min_reps, _internal_max_reps + SAMPLING_INTERVAL, SAMPLING_INTERVAL)

Review comment: Is […]
Reply: yes exactly

     )

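Concretely, the reformatted call builds an inclusive grid of replica sample sizes. A toy reimplementation (not the validphys action), using the default minimum of 20 from the signature and an assumed maximum of 40:

```python
SAMPLING_INTERVAL = 5


def n_replica_samples_sketch(n_min=20, n_max=40):
    # inclusive grid from n_min to n_max in steps of SAMPLING_INTERVAL
    return list(range(n_min, n_max + SAMPLING_INTERVAL, SAMPLING_INTERVAL))


print(n_replica_samples_sketch())  # [20, 25, 30, 35, 40]
```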
@@ -329,13 +353,7 @@ def __init__(self, data):


 def _bootstrap_multiclosure_fits(
-    internal_multiclosure_dataset_loader,
-    rng,
-    n_fit_max,
-    n_fit,
-    n_rep_max,
-    n_rep,
-    use_repeats,
+    internal_multiclosure_dataset_loader, rng, n_fit_max, n_fit, n_rep_max, n_rep, use_repeats
 ):
     """Perform a single bootstrap resample of the multiclosure fits and return
     a proxy of the base internal object used by relevant estimator actions

@@ -361,7 +379,12 @@ def _bootstrap_multiclosure_fits(
     np.random.choice

     """
-    closure_th, *input_tuple = internal_multiclosure_dataset_loader
+    closure_th = internal_multiclosure_dataset_loader.closures_th
+    law_th = internal_multiclosure_dataset_loader.law_th
+    covmat = internal_multiclosure_dataset_loader.covmat
+    sqrtcov = internal_multiclosure_dataset_loader.sqrt_covmat
+    input_tuple = law_th, covmat, sqrtcov

     fit_boot_index = rng.choice(n_fit_max, size=n_fit, replace=use_repeats)
     fit_boot_th = [closure_th[i] for i in fit_boot_index]
     boot_ths = []

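The resample itself is easy to mimic outside validphys. A sketch of the double bootstrap over fits and then replicas; the inner replica resample is assumed from the function's description, since the hunk cuts off right after boot_ths = []:

```python
import numpy as np

rng = np.random.default_rng(1)
n_fit_max, n_fit = 5, 5
n_rep_max, n_rep = 40, 40
use_repeats = True

# one (data point, replica) table of predictions per closure fit (toy data)
closure_th = [rng.normal(size=(10, n_rep_max)) for _ in range(n_fit_max)]

# resample fits, then resample replicas within each selected fit
fit_boot_index = rng.choice(n_fit_max, size=n_fit, replace=use_repeats)
boot_ths = []
for i in fit_boot_index:
    rep_boot_index = rng.choice(n_rep_max, size=n_rep, replace=use_repeats)
    boot_ths.append(closure_th[i][:, rep_boot_index])

print(len(boot_ths), boot_ths[0].shape)  # n_fit tables, each (10, n_rep)
```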
@@ -774,9 +797,7 @@ def total_bootstrap_xi(experiments_bootstrap_xi):


 def dataset_fits_bias_replicas_variance_samples(
-    internal_multiclosure_dataset_loader,
-    _internal_max_reps=None,
-    _internal_min_reps=20,
+    internal_multiclosure_dataset_loader, _internal_max_reps=None, _internal_min_reps=20
 ):
     """For a single dataset, calculate the samples of chi2-quantities which
     are used to calculate the bias and variance for each fit. The output of this

@@ -800,7 +821,10 @@ def dataset_fits_bias_replicas_variance_samples(
     ``_internal_max_reps``.

     """
-    closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
+    closures_th = internal_multiclosure_dataset_loader.closures_th
+    law_th = internal_multiclosure_dataset_loader.law_th
+    sqrtcov = internal_multiclosure_dataset_loader.sqrt_covmat

     # The dimensions here are (fit, data point, replica)
     reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th])
     # take mean across replicas - since we might have changed no. of reps

@@ -817,14 +841,10 @@ def dataset_fits_bias_replicas_variance_samples(


 def dataset_inputs_fits_bias_replicas_variance_samples(
-    internal_multiclosure_data_loader,
-    _internal_max_reps=None,
-    _internal_min_reps=20,
+    internal_multiclosure_data_loader, _internal_max_reps=None, _internal_min_reps=20
 ):
     return dataset_fits_bias_replicas_variance_samples(
-        internal_multiclosure_data_loader,
-        _internal_max_reps=None,
-        _internal_min_reps=20,
+        internal_multiclosure_data_loader, _internal_max_reps=None, _internal_min_reps=20
     )

@@ -1,65 +1,129 @@
 """
 test_closuretest.py

-contains some unit tests for closure test estimators
+contains some unit tests for closure test module

 """

 import numpy as np
+from unittest.mock import Mock

+from validphys.api import API
+from validphys.tests.conftest import SINGLE_DATASET, THEORYID, PDF
 from validphys.closuretest import bias_dataset, variance_dataset
+from validphys.closuretest.multiclosure import (
+    internal_multiclosure_dataset_loader,
+    internal_multiclosure_data_loader,
+)
+from validphys.results import ThPredictionsResult


 class TestResult:
     """class for testing base level estimators which expect a results object"""

     def __init__(self, central_value, rawdata=None):
         self.central_value = central_value
         self.rawdata = rawdata
         self.error_members = rawdata
         self.ndata = len(central_value)
         self.sqrtcovmat = np.identity(self.ndata)

-    def __len__(self,):
+    def __len__(self):
         return self.ndata


 N_DATA = 5
 N_REPLICAS = 10

-#TODO: make these fixtures?
+# TODO: make these fixtures?
 # these are proxies for results tuples of data and theory
-ones_results = 2*[TestResult(np.ones(N_DATA), np.ones((N_DATA, N_REPLICAS)))]
-twos_results = 2*[TestResult(2*np.ones(N_DATA), 2*np.ones((N_DATA, N_REPLICAS)))]
+ones_results = 2 * [TestResult(np.ones(N_DATA), np.ones((N_DATA, N_REPLICAS)))]
+twos_results = 2 * [TestResult(2 * np.ones(N_DATA), 2 * np.ones((N_DATA, N_REPLICAS)))]

+replicas = np.arange(N_REPLICAS)[np.newaxis, :] * np.ones((N_DATA, 1))
+replicas_result = 2 * [TestResult(replicas.mean(axis=1), replicas)]

+DATASET_INPUT = {"dataset_input": SINGLE_DATASET, "theoryid": THEORYID, "use_cuts": "internal"}

+DATASET_INPUTS = {"dataset_inputs": [SINGLE_DATASET], "theoryid": THEORYID, "use_cuts": "internal"}

+MOCK_FIT = Mock()
+MOCK_FIT.name = "Mock fit spec"
+MOCK_FIT.path = "mock/fit/path"
+MOCK_FIT.label = MOCK_FIT.name

+MOCK_FITS = [MOCK_FIT]


+def test_internal_multiclosure_dataset_loader():
+    """
+    Test the internal_multiclosure_dataset_loader
+    """
+    dataset = API.dataset(**DATASET_INPUT)

+    t0_covmat_from_systematics = API.t0_covmat_from_systematics(
+        **{**DATASET_INPUT, "use_t0": True, "t0pdfset": PDF}
+    )

+    fits_pdf = [API.pdf(pdf=PDF)]
+    multiclosure_underlyinglaw = API.pdf(pdf=PDF)
+    fits = MOCK_FITS

+    loader = internal_multiclosure_dataset_loader(
+        dataset, fits_pdf, multiclosure_underlyinglaw, fits, t0_covmat_from_systematics
+    )

+    assert loader.closures_th is not None
+    assert type(loader.closures_th) == list
+    assert loader.law_th is not None
+    assert type(loader.law_th) == ThPredictionsResult
+    assert loader.covmat is not None
+    assert type(loader.covmat) == np.ndarray
+    assert loader.sqrt_covmat is not None
+    assert type(loader.sqrt_covmat) == np.ndarray


+def test_internal_multiclosure_data_loader():
+    """
+    Test the internal_multiclosure_data_loader
+    """
+    data = API.data(**DATASET_INPUTS)

+    t0_covmat_from_systematics = API.t0_covmat_from_systematics(
+        **{**DATASET_INPUT, "use_t0": True, "t0pdfset": PDF}
+    )

+    fits_pdf = [API.pdf(pdf=PDF)]
+    multiclosure_underlyinglaw = API.pdf(pdf=PDF)
+    fits = MOCK_FITS

+    loader = internal_multiclosure_data_loader(
+        data, fits_pdf, multiclosure_underlyinglaw, fits, t0_covmat_from_systematics
+    )

+    assert loader.closures_th is not None
+    assert type(loader.closures_th) == list
+    assert loader.law_th is not None
+    assert type(loader.law_th) == ThPredictionsResult
+    assert loader.covmat is not None
+    assert type(loader.covmat) == np.ndarray
+    assert loader.sqrt_covmat is not None
+    assert type(loader.sqrt_covmat) == np.ndarray

-replicas = np.arange(N_REPLICAS)[np.newaxis, :]*np.ones((N_DATA, 1))
-replicas_result = 2*[TestResult(replicas.mean(axis=1), replicas)]

 def test_bias_function():
     bias_ones = bias_dataset(
-        ones_results,
-        [ones_results],  # need list of length one to emulate collect
-        None,
-        None,
+        ones_results, [ones_results], None, None  # need list of length one to emulate collect
     )
     assert np.allclose(0, bias_ones.bias)
-    bias_one_two = bias_dataset(
-        ones_results,
-        [twos_results],
-        None,
-        None,
-    )
+    bias_one_two = bias_dataset(ones_results, [twos_results], None, None)
     assert np.allclose(N_DATA, bias_one_two.bias)


 def test_variance_function():
-    vardata = variance_dataset(
-        ones_results,
-        None,
-        None,
-    )
+    vardata = variance_dataset(ones_results, None, None)
     assert np.allclose(0, vardata.variance)
-    var_reps = variance_dataset(
-        replicas_result,
-        None,
-        None,
-    )
+    var_reps = variance_dataset(replicas_result, None, None)
     # calc explicitly what variance should be
-    expected = np.sum(((np.arange(N_REPLICAS) - 4.5)**2)*N_DATA/N_REPLICAS)
+    expected = np.sum(((np.arange(N_REPLICAS) - 4.5) ** 2) * N_DATA / N_REPLICAS)
     assert np.allclose(expected, var_reps.variance)
Review comment: Why is this on a single line now?
Reply: black
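As a sanity check on the closed-form expectation used in test_variance_function, the same number can be reached step by step; this is pure arithmetic on the toy replicas array and does not depend on variance_dataset:

```python
import numpy as np

N_DATA, N_REPLICAS = 5, 10
replicas = np.arange(N_REPLICAS)[np.newaxis, :] * np.ones((N_DATA, 1))  # every row is 0..9
central = replicas.mean(axis=1)                                         # 4.5 per data point

# per data point: mean squared deviation of the replicas about the central value
per_point = ((replicas - central[:, np.newaxis]) ** 2).mean(axis=1)     # 8.25 each
expected = per_point.sum()                                              # 5 * 8.25 = 41.25

# identical to the one-liner in the test
assert np.isclose(expected, np.sum(((np.arange(N_REPLICAS) - 4.5) ** 2) * N_DATA / N_REPLICAS))
print(expected)
```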