Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Redefine eventdata shape from (N_events,V) to (V,N_events) for a minor performance increase #172

Merged
merged 4 commits into from
Sep 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions skyllh/core/interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def __init__(self, func, param_grid_set, **kwargs):
tdm : instance of TrialDataManager
The TrialDataManager instance holding the trial event data.
eventdata : instance of numpy ndarray
A two-dimensional (N_events,V)-shaped numpy ndarray holding
A two-dimensional (V,N_events)-shaped numpy ndarray holding
the event data, where N_events is the number of trial
events, and V the dimensionality of the event data.
gridparams_recarray : instance of numpy record ndarray
Expand Down Expand Up @@ -118,7 +118,7 @@ def __call__(self, tdm, eventdata, params_recarray):
tdm : instance of TrialDataManager
The TrialDataManager instance holding the trial data.
eventdata : numpy ndarray
The 2D (N_events,V)-shaped numpy ndarray holding the event data,
The 2D (V,N_events)-shaped numpy ndarray holding the event data,
where N_events is the number of trial events, and V the
dimensionality of the event data.
params_recarray : instance of numpy record ndarray
Expand Down Expand Up @@ -184,7 +184,7 @@ def __call__(self, tdm, eventdata, params_recarray):
tdm : instance of TrialDataManager
The TrialDataManager instance holding the trial data.
eventdata : instance of numpy.ndarray
The (N_events,V)-shaped numpy ndarray holding the event data,
The (V,N_events)-shaped numpy ndarray holding the event data,
where N_events is the number of events, and V the dimensionality of
the event data.
params_recarray : instance of numpy.ndarray
Expand Down Expand Up @@ -331,7 +331,7 @@ def __call__(self, tdm, eventdata, params_recarray):
tdm : instance of TrialDataManager
The TrialDataManager instance holding the trial data.
eventdata : instance of numpy ndarray
The (N_events,V)-shaped numpy ndarray holding the event data,
The (V,N_events)-shaped numpy ndarray holding the event data,
where N_events is the number of events, and V the dimensionality of
the event data.
params_recarray : numpy record ndarray
Expand Down Expand Up @@ -530,7 +530,7 @@ def __call__(self, tdm, eventdata, params_recarray):
tdm : instance of TrialDataManager
The TrialDataManager instance holding the trial data.
eventdata : instance of numpy ndarray
The (N_events,V)-shaped numpy ndarray holding the event data,
The (V,N_events)-shaped numpy ndarray holding the event data,
where N_events is the number of events, and V the dimensionality of
the event data.
params_recarray : numpy record ndarray
Expand Down
35 changes: 19 additions & 16 deletions skyllh/core/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1017,7 +1017,7 @@ def __init__(
TrialDataManager holding the event data for which to calculate the
PDF values, ``params_recarray`` is a numpy structured ndarray
holding the local parameter names and values, ``eventdata`` is
is a (N_values,V)-shaped numpy ndarray holding the event data
a (V,N_values)-shaped numpy ndarray holding the event data
necessary for this PDF, and ``evt_mask`` is an optional
(N_values,)-shaped numpy ndarray holding the mask for the events,
i.e. columns in ``eventdata``, which should be considered. If ``None``,
Expand Down Expand Up @@ -1129,7 +1129,7 @@ def norm_factor_func(self):
where ``pdf`` is this PDF instance, ``tdm`` is an instance of
TrialDataManager holding the events for which to calculate the PDF
values, ``params_recarray`` is a numpy structured ndarray holding the
local parameter names and values, ``eventdata`` is a (N_values,V)-shaped
local parameter names and values, ``eventdata`` is a (V,N_values)-shaped
numpy ndarray holding the event data necessary for this PDF, and
``evt_mask`` is an optional (N_values,)-shaped numpy ndarray holding the
mask for the events, i.e. columns in ``eventdata``, which should be
Expand All @@ -1146,7 +1146,7 @@ def norm_factor_func(self, func):
# event.
def func(pdf, tdm, params_recarray, eventdata, evt_mask=None):
if evt_mask is None:
n_values = eventdata.shape[0]
n_values = eventdata.shape[1]
else:
n_values = np.count_nonzero(evt_mask)
return np.ones((n_values,), dtype=np.float64)
Expand Down Expand Up @@ -1308,7 +1308,7 @@ def get_pd_with_eventdata(
parameter names and values of the models.
By definition, this PDF does not depend on any parameters.
eventdata : instance of numpy ndarray
The (N_values,V)-shaped numpy ndarray holding the V data attributes
The (V,N_values)-shaped numpy ndarray holding the V data attributes
for each of the N_values events needed for the evaluation of the
PDF.
evt_mask : instance of numpy ndarray | None
Expand Down Expand Up @@ -1341,18 +1341,18 @@ def get_pd_with_eventdata(
if isinstance(self._pdf, RegularGridInterpolator):
with TaskTimer(tl, 'Get pd from RegularGridInterpolator.'):
if evt_mask is None:
pd = self._pdf(eventdata)
pd = self._pdf(eventdata.T)
else:
pd = self._pdf(eventdata[evt_mask])
pd = self._pdf(eventdata.T[evt_mask])
else:
with TaskTimer(tl, 'Get pd from photospline fit.'):
V = eventdata.shape[1]
V = eventdata.shape[0]
if evt_mask is None:
pd = self._pdf.evaluate_simple(
[eventdata[:, i] for i in range(0, V)])
[eventdata[i] for i in range(0, V)])
else:
pd = self._pdf.evaluate_simple(
[eventdata[:, i][evt_mask] for i in range(0, V)])
[eventdata[i][evt_mask] for i in range(0, V)])

with TaskTimer(tl, 'Normalize MultiDimGridPDF with norm factor.'):
norm = self._norm_factor_func(
Expand All @@ -1375,8 +1375,9 @@ def get_pd_with_eventdata(
@staticmethod
def create_eventdata_for_sigpdf(
tdm,
axes):
"""Creates the (N_values,V)-shaped eventdata ndarray necessary for
axes,
):
"""Creates the (V,N_values)-shaped eventdata ndarray necessary for
evaluating the signal PDF.

Parameters
Expand All @@ -1402,15 +1403,16 @@ def create_eventdata_for_sigpdf(
TypeError(
f'Unable to determine the type of the data field {name}!')

eventdata = np.array(eventdata_fields).T
eventdata = np.array(eventdata_fields)

return eventdata

@staticmethod
def create_eventdata_for_bkgpdf(
tdm,
axes):
"""Creates the (N_values,V)-shaped eventdata ndarray necessary for
axes,
):
"""Creates the (V,N_values)-shaped eventdata ndarray necessary for
evaluating the background PDF.

Parameters
Expand All @@ -1425,15 +1427,16 @@ def create_eventdata_for_bkgpdf(
for axis in axes:
eventdata_fields.append(tdm.get_data(axis.name))

eventdata = np.array(eventdata_fields).T
eventdata = np.array(eventdata_fields)

return eventdata

def get_pd(
self,
tdm,
params_recarray=None,
tl=None):
tl=None,
):
"""Calculates the probability density for the given trial events given
the specified local parameters.

Expand Down
6 changes: 3 additions & 3 deletions skyllh/core/signalpdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ def _get_eventdata(self, tdm, tl=None):
Returns
-------
eventdata : instance of numpy ndarray
The (N_values,V)-shaped eventdata ndarray.
The (V,N_values)-shaped eventdata ndarray.
"""
if (self._cache_tdm_trial_data_state_id is None) or\
(self._cache_tdm_trial_data_state_id != tdm.trial_data_state_id):
Expand Down Expand Up @@ -711,7 +711,7 @@ def _evaluate_pdfs(
tdm : instance of TrialDataManager
The instance of TrialDataManager holding the trial event data.
eventdata : instance of numpy ndarray
The (N_values,V)-shaped numpy ndarray holding the event data for
The (V,N_values)-shaped numpy ndarray holding the event data for
the PDF evaluation.
gridparams_recarray : instance of numpy structured ndarray
The numpy structured ndarray of length N_sources with the
Expand Down Expand Up @@ -976,7 +976,7 @@ def _get_eventdata(self, tdm, tl=None):
Returns
-------
eventdata : instance of numpy ndarray
The (N_values,V)-shaped eventdata ndarray.
The (V,N_values)-shaped eventdata ndarray.
"""
if (self._cache_tdm_trial_data_state_id is None) or\
(self._cache_tdm_trial_data_state_id != tdm.trial_data_state_id):
Expand Down
4 changes: 2 additions & 2 deletions skyllh/core/utils/multidimgridpdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ def kde_pdf_sig_spatial_norm_factor_func(
log10_psi_idx = pdf._axes.get_index_by_name(log10_psi_name)

if evt_mask is None:
psi = 10**eventdata[:, log10_psi_idx]
psi = 10**eventdata[log10_psi_idx]
else:
psi = 10**eventdata[:, log10_psi_idx][evt_mask]
psi = 10**eventdata[log10_psi_idx][evt_mask]

norm = 1. / (2 * np.pi * np.log(10) * psi * np.sin(psi))

Expand Down
10 changes: 5 additions & 5 deletions skyllh/i3/pdfratio.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def _evaluate_splines(
The TrialDataManager instance holding the trial data and the event
mapping to the sources via the ``src_evt_idx`` property.
eventdata : instance of numpy ndarray
The (N_events,V)-shaped numpy ndarray holding the event data, where
The (V,N_events)-shaped numpy ndarray holding the event data, where
N_events is the number of events, and V the dimensionality of the
event data.
gridparams_recarray : instance of numpy structured ndarray
Expand All @@ -328,7 +328,7 @@ def _evaluate_splines(
# We got a single parameter set. We will use it for all sources.
spline = self._get_spline_for_param_values(gridparams_recarray[0])

eventdata = np.take(eventdata, evt_idxs, axis=0)
eventdata = np.take(eventdata, evt_idxs, axis=1)
values = spline(eventdata)

return values
Expand All @@ -341,9 +341,9 @@ def _evaluate_splines(

# Select the eventdata that belongs to the current source.
m = src_idxs == sidx
src_eventdata = np.take(eventdata, evt_idxs[m], axis=0)
src_eventdata = np.take(eventdata, evt_idxs[m], axis=1)

n = src_eventdata.shape[0]
n = src_eventdata.shape[1]
sl = slice(v_start, v_start+n)
values[sl] = spline(src_eventdata)

Expand Down Expand Up @@ -402,7 +402,7 @@ def _calculate_ratio_and_grads(
"""
# Create a 2D event data array holding only the needed event data fields
# for the PDF ratio spline evaluation.
eventdata = np.vstack([tdm[fn] for fn in self._data_field_names]).T
eventdata = np.vstack([tdm[fn] for fn in self._data_field_names])

(ratio, grads) = self._interpolmethod(
tdm=tdm,
Expand Down
10 changes: 5 additions & 5 deletions tests/core/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def line(m, p, b):

p = gridparams_recarray['p']

n_selected_events = eventdata.shape[0]
n_selected_events = eventdata.shape[1]

values = np.repeat(line(m=2, p=p, b=1), n_selected_events)

Expand Down Expand Up @@ -69,7 +69,7 @@ def product(p1, p2):
p1 = gridparams_recarray['p1']
p2 = gridparams_recarray['p2']

n_selected_events = eventdata.shape[0]
n_selected_events = eventdata.shape[1]

values = np.repeat(product(p1, p2), n_selected_events)

Expand Down Expand Up @@ -144,7 +144,7 @@ def setUp(self):
self.tdm = create_tdm(n_sources=3, n_selected_events=2)

self.eventdata = np.zeros(
(self.tdm.n_selected_events, 1), dtype=np.float64)
(1, self.tdm.n_selected_events), dtype=np.float64)

def test__call__with_different_source_values(self):
"""Test for when the interpolation parameters have different values for
Expand Down Expand Up @@ -249,7 +249,7 @@ def setUp(self):
self.tdm = create_tdm(n_sources=3, n_selected_events=2)

self.eventdata = np.zeros(
(self.tdm.n_selected_events, 1), dtype=np.float64)
(1, self.tdm.n_selected_events), dtype=np.float64)

def test__call__with_different_source_values(self):
"""Test for when the interpolation parameter has different values for
Expand Down Expand Up @@ -341,7 +341,7 @@ def setUp(self):
self.tdm = create_tdm(n_sources=3, n_selected_events=2)

self.eventdata = np.zeros(
(self.tdm.n_selected_events, 1), dtype=np.float64)
(1, self.tdm.n_selected_events), dtype=np.float64)

def test__call__with_different_source_values(self):
"""Test for when the interpolation parameter has different values for
Expand Down