diff --git a/.gitignore b/.gitignore index b6e47617de..16664b18e4 100644 --- a/.gitignore +++ b/.gitignore @@ -69,7 +69,8 @@ instance/ .scrapy # Sphinx documentation -docs/_build/ +doc/sphinx/_build +doc/sphinx/reference # PyBuilder target/ @@ -127,3 +128,10 @@ dmypy.json # Pyre type checker .pyre/ + +# VisualCode IDE +.vscode + +# Scalene profiling files +profile.json +profile.html diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 6f4c894061..a6a4959421 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,3 +1,47 @@ +This file contains a log-book for major changes between releases. + +v23.2.0 +======= +- Complete overhaul of SkyLLH for more generic handling of parameters + (fixed & fit parameters) and their mapping to sources. + + - Single class core.parameters.Parameter for fixed and fit parameters. + - New class core.parameters.ParameterModelMapper to map global parameters + to local model (e.g. source) parameters. + - Class method core.pdf.PDF.get_pd takes structured numpy.ndarray as + argument, holding local parameter names and values for each source. + +- Intrinsic support for multiple sources. + + - New class property core.trialdata.TrialDataManager.src_evt_idxs with + source and event indices mapping selected events to the sources. + +- Simplification of the class hierarchy & source code structure. +- New generic flux model function concept. +- New generic PDF class for time PDFs. +- Support for Parquet data files via optional pyarrow Python package. +- Introduce factory concept for detector signal yield construction. + + DetSigYieldBuilder classes can provide a factory function to construct + DetSigYield instances efficiently for multiple flux models. + +- Speed improvements for analysis creation and trial generation. + + - i3.detsigyield.SingleParamFluxPointLikeSourceI3DetSigYieldBuilder: + + MC events are sorted in sin(true_dec) before histograms are made for + each parameter value. Thus, numpy.histogram works faster. 
+ + - core.event_selection.SpatialBoxEventSelectionMethod: + + Improved calculation of right-ascension difference. Thus, speed-up in + trial generation when using this event selection method. + +- Usage of the tqdm Python package for progress bars. +- More unit tests. +- Improved documentation. +- Following PEP8 coding style standard. + v23.1.1 ======= - bugfix of core.pdf.MultiDimGridPDFSet @@ -5,4 +49,4 @@ v23.1.1 - removal of depricated proptery core.pdf.PDFSet.pdf_axes v23.1.0 -======= \ No newline at end of file +======= diff --git a/README.md b/README.md index f7c2257861..76a794c67d 100644 --- a/README.md +++ b/README.md @@ -19,12 +19,12 @@ pip install skyllh The current development version can be installed using pip: ```bash -pip install git+https://github.com/icecube/skyllh.git#egg=skyllh +pip install git+https://github.com/icecube/skyllh.git#egg=skyllh ``` Optionally, the editable package version with a specified reference can be installed by: ```bash -pip install -e git+https://github.com/icecube/skyllh.git@[ref]#egg=skyllh +pip install -e git+https://github.com/icecube/skyllh.git@[ref]#egg=skyllh ``` where - `-e` is an editable flag @@ -41,6 +41,15 @@ sys.path.insert(0, '/path/to/skyllh') sys.path.insert(0, '/path/to/i3skyllh') # optional ``` +# Publications + +Several publications about the SkyLLH software are available: + +- IceCube Collaboration, T. Kontrimas, M. Wolf, et al. PoS ICRC2021 (2022) 1073 + [DOI](http://doi.org/10.22323/1.395.1073) +- IceCube Collaboration, M. Wolf, et al. PoS ICRC2019 (2020) 1035 + [DOI](https://doi.org/10.22323/1.358.1035) + # i3skyllh The [`i3skyllh`](https://github.com/icecube/i3skyllh) package provides complementary pre-defined common analyses and datasets for the [IceCube Neutrino Observatory](https://icecube.wisc.edu) detector in a private [repository](https://github.com/icecube/i3skyllh). 
\ No newline at end of file diff --git a/doc/sphinx/Makefile b/doc/sphinx/Makefile index 3bb8a8ae1f..4e3eeb6af7 100644 --- a/doc/sphinx/Makefile +++ b/doc/sphinx/Makefile @@ -9,13 +9,13 @@ BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) .PHONY: help Makefile html clean # Build html documentation. html: - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) # Build html documentation for each branch separately. html-multiversion: @@ -23,10 +23,10 @@ html-multiversion: # Prepend clean target to remove skyllh API reference. clean: - rm -rf reference - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + rm -rf $(SOURCEDIR)/reference + @$(SPHINXBUILD) -M clean "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) # Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +# "make mode" option. %: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) diff --git a/doc/sphinx/concepts/detsigyield.ipynb b/doc/sphinx/concepts/detsigyield.ipynb new file mode 100644 index 0000000000..ec962dc445 --- /dev/null +++ b/doc/sphinx/concepts/detsigyield.ipynb @@ -0,0 +1,94 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "12151349", + "metadata": {}, + "source": [ + "# Detector Signal Yield" + ] + }, + { + "cell_type": "markdown", + "id": "69d138fc", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "Several parts of an analysis will require the calculation of the detector signal yield, which is the mean number of expected signal events in the detector for a given source hypothesis, i.e. 
source flux function $\\Phi(\\alpha,\\delta,E,t)$." + ] + }, + { + "cell_type": "raw", + "id": "a1560531", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "SkyLLH provides two abstract base classes for creating a detector signal yield instance, :py:class:`~skyllh.core.detsigyield.DetSigYieldBuilder` and :py:class:`~skyllh.core.detsigyield.DetSigYield`. The first is the builder class, which will build a :py:class:`~skyllh.core.detsigyield.DetSigYield` class instance." + ] + }, + { + "cell_type": "raw", + "id": "3b4bd127", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "The builder class has the abstract method :py:meth:`~skyllh.core.detsigyield.DetSigYieldBuilder.construct_detsigyield`, which will take a :py:class:`~skyllh.core.dataset.Dataset`, :py:class:`~skyllh.core.dataset.DatasetData`, :py:class:`~skyllh.physics.flux_model.FluxModel` instance, and live-time to construct a :py:class:`~skyllh.core.detsigyield.DetSigYield` class instance, which will provide an evaluation method to calculate the detector signal yield for a given source, for the given dataset. Hence, the detector signal yield is dataset and source model dependent." + ] + }, + { + "cell_type": "raw", + "id": "653d0c06", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "The :py:class:`~skyllh.core.detsigyield.DetSigYield` class has two abstract methods, :py:meth:`~skyllh.core.detsigyield.DetSigYield.source_to_array` and :py:meth:`~skyllh.core.detsigyield.DetSigYield.__call__`." + ] + }, + { + "cell_type": "raw", + "id": "60c16a6d", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "The :py:meth:`~skyllh.core.detsigyield.DetSigYield.source_to_array` method takes a sequence of source models and converts it into a numpy record array suitable for the :py:meth:`~skyllh.core.detsigyield.DetSigYield.__call__` method to evaluate the detector signal yield efficiently for a list of sources. 
The :py:meth:`~skyllh.core.detsigyield.DetSigYield.__call__` method evaluates the :py:class:`~skyllh.core.detsigyield.DetSigYield` instance. As arguments it takes the source record array created by the :py:meth:`~skyllh.core.detsigyield.DetSigYield.source_to_array` method, and the numpy record array holding the (local) source parameter values." + ] + }, + { + "cell_type": "raw", + "id": "d75df35f", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "The record array holding the local source parameter values can be generated through the :py:class:`~skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` of the :py:class:`~skyllh.core.parameters.ParameterModelMapper` instance of the analysis. See also the :ref:`Parameter to Model mapping ` section." + ] + } + ], + "metadata": { + "celltoolbar": "Raw Cell Format", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/sphinx/concepts/flux_definition.ipynb b/doc/sphinx/concepts/flux_definition.ipynb new file mode 100644 index 0000000000..9f060e10ec --- /dev/null +++ b/doc/sphinx/concepts/flux_definition.ipynb @@ -0,0 +1,380 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7f4a6a5a", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "b02f8348", + "metadata": {}, + "outputs": [], + "source": [ + "from astropy import units\n", + "import numpy as np" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "37fbb2ca", + "metadata": {}, + "source": [ + "# Flux Definition" + 
] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "fad6f642", + "metadata": {}, + "source": [ + "SkyLLH provides a sophisticated class collection to describe a differential particle flux function." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4ee1511d", + "metadata": {}, + "source": [ + "## General Flux Function" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "e4765264", + "metadata": {}, + "source": [ + "The most general differential flux function is:\n", + "\n", + "$\\frac{\\mathrm{d}^4\\Phi(\\alpha,\\delta,E,t | \\vec{p}_\\mathrm{s})}{\\mathrm{d}A \\mathrm{d}\\Omega \\mathrm{d}E \\mathrm{d}t}$,\n", + "\n", + "which is a function of celestial coordinates right-ascension, $\\alpha$, and declination, $\\delta$, energy $E$, and time $t$, given source parameters $\\vec{p}_\\mathrm{s}$, e.g. source location and spectral index $\\gamma$ for a power-law energy profile." + ] + }, + { + "cell_type": "raw", + "id": "1e06fcf2", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "The abstract base class for all flux function models is :py:class:`~skyllh.core.flux_model.FluxModel`, which is derived from :py:class:`~skyllh.core.math.MathFunction` and :py:class:`~skyllh.core.model.Model`." + ] + }, + { + "cell_type": "raw", + "id": "007a68ec", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "The :py:class:`~skyllh.core.flux_model.FluxModel` has the abstract :py:meth:`~skyllh.core.flux_model.FluxModel.__call__` method defined, which will evaluate the flux function for values of :math:`\\alpha`, :math:`\\delta`, :math:`E`, and :math:`t` given in specific units."
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "e94984fc", + "metadata": {}, + "source": [ + "## Units for flux models" + ] + }, + { + "cell_type": "raw", + "id": "f5a5c595", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "The :py:class:`~skyllh.core.flux_model.FluxModel` class defines the units used for length, angle, energy, and time. The default units are configured through the :py:mod:`skyllh.core.config` module, in particular through the ``CFG['units']['defaults']['fluxes']`` dictionary of the :py:mod:`~skyllh.core.config` module. Units must be derived classes from the :py:class:`astropy.units.UnitBase` class." + ] + }, + { + "cell_type": "raw", + "id": "aee5873d", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "The :py:class:`~skyllh.core.flux_model.FluxModel` class has the properties :py:attr:`~skyllh.core.flux_model.FluxModel.unit_str` and :py:attr:`~skyllh.core.flux_model.FluxModel.unit_latex_str` for a representation of the units as a ``str`` object in plain text and latex code." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8f4e7358", + "metadata": {}, + "source": [ + "## Factorized Flux Function" + ] + }, + { + "cell_type": "raw", + "id": "98ebd803", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "Usually the flux function can be split into a spatial, energy, and time profile with an overall normalization constant in differential flux unit, i.e. :math:`\\mathrm{area}^{-1} \\mathrm{solid-angle}^{-1} \\mathrm{energy}^{-1} \\mathrm{time}^{-1}`. Hence, SkyLLH provides the class :py:class:`~skyllh.core.flux_model.FactorizedFluxModel`, which describes a differential flux function as the product of individual flux profiles:\n", + "\n", + ".. 
math::\n", + "\n", + " \\Phi(\\alpha,\\delta,E,t | \\vec{p}_\\mathrm{s}) =\n", + " \\Phi_0\n", + " \\Psi(\\alpha,\\delta|\\vec{p}_\\mathrm{s})\n", + " \\epsilon(E|\\vec{p}_\\mathrm{s})\n", + " T(t|\\vec{p}_\\mathrm{s})\n", + "\n", + "The abstract base class for any flux profile is :py:class:`~skyllh.core.flux_model.FluxProfile`, which is derived from :py:class:`~skyllh.core.math.MathFunction`.\n", + "\n", + "The abstract base class for a spatial, energy, and time flux profile is :py:class:`~skyllh.core.flux_model.SpatialFluxProfile`, :py:class:`~skyllh.core.flux_model.EnergyFluxProfile`, and :py:class:`~skyllh.core.flux_model.TimeFluxProfile`, respectively, and are derived from :py:class:`~skyllh.core.flux_model.FluxProfile`." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "3242d1d9", + "metadata": {}, + "source": [ + "### Steady Point-Like Flux" + ] + }, + { + "cell_type": "raw", + "id": "9f489769", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "A very common source hypothesis is a steadily emitting point-like source. Hence, SkyLLH provides the class :py:class:`~skyllh.core.flux_model.SteadyPointlikeFFM`. It takes a flux normalization :math:`\\Phi_0`, and an energy profile as constructor arguments. As spatial profile it uses the :py:class:`~skyllh.core.flux_model.PointSpatialFluxProfile` class." 
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a2ab348c", + "metadata": {}, + "source": [ + "As an example we create a steady point-like factorized flux model with a power-law energy flux profile.\n", + "\n", + "First we see what the default units are:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3a336a3d", + "metadata": {}, + "outputs": [], + "source": [ + "from skyllh.core.flux_model import FluxModel" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9b799b48", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'angle': Unit(\"rad\"), 'energy': Unit(\"GeV\"), 'length': Unit(\"cm\"), 'time': Unit(\"s\")}\n" + ] + } + ], + "source": [ + "print(FluxModel.get_default_units())" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4cc57768", + "metadata": {}, + "source": [ + "Now we need to create the energy flux profile. As reference energy and spectral index we choose $E_0=10^3~$GeV and $\\gamma=2$, respectively:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "74d36971", + "metadata": {}, + "outputs": [], + "source": [ + "from skyllh.core.flux_model import PowerLawEnergyFluxProfile" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2c104c38", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(E / (1000 GeV))^-2\n" + ] + } + ], + "source": [ + "energy_profile = PowerLawEnergyFluxProfile(E0=1e3, gamma=2, energy_unit=units.GeV)\n", + "print(energy_profile)" + ] + }, + { + "cell_type": "raw", + "id": "ac4b6bb9", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "The next step is to create the :py:class:`~skyllh.core.flux_model.SteadyPointlikeFFM` class instance. 
As normalization constant we choose :math:`\\Phi_0 = 10^{-8} \\text{GeV}^{-1}\\text{cm}^{-2}\\text{sr}^{-1}\\text{s}^{-1}`:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "13335852", + "metadata": {}, + "outputs": [], + "source": [ + "from skyllh.core.flux_model import SteadyPointlikeFFM" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e06b2222", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.000e-08 * (E / (1000 GeV))^-2 * 1 (GeV cm^2 s)^-1\n" + ] + } + ], + "source": [ + "fluxmodel = SteadyPointlikeFFM(\n", + " Phi0=1e-8,\n", + " energy_profile=energy_profile,\n", + " angle_unit=units.radian,\n", + " time_unit=units.s,\n", + " length_unit=units.cm\n", + ")\n", + "print(fluxmodel)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9a282441", + "metadata": {}, + "source": [ + "## Evaluating the flux model function\n" + ] + }, + { + "cell_type": "raw", + "id": "bdbb055a", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "The flux model function can be evaluated by calling its :py:meth:`~skyllh.core.flux_model.FluxModel.__call__` operator:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a22a30bf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "flux.shape = (1, 1, 1)\n", + "flux = [[[1.11111111e-09]]]\n" + ] + } + ], + "source": [ + "flux = fluxmodel(E=3e3)\n", + "print(f'flux.shape = {flux.shape}')\n", + "print(f'flux = {flux}')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9371a4cf", + "metadata": {}, + "source": [ + "It returns a 3-dimensional numpy array, where the first, second, and third dimension represents the spatial, energy, and time axes, respectively. 
Hence, we can evaluate the flux model for different spatial coordinates, energies, and times by a single call:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6b6596b8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "flux.shape = (1, 2, 3)\n", + "flux = [[[1.11111111e-09 1.11111111e-09 1.11111111e-09]\n", + " [2.50000000e-07 2.50000000e-07 2.50000000e-07]]]\n" + ] + } + ], + "source": [ + "flux = fluxmodel(E=[3e3, 2e2], t=[1, 2, 3])\n", + "print(f'flux.shape = {flux.shape}')\n", + "print(f'flux = {flux}')" + ] + } + ], + "metadata": { + "celltoolbar": "Raw Cell Format", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/sphinx/concepts/index.rst b/doc/sphinx/concepts/index.rst new file mode 100644 index 0000000000..146bac8b76 --- /dev/null +++ b/doc/sphinx/concepts/index.rst @@ -0,0 +1,16 @@ +.. concepts + +Concepts +======== + +This section covers a few concepts SkyLLH is pursuing. + +.. 
toctree:: + :maxdepth: 1 + :caption: SkyLLH concepts + + source_definition + flux_definition + parameters + pdf + detsigyield diff --git a/doc/sphinx/concepts/parameters.ipynb b/doc/sphinx/concepts/parameters.ipynb new file mode 100644 index 0000000000..9695b450f0 --- /dev/null +++ b/doc/sphinx/concepts/parameters.ipynb @@ -0,0 +1,725 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "0c7fbc7d", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ee28d8c2", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "id": "452ac3ce", + "metadata": {}, + "source": [ + "# Parameter Definition" + ] + }, + { + "cell_type": "raw", + "id": "23cf3a4b", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "Parameters are fundamental parts of a likelihood function. Two types of parameters exist: *fixed parameters* and *floating parameters*.\n", + "\n", + "SkyLLH provides the class :py:class:`~skyllh.core.parameters.Parameter` for defining a parameter. The ``Parameter`` class can represent a fixed parameter or a floating parameter." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "84bb97d3", + "metadata": {}, + "outputs": [], + "source": [ + "from skyllh.core.parameters import Parameter" + ] + }, + { + "cell_type": "markdown", + "id": "4aa38262", + "metadata": {}, + "source": [ + "Creating a fixed parameter\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "05ea8128", + "metadata": {}, + "source": [ + "A fixed parameter has a name and an initial value. Its `isfixed` property is set to `True`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1ca1c10d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parameter: a = 2.100 [fixed]\n", + "True\n" + ] + } + ], + "source": [ + "param_a = Parameter(name='a', initial=2.1)\n", + "print(param_a)\n", + "print(param_a.isfixed)" + ] + }, + { + "cell_type": "markdown", + "id": "8ee3caed", + "metadata": {}, + "source": [ + "Changing the value of a fixed parameter\n", + "---" + ] + }, + { + "cell_type": "raw", + "id": "3a596000", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "Sometimes it is necessary to change the value of a fixed parameter. This can be done using the :py:meth:`~skyllh.core.parameters.Parameter.change_fixed_value` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c44eb063", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parameter: a = 3.300 [fixed]\n" + ] + } + ], + "source": [ + "param_a.change_fixed_value(3.3)\n", + "print(param_a)" + ] + }, + { + "cell_type": "markdown", + "id": "9c014787", + "metadata": {}, + "source": [ + "Creating a floating parameter\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "1a1d2233", + "metadata": {}, + "source": [ + "A floating parameter has a name, an initial value, and a value range from `valmin` to `valmax`. Its `isfixed` property is set to `False`. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "fd0c5677", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parameter: b = 6.300 [floating] {\n", + " initial: 6.300\n", + " range: (4.000, 7.300)\n", + "}\n", + "False\n" + ] + } + ], + "source": [ + "param_b = Parameter(name='b', initial=6.3, valmin=4.0, valmax=7.3)\n", + "print(param_b)\n", + "print(param_b.isfixed)" + ] + }, + { + "cell_type": "markdown", + "id": "372136e3", + "metadata": {}, + "source": [ + "Converting fixed and floating parameters\n", + "---" + ] + }, + { + "cell_type": "raw", + "id": "6fa4021b", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "Fixed parameters can be made floating parameters and vise-versa. For making a fixed parameter floating one can use the :py:meth:`~skyllh.core.parameters.Parameter.make_floating` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8971a4cc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parameter: a = 3.300 [floating] {\n", + " initial: 3.300\n", + " range: (0.000, 4.000)\n", + "}\n" + ] + } + ], + "source": [ + "# Converting a fixed parameter into a floating parameter.\n", + "param_a.make_floating(initial=3.3, valmin=0, valmax=4)\n", + "print(param_a)" + ] + }, + { + "cell_type": "raw", + "id": "eaf7659c", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "For converting a floating parameter into a fixed parameter one can use the :py:meth:`~skyllh.core.parameters.Parameter.make_fixed` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4f8937e2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parameter: b = 42.300 [fixed]\n" + ] + } + ], + "source": [ + "# Converting a floating parameter into a fixed parameter.\n", + "param_b.make_fixed(initial=42.3)\n", + 
"print(param_b)" + ] + }, + { + "cell_type": "markdown", + "id": "14c1f859", + "metadata": {}, + "source": [ + "Comparing parameters\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "1a0bbbd4", + "metadata": {}, + "source": [ + "Two parameters can be compared to each other. They are equal if they represent the same parameter, i.e. the same name, initial value, and value range if they are floating parameters." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c56a9337", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n", + "False\n", + "False\n" + ] + } + ], + "source": [ + "param_c = Parameter(name='b', initial=42.3)\n", + "print(param_c == param_b)\n", + "print(param_c is param_b)\n", + "print(param_c == param_a)" + ] + }, + { + "cell_type": "markdown", + "id": "7baeace5", + "metadata": {}, + "source": [ + "Creating a linear grid from a floating parameter\n", + "---" + ] + }, + { + "cell_type": "raw", + "id": "5bc65287", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "Sometimes it useful to create an equal-distanced grid of values from a floating parameter. The :py:class:`~skyllh.core.parameters.Parameter` class has the method :py:meth:`~skyllh.core.parameters.Parameter.as_linear_grid` to create a :py:class:`~skyllh.core.parameters.ParameterGrid` instance representing a grid of values with equal distances. " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "fcd28e13", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a = [0. 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. 1.1 1.2 1.3 1.4 1.5 1.6 1.7\n", + " 1.8 1.9 2. 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3. 3.1 3.2 3.3 3.4 3.5\n", + " 3.6 3.7 3.8 3.9 4. 
], decimals = 1\n" + ] + } + ], + "source": [ + "param_grid_a = param_a.as_linear_grid(delta=0.1)\n", + "print(param_grid_a)" + ] + }, + { + "cell_type": "markdown", + "id": "f95e23ba", + "metadata": {}, + "source": [ + "Parameter Sets\n", + "---" + ] + }, + { + "cell_type": "raw", + "id": "6bb9e044", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "For an analysis a set of parameters is usually required. The set of parameters for the likelihood function might depend on the particular source in case of a multi-source analysis.\n", + "\n", + "Each analysis needs to define a global set of parameters. SkyLLH provides the :py:class:`~skyllh.core.parameters.ParameterSet` class to define a set of parameters. How these parameters are then mapped to the individual sources is defined via the *source parameter mapper*." + ] + }, + { + "cell_type": "markdown", + "id": "9fc97436", + "metadata": {}, + "source": [ + "As example we are creating a set of two Parameters, $n_{\\mathrm{s}}$, and $\\gamma$."
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "dca5bff6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parameter: ns = 100.000 [floating] {\n", + " initial: 100.000\n", + " range: (0.000, 1000.000)\n", + "}\n", + "Parameter: gamma = 2.000 [floating] {\n", + " initial: 2.000\n", + " range: (1.000, 4.000)\n", + "}\n" + ] + } + ], + "source": [ + "param_ns = Parameter('ns', 100, valmin=0, valmax=1000)\n", + "param_gamma = Parameter('gamma', 2, valmin=1, valmax=4.)\n", + "print(param_ns)\n", + "print(param_gamma)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "74243d30", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ParameterSet: 2 parameters (2 floating, 0 fixed) {\n", + " Parameter: ns = 100.000 [floating] {\n", + " initial: 100.000\n", + " range: (0.000, 1000.000)\n", + " }\n", + " Parameter: gamma = 2.000 [floating] {\n", + " initial: 2.000\n", + " range: (1.000, 4.000)\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "from skyllh.core.parameters import ParameterSet\n", + "paramset = ParameterSet((param_ns, param_gamma))\n", + "print(paramset)" + ] + }, + { + "cell_type": "raw", + "id": "d03240f5", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + ".. _sec:ParameterToModelMapping:" + ] + }, + { + "cell_type": "markdown", + "id": "e4d11628", + "metadata": {}, + "source": [ + "Parameter to Model mapping\n", + "--" + ] + }, + { + "cell_type": "raw", + "id": "306be6d3", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "After a global set of parameters is defined, those parameters need to get mapped to individual models, e.g. sources, of the analysis. SkyLLH provides the :py:class:`~skyllh.core.parameters.ParameterModelMapper` class to provide this functionality." 
+ ] + }, + { + "cell_type": "markdown", + "id": "7357a9d8", + "metadata": {}, + "source": [ + "As example we consider the following mapping of parameters to models and sources:" + ] + }, + { + "cell_type": "raw", + "id": "23a8c0c1", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "+--------------+-----+--------+--------+\n", + "| Parameter > | ns | gamma1 | gamma2 |\n", + "+--------------+-----+--------+--------+\n", + "| Model/Source | |\n", + "+==============+=====+========+========+\n", + "| detector | ns | --- | --- |\n", + "+--------------+-----+--------+--------+\n", + "| source1 | --- | gamma | --- |\n", + "+--------------+-----+--------+--------+\n", + "| source2 | --- | --- | gamma |\n", + "+--------------+-----+--------+--------+" + ] + }, + { + "cell_type": "raw", + "id": "ed80f417", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "SkyLLH provides two main base models: :py:class:`~skyllh.core.model.DetectorModel` and :py:class:`~skyllh.core.model.SourceModel`. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "291cd40a", + "metadata": {}, + "outputs": [], + "source": [ + "from skyllh.core.model import DetectorModel, SourceModel\n", + "from skyllh.core.parameters import ParameterModelMapper" + ] + }, + { + "cell_type": "markdown", + "id": "11dfec3c", + "metadata": {}, + "source": [ + "We define the models and sources to which we want to map parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "efce19d4", + "metadata": {}, + "outputs": [], + "source": [ + "detector = DetectorModel('IceCube')\n", + "source1 = SourceModel('source1')\n", + "source2 = SourceModel('source2')" + ] + }, + { + "cell_type": "markdown", + "id": "a61d6084", + "metadata": {}, + "source": [ + "Now we define the parameters, which we want to map to the models and sources:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "c40c67f5", + "metadata": {}, + "outputs": [], + "source": [ + "param_ns = Parameter('ns', 10, valmin=0, valmax=1000)\n", + "param_gamma1 = Parameter('gamma1', 2.5, valmin=1, valmax=4)\n", + "param_gamma2 = Parameter('gamma2', 3.3, valmin=1, valmax=4)" + ] + }, + { + "cell_type": "markdown", + "id": "c0d59c37", + "metadata": {}, + "source": [ + "After creating the models and parameters we can create the `ParameterModelMapper` for the set of models." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "adfa6943", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ParameterModelMapper: 0 global parameters, 3 models (2 sources)\n" + ] + } + ], + "source": [ + "pmm = ParameterModelMapper(models=(detector,source1,source2))\n", + "print(pmm)" + ] + }, + { + "cell_type": "markdown", + "id": "f351e77f", + "metadata": {}, + "source": [ + "Finally we can define the parameter mapping to the models for each parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "1d1cad2d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ParameterModelMapper: 3 global parameters, 3 models (2 sources)\n", + " Parameters: \n", + " ns [floating (0.000 <= 10.000 <= 1000.000)]\n", + " in models:\n", + " - IceCube: ns\n", + " \n", + " gamma1 [floating (1.000 <= 2.500 <= 4.000)]\n", + " in models:\n", + " - source1: gamma\n", + " \n", + " gamma2 [floating (1.000 <= 3.300 <= 4.000)]\n", + " in models:\n", + " - source2: gamma\n", + " \n" + ] + } + ], + "source": [ + "(pmm.def_param(param_ns, models=(detector,), model_param_names='ns')\n", + " .def_param(param_gamma1, models=(source1,), model_param_names='gamma')\n", + " .def_param(param_gamma2, models=(source2,), model_param_names='gamma')\n", + ")\n", + "print(pmm)" + ] + }, + { + "cell_type": "raw", + "id": "c49c165f", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "The :py:meth:`~skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` method of the :py:class:`~skyllh.core.parameters.ParameterModelMapper` class can create a numpy record array with the local source parameters of all or selected sources: " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "8623b261", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([(1, 1.1, 2), (2, 2.2, 3)],\n", + " 
dtype=[('model_idx', '" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = plt.figure(figsize=(6, 4))\n", + "ax = fig.add_subplot()\n", + "ax.plot(t, pd, drawstyle='steps')\n", + "ax.set_xlabel('time')\n", + "ax.set_ylabel(r'probability density / time$^{-1}$')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/sphinx/hypothesis/index.rst b/doc/sphinx/hypothesis/index.rst deleted file mode 100644 index 795bb07811..0000000000 --- a/doc/sphinx/hypothesis/index.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. hypothesis - -Hypothesis Definition -===================== - -Before any analysis can be performed, the null hypothesis and the alternative -hypothesis need to be defined. diff --git a/doc/sphinx/hypothesis/parameters.rst b/doc/sphinx/hypothesis/parameters.rst deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/doc/sphinx/index.rst b/doc/sphinx/index.rst index f3bcea87ca..3d3ddf0076 100644 --- a/doc/sphinx/index.rst +++ b/doc/sphinx/index.rst @@ -18,8 +18,9 @@ mathematical likelihood function. :caption: User Documentation installation + concepts/index tutorials/index - hypothesis/index + examples/index unit_tests reference/skyllh notes diff --git a/doc/sphinx/installation.rst b/doc/sphinx/installation.rst index f1bc293c20..db7e82e915 100644 --- a/doc/sphinx/installation.rst +++ b/doc/sphinx/installation.rst @@ -40,19 +40,20 @@ Using pip The latest `skyllh` release can be installed from `PyPI `_ repository: .. code:: bash + pip install skyllh The current development version can be installed using pip: .. 
code:: bash - pip install git+https://github.com/icecube/skyllh.git#egg=skyllh + pip install git+https://github.com/icecube/skyllh.git#egg=skyllh Optionally, the editable package version with a specified reference can be installed by: .. code:: bash - pip install -e git+https://github.com/icecube/skyllh.git@[ref]#egg=skyllh + pip install -e git+https://github.com/icecube/skyllh.git@[ref]#egg=skyllh where diff --git a/doc/sphinx/tutorials/getting_started.ipynb b/doc/sphinx/tutorials/getting_started.ipynb index 85ee4172b3..70bedb4286 100644 --- a/doc/sphinx/tutorials/getting_started.ipynb +++ b/doc/sphinx/tutorials/getting_started.ipynb @@ -13,27 +13,43 @@ "\n", "Slack channel: [#skyllh](https://icecube-spno.slack.com/channels/skyllh)\n", "\n", - "The user can find pre-defined IceCube log-likelihood analyses in [i3skyllh](https://github.com/icecube/i3skyllh) project.\n", + "The IceCube user can find pre-defined IceCube log-likelihood analyses in the [i3skyllh](https://github.com/icecube/i3skyllh) project.\n", "\n", "\n", "## SkyLLH's analysis workflow\n", "\n", - "To set-up and run an analysis the following procedure applies:\n", - "\n", + "To set-up and run an analysis the following procedure applies:" + ] + }, + { + "cell_type": "raw", + "id": "ce2502d0", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ "1. Create an analysis instance (preferably based on pre-defined `create_analysis` functions).\n", "It takes care of the following parts:\n", "\n", - " 1. Add the datasets and their PDF ratio instances via the [Analysis.add_dataset](../reference/skyllh.core.analysis.html#skyllh.core.analysis.Analysis.add_dataset) method.\n", - " 2. Construct the log-likelihood ratio function via the [Analysis.construct_llhratio](../reference/skyllh.core.analysis.html#skyllh.core.analysis.Analysis.construct_llhratio) method.\n", - "\n", - "\n", - "2. 
Call the [Analysis.do_trial](../reference/skyllh.core.analysis.html#skyllh.core.analysis.Analysis.do_trial) or [Analysis.unblind](../reference/skyllh.core.analysis.html#skyllh.core.analysis.Analysis.unblind) method to perform a random trial or to unblind the data. Both methods will fit the global fit parameters using the set up data. Finally, the test statistic is calculated internally via the [Analysis.calculate_test_statistic](../reference/skyllh.core.analysis.html#skyllh.core.analysis.Analysis.calculate_test_statistic) method." + " 1. Add the datasets and their PDF ratio instances via the :py:meth:`skyllh.core.analysis.Analysis.add_dataset` method.\n", + " 2. Construct the log-likelihood ratio function via the :py:meth:`skyllh.core.analysis.Analysis.construct_llhratio` method." + ] + }, + { + "cell_type": "raw", + "id": "cb08d693", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "2. Call the :py:meth:`skyllh.core.analysis.Analysis.do_trial`, or :py:meth:`skyllh.core.analysis.Analysis.unblind` method to perform a random trial, or to unblind the data, respectively. Both methods will fit the global fit parameters using the set up data. Finally, the test statistic is calculated internally via the :py:meth:`skyllh.core.analysis.Analysis.calculate_test_statistic` method." 
] } ], "metadata": { + "celltoolbar": "Raw Cell Format", "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, diff --git a/doc/sphinx/tutorials/index.rst b/doc/sphinx/tutorials/index.rst index 325ec04a3b..00b8bf1a15 100644 --- a/doc/sphinx/tutorials/index.rst +++ b/doc/sphinx/tutorials/index.rst @@ -9,5 +9,6 @@ Tutorials getting_started publicdata_ps + publicdata_ps_timedep kdepdf_mcbg_ps trad_ps_expbg diff --git a/doc/sphinx/tutorials/publicdata_ps.ipynb b/doc/sphinx/tutorials/publicdata_ps.ipynb index fa706cc042..6d17767baa 100644 --- a/doc/sphinx/tutorials/publicdata_ps.ipynb +++ b/doc/sphinx/tutorials/publicdata_ps.ipynb @@ -186,11 +186,11 @@ "text": [ "Help on function create_analysis in module skyllh.analyses.i3.publicdata_ps.time_integrated_ps:\n", "\n", - "create_analysis(datasets, source, refplflux_Phi0=1, refplflux_E0=1000.0, refplflux_gamma=2.0, ns_seed=100.0, ns_min=0.0, ns_max=1000.0, gamma_seed=3.0, gamma_min=1.0, gamma_max=5.0, kde_smoothing=False, minimizer_impl='LBFGS', cut_sindec=None, spl_smooth=None, cap_ratio=False, compress_data=False, keep_data_fields=None, optimize_delta_angle=10, tl=None, ppbar=None)\n", + "create_analysis(datasets, source, refplflux_Phi0=1, refplflux_E0=1000.0, refplflux_gamma=2.0, ns_seed=100.0, ns_min=0.0, ns_max=1000.0, gamma_seed=3.0, gamma_min=1.0, gamma_max=5.0, kde_smoothing=False, minimizer_impl='LBFGS', cut_sindec=None, spl_smooth=None, cap_ratio=False, compress_data=False, keep_data_fields=None, evt_sel_delta_angle_deg=10, construct_sig_generator=True, tl=None, ppbar=None, logger_name=None)\n", " Creates the Analysis instance for this particular analysis.\n", " \n", - " Parameters:\n", - " -----------\n", + " Parameters\n", + " ----------\n", " datasets : list of Dataset instances\n", " The list of Dataset instances, which should be used in the\n", " analysis.\n", @@ -218,10 +218,10 @@ " kde_smoothing : bool\n", " Apply a KDE-based 
smoothing to the data-driven background pdf.\n", " Default: False.\n", - " minimizer_impl : str | \"LBFGS\"\n", - " Minimizer implementation to be used. Supported options are \"LBFGS\"\n", + " minimizer_impl : str\n", + " Minimizer implementation to be used. Supported options are ``\"LBFGS\"``\n", " (L-BFG-S minimizer used from the :mod:`scipy.optimize` module), or\n", - " \"minuit\" (Minuit minimizer used by the :mod:`iminuit` module).\n", + " ``\"minuit\"`` (Minuit minimizer used by the :mod:`iminuit` module).\n", " Default: \"LBFGS\".\n", " cut_sindec : list of float | None\n", " sin(dec) values at which the energy cut in the southern sky should\n", @@ -241,16 +241,22 @@ " keep_data_fields : list of str | None\n", " List of additional data field names that should get kept when loading\n", " the data.\n", - " optimize_delta_angle : float\n", + " evt_sel_delta_angle_deg : float\n", " The delta angle in degrees for the event selection optimization methods.\n", + " construct_sig_generator : bool\n", + " Flag if the signal generator should be constructed (``True``) or not\n", + " (``False``).\n", " tl : TimeLord instance | None\n", " The TimeLord instance to use to time the creation of the analysis.\n", " ppbar : ProgressBar instance | None\n", " The instance of ProgressBar for the optional parent progress bar.\n", + " logger_name : str | None\n", + " The name of the logger to be used. 
If set to ``None``, ``__name__`` will\n", + " be used.\n", " \n", " Returns\n", " -------\n", - " analysis : TimeIntegratedMultiDatasetSingleSourceAnalysis\n", + " ana : instance of SingleSourceMultiDatasetLLHRatioAnalysis\n", " The Analysis instance for this analysis.\n", "\n" ] @@ -270,16 +276,16 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "from skyllh.physics.source import PointLikeSource" + "from skyllh.core.source_model import PointLikeSource" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -288,20 +294,20 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "[==========================================================] 100% ELT 0h:00m:12s[ ] 0% ELT 0h:00m:00s\n", - "[==========================================================] 100% ELT 0h:00m:11s[ ] 0% ELT 0h:00m:00s\n", - "[==========================================================] 100% ELT 0h:00m:12s[ ] 0% ELT 0h:00m:00s\n", - "[==========================================================] 100% ELT 0h:00m:11s[ ] 0% ELT 0h:00m:00s\n", - "[==========================================================] 100% ELT 0h:00m:13s[ ] 0% ELT 0h:00m:00s\n", - "[==========================================================] 100% ELT 0h:01m:35s\n", - "[==========================================================] 100% ELT 0h:00m:00s\n" + "100%|██████████| 43/43 [00:04<00:00, 8.93it/s]\n", + "100%|██████████| 43/43 [00:04<00:00, 9.81it/s]\n", + "100%|██████████| 43/43 [00:04<00:00, 9.72it/s]\n", + "100%|██████████| 43/43 [00:04<00:00, 9.70it/s]\n", + "100%|██████████| 43/43 [00:04<00:00, 8.63it/s]\n", + "100%|██████████| 5/5 [00:34<00:00, 6.98s/it]\n", + "100%|██████████| 220/220 [00:00<00:00, 7578.10it/s]\n" ] } ], @@ -330,7 +336,7 @@ }, { 
"cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -357,7 +363,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -367,25 +373,25 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ - "(fitparamset, log_lambda_max, fitparam_values, status) = ana.maximize_llhratio(rss)" + "(log_lambda_max, fitparam_values, status) = ana.llhratio.maximize(rss)" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "log_lambda_max = 6.572529560024986\n", + "log_lambda_max = 6.572529560024991\n", "fitparam_values = [14.58017285 2.16856498]\n", - "status = {'grad': array([ 2.22650668e-06, -7.55261157e-05]), 'task': 'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH', 'funcalls': 27, 'nit': 22, 'warnflag': 0, 'skyllh_minimizer_n_reps': 0, 'n_llhratio_func_calls': 27}\n" + "status = {'grad': array([ 2.22650668e-06, -7.55261158e-05]), 'task': 'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH', 'funcalls': 27, 'nit': 22, 'warnflag': 0, 'skyllh_minimizer_n_reps': 0, 'n_llhratio_func_calls': 27}\n" ] } ], @@ -414,7 +420,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -448,7 +454,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -458,7 +464,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -467,22 +473,25 @@ "text": [ "Help on method unblind in module skyllh.core.analysis:\n", "\n", - "unblind(rss) method of skyllh.core.analysis.TimeIntegratedMultiDatasetSingleSourceAnalysis instance\n", + "unblind(rss, tl=None) method of 
skyllh.core.analysis.SingleSourceMultiDatasetLLHRatioAnalysis instance\n", " Evaluates the unscrambled data, i.e. unblinds the data.\n", " \n", " Parameters\n", " ----------\n", - " rss : RandomStateService instance\n", - " The RandomStateService instance that should be used draw random\n", + " rss : instance of RandomStateService\n", + " The instance of RandomStateService that should be used draw random\n", " numbers from.\n", + " tl : instance of TimeLord | None\n", + " The optional instance of TimeLord that should be used to time the\n", + " maximization of the LLH ratio function.\n", " \n", " Returns\n", " -------\n", " TS : float\n", " The test-statistic value.\n", - " fitparam_dict : dict\n", - " The dictionary holding the global fit parameter names and their best\n", - " fit values.\n", + " global_params_dict : dict\n", + " The dictionary holding the global parameter names and their\n", + " best fit values. It includes fixed and floating parameters.\n", " status : dict\n", " The status dictionary with information about the performed\n", " minimization process of the negative of the log-likelihood ratio\n", @@ -505,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -514,7 +523,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -551,7 +560,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -563,7 +572,7 @@ } ], "source": [ - "scaling_factor = ana.calculate_fluxmodel_scaling_factor(x['ns'], [x['gamma']])\n", + "scaling_factor = ana.calculate_fluxmodel_scaling_factor(x['ns'], [x['ns'], x['gamma']])\n", "print(f'Flux scaling factor = {scaling_factor:.3e}')" ] }, @@ -577,7 +586,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -613,7 +622,7 @@ }, { "cell_type": "code", - "execution_count": 23, 
+ "execution_count": 24, "metadata": {}, "outputs": [ { @@ -622,26 +631,37 @@ "text": [ "Help on method evaluate in module skyllh.core.llhratio:\n", "\n", - "evaluate(fitparam_values, tl=None) method of skyllh.core.llhratio.MultiDatasetTCLLHRatio instance\n", + "evaluate(fitparam_values, src_params_recarray=None, tl=None) method of skyllh.core.llhratio.MultiDatasetTCLLHRatio instance\n", " Evaluates the composite log-likelihood-ratio function and returns its\n", " value and global fit parameter gradients.\n", " \n", " Parameters\n", " ----------\n", - " fitparam_values : (N_fitparams)-shaped numpy 1D ndarray\n", - " The ndarray holding the current values of the global fit parameters.\n", - " The first element of that array is, by definition, the number of\n", - " signal events, ns.\n", + " fitparam_values : instance of numpy ndarray\n", + " The (N_fitparams,)-shaped numpy 1D ndarray holding the current\n", + " values of the global fit parameters.\n", + " src_params_recarray : instance of numpy record ndarray | None\n", + " The numpy record ndarray of length N_sources holding the parameter\n", + " names and values of all sources.\n", + " See the documentation of the\n", + " :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray`\n", + " method for more information about this array.\n", + " It case it is ``None``, it will be created automatically from the\n", + " ``fitparam_values`` argument using the\n", + " :class:`~skyllh.core.parameters.ParameterModelMapper` instance.\n", + " tl : instance of TimeLord | None\n", + " The optional instance of TimeLord that should be used for timing\n", + " measurements.\n", " \n", " Returns\n", " -------\n", " log_lambda : float\n", " The calculated log-lambda value of the composite\n", " log-likelihood-ratio function.\n", - " grads : (N_fitparams,)-shaped 1D ndarray\n", - " The ndarray holding the gradient value of the composite\n", - " log-likelihood-ratio function for ns and each global fit parameter.\n", 
- " By definition the first element is the gradient for ns.\n", + " grads : instance of numpy ndarray\n", + " The (N_fitparams,)-shaped 1D ndarray holding the gradient value of\n", + " the composite log-likelihood-ratio function for each global fit\n", + " parameter.\n", "\n" ] } @@ -652,7 +672,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -682,7 +702,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -714,7 +734,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -727,7 +747,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -736,20 +756,18 @@ "(1.5, 4.0)" ] }, - "execution_count": 27, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeEAAAF5CAYAAACof5IgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAB3GUlEQVR4nO2dd5hcVd3HP2fa9t5Ldje9N0ioopFiV6yggmJFURCkBhAQpAQQsCEa0deooIKoWBBBIIgikN7LJluT7b3vtPP+MRNI2TJz7syduTvnk2ef2Zncc8+Zu3fmd77nV46QUqLRaDQajcZ8bLEegEaj0Wg0iYo2whqNRqPRxAhthDUajUajiRHaCGs0Go1GEyO0EdZoNBqNJkZoI6zRaDQaTYwwxQgLIb4phNglhNgphPitECJZCJErhHheCFEdfMwxYywajUaj0cQLUTfCQogy4BvACinlIsAOfBJYDbwgpZwNvBB8rtFoNBpNwmDWcrQDSBFCOIBUoAk4H1gX/P91wIdNGotGo9FoNHFB1I2wlPIw8F2gAWgGeqWUzwFFUsrm4DHNQGG0x6LRaDQaTTzhiHYHQV/v+cB0oAd4UghxcRjtLwUuBUhJSTm5qqoq7DE49uwJuw2Aa4ZSMwCGs9Xa7fYqdtiv2E5MU2wI+NPU2rkVb7thtWYADCi2U/17qGLkE5ms2C5FsZ2Rsaq2tfnU2gmPYjvF/gBQbetXbGegBLF0q7Wrbe+QUhaodzw2Z5xxhuzp6VFuv2fPnn9KKd8TuRFFj6gbYeBcoFZK2Q4ghPgjcAbQKoQokVI2CyFKgLaxGksp1wJrARYsWCB//etfhz2AghUrlAZecY9SMwC2na/Wblm3YofrFdvZr1FsCAydqtbusOKixxa1ZgC8ptiu00CfKuQZaDtbsd1SxXZGvnqzFduldqm1S25Ra2fvUWsHwKDJ7RQnGgDeOrV2n/5JvXqn49PT2cmvf/xj5fYrzj03P4LDiSpmGOEG4DQhRCoBLXMOsJHAnX
YJsCb4+PSkZxodhbq6sAfgDLtFEFVlAXiFYkPVybNqO7tdsSEgFW8f1bEaUcKqbY30qYkfVO9VQx471c+WajsDRlgormpFC68XDChhKxF1IyylfF0I8QdgM4HFvS0ElG068IQQ4osEDPUnoj0WjUaj0VgAny/qRlgI8U3gSwTW8XcAnycQOPx7oAqoAy6QUqquT4aEGUoYKeVtwG3HvTxKQBWHjtutpIRdYbcIYkAJj6hOZlXdQartjMz0VdWF6liHFNsZQtUpHAO/t9lqPxbuUuV7TvWrzsBKkTJmK2jAlqre1oIclTq7QEo5LIR4gkDq7AICqbNrhBCrCaTO3hDNsZhihDUajUajCRkTlDBvpc56eCt19kZgVfD/1xGIttFG+E0s5BNWVsKKQYrK7ZINzJ5V1UUsfMKmq2iTFXQsUF59MdBWWdHGArOrAhvoL958wsaNcL4QYuNRz9cGg3yBQOqsEOJI6uww8JyU8jkhxDGps0KIqKfOWumO1mg0Gk0iYDwwq0NKOW5ajNHU2UhiLSOs6BOOhRJWTUs13yeso6MnxuRE4SEDH0nV9xgTX7vJKEdHG/EJq7bV++qYgKHU2UhiLSOs0Wg0mqlP9H3CkUudNYi1jLCiT1h5Lmvg6jSqNlT17SrLdgtFRxtCVdFaKDrabEVrpehoSxGD6OjYfCjHJ8pGOJ5SZxPhjtZoNBqNlTAhOjpiqbMGsZYRNtkn7FN2JkOvakPVCalydKNyFjX4TfZdWcp3aXbRaQMkQlUw0/OLAZvZOcZG+jOyrBEFEqhilo4A0Gg0Go0mRlhLCfv92AbCjzsWioXmjShh033CdtVq+gbmYaqT5xHFdoYUm0V8wkYU9LDJfmgr+YRVFa0hH7TqZysGtaPjDXOKdcQF1jLCGo1Go5n6aCMcnwgU55bFav1JAy4W6/iEDbxJs/N9LaWENVHB7DiEmKD6HmOx41OU8PtBYdXTiljKCGs0Go0mMUiE6RZY0AgrzdcUlXBMfMKq6lJ5FxQjtaMV21kqylmVGChvVZ+w6t8jFrWjVVH17RryCVspOjrO8oQTCMsZYY1Go9FMfUSsB2ASljLCAnOVsN9ACm2DakPVAEdVn7CRPEizfcKGFLRVoqMTBNV7x+x01pjs2qSjowXaCGs0Go1GEzO0TzhOMTM62ohPuE+1oepMXzVP2IjPKyGio83uz8A4VVcKVK+rar43QIaBtirEpOa02TWgp1DFrATCckZYo9FoNFMfvRwdh5jtEzaUJ6w6sTTbJ2xEIZgeHR2LnF0LVcwyGyMrE2b7hGNRMUu1T+V12CmUJ4w2whqNRqPRxATlwkwWxHJGWGm+lq3W12gs0u5UZ/qqStgXA59wTEiAilkx8bWbTEKksyaK+ZmYRFHC+q+t0Wg0Gk2MsJQStqG4+22yWn8jRpSw6m5IZlfM8looOtqQKrWKorXKODGWt50IFbOU28YiOjr+lhgSRQlbyghrNBqNZuqjfcJxigCUUncV3+WIkbvA9OjoTLV2sYiOjokStlKfqiiOdchCXwNWqphlem7y1MoT1kpYo9FoNJoYoY1wHKKshBV9wl4jSlhV7SlPSBUHG4va0THJE06AillmE4s8YVViUjFLFV07OpGw0p2p0Wg0mgRB+4TjEIHJ0dFG7gJVf6lykKLi7NmveHGMEJO81ETYRUlxrKr7EMcCKylo1VUm5UpbU6dilnJ1RAtioU+fRqPRaBIBbYTjFLN9wjHJE1Zt51JVwomSJ2yVPi3k97aST9ifCIubUytPOFGI+p0phJgrhNh61E+fEOIqIUSuEOJ5IUR18DEn2mPRaDQajTWwGfixElFXwlLKfcAyACGEHTgM/AlYDbwgpVwjhFgdfH7DROcyPTraSIy8aqCi8oRU8daLRZ6wMhZSiVYiEWpHqyrvWFTMUu5zVLFd/JFIy9FmTxrOAQ5KKeuB84F1wdfXAR82eSwajUajiVPsBn6shNk+4U8Cvw3+XiSlbAaQUj
YLIQrHaiCEuBS4FKDCBiIv/E79SiHV0BOLilmqPuFkxTdpZKY/otjOSM1hZaySt2shtW+l2tGq6IpZMSGRylaa9j6FEC7gQ8CT4bSTUq6VUq6QUq4oSJS/ikaj0WgSAjOnau8FNkspW4PPW4UQJUEVXAK0TXoGJ1AcfseqSrhRrVkAq/iEjUSNmr5/rfbrRoVYrEyorqKoCrb4EnoTo6qgjah2W/xdIKstK6tiprb8FG8tRQP8Bbgk+PslwNMmjkWj0Wg0ccqRwKxo+YTjKWvHFCUshEgFzgO+ctTLa4AnhBBfBBqAT0x6IgdqSlhxqtGr1iyA8sRStYKV4rzRyATY9BrQ5vtL7YoXSP2yWsgnbKWoalUsVTFLrVm8Es23E8msHaOYYoSllENA3nGvdRKIltZoNBqNJla8mbUjhDgfWBV8fR2wnqlghDUajUajCZUI5AnnCyE2HvV8rZRy7TjHhp21E0msZYQTITDLXqDaUK2ZkeVo0wOszA/MSlWMlOu3UlhJLJaVzd7q0yopUUYwlBIVX0GPETDCHVLKFZP281bWzo3GulNninkRNBqNRjMVMKls5ZhZOwAhZ+0YJCGUsE+p1mUgWkwZ1Rm7SFNsqJqipNgdWCowRzXAyuz+fDowa2LMVrSWKltpACPpTdFAgDCyy2ro9+Z4WTtrMClrRythjUaj0SQcR2Xt/PGol9cA5wkhqoP/tyba44iz6c8kmKyEY+ITVlbCio7vBElRUvXtpiGV2sWkMqcyqtfVwNeH2RcoFrUozFbCRhS0iC+fMHYg3UD7EJRwvGTtWMsIazQajWbqY8OYEW6P1ECij7WMsB3IDr+ZanR0TIp1qEZHq/p0dHR0VFCPqraQTzgWct9sRWukrKvZGPHrijgzBUaVsIWIsyuv0Wg0moTHqBK2ENYywjaUqjqOKiacWconrFz0Xa0ZYKmNGFR9u6rtBhFK7WJDDHzCZkdWx2LjByuVrYxFRLYGsJoR1mg0Gs3URyvhOEVRCY+oll5RVbOgPoO2paq1U53JxiQ6WhV1Ba2qaM3uz1I+4VjkCeuKWeMzhSpmaZ+wRqPRaDSxQivhOEWgpIS9qu45IypRVUXbFOuF+2KghJUxd1tBI6hGOQ9aqXZ0LFSQVaqtGbnlrJQnHG8VsxIIfeU1Go1GE1/o5eg4RfuEx8ejWGg1QaKjVRWtKqo+YSNq3/R1AivVjo7Jio8iOlJZL0drNBqNRhMztBKOUxR9wiOq+XNuxXZgfp5wAlTMMlvNgtVqR1uoEplVimsbueXMVrRTqWJWAilhC9Vk02g0Go1mahFn059JMNsnbGQWrKwwFbd8slSesJryMpLra37FLLWbx4ja71duqUgs1OyIYjvFUAtDqNad1hWzEkoJx9mV12g0Gk3CI0gY62SttynU9gZWVsKx8AnbFQdrqdrRmuhgIZ+wVe6dhImqjrOKWdoIazQajUYTI7QRjk+kohI+rNphLPKEVZWwqh9J1ccGmK28YuETVvXRpinuomTkPZruE46FmlXtMxa1o5W/A3TFrERCX3mNRqPRxBdaCccpAvyu8Jv1qfYXi9rRLpOjow2pGXOVsJHI4VjkGJuP9glHHCv5hI0Qb9HR2ghrNBqNRhNDEsQ6WeptqvqEG1Q7jIVPWHX3HdWZbEwqF8VZJOYEqFfMMjcvOTYY+TuafL/GQtGq9qmc6WDg69xunc/kVMNSRlij0Wg0CYBejo5PVJVwo2qHRpSwX3FXI9WyNx7FdjHxCasRi+hoa2Ehn7BViEUogWqlralUMUsbYY1Go9FoYoQ2wnGKYnS0shI24kdS3Q0JhTcI6mONgRJW3TM3FmrWSrWjLYWqb1f1fo2FT9jsP6WumGVJTNlFSQiRLYT4gxBirxBijxDidCFErhDieSFEdfAxx4yxaDQajUYTL5g11/g+8KyU8uNCCBeBPU1uAl6QUq4RQqwGVgM3THQSv4BRheDhmERH2wsUG6ruvKLYnaHo6Km/L7
CVUF1hUBeJMYiONhsrRVXbpljFrDgcUjSIuhIWQmQCbwd+DiCldEspe4DzgXXBw9YBH472WDQajUZjAY4sR6v+WAgzhjsDaAf+TwixFNgEXAkUSSmbAaSUzUKIwrEaCyEuBS4FKJumtiNSTCpmKfuEFfOELeQTViUWCtrs2tGxwUIVsxKhdrRi0bwphfYJRxQHcBLwiJRyOTBIYOk5JKSUa6WUK6SUK3LzozVEjUaj0cQTfpv6j5UwY65xCDgkpXw9+PwPBIxwqxCiJKiCS4C2yU7kB0YULnCv6ozUiE/YlqrYcOor4Vj4Z60SdWwt33WcRdRGAyOrYaptlXdfMmB9RILIzqMQQmQDjwKLAAl8AdgH/B6oAuqAC6SU3dEcR9TnDFLKFqBRCDE3+NI5wG7gL8AlwdcuAZ6O9lg0Go1GYwEESLv6T4gcCRieBywF9hAQiC9IKWcDLxDGqq0qZk1/rgAeC0ZG1wCfJzABeEII8UUCAcyfmOwkUqj5hJUVrVuxHYBtTBf35PgUK21ZKDpaVe1ZqWKWau1oI6iq/X4r+YRVUd03W7XwndWIs4pZUoRlTMPmqIDhz0EgYBhwCyHOB1YFD1sHrGeSrB2jmHLlpZRbgRVj/Nc5ZvSv0Wg0Gmth0LebL4TYeNTztVLKtUc9NxQwHEnia/ozCX4UlXAsIipVfcKqM9KY+ITVUI84tpK/VA1rvccYKOiY7PqliOr3h2o7I/5riwUzhUCHlHIs4XeEIwHDV0gpXxdCfB8Tlp7HYupdeo1Go9FYm+j7hMcKGD6JYMAwQKgBw0axlhIWatHRyr7dmPiEraSErRMda7bCVK0dbQTV99gf4XGEhNmKVvU+z4joKOKXOMzriaZPWErZIoRoFELMlVLu462A4d0EAoXXYFLAsKWMsEaj0WimPlKYMi+ISMCwUSxlhH1Aj8ofRlWUxCJP2KMYjqk6VgtFR1sl19cIsXmP1lnRiEUMgzJm5wkbMVpRVJ2qRFMJQ/wEDMffGoRGo9FoNAmCpZSwG8W9gVV9u4ZqR2eqtVPdzcRCPuFYRAA7TFeYiTC/tZCCVl3xicUuSrEg3t5nlPOE4wlLGWGNRqPRTH0kcRkrFhUsZYTdKO4NrOovNbSfsOIdpJonrCz0jKgZc5WQlXJoYzFW9T4tVDFL+4THZyopR62ENRqNRqOJHdoIxyE+FPcGVlW0RvwkdpdaO1UlrFobNwYqKBYVs6RiW2HyvsBWUvuW8glbSUHrilkJhaWMsEaj0WimPlKAL0EmBtoIazQajSaukIDX3EWomGEpI6wcmKW6TGMkMMulOI1T3cpQeblNfUnRrnhhrbTkqrqMHYv3qF7oIxbLyop9Dpn8lWUks81KtWXicKyJYoQTRPBrNBqNRhN/WE4JKxXriEWKkmq+gGpgVgy2eDO7xKKRght+HZgVBSwUmGUldNlKpABvgkhESxlhjUaj0Ux9tE84TvFgctlKFP2zADjVmqmWiYmBT1hVtVlJ7an6hM0vk2k1VO87xa8s1c+Hcuofyl8Byu2MEGdlK7UR1mg0Go0mRujl6DjFB/SqzNhUfbv2AsWGoOxkMX0jBvOVsKovWVWVAvgV+7RZKHZR9e+hGuUeE/FkpaIbqsSibGWc+YQTCUsZYY1Go9EkBno5Oh6RqKla1ZmlSFNsCMqOHdWxKkdHWyfC1YgSVsVsBW12xLkxrHPvKH8+jCjvVMV22iesfcIajUaj0cQKifYJxyeqSthKPmHVscbAJxyLjRhUMXsDh1ioditFnSvfd4ngE9YbOAQCsxJECcfZpddoNBqNJnGwlhL2o5bzq6ouDfmEdXR0pNup+mc1E6O6otEf4XFEFdXPh5FKdDkG2ppNnH20tE9Yo9FoNJoYon3C8YiqT1i1YpYRn7DqbkiqM9IYREdbyQcZCx+tCla6psaio1V3UTLQpVXQtaPxAQOxHoRJJMhcQ6PRaDSa+CMxlLCqurQVKjZEfTckC/mEVVGtq+yxkE
qMhfK2Vo6xyah+PoxEY6vWndZ5woHqiLEehElYywhrNBqNZsrjA/piPQiTsJYRlqj5d1V9wjbVkjeAW9EnnAB5wolQ+UqVWPiEVfvst1LFLCuha0drJRxphBB1BDIafIBXSrlCCJEL/B6oAuqAC6SU3WaMR6PRaDTxizbC0eGdUsqOo56vBl6QUq4RQqwOPr9h0rOoCBrlilmZig0x3yecANHRVolwBp3TPDkWqZhlpD/VtgYW4JSJM59wIhHL5ejzgVXB39cB6wnFCGs0Go1mSqN9wpFHAs8JISTwUynlWqBIStkMIKVsFkKMGYoshLgUuBSAEsytmGU34A/0WyU6Wh1VJZwIijYW+xCbXcHMUlgpv1jXjtbL0VHgTCllU9DQPi+E2Btqw6DBXgsgFooE+LbQmMVw6gBd+a0kjaSQ3VmIw2etOEWNZqpihhGOl1glU751pJRNwcc2IcSfgFOAViFESVAFlwBtk58Ic/cTxqXaEPwmV8xS9LHZLeQMMuJnlcF/w2kDdBa20FXQwmDmWwtewm8jqyuP/NYSctuLcXgDyZqquyipoppDHRtiER2t2qeeYE1InN12Ji5HRyZWyQBRvzOFEGmATUrZH/z9XcAdwF+AS4A1wcenoz0WTeIhkfRnddNZ0EJXYQsjqYF1yYyebKr2LSCvrZiRlCG6ClrpKmzhwMLtCP8OsjsLyGstIa+9GIcvFtUTNBpNDDA9VsmM6WER8CchxJH+HpdSPiuE2AA8IYT4ItAAfGLSM5m9n7ARR4mqT1i1yo5iQyNVllTbRjty2Otw05PbQXdeO935bXiSRhF+QVZXPqX1M8htL8J1VB530mgKWT15VFXPZyCzl46iJjqLmukuaOOgbwc5nQUUNk8jp6MQmwztnrCS3zs2mKyiY7GLkmrbFMV2cebXNUIElqPzhRAbj3q+NujaPBrlWKVIEnUjLKWsAZaO8XoncE60+9ckBv0ZPXTnt9GT105/VjcIsHscAUXbXkx2R8GkilYgyOjLJqMvO2CQs3roKGqmo6iJrsJWnG4XBc1lFDZNI23QQPqaRqOZkAgY4Q4p5YpJjlGOVYok1nKUqFbMEnmKHRrwCavmCZtc+cpIZKzZ0dFjtZNI6ubsprmiDiSk92VRXjuL7M4CMvqyEUHlesQnHA7pvdlk9OZQVT2P7rx22koP0TytjqbKWtJ6syivm0Vee7HSe4kGqn8PXXNaE2/4ib5POGKxSgaxlhHWaI7Cb/NRvXAbnUXNFDdWMq1mDk6PgYnTOAhpI7ejiNyOIjxON+3Fh2ktr2ffkk3M3rWUgpbyiPep0WiiRzzFKlnLCKv6hJVrQBtwsvgUjUGnaofmK+FY4nG42bd0E305XVTtn09pwwxT+nV6XJQ2TqfocAV7lm2geuE2hN9OfltJRM5vLV9yAkRHx6Jilmo7xYSMeMSEFKXIxSoZxFpGWKMBhlMG2bNsA6Mpw8zesYyC1jLTx2D325m/dQW7l79B9aIt2LfZyemMegyHRpM4RDF7Mp5ilaxlhJWVsOqXo4GlTYvsC2zEH6iqov0GfMK9OZ3sX7wZECzYdAqZvbkhqUfViGz7BNvL2P0O5m9dya6TX2Pfkk0s2Hwqmb25b45VMxEWiY7WxAZJ3OUuR4spFNSumcpIJM3ldexe/gZOj4vFG05/0+DFEofPyfwtp5A0ksKeZRsYSuuP9ZA0mqmBz8CPhbCeElaJjrYXKHZoxCes2M7k3ZCM+IRVqzuF+yf023wcmLuD1rIGctoLmbVzqWkFNEJRtE6Pi3lbVrJz5f/YvfwNFm04nbTRDBNGdyw6yjkKxMInrIpyjQGmlD/ZamglrIlrhlMG2bryFVrLGiirmcXcbSfHZQWr5JFU5m9Zic/hZe+yjXjtyhViNBqNRCvhuEVlsq+shA34hFW/gy3ku1L1e4bqn+0saKZ6wTYEgvlbVpLZmaeU72sWaQOZzNm+nD3LNrJ38SYWbjvlzTzleEZ1NcRI3X
Gfsk9YsZ2VdlEyu9JWvJIgCzvx/w2hSTh8Nh8H5+5g79JNJA+nsfT1s8jtLIr1sEIiu6uAGXsX0Z3fRs3sXbEejkZjTbQSjlP8KFbMSlPsz8DlsUh0dCzyhCdSsoNp/VQv3sJQej+l9dOpODAPm7QZVr9mqueipmmMpA3QVFlLymA6xYcrQ24brypfo4CqolWNNzSyiqZ9wjHDWkZYM6XpKGqiesE2HF4H8zefQk6Xqhsh9lQdmM9Q2gA1c3eSOpARF5HcGo1lOKKEEwBrGWHVPGFVn7DPwPRQ1Z9hcnS0kYjaSKq2wfQ+DizYRnpfFnO3n4zLkxTV/kJFNb/YIR3M3bmcbaf8h71LNrHs9bOO2blJYxGM+JLNVrSqhQHBWGR1tNA+YY3GHLx2D/uWbMLudTJ3x9gG2Io4vC7mbVuBz+Fl3+LNSJEg3yoajVG0TzhOUVXCyYrbzhnxCav4rgG6VDs03yesqhKPVrQSSfXCbYwmD7Ng06k43a6oKF7VcwqEoX7TBjOZuWcx1Yu2Uj9zH1UH5hs634R9Kb7HWOy+pF7SRDE62kJZB8oYUe3xGFltMWOqilbCmphyuOog3YWtVByYN2X9poUt5RQdqgi817yo74ym0WgshPWUsIrCTFaca/invk84lrsodee10ThzP/nNpZQ0VE16fCx8wpHa+7hq/wL6s7rZv3Ary14b3z+surpgPWKxA5MCVtpFyYiajbeVggSqHW0tI6yZMgyl9VO9eCtp/ZnM2LPY8LLvmNjBmWXHmWvHmeMIPtpx5joCjzl2HFl2+rYM0/R4N96e6K1/2f125uxYzvZT/0P1wm0s2HJKdN6zRjNVSJDlaIsZYQF+laAdxcpXCZAnbARVlTjqHGHv0o3YfDZmb1uO8AtDCtCebiP/3AxcBccaWkemDWE71tBJr8TT48PT5WO02cNwvZucM9PIPjWV1j/20vb3XmSULmXqUAaV1fOpnbeL7oJWctuLo9PRlMYiCjoWGFGzRiKro4FOUdJoooPf5qN6yRbcSaPM33QKSaPGIkLS5iVR9Y0CXPmOgHHt9uLp9jFU48bT7cPT5cXT5Qv83u3F2+fn+LlD8xM9lH0mh9KLcshdlU7j2g4G9owaGtd4FB+uoHlaHQ0z9pPTXqTVsEYzHno5Oh6xKVa/Uiz4b8QnrFy/X22mr1rH18w8YYlk34It9Od0M2vHUjL6ssNqf4xaFlD8kWxKL8hhtNXLnhsOMVSrFpI+0urm4HdbyVyaQsWX8pl9ewkdL/Vz6NedMKB0ynER0sa02llUL9pGV2ELeW0lETt3LKKcrYOqgjbwFamqTGNRO9pKtbWnGBYzwhorUzdrD+3Fhyk/MJu8VnXj4yp0MP3yQtLnJdP5Sj8Nj3bgHzYetNW3bZhd1xyi5GPZFH8wm6zlKTT+tIu+TZGNWslvKePQ9AM0Tq8mt61Yq2GN5nj0cnS84lSsfmVX686IT1i5Ao25la/Mio4+PK2GQ1UHKD5USUnddGV/cv45GZRfkgd+qP1RG13/jqxUlW5J02+76X51kKqvFzDjhkI6Xxrg8C+7ImLoIZB7XH5EDRe0kmdR37CRe6ffKr7deIsaThS0EdZoIkdb0SFq5uwkr62YWXuX4FbIM3MVOii/tIDMJan07Rim7sdteDqj9ykdrnez98bDlH48l6IPZ5G+MJn677UzdEC1Csux5LeW0jijmkPTD5CrfcMazYkkgpcEXaxDE2W68lrZv3ALmT15zN15cvjGxgYF789g7ndLSJuVTMOjHVTf2RxVA3wE6YOW3/dSfWsLAph9RzEFH8wkEvZSSBtldTMZzOylN7fD+Ak1Go0lsZYSFg7F5egYBGZZZEtCI0uK3kmmqj05HexZsoHUgUzmb10RdipS2twkyr+YS0qVi95NQ9Q/2m6K8T0aiWSwepS91zcx7at5lH0mh7R5SdT/qH3C5elQltvzm99Sw1ld+SG3Gw+tpROcqbR0nkDL0VoJa6JCX1
YXe5ZtIHk4jYVbTsXhC30i5MiyUfH1PGZ/pxh7uo3aB9qpvdd8A3w0viE/dQ+2c+j/Osk6KYW5d5eSVKo4uQtik3ZKGqroy+miP7MnMgPVaKYKegOHOEQ4wFao0FBVCRuYo3SqNjRXCTsMOF7GG2lfVje7l79B0kgyCzefhtNzbLGU8dSecAoK35dJ0UeyEC5By596aP1jL/7RwPGjMdhvLeW4Kgbt/+hnuM5N1dWFzPpWEXuuOWwoYKvocAWHph+gqbKGuTtOMjrcBEA1oCsGKUpmpxrFW8ENIyRQ2UqthDURxeMcZc/SN3C6XSzcfBoud4gVzmww47pCSi/KoX/XCHuvOUzzb3veNMDxxMCeUWrubcWZa6fsM8Y2nbD7HBQdrqCrsIWRZJ2sqdG8SZwrYSHESiFE8VHPPyuEeFoI8QMhRMhfDNZSwmanKBkJhDXZJxyLggtjKdqaubvwObws2nQ6rtHkkH2c5Z/PJXNZCg0/7aDzhQhXyIgCQwfctP21j6Lzs+h+dZCBneoqvbixiuaKWloq6sjYv1T5PKqrGrGIJ7BM+Ukj8yKzNwXTcziz+SlwLoAQ4u3AGuAKYBmwFvh4KCfRSlgTMbryW+kobqK8dhapgxkhtyt4XyYF786k9eleSxjgIzQ/0cNIs4eKS/MQTvWwqKTRZPJaS2grPYTXrlxqTaOZOhwJzIpjJQzYpZRHdoC/EFgrpXxKSnkLMCvUk4SlhIUQTimlRwgxE+iUUvaE2M4ObAQOSyk/EJTqvweqgDrgAill9+RnUixbqbTpA8Z8EhbZktBINO7RbT0ONwfn7yC1P4PSupkTnvfoCOmslamUfTaH7tcHOfT4xI70YQaVx5qCSrnTSbYW9EDD2nbm3FZK0cczafptd2jtxqCosYKOkiZaSxopOzRDaayaiYiB8rbSVobxSPz7hO1CCIeU0gucA1x61P+FbFvDVcJ3CyHKgDuBH4XR7kpgz1HPVwMvSClnAy8En2ssTN3c3XidbmbtXoJNhnZbpc1NYsaVhQweGKX2h20nbKxgBfp3jdD5cj9FH8w2FC2d3pdNWm8WTdNqY7JvskYTV1hDCf8WeFkI8TSBqdMrAEKIWUBvqCcJ1yecCZwP3ANcHEoDIUQ58H7gLuDq4MvnA6uCv68D1gM3TH4yxTxhn6LrOyY+YTXfYiyU8BE6C5vpKGmi/OBs0vqzQmqTPM3JrBuKcXd4OXBvC9IdXcOjqqKTmDxX/NBvusg6OZWKL+RRfWeLUj8ARYcqqFm4g+6cdnK6VWIfEgGTFa2Vcm+tNNZQiP9Uo3sJiMgS4Dkp5ZEvMRsB33BIhKuE1wOpUsrtQHWIbb4HXM+xiwtFUspmgOCjSt6RJg5wu0aombeTtN4syupmhtTGVeBg9s0l+N2S6rta8PXH/7rTRHh7fTT9vpvMJalkr1TPE8lrLcbhdtJSXhe5wWk0mmjxhpTyNSnln6SUb87ypZT7pZSbQz3JpBJRCHE2cBHQA+wEXhJCJEkpfxZC2w8AbVLKTUKIVaEO6rhzXMqRtfb8HJRyfn2Kla+M2AaT84RVo6PD9V0ejQ8fBxZsx2/3MXPXYpCTn8+RY2fmLcUIJ+y8rZ7h9ujs22s27c/3UXBeJuWfzaN3y5DSn9Hmt1PUXEHTtBrcrhFcbgMV2zQaK2NSnrDBeKWIFKkLRQn/Bvgb8BowA7gV2BXi+c8EPiSEqAN+B5wthPgN0CqEKAEIPraNdwIp5Vop5Qop5Qoy1YJrNNGhZVo9vfkdVFTPI2UofdLjHdk2Zt5WiCPTzp67Gxg+NDUMMAB+aFzXSVKRk8L3hrYkPxbFhyuRNklrSWMEB6fRWBBzfMJG4pUKhBBXj/cT6gBCcZYekFL+Kfj7k6GeGEBKeSNwI0BQCV8rpbxYCHE/cAmBvKpLgKdDO6MAXJMedQKqNaAtlCds1paER+jP6KFx9j6y2wspPDRt0uMd2TZm3lqEM9fO7rvqGTgYvu97yEAiZK
oJ5YT6dwzTu3mI4o9m0/PSML6B8KfyqUPpZHbn0lLWQHn9LFN2V1K9d4zlplskT9gIqt8BZlfaikdMqB0dgXglO5COQUUcihF+WQjxTeB7RzmejbIGeEII8UWgAfhEhM6rMQGv3cOexRtwuF3M2L1oUkPhyLEz87ZCnLl2au9pp3/fVIsgeYtDj3Wy4P5yCs/PpPmxHqVzFDdVsH/hVvqyusjqzYvsADWaxCBfCLHxqOdrpZRrjzvmewTilY4uanBMvJIQYqJ4pWYp5R1GBxqKEV4ILAJuEEJsArYCW6WU4ari9QRmFUgpOwnkVYWJQMkn7FeMjjYy0TdZCasSbnS0RLJ//laGUwaZv+kUHB7nhOdwFdiZeUtgCbrmrjYG940ayvdVRVVFh5tfPNo4QvsrveS/N5P2Z/rxdoc/nc9vLeXg3B20ljWEZYQTI7XJ7NrRRj6PJhcknGpzW2M+4Q4p5Yrx/jMS8UqMo4CFEGcCn5ZSfj2Uk0zqE5ZSflRKOQeYDtxGICr61DAGqplCNJfX0l58mKqD88noyZnw2KQyJ7OCOyEdvKuVwX1TyAc8AY1PtCNsgqKPZCq1t/sd5LeV0lHYjM+WAMu2Gs3xRD9P2HC8EkcJSSHEMiHEfcHzPUiIKbwQxlRNSjlMIIps42THRg+BUh1oVZ+wEZ/EFKyY1ZfZxcE5O8ltL6K8bhajjG9UU2e5mLm6COmTVN/Wwkij8XKMsaiYpcJou4fOlwbIPSed1qd7w9qC8Uh0eX5zGa2ljXQUNFPQWhatoWriEVVFO9V2UYqiTzhC8Ur5QojLgU8DAwRiplZJKeuEELWhjkXXjtaEhNs1wp4lG0kaSWHOruUT+oEzl6cw+7ZifEN+9t8aGQNsNVr/1IsQUPghNTWc1Z2HaySZ9uLDER6ZRmMR/AZ+1FkDnCeEqAbOCz4fj70EArs+HszguVdKWRf8v5DVjcV2UTLZJ2zEdijOZu2K079o7qLkF372LN6I1+lh2Ya34fSOH6GeVOJg+tUFjBz2cPCeVry9J47LSJSzVfB0+Oj69yB556TT+sfeMa/DRAgEBS2lNFXU4nG6T9iT2eqo3ufxX0TpKMyOcp76H6uoYCBe6WPAJ4HnhRD/Ap4AnpVShmU5tBLWTIhEcmDedvpyupizexlpAxPkwNqg8vIC/B5Jzb1tYRueqUbrn3sRDkH+e9XUcH5rGdIm6SxQL4Wp0VgSC9SODlbKupDAjknPAl8BDgkh/o9AieeQSBAlrOgTVt8iVrmxqqJV9QlPVuGqaVoNrWUNlNfMJq+15Jjjj29b8tFs0mYncfDBVka742sJWt2frF7D2d3ipff1IfLflU7bn3vxj4T3N0rrzyR5KJWOoiaKmyqUx6GBhIiOnmpYZNkjWLLyMeCxYMWtTxCouBUSWglrxqUrv5XaObvJbSumombOhMemzU2i5OM5dL7cT89r5qcgxSttf+3DkW4nd9XkFcWORyDIby2lN6cTj9NI5RiNxmIcKVtpvk/YEFLKLinlT6WU7wy1jcWmaooVszyKcw3l+s9gdpSzajvPOO2GUwbZt2gzaf1ZzN65FBg/ktqeZmP6FYW427w0/KJj0j5VVWksoqNV+0wN9jd0wM3gvhEK3pdBxz/7w96uMa+thEPTD9BV0ELRJGpYNU/Y7NUXY1goZUvvJ2wMiyhho2glrDkBiaRm3k4EgnnbTsY+SWBb5WUFuHId1Hy/Df9wIhSMCI/2Z/pJKnaSsTR8t0hafyZJwyl0FrRGYWQajSbWWE8Jq0Q6q86oDFWgMVcJOxTVzFie246iJnrzOpi+dyFJo+NPr0cZoeS9ueSckkbtuha6D4a8j/WU52hV2vPGIJ7uHPLfk0Hf1vBuKoEgt72YlrJ6fHYvdtW9sTVTn6lUMcuE2tHxglbCmmPwON3Uzt1NWm8WxYcqJzw2Y04KlZ8pomtDP81/7zJphNZD+qDzxQEyl6XgzAu/2ExuexHS7qcnd/Klfo1mym
BBn7AK1ppWS5tapLNqoG4MlLCqf07VH3h8u7rZu/E5PMzcc8qE53Vk2ZhxdTmjHR6qH9YFJSaj88UBij6SRd7ZGbQ82TPucWNd74yeHOweB135LeS2F0VxlFOZWERHa9QRQJKB9oZSW0xFK2HNm3TntdNeepjS+hmkDYyf5ibsUHF1Ho40O/seaMQ3ZLGpZwxwt3vp3z5C3qr0sDc+s0kb2Z0FdOe3J8gmDRpN4mAtJYwAn4ISVvUtWCg6WpUjub5eu4ea+TtIHkijtHbGhPnDZZ/PIW1+Eju/d5DO+vCXoVUjjmNTaUs9T/h4OtcPMP2qAtIXJjOwM7yZek5nAZ3FzQyl901cMMUiqK749FtKmSqOdchiX8tRwQbCSL136yhh/dfWANAwZy/upBEWbj8Nm398v2Xeu9PJf1cGbU/30fqq9gOHQ++GIXxDfnLPSgvbCGd3BiYDPbkdU8IIazQTYwObkR0pDCkoU7GWEZY2NSUcE5+w2kzMjN2Qjqc7r432ssOU1s4gvS973OPSlyRT9rkcejcO0fx4j3J/VkJVfeeO8Zr0SHpeHyT71DQaH+1CekL/m7ncySQPptGX00VZw0ylMUWa2OQJaxIDu0ElbB20TzjB8Tjd1C7YSUp/OmU1s8Y9LrnCSdXV+Yw0emj4QWfYRSc0AXpeG8KeaiN9XvhBJ5k9ufRld2m/sGbqI4LL0ao/FsJaShgB0sQ8YUMuSHOjoyerAT0WEknN/J14nR7mbl6BkGLML3hnrp3pNxbgG/ZTs6YN30igL1WVmAi7KI3HwO4R/B5JxtIU+neEt1qS2ZNLW1kjQ2n9pA2qbQoxFlrRTkQMfNB6P+GEwmJGWBNJmsvq6ClsY9r+uaQOZox5jD3NxoybC7Gn2Ki+rQVPV/xk0AshmLGkiuVnL6V8ThnP/uJ59m7YH+thTYh/VDK4b4SMJSlAd1htM3tyAOjP7o6oEdZo4g+jPmHrYC0jLBWjo1V9wobijsyNjg53iXIwrY+Dc3aQ2ZFHUUPFmO2FSzD9hgKSip0cvLuV4frYbiIgU33MWTaLeSvmsvRtizjpncvJzg8EKY0Oj/Khr76XF55Yz4+u/wkt9ceWeUyNI5nQv2OE0k/l4Miw4e0PfQUjaTgV56iL/qxuig+fWEhFZTUkcYhFVLVqn4pfy1NqgclodLR1sJYR1kQEn83LnsUbsXudzNi9CDFW4qoNZlxTQNqcJOoeamdgl7kh/06Xg5lLZzBnxSzmrpjNnBWzqFpYgd0eiNxubWzjv3/7H5te3MLGFzcz2DvIp665gIuuu5AzP3Aav7jjV/z2wSfxeuIvpaU/GBmdvjCZntdC/+YUCDJ6c+jP6onSyDSaOEFoIxynKFbMspBP2AwlfGDedobS+lm05XSc7rEDhAo/mEnm8lQa1nbQ8/rYF8LIrkZjMWvZDD741fcyd8Vspi+uxOkK7B3d097Lvg3VvPznV9izcR97N+6jo/nEFISf376Ov/78Gb75/cv52ppLed8l7+aeL3+X7f/dGdEo51CYSJUO1AzjG/aTtjCJrtcGwjpvel82XYWteBxunF6FHcXiBNX7vD/C49DEK3o5WjNFaSmtp7W0kWk1c8jpKmBkjFSqpDInJZ/Ioef1QTr/FZ6RUOXU963gtidXI/2SXf/by5MP/Jl9G6vZt6GatsZ2IDSj33aonRs/dhtnvO80rvnRN/jxy9/j12t+y2PffhKfN0782T4Y2DdCxvzw957L6M0GYCCzl5yuyBUS0Wg0scFaRlgCfoWsKlVFa6Ha0aEwkNHDgbk7yO7Mp7JmLjCGYhNQcVke/lE/9Y92RMXPeLwx/eAX3sf1P72a6q0HuOb9N9LdFl7A0li8+sxrbF2yjau+dzmfu/liTnvXKdz5qftpOths+NyRYGDPCGWfysWeZsM3GPo1TusP+MAHM3sS1AjHn3sh8pjsS4Y49CfrPGHNFMPjHGX3kg04PS7m7jx5bD8wUPShLNLnJNPwiw
68vdFVjjabjSvu/yo3/fw6Nr6wmcvf+c2IGOAjDA0Mc/eX7mf1R2+ldGYJj2x4iJPPXRax8xthYF9gBSJtTnj5wg6vk+TBNAYy9baRmqmMzhOOTySgEqCrrGjVg5Hsio5oVV/ZRIpVCj97Fm/E7Rpl8cYzcHicbx4/etR7TK1MovTCXDr+10vLfycv+2bEJ5yWmcbtj93MmR84nT/86E9875sPR225+OU/vULDlsPc+ZdbWPPs7fzkmp/z1Pf/EpW+QmXo4CjSJ0mfk0zflrdu0FB8+2n9mQxk9eiiHaZgRHmbrGiNrNyF7xmJLiJxfMJaCScAtXN205vbyay9i8nozx7zGJtTMOeKMrz9XmoebYnqeGYtmcn/bfwJp77nFO677CEeuOIHUffXttS1cvnp1/Lq06/z9e9dypfvuSSq/U2Gf1QyXO8mbXb4lbPS+jMZTRnG61DNvdNoNPGC9ZSwyveOci1v9VlwgclK2DNOu/biQzRPq6OkfjoFzeXjqqfKiwtJrUhm9131ePujZxDP/dwqLvvxF+nr6uPrq65i+393htzWiPJOIY2RwRFu/8QavvGjr/Kp1Z8gLSuN73/9EaQc/5pHOgL8aAYPjpJzevhLZ6nBbSaH0vvJ7FGN3zZONOMXIk8i+JKnEjpFSTMFcLtGqZ27m4yeHKqq5497XM7J6ZS8N4+mv3fSsy06Ric9J42vPfJl3n7hGWx8cTO3furOiPp/Q8Xv9/O9r/04kFe8+hP4vD5++I2fmj4OCBjhgvMySSp2MNoSupFI6w9UNxtM74upEdZooocAxt/NbSphLSMsUcv5VfaVqM+eVRWtQ1FduMfor2buTnw2HzN3LwbG9jf6c93M+lopfTWD7H2sNiw/Y6i5t8vPXsrqdd8kpyibn924jkfuewS/P7Yq6mc3rsPusHPBtR+lfk8jf3nkGdPHMFQ7CkDq9KSwjLDTnYTd42Q4XWfNTk1iEB1tKBMkWiSGtzQx3mUC0lnYTFdRC+W1s0gZSh/7IDssumomwinY+f2DSG9kA33SstK4Zu0VPPDCXQwPDPP1067ht2uejLkBPsJPrvsF217ewUU3XYDdYf6se6TRjfRKUqrCK7ohEKQOpDOUZk4Ot0ZjPkeUsOqPdbCeEjbVJ6weHR1Lf5nHOUrNvF2k9WVSVj9j3OOKP51F9rwMdn7vIMPNoxEdw1kfOZ0rfvRVcoqy+d19T7Hu248zOhzZPsJhPN/uunsf48Fn1nDGBafw3OP/GuOI6OXiSi8MH3aTUhF+5auUwXS6CluQyHHTzULFWrsome3btZIv2chYrWUKphJRV8JCiGQhxBtCiG1CiF1CiNuDr+cKIZ4XQlQHH3OiPZZEQCKpmbcLn8PDrF1LEXLsP3HmaSkUfDCDxmdbaX3V0E4Vx2Cz2fjW49dx+x9vpru1h6+tvJq1N/xfTA3wRLz27BvU7annwqs+FpP+hxvcpFaGHyGdMpSO1+XB64ztphoaTfTQSjhSjAJnSykHhBBO4D9CiH8AHwVekFKuEUKsBlYDN0x4JlUlbCGfsGru55G83/biw3QVtTCteg7Jg2lj5g8nT3NS/rUcBveNsm3dPuU+x1KXl935Jc7+1Dv42W3/x7q7H4ufUpHjIKXkqYf/zDU/upJ5J89h7yZzt0IcOeQh76wMbCkC/3Dof4eUwUDk6HDqIM7e8I14rLGW+lYlBr7dKYMgUbylUX+XMsAR55Uz+COB84F1wdfXAR+O9limOqPJQ9TP20N6TzYl9dPHPMaebqPqunz8w5K6BzuQvsh9Gb7r0+fy2Rsv4s8//Su/uONXcW+Aj/CPXz/P8OAw51/6QdP7Hj4UULLJZeEtSScPBYzwSGr0Uqg0mtiilXDEEELYgU3ALOBhKeXrQogiKWUzgJSyWQhROE7bS4FLAcgqVYuOjkGecCyU8IGF25FIZu4cJxraDpVX5+PMc3Dg2y14uiPn71p02gJu+vl1bF6/lQeu+MGEx6ruaG
SEFMbPORzsG+TFJ1/m3AtX8b2rfmTq0vloU2BpJ7nEydCB0PtNGkkBv2AkNe6K/sYpieDbNfJ1Hm/XJ7opSkKIZODfQBKBC/cHKeVtQohc4PdAFVAHXCCljGoupSl6X0rpk1IuA8qBU4QQi8Jou1ZKuUJKuYJUnRM5Hk3TDzKQ00PV3gUkjYxd7q38C7lkLEqm8aedDFVHzpdYPrOUe5++k/bgDkbxuIfvZPz9l8+SnpXOWR86w9R+R1s8SJ8kudQZVjubtJE0ksxIijbCGo0CR9ykS4FlwHuEEKcRcIu+IKWcDbwQfB5VTF10l1L2AOuB9wCtQogSgOBjm5ljmUr0ZHdweMZB8ppLyGspGfOYgg9mkn9eBq1/6qX735FbwszOz+LBf9yLzWbj6vetpq+rL2LnNpOt/95O++F2zr3wnab2K33gbveSVByeEQZIGkllNDkuEzw1mghgM/AzMfHkJo36crQQogDwSCl7hBApwLnAvcBfgEuANcHHpyc9memBWeanKIW7daDbOcquRW+QNJxK5d5AVazjl6GzT0+l9KJsul8dpOl3x66sGCnLmJqRygPPrKGwvIDLz76axupDyueKNVJKXnzyZT5y2YdITU9haMA84zba6iGpKPyPYtJwCj357W8+V3VlGEtw0sQPUylFyfBydL4QYuNRz9dKKdce04MBN2kkMUMJlwAvCSG2AxuA56WUfyNgfM8TQlQD5wWfa8JAItm7aBMep5tZ25dg9534QUqbk0Tl5QUM7hul/uEOIhWU6kp2ct/TdzJn+Wy+dcHt7Hxtd2ROHEPW//EVXEkuTnvPKab2O9ruxZWvYIRHk/G4RvGL+Ch+otFEDsPFOjqOuDGDP2uP78GImzSSRH36I6XcDiwf4/VO4JywTuYFVFzkUzQwq27GXrrz2pi9e+mbRf2PxpYkqLw8H0+Xl5r72pCeE8+tooQdLgfX/+EKlr9jKd/6zG08/7exilxYjx2v7qSno5e3fegMXvzDy4bONRrGKspQ+wgF2Zl4XKO43KFHSbtGUkCAO2mE5HHiADRGsdBWhholgqu06znKTRpUwaa4SRMjEWsK0pnfQsOMfRQ1TaO4qXLMY0ovzsFV6KD+xx34BiOjlhwuBzf94RpWvv8k7vzKPTzz+D8jct54wOfz8/o/N3Dqu1YghHmLtO7OgI/FlRPel69rNBkAT1J8FkLRaIwRPZ+wEKJACJEd/P2Im3Qvb7lJIVQ3qUGsNeXyg1p2i+qMND6V8FDqAHsXbiKtL4uZe5cAJ/qSM5emUPDuTFr/2kP/nsj4N13JTlY/eTWnfOBkHv7qz/jjo2r3p6ofeqI0o0jxxnMbefdF5zJryQyqtx2Men8Ao0eMcJ4T2Rp6O9dooEiHO0k9dsFq2BW3CLVGxnosibeMhqjvolQCrAv6hW3AE1LKvwkh/gc8IYT4ItAAfCKagwCrGWENXruH3UvfQEgbC7avxO4/8UZ1ZNio+loBww1uDv8uMiluyWlJ3PKXG1i8agE/vPSn/PNnL0TkvPHGppe2ALD8HctMM8KensAXoDPbQTiJY0530Ai7tBLWTEWiZ4Qj6iY1iLWMsA9QKnOsphRUZ90QnehoKfzsXbyRkZRBFm4+DddI8pvHH+2DnP7VadjTbey6u54Rz8Rf0KEUzsjKz+Q7f/sWc06exT2feZAXHl8fclur0drYRlNtM8vevoQnfvCUKX26g0bYlePAHYYicbpdIMHjik39aGuVnow3pTcRsRhrvJkCAf54G1N0SIx3OUWonb2b7vx2Zu5eTFZP3pjHFL87h9yVGdSua2Go3rhCKplexJpn76BwWj63fexuXv3L64bPGe/seHUXJ61aalp/vkE/0idxZtgJ5wtYIHB4XHoTB43GwljLCJvsEzayHWGkfcItZfU0V9RRUj+doqaKMY9LrUyi6jNFdG/up/nvxndGmnfKHO78yy3YHXauPedmdv1vr+Fzxopw/NBbXtvKuy86l4yyNI
YPm1Ob2dPvw5HpIFDIJ3QcHifeGClhI8Ryq0+NBZACpLXMkyo6OtoCDKb3UTt3N9kdBVRVzx/zGHuKjblXl+Pp91H9cJPhPmcvn8lD6+9hZGCEb5x5vaUNcLjs3Rx4r3OXzTGtT++AD0da+D4wh8eJ12GlpVaNJhSCRlj1x0JYa7ReFH3Cal9ShTFQwscXBPPZfOxftAWHx8nsXcvG3cB9+mWFJBW62PztvfT3D4x5zFiMpRBT01O4+XfX0tvZx5fO+DrdbVGtXx4yhqp7EXoe7f7tB4CAEa79uzlVwLyDPhxp4c+JHV4nHr0cHadYKU84Didy2iesiQfqZu9mOH2ABZtPwekZu5BD3gfTKTwti+pfN9K7L3QDPB43/PQaymaWcsXZ18SNATaToYEhmuqamDG/yrQ+fcN+JSVs9zr0Jg4ajYWxlhFW9gmrRUcb8Vs5FNu6j1LQnQUttE5roKRuOlld+WP6gdMWJlF8USZN/2tn719rlMd7hA9f+kHe9elz+MnNj7Ll39smPFZVmcZznvARavfWM91kI5yUH/4mDnafHZ9ejo4isajHrKOj31yOTgAS411akJHkIQ4u2E5abxYVB+eOeYyz0E7F1bmMHvay9ZF9hvtcsHIe3/zB5bz27Bv86p7HDZ/PyjRUN7LkNPNKyfrdfmyu8Kt02XwO/DZdikIzxUigwCxrvUuTfcJG8iBVd7Tx48cv/OxfvBmAWTuWgjwxf9iWIqi8Pg8E1N7Xjm9E7Yv4iCrNLczlzj/cRkdzBzdcdDND0viytpU5XNtERnYGadlpDPZEP0La75bYXLawd9ESPoHf7gu7nUYT3+g8YU0MqZ+7h8GsPmZvWz52YX4bVF6ZT3KZk5q72nC3Glu+cjgdPPDUGrLzs/n8275Mb1evofNNBVobA/Uj88tzTTHC0isRDgUlLG1Im1Se9Gk08YlWwvGJD8UdkdR8wrFQwm0lh2grb6Skroqc9sIxz1N2SQ6ZJ6XQ+NNO+ncG3puRyOGbfnwDy9+2jOsvvIm9W4wva08F2psC+/TmleVSv7Mx6v1Jn8SmUKVP+AMR1VLIiG1TqUlEdFxBrLCWEZ7i9Gd0UzdvNxlduZQfnD3mMQUfzKTgvZm0/bWXzheMLxl/7vrP8tEvnc/a7/yc556YGlsSRoKuYFR4dmGWKf1JCSjs3CRkoI02wpophUQr4fhEojZjM79iVrg+OrdrhB1LX8PpdjFzxxKQ4gQVnHNmGmWfyaH71UEO/8Z46tAZF5zCVfdexj8e/yc/vvWnYbc3or7jne6OHgAy8jJiO5BJOGKEEYlhgVU/k/0xUXpWyhOON7RPWGMifuFj15I38DjdzNu4csx84IwlyVR+PZ/+XSPUP9xhWPUsPmcBV/z6y2z69xZu+8J3jJ1sCjLYF5hgpGWFXuTDCEIQlMMajUb7hOMWiZp/1/yKWaH6hCWSffO30pfdxfztK0nrzzzhGGeenaqrChg55KHmvlak58Rzh6NKZ508g2v/dDmH9jVx6Ye+Rv9of8htEwW/38/w4DApGcnmdGgDqXC7yfBXsDVhofOEY0PiGGFdOzrGNEzfT1tpI5UH51HQVnriAXaouqoAYRPUPNCGf9iYWqpcOI07/nkzfR393Paeu+jv1QZ4PIaHRnAmj12lLNLYHAL/GJOryQm0EdoaazSWxGJTDTWfcMYJFZlDw4hPOBQl3FbcSP3MvRQ2TWNa7Wwk8gRfctknc0mfm0zNQ62MtI5fIziUvX2nzS3nO//6Fu4RN9eccyPNTa2Tv5EExuv24Ewy5yNic9qQXokt3HmxLXCf2WX0NkDXaExHAv7E0IgWM8JTh+7cNvYv2EpWVz6z9ywdc2OG7JWpFJ+fTftzfXT/z1gQVMW8ch548W4QcO25N9Ncqw3wZEgpEQoRyyrYkgS+0fAnfX6bH/wCIRPjC0uTQCRIITiLGWE/Kj5h1XxfI3nCE0VHD2T0sHvJBlIG05m37eRjKmKNBt9fcr
GLyq8X0H9gmAO/PGyoGEPVggq++8JdAFy96kYa9r61M1AsIpxDUe1jYWbt6CNIk4Kl7Mk2/MNqRthmUDGo1jnXTCXiLE9YkjBGWE+fTWYodYBdy9/A6XGxYMupOHwnFu23JduYd/00pFey/8FDSK+6IZi+qJKHXr4Hv9/PN48zwJqJsTsc+H3mGCh7qh3fiIIRtnux+/RStEZjVSymhEFlxhYLJewZo63bNcqu5a8jJCzYfCoud9KJClfA7MtLSSlxsevOekY7QvNnj7cv8K1/uAG328NlZ13J4ZompfcyHqqK1iq4kpx4RtTiCcLFkWZjuCn8fYF9dh82nwU/xhrNZCTIAo3+9JqEX/jZt3gTXtcoizaeQcrw2Eur0y4oIO+UTGp/2ULfLmNG7rpHvkn5rDKuOOeaiBvgRCA5NZnR4fANowqOdDvegfDX33wOLw6v/hhrphgJtBxtsU+vmk9YNcrZiK/s6H2BJZKauTvpz+lm9o5lpPVnjunjzTozhWkfy+PwC+3UPGOsXvEHv/A+3nPxeay95RdseXnifYE1J+J0OXEluRjqNUftOzIcePrC/9bxOj3YveHvQ6wxgzjzs05InJkCbYQ1kaRlWh1t5Y2U1s4gv3WMXGAgdZ6L8q/l0r2rj32P1hvqb87y2Vzz8JW88fxG1t39mKFzJSqZOYGiKYMmGGFHuh2bQ+DpCf9L2+N0kzoQ36U1NRol9HJ0PKKWJ6xa+cpIRPKRaOeevHbq5uwhp62Q8oOzx4yadpU4qLwuD3ebl9e/uwOPL/z3eMQnnJWbxd1PfZuuti5u+PRNDPonLsZhpfrPxsZaENbR2fmBjRt62/sM9BkarpzAx9Dd7YExUtUmwut043QbKyjijUF8ppF4CzWspEo1iYTFjLC1GEzvo3rxVlIHMpi5a8mYucCObBszbi5E+qF2TTueQfUvC4fDzn1P3E1BaT5fOOvSNzch0IRPfnEeAL1t0d9b2ZUfWE52d3mB0JeW/cKP1+XB6U6K0sg0mhihl6PjFbXa0ao+YSNKeDRpmH3LNuHwOpm75WTsY0Sw2lIEM24qxJFp4+C323C3eg2pvasfuIpTz1nJLZfczs4Nu5XPo4HCskIAOg51Rb2vpKARHm33kBSGEfa4RgFwjWojHD1iUTs6FsThSoE2whpVPA43+5Zvwu/wsWDDqbjcJ24CIFyCGTcWkFTupHZNO8M1xqJwL/zax/n0Ny7kVw88xl9/9XdD59JAcUURAJ2Ho2+Ekwud+D1+3D1ewjGn7uTAhNQ1atImExqNWUi0Tzg+kdgVpkeq/qdw9wQG8Nm8bF/2KiOpg8zdsoKUwfQTFLVwQNW1BaTOSaLuex30bx9WGt8Rlr93Cdf/4Bv86+kXufv6e5XGrYKqao9F5atwKZ9RRtvhNhyjLhyE73NNInTDmFqUjLvNS5IMz5iOJgfum6SRlLDaaTSWIEGUcNQjMoQQ04QQLwkh9gghdgkhrgy+niuEeF4IUR18zIn2WKKNX/jZvWQDfVldzNy5hMzu3BOOEXaourqAzGUpNP60i97XjEXfzlwxnauf/Bp7t+3j2otuwO9PkOljlJk2axqNBw+b0ldSiZORlvCLgmgjrNFYHzOUsBe4Rkq5WQiRAWwSQjwPfA54QUq5RgixGlgN3DDRiWyo+XdVlXA4PmGJZO/CjXTltzJ79zKy2wrHrIZVeWUBWStSaXy0k86XToxcDkddls4qZvXfr6SntYcvv+8yhgbDV9RTveqVKlXzKnnpT+uj35GA5BInfdvC/9uNpA7hcLvGjDfQJCJTyH8d5cAsIcQ04FdAMYGF77VSyu8LIXKB3wNVQB1wgZSyO3ojMUEJSymbpZSbg7/3A3uAMuB8YF3wsHXAh6M9lmghkexbsIX24iam719ISVPlmMeVXZxDzmlpHFrXRcdzxvbxzS/P485/3YIQglvfczcdrZ2Gzqd5i5yCHHILcqjdUxf1vpKKHNhcNkYOh6+ER1IGSR
5KjcKoNJo4wG/gZ3KOiMP5wGnA14UQCwiIwReklLOBF4LPo4qp0x8hRBWwHHgdKJJSNkPAUAshCsdpcylwKYCDfKWc3wLFKVUoSlgiOTB/G22ljVQenEt5w8wx9wXOOzuDwg9m0faPXlr/3qM0niNkF2Zx579uIS07jZveeTtN1c2GzmcVzPIlz106G4B926qj3ldKZSAUa7h+NOy2w2mDZHeGl/9sdVRXtYxNeVUxO+I4ztSsEdSKI4ZM0PYcsT/9QoijxeGq4GHrgPVMskJrFNOy9IUQ6cBTwFVSypArIEgp10opV0gpV9iJr8pARwxwS1kD02pnM612zpjHZSxJofLL+fRuHaJxnTHFmlWQyV0v3kp+eS63v/8eDm6pNXQ+zYnMWz4XgP0mGOHUKhfSJxk+FJ4S9jo8eJJGSRmM/yA3jSaemUgcAmOKw0hiytRJCOEkYIAfk1L+MfhyqxCiJKiCS4C2yc5jtk/YO0FfAQO8nbayRsprZzHt4Bxk8B+8tS9wamUSM64uZKhxlL0PNeCbJHBqIh9tdkEWd754C0XTC7jxfbez7b873vw/K1W+incWnrKAhgON9Hb1Ukh5VPtKnZHE8CE30h3ePTqUHtB2umSlJjLEWZ6wHzCWNJIvhNh41PO1Usq1xx90vDgUIryKdZEg6kZYBN7Vz4E9UsoHj/qvvwCXAGuCj09HeyyRQgo/1Qu20V5ymGk1s5lWM2fMalhJBU4W3FiBb9jPnjUN+BQ2bT9CXkku9//rToqrCrnp/bez7eUdkzfSKLH09MVsenmLKX2lzkyid1P4wXGDGYHFpNSBzEgPSaOJPX4wGDPaIaVcMdEBkRKHRjFDCZ8JfAbYIYTYGnztJgLG9wkhxBeBBuATk53IhpqqVd0Naaw9gSWSA0EDXHFgLuV1s958/Zg+M+wsuLkCm8vGjlvrgiUJJ2csRZucmsw9z95GwbR8vvmeG9j6ynaFd6MJhbLppRSWFbLlP1uj3ldSsQNnpp3B/eE7vwYzenGOuo6pljXWRFCjsSQSo0p4QuJJHEbdCEsp/8P4VenPiXb/kUQiqZm3I2iA57xpgI/HliJYcHMFrnwnu79Tz/Ch8INujubGR69lxqLpXP3e1doAR5kV7zgJgM2vbI16X+lzA8U5BhSM8EBmL2n9WdrwaqYmxpejJyNi4tAolgqncyCVIp1Va0Af3U4iqZuzm9byRspqZ1JWN2vM84okQeXqPFIqnGy/7wCd+4xtAPCpqz/Buz51Do/c+DNef27DuMep+oQTxZccamT1GeeeQVdrF4d3tkQ9Gjt9QQrePh8j4QZl2T0Mpw2Q11oSpZFprMkUyhOOMvEkDhPryisSMMB7aKmop6R+OtMOjh0FLVyCquvzSJ3rYuf3DtK51ZgBPvVdK/n6fV/hpaf+za/WPG7oXJrJEUKw8tyT2PCvzab0l7Ewmf49I4Q7RxzI6gEBGb3Z0RiWRhN7jPuELYOljLCqT1hVCfvxI5HUz9lLa0U9xfWVTKs+Ngr6CMIZqAedtjCJxoc7qXtNreThEWVaNbeS7/z+Fg7srOGmS25hOMprMxqYv2IuuUW5vPrM61HvK6nYQVKhk9a/HDtRC2V5uT+7GyRk9ubq5eioo9VlTIj+cnTcoO+SCZBI6ubtpq28keL6Kiqq5475pSdcgunX5ZOxNIWGH3fS/YqxKVxOfjY//PtDeNwervrQNQwrlKPUhM/bPnQGPp+P1559I+p9ZS4NVLrq2xb+vdKX00VaX5YuV6mZumgjHJ/YkUpKWGVXISn8HJy/g87SJkpqp1N+MFBF6XgFbEsSTL+hgPQFSTQ83EHXywElq+prdSW5+N7T36WgNJ8vrbqM5oYWpfNowuedH3s7W17eRl/XW7VkVP3Ctknq4GSfnMZIkwdPq3/SY4/GZ/fSn9VNacMMpXFprICq+rbU1/nEJNBytGkVs6yE3+Zj9+INdJY2UX4gUIhjLAVsT7Mx81uFAQP8o843Db
AqNpvgnse/w+LTFnHzxbex841dhs6nCZ0ZC6uoml/J+qdeiXpfthRB+sJk+hTyg3uzO5E2SVZXXhRGptFozMZSUycHanWgw/EJe+0edi19g97cDir2zaOosWLM9o4sO7O+VURSqZPaB9vpfcP4tO1LP/4s53x0Fd/5xj389Y9/D6ut3g3JGO++6Fy8Xh8vPrk+6n1lLk/F5hT0KNwz3flt2Lz2MbfJ1IyPyj7kAD5DX5FWUrRTrmKWZbCUEY42o65hdi5/naG0PubuPInMlvwxj3MVOJh1SxGObDs1a1rp32G80vin7voY531lFT+5+2f8+oePGT6fJnRsNhvnffoc3nhuA93tPVHvL+f0VDzdXgb3h5c/LpF057eT3ZWPTdqjNDqNJg7QRjjxGEzrY+ey1/A63Szcdiq5nUVjRiQ78+zMurUIe5qNA3e0MHTAbbjvD69+Px+96QM895OXePDm7xs+nyY8VpxzEiWVxTx83U+j3pctRZC5PJWOF/rDTk0azOjDnTJMbs3s6AxOo4kXEsgnbCkjrJqi5J1kKaont529izdh89tZtOkM0vuz8OE7IaDLmWdn9m3F2NNtVH+nmaGa8Q1wqIFZH/7m+7nono/z0m9e4Ydf+0lIbTSR5fwvv5/ezl7+/fR/o95X9mlp2FyC7v+EHz/QWdgMfkFOR1EURqbRaGKBpYxwNGgpq+fg3J2kDqUzf+tKkkfG3iTdle9gzm0lAQN8Z8uEBjhUPvzN9/OlBy/hlSf+x0Ofexgp1fKZNeoUlObz9o+cxe8fehKPO7zKVSrkn5POyCE3Q9XhL0V3FDWT3Z2H0+OK0ug0mjghyrWj4wlLGWGBVNqMYbyNGOpm76G5spbsjgLm7FiOw+ccMwgrqcjB7FtKsKdOroCPMFmg1Kdu+DhfWnMJ6594hTs/fT9+X+B9JUoZSbMZL9Xogss+hs0meOaR56NepjK5wknanGQOresKu+1AZg+jqUOU145dr1yjmVpI4i5YLEpYyghHCp/NR/WirXQVtlDcWMn0/QsQcuxsreQyJ7NvKcHmEOz/TjPDtcYV8Be+czEXf+uTvPD4etZc8tCbBlhjLqkZKXzwsvfx3z+/RnNta9T7K3hPJv5RP13rB8Ju21Z6CJvPTn6brhetSRS0EY5LVEpQHt3G7Rpl39KNDGT2UrVvPsWNVQQ09onndc6EOTeWIH2SHd82vhsSwNcf+jIfu+p8/v6zf/LQVx/G748fA3zdddexYcMG1q9fP+4xq1atYuXKldx///3mDSxIpJXqBdd+lKy8TB6/58mInncs7Ok2cs9Ko+s/g/gGx/+bj5WP7rP56ChuIq+tGIfPOW7bcIp+HE1/DEpfDupym1FgKpXYTBwlnFDFOgbT+9hxyn8ZSh9gzvaTKGmcPm7t3fQlSSy8rQrfsJ+dt9ZFxAB/+sZP8LGrzucPD/2ZBy79YVwZYIANGzbwxBNPsGrVqjH/f9WqVTzxxBNs2DD+bk5WoaA8nwuu/Qgv/vZl9m2sjn5/787AlmSj/W99kx98HB1FTfgcXooOV0RhZBqNJpbE2/RnQiQSn0LSvR8/XQUtHFy4A7vXwYKNp5LWnzluOcvss1KZdlkeg4eG2Xr3ftw94QfsHO/bPe+TZ/Oluy/hn4/9iweujnwaUiR8yevXr+eCCy7giSee4IILLjhGER8xwMe/blUuvfdz2Gw2frb6lxMel8rYgXqTcfTkzpYkKHhvJr2bhhg97A1r0wWJpKW8jpSBdDJ7dIEOTaKglfCUQSI5NKOa6qVbSRlMZ9Ebp5PWnznu8YUfyaTyinwG946y+dt7lQzw8Sx/x1JuWbeazeu3ctcX7jN8vmhytCE+ooinmgFe+o7FnPPpVfzuvqdobWiPen9556bjyLTT+qfwVXB/Vg8DWb2UHKrSOyZpEogjRlj1xzpYSglDeD5hj8PNvoWb6SpoJb+pjKq987H57WOeQ9ih/Mt55J2dTte/B2h8pJM+X7/h8c
5cPIN7/3wnjdWHWf2RWyZNg4mH6OijDfEjjzzCZZddFlEDrKouI4HD6eDKh79Kc22LKb5gW5Kg8Pws+ncMMxRmhSyApooa7B4nhU3lURidMbRfdyoRb4YrcZSw5YxwqAyk97J7yRuMJg9TuXc+hYemjask7Bk2pl9TQPqCZFr+0EPLE71jHhcuJVXFPPTsvQwPDHPN+1bT3xN+VGysWL9+PY888gi33nord9xxx5RQwACfWv1xqhZWcuP7v417xHik+2TkvycDZ7adugfCv6eGUgfoLGymvG4Wdv+U/ahqNOOgjXBcMpkSlkhaShuombsDh8fFkk1nktQ7flRt8jQn068rwJnroO777fT89638XiOqNK04me89dy+uZCdfOOsr1DbUKJ8rFqxatYrLLruMO+64g8suu4yXXnrJ0oY4lVQqFpRz8c0Xsv7x/7Djmd1RV+T2NBuF52fSu3mIwX3hq+DDVQex+W2UNEyPwug08YuVNn7QGGVK/dW8dg8H5m+nvfgw2Z0FzN15Ei5PEqOM/QWYuSKFym/k4x/yc+D2FoaqI6OM0nPTuOf5H5Ffks9Xz7uCg7utZ4CP9gG/9NJLlvcJ2x12rv7V5Qz1DfOzq35pSp/FH8/Cnmqj+bGesNsOpwzSVnyIkkOVuDxJkR+cRhPX6OXouETCuBHN/Zk97F+0mZGUISoOzKW8bhYCgR//mOq58AOZlF6cw1CNm5r72/B2q211djypWSl865/XUD6rlMvf9022v7YjIuc1i7GCsCaKmjYTI8r1wm99lFknz+Cuj36X3vbQA6RUc5OTS5zkvzuDrhcGGG0MPSL6SK7voRnV2KSNiro5Ief/6sAtzdQhcYzwlIiO7s5tY8eK/+K3+Vm06XSm1c0e/wvJDtO+kkfZZ3PpeWOI6m+3RM4AZ6Zwy3PXUrlkGtd+bDUbXtoYkfOaxURR0GNFTVuF5W9bxoXf+hgv/upl/venN0zps+yLufhGJC2/D98XPJDeS1vxIUobp+NyJ0dhdBpNvKOjo+MUOaaqzejJobiximm1s3F4T6z/fEQ92zNszPhmEZmLUmh+qpumJ7on3E4uHJ9wWlYqNz57FZXLKrj7Yw/wz2eeD7ltvLBy5coJle4RQ7xy5UrLLEtn5WZx92N30FrbxiNf/7kpfeafmUnGkmQOPdqFty+8giwSSe3s3Tg8TqbVmbNloY5y1mhih8WM8NjY/Q6mVy+Y8JiUChczry/CmW2n9odtdL0SuUjljNx0vvPct6hcXMGaTzzIG3/bFLFzm0kopSjXr19vGQMshOCuX3+bvKJcrj/zVoYHRqLepyPDzvTPFzN0YJTO58O/xzoLWujJa2fGvkU4vOOXqNRopjaJsxxtOSOsUjs6/XQXsy4rxTvkY+dtdQwcjNyXcU5xNt957luUzi7mzvPvY9OzWw2db7Ldl+IJVX9ptHcrOsKlt3yRt73vTO786hoOb2o1pd/pny/Gnmqn5pH2CVdZxsJn81E7exepAxmUHqqKyvg0ZqKjnI2hjbD1sUPJRVnkfyCDvr1D7HvwEJ6eyP1hi6YXcufz3yK7KJvb37+GbS/ujNi5NcZ454dXcdntl/LXXz3DH376R2azMOp95p2eScHbsmj4fRsjjeFXWjs84wAjqUMs3njGuLt6aTSJgcSuUKL4CJGJ8jEHSxnhiaKjj8eRY6fyyjzSFyTT+I9Wqn/ViPSFJ00mUqUzFldx7z/vwOFycM3ZN7F3w/6wzq0Jn1CV7KwlM7nr17ez+409PPCV7xtSwEmEFhjlzLMz88slDOwfof1PA7hwhdXPYEYvzRV1FB+uJKenQGWomqhgpZ2JrDTWibEBqQp7xx/BeK1D84ivKx8hMpYmU3FFHsIlqP9BBwf+0xDR889dMZv7n/8OwwMjXHXWDdTvaYzo+TXqFJTl88Df72GgZ4DVH7mVUROqYmGHGVcWIeyCuh+1Ee53h1/4qF64DafHNWlsg0ajmVpYzghP5BMWdi
i+MJuiD2cx3OCm7sF2Rpu8yn7WsaKjK+dVcM8/vk1vVx+Xv/NqWhrG3gw+HmpAJxrpWWk88Pc1pGWm8pW3fYP2pg5T+i37ZC7p85Kp+V4roy3hq5HGmdUMpw8wb8sKnN7wFPTRmJ0nPDQ1Mhw1cYgNSZpC/M8RtBKOAa5iB1XfyCd1VhId/+rn8C+7kW71P+JYFFcU8YPnv4vX4+XK864b1wBrzCcpJYn7/3o3VfMruOb9N3JwhzlVynJOT6P4/Gza/tlL96vhT7y689ppqqqh6FAFOZ2FURihRmM97GDICFsJixnhsfOE0xckM2N1IdIrqXmgjd7XIx9hnFOYw/eeu4/ktGS+9o6rOFzTFPE+rIZqBatIR1U7k5zc8dTNLD5jId+58D52/mvvCceq9jlRtaqUKheVlxUwsHeEpl92h1zZ6gijSSMcWLiNlIF0KvfPVxpfJLCSotU5zdEiviKRjfqErUTUjbAQ4hfAB4A2KeWi4Gu5wO+BKqAOuEBK2a3ax3C9m94NQzQ93o2nM/JxcRnZ6Xz/ufspLC/gyvOuM01laSYnMzeDO/78LZactZDvfukH/Pup/5rSrzPXzozVhfgG/NQ+0I4M87bzCx/7l2zGb/MxZ/tJ2P326AxUo7EgNqKvhM2wTaFghhL+JfAj4FdHvbYaeEFKuUYIsTr4/IZQTjZWdLR/0E/tD9vGbaPqnx1mkNT0VL77j7upnDeNb3zgat74nzllD83CrJzdaFA+u5S7/norRZWF3HHhvax/4hVT+rWn25h5UxH2ZBv7b2nG23uiBZ7IPyuR1M7bzUBWD3O3nUTaUEZI7aYSVlLfminLL4mgbVIl6kZYSvlvIUTVcS+fD6wK/r4OWE+U36gKySlJ/OCvD7BgxXyu+8RNvPavqWWArcyK85Zzy+9vwOf1cc05N7Pr1T2m9GtPtTHr5iKSip0cXNOqlg9cWUNbWSPltbPIay+Jwig1GmtjNDArFOLFNsXKJ1wkpWwGkFI2CyHGjUgRQlwKXAqQQRajhF/tSkUJO1wOHvjjfZz09uXcdNGtvPTn9VHvM1FQ9SUfaXf+Ve/nC9/9DA27GvnOh+6jrb590nOq9nm0MrWlCGbeVERypYu6+9sZ3DkatnLtKGqiYfZe8ltKmXZwjtKYxsOr1aVmihBDn3DItilSxH1glpRyLbAWoEiUmhIu53DaufHJqzn1PSv49hfv5NnfPWdGt5pJSElP5opHv8rbLzyDV//4Og9+9keMDI69V3SksafZmHlzISlVLuoeaqdvy3DY5+jOa6N64VYyu3OZtXtJwiw9azThEgGfcL4Q4uht7NYGbUncESsj3CqEKAnONEqA8R26JmOz27j2sW9w6odWcPfX7uXPv/hLrIcUErHw7ZrZ58wFM3jgybspm1vK/13/G56637y/iyPLxsybi0gqdVJ7f2gG+HgD25vdyb4lm0gdyGD+1pXY/WN/9MKNsD6axIgl1WhCokNKuUKhnem2KVbrV38BLgn+fgnwdIzGcQw2m+DqdV/nbZ84nUevXscTjzwV6yFpgI99+cM8tvGXZOSlc8t53zHVACeVOJh9VwmuYgc197YpKeDe7E52L3uDpJFUFmw5FYdP746k0UyEPegTVv0xgOm2yYwUpd8ScHTnCyEOAbcBa4AnhBBfBBqAT4RyLj9+JV9rKBWzhBBc+7NvsOqis/jZ6l/y24f+YKkdjVRR9ZUaIVQFnZmTwfU/uZpzLljF689t4NHP/oae1j4lBa7SJmNOCrOvL0ZKOPjtNoZr3GEvIffkdLBn2QaShlNYtPk0XJ6ksMcRKqo5tGa302gmwwyfcCRtkxHMiI7+1Dj/dU60+w6Hb/zoq7z3C+ex7tuP89t7/xDr4SQ8p717JTf94npyCrL58eq1/Oa+31EmK03rv+AdWcy8tARPh4+au9txt4ZfzKCjsJn9i7aQMpTGws2n4XJHzwBrNFMJM/KE48U2xX1g1tH48UdFnX71u1/k/K
+9n9/e+wfW3f54xM8/VYmGTzg1I5XL7/sKH/nqhzi4s5ZrP3AT+7dUGz5vqLshYYOyi3Ip/mA2fduHaHioC99g+DPy1rIGDszbTmZvDgu2nhpyTWgdrKXR+xCbY4TjhanzV1Pk4psv5IJrPsKffvhXfrb6l7EeTkJz9sffwZUPfZ380jwe/+7v+em3fo57NPw8XFUc2XZmXFVIxoIU2p7tpXFdJw5feB8RiaRhzl5aKurJbS9i3o6Txw3C0mg0moT+dnj/l97NF+78DM//+kV+dGV8RK+b7aONh6jqFect5wt3fYZ5K+dwcFst3/7YPex9Yz92XKQctS9vNMeasSiZ6d8oxJZso/YHbXT9ZwAIL1rZ6/BQvWgrPfntlDXMYEb1QoSM/9zdRPDt+tBlQa2EQOJIkHj/hDXC0xdVcsUPv8Ibz27ivi98HykTY+kjnlh+9lI+d/unWfy2hbTWt7Hmkgf512/W4/eb9+ETTkHZp3Io+kA2w4fc1NzezMjh8NX3YEYv+xZvwZ08zIw9iyg/PDMKo9VoEoeJtq2dSljKCEvF6Ojj2zhdTm587GoGegf49mfvZMDbF6khJhSqqv30s0/hU7d9nEVvX0B7YwcPf/VnPP9/L+F1e0mewHdrZJVgLEWbOtNFxWX5pFS4aH+2j8O/CWx/efSxk/loJZLWskZq5+zC6XGxaNPpZPTmKI/TiE/YbEVrpL9EUN8adSTaCE9pLrruQmYtmck177+R7vaeWA8nIRBCsOpDb+cLN17C4lMX0Xm4i0e+/nP++egLeN3mbqNmT7NR8sls8s/LwNPj4+A9rUr5v27nKAcX7KC7oJWsznzm7FyGM4opSBpN4iDH3KxnKpJwRri4sojP3XwxLz65nlefeS1q/aj6L1VrTkd6j95ItXU4HZz3ybO5+IZPMWNhFYcOHuanl/6S9ev+i9ftxUkSTkIzXEbGKhAgIOftaZRenIMjw0b7P/ppeaIH/7AMW4F25bdycP52vA4vVfvnU9Iw/Zhz6Cjn6KB3X9JMNRLOCH/mhk+BEHz/6h/HeihTmsycDD78lQ/y8Ss+QkFpPgd31nLbp+/khSdeosRXYfp40uYnUXZJDqkzkhjcP0rNnZ0M14fv+x1NGqF27i66CltI7c9gwebTSBvMmLyhRqMJC70cHYeoVsw6QkZ2Ou/97Lt47rF/0XaoPYIjixxmRysb8bMeP1abzcaity3gnE+/g3MvficpaclsfG4z3/3CD9j43BaklLhIMTrk8MZY6qLi04XknZKJu8NL/Q866PnvEMjQ1OqRY6Tw01xeT8PMfUjhp/LAPErrZ2AbJ/pZtQa0kZ2QVFWiVpeaeEQb4SnI6e87jZS0FP70E2tsymAFbDYbi89ayDs+cSZv/9iZ5BbnMDw4wstPvMKTD/6Z2p31MRlXcpGT8o8XUHBWFv5RP82/7aH9b/1IT/gf7O68Nmpn72Y4fYDsznxm7F1MyrD5qV0aTaIgQfuEpyKzlszAPepm/9YDsR5KxDHTJ2x32Fl21hLe+/F3c8ZHTyWnOJuRoVE2/n0z/3nyNTY+s/nNLQbHUtqqYw2l6pWryEHJR7LJe0cG0itp/VsvrU/3YOt3IIL/QmUorZ+G2fvozm8jeSiV+dtWktdeHFV/r5UijhNDQSfUV6QmBiTUHZZfkkdvZx8+ry/WQ7EcyanJnPquFbzjI2dx5gdOIzM3k5HBETb8fQv/efJ/bHxmC6ND5uztOxapM1wUfSibnNPSkF5J27N9tPy5B29v4G8dWtHIAEOpAxyaUU1nUTN2r4Pp+xdQ2jgdm9QFHzQac5B6OXoqcmD7Qd772XeRU5AdUmpSLHYYUkV1rBOp0pzCbE5578m87SOns+Jdy0lKSaKvq5///fUN/vOn/1HzXCPuYTcANhykhHg7qY71eD+rcEDWKWkUvCeD9HnJeAf9tD7dS/s/+vH2+I5pE4p6HU4d4ND0A7QXH8bms1NWN5OKhjk4PeGYcELuL1
6IRc6uzhOOBkZS/eLPFGgjPAXZ+u/tAHz+ls/w4Dd+GOPRxBd2h50ZS6pYcNo8Fpw2l/mnzaN8dikArQ1t/P1n/+S/f36N7a/senMlIY+CmIw1eZqTvLMzyD0rDUemndEWD4fWddH5Yj/+4fA+uBJJf3Y3hytr6C5oxeazUVo/g7L6GTg9STgS6yOi0cQF2ic8Rdm9YS+Pf/f3fPraC6nf28hTP/5z1PqK9xrQRdMKmb9yHitOO5l5p81m1oqZJKUEFF9Xczd7/7ef5x99ia3Pb+fglto32yUdldNrph/akWYj//QMcs9OJ21WEn6PpHfDEF0vDdC/fYQjk+bxFOjxr/uFj87CFpoqahnI6sHhdjKtZjbFjVXH7PmrqmhV21nJJ6zRRBOthKcoD9+wlsp5FVz78JWc8q4VPHz9T2nY3xjrYUWV7Pws5q+cx/yVc1kQfMwtygXAM+rhwKYa/vHIc+x9rZp9r+2nvbEzxiMOYEu2kbsyg/wzMslemo7NIRhucHPo/7ro/s8gvv7wZ8qD6X20ljbQXnwYr8tD8mAaM/YuorBpGna/9vlqNBpzSQgjfIzy8sMdH7uXj3/zw1x08wX8ft+v2Lexmn8/9Sqb/7WV+j2NjAyOxG6wCgghKCsup3h6EaUzSyidWUzZrNLgYwlZ+VkA+P1+GvY08sY/NrNvQzX7NlTTva3/hLKRoSpVdT/0+O1sSYLMZSlkn5lG1kkp2Fw23B1e2p/po++/I4zUHimwIbCHuDOO1+GhtaiBltIGBrJ6EH4beW3FFDdVkN1VMKFqNTvf10jEsWpbVQWtlbcmeujArCmNx+3lt/f+gWd/+S/e9dlzOOujp/Pley6Bey4BoLW+jbrdDRzcXUPd7npqd9dRt6eegV71QiFGsDvsZGSnk1+aT+n0YkpnlFI2o4SS6SWBx6piklLeWkL1+Xy0NbTTdLCFl//wKoerm9i3sZrqzQcZHji2RnKs/LpHkzzNScaSFDKXppC+IBmbS+Dp8dH5wgDdrw4yuH8UJGH5Z712D92FrXQUNtOb14G0SVIHMpixbyGFLeW6xrNGE+don3AcYsOm5E8cYmjM17tbe/j9/U/x+/ufIr8sj3kr51C5YBqVCyqoXDCNZavOP8a4dRzupH5PI/W7Gzi0v4mRoVF8Hi9ejw+vx4vP48Xn9eMNvubz+vD7/G89+vz4fX6S05LIyEknIyed9OBjRm7GCa+l56SRkZNOasaJynGgZ4DmmhYadh/i9b9tpLO2m5aaVlpq2mira8PrOTENSyBOUK/Rrh09Fs4sO+mLkslcmkLGkmScuYHbcOSQm47n++nbOMTAnlGOfAaPKNXJ/Kxeu4fu/DY6i5rpzm9H2vy4RpIpaayiqHUa6X3ZYftqzfbtxkJdxiLfV7VPvS9wYuDHzyjWWpFUxVJGOJp0HO7kP4f/x3/+/L83X7PZbBRVFgSNcsAwVy2o4D2fP3dMw2iE4cERBroH6O8eYKB7gJa6Vvq3DNDf9dZr3a3dNNW00FzTwkDPsao8ntOpkoqcZM1PJWN+KpnzUkkpCUxsvP0++reP0L+9h/7tI3g6w8vflkiG0vvoyeugO6+d/uwupE3iGkmm+FAl+a0lpPcGDK+OctZoNPGI/maaAL/fT3NtK821rbz29w3H/F9OYTbOJCcOpx2704HDacfhdGAPPjqcduwOOza7DZvdht1hxx78fWRwlP6jDO5AzwAek7fzixbCIUgtTyJjbgqZQaPrynUC4On30r93mK7nBxnYM8pwjZtw3T5u1wi9uR305HXQk9uBJylYmas/g9L6GeR0FJLRm2OpPF2NRnMsRvcJsBIJYYSjoRJH29yM4o7IuZy4cAZrOpm9JaERn3BmTgYplS5SKl0kVzpJqXCRXOZE2AMG0N3hZWD3CAN7+hjcM8LIYc8xvt3JAqskkuG0Afqzu+nL7qI/q4fR1IBrwel2kdWZT05XAVmdBSS5Jy5paZVUIyulKFkrMCshvu
qmDH7847oRpxr6ztRMii3ZRmqZi9SKZFIrk0gLPjoz3rp93O1ehhvc9G4cYrjezdABN+728NS9z+5lMKOP/uwu+rK76c/qxucMnMM5mkRmTw4lh6rI6s4jrT9Tq12NZooitRKOT4RiYJaVMFvRppAKApzZdlyFjjd/kgqCj6VOXLlv3Sa+ET8jDW56XxtmpMHDSL2H4QY3/qET15XHTe8RkuGUIYbS+xhK72covZ/B9P43VS5AykA6+a0lZPTmktmTQ9JwKk6cSu8RJlfd49Gr2C4W2wpaKRhMo5kIvRytmVoIcGbaSSpwkVTgJLnQSVKhk6QCFymFSbgKHNicx34Re7q9uNu99O8YYbTJw8hhD8P1btxt3jf9uJPl0EokHtcoQ+kDDAeN7VD6AMNpA0h7MPTZL0gZSiO9L4uipnJSBzLJ6M1Rqtes0Wg0VsNSRtiBQ0nxDcdx5PDxhKSEBThSbTgyHTgz7TgzHaRmpeDItAd/bDiP+t2RaUc4jjOyfT7cbV5G6t30bRjG3RYwuoFH37j77h5teO3Y8dm9jCQPMZoyzGjKECPJw4ymDDOSMsRIyiB+x1sRz87RJNIGMshpnE7aYCZpA5mkDqZjC7FSlaqaBfN9u20mF84A84t1GFHtOtUoGljq63xCtE9YE3VsLhuOFBv2VDuOVAfOVDuOVDspqak40gKv21NtONKCjyl2HBn2gHHNcGBzjP3F6Rvy4+n14e3zMdruZfBg4HdPlw93h/dNY+sfCRjZidSsROJzeHEnjzCaHDCyo8kjAYObPIw7eRivy3NMG5vPRtJICslDaWR155E8lErqYDqpA5lvqlsjxlSj0Ux9/Pj0cnQ8YsM25sbu9jQbFV/MR/ok0g/SL8HHic/9Ehl8xB/4fyQgAj/CJgKPAjjm90BpSN78PfD/NodAOAU2Z+BROAj87hDYnLbA/zkI/J/zqOOTRKCvCZB+iX9Y4hv04xsK/Hjb/Iwc8ODt8+Pt9QUe+3zB3yW+fh9yglgoicRn9+J1ufFkufE6PfhcHjxONx6XO/B68HeP043XNYrffmzVGpvPRtJwKkkjKWT15gYM7nAqySOpAb+txzWp6lTN2VUtIQnqvt12k33CVoqOjg2W+srSKKJ9whZDOAWpM5MQ9qAhtYGwgbCLQLrM8c9DRPoDRlpK4JjfQUqJ9IL0SPxeGXj0BB6lJ2A8/R6JPPr/jvw+KvEN+/GPyDcN7BFj6x8KvOYfkePm0PqFH5/Dg9fpDT568Lm8+Iq9eI88d3iDj4HnAaPqRtrGWWb22XG6XTjcLpxuF6kD6Tg9LpyjySSNJJM0kkrScMoxRlYrWo1GEw30cnTcMk7lox7Yd2VzOKdB2AGbQNhA+glYV3/QyB75CSKRIALlxKUI/i4kUvjx2yTS5kcKP9Im8Qs/0uYHG8HX/PhF8Jjg7367D7/Nh9/mD/4efMz04c/xB//Ph9/uP/Y4uw+f3TuuIT2CzWfD4XFi9zpxeJ0kjaSS3pcdMKpuF05P0psGN9mTgtPtwu4P/1ZQNcKqirbJwO2qqmhV26n6hFX7A/N9u8b8uhb76jEVfW0SiSnx13a7Rth1ymtKbd80sEIiBUFjK48xttFc5RN+GzafDZvfjs1nx+a3YfPZsfvtOLwObL6k4P8FjrF7Hdh9Duw+e8DQegKG1ulx4fA6cXic2GToX45azWo0mnhDL0ebhBDiPcD3ATvwqJRyzUTH2xC4ODF1Rfghp7NQfRxSIAIWOPj7kddsx70ePE4KBAKb3xYwolIEHv22QBu/DduRR3/gPEf/nz1oaG1++wn+U9UoXlVjaqTghdl+VlV1aaRP1XaxyNk1X9FOiTl8FNHXRxUzjHC49idaxOwuEULYgYeB84BDwAYhxF+klLvDPZfT62LOnmURHqExA6XRaDQaNaLtE46k/TFKLKdqpwAHpJQ1AEKI3wHnA+NeBC82OiyS82uVDdaN5HqqKlMrRQ5ba8u9RFBeifAeNSYQtv
2JFrG8o8uAxqOeHwJOjdFYNBqNRhMnmFA7Om7sTyyN8FjS5ISwXyHEpcClwaejP+RLO6M6KuuSD3TEehBxjL4+46OvzfjoazMxc6N03n8SuPaqJAshNh71fK2Ucu1Rz0OyP2YQSyN8CJh21PNyoOn4g4IXbi2AEGKjlHKFOcOzFvraTIy+PuOjr8346GszMccZuoghpXxPNM57FCHZHzNQdwgaZwMwWwgxXQjhAj4J/CWG49FoNBpNYhA39idmSlhK6RVCXE5g2cEO/EJKuStW49FoNBpNYhBP9iemoYZSymeAZ8JosnbyQxIWfW0mRl+f8dHXZnz0tZkYy14fBfsTFYSUMfFFazQajUaT8MTSJ6zRaDQaTUITd0ZYCPELIUSbEGLMVCQhxCohRK8QYmvw51azxxgrhBDThBAvCSH2CCF2CSGuHOMYIYT4gRDigBBiuxDipFiM1WxCvDaJfO8kCyHeEEJsC16f28c4JlHvnVCuTcLeOxCoMCWE2CKE+NsY/5eQ902kiMfyM78EfgT8aoJjXpFSfsCc4cQVXuAaKeVmIUQGsEkI8fxxpdbeC8wO/pwKPEJiFEEJ5dpA4t47o8DZUsoBIYQT+I8Q4h9SyqN3PknUeyeUawOJe+8AXAnsATLH+L9EvW8iQtwpYSnlv4GuWI8jHpFSNkspNwd/7yfwoSg77rDzgV/JAK8B2UKIEpOHajohXpuEJXg/DASfOoM/xweEJOq9E8q1SViEEOXA+4FHxzkkIe+bSBF3RjhETg8uHf1DCLEw1oOJBUKIKmA58Ppx/zVWObaEMkYTXBtI4HsnuKS4FWgDnpdS6nsnSAjXBhL33vkecD3gH+f/E/a+iQRWNMKbgUop5VLgh8CfYzsc8xFCpANPAVdJKfuO/+8xmiTMrH6Sa5PQ946U0ielXEagOtApQohFxx2SsPdOCNcmIe8dIcQHgDYp5aaJDhvjtYS4byKB5YywlLLvyNJRMM/LKYQwUmPUUgR9Vk8Bj0kp/zjGIXFTjs1sJrs2iX7vHEFK2QOsB44vDZiw984Rxrs2CXzvnAl8SAhRB/wOOFsI8Zvjjkn4+8YIljPCQohiIYQI/n4KgffQGdtRmUPwff8c2COlfHCcw/4CfDYYsXga0CulbDZtkDEilGuT4PdOgRAiO/h7CnAusPe4wxL13pn02iTqvSOlvFFKWS6lrCJQ2vFFKeXFxx2WkPdNpIi76GghxG+BVUC+EOIQcBuBQAmklD8BPg5cJoTwAsPAJ2XiVBw5E/gMsCPovwK4CaiAN6/PM8D7gAPAEPB584cZE0K5Nol875QA60RgM3Mb8ISU8m9CiK9Cwt87oVybRL53TkDfN5FDV8zSaDQajSZGWG45WqPRaDSaqYI2whqNRqPRxAhthDUajUajiRHaCGs0Go1GEyO0EdZoNBqNJkZoI6zRaDQaTYzQRlij0Wg0mhihjbBGYwJCiNlCiDohxKzgc2dwM4DyWI9No9HEDm2ENRoTkFJWA2uBdwdfuhx4Wkp5KHaj0mg0sSbuylZqNFOYncC5Qohc4Ivojc81moRHK2GNxjz2A3OBbwPflVIOxnY4Go0m1uja0RqNSQS3WmwCDgJnSCnH2yRdo9EkCFoJazQmIaX0AH3Aam2ANRoNaCOs0ZiNE3g51oPQaDTxgTbCGo1JCCGqgPpE3odWo9Eci/YJazQajUYTI7QS1mg0Go0mRmgjrNFoNBpNjNBGWKPRaDSaGKGNsEaj0Wg0MUIbYY1Go9FoYoQ2whqNRqPRxAhthDUajUajiRHaCGs0Go1GEyP+H9CfO/arAd72AAAAAElFTkSuQmCC", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAApgAAAITCAYAAACjRmzSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAC34klEQVR4nOzdd3zcdf3A8df3duZlrzZp070nq0yBAiIgo4ggylQEy2qZRVbLKEOgigzBWkAFBAVUEPhBVRBoSwfdu0mbdGQnl3n7fn9ckra0zbh8LvdNvu8nj3tok9znPpfceN/n/Xm/P1ooFAohhBBCCCGEIqZYT0AIIYQQQvQvEmAKIYQQQgilJMAUQgghhBBKSYAphBBCCCGUkgBTCCGEEEIoJQGmEEIIIYRQSgJMIYQQQgihlASYQgghhBBCKQkwhRBCCCGEUhJgCiGEEEIIpWIaYAYCAe677z4KCwuJi4tj6NChPPTQQxx4emUoFOL+++8nNzeXuLg4pk+fzrZt22I4ayGEEEII0ZGYBpiPP/44L7zwAr/97W/ZtGkTjz/+OE888QTPPvts+8888cQT/OY3v+HFF19k2bJlJCQkcNZZZ+F2u2M4cyGEEEIIcSRa6MDlwl527rnnkp2dzcKFC9u/NmPGDOLi4vjTn/5EKBQiLy+P2267jdtvvx0Al8tFdnY2r7zyCpdeemmspi6EEEIIIY7AEssbP/7443nppZfYunUrI0aMYM2aNXzxxRc8/fTTABQXF1NWVsb06dPbr+N0Ojn22GNZsmTJYQNMj8eDx+Np/3cwGKSmpob09HQ0TYv+nRJCCCFEj4VCIRoaGsjLy8Nk6v2Eq9vtxuv1RmVsm82Gw+GIyth6EdMA8+6776a+vp5Ro0ZhNpsJBAI88sgjXH755QCUlZUBkJ2dfdD1srOz27/3bfPnz2fu3LnRnbgQQgghekVpaSkDBw7s1dt0u90MHDiQ6urqqIyfk5NDcXFxvw4yYxpgvvXWW/z5z3/m9ddfZ+zYsaxevZpbb72VvLw8rrzyyojGnDNnDrNnz27/t8vloqCggA8++ICEhAQl8874zneUjNMmf5HS4Vh3jtrxTqxTOx7/UziW+TGFgwEtR6kdb2+G2vHWqh2O5YrHq1E8nkppiscbqni8cYrHU/zQw6lwrPhahYMB9gq145ldasejSfF4zYrH86kbyl+ibqwWL9z0J5KSktSN2UVer5fq6mqlsUObpqYmzjnnHLxerwSY0XLHHXdw9913t6e6x48fz65du5g/fz5XXnklOTk5AJSXl5Obm9t+vfLyciZNmnTYMe12O3a7/ZCvJyQkkJiYqGTeqh/qyfFqx0tMVjseAcXjqby/ZtVPTrUvJDgUP1qsaodTXuan510oqu+r6lfPQ1+2ekb1UyNO5VgKAxoAR6Pa8cyq06JBnY9nVjeU36ZurFax3N6W4PeT6PerHVT1eDoV0wCzubn5kH0VZrOZYDD85CksLCQnJ4fFixe3B5T19fUsW7aMG264oXs3VlIC8WoiG9Xv8arfCPyqn4uqA0yV45kVvjAChBQ/JVT/7loMNp4QXaH6eav804ji1ynl4ykM+DWFH9K1mIYoYS4X+BR/IGpWvQKtTzH965133nk88sgjFBQUMHbsWL755huefvpprrnmGiD8qeXWW2/l4YcfZvjw4RQWFnLfffeRl5fHBRdcEMupCyGEEEKII4hpgPnss89y33338Ytf/IKKigry8vL4+c9/zv3339/+M3feeSdNTU1cd9111NXVceKJJ/LRRx91f99CSQko2uugPAGgeAXTrfrDra6zO4pXGlSvhKj+3en+g6/K1I/iv4XeV2tVj6fnzIPy55nqtzLVL6Kq6XhF1KRwD5RJB38HlwtUV5K3GCPVE9NG60lJSSxYsIBdu3bR0tLCjh07ePjhh7HZ9odwmqYxb948ysrKcLvdfPrpp4wYMSKGsxZCCCGE6B199dRDHWxwEEIIIYTQIZcLDui
trUQ3TyJsO/Xw1VdfZezYsaxYsYKrr74ap9PJzTffDOw/9fDVV19t30541llnsXHjxphVqhsnwCwpAZua5Lbei3yUp8hVF1SqHM+h+M6qTrXpvchH1yl31ZWWxnm5A/S9tUV5SlvvYpos7AKF81Na5KODFHkU1dfXH/TvI3XB+eqrrzj//PM555xwD8LBgwfzxhtv8PXXXwPh1csFCxZw7733cv755wPw2muvkZ2dzXvvvRezUw/1/qgXQgghhIgNlwvq6tReXOE+q/n5+TidzvbL/PnzDzuF448/nsWLF7N161aA9lMPzz77bKDzUw9jxWgfI4UQQgghYq60tJTk5P2Nqw+3egnROfWwNxgnwNy1C6xqktt6T5Erbjms71Sb6mpK6YPZQzpuINys86p0XW9X0DnlfTB1XKUNSPKxF7lcyrbXtWutSk9OTj4owDySaJx62BuME2AKIYQQQnRHXV3UAsyuisaph71BPgYJIYQQQuhUd049bNN26uG0adN6da4HMs4KZkmJsmMFlde1Kf4rlKodTn0VudI9AQZrtK6c6pS2gRqt6z2lbaRG64aj5xS+yhc9HVSRu1zKtte16+bRk3311EN5lgshhBBC6FSvnnqokASYQgghhBCH43KBRXGo5O9epqft1MMFCxYc8WfaTj2cN29eDyenjnECzJIS0DQlQ6muIg8oHtCldjj1aV6VjXhVnwwf1Pm2ZL2nZZXScUV6NBjjeOLo0PvZ5no4U7tDKuenci+Fzl+PRYeME2AKIYQQQnSHy6WsfqNdQPUGaX2SjwdCCCGEEEIpw6xgmpqalEXTWqaigVqpTpHrvorcrPIXqPgzkuoPlm7F4+m+MbqOq8hV39cWnVe5G6mKXHVKW3mVu+q1HNUp9+5VNRuKywUmxX+/oO7biyhhmABTCCGEEKJbXC5l9RvtQiG14+mUpMiFEEIIIYRShlnBNKEwms5RNVBYSHG2w1hV5Ko3X6sdznhnhxus8ltERu/dGnRP9e9P5XgqX5N1UH3f1BTrGfRZ8iwXQgghhBBKGWYFUwghhBCiO5RmPw8Y0wgME2CaUbjYrjhFrvsqctVpY1O8wsEUp1BUbwcwVGN01XSevlddRa76saL6saznwlfVVd/Kq8h1kOrtkF7PIjdKKNY/GSbAFEIIIYToDq31onpMI5CPB0IIIYQQQinDrGDqOUUeVHycdona4dT34FVZRa66wbLeq8iVp9z1nIY2zMtT36DyuaH3k/JUv64oJ43We4usYEZO788iIYQQQoiYkCKfyBkmwNRzH0zVRT71aodTv9qg8qhI1Zvx9b6Cqfs+mCrpeXUV9avJqv+2qo8pTVI8nkrKi3JUU73iqOfxVL6IGiUU65/0/qwUQgghhIgJSZFHTj4eCCGEEEIIpQyzgmlCv0U+yo+KVJ3m1XORj+rUmO77YOo5pQ36LvLR++9OMdUpdz0X+aguylH+uqJ4POVLQ3JU5JHICmbkZAVTCCGEEEIoZZgVTCGEEEKI7pAq8sgZJsBU2gczRdVAYR7VWQDVaV7V6SyVKfKAzqvIdU/nldp6pvsOATqm52Mn+wSjhCiiLzNMgCmEEEII0R2yBzNyEmAKIYQQQhyGBJiRM0yAaWu9KOFQNVCYW3WK3Kt4PNVpY1O8urH8Ok+R674xuoFS2oa6r6jvYKDntLbqqm+9j6frRusqHyixryIXkTNMgCmEEEII0R1S5BM5o9xPIYQQQgjRSwyzgmltvSih+LfmVh3m677RerK6sfTeaF33KXLV9D4/lVSfbW6Yl2P9d2vQe+N25eQs8o4YZc+kavr46wkhhBBCiH5D7x+rhBBCCCFiQqrII2eYANOCwhS54ipyv+p1ZD2fQQwoXThXncpSfV91fxa5nlPaRrqvUaD71wGFdJ+CVk11dbXqfVBCGCjAFEIIIYToDqkij5wEmEIIIYQQh6H0mOkDxjQCwwSYum60rvrjjOpKaOUNlhU+vYKK/xiq6f58aT2
nofX+8qT4d9ei9/urkJ7T7RCF7hQ6H8+k8k1IZfhklFCsfzLQK5oQQgghRNfJCmbkjLIVQAghhBBC9BLDrGAqbbRutLPIVY9nU5ki13kVuTRa1xE9bweIAiNVkQdlraRn5CzyI5Ein8gZ5X4KIYQQQoheEtMAc/DgwWiadshl5syZALjdbmbOnEl6ejqJiYnMmDGD8vLyWE5ZCCGEEAZhjtLFCGKaIl++fDmBwP68y/r16znjjDP4wQ9+AMCsWbP44IMPePvtt3E6ndx4441cdNFFfPnll92+LT2nyP2q2/qr7pmrvIpc4ecavZ9FrpzB0rxGovuOAwqpfp6pTt+rfl3R+3h4FI8nRIwDzMzMzIP+/dhjjzF06FBOOeUUXC4XCxcu5PXXX+e0004DYNGiRYwePZqlS5dy3HHHHXZMj8eDx7P/yVJfXx+9OyCEEEKIfkuqyCOnmyIfr9fLn/70J2bPno2maaxcuRKfz8f06dPbf2bUqFEUFBSwZMmSIwaY8+fPZ+7cuYd8XUsHTdHCWVBZQ82wOtUbFVR/mldd5ONQ+AtU/UnerXY49UdFqmakFVEj3VfUP/Z0v7qvkOriQd0fZaky5FH5BhT7U7s11O8ljP296h26KfJ57733qKur46qrrgKgrKwMm81GSkrKQT+XnZ1NWVnZEceZM2cOLper/VJaWhrFWQshhBBCiG/TzceqhQsXcvbZZ5OXl9ejcex2O3a7XdGshBBCCGFUkiKPnC4CzF27dvHpp5/yzjvvtH8tJycHr9dLXV3dQauY5eXl5OTkdP9GslH2V1WdIle+xmqkIh/V/e9UF1oYrg+miJjet1Oo3D6iehuPnnt0RoOej7I0qfxjGCUU6590kSJftGgRWVlZnHPOOe1fmzp1KlarlcWLF7d/bcuWLZSUlDBt2rRYTFMIIYQQBiJtiiIX8wAzGAyyaNEirrzySiyW/Z+inE4n1157LbNnz+Y///kPK1eu5Oqrr2batGlHLPARQgghhOhP+mrP8JinyD/99FNKSkq45pprDvneM888g8lkYsaMGXg8Hs466yyef/75yG4oG2X3VnVW1qV2uCikixQ3/lT5+U31fVWeptR75bLa8cwK/yDqs576/t0pZ6S+mqrpOQUdjfFivtSkX3o4KrI3e4arFPMA88wzzyQUCh32ew6Hg+eee47nnnuul2clhBBCCBF70egZ3hvkc4sQQgghxGFEcw9mfX39QZcDD4k5krae4ddcc02XeobHUsxXMHtNFqCo+ttwVeTmzM5/pnsDqhtKdR5V91Xf+k7LxitsOdBgmK3wrfSe0lY5P9XPWyM1gY8GpVsCVL5Gxf41IJptivLz8w/6+gMPPMCDDz7Y4XVV9QzvDcYJMIUQQgghdKK0tJTk5OT2f3elh7eqnuG9QQJMIYQQQojDiGaRT3Jy8kEBZmd6pWe4QsYJMLMBRQf8BKxqxmlTonY49eknLUHxgCobrasbCtB/mlIxlVXfqqmeW8Bg2xV0/VjWe0pbdRW53sdTSWWFu+pq+T6us57hM2bMAPTTM1z+ekIIIYQQh6E5QNMUjxmi2ydjdaVneFpaGsnJydx000266BkuAaYQQgghhI71Ws9whYwTYGajrF+46hS57qvIlafIFZbhS6P1HlFZ9Q2QwOF72kZC70dzq6f6saL45V3PfxD97vQI03uKXOV4ms63jnRXAuo3YQbp9gpmX+wZLn0whRBCCCGEUsZZwRRCCCGE6I5EorOCWa14TB0yToDpBOLUDKW60bruzyJX3WhdZWWgNFrvt1Sn7xuM9reQlHbkggZL7ql8TdYUjqXnannRKfnrCSGEEEIcTgLqj/LR+wcsRSTAFEIIIYQ4nATUR0o6T36oYpwA046yKnKP4k8zhqsiV5n20H2jdX2/kqis+lY9XhOKm8/pns6ryPXcuF31ipDys9IV/y1Uj6dyR4DK1/dQ7M8iF5EzToAphBBCCNEdicgKZoQMtpNZCCGEEEJ
Em3FWMB0oS5G7Va/aq05pq07vmOLVjqcyhaL7Ruuqqf3oqzpFrpLquRmuitxIKW29n22ud0qrtXX+vOiuBEDx4SrK3/N1SlYwhRBCCCGEUrKCGQG/6toD1Z/mVX86MmWpHS+g4xVM5dR+mjfr/A6r7F3ZpLw3iN7pfOVHzyuiqql+mun5aEfV46ksQFJdzBSJBJSebgyAV/F4OiUrmEIIIYQQQikdfDwQQgghhNChRGQFM0LGCTAV9sGUIp8e8in6Q4D0wewh1ccxqqS6yEf19gJ9b1ZA3ylt1Q873f8xFJMjFHtPAuH4QSWP4vF0SlLkQgghhBBCKfkYJIQQQghxOAkoy362M0jkZZC7ido+mKrXfVXvx9D7UZEqKwNVp8Z0niLXc0ob1Ka1dd+S1Gh9NfX/B1FH9dNM7yltla/Jmsr7qvPfm+iQ/PWEEEIIIQ4nEVnBjJDswRRCCCGEEEoZJI5G31Xkuq+oVHxOlqGOitT30Y56Hq9J8RND9faCBqWjRYGeU9puxeMpbnShXFDxWo7qBuQqp6fy9V0PWwsSgDjFYxrkDAlZwRRCCCGEEErp4OOBEEIIIYQOWVAfKRkk8jLI3YSANXxRQXmKXO9V5GbFd1jpubfqhgL03Zxa6IzBqsiN9NyQxu09oPPHcXdJgBkxSZELIYQQQgilDBJHCyGEEEJ0kxn1kZJBinwME2CqTJHvUTPMfno/i1x1ilxlBaTqalSdpz31XPUNaiu1E9CUjRUeT+191X0VuZ5T2qrnpu/zB6Lwmqz4rVvptiWdjiV6nfz1hBBCCCEOR/ZgRkz2YAohhBBCCKUMEkdD0ApBm5qx6tUMs5/q9InqlLtNx43WdX52uN7PItf72eb6pu/HinJ6TrmrZrQqcpWUNlrXwWZFWcGMmKxgCiGEEEIIpQwSRwshhBBCdJOsYEbMIHdTbRV5iZph9tN7FbnqngoqUyh6Pm8Z0H3aUzGVldrNOq+Y1z/Vjz0dP2/1ntJWPT/VZ3SrrNY297PXPAkwIyYpciGEEEIIoZRB4mghhBBCiG6SRusRM0yAqTJFXqpmmP1Up8iDDsUDKl7o9ikcT/dV5GrpvdG6sRisitxI9N5cIaj4NVnlcEqryA0TovRL8tcTQgghhDgc2YMZMYPcTbV9MJWvYKreAK4lKB5Q0S+ujcr7q/MVTLPiP67eVxxVzq9Jen7qi8rCHNXPW70X+ej9oad0pVBW4kWYYQJMIYQQQohukRXMiMW8inzPnj38+Mc/Jj09nbi4OMaPH8+KFSvavx8Khbj//vvJzc0lLi6O6dOns23bthjOWAghhBBCdCSmcXRtbS0nnHACp556Kh9++CGZmZls27aN1NTU9p954okn+M1vfsOrr75KYWEh9913H2eddRYbN27E4eh6MYvXDB5FlVu674NpzlQ8oOLPISrTRcr7YOr7aEfV9J5y1zPV2x/UZ3l13AdT7/Seclc9P5PCv63Knpoqx4qUrGBGLKZ38/HHHyc/P59Fixa1f62wsLD9/4dCIRYsWMC9997L+eefD8Brr71GdnY27733Hpdeemmvz1kIIYQQBiFtiiIW0xT5P/7xD4466ih+8IMfkJWVxeTJk3n55Zfbv19cXExZWRnTp09v/5rT6eTYY49lyZIlhx3T4/FQX19/0EUIIYQQQvSemK5gFhUV8cILLzB79mzuueceli9fzs0334zNZuPKK6+krKwMgOzs7IOul52d3f69b5s/fz5z58495OtuM1gVfWpQHrLqvopc8cctA1WRq6b3lLvK+SWgKRurb9D3Y0/pc03181bfTwv1r/GKejqLLpAUecRiuoIZDAaZMmUKjz76KJMnT+a6667jZz/7GS+++GLEY86ZMweXy9V+KS1V3lRICCGEEEJ0IKYBZm5uLmPGjDnoa6NHj6akJFxGk5OTA0B5eflBP1NeXt7+vW+z2+0kJycfdBFCCCGE6K6gKToXI4jpQu0JJ5zAli1bDvra1q1bGTR
oEBAu+MnJyWHx4sVMmjQJgPr6epYtW8YNN9zQrdvymMCq6I/qUp3uUF1FbopXPKCkyCOl9yptvafcVdL730I9nafc9Uz1a7zexzMrjHg0OSpShMU0jp41axZLly7l0UcfZfv27bz++uu89NJLzJw5EwBN07j11lt5+OGH+cc//sG6deu44ooryMvL44ILLojl1IUQQgjRz4XM0bl0V1/sGR7TAPPoo4/m3Xff5Y033mDcuHE89NBDLFiwgMsvv7z9Z+68805uuukmrrvuOo4++mgaGxv56KOPutUDUwghhBCiL2rrGW61Wvnwww/ZuHEjTz311GF7hr/44ossW7aMhIQEzjrrLNxud8zmHfP153PPPZdzzz33iN/XNI158+Yxb968Ht2O2wwWVZle1Sltr+LxTFlqxwsoDuYN1GhddVpW7+Op1KzjuYH67QUNek9pK9+OopDq91BZv4icyrR2JEt9ikW64tjZmMAhbRTtdjt2u/2Qn++rPcMNstVUCCGEEKJ7olnkk5+fj9PpbL/Mnz//sHOIRs/w3hDzFUwhhBBCCKMpLS09qNPN4VYvITo9w3uDYQJMj8oUud6bBKuuIlddyafrKnK1VKdR9ZzS1jvj/e50nHJXvrVF51S/xqseT+Vrcj/Li0YzRd7VVorBYJCjjjqKRx99FIDJkyezfv16XnzxRa688kq1k1Oonz0UhBBCCCH6j2j0DO8NEmAKIYQQQhxGyBSFNkXdjLy60zO8TVvP8GnTpvX4dxApw6TI3SaFvWRVV33rvorcSClyHacVo0DPaeMmnTeBV/27a1A6WhToOa2t+nUgSfF4RqLymBqjHHnTiVmzZnH88cfz6KOPcskll/D111/z0ksv8dJLLwEH9wwfPnw4hYWF3HfffTHvGW6YAFMIIYQQojuicbRjd8dr6xk+Z84c5s2bR2Fh4WF7hjc1NXHddddRV1fHiSeeGPOe4RJgCiGEEELoWG/1DFfJMAGmywQ+VZ9CVGfu9H4WuU/xJyCV99dgjdaNdHa4avr/3Rloe4bOuz8op/ezyFWu0MW+N7pS0awi7+8ME2AKIYQQQnSHBJiRkx20QgghhBBCKcOsYO4G4lQNprrqW3W6Q+u8cWu3BKWKPFJ6rtIGsOg6bSyff3vGQCl31VtlVL8mG4nK350O/g56KPLpqwxyN4UQQgghRG8x1Arm4U/5jIDqohzV4ylr+NlK9VGRShfNVK/S6HvVR+8roirp/b6qn5++H3tKswVS5KOv8fS6J1AHK5iyBzNysoIphBBCCCGUMswKphBCCCFEd7QdFal6TCMwTIDZAHhUDaY6pa083WFTO57qFLlb5WD6Tiuq7r2oOi0bUjyehqZ0PJX0nnJXT9/PDaWMlnJXXZun8j3IIMGT6JxhAkwhhBBCiO4ImiAgVeQRkQBTCCGEEOIw/Fr4onpMIzBMgFkKWFUNpjqlrTrlblP88Sig+KhIpekstWlAs+I/rtHSsipT7nr/3ak/elLvKW2F82vW+VuP6j+tntvNqqbyvur7JUB0QufPciGEEEKI2JAVzMgZZCeAEEIIIYToLYZZwdyDwl6yem+0rrprruoqctXHuimkPu2pluqjHYNSRd6P6T3lLiKmepuWyqWmftZE3G8KX1SPaQQGuZtCCCGEEKK3GGYFUwghhBCiO2QPZuQME2DuAXXJO6+qgdoortJWVy8fprppl46ryFWnUY2WllVZRa56O4DoKZXPNcVvPaobrSs9DALlL8nKx1NJZfpeB2eRi8gZJsAUQgghhOiOQBRWMAMGWcGUPZhCCCGEEEIpw6xg1gdQt9yuuurbnKl6QLXDqU5TGChFrroqXfXZ4UHF8zMZ6DOr6seK6ib/us4uGu3scNVU/3FVvmVIFXmXxjQCwwSYQgghhBDdIUU+kTNIHC2EEEIIIXqLcVYwfahLbatOT2gJigdUXGKo+v4qbbRurGbSqlPkqqlMuatOt+u9ib56BnpuqD68QXUKP17
xeFJF3mtkBTNysoIphBBCCCGUMs4KphBCCCFENwSiUOQTMMjSnnECTJUpcqNVkau+vzquIledRtV7o3XVKXeVZ5HrfTuA3v+26il8rkkVec+o3u2hMhVtkOBJdM44AaYQQgghRDfIHszIyWcNIYQQQgihlHFWMH2oO0NcdcpYeRW5NFqPlN7PIlfdGF3oh+rtGQ1KR9M51Sl31VXpqYrH0zOVD2MdvNzJCmbkjBNgCiGEEEJ0g5zkEznjBJgqi3xUrYS2UV3kE3CoHU/1p0gd98E0WuGG3gtpVDLa31Z9H0yF46leITQa1VkllQFPPzsqUkTOOAGmEEIIIUQ3NIHyj6ZG+XxlkIVaIYQQQgjRW4yzgqkyRa46ZWzKUjteSPGf1UBFPqpZFD9YfAZK8+o9fW+8oyd1THWRj+rx3IrHk6Mie009Om8FrWOygimEEEIIIZQyzgqmEEIIIUQ3yApm5IwTYHpRV/2tuorcFK92PK/iKnJdP7v0fVSk3tO8qvtqmnScFNF7Fbnq+TXofPuI6AHVqWOVld9SRS5aGSfAFEIIIYTohnrUrymp3pKrVzFdbnjwwQfRNO2gy6hRo9q/73a7mTlzJunp6SQmJjJjxgzKy8tjOGMhhBBCGEU94FJ8qe/VexA7MV/BHDt2LJ9++mn7vy2W/VOaNWsWH3zwAW+//TZOp5Mbb7yRiy66iC+//LL7NxRCXfW36pSxOVnteHqvIpdG6xHTe8pdJTkWU28UPtf0vglN71XpindVKdXPqshF5GIeYFosFnJycg75usvlYuHChbz++uucdtppACxatIjRo0ezdOlSjjvuuN6eqhBCCCEMpAHwKB5Tdcpdr2K+I3/btm3k5eUxZMgQLr/8ckpKSgBYuXIlPp+P6dOnt//sqFGjKCgoYMmSJUccz+PxUF9ff9BFCCGEEEL0npiuYB577LG88sorjBw5kn379jF37lxOOukk1q9fT1lZGTabjZSUlIOuk52dTVlZ2RHHnD9/PnPnzj30GyqryJWnyBXH+UGdp8h1nB5TnSI3Ukob1Ka19VyRDuofK3rfnqFrRjl7r43q3SMqX+P1/bTttnrU97VXHULoVUwDzLPPPrv9/0+YMIFjjz2WQYMG8dZbbxEXFxfRmHPmzGH27Nnt/66vryc/P7/HcxVCCCGEEF2jq88aKSkpjBgxgu3bt5OTk4PX66Wuru6gnykvLz/sns02drud5OTkgy5CiO5rSqhn19DNrDn6C7aNWU11ZhkBk+y6F0IYh+oK8rZLd/TVjjsxL/I5UGNjIzt27OAnP/kJU6dOxWq1snjxYmbMmAHAli1bKCkpYdq0ad0fXOVZ5MrfY21qhwsqbrSuvJhXXTWq2WBlhqorqw9M4YcI0ZzQSE1WGdXZ+2hObGj/XqOzjoq83ZgCJlKqM8koyyO1Khtz8OCuyhqa0vmppPpceP3Tc6N11XPT1VuZsal8mhntKduBXuu4o1BMn5W333475513HoMGDWLv3r088MADmM1mLrvsMpxOJ9deey2zZ88mLS2N5ORkbrrpJqZNmyYV5EIoEiJEY7IrHFRmluFOaGr/nhY0kVqVSWpVNs2JDdRkluGJa6Emq5yarHJMfjNpVdlklOWRUp2BKSRHeAgh+pcG1AdKkXy86osdd2IaYO7evZvLLruM6upqMjMzOfHEE1m6dCmZmZkAPPPMM5hMJmbMmIHH4+Gss87i+eefj+WUhejzQlqQ+pQaqjPLqM4qw+vYf66EFjCRUpNBWkUO6ZU5WPz7t7cP3jqapqR6qrP2UZW9F098C1U5e6nK2YvZZyG9MoeMsgGk1KajhXS1+0YIISJSj/rTL9vybt/ucmO327Hb7Ye9TlvHHYfDwbRp05g/fz4FBQWddtwxbID55ptvdvh9h8PBc889x3PPPdfzG1OZIldeAqbzKnLl51qpG1D12eGqx9NLs3Cf1UtdWiW1GRXUplfit+1vqWDym0mtyiK9IofU6izMgcM
/fjQ0EhucJDY4KdgxksZkF1XZe6nO3ovX4aEibzcVebuxem1klA0ge08+CU2R74E2WgW+/uk45a66M4XqqnTV40VWA3tkev08qI+Xz6j5dgHyAw88wIMPPnjIz0Wj405vkI0rQvRDbanv2vQK6tIraHDWceD2SIvXSlpVDmkV2aTUZGAKdu8zuoZGUn0KSfUpDN42mvqUGqpy9lKdVYbP5mVfQTH7CopJdKWQvTefjLI8LAHVzT6EECK66lEff7fFzaWlpQcVIh9p9TIaHXd6gwSYQvQjfrOPncM3UdMa6B0oviGJ1OosUqszSa5LQwuZlKwSamg469Jx1qUzZMs46tIqKR9QSm1GOY3OOhqddRSP2EhGeS7ZewpIcqXquhhICCF6Q6Sdbg7suHPGGWe0d9w5cBWzs447vcE4AaYPdY3WtXRFA7VRXEWu+ixy5Y3R1aXa9N7sWnWat6PxvDYPmyZ9TVNyeF+P2W/BWZNBalUmKdWZ2D0Hf9INtf6nkhbSWoPYLLw2D5U5u6kYUEpLQlN7Cj2uKYFhGyaRVJ+i9LZ7m+rHiurtGUKInmsA5R+He/rKEdWOOwoZJ8AUoh9zxzWxcfLXuOObsXpsDN04gZSaTEwxLLaxee0MKBlKXskQGpy1VOSVUpW9j5aEJjZOXsa4lceR0OiM2fyEEKIv6KsddyTAFKKPa0xysWnScnx2D/bmeMZ8cwxxLQmxnlY7DY1kVxrJrjQGbxvDponLaUitZcOUrxm34jjim5NiPUUhhDis+s5/JOr6ascd4wSYKqvITfGKBmofUO1wAcUp92q1w+k5Rd7X1KVVsmXCKgIWP/ENyYz55mhsXsWN9hWy+K2MXn00G6Yso8npYsPUZYxffjwOt+rn1KGMV5Wu46pvvTdaV70tSO/j6fUlw1jnaBxRr3bcUUivzQmEEJ0ozyth46TlBCx+kmvSGLfiOF0Hl20sAStjVh9DfGMSPruHjVOW4bV5Yj0tIYQ4VCBKFwOQFcxImLIUDdRG8Yqj6gevjot8VBdGqF4RDUahyCdEiNKhW9lTuAOAjH0DGLpxPKYIqsJV9+k0d7ElsdVnY8yqY1h39Fe445vZNOlrxq487qBWRsZbcdQ7Ha+IKn+NEqJVEPX9OA1SzycrmEL0IQGTn63jv2kPLgcWDWPYhgkxLeaJlM3rYMyqY7F4bTQl17N54kqCmkE+2gshRD/X996VhDAot72Z9UcvpSa7DC2oMXTDBPKLRvTpnpJxLQmM+eYYTH4z9WnVbB+zVlYuhRD6ISnyiBkrRa6qD6Y5U9FAbVQX+agdTvkxZzou8rEozl2oesi5nNVsmrAcn92L1WNjxNopJLvSFI2uTiTBYUJDMiPXTmHzpBVU5e7F5nYwaMeoKMxOLelb2Y/pvShHNZXHAavcBi5PsT5NVjCF0LEQIfYOLGbd1K/w2b3ENyQx/usTdBlc9kRKTSZDNo0HYG9hEeV5JTGekRBCICuYPWCcFUwh+piAyc/20WupyN0NQEZ5LkM2jMcc7J9P26x9A/HENbN7yHaKR20gviWJ1FrV2QIhhBC9oX++Ux2Oykow5SlyxVXkqqrl2+g9vaOQ6v1/kVZpt8Q1sXnCCpqTGiCoMXj7aPJKCgkQUF75rScDi4bTEt9Edc4+Nk9YwcTlJxHfnBjrafUK1ds9zIqXSQJKq8gVV6Qr38ajc6rvb1znP2JYUkUeMUmRC6Ezldl7WXPs/2hOasDqsTFu1XEMKBnSp4t5ukpDY9jGCSTWpeC3+tg4cRl+i+pPTEIIIaJNAkwhdCJgCrB91Fq2jg+fzJNcm8bEr0/CWZce66n1KlPQzKg1U7G3xNGS0MTmcSulslwIERuyBzNixkmRe1F3bzXF5zyr3lNnoEbrej8qsquBUXNCA1vHf0NzYgOEYGDxMPKLh6NF0Dw9GvPrbVafnVFrj2LdUV9Sm1FB8bCNDN4+ukdj6vW+CgNQndJWXeOn8jVe/4eJdU8Q9e+pkiI
XQvSGsgElrD3mC5oTG7B67Iz55lgKikai9cHm6SolNjgZtnEiAHsG76Aqa2+MZySEEKKrjLOCKYQOVWfuo2j0OgBSqjIZtnFCnzhPvLdklg+gKcnFnsFFbBuzhvjGJOKbk2I9LSGEUUiRT8SME2CqPItcdRV5QHFAofrBq+NG66qbXfdmGtXtaGb7mLUA5JYMZvDWMZ0W8ug9zauywt3UmmAZtGMUDcku6tOq2TxxJRO+PhFLwDgvXSIGjJTSBohXOJbKpu0ehWOJXmfsHJwQMRLUAmwZv4qA1U9iXQqDto02RJV4JLSQiZHrp2Bz22lJaGTHKDlOUgjRS6TIJ2ISYAoRAztHbKLJ6cLitTJy3RRMBt9v2Rmb187IdVMhqFGVu5fyvNJYT0kIIUQHjJNn8qMuRe5IVjRQK9VV5KoOwG5To3g8HVeRq25ifriVtqrsvZTl7wJg2IaJ2DyOmK3Iqb7daK7CJrvSGLRjJLuGb6Z45HqSXCkkNCl+LnaD6see6vFUbx9pUDqa4kbrBjoMIipUbgnob03bo7HiKCuYQgjVmhMa2NFa1JNXPITU6qwYz6hvGbBrKKlVmQTNQbaO/4aAySCv1EII0cdIgClEL/GbfWyZsJKgJYCzOp2CohGxnlKfo6ExfMMkrB47zYkN7By+MdZTEkL0Z8EoXQzAOClyL2BWNJZDcVwelCrySOm90XqbECG2j1uDO6EZm9vB8PWTIupz2ZupdHOiCWuaGUuCieYiL0FP57etcn5HGsviszF8w0Q2TvmasvxdpFRnklaV3el4/fkM996hOK2tZ6pT7nofT2VaW+XcVG/3ioSkyCNmnABTiBgqHbKN2swKtICJkWunYPXZYzYXzaphTTWHL2lmrGkWbK3/u/9rZky2/QGwt9rP3j/VUvtlU8zmfaCUmkxydxWyb1AxO0avJWnpyTH9nQohhDiYBJhCRFl11j72DNkOwNDN40isT4nq7dkyLTgGWtuDR2taOJi0tf5/S3LXl/L99QFCgC3dwuBbMkk/I4ndf6jGXaKqYi5yg3aMxJVeRXNiA7sH76Bw25hYT0kI0d/ICmbEjBNgBu0QVFXhalM0Tis5i1w3VKegG5Lq2D423Ew9Z9dg0vflRS9Vq0HOxSnkzHCimTp+rAe9IXw1fnw1AXy1AXw1frw1gf1fqwngqwsQ8oXQrBpZ5yWTc6GTpDEORj2RR+W/6tn3l7oupc2jxRQ0M3jr6HCqfOAuBuwaIqcg9Rn6fQ0wJJWv8Sqbtos+zTgBphC9zGNvYcvElQTNrUU926NX1GNxmhl8SwZJ48Kbqdy7vXjK24LFtuAxgK82/LVAY9eD3JAvRPk7Lmo+a2TAFWmkTksg61wnzmMSKH25moY1sesR46zJIKkulYaUWvYM3kHh1rExm4sQoh8Kob6uoW+UDvSYBJhCREHA5GfDxGX4HB4cjQkMWxdZUU9XJI51MPiWTKwpZgLuIKUvVVP7hfq9kr7qADufqaT6343k/ywde5aFYb/MpuZ/jex5tQZ/fe8X0Who5BcNZ+OUrykfUMKAnUNlFVMIIXTAOAGmFg+aqjd4q6JxWqmuIle+PU5tOsusMIevx7PIQ4TYPG4ljcmtJ/WsmYrFr+Yxc2B6XTND7g9SybkgBc2k0VLiZcfT5Xj2Rnd/pGtNEw23NZN3SSpZ5zhJOymR5Ilx7Hq5ioZlKg8i7hpnTQaJdSk0ptSxZ1BRr+3F1HtjdGNRnXJX/NaoepFfdWcPlVXkKucmVeR9mvTBFEKhECGKRqynOqsMLWhixJopOFrUb0qy51kZ+fAAci9KRTNpVP27nk337Il6cNkm6Amx+481bL5nD807PViSzQy9LZuCmemY4nr3THUNjfzi4QCUDyzBZ9XDu5IQQhibcVYwzRlgVtUIU9U4rVQX+ShfRFK7OqBypUZvfTD3FOxgb0ExACPXTybRlaK8cCjjjCTyr0jHZDfhbwyw66Uq6pbGpn1Qc5GXzXP2tK+
kpp2SSOJYByXPVdO4ofdWM1OqM0moT6YpuZ59+cUUFI3stdvWK9XPjQYjFebI0ZOijaxgRkxWMIVQpCJ7N8UjwifLFG4dQ2bFAKXjWzPMDP9lDoN+lonJbqJ+bTMbb9sds+CyTSgAe9+sZcv9e/GU+bBlWBj2QDY5l6b02iuMhsbAncMA2Je/E7859m2UhBD9gJzkEzEJMIVQoDatgq1jvwEgr6SQASVD1Q2uQfqZiYx6Ko/kifEEvUFKX6li2yNl+Gr181G4aauHLXfso+rTBgByLnIy7P5sLKmKV/yPIK0ih7jGRAJWP+UDS3rlNoUQQhyewVLkqu6uzot8dN63UmXqTnUa0B/BR8uG5Fo2TlhOyBQioyyPwVvHEGr9r6dsORYKrk8ncUz4MdKwqYVdL1bh2afPFbqAJ1zF3rDeTcHPw/Me+UQuu35bScOa7qXMI/n95e0qZMfYdewtKCanZBCm0P7gVvVWhd7daSpEFOl1S4AeXuYkRR4xWcEUogeaEurZOOlrgpYAKdWZDN8wCU1B6KFZIecHTkb9Ko/EMQ4C7iC7F9aw9cF9ug0uD1T3VRNb7tpL804vVqeZoXOyyT7fGfXbzSgbgM3twGf3UJm7N+q3J4QQ4vAkwBQiQi1xjWyYsgy/zUeiK4VRa6diUtDrMmmSg1FP5ZHzgxRMNo361S1suW0fVR839KkGvZ4yP1t/GU6ZayaNvMtTGXxLJpotemt/ppCJ3JLBAOwrKFa+aimEMJhAlC4GYJwUuSkTTKpS26pT5Irj/Gq1w+k5RW5RvFu6q/fUY29h/dSl+OweEhqSGfvNMZgDhz6duhPgWNPNDLgyjdTjEgDwVvvZ82oNdUv3N5bzqG8RoFTct86JC/lClL5UTXOxl/yr00g9IQF7noUdj1Xgj9L+0aw9+ewesp2WxEbq0itJrc6Kyu2InlBdka7zPph67lsJcryjiApZwRSim8LB5RK8DjdxTQmMWXUsFn/k59NrNo2cH6QwZsEAUo9LIBQIUf5PF5tm7TkouOzLqj9pYNtDZfhcAeIL7Qz6RUbUbssSsJK1Ox+AvYOKo3Y7QggD6MdV5EuWLOH9998/6GuvvfYahYWFZGVlcd111+HxeCIeXwJMIbohqAXYOPlr3PHN2JvjGbvqOGw+e8TjWZwmRj2ZR+4PUjDZTTRsdLP5zr3s/WMtQXf/Su82bfKw7cEygt4gyRPjSDslMWq3lVs6GIIa9WnVNCXWR+12hBCir5o3bx4bNmxo//e6deu49tprmT59OnfffTf//Oc/mT9/fsTjGyhFngHmyFeZDqa47Yrqg0d0XkWu5yPxOktp7x68g+bEBqweG2NXHYvN44h4n59mgcLbsnDkWvFWtabDl/WPFcsj8ezxse+tOgb8OI0BV6ZSv6YFf536VLndE0d6RQ7VOfvYV7CTYRsnKL8N1dszVHdEUH8IgYEarat+GqYpHk81vb7s6KGeMYj6PZM6eQtcvXo1Dz30UPu/33zzTY499lhefvllAPLz83nggQd48MEHIxpfVjCF6KLm+AZ2F24HoHDrWBzunm1cyv9ZOomjHPibgmx/qKzfB5dtKt6vp3mHB0uimYFXRe+dt63YpypnLz5r5GkeIYToj2pra8nOzm7/92effcbZZ5/d/u+jjz6a0tLSiMeXAFOILggRYseYdYRMQVKqMkkvz+3ReFnnJZN+ahKhYIidCyrw7DPQ6lAQSl6sJhQIkXp8AsmTVFcshCXWp5DgchIyBSkfEPmLpBDCwPpxFXl2djbFxeF96l6vl1WrVnHccce1f7+hoQGrNfKiZuOkyLV40CLfK3eQoKJx2sdTO5z6dId+q8hVt6E50nhlA3fSkFKLyW9myKZxXb7t4GH+uM6p8eRdngpA6SvVuNZ0/Q/WgtpjIeNIUDre4e7v4TTtclP+Lxc556Uw8KdpbJi9m5D34N9nV8fqSPbuAoqc6ygfWMLgXSPRFLSREnqk8w9oqrctqR4vOp/
x+odoFOXoJEX+ve99j7vvvpvHH3+c9957j/j4eE466aT2769du5ahQyM/lS7iV9tPPvmExsZGAJ5//nmuu+46Nm/eHPFEhNArt6OZXcO2AFCwfSR2T+SvxvFD7RTekoVm0qj42EXlR8YtQNn3Vi2eSh/2LCu5F6ZE5TbSy3KxeG14HW6qM8qichtCCNEXPfTQQ1gsFk455RRefvllXn75ZWy2/bUqf/jDHzjzzDMjHj/iAPP2228nMTGRpUuX8uc//5np06dz7bXXRjwRgMceewxN07j11lvbv+Z2u5k5cybp6ekkJiYyY8YMysvLe3Q7QnRViBBFo9cRtARIqk0lZ/egiMeyZVoYdlc2ZocJ1zfNlL6ivGFpnxL0hNjd+jvI/n4K9mz1CRVTyETWnoEA7B24U/n4Qoh+rh+nyJ9//nk+/PBDamtrqa2t5cILLzzo+2+//TYPPPBAxOP3+BX9vffe4/rrr+eSSy7pUTn78uXL+d3vfseECQdXe86aNYsPPviAt99+G6fTyY033shFF13El19+2b0bMGeAWdGZ34dpqN0juq8iV9vcW88p8m+rGFCKK70aU8DE0I0TIj4G0pxoYticHKwpFpqLPRQ9U66LFxnVKXc73XuO1S1vxrW6GeekePKvzmD7Y+pXGbP2DGTv4CLq0itpim8gvjl67ZFEVxkspa13er2/On+Y9HVz587l+uuvJyvr8IdRpKX1rAgz4hXMvLw8fvKTn/DGG29w7rnn4vF4CAQie8dsbGzk8ssv5+WXXyY1NbX96y6Xi4ULF/L0009z2mmnMXXqVBYtWsRXX33F0qVLI526EF3ijmtm5/BNAOTvGElcS2T7FU12jWF35xA30Ia3ys/2x8r6XY/LnihdVE3QH8I5JT4qBT92dzwpVZkAlOXtUj6+EKIf68crmKFQdN+HIg4w//rXv3LhhRfy6aefkpqaSk1NDb/61a8iGmvmzJmcc845TJ8+/aCvr1y5Ep/Pd9DXR40aRUFBAUuWLDnsWB6Ph/r6+oMuQnRXiBDbx6xtT423tbzpNjMMmZ1N4ggH/oYA2x7Zhy9KRyT2VZ59Pio+dAEw8Ip05W1mIXx8JEB5XilBTX7/QggBoGmRZeW6osu53uXLl3P33XdTWVnJsGHDmDRpEpMmTcJuD1dU5+bmkpvb/dYtb775JqtWrWL58uWHfK+srAybzUZKSspBX8/Ozqas7PCptPnz5zN37tzDfMeKsjPEA4pS7W1UV5Tp/CxylY3WVVQaH268fQXFNKTWhKvGN44n1Ppft2hQ8It0nJPjCbiDbHyshMY9es1FxVbZ3+pIPyWJuIE2Ms9MpvJDtR8MU6ozsLkdeB1uqrLKyCofoHR8IUQ/1Y+ryAFGjBjRaZBZU1MT0dhdDjB/8pOfUFBQwHXXXUdxcTGfffYZv/71r6mtrSU1NZXq6u5HNaWlpdxyyy188sknOBxqgrY5c+Ywe/bs9n/X19eTn5+vZGxhDM2JDZQO2wpAwbaROFoia6g+8KeppJ+URNAfYsvTu2ncJsHlkQSag+z9Sw2DfpZJ3oxUqv/boHRfmBYykb23gNIhWynP2yUBphCiz3rssceYM2cOt9xyCwsWLADCBdG33XYbb775Jh6Ph7POOovnn3/+oEbqhzN37lycTmdU5tnlALO0tJQPPvjgkJ5Iu3btYvXq1RHd+MqVK6moqGDKlCntXwsEAnz++ef89re/5eOPP8br9VJXV3fQKmZ5eTk5OTmHHdNut7evqgrRXUFTgO3j1hAyhUipzGxPrXZX3pUppJ8RbqS+7dk91K1uVDzT/qdqcQNZZzuJG2gj54IUKt5oUDp+zt58SodspS6tCrejuccnMQkhDCAaeyZ7MJ7qguhLL730iEU+PdXlAHPatGns2bPnkABz0KBBDBoUWeuW008/nXXr1h30tauvvppRo0Zx1113kZ+fj9VqZfHixcyYMQOALVu2UFJSwrRp07p5a1ZA0VnkQcUpct1
Xkeu30bpqpcO20pLYiMVjY8jGcRFVjedc5iTznGQAtr+wl+ol6tK9zYq76MejoyArCHv+XMOwu3LIPsdJ7cfN+GrUvbI73AmkVGdQl15FeV4Jg4pGKRtbBdXPC5VbUcKkpDdiql+TVR+mIY3WjyyKZ5F/u0akswWyAwuiH3744favtxVEv/7665x22mkALFq0iNGjR7N06dKDTuc5UDT3X0I3inxmzZrFvHnzIs7FH05SUhLjxo076JKQkEB6ejrjxo3D6XRy7bXXMnv2bP7zn/+wcuVKrr76aqZNm3bEX5gQkapOL6OsIFxlPGTjOKy+7q+E51zqJPvCcLph98IaKj9zKZ1jf+da2UzDphZMNhNZM5KVj5+9twCA8tzdUW9xJYQQHcnPz8fpdLZfOmv1qLIgGrpWRb5+/fpOf+ZIuryCed5556FpGiNGjOD8889n2rRpTJ48mfHjxx/U+V21Z555BpPJxIwZMw7aV9B9Cot8gor7YKpeaND5CqZKqoIEj62FLWNXAZBdUkBKdWa3x865NIXsi1qDy0U1VH3coLzPpGqqV0RVHD25841yxs8bTPqpiVT+owFvubrHX3plDma/BU9cM66UalLqMiIeSwLUnlD9mqL38XR+KrNet4fr4a0nikU+paWlJCfv/yDd0eql6oJogGDw8HesoaGBN954g4ULF7Jy5Ur8/sj+EF1+1G/fvp01a9a0Xx599FF27tyJ1Wpl5MiRrF27NqIJfNt///vfg/7tcDh47rnneO6555SML8S3hQixedwqfDYv8Q1J5G8b2e0xci9PIfv8A4LLD9XuHzSShs3N1H7TSOrkRLJ/4KT0t+raIpiDFjLK8ygfUEJFXmmPAkwhhOiJ5OTkgwLMI4lGQfThfP755yxcuJC//e1v7eeSr1ixIuLxuhxgDhkyhCFDhhx0lFB9fT1r1qxRFlwKEQu7hmzBlVaFyW9m6LqJmELdaA+rwcBr0sg4KwmQ4FKVkjcrSJ2cSOqJ8ZT/zYV3n7qljOx9+ZQPKKEqax9DN0/AHIxC400hRP+ggyKfaBREtykrK+OVV15h4cKF7Nu3j/PPP5+33nqLM888k82bN/Pee+91b7IH6NG6fXJyMieddBInnXRST4bpJWaUdXBWXeSj+sGregO4jot8epqmrE2roKRwCwDDN0/E0Rzf9TFNUHBDBumnJBIKhih9uZrqxdGtFledcleR0o6GpmI3rhXNOI+KJ/uiZEqe6/kqZluP08S6FOwtcXjiWqjO2EdGRV6PxxaiV6lOaeuo1k8cKloF0eeddx6LFy/m1FNP5cEHH+SCCy4gIWH/e0JPi4B0vjFEiOjx2FvYMm4VaJCzexBZZQNxd/Hcdc2qMfjmDFKOTSAUCLHrt1XUfqnv/ZZ9TdnbLpxHxZN6UgJlf3PhLVPzQUdDI6Msjz2FO6jM2SMBphDiyHSwgtlWEH2gAwuigfaC6LS0NJKTk7nppps6LYj+4IMP+NGPfsStt97KUUcd1e270ZmIj4oUoi8LakE2jV+Bz+YloSGZIVvHdX6lVqY4jaH3ZJNybAJBX4jipysluIyClmIvrlXNaCaN7PPVVpRnloUbrddmVOK3+JSOLYQQve2ZZ57h3HPPZcaMGZx88snk5OTwzjvvdHidr776iri4OE477TRGjhzJvHnz2LFjh7I5GWgFU8dV5Krf3xSnT8yKP76p78/XfUUj1tOQUovFZ2X02qO7vg/PDMN+mU3CCAeB5iBFT1bQuOHIq56qq7SNpvydepxT4kk9JZGyt1zKznFPaEomvjGJ5sQGqjPLyN4np311RuXrgOFOg9d730q9vkzp4YGi06MiVRREH3fccRx33HEsWLCAv/zlL/zhD39g7ty5HH300Vx++eWMHTu2R3OUFUxhOOW5JezL3wnAyPVTiGvp+j7EnAudJIxw4G8MsO3Bsg6DS9FzzVs9NG50Y7JoZJ6rdhUzoyycGq/K2at0XCFEPxKI0kVHEhISuOa
aa/jiiy/YuHEjJ598Mo8++ugh/Ta7SwJMYSgNybVsGxXuelBQNIK06o7PaT1QXKGNnItSgHAT9Zadqo9gEodT/l64WX366YmY4tSdPJFRngtAXWoVPqv8LYUQYuTIkTzxxBPs3r2bd955h3POOSfisQyUIregLkWuuIpc+SKY2gFVp7RVVpEHuzE3r83NxglfEzIHSavIZmDR8EOuf6TxNKvGoBsz0CwaNUsaqf6yf7QiUt8IPlPxeNCw2k1LqZe4fBvp05Oo/KeaozfjWhJJaEimKamemsyy9lN+RF8kjdZFlOigyCcWzGYzF1xwARdccEHEY8gKpjCEoCnApokr8Do8xDUmMnzDpG6dMz7gR2nE5dvw1fkp+X1VFGcqDqctqMw8O0lZtzGA9NZVzKqsfeoGFUIIIQGm6P9ChNg2eg2NzrpwUc+ao7AEur6anTQ+juxzwqf07HyhkkBD7IuUjKb2iyZ8dQFsGRacR6tr2pdeEQ4wXWlV+M1STS6E+JYQ+wt9VF0Mcsqsgdbtba0XBXyK43J1J+G10m9jdNXj+bow1u5BO6jK3YsW1Bi5dgqOloQuN1M3J5oYPDOc9q342EX9N90r0VedgtZ7o3WV84s/YG4hP1R/2kDOxSlkfi8J11I1Za/xzYnENSbSkthIbUYFmeUDunxd1WeR63krinp6OGS6F6lujK56PNVV6UIgK5iin2uJb6R06FYACreMxVnbvbOnB9+QiS3NQstuL7v/WBONKYouqvq/RoL+EImjHMQNVrSfGkivDB+lVp1VpmxMIUQ/YYAq8miRAFP0WyFCFI3cQMgUJKUqk+w93SviyDwrmZSjW5up/6aCkFfPK0L9n78ugGtZeOUy/YwkZeOmtQaYtekVBDWDvPILIUSUGSdFHrSoa5Cu+j1IdbpD5ylyi8JUYEe75qqz9uFKr0ILmCjcMrZLRT2e1gr8+EF2Bl6RBsDOP5VTt1NN5bLousOloKs+aSD1hARST0pgz59qCLb0/LGZWO/E5rbjdXhwpVWTWp3V4zGF6FOUvwcpoofPewatIldBVjBFv+Q3+ygeuRGAAbuGdquZujnOxMjZAzFZTdSsaKDsQ0mN60XjRjctu72YHSZST1Czf1RDI7Uq3A+1JqNcyZhCiH5CdYFPNE4G0ikJMEW/tGvYZnx2D46mBAbuHNqt6w69Ppe4XDueSi/bn5dTXvSmenEjAOmnKUyTtwaYtRkVyot3hBDCiAyUIrera5Cu87PDVafIVVe3qnwDP9xYDc5aygeWADBk8zi0oKnLt5nz3VQypjkJ+kNseWYP/kaD5DL6kNrPG8m7PJWEYXbiBllp2dW1J2RHj4HkmjS0gAlPXAvNCQ3EN6kLXkVv0HujddF32aEbPZO7JgR4FI+pP7KCKfqVoBZgx+h1oEHm3gE4a9O7fN34kTYGXxEu+Nj5x3Iat+t1Y5Kx+RuCuJaHi33STklUMqY5aGl/rNSmVygZUwghjEwCTNGv7CncQUtiI1aPjUFbR3f5epYUEwWz0zFZNCq/dMm+S52r+SycJk89MVHZyT6p1eF+p3XplWoGFEL0fVo8aAmKL+oOi9AzA6XIHRBQlCJXnTU1WKN1lQ48O7w5oYE9g3cAMGjLaMx+S5fOKtcsUDA7E2uqmcaSZta/uL1bZ5x3RHVj9GbUNBiPHvVnkR9O/ZoWfHUBrClmkifGUb+q56vNKa0BZn1qLQGTH7OqrhP9hMqtMg2GS0Ervr/NBnlsGqQYpr+SFUzRL4S0IEVj1xEyhUipzCKtIqfL1x3w0zQSRtkJNAVZ++R2gh55VdO9ANR+GQ7eU09UU03uaE7A3hJHyBSkIaVWyZhCiD7OFB+diwFIgCn6hb2DimlKrsfss1C4aUyXel4CZJydSPppiYSCIXYtqKKlvP9vvO4var8Ip8mdR8Vjsvd8E76GRnJtuPdpfapskRBCiJ4wyDo74fS4qhS57qvI3UpHU50iV90GpjmhgT1DtgPh1Lj
N27W/c+IEB3lXpgKw9491NKxR+3szIpUp/LTObmuHF0+ZD3uOleTJcdQpOJ88uS6Nyrw9uFKU71vpdXre2iJEn6HFg6Zoo3f7mMboTmKcAFP0S0EtyI5xa1tT45lklOV16XqOfCuDZ2egmTRq/ttI1QcNUZ6piAbX8mayznOSNFFdgAnQmOwiqAUwhRS/sQgh+hYtQQLMCBknwAyZIaTToyKV123ouw+mqgIagD1DttOc1IDFa2XwprFA5yukFqeJwrszMcebaNzopvSl6vZrqC6i0X9RTt9Wv9ZN1nlOkifGKRnP0ZyAxWvDb/PSlFxPkitVybhdISuOeqLzIiTVWS9jbAkUvcw4Aabod1zOGvYOLgJg8OYx2Lz2Tq9jsmsU3pWFLdOCe6+P4l9VEtL5e4k4ssZNboK+ELYMC/Y8K569Pdu/oqGRXJdKTVY59c6aXg0whRA6ZIoDk+JQyWSMNx0JMEWf5Df72Dx2BWiQvi+3a1XjZhg0K4OEYXb89QGKHqsg0Ng/KsYtVgtjjhvJlOmTGHPcSIrW7uT1+W9TX9O/U/8hb4imLW6SxsWRNM7R4wATIMkVDjAbUuqgpOdzFEIIIzJOgKnnIh/lBav67oOposhn+8i1uOObsbU4KNg8qktjFvwsHeeUeIKeIDser8BTpvoPGX1tfTXNFjNDxxUy9bTJHHX6FCafMpG4hP1p4qPOnMLZPz2TVx/9M2//5m94PYe/r/H9IDfWsC4cYCaOc1D1fz0PqNtWLRuctYQIHbEjgcqtHqKn9L4ipHp+it+69bqTRw9PMS0h3CxZ6Zh6f7yqYZwAU/QbFdm7Kc8rhRAMWT8eS8Da6XVyf5hC+mlJhIIhihdU0ryt77Qj0jSNAcNyGXn0CIYdPZgxx4xixOTh2OMO3hJQU1HLik9XsfHrTXzvqu8yYtIwbnzi53z/p9/jyRsWsOLfq2J0D6KrYX24+j9pjCN8ZHAPP78k1DvRgho+uwePowWHu+8H4UII0dskwBR9SktcE9tGrwGgoHhEl/bIZZyRRM6MFABKX66mfqW+zxjPGJDOqKNHMPLo4Yw8ejijjh5OYsqhZ243uhpZ99UGvv5kJcs/XUnR+mJCoXB09faz7/LdH5/B9fN/SsGIfJ5d/BQf/vH/ePa2F6itrOvlexRdzUUeAu4glmQzjoFW3KU9W5k2B83ENybRlFxPY7JLAkwhjEyLB63zRYzujdn3smeRME6AGbSHj4tUwWBV5HpJkQe1IJvGrSBg8ZNcl0ZB8Qg8eDu8jj3HwoArw61n9r5RS/Xixg5/XvXRjl0xfPJQjv3eUe0BZUZe+iE/43V72fZNERuWb2TT8s1s/Hozpdt2tweU3xYMBvnXax/z33f/x88fvpaLb7yAs39yJieccxy/veN3/PMP/wLUV7l31ruyO7qcgg5A0xY3yRPjSRhjp7m056vTifUprQFmHRkVuT0erz9Q+TrQv3cGCyHASAGm6PN2DttIo7MOi8/KyHVT0EKdHESlQf71GZhsGvWrWyh/19U7E+2Gc6/7Lre+8AtMpv33JeAPsHNDCZuXb2Xz11vZsnwbxet3EfAHuh0ANzc088wtz/Lxnz7hrt/NZsTk4dyz8A5OvuAEHrnmCTxV/WMvUMOmcICZOMZB5cf1PR4vsT6FckpodNb1fHJCiL5LiwOTTfGYHS+M9BcSYIo+oSpzH3sGhVsSjdg4CYen87RlxvQkksY4CLiDlL6sv5NZfvzLH3LNwz8BYOWnq1n6/tdsXr6N7d8U4WlRu0d04/LNXHP09Vxyywx+/shPOfG84/nj2oU8fsUzrPx0tdLbioXGTeF9mIkj1WQpEuudADQl1XdY6COE6Oe0BNBUB5iKU+46ZZwAM2gKX1RQndJWviVQ343Wu6slromtY74BYMCuoaRX7k9ZHimNak03k/fj1mMg36jBXRmbT4yHW3E0mUzc8sxMLrn5IgAWPfQaL92/KOpzCQSCvPH023z9yUrmvXE
fQ8YO5slPHuYvT/6N39/zGgF/3z1dommHh5A/hC3Ngi3TgreyZ8+B+KZETAETAYsfd3wTcc2H7oEVPdE/Vs5jR8dV6SrfH+XsgT5NUcQlRHQETH42TVhOwOonqS6VwdtHd+l6g36eiTnOROMWNxUf9Txlqooj3sGjf5vbHlw+ffOzvRJcHmjHuiKuOfp63nnh7wD88I4ZPP7RPJJS+24QFfKGaC4Or/qqWMXUQibiG5IBaEzS39YKIUQv0eJbVzFVXoxROCgBptCtECG2j15LU1I9Vq+N0euOwtTZvksg4/QknJPiCXqD7HyhUjefgjPzMnjxf7/mlAtOxOP2ct+l83j72XdiMhdPi4cnf7GABy56hOaGZqacPpHnlj1N/siBMZmPCo1bwwFmwvDOT3TqisSG/WlyIYQQ3WOcFLkXUHVevfKUtlvpaGbFZe6qq8i7Wh28b+BOKnJ3QwhGrJuC1WM/5Lqeb/3u7BlWBl4RrmXe9UYF9Xu7V68arSrykVNG8OQ/HiZzQCY1FbXcfcF9rFuyISq31R3/e3cJu7ft5ZF/3s/A4Xk8t/RXzP3BY31yX2bTdjfgJGHY4QPM7nYviG9ICo/bug9TGInqFLSOU9qg9j0trvMf6VNM8WBS86F1/5iqghF9kxVMoUuulGqKR4QDsMHbRpNSm9H5lTQYNjMPc5yZ+k3N7PuX8iOSInLu1Wfzuy+fJXNAJkUbdvLTY3+hi+CyTfH6XfzimNms+2IDiSmJPPrBA5x4wbRYT6vbmlqb58cV2pUcvJHQGE6RS4AphBDdJwGm0B2PvYUtE1YSMoXIKMsjr2RIl66Xd246zrEJBFoCbHt+T8xT41a7lbt+dxu//MOd2B02/vePL7nu+BvZt7MsthM7jLpKF7ef/kv++9b/sNqsPPD23Zx22Smxnla3eCv8+OsDmCwacQU9r/qMa0yCEPhtXnw2Y7QVEUJ8i+zBjJhxUuR+1J0hrrzjjdr0SabOU+S+DsYLEWLr2NX4bF4SGpIZunFC+9c7El9gp+DSTACKXynHUx7bkxKyB2dy119mMeKYYQSDQV667w+8Nv/1IzZG7yrVKfw4Etr/v8/r5+EfPYm72cN3r5rOPX+6DZvdykevfBqz+XVXU5EH56R44ofaaS7qWVBoDppxNCfgTmiiObEeW02moln2jlh3f+hdUpUuhN7ICqbQlYrc3dSnVWMKmBi5dgrmYOd7VUw2jRG3DsRkNVGzvIGK/9RFf6IdOOWyE/jN6icZccwwXNUuZp99N68++uceB5e9IRgI8uQ1v+bvz3+AyWTitt/fxEkXHR/raXVZ847WQp8havZMxTeG92E2J8rZM0IYkzlKl/5PAkyhG16rh10jNgGQXzQCR0tCJ9cIG3xlNvED7XhrfWx/cW80p9ihuKQ4Zr86kztev4UEZzwbv9zMVVN+zrL/Wx6zOUUiFArx65kv8M/ffYjZbOaXr9/BxFPGx3paXdK8M7xqGVeoKMBsCrduak7s+IhRIUR/ZYrSpf8zToo8gLozxHXeGF11StuiONXmPcL8ikduwG/1Ed+QTG7J4C4VViQeYyPnjHDV+IbfFtHQ0LOWMpGezT3+xLHcuegWBgzLIxAI8KeH/sIfH36TqoD+9lt21a9/8QLOjGROnnECtzx3PdeOv1H3q7DNO1sLffJt4UWCHj7n21YwWxJkBVPoiY6r0pW/P4q+yhhhtNC9moxyqnP2QQiGbhzX+TnjgDXTzOjrBwOw6+/7qFnX+/0KHQkObvrNz/n1/x5nwLA8yndVMOuUObw693WCgb69By4YDPLktb+hqb6ZwWMHcdSZk2M9pU55y/0EmoOYrBqOvJ4fxxbXuoLZktAkleRCGJKkyCMlAaaIOb/FR9Ho9QDk7RpCYkNKp9fRzFAwKx1rogXX1kZ2vLknyrM81JTTJ7Jw3W+58KbzAPjg9x/z04k3sf7Ljb0+l2hpcjXxr9//HwAXz7ogtpPpopaS1jS5gkpyR3M
CBDUCFj8+m9rz4YUQoj8zTorch7oPDcqryNU2Wu9r1aM7h2/CZ/fgaIpnYNHwLl0n58dO4ofZ8DX6Wb9gB6FA760uJaclcd0TV/O9a88EoGxnOU/97Nk+2Zy8K1Xfbzz7Fy665TyOPmsK2aMz2LlpVwc/HftK65ZSL4mjHMQV2Kj9smdV7aaQCUdLPO6EJloSGrF5e34M5ZGo3tqib3qv+tb7/FRTeX/7W1hhQv2KozHW9oxxL4Vu1aZXUDkgfFrP0E0TulQ17pwWR8Y54b1xG58vxl3Vez0Kk1ITeW7ZU+3B5bvP/pNrxs3sk8FlV+3bWcb//v4lABfd8P0Yz6Zz7SuY+T1fwQSIaw4Xm7XEx7YFkxBC9CUxDTBfeOEFJkyYQHJyMsnJyUybNo0PP/yw/ftut5uZM2eSnp5OYmIiM2bMoLy8PIYzFir5LT52jF4HQG7pYJLr0jq9jn2AhQE3pAJQ8V49VSvqojnFg5hMJu594872vZY3nXAHz978O9xNaleg9eidF/4BwHd/cib2OMXHpinWsjscYDoGqgkwHa0BpjtBAkwhjEeqyCMV07XsgQMH8thjjzF8+HBCoRCvvvoq559/Pt988w1jx45l1qxZfPDBB7z99ts4nU5uvPFGLrroIr788svu35jKFLnBqshVFze0nSdeNHIDPocHR1MCA7cP7/SMclOcRsFt6ZgdJhrWu9n3Zl3EVd9H0lHK+IaHf8rRZ03B3ezm9u/fw/a1O5Tetp6tWLyK3Tv2MHDoAKb/8FQ+eOWjWE/piNy7w0327VkWNKtGyNezx6+jOXzqhtFXMI2VwldNx1XfQkRJTMPo8847j+9973sMHz6cESNG8Mgjj5CYmMjSpUtxuVwsXLiQp59+mtNOO42pU6eyaNEivvrqK5YuXXrEMT0eD/X19QddhP5UZ+2jOncvhGDIhvGYOkuNa1BwUzqOgVa81X5KFlTRm1tNz7zsdK6YczkA83/2lKGCSwj3xvzHyx8AcM5V343xbDrmdwXwNwbQTBr2nJ6/Ebf1Y3XHqf0wI4ToC6SKPFK6WacNBAK8+eabNDU1MW3aNFauXInP52P69OntPzNq1CgKCgpYsmTJEceZP38+Tqez/ZKfnx/+RpD9vTB7eqlWfMGv9JJASOlF9X8eewvFozcAkFc8hMR6Z6fXyf5BMs6j4gl6QxQ/WYmvPtBrbWNGHzWSexbeAcAfH3+D/3u960cnNuv8v+746E+fEAwGmXzKRHIH53Trur3Nsy+8iunI7XmrorYVTE9ci7Qq0i21r6H6p+f7q3Jeao89jkzsA8y+up0w5gHmunXrSExMxG63c/311/Puu+8yZswYysrKsNlspKSkHPTz2dnZlJUduXn1nDlzcLlc7ZfS0tIo3wPRHSFCFI1ZT8DqJ8GVTF7x0E6v4zw2npyLUwAo/V01LT08Y7o7sgZm8vh7D2GPs/Pl+0t48Z7f99pt603lnipWLF4FwNk/OTPGs+mYuy3AzOv5Pky7x4EW1AiZgnjt/X+/rRBCX9q2E65cuZIVK1Zw2mmncf7557NhQ3ihZtasWfzzn//k7bff5rPPPmPv3r1cdNFFMZ61DgLMkSNHsnr1apYtW8YNN9zAlVdeycaNkfcRtNvt7VF+20XoR+mgbdSnh88aH7JhPKZOGqrHDbUx6MZ0ACrer6f2f723Dy4+KZ6nPphP5oBMdqwv5v4fPUww2LdaQKn28Z/Dq7enXfKd2E6kE56y1n2Y2T1PkWshEzZ3XHhchxxTIoSxxL7IJxrbCXtDzHcK22w2hg0bBsDUqVNZvnw5v/71r/nhD3+I1+ulrq7uoFXM8vJycnIiSM/5UBdOK3+P0XcfzM6Kb7qqIamWoqHhT1wFW0bhaE7oMOVoTTcz5M5MTHYTrpXN7PljzSE/05U+jpEwW8w88tYDDJswlKp91dx+zhyaG2QP3ufvfYHP62PouEIKxwymeOPOWE/psDzl4bSfLbvnKXIAuzsOT3w
znrgWcIW/pjpdrikdTYjukD6YsfDtGhG73Y7d3nGXjkAgwNtvv93l7YTHHXdcVObeFTFfwfy2YDCIx+Nh6tSpWK1WFi9e3P69LVu2UFJSwrRp02I4QxEJv9nHpnErCJlCpFZkkbF3QIc/b3JoDLkrG2uqhZYSLzt/U0lvbn+7+3e3cdx3j6GlqYU7zruHspLY72fRg0ZXE1//3woATr345BjP5sg85epWMCEcYEJ4H6YQwkiitwczPz//oJqR+fPnH3EWqrcT9oaYftSYM2cOZ599NgUFBTQ0NPD666/z3//+l48//hin08m1117L7NmzSUtLIzk5mZtuuolp06bFNCIX3RcixLbRa2hJaMLujmPwxrFoHa3XmKFwdhbxg2346gIUPV5BsKX3osuZj/+cc685G78/wP2XPsTmlVt77bb7gv/87XNOOHcaJ33/BP4w77VYT+ewvJWtK5jplvBreQ9rBezu8Ak+HrsEmEIINUpLSw/axtfR6mXbdkKXy8Vf//pXrrzySj777LPemGbEYhpgVlRUcMUVV7Bv3z6cTicTJkzg448/5owzzgDgmWeewWQyMWPGDDweD2eddRbPP/98ZDdWB6jpuxyFoyL7dx/MfXk7qcjZDUGNUeuOwuLvOG2Z96NUkifFEXAHKXq8vD1YOBzVKfIf3HYBP77zUgDm/fRhPnm/6xXjRvHVB0sJBoOMmjqCzLwMKvdWReV2PD3YOuKpg6AviMlqgrQAnkofth68ALTtwfQ6pMjHGKJRWa2SpKF7TzTaCoXH606dSK9tJ1QopinyhQsXsnPnTjweDxUVFXz66aftwSWAw+Hgueeeo6amhqamJt55552Y/8JE9zQmutgxMnxaT+GO0ThdHZ/WkzDCTtY54SfcrmeraN7RexXj371uOrN/dQsAz9zxG/7x6ge9dtt9SW1lHRuWhgvxjj9Hp9mEEHiqw2/q9oye78NsW8GUKnIhhB70he2E8jFIRI3f4mPjhK8JmoOkVWUzcNewDn9es2kU/CIDzaRR/Z8GXMt7r6jmtCtO4cbfXQfAosdf49Vf/anXbrsvWvLh14w/fhzHnHkUf3/5/VhP57C8VT7icmzY0nr+Mmf1hFNXXrunx2MJIfoSDfVrcd0r6eur2wmNE2A2ozBLoe/0iR5S5CFCbBmzCnd8M/aWeEasn9z+9SNVpQ+8NA1HXvikntJXq5VVr3fm5EtP4JY/3ADA679+k1/f/VtlY6tO4ceRoHS8SC37vxVc99A1HHX6FEwmky7bN3lqwoU+trSer2DavOEVTL/NS1ALdtpeS3TMrLiBth7acYs2Kt/P9ND0Pnop8q7q1e2EChknwBS9qnTwNqqzytCCJkavPQqrv+P9b4mjHWR9zwnArpcqCTT3TsDynctPYtarMzGbTXz44ic8cevTvXK7fd3mFVuor20gOTWJUVNHsHH55lhP6RDemgMKfXrI4rO2NlsP4bN5sHviejymEEJ0xcKFCzv8ftt2wueee66XZtQ18jFcKFeTXs6uoeGAY9jm8SQ1pHT486Y4jcEzM9FMGlX/rqf+m96p1D3jmlOZ/Vo4uPz494t5/hfGPaWnu4LBIKs/XwvAxJPGx3g2h+erbQ0wnQqaraNh9YbT5D5JkwthILE/KrKvMs4KZi2gpucyqhujq04XxbLRektcI5vHrQQNsncXkLU3/5Drf7s6eOiVudizrLgrvOx4dS+Bbtxed8/TbvP9G77HLc+H0+J/f/4DfnPji4RCoYjHM6I1/1vLyeefwKSTJ/LG02/HejqH8LrCAaY1Rc3LnNVrw+tw47P2XuFZT6jeKqNvekil9iY931/jhBWiY/JIEMr4LT42TVpBwOonqS6VIVvGdXqdtGOTyD4tlVAwxLbf7iXQEv3U+BX3X8ZVcy8H4K/PvMfzs2XlMhLtK5gndv53jgVfW4CpYAUT2L+CaZMVTCEMI2gJX1SPaQCSIhdKhLQgW8atoiWhEZvbwai1UzsthLC
lWRj68zwA9vyjmobN0V09NJlM3Pzb69uDy9fmvSHBZQ9sXb0dr8eLM93JgCF5sZ7OIfz14cyANVlNOsriC6dA/FafkvGEEKI/M0YYDeHzw5W9L6hNT6hOaceiirx4+CbqMioxBUyMWnMUVq+94+tpMPzGAVgTzTRub6H0LxUKZ3woq93KnNdm851LTiIYDPLbm1/ivef02V6nN/Woyt0Hm7/ZwoTjxjPsmEK2F22L2tnwkfDVh5+nlkRzuCtID58WFl+4UM1n6xspctVUv04J0SeELOGL6jENQFYwRY9V5uxmX0ExAMM2TCKxwdnpdQZckIFzXAIBd5Ctv9lDKIp9RpLTknjyk4f5ziUn4fP6eOjSJyS4VGTD8k0AjDlqdIxncih/Y/hBpZk0LAk9X8W0ygqmEEJ0mTHCaBE1HnsLRaM2ADCwaDgZFbmdXidpVDwFP8wEoOj3+3CXRW9FyGwxM/9fDzL62JE01jXywEWP8s1/1kbt9oxm65ptAIyYODzGMzlUKAABdxCzw4Q53gSNPRvP3BpgBiwSYAphGCFzFFYwpYq8f6lFYWcAtSnyLJ2nyI/0dhoixLaxqwlYwkU9+cUdn9QDYE4yUXhLHppJY99nVZR8vrdHc+ssJXv93J8y+tiR1Nc2cMNJN1O0YWePbi/WVKeg44nv0fW3rtkK6DPABPA3BTA7TEpWMC3+1hVMi54reIU+Geks8v7WaF1ESs+PUqFzewcVUZ9Wg8lvZviGiWidnW6iwcCbUnGk22ja08KW3++K6vyOOm0KP7n7MgAev+6pPh9c6tH29UUEg0HSMlNJy0qD6G6l7bZAcwDSrZjjTT3+2GX2h18uZQVTCAORKvKIyR5MEZHGJBclQ7cAULh1DI6Wzo8wzLo4iaRJDgKeAOue3kHAE72igdTMFB740z2YTCbee+l9/v3Xz6J2W0bmcXvYu3MfAINHDorxbA4VcIfDSrOj5y915kBrgGmWgwmFMA7L/kIfVReDrO0Z415C+CxyZSlytY3WVVdnWhSP5/3W2k/A5GfruG8ImUKkVWSTuXdgp5XmiRPtZM5IAmDtS1upLK1WOscDaZrGfa/eTUZuOkUbdrLg1q6fLa46Bd1fzyI/0M7NOxk4ZACFowZR8b/aWE/nIIGWcDBodph6nGwz+8MvIAFJkRuA3lPaen4MGiesEB2TFUzRbcUjN+JOaMLqtjNk03g0tA5/3pZtpuCWdDSTRvUnjez+X3TzqFfM+RHTzj4WT4uH+y+dh6dFGmNHU8m2UgDyh+XHeCaHCnrUrWCaZAVTCONRvXoZjbZHOiUBpuiWyuw9VA7YDSEYvmES1tbegEdismsMuiMDc6KJ5q0e9i2qi+r8jjptCj+bdzUAT/5iATvWF0f19gTsKQ4XauUN7ryDQG8L+sKr+Zq14w9BXWEOhlcwgyYJMIUQojPGCKMhXEWuLJxWm56IRWP07mg7S9wd10TR6PUADCgeSlJtasfnlGuQ/4t0HAVWfLUBip+qIuBXm74/MAWdNSCLB1+/B7PZzLsL/8FfX/mb0tsSh9cWYA4o1N9pPkFv+LlgspkI9vSkhda4MmQOEiCAFWsPZyeE0D0p8omYrGCKLgmaAmybsJqgJUBSbSoDiod2ep3sGcmkTEsg6A+x8+lK/LXRW/mx2W08/c7jpGens2X1Vh678cmo3ZY42L6SMgCyBmbFeCaHCvpbA0xLz1cwteD+Tdwhk5xqI4QQHTFGGA3hFUxl1Bb56H0FM0SInSM30pzUgMVrY+j6CRDSOrwd53Hx5FySAsDul2to2rJ/H2Q0jhO898W7GXfMWOqqXcy68E48btl32Vsq91YBkJ6dhslsIhjQT/AVCoQfo5q55wGmKbR/jKCm9jkmRP/Rz/pgSqP1iMkKpuhUZe4eKgfsgRAMXT8Bm8fR4c/HDbUxaGY6ABXv11Pznx4eodKJy2+5lO9fdS6BQIC7fngPe3f2rHm76J7aylr8fj8mk4nU7M6PCe1
N7QGmkhXM/S+XsoIphBAdkwBTdKghqY6dozYCMKBoKM6a9A5/3pZpZshdWZjsJupXtbD3j9FtW3Pi2ccz+6lbAHjmjmdZtnh5VG9PHCoUClFbWQdAcmZybCfzbW1xYM/jy3C3hNaFy5CsYAphDFJFHjFj3EsgvENf1XK72mV71X0wOyy86Qaf1cv6CUsJmYM4KzPILR7SYVrcHG+icE4W1hQzzTu9FC+oIBSK3htxwbiBzH3zLsxmM+/8/u/86ZnXezxmNNL3RlBfU09mbgbJ6Umxnkp0hTTQQuGLiJjq17wGPaRSO6T3vpriiKTIJ2KygikOK6QF2TRuOe64ZuzNcQzZ0HG/S80ChbdnEjfQhrfGT9Fj5QTd0XsTTs1N4e73byExOZHl/1nBo794PGq3JTpXV+0CIDlDZwFm60NQU7CCCfsXQlXvcxZCiP7GGGG06LaiYRupTa/EFDAzbO0kLP4OWrJoMOjGTJLGxRFoDlI0vwJfTfQqxuOSHNzzr1lkDspg19YSbr94Dn6f3lcw+rdGV3ifbVxSXIxn8i2tH6FDsmVSCBGJaKS0JUXe37hRd3fVBjNZitNFPV1dKc8pZfeg7QCM3DCZ+MaOV6VyZjhJPT7cjqjoVxW07PJ2+PM9SUNbrGbu+dssBk8qoLa8jmu+ex37avZFPJ5Qo7mhGdBfgKmZwmuOoaDaFcfOTq8SfZ3eU9p6/kBtoLBCdEgeCeIg9c4ato5eDUBB0QgyKwbQQssRfz5hlJ2ci1MAKP1dFY3r1bZwOpDJpDHr1RuZfMYEWhrdPPi9+ZQW747a7Ymua25sDTATO+4w0Nu0tk1Aij7DhQ75P0KIfk3aFEVM9mCKdm5HMxsmLiNkDpJekcOgolEd/rw5wcTgmzPRTBo1nzVS81l0C2RueO6nnHLZCfi8fuZf/BQ7VskxkHrhcYdXrS12fX1mbTsisu3IyJ4IEQJTa9ujkKxgCiFER/T1bhBVflSlFZJ6euTct6iuqIwkRe43+1g/aSk+m5eEeicj109pH+tIVemDr8/ElmHBvdfLrt9XEuzi7TbT3O35/Wz+lZx9/RkEg0Ee/fGv+OLjJd0eQ0SPzxt+Tlis+npJMVnDn6GDvhCmHn6eDmr7nwdmg+yhEsLwgqbwRfWYBmCMeyk6FNSCbJqwgubEBmweO2PXHIO5kzYKmd9NJvXY1n2Xv64g6IlezvDKB37EZXf/AIBnfv4cn739RdRuS0SmrcjKYtNX4GWyte7B9PX88Xlg70tZwRRCiI7p691A9LoQIbaPXkNdeiUmv5kxq4/F7um4UCN+mJ2BV4Qbru/+YzUtxR0X9fTEj++9lCsf/BEAz896mQ9+/3HUbkv0QBT7nfaEyR7+DB1w9zxLEDTt74xgMsgKhBCGF2i9qB7TAAwUYHpQdXdVnx2uerzuNFovKdxKeV4phGDkuikkNCQfcn3PAWevWxJMDJk1EJNFo2pJPbs/rFA272+7/J5LuOahHwPw4u0L+euCvx/yM3pvjB7JdoCOxJGgdDxlVDWaVMzsODDA7Nkcg+bw80ILamgh9QGmRfFWGSFio5+dRS4BZsTkY7iBlQ3YRenQrQAM3TyetOrsjq+gwfCbB2LPtNGyz8OOF6N35veVD/yIax+5AoCX7lrEW0+9G7XbEj1nMoVfSkJBfQVJbQFm0KNuBVOT1UshhOiUgVYwxYGqM/exY9Q6AAYWDSdnz6BOr5P/g0xSJycS8ATZ8vRuAi3RCSZ+eMeM9rT4S3ct4s0n/haV2xHq2OzhRvw+jw5WHA5gSQgHg/7GABo9aw0SNIcDTHNAXjaFMIwQytqcHTSmARjolVJdFbneU+S+TsZrSK5ly7hvQIPs3fnkFw3vtPI8dWoi+RdnArDjpX007/JEPL+O0tpjjhnNtY+GVy5/c9vzvPH02xHfTiRUp7SNwmq3AeDzqO2w0FPmhHBQ6W8O0sFZVF0SsIRfP8x+A71sCiFEhOSV0mC8NjdbJqwkZA6
SWpnNkC3jOj2VJG6AjeE3DQBg34c1VP3PFZW5JToTeOjN+7BYzPzf64t7PbgUkXPE2QHwunUUYJrAEt8aYDYGeh5gmlsDzIAxmiQLIZA9mD0gm4kMJKgF2TJ+FV6Hh7jGREasn9RpsYIpQWPUnflY4s24Njax87WyqM3v7pduJ68wlz1Fe3nihmeidjtCvfikeABaGo586lNvsyaFA8FQMIS/seev6H5ra69PX09DVSGE6P8MtILphh7uwWqjujG66upR7xHS3cUjNtKQWovZb2Hk2imYAuaOU+MmyL8lnbhcOy0VHtY8tRVfIDp77M7/2bmcfsl38Pv83H/pQzTV67s6XBwsMTlc3d5cr6MAMzn88uZvDCjZ8+S3hANMs18CTNFd+tqbHF39LKyQFcyIyQqmQZQPKKE8fxcAw9ZPJK45sdPr5F2dQtIkBwF3gLVPbsPXEJ0XyRGThjHrNzcB8MKcl9m4fHNUbkdET6Iz/HhqrtfPHlZLcvgDpa9ezau5rGAKIUTX9bOPGuJwXCnVFI/cAED+9hGkVXXSjghIPzuR9LMSCQVDrH+2iMZd0VmZSnQm8MhfH8TusPG/f3wp+y77qJSMFADqK+tjO5ED2FLDL2/eWjUfjPy28IECVp9NyXhCiD4giPoqcn11c4saAwWYAVSlKbJ0cHZ4Rw5slO6Oa2brhFWETCHSy3LI3VnYaSP2pCkOcq90ArDvz3WULFfb77KtilzTNB599UEGDh3AnuK93HPl/TSHGiMayyjU399MJaOkZIQfL/VVDUrGU8GW2to6qU5N4ZHP2hpgeqMTYPp1nlBS3e1CLSOloEWvkhR5xPT9iiZ6xG/xsWXSSvw2Hwn1yRRuHN95xfhQG4NuzUAzaVQvbqTyn9ELGH527zWcev4peD1ebr/4bhrq9BOciK5zxNlJTA6nyF16WsFMa13BrFETfPhs4dZcligFmEII0Z8YaAXTWIJakG0TvsGd0ITVbWfE6imYgx0XOdmyLRTenYnZYaJ+dQu7f18TtfmddM6J/GLezwF45PrH2LRK9l32VZl54VXQlqYWmlz62YNpzwivYHqq1Kxgeu3hANPmdSgZTwjRB8gKZsQMFGC6UbVgq7qKXHWKPESInaM2Up9Wg8lvZuTqqZ2+KVqSTQy5JxOr00xzkZddT1e1PwlUp2ULRw3m0T/PA+DN377F3195X+n4ondlDQgHmBV7KmM8k4PZ09UGmL62ANNjVzKe0DPVKXcDvdUq/d0ZJBLrp4z0qDeMPYU7qBywG0LhivGExuQOf94Up1F4Txb2XCveCj/Fj1UQdEdnv1ViWgJz/zmPJGciKz//hl/Nkn6XfV1OQQ4A5bsrYjyTg9mzWwPMyp4HmAGzv/0kH5tHVjCFMAwp8omYYQJMEwE0RZ+GVG9276zopjvKcnexZ+h2AAZvHkNKVWaHK6SaFQrvzCJ+iA2fK8D2h8vx1UXnU6PFaub2v91IwbB8SotK+cWMm6n362vfperV2jgSlI6nRwMK8wDYU7xX6f21E3kgZ443YU0Kv7yFyk09GgvA4wh3UTD7LHIWuRBCdIG8UvYj1ellbBm9GoDc4iFk7cnv+ApmGDwrk8SxDgLNQYoeKcdbFr1qzOt+dyVjvzOKxvpGfn7ejdRW1UbttkTvOTDA1At7dvilzVfnJ+jp+QdCj8MdHtcd1+OxhBB9SBD1mXqDrGBKFXk/4XJWs3HCcjCFSN+Xx8Adwzq+ggkG3ZyB86h4gt4gRY9X0LIzeudI/+CB8zn16pMIBoLc+sPb2b5xR9RuS/SuQSMKACjdXhrjmexnzwunx9371DymPY5w8ZIEmEII0TUxXcGcP38+77zzDps3byYuLo7jjz+exx9/nJEjR7b/jNvt5rbbbuPNN9/E4/Fw1lln8fzzz5Od3Xmz8APFE0RT9LFBdYq8p0U+TQn1rJ+0lKA5QFplNoM3jul4XA0KbsggdVoCQX+I4l9V0rjJfcTxe5o2PuPqU7n
kwQsAeO6Gl/n8oy96NN6BmtFP1bJRDRo5CICdm3cC+mjh48gLz8O9V02A6Y4PP84cLfFKxhNCX1RmrvpZYlSqyCMW0xXMzz77jJkzZ7J06VI++eQTfD4fZ555Jk1N+wOaWbNm8c9//pO3336bzz77jL1793LRRRfFcNb60hLXyLopX+G3+kiuS2P0uqMwhTr+s+ZekkL6KYmEAiF2PlNJ/eronR999DlTuPGl6wD4yyPv8PHLi6N2W6L3paQ7SW09xWfXNv2sYDoGthb4qAow41pXMCXAFMJYglG6GEBMA8yPPvqIq666irFjxzJx4kReeeUVSkpKWLlyJQAul4uFCxfy9NNPc9pppzF16lQWLVrEV199xdKlSw87psfjob6+/qBLf+V2NLN26ld47R4SGpIZu/pYzMGOPz2mTIsnZ0YKACUvVuNaHr0VwLEnjebut2djtpj592uf8cd734zabYnYGDY+vBWjdMdu3M1HXgXvbXH54RXMllKvkvHc8eEPvY7m/l+0JYTQl/nz53P00UeTlJREVlYWF1xwAVu2bDnoZ9xuNzNnziQ9PZ3ExERmzJhBeXl5jGYcpqu1bJfLBUBaWhoAK1euxOfzMX369PafGTVqFAUFBSxZsoTjjjvukDHmz5/P3LlzD/l6JkFMij42ZCpe344kRe61uVk75Ss8jhbimhIZ981xWPxWQoSOWJUeX2hj0MwMAMr+UUfVZ9ELvgsnDuL+f96FPc7Gsn+s4NfXvhi12zISvVWlj5gQDjC3rd0e45nsp1nAkRtewWwp6XmAGSLUniKPa9LX77+vUr3NSF+9KA5H70dZ6ioU0Bc3KD8l1dO9H2/L9h599NH4/X7uuecezjzzTDZu3EhCQvg1adasWXzwwQe8/fbbOJ1ObrzxRi666CK+/PJLxZPvOt08qoLBILfeeisnnHAC48aNA6CsrAybzUZKSspBP5udnU1ZWdlhx5kzZw6zZ89u/3d9fT35+Z1UU/cxXpubtVO/wh3fhL0lnvGrpnXaSN2aZmboXTmYbCZcq5rZ8+fondJTMGYgD39yHwnOeNZ9tpHHf/gMAb9BNp0YzIiJwwHYtk4/AaZjoA3NrOFvDOCr6fnjzhPXTMgURAuYpMhHCNHrPvroo4P+/corr5CVlcXKlSs5+eST27O9r7/+OqeddhoAixYtYvTo0SxduvSwi3G9QTcB5syZM1m/fj1ffNGzAhC73Y7d3n9P2mhbuWxJaMTmdjB+1TTsno7f9EwOjWF352BLs9BS6qXo1+XqP5G1GjAil0cW348zM5ltK3bw0Pcfx+uOXnW6iK0xR40GYNNK/Rz1GV8Yfv4371STHm9OaAQgrjkRDU3JmEKIPsKN+j2TrS9N397C19X4RUW2tzfoIsC88cYbef/99/n8888ZOHBg+9dzcnLwer3U1dUdtIpZXl5OTk5Ot24jjiBmnVaR+7s4L6/Vw/opS2hJDAeX41ZOw94Sd0hK3MMBe+FMMOqWfOIH2/HW+dn42C48Ld0L+LpaqZ03NJdH/n0/qTkpbF9dxO1n/pKG+sZDfk51M3MRG454B0PHDgFgw/KNMZ7NfvGF4f2XzcXdzEMdQXNiOAEb35ioZDwh+jeV2wH0vrWgZ76dXX3ggQd48MEHO7yOqmxvb4hpgBkKhbjpppt49913+e9//0thYeFB3586dSpWq5XFixczY8YMALZs2UJJSQnTpk2LxZRjxmtzs37K0tbg0s64ldOIa+l8P9iQa3NIm5pEwBtk8xOlSo7NO5wBw/J4+j+PkjEgneL1u7jjjHtpqD00uBT9x+gpozCbzVTsraRyX1Wsp9MufkjrCmaR6gAzScl4Qog+pBn1cW7rCmZpaSnJyfuPcu7K6qWqbG9viGmAOXPmTF5//XX+/ve/k5SU1B5pO51O4uLicDqdXHvttcyePZu0tDSSk5O56aabmDZtWsyWfGPBa/OwfuoSWhKaWlcuj+tScDngwgxyzkgjFAyx7Td
7aNwenXZEeUNyeOaz+WTkpbNzwy5uP/2XuKr6b/W+CJt84kQA1ny5NsYz2U+z7E+RN21TE2A2JYUfywkNyZ38pBBCdF1ycvJBAWZneiPbq1JMA8wXXngBgO985zsHfX3RokVcddVVADzzzDOYTCZmzJhxUKP17ooHLIpS2xbFGzJ8HczLb/GxcfKycHDZEse4VcfhaInvtPI88xQngy7LAqB4URk1X0deZ9lRStseZ+eBd+aQkZfOjnVF3HT6bdRW1kV8W6LvmHRCOMBc/eWaGM9kv/hCOyarhs8VwFvR82WHgNnf3qLo2wGm7McUwgDcqG+M3s1EYl/N9sY8Rd4Zh8PBc889x3PPPdcLM9KXgNnPpklf05RUj9VjY+yqY7t0kkjq1ESGXR8+H3r3e1WUfRy9M7/venE2wycOpaa8hlvPulOCS4MwmUztK5jffKGfADNhRLibQtN2NT05m5LqQQOb24HV13+LB4UQR9CC+hR5NwPMvprt1UWRjzhU0BRg84QVNKTUYfZZGPPNsV1Ki8ePtjFoVjqaWaPiszpK3qiI2hwvvP77nH3Fmfj9Ae794Tyq9lVH7baEvoyeMpKklCQa6hrY/M2Wzq/QSxJHhwPMjo4+7Y7G5LrwuPVOJeMJIUR39Wa2VyXDBJgZBLAqWufu6dnhnY0X1IJsHb8KV3o1Jr+Z0d8cQ3xjUqe36yi0MuiudMw2E5XLa1n/wna6sEgckbHHjmbWr28E4IW7X+Kbz7q+iqWyilwq0nsm0sbtJ5x+AgCr/rsGe1AnvSE1SBqlNsBscNYBkOhKUTKeEPokZ5EfUQvdXnHsVDd/3X012xvToyLFoYJakG3jv6E2swItYGLUmqNIqk/p9Hr2fAuFv8zAHG+idkM96xfsIBSl804zctOZ/848rDYr//7rZ7z+1FvRuSGhW8eceRQAK//9TYxnsl9cvg1LspmAO6ikgjxEiMa2ALMLz0EhhBD79bOPGn1bSAuybdxqarLK0YImRq6dirM2vdPr2XItFN6XiSXZTPM2L2ue2EbQF52lS5vdyvx35pGZl8GO9cU8cvXjUbkdoV/xSfFMPHE8AEv+tTTGs9kvaXx4JbVxo5uQgmSFx9GC1+FGC2qygimEUbWgPlLq3+092xkmwEwghFVRalt1ijxIkKAWZMfYtdRkl6EFNUasmYyzOv2I54q3sWVbKLw/A2uKmZZiL0WPltPQorb/5IFp6Dkv3s+448bgqnFxy/mzqW7UT/9D0TuOPn0KVpuVkq2l7N6xN9bTaZfcGmDWrzt8O67uVn03pIaL4xLqnViChnmpFID6CEAeP8J45FGvA0EtyPZxa6jNLkcLagxfO5mU6sxOr2fLtjD0wSxs6RbcpV52PFxBoClKmy6Bq++6gu9fdS6BQIA7L7mH3UV7onZbQr9OOj+8/3LJv5bFeCb7aVaNpLHh/ZcNRwgwu6s+NVy0llyXpmQ8IUQf5AbMisdU3fZIpyTAjLGgFmDbhNXUZVa0B5epVVmdXs+WY2HYg1lY0yy4d/vYMa+CQEOUNl0C02ecxi2PhYt6nrj5KZYtXh612xL6ZTabOPG8cF+1z97Tz0kSSWMcmOwmvNV+Wkp6fgZ5iBB1rQFmV7apCCGEOJhhAsx4QtgUpbY7S1t3VcDkZ8OEr6nLCBf0DF8zmZSajE5T8PY8C0PvDweXLaVedswrx+/aPyfVldXjjhnLQ689CMDrv36Tvzz/V6Xji75j8imTcKY7qatysfaLdYd8P9Kq9MMxdaMG0TklfLv137R063pH4olrxhvXghbUSK6VFUyhN5LC7zXNyApmhORRFSN+s4/1k5biSq3GFDAxfPWULq2UxA2yMuTebKxOMy0lXnY8dHBwqVrOsGzmvn8XcfEO/vfBF/xq9oKo3ZbQv9Mv+Q4A//v7lwQC0XvcdVfKUeEDCOpXNSsZrza9EoAkVypm2X8phHG5Ud9vRz8vnVElr5wx4LV6WDf5KxqTXZj9FkZ8M4U
kV2qn14sfZmPIPVlYEs00F3nY8Uh00+LOrGR++dFs0jJT2bBiE3f+8JcEgwZ5ZohDWKwWTr34ZAA+eWNxjGezX1yhDVumhaAnSP1aNf0v61oDzK7shRZCCHEowwSYGQSw66DRutvezLopS2hJaMTqtTH+m2mYG+ydjpk41sGQO7Mwx5lo3Oxmx/xygi3RK+iJS3Iw54NbyRmaRWlRKT8953qqm9Sc1NOMmlUm0buOOeMonOlOqstqWPmf1bGeTruUY1pXL1e3EPL2/DkRNAVwpYUf66lVEmD2BWbFOceA7vvS6PmtW+V91UE/nxZkBTNCen6U9jtNCfWsm7wEr8ONze1gwqrjiW9OooWOq14TxzkYenc2JptGw9oWip6sIOiJXnBptVu48+83M/SoQuqrGrj2u9dTXSHHQBrdOVedBcCnb/5bVyvZKdPC+y/rvlbzwcWVWk3QHMDmdhDfmKxkTCGEMBrDBJjxhHAoKvLxR/Bp2ZVSzaaJywlY/cQ1JjL2m2Oxe+IIEOiwaChhpJ0hd2ZhsmnULW+iaEEFoU6aqPekyMdkNnHbX25j3Kmjaa5v5r6zHmbntl0Rjyf6h+S0ZE78/vEAvL/ooxjPZr+4wTYceVaC3iCu5WoCzJrMMgDSKrO73TtTCNHPtIDyl4HorQ/pimECzFiqytrL1rGrCZmDJNemMXrNUVj8tk6vlzDczvA5uZgdJlzfNFP0TDmhKGYMTCaNWa/M5Ljzj8br9vLQ959gx6ri6N2g6DPO/NHp2Ow2tn6zje1rd8R6Ou1STwivXrpWthB09/xVO0SImswKIBxgCiGEiIwEmFEUIsSeQTvYNXwzAGkV2YxYPwVzsPOeB/FD7Qz/ZS7meBP161vY8avoBpeapnHT76/n1B+fhN/n57FLnmHdZxujd4OiT7nohu8D8M+F/4rxTA5ghrSTwwFm7RdqWnPVp1bjs3sw+ywkS/9LIYSaukFDMkyAaSGIRdHOWl8X1reDWpCiUeupGFAKQG5JIYO3jkZD67SgJ36IjeH35mCON9GwoYUdj5d1mhY/UHeLaDRNY9aLMznj6lMJ+AM8fNmTfP7PL9u/r7qvptCPrvStnHDyOArHDKalyc1///il0l6XPeGcHIc11YLPFVDWnqgqex8A6RU5mEKqd/YLIYRxGCbA7E1+s48tE1bhSq+CEBRuHUNuaWGXrhs/zM7we3KwJJhp2NjC9sfKolrQ0xZcnnvddwkEAsz/yVN8/rcvO7+iMIwLZp4DwOI//5emev10AEg/PQmAms8aCSkoIg5qQaqzwvsvM8rzej6gEKIfCKC+mt0YndYlwFTMa3OzafJympLqMQXMjFg3mbSqru3lShjZuucy3kTDpugHlyaTiTv+cAtnXXk6gUCAJ65awL/f/Dxqtyf6nuyCTE6aES7uee+5D2I8m/1smRaSJ8cBUL24QcmYdemV+G1erB67HA8phGjlR32AqYP2S73AMAFmqPU/VWMdTnN8I5smf403zo3VY2PU6qNJbHB2erse3CSPjWf4Xa0FPeub2PR4SVSDS7PFzJzXZnPaZacQ8Ad49MdP8Z+/9O3g8oEHHiAQCPDwww93+Tr33nsvZrOZuXPnRnFm6vR2evrSuy7GbDaz8tPVFK3VT8FXxplJaCaN+jUtePZ17cW6s4rwytw9AGSWDcAU6nyftIojKQ/UoPOK9Sadz0/0hMqAxzBhheiEbDJSxJVaxfqjv8Ib58bRFM+4FceT2ODs0nVTpyQyek4BZoeJ2tWNbJof3eDSYrXwwNt3c9plp+Dz+ph3yWN9PrgECAQCPPTQQ9x7771d+vl7772Xhx56iEDAGOmK7sobmss5Pwv3vvzjvDdiPJv9NJtG+mmJAFR9XK9kTJ/VS01mOQBZ+wYqGVMI0R/4o3Tp/+SjhgIVuaUUjV5PyBQisS6FUWuOwurrvA0RgPOEOAbOTMNk0ahZ3sCWBbu7VdATiVtf+AUnXjANr9vLgxfPZ+kHy6N
6e72lbeXyoYceOujfh9MWXN53333dWvE0kp89dhUWq4VlH65g7f82xHo67dJPScSSZMZT7sO1quNDCrqqInc3IVOQhPpkEqS5uhBC9JhhAsxA638qtDVGDxFi99Bt7C0sAiCtLIehG8djCpo7bJ7eJv3MRAZck4pm0tj3WRWbXigmpKDQvaOq7x/ddgnfu/ZMAoEAd190P0s+XNbzG+ymaFaldyXIlOCyc2OnjeKUi08gEAjwuzsWdfl68cQrm8NhU9oaZJ4TDgAr/9WAFux52jZEiPK8EgCy9wzq8XhCiP5E9mBGyjABpmoBs58dY9dRmxVOq+UVD2HgjuFdPvkj+xInOReHU+ilH5az9ZWSqHf3P+WCE5n5xM8B+PWs52MSXPaGjoJMCS47p2kaMxdcB8BHf/iUnRv0c5KT8+g4HHlW/I0Bav7dqGTM+pQaWhIbMQXMZJZJ9bgQQqggAWYEPI4Wtk5cRXNSeAWlcNNYMru6b8sMA3+W1r6HrOytOrb+tSSKsw0bc/QoHvzzLzGZTPztufd4+9l3on6bsXS4IFOCy6757tXTGXXMCJrqm/nDfX+M9XQOkn1R+ENZ1ceNyvYp78sPFy9l7huAJWBVMqYQor+QFcxIGSbAVFVF7nJWs2Hi1/htXiweG8PXTiLJldqlsU12jUGzMnBOiScUDFH6cg01ixu73Ri9u3IH5/DkPx/BEe/gyw+W8swtz3br+n210fqBQea9996L3W7v9eBSZcq4NyQ4E/jp/CsBePXB16ktr4vthA6QNNFB/BA7AXeQyn+pKe7x2Fvae1/mdbFXbV8hVd8iNlQGT8YIxPorqSLvohAh9gwsYu3UL/HbvMQ3JDH26+NIcqV26frWVDPD5mbjnBJP0BOk+MlKaharSfF1xJmezDMfPU5adhpbv9nG/ZfOIxBQc6JRX/Dwww/j8Xiw2+14PB5ZuezETx+9gtSsFHZtKuW9374f6+kcJOcHKQBUf9pIoEHNY3hvQTFo4KxJJ74pScmYQoj+pK3RusqLMTqXSIDZBQFTgC1jvmHHqHWETCHSynIYvfwY7J64Ll0/bpCV4Y/mED/Ejq8uwPa55dSvVFP92hF7nJ0n//kog0YWUFZSzu3n3kNzY/RvV0/aVi7bgsyutjAyojHHjeK8688GYMENz+P36Wf1IHlKHAkj7AQ9QSr+7lIypt/io2xAeH/pgF1DlYwphBAiTFLknXA7mtg0YQWNya7wsY/bxpBRMrDLxTzJU+MYdEsGZoeJllIvxY9V4q08+I1bdQq6hSYsFjOP/WUe46eNxVXj4oazbqJkr36KNXrDt/dctv0bOm5hZCRtKXyzxcxtv7sJk8nEJ4v+w7bPdugnva9Bzg9TAKj8sAG/S83qZdmAXQQtAeIbk0ipzlQyphC9Q/WHP8OEAhGQPZiRkkdVB6oz97FlzDcErH4sXhuj100lpTYTD54uXT/r/GRyL0tBM2k0rG1h59OVBJqjXCpOuAr4gYX3ccp5J+FucXPLebdRvHln1G9XTw5X0NOdPplGc8k9F1I4YRCuqnoW3fGnWE/nICnHxxNfaCPQHKTiH2r2XgZMfvYOCrcXG7BzaJc/MAohjEYCzEhJgHkYQS3IzmGb2DNoBwBJdamMWjcVh6drKzqaGfKvTyftlLbTRhrY/UpNr227uGPBbM674nv4/X7uvOQeVn+1tnduWCc6qhaXIPNQhRMG8cN7LwLgdzctor5azdneKmhWyPtReJ9z+d9dBBoVrV4O3IXP5sXRHE9mubQmEkII1QwTYIYIdan5ucfewpbxq2hIqQUgb1chg7aPxhQyHdRg/UjMCSYKb8skaVwcoUCI3YtqqPq/3nvD/uHcC7j45vMBuP/KuXz+/he9dtt60JVWRH05yFSdtrZYzdz6yi+wWC189c4yPn/zyx6Np/KsdA2NzLOTsGVa8Fb7qfqgsUcrjW1nhwdM/vYPj/nFIzCHInsZlFVPIYxAVjAjZZgAsyuqM8v
YNmYNAasPs9/C8A0TSa/M7fL1bZkWhs7JwjHQRqAlSPHTFTSscUdxxge7cM45XHx/OLicf+OT/Ov1j3vttvWgO30u+3KQqdLlcy9h6ORC6qsbeP4Xv4/1dA5iSTGRPSPc97LsTRchr5rtJXsKivDZw6uXWWVy7rgQQkSDBJitmhMa2DxxBQCJLicj10/B0dL11Zj4YTaG3JmNNcWMt8rPjsfKcZf4ojXdQ5w760x+9OjFACy481n+8tzbvXbbemE2m7vV57Lt58xmczSnpVtTT57MjLvCH0ie/dnvqCtXU52tSu6PUzHHmWja6qH2czWFcD6rl92DtwMwqGgUppA00hBCdKStTZHqMfs/wwSYnVWRxzUlkrerEEIaBTtGYgqZjvjz3061pxyXQOGNmZhsJpqLPGx/vAxfbdcfQD2tIj/3xu9y5dOXAfCn+//C80/+rkfj9VVz587t9nWMunKZnJrMQ689iMlk4v8W/psl734d6ykdJGlUPGknJxAKhtjzh1plx6iWDt5KwOInoSGZzLIBagZVRBqjCyH6E8MEmF0xaNvo7u2r0iD34lTyfhAuQqhb2UTxggplR9h1xfdv+R7XLbgKgLcefZc3H/pbr9226LvmLrqPvEG57N22j5duWRTr6RxEs2gMvS68NaVmcSMtRV4l47bENbK39VjIwdvHyB5KIUQXyB7MSEmAeYDuvOFoNo3CmZmkTms9U/yfdez5U42ylZauuHD2uVz71BUA/OWRd/jjvW/23o2LPuvHs37Eqeefgtfj5bFLnsHd1LW2W71l4IUZxA8MH0qw7/U6ZeMWjdhAyBQitSqLtOosZeMKIYQ4lGECTFVnkQOQGWDEbQNJHBJH0B+i6KW9VPy3d/evXXzX+Vz12OUAvDHvr/z5gbeidlvRPitdT1RWQUdjvJ6adPwEbnn8RgCevPUZ9q2u1NUc4/PtDLgwA4A9i2oJNKl5ztalV1KTWY4W1BiydaySMYXoOmmM3nfJCmak5FHaTQnj7Yy6JQdrsgWfy8/mX5XSsKV3j1+88tHL+MGcCwH48wNv8ca8v/bq7Yu+KSsvk1/97XGsVgsf/+UT3n7xbwxHP8GWZoZhM/MwWTRqVjTgWqLmg03QFGDnyI0A5JUWEt8sZ44LIbrGRABNcVFOiEAXmib2fYYJMIOt//VE5nlJ5F4ePpmnfnsTa5/ajqe65/vDurpCqGkaNz37cy6YeS4AL96+kLeeerfHty/6jkhXG602K0//7UkyctLZvnYHj1/7dFRWLu04Ir5u7kWpJA6Jw98QoPSlGkyoqe7fXbgNT3wLNreDQUWjZO+l6AKjrTiqvL96v6+it8gjoQtMcRr516eTMi3c5HrvfyrZ8vtdBH29t+HSbDFzx8JbOPOK0wgGgyy44Xnef+mjXrt90bfd9bvZjDtuDPU19dx94f20NPVef9auSBhuJ/eiFABKfl+Fvy6ATUGA2ZRQ334k5LAt47EErD0eUwhhHPEE0RSvN4YI0qh0RH2SALMTjgIrg2dnYM+zEvKH2PNqLVs+3tmrc7BYLdz/1l2ceME0Av4Aj135DItf/2+vzkH0XVfOuZxzrvougUCA+y97mD1Fe2M9pYOY400U3pKFZtao+bKR2iVqel4GtSA7xq4NF/ZUZHfr0AQhhBA9Y5gAM5Iin7RTEhj4szRMNhPeKj87n6mkeZtXedFLR30wNU3jwVd+yYkXTMPT4uHeS+byxftLIh5PGMvpl5zK9Y/+FICnb3qWZf+3PMYzOlTBzzOwZ1nxlPvY9VKlsnH3Dt5BU3I9Fq+VIZvHKU+N6z3V3ow0kReip+IJYVLcHiZISFYwjcrk0Bh4bRppp4RbENV/08KuZ6sINPb+ttybn7qBM390On6fn7svvI+lH+svQBD6NPnkidz36t0AvPnM27zzwt9jPKNDZZ6ZTNq0REL+EEULKgi2qHkhb0xysbswfGLP4C1jsHntSsYVQgjRNRJgfktcoY1Bt2TgyLM
SCoYoe8tF+buuXu1v2eYnd13GpbN+AMBDVz0uwaXosmEThvLEPx7G7rDx2bv/49nbX4z1lA6RMMJO/lXpAOx+vYbmHWr6cQbMfraN+4aQKURaeQ4Z5XlKxhVCGE9ClFYwjcAwAWZXUuSZZyeR9+M0TFYtnBL/dSVNW2LThPqcq77LLx67DoBfz3qO/3v905jMw2jiiVc6Xiz6auYMzmbBR0+Q6Exk7efrmf+jZ7AH43plfqYupmUtKWaGzs5Gs2jUftVE1fsNXb5uZ4pHbsCd0IzN7WDIJvWp8WgxUkpbjsXsz1RWpBvjzO7+yjABZmfsA6wM+EkamkWj7usmSl6oJtAUm05VJ33/BO5++XYAXnvsdd5cIH0uRdfkDcnhiU8eJj03jR1rivnl9x/C61Zz1KIqmlVjyO1ZWNMstJR4KXmhStnYFbm7qczbAyEYvn4SVr9N2dhCCOOJI4hZcRW5MbpgEvuPzJ9//jnnnXceeXl5aJrGe++9d9D3Q6EQ999/P7m5ucTFxTF9+nS2bdumfB6ePT72/KmW0oXVFP+qMmbB5eRTJvLQX+7HYjHz/qIPeWHOyzGZh+h7Rh41nGeX/Iq8ITns2b6Xu89+gCaXzgq+NBj0iwwSRtjxNwYo/lUFQY+adFFTkouiUesByC8aTnJdmpJxhRDGFU84Ta7yojZPpl8xDzCbmpqYOHEizz333GG//8QTT/Cb3/yGF198kWXLlpGQkMBZZ52F2929Pn7BLvxX/q86Kj52dfpzLTRF5TJ4Sn77vrl/v/tfHvjZvIjGMZI4EpRe+qpjv3cUT/93PqlZKWxbtZ2bT7yT6n01sZ7WIXIvSSH1hARC/hDFv6rEU3bkdJrWjf/8Fh9bxq8iZA6SWpnFwOLh3bp+JP8ZTTMmZRchRPfoZTGuO2L+TD/77LN5+OGHufDCCw/5XigUYsGCBdx7772cf/75TJgwgddee429e/ce8svt64aMLuT5j39DYnIiX/97BXdfdi+BgOw/EZ2bccv3efgf9xGX4ODrj1Zy6ylzqC2vi/W0DpF1bjI5M1IAKHm5msaNapq9B7UgWyeswhPfgr05nuEbJhkyABRCqKd69bLt0l29tRinkq73YBYXF1NWVsb06dPbv+Z0Ojn22GNZsmQJl1566SHX8Xg8eDz7C3Pq6+t7Za49kTc4jxc/eZbUjBQ2LN/IrAvuwOvR1745oT9mi5mbf3s95/38bAA++P3HLLjheQJ+/X0wSZ+eyIArwinrvW/UUvMfNV3gQoQoGrUeV1o1Jr+ZUWunYvHLaT1CiP7l7LPP5uyzzz7s9769GAfw2muvkZ2dzXvvvXfYWKk36DrALCsrAyA7O/ugr2dnZ7d/79vmz5/P3LlzD/m6B3VRvMo0dFpuKk9/+luyBmSxY0MRv/juLTQ19Gx8o6XJ9Ux1VXrbeElpidz91iwmnj6eYDDIH27/I+898wF2utfvUfX8DrdymHpSAvk/DbcjKn/PRcW79cpWGPcWFFExoBRCMGL9ZBIak5WM2xX+2CeAhBBRFkcQq+KiHF/reN9eALPb7djt3e/ZG8liXG/od6+Qc+bMweVytV9KS0tjPaUjSk5P4qFP7iV/6EBKd+zm+jNuxFXjivW0hM4NmTSYZ1Y8xsTTx9Pc0MJD33+C9575INbTOqzUkxMomJmOZtKo/Kiefa/XKRu7MmcPu0ZsBmDw1jGkVWV3cg0hhNCP/Px8nE5n+2X+/PkRjRPJYlxv0PUKZk5ODgDl5eXk5u4/R7i8vJxJkyYd9jqRfgLobfHJccz7+JcMGptPxZ4Krj/jRir3qWvXIvqn71x+Eje9/HPscTb27Sjj4QueZNd6fX6ISjs1kfyfp6GZNKo+bWDPolplY9emV7B9zBoAcksGk1s6WNnYQgjRJoEQVsWN0X2t45WWlpKcvD/r0hdil+7QdYBZWFhITk4Oixcvbg8o6+vrWbZsGTfccENsJ9cD9ng7D3wwh2FTh1BX4eK602e
yp3hvrKfVa/Rcra3XuVmsFmb/6hZ+dPMPAVjxr1U8efmzNNXpcztE+hmJ5P8snBav/LiBPX+o6fZpWEdKo9c7a9gyYSUhU4jMfQMo3Dq2Syl3VY3c2xijk50QIlqSk5MPCjAjFcliXG+IeYDZ2NjI9u3b2/9dXFzM6tWrSUtLo6CggFtvvZWHH36Y4cOHU1hYyH333UdeXh4XXHBB7CbdA1a7lXvfu4OxJ46isbaR+858mJ1bdsV6WkLHBhTm8cRfHmXs0WMAePOhv/H6g28RDOrzuLHsi53kXpICQOUH9ex5Vd3KZb2zhg2TlxE0B0mtymTYxolSMS6EiJp4QtgUr2B6FY+n18W4mAeYK1as4NRTT23/9+zZswG48soreeWVV7jzzjtpamriuuuuo66ujhNPPJGPPvoIh8MRqylHzGwxc/dbs5h8xgRaGt08cPZ8itdIcCmO7LQLv8PcP9xHUkoSrhoXv77yRb5+f2Wsp3V4Ggy4Jo3Ms5IAKPtbHWV/UbenuCG5lo2TvyZoCeCsSWfk2qMwhfrdNnIhhI7EE8SmOF9hiWC8vrgYF/MA8zvf+Q6h0JGjeU3TmDdvHvPmzevR7bhpJqjoXNNmmrt9HZPJxD1/vJ1jv38UnhYP95w7lzXL1kU8nghTXQWtWqQpd3ucnRuf+DkX3xjuD7tuyQbu++E8rKXxStP4qsYyWTWG3ZhHxrQkQsEQexbVUv1xo7LVxXpnLRsnLyNg8ZNck87o1UdjDpqVjB0p1edp6308IUTs9MXFuJgHmEYx68WZnHbpyfi8Ph646FHWfLYu1lMSOjVi8nDm/vmXDB49CIA/PfEmL/7y9wT8AQYyOLaTOwyr08yoOwtIGh5H0B+i5NlqXEvUfWiqTatk88QVBM0BkmvTGLP6aMxBeekSQkRfPCHsilPalgjG663FOJXkVboX3PDUTznnZ2cRCAR4+LIn+fojnaY4RUyZTCZ+fOel/Gze1VisFir3VvHI1U+w7P+Wx3pqRxQ/yM7o/2/vzuOkqA59gf+qqveZ6dlXmBnWGRgRFFkcXOApyrv6Yshm4nPhYZ7xJpjgh5u8C3n3xRi9V43GJWoi+Zhobq6JOyRxiSIqIKIg+y7rDDD72vtWdd4f3fRsDMxSPd09/fvOpz7VXV1dfXpqqvs359Q59a9lMOcZEXSGUPNYK9wH/Rd+4gC1FNThy2k7IWSBrJZ8TNlzGcMlEVESSJlPag+8COnURD4Yt/3bd/CtFYsBAI/e+RQ2vfnpiJeBBi5evcjLKkrx09//BDOuvBgA8NEbG/Hw934FR1tsr0RlxtCbT7LmpmHcD/KhWGX4zgRw9OEGqI36la1hTA2OTdkLSEBeYwkq980c1jmX7AxEo0dI5+2lTBQYNBsELDrXYCo6by9R8a8qhv7H9/477nzgNgDA0z9ajff/88M4l4gSjclsxC0rbsbSn90Bs8UEt9ODx3/4a7zzx/fiXbT+ycCY/5mDopuyAACOPR4cf6IJqluDQYePFAGB2smH0FAe7gBXdLockw5NZ0AkIkoiDJgxctXXqrH8N+HhAf7zF3/Bmqf/HucSUSKRJAmLbl2I7z14J4rLw2OYffaPrXjk7sfRUKtjNaDODJkKJiwvQMY0KwCg4W8dOPPnNt0GhVTlEI5evAcd+U0AgPKjU1B6cjLDJRHFhQHakHp9X2ibqYABMwYmXDwOP33px1AUBX9f/S5evO+leBdpWBK5p3aiDox+1rnKN++muVj6wG2YOH08AKDpVDOeX/VHfPDSx/0+53zbGykZF1sxblk+TDkGqF4NJ3/TjI7Pew70PpzBzH0WDw7P2AFPhgOSKqPywEwUNI4ZbrGTBnt9D52K+I4oQER9MWDqzGgyYNV//QvMVjM+f+cLPPWD38a7SJQgZi+aiaUP3IYpsysAAK4OF/780Gt489d/R8AXiHPp+icpQMktXU3i3lMBHPtVI/x1Qd1eoz23CUem7YZqDMIQMGHK7suQ01m
g2/aJiIZCRH703mYqSJmA6YMbKvT5QvSi/8vzLf3F9zBx+ni0NbXj/qX/Abfm1OU1aWToXVtrgw0Xz6/CbQ9+BxddOQUA4HX58Ncn38aaX70Fd4cbhshPPMp3oRpHyxgjyn+YB9uE8DVym99z4Myf2iEC4pzPHWxTtoDA6fFHcWrCl4AEpHdmoXLPTJj91kFtZyD0bmZP9BpHjqtJNHwMmEOXMgFzJFTNnoJbfxK+VvTDd/0K7U36XSKPks/lC+fgnv/3A0y7OnyJR783gLeffQ+vP7IWjpbE/sdDMkooXJyJwsWZkI0SQg4Vtc+1oPMLr26vETD5cOSi3ejMbQEAFJ4uw/jDVZAFmzuJiJIdA6ZOJEnCvzzzI8iyjHf/9D42/W1zvItEcXLVjVfirn9biumXh4ccCvqDeO/5D/Hqv7+JtvrE/6cj/SILSu/KhaXECADo3O5B7e9aEWrXb5iv1vwGHJu6ByFTELIqY8KhaSioL9Vt+0REetAiP3pvMxUwYOrkhiWLUDVnKtwON575yXNxLYveHUHOd0rAUCTipQ6Huz1ZljH/a1fif/3f21Bx6WQAgM/jwwerN+Dvj/0DbXUdw9r+cMvXn+7NxkqGjDF3ZCNnfjoAINgWwukX2tH5uafPukOlyiGcrDiAxrGnAABpDjsm77sUNk/6ectGo4tnGJ3BiCg5MGDqwGgy4p//438DAH5//x/R1pj4tVSkD7PVjBuWLMItK76F0sljAQBupwdv/uav+MvjryGtKTPOJbwwSQHy/ikDRd/MgmKTITSBlvedqP9LBzSvfucKtec04/jUvfBbvYAASmomoOxY5bAGTyciiiWegzl0DJg6uOZb85FXnIum08147ek18S4OjYDs/Cx8Y9lifGPZYmTlhUOko82B159Zi1eeeiN6BZ40JHbAtF9mxZg7smEuDjeHe477cfr5NniO6terPWgM4GTFATQXnwEAmLxWTDowHVntebq9BhERJZaUCZheeBDSqRd5b9+852sAgDXP/Q2hoN6X8Iq/RB5rUu9e1ed7r7IiY8b8i7Hw1gW45parYbaGe1bXHW/AG0+sxbt/WAefx3/B7SQCW7kZ424vRNb0SHN4h4qGv3Sg7WM3IIbePN39eQICzUVncKLiAEKmACCA4lPjUX6sEop64Y+e4YypeS4hnbendzMvm42JEg9rMIcuZQJmrJROHotpl1chGAjir797K97FIZ3JsoxpV1Zhwc1X4upvXoGcwuzoY4e2folXHn0Dm9ZsgaYmx0nblmITSm/OR/4V4ZpVLSDQ/LYDTWsc0Hz6feg5MzpwonI/nFnh00VszgxMOjgdGY7sCzyTiIhGAwbMYTrboePQ9i/R3twR38KQLiRJQlX1FCy4+SrM/9YVyCvJjT7W2erApjc+xXt/XI/9nx6MYykHx5xnxNhv5qFgfhYkJVzL2LK5E81/diHQrF/v8IDJh5pJh9FUfAqQAFlVUHpiMkpqJvBcSyJKOgJC917frMGkAZl4cfhyf8f2nohzSZJHIvYilyQJ0y6vwj99YxGuvLka+aVd5we62l3YsmYbNr36KXav3wc1FA5kA2me17up3AzLoNY35ioouikLedfZIRvCwbLjCzfqXmmHtyYAE0y6NEWrsoqG0pM4M/4YVEP4NJGC+rEYd3RqTAZNH4pUG3icTe7Dwa9GCmMT+dDxKBqmkvHFAID6E/VxLgkNlmJQMHP+DMz/+tW4evEVyC/pCpUehwefrd2GTa9uwc73dyMU1K+WbySYS4wo+moWcq5KjwZLxx4P6l5ph/uIX7fX0SQNTWNqcXr8MQTN4e2md2Zh4pfTYO/M0e11iIgouTBgDtOpI6cBAOOmlse5JDQQJrMRc66fjQVfvwpX3TQP9hx79DFXpwtfvLUTm17dgh3v7UbQH5tOYbGUPsWCghszkTXbBkmOBMt9XjS80Q7nfp9uryMkDc1FZ3B6wtHwsEMAzF4ryo9XoqC+lGNYEtGowBr
MoWPAHKYDWw8BAKrmTBnyNvTuCZ3o9Hy/A2mCzi3OwaXXTMe8m+Zi7g2zYE3varJtb+rA5rWfYdObn2Lnh3uQGQx3QjHABANMwy7fSFw7XDZLyJqXhvxFGdFrhgNAx1Y3Gv/qgCdSY6nHtcM1SUNrYR1Ojz8Kb1p4AH6j34yxJyah8EwZTGL4v7Ohli3ZJXqTe6KXj4ZDz9FPGCsojH8Jw3Rg60GEQirKp5ThK9+9AX///TvxLlJKMxgNmHTJBFRVT0FV9RRcVD0FheUFPdZprG3CJ29uwaY3P8W+zQehacnRA7w320QTcq/JQPaVaVCs4fCoBTS0bXSj+W0HfGf0q4FVlRAaS06hrvw4ApZwTaghYMSYk5NQdLocisbrhxPR6MNLRQ4dA+YwdbY68Pufv4i7H/wufvKbe1FzqBZ7Nu+Ld7FSRnZhFi6qnhoOk/OmoOKySdHxKc9SVRXHd5/A5+9ux+a1n+HwF0fiVNrhU2wysq9KQ+61GbCN66ot9NUF0fqhE60fuaA69fvwCph8aBhbg4bSGoSM4cBq9JtRfGocik+NG9B4lkRElHr47aCDF//9vzBp+kRce/MCPPTmL7Dq6z+La8hM9Cb3ofasNltMqLh0MqrmTEHV3KmYdnlVtJNVd45WJw5t+TI6fbn1KHzuro4t5/v9JMq1zbuTFCBzWhryrspE3lw7ZPPZ2kqBjs89aFvvhOtA1/sbTNPyudYVEHBmtqNhbA1aCush5HBgtbjTMKZ2Agrqx0Lup8ZSz2ZtvZvIU60XORENH8/BHDoGTJ08uPQRjJ1UgsqZFVj9ydN476UP8Oy/rkbzmZZ4Fy0pybKMcVPLUDVnajhQzpmCidMnwmDoGWw0TUPNvlM49OmXOBgJlHVHRkGPfgnIqLAi78pM5FXbYbR3Hare2gBa17vQvtEN1a1fbWXQGEBT8Wk0ltTCm+6KLs/oyEZJ7QTkNhWl3HmRREQ0NAyYOvF5fPjRwh/j+w/dhZvuuhGLbl2Ia7/937Djo5346PWN2LBmEwdiP4fcohyMnTSmz1RWWQpbet/xE1vqW7H/84M4uPUQDmw9iPqtLfA6vXEoeWzYys3IuyIT+VfYYc7vagIPdITQusUB5ya/rtcJFxBwZLeiYUwtWgsaorWVsqogr7EERafLePUdIkpZrMEcOgbMIeiv2TPYruHX/7wa7/7uAyx74i5Mv3oa5lw3C3Oum4Uf/2Y5juw4hpP7a1GzvxYn99fi5IFaNNU2YzT/rRlNRhSPK0LJhGKMmVCCkgnFKJtYiuIJRSiZWAxrWv8Dh3ucHhz+4igObf0Sh7Z+iYOfH0bLmdYe6+QiX9dmbf2vbX6B7UmAbYIJmbNsyJxrg3VsV6hUPRo6tnrQ/okLzn0+QAMMMEDB8DvUeGwutBbVoaWoHj6bO7o8zZGJojNlKGgYC4NqHPR29bx+eKpdO5xN+ESJh518ho4BMwaO7DiGe+evRMnEYlz9jXm4+ptXYMrsiujUndflxYkDJ3HiQA1O7D+J4/tP4sSBk2isbYIQiZ08ZVlGelY67DkZyMy1o2R8cTRIjpkYDpMFY/Mhy/1/EauqisaaZtQdrcPpI3WoO1qPM0frcfpIHc4cqUvaHt7nY8hSYJ9hQcYMK+zTrTDYuwKjFhRw7PCg7RM3HDu8EEH9/gZ8Fg9aC+vRUlQHT4YzulwJKchvGIuiM+XIcGbp9npERJS6GDBjqO5YPV7+5Rt4+ZdvoLAsHxWzJmPcRWUoryrDuIvKUFo5BtZ0a+Q8w6k9nutxeXHyQDhwnjxQg+P7T6K1oQ1qMIRQMIRQUI3M+94/eynDgbJl2GDPzoA9JwMZ55r381h6ZvqAtu9xeVF3vA5njtej7ng9mo61oP54Q3g60YhQUM8x2BKPpABpUyywz7Ai4xJrj97fQLim0rHbi85tHjh2eKF69O0FHg6V9XBldnSVSZOQ1ZqPgsaxyG0uYm9wIqJz8EO/C1TEcpuJKGW+Vaywwaj
DwNkA4IFn0M9prG1GY20zNr35aXSZYlBQMrEY4y4q6xM8bf0Ez4EKBUPQVA1qSIUamWvd5pqqwZpuQXpWOhTD8Jpc3Q4PXO0uNNY0oe54A+qONXQFyOMNaG/q6LF+92ZoU+RnqBKy17dBQvokK+xTbciuykBapRmKpasWV2gC3uMBOHZ74dzlg/uoH+j2P8H5OtJcqJONgIAnzYX2vEa05zfBmdmO6FMEkNmWi7zGEuQ0FcEYMsGg80eAnp2AUq3JONGvHa5n+VQdTvMgosSWMgEzEakhFacOn8apw6cHFDwzstOhGBUYjAYYInPFqEBR+n5YG4wGYBCn0AV8ATjanHC1u+Boc8HV7oKz3QVnmzMyD98PP+7smne4B11jOtrIZgkZk22wV9lgn2pDxmQrZFPPL+Nghwrnbi8cu7xw7vHpOlZlyBBER04LOnKb0ZHbHB0I/ayMjmzkNRYjt7EYpkD/57wSEVFPPnigQd/vuAD8F15pFGDATED9Bc/+yLLcT/CUISsyFIMCWZEhK0rkvgxFUeBz++DqcMPR5kTAp1/P5NFOscnIqOgKlOkTrZANPWvHAh0hOA664TsQguugH75TQd06cwkIuDM60Z4XDpROewcgd21cVmXY23OR01yI7JYCmP19e+MTERHFEgPmECTcQOYaAH94UqFBxeDCoiHy059EbIY+Kxf5um0L6NXrWwJMBQZYx5lgLe+azAV9f1eBlhBcB31wHQhP/vrweaVnf6/D6fmtSSpc9k44s9vgzGqHI7MdqrHneatWdzqyWvOR3ZIPe0fugC/dqPe4lqnURJ7oTe6JXj598auMYsMDL0K612CmRoUOj0oihJu5bWUWZI3LgLXcGA6TZaboNb578zcEuwVKPwLN+nVUCilBOLPaw2Eyqw0ueyeE0rNJXQkZkNWWh6zWfGS15sPiS7B/eoiIKKWlTMC0IG1YnUlSmd41mHrWOl5wnMneJMCUZ4ClzNijZtJcaIAk963x0QICvtNB+GoC8J4MwlsTgK8m2OcKOv2N/ziQTjl+ixcuewcc2W1wZLXDk+5A76cZ/SZkdOQguyMP9o5cpLvskMTwO13oMaZmd506bo/jVhJRvPnghoqgrtsMsgaTKEkpgCnXAFOBAeaCyLzICEuJEeYSQ58OOGcF20Pw1gThqwnCezIAb00Q/rog9BoTV5VD8KQ74c5wwpPuCN9Od/Rp7gYAi8eGjI4c2DtyYG/PgcVrgwRJ917fRETUPy88COkeMPXdXqLitxUlJWOmAnOBCfbCtB5B0lRggCnXAEnpvzZICwr468K1kd6aQCRMBhByaLpciUZAwG/1wpPuhC/dFQ2UPqunT80kEB6T0ubK6AqUHdns7U1EREktZQJmDnJhhj5f2t5E6+QTY/p38rnA708GDJkKTDkKjDk9ayJNhQaY8w2QzecPglpAINAUCk/NIQQaQ/CdCcJfF152rh7dMuRBNRkLCARNfnhtbnjSnZHJAXe6E5rh3CeFG/1mpLnsSHPake6yI81lh9WdAXmAzd16N2nr3clHz2bepgRv0k70Jne9y8exK0ezlIkCg+aFB8HBjPk3AHrXiCYq/lVR7EmAIV2B0a7AmGlAWqYNBrsSnjJlGOzhxwyZCgx2GYaMC3+RCU0g2KpGQ6T/bJhsCiHQpCLUoeoyLJAqq/BbvPBbPfBZPT1ue23ufoOkpMmwudKR5sqMBso0lx2moHn4hSIiIkpwDJg0KLJJgmJTYEiTw3ObDCVNgSEtEiDthmiQNNoVGCL3z9WB5nyEJhBsV8MhsjkEf1OwR5gMtoQg1P471wyUJmkIWLzwW73wW7wIWH3wW73wWTzwW70Imi8wIK4AzF4brJ40pDntsLkykOayw+JJgywGVyNKRESJxQuP7ue+hzC6L498VsoETDMsujWR67WdWJEhAxIgGSXIRgmSQYJskiAZei2zyFCs3eY2GQabDNkmQ0nrum1IC9+XbXKfAcUHI+RUEXJoCHV
G5g4Voc7I/OzyyGOqSztvDaQEGRLO32SsyipCxgCCpgCCJj8CZh/8Fh/8kZpIn9WDgNl3zvMiu1NCBli8Nli8Nph9tuhtqycNFm/aeZu39f5g0uMc0e707PUNAM0J3Iucvb4TScp89RClLB7l3Yxblg9LqQlQBYQarkUTGsL3tch9deD3IRAOLzIgSQAkCZIcWSYhXKsnRR7rcRuQJKnvcw3oCodGCVKP8Ni1TDZK5+3kMlxCE1A9GlR3ZPKE7/cIio5eYdKpRq+3PdTz/jRJQ9AUQMjkRzASHDVTKBwgjf5IkOy63V/zdW+yKsPstcHss8LqTQsHyEiQNHttMISMup+rSEREic8Hj+4tUarOA7cnKgbMbiwlRqRNGH3nyAlNQIQEtCAgggIiKKAFBTS/Bs0roPo0aH7RLTBqXQHSo0Fzix7LNN/wTm4UEFANIYSMQaiGIELGIEKR+dnlIWMwHCYjQTJkCkA1DL5ZQdIkGANmGIMmGP1mmH2WaC2k2WuF2WeDMWCKBkg2aRMREQ1fygTMC10OEQDqXuiAkiYDCiAp4dpGSZZ63Iccua0AkCPrKJF1ovcBKBIkCdGaTKGJcI2mOLtMQAh0jbGoAUKIbo9H1oneBoQaDobhkIhwaAyEw+PZ0CiCAghKECFEl+nxz5KmqFBlFZqiQrNp4bmsQlO0yFyFqoSgKio0JQTVoEZDY/e5GpkPtUJQ0iQYgiYYAyYYA2aYgmaYAubofWPQHF4eua+ohkHVPuodMPVu0q7T+ZDVs0lb7+3p3Ytc7/eaer2+U+brIglwX4wUD2swh4x/pd14jo7s6PoicpKhkASEJIDIXEjhRBm+DUDSICRASBqELKBJGoQcnjRJdLutQSgCMAhokWVC6rqtRZ4vZA2aHAmGcrewKJ87OApZh+7Y5yCrCgxBIwwhY3je/XbIGA6MkaBoCJpgCpig9GquZo0jERFR4mHA7ObwjO1wZzj026DUKzyiZ5BMxtP6JFWGoimQVRmypoQnVYGsyVBCBiiqAbKqQFENMAQN0cCodAuPxpAZhqABsmA4JCKixOWDR/eWKE2vy8MluJQJmEYYL3gtctUYQtBygWFpRpIAJCFBEjLCuVSGpMmQz841GZKQIWtSr/vd5n3WlyBrSniuhgOiosmQVUM0NCrR0BieG7Sux/To7KJnraPenW8SuVc1kPjNxnpuL9F7fbNJmwaO+yJZeeFmwBwi/tV3U3HgUmiyvudGhANieAKknvcjc1nI4aDUY139w9NQJUo5iIiIKDkkTcB89tln8eijj6KhoQEzZszA008/jTlz5uj6GjZPuq7bGygGOCIiosTjhVf372gxxMvMjUQO0lNSBMxXXnkFK1aswHPPPYe5c+fiySefxKJFi3D48GEUFBQMaBttsMEEa4xLmhhSaYBqvd+r3k3QqbQvAH3fb+Jf+zopPj4TFH93RIOhRw4aaZIQIjZdhHU0d+5czJ49G8888wwAQNM0lJaW4oc//CFWrlx53uc6HA5kZmbibjzNgDlEiRxqGDATa3sMmDQw/N3RQHgAfAudnZ2w2+0j+spns0MsDeZ9DScHxUvCH+WBQADbt2/HqlWrostkWcbChQuxZcuWPuv7/X74/V0ddTo7O8PbgTf2hU0QAZ1DTVDnEKLn9kK6d7TQ971qup8crndTjd7b0/P9MmCOXvzd0UB4AETGiB6FHI6eo9aYzWaYzX0v9jLYHJQoEv4ob2lpgaqqKCws7LG8sLAQhw4d6rP+Qw89hPvvv7/P8hfwf2JWRiIiIoqN1tbWmNcm9mYymVBUVISGhoaYbD89PR2lpaU9lt133334+c9/3mfdweagRJHwAXOwVq1ahRUrVkTvd3R0oLy8HLW1tSP+B0p9ORwOlJaW4tSpUyPe5EE9cV8kDu6LxMF9kTg6OztRVlaGnJycEX9ti8WCEydOIBCIzQVYhBCQpJ4tSOeqvUxmCR8w8/LyoCgKGhsbeyxvbGxEUVFRn/X
7q2LOzMzkh0UCsdvt3B8JgvsicXBfJA7ui8Qhy/qeajRQFosFFoslLq/d3WBzUKKIz14bBJPJhMsuuwzr16+PLtM0DevXr0d1dXUcS0ZEREQUW8magxK+BhMAVqxYgSVLlmDWrFmYM2cOnnzySbjdbixdujTeRSMiIiKKqWTMQUkRML/97W+jubkZP/vZz9DQ0IBLLrkE//jHP/qc8HouZrMZ991336g7tyFZcX8kDu6LxMF9kTi4LxIH90WX4eSgeEmKcTCJiIiIKHkk/DmYRERERJRcGDCJiIiISFcMmERERESkKwZMIiIiItJV0gfMjRs34itf+QpKSkogSRLWrl173vU//vhjSJLUZ4rV5aBSyUMPPYTZs2cjIyMDBQUFWLx4MQ4fPnzB57322muYMmUKLBYLLr74YrzzzjsjUNrRbSj74sUXX+xzXCTCIMPJ7re//S2mT58eHbi7uroa77777nmfw2MiNga7L3hMjJyHH34YkiTh3nvvPe96PDaSR9IHTLfbjRkzZuDZZ58d1PMOHz6M+vr66FRQUBCjEqaODRs2YNmyZfjss8+wbt06BINBXH/99XC73f0+59NPP8Utt9yC7373u9i5cycWL16MxYsXY9++fSNY8tFnKPsCCF+9pPtxUVNTM0IlHr3Gjh2Lhx9+GNu3b8cXX3yBa665Bl/96lexf//+c67PYyJ2BrsvAB4TI2Hbtm1YvXo1pk+fft71eGwkGTGKABBr1qw57zofffSRACDa29tHpEyprKmpSQAQGzZs6Hedm2++Wdx44409ls2dO1fcfffdsS5eShnIvnjhhRdEZmbmyBUqhWVnZ4vnn3/+nI/xmBhZ59sXPCZiz+l0ismTJ4t169aJ+fPni+XLl/e7Lo+N5JL0NZhDdckll6C4uBjXXXcdNm/eHO/ijEqdnZ0AgJycnH7X2bJlCxYuXNhj2aJFi7Bly5aYli3VDGRfAIDL5UJ5eTlKS0svWLNDg6eqKl5++WW43e5+L/HGY2JkDGRfADwmYm3ZsmW48cYb+/zNnwuPjeSSFFfy0VNxcTGee+45zJo1C36/H88//zwWLFiAzz//HDNnzox38UYNTdNw77334oorrsC0adP6Xa+hoaHPlQgKCwt5TqyOBrovKisr8Yc//AHTp09HZ2cnHnvsMcybNw/79+/H2LFjR7DEo8/evXtRXV0Nn8+H9PR0rFmzBlVVVedcl8dEbA1mX/CYiK2XX34ZO3bswLZt2wa0Po+N5JJyAbOyshKVlZXR+/PmzcOxY8fwxBNP4E9/+lMcSza6LFu2DPv27cMnn3wS76KkvIHui+rq6h41OfPmzcPUqVOxevVqPPDAA7Eu5qhWWVmJXbt2obOzE6+//jqWLFmCDRs29BtsKHYGsy94TMTOqVOnsHz5cqxbt44dp0aplAuY5zJnzhwGIR3dc889eOutt7Bx48YL/pdfVFSExsbGHssaGxtRVFQUyyKmjMHsi96MRiMuvfRSHD16NEalSx0mkwmTJk0CAFx22WXYtm0bnnrqKaxevbrPujwmYmsw+6I3HhP62b59O5qamnq0HKqqio0bN+KZZ56B3++Hoig9nsNjI7mk7DmY3e3atQvFxcXxLkbSE0LgnnvuwZo1a/Dhhx9i/PjxF3xOdXU11q9f32PZunXrzntOFF3YUPZFb6qqYu/evTw2YkDTNPj9/nM+xmNiZJ1vX/TGY0I/1157Lfbu3Ytdu3ZFp1mzZuHWW2/Frl27+oRLgMdG0ol3L6PhcjqdYufOnWLnzp0CgHj88cfFzp07RU1NjRBCiJUrV4rbb789uv4TTzwh1q5dK44cOSL27t0rli9fLmRZFh988EG83sKo8f3vf19kZmaKjz/+WNTX10cnj8cTXef2228XK1eujN7fvHmzMBgM4rHHHhMHDx4U9913nzAajWLv3r3xeAujxlD2xf333y/ee+89cezYMbF9+3bxne98R1gsFrF///54vIVRY+XKlWL
Dhg3ixIkTYs+ePWLlypVCkiTx/vvvCyF4TIykwe4LHhMjq3cvch4byS3pA+bZYYd6T0uWLBFCCLFkyRIxf/786PqPPPKImDhxorBYLCInJ0csWLBAfPjhh/Ep/Chzrv0AQLzwwgvRdebPnx/dN2e9+uqroqKiQphMJnHRRReJt99+e2QLPgoNZV/ce++9oqysTJhMJlFYWChuuOEGsWPHjpEv/Chz5513ivLycmEymUR+fr649tpro4FGCB4TI2mw+4LHxMjqHTB5bCQ3SQghRrrWlIiIiIhGL56DSURERES6YsAkIiIiIl0xYBIRERGRrhgwiYiIiEhXDJhEREREpCsGTCIiIiLSFQMmEREREemKAZOIiIiIdMWASURERES6YsAkIiIiIl0xYBJR0qmoqEB1dTW8Xm90mRACl19+OVatWhXHkhEREcCASURJ6JVXXsGOHTuwefPm6LKXXnoJNTU1+OlPfxrHkhEREcCASURJ6NJLL8Ull1yCQ4cOAQA8Hg9WrVqFBx98EBkZGXEuHRERMWASUVKqqKjA4cOHAQC//OUvkZeXh6VLl8a5VEREBACGeBeAiGgoKisrsXHjRpw+fRqPPvoo3n77bcgy/2cmIkoE/DQmoqR0tgZz5cqVuP7667FgwYJ4F4mIiCIkIYSIdyGIiAZr165dmDlzJkwmE/bt24dJkybFu0hERBTBGkwiSkoVFRUAgHvuuYfhkogowTBgElFS8vl8EELgjjvuiHdRiIioFwZMIkpKu3fvhslkwtSpU+NdFCIi6oUBk4iS0u7du1FVVQWj0RjvohARUS/s5ENEREREumINJhERERHpigGTiIiIiHTFgElEREREumLAJCIiIiJdMWASERERka4YMImIiIhIVwyYRERERKQrBkwiIiIi0hUDJhERERHpigGTiIiIiHT1/wGlAFrKzox2WAAAAABJRU5ErkJggg==", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], @@ -788,23 +806,23 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ - "from skyllh.core.analysis_utils import create_trial_data_file" + "from skyllh.core.utils.analysis import create_trial_data_file" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Help on function create_trial_data_file in module skyllh.core.analysis_utils:\n", + "Help on function create_trial_data_file in module skyllh.core.utils.analysis:\n", "\n", "create_trial_data_file(ana, rss, n_trials, mean_n_sig=0, mean_n_sig_null=0, mean_n_bkg_list=None, bkg_kwargs=None, sig_kwargs=None, pathfilename=None, ncpu=None, ppbar=None, tl=None)\n", " Creates and fills a trial data file with `n_trials` generated trials for\n", @@ -815,7 +833,7 @@ " ----------\n", " ana : instance of Analysis\n", " The Analysis instance to use for the trial generation.\n", - " rss : RandomStateService\n", + " rss : instance of RandomStateService\n", " The RandomStateService instance to use for generating random\n", " numbers.\n", " n_trials : int\n", @@ -829,8 +847,7 @@ " MNOISEs with a step size of one.\n", " If a 3-element sequence of floats is given, it specifies the range plus\n", " the step size of the MNOISEs.\n", - " mean_n_sig_null : ndarray of float | float | 2- or 3-element sequence of\n", - " float\n", + " mean_n_sig_null : ndarray of float | float | 2- or 3-element sequence of float\n", " The array of the fixed mean number of signal events (FMNOSEs) for the\n", " null-hypothesis for which to generate trials. 
If this argument is not a\n", " ndarray, an array of FMNOSEs is generated based on this argument.\n", @@ -893,7 +910,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -903,35 +920,37 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 31, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 10001/10001 [08:52<00:00, 18.78it/s]\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "[==========================================================] 100% ELT 0h:07m:31s\n", "TimeLord: Executed tasks:\n", "[Generating background events for data set 0.] 0.002 sec/iter (10000)\n", "[Generating background events for data set 1.] 0.003 sec/iter (10000)\n", "[Generating background events for data set 2.] 0.003 sec/iter (10000)\n", - "[Generating background events for data set 3.] 0.005 sec/iter (10000)\n", - "[Generating background events for data set 4.] 0.024 sec/iter (10000)\n", - "[Generating pseudo data. ] 0.030 sec/iter (10000)\n", + "[Generating background events for data set 3.] 0.006 sec/iter (10000)\n", + "[Generating background events for data set 4.] 0.019 sec/iter (10000)\n", + "[Generating pseudo data. ] 0.027 sec/iter (10000)\n", "[Initializing trial. ] 0.030 sec/iter (10000)\n", - "[Create fitparams dictionary. ] 1.0e-05 sec/iter (593990)\n", - "[Calc fit param dep data fields. ] 2.9e-06 sec/iter (593990)\n", - "[Get sig prob. ] 1.8e-04 sec/iter (593990)\n", - "[Evaluating bkg log-spline. ] 2.6e-04 sec/iter (593990)\n", - "[Get bkg prob. ] 3.2e-04 sec/iter (593990)\n", - "[Calc PDF ratios. ] 6.2e-05 sec/iter (593990)\n", - "[Calc pdfratio values. ] 8.2e-04 sec/iter (593990)\n", - "[Calc pdfratio value product Ri ] 3.5e-05 sec/iter (593990)\n", - "[Calc logLamds and grads ] 2.9e-04 sec/iter (593990)\n", - "[Evaluate llh-ratio function. 
] 0.004 sec/iter (118798)\n", - "[Minimize -llhratio function. ] 0.052 sec/iter (10000)\n", - "[Maximizing LLH ratio function. ] 0.052 sec/iter (10000)\n", - "[Calculating test statistic. ] 3.5e-05 sec/iter (10000)\n" + "[Get sig probability densities and grads. ] 4.4e-06 sec/iter (1950580)\n", + "[Get bkg probability densities and grads. ] 3.3e-06 sec/iter (1950580)\n", + "[Calculate PDF ratios. ] 9.9e-05 sec/iter (1950580)\n", + "[Calc pdfratio value Ri ] 5.4e-04 sec/iter (975290)\n", + "[Calc logLamds and grads ] 2.7e-04 sec/iter (975290)\n", + "[Evaluate llh-ratio function. ] 0.003 sec/iter (195058)\n", + "[Minimize -llhratio function. ] 0.058 sec/iter (10000)\n", + "[Maximizing LLH ratio function. ] 0.058 sec/iter (10000)\n", + "[Calculating test statistic. ] 5.1e-05 sec/iter (10000)\n" ] } ], @@ -958,19 +977,17 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 32, "metadata": {}, "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3deXxU9fX/8dcxqOACKotEAgJClbAIGI2orUhFoYpgf4hrK7UVxa31Z6vUqoj9tVK16letC1ZxB60tIIpWvwKuNChIlUWWAmLYZBFxQ7bz+2Myt8MwM5kkM7kzyfv5eOSRzJ25956EkDOf9Zi7IyIiArBH2AGIiEjuUFIQEZGAkoKIiASUFEREJKCkICIigQZhB1ATzZo187Zt24YdhohIXpk1a9Z6d2+e6Lm8Tgpt27bl/fffDzsMEZG8YmafJHtO3UciIhLIy6RgZgPMbMwXX3wRdigiInVKXiYFd5/s7sOaNGkSdigiInVKXo8piNSWbdu2UV5ezpYtW8IORSRtDRs2pKioiD333DPtc5QURNJQXl7O/vvvT9u2bTGzsMMRqZS7s2HDBsrLy2nXrl3a5+Vl95FIbduyZQtNmzZVQpC8YWY0bdq0yq1bJQWRNCkhSL6pzu9sXiaFms4+GjV5Hmc/NINnylZkODIRkfyWl0khE7OPypZtZNKclRmMSiS7li9fTpcuXWp0jenTp3P66adnKKLMGjp0KM8//3xGrrVw4UIWLlyYkWvVN/VyoHnkgM7MX7U57DBE8oq74+7ssUdevpeUNOlfVySPbN++nQsvvJBu3boxePBgvvnmG2655RaOPvpounTpwrBhw4hWU1yyZAknn3wyRx55JD179uQ///nPLtd677336NGjB0uXLmXdunX07duXnj17cskll3DooYeyfv16li9fTqdOnbjsssvo2bMnn376KePGjaNr16506dKF6667LrjefvvtF3z9/PPPM3ToUCDSArjqqqs47rjjaN++fdAacHeuuOIKiou
LOe200/jss8+y/NOTdNTLloJITYyaPC/jLc3iQxozckDnSl+3cOFCHnnkEY4//nguuugi7r//fq644gpuuukmAH7yk5/w4osvMmDAAM4//3xGjBjBmWeeyZYtW9i5cyeffvopAO+++y5XXnklkyZNok2bNlxxxRX06dOH3/72t7zyyiuMGTNml3uOHTuW+++/n1WrVnHdddcxa9YsDjzwQE455RQmTpzIoEGDUsa9evVq3n77bT7++GPOOOMMBg8ezIQJE1i4cCEfffQRa9eupbi4mIsuuqgGP0XJBLUURPJI69atOf744wG44IILePvtt5k2bRqlpaV07dqVqVOnMm/ePL788ktWrlzJmWeeCUQWMe2zzz4ALFiwgGHDhjF58mTatGkDwNtvv80555wDQL9+/TjwwAODex566KEce+yxQKR10bt3b5o3b06DBg04//zzefPNNyuNe9CgQeyxxx4UFxezdu1aAN58803OPfdcCgoKOOSQQ+jTp0+GfkpSE2opiFRROu/osyV+iqGZcdlll/H+++/TunVrbr75ZrZs2RJ0ISVSWFjIli1b+OCDDzjkkEMAUr5+3333Db5O9brY2OLnxu+9994Jr6FpvrknL1sK2hBP6qsVK1YwY8YMAMaNG8cJJ5wAQLNmzfjqq6+C/vrGjRtTVFTExIkTAfjuu+/45ptvADjggAN46aWXuP7665k+fToAJ5xwAs899xwAr776Kp9//nnC+5eWlvLGG2+wfv16duzYwbhx4zjxxBMBOPjgg1mwYAE7d+5kwoQJlX4vP/jBDxg/fjw7duxg9erVTJs2rZo/FcmkvEwK2hBP6qtOnTrx+OOP061bNzZu3Mjw4cO5+OKL6dq1K4MGDeLoo48OXvvkk09yzz330K1bN4477jjWrFkTPHfwwQczefJkLr/8csrKyhg5ciSvvvoqPXv25OWXX6awsJD9999/t/sXFhZy6623ctJJJwUD2AMHDgRg9OjRnH766fTp04fCwsJKv5czzzyTjh070rVrV4YPHx4kFwmXpWoO5rqSkhKvbpGdsx+KvNt69pJemQxJ6qgFCxbQqVOnsMPImu+++46CggIaNGjAjBkzGD58OHPmzAk7rGqLrlE4/PDDQ44kfIl+d81slruXJHq9xhREhBUrVjBkyBB27tzJXnvtxcMPPxx2SBISJQURoWPHjnzwwQdhhyE5IC/HFEREJDuUFEREJKCkICIiASUFEREJ5GVS0OI1EZHsyMukoMVrUt9s2LCB7t270717d1q2bEmrVq2Cx6NGjaJz585069aN7t27U1ZWFpw3ePBgli5dSmlpKd27d6dNmzY0b948OPejjz7isMMOY/HixQBs27aNrl27Btf4wx/+kPTaURs3bqRv37507NiRvn37Bquhly9fTqNGjYJ7XXrppcE5s2bNomvXrnTo0IGrrrpql60vnnvuOYqLi+ncuTPnnXde2j+jZcuWUVpaSseOHbn66qvZunUrEKkh0aRJkyCOW265JThn06ZNDB48mCOOOIJOnToFq8V//etfM3Xq1LTv/fHHH9OrVy/23ntv7rjjjuD4li1bOOaYYzjyyCPp3LkzI0eOrNL5UTt27KBHjx671MK4+eabd/k9mDJlStrxphTdIz0fP4466iivriEPvutDHny32udL/TJ//vywQwiMHDnSb7/9dnd3f/fdd/3YY4/1LVu2uLv7unXrfOXKle7uPnfuXB80aNAu544dO9Yvv/zyXY49++yz3rdvX3d3/+Mf/+jDhg2r9NqxfvOb3/itt97q7u633nqrX3vtte7uvmzZMu/cuXPC7+Hoo4/2d99913fu3On9+vXzKVOmuLv7okWLvHv37r5x40Z3d1+7du1u544dO9ZHjhy52/GzzjrLx40b5+7uZ599dvCaadOm+WmnnZYwjp/+9Kf+8MMPu7v7d999559//rm7uy9fvjz4maRj7dq1PnPmTL/++uuDfxt39507d/qXX37
p7u5bt271Y445xmfMmJH2+VF//vOf/dxzz93l+4j9PUgl0e8u8L4n+buqdQoi1dC7d++MXi+6B1FVrV69mmbNmgUbzjVr1ix47umnnw62oEhlyJAhPProo9x22208+OCDwXqFVNeONWnSpCD+Cy+8kN69e/OnP/0pZcybN2+mV6/IbgI//elPmThxIv379+fhhx/m8ssvD3ZpbdGiRaXxQ+TN7dSpU3nmmWeAyK6s9913HzfffHPSczZv3sybb77JY489BsBee+3FXnvtBUR2ht2wYQNr1qyhZcuWld6/RYsWtGjRgpdeemmX42YW1JnYtm0b27ZtS7gJYLLzAcrLy3nppZf43e9+x5133llpLDWVl91HIhJxyimn8Omnn/K9732Pyy67jDfeeCN47p133uGoo45K6zp333031113HTfccAMHHXRQpdeOtXbt2mCvo8LCwl2K5SxbtowePXpw4okn8tZbbwGwcuVKioqKgtcUFRWxcmWkNO6iRYtYtGgRxx9/PMceeyyvvPJKWvFv2LCBAw44gAYNIu9zW7ZsuUscM2bM4Mgjj6R///7MmzcPgKVLl9K8eXN+9rOf0aNHD37xi1/w9ddfB+f07NmTd955B4Crr7466KaJ/Rg9enSlse3YsYPu3bvTokUL+vbtS2lpaVrfU9SvfvUrbrvttoQV7+677z66devGRRddlHQTw6pSS0GkGqr7zj7T9ttvP2bNmsVbb73FtGnTOPvssxk9ejRDhw5l9erVNG/ePK3rvPLKKxQWFjJ37ty0rp2OwsJCVqxYQdOmTZk1axaDBg1i3rx5Cbffjr573r59O4sXL2b69OmUl5fz/e9/n7lz57Jjxw5++MMfApExjK1btwY7wD755JMp38337NmTTz75hP32248pU6YwaNAgFi9ezPbt25k9ezb33nsvpaWl/PKXv2T06NH8/ve/ByLv3letWgXAXXfdldb3nEhBQQFz5sxh06ZNnHnmmcydOzftWtsvvvgiLVq04Kijjtrtd2748OHceOONmBk33ngj11xzDY8++mi144xSS0EkzxUUFNC7d29GjRrFfffdx9///ncAGjVqtFtdg0RWrVrFPffcw8yZM5kyZQoffvhhpdeOdfDBB7N69Wog0jUU7fLZe++9adq0KQBHHXUUhx12GIsWLaKoqIjy8vLg/PLy8qCuQ1FREQMHDmTPPfekXbt2HH744SxevJimTZsyZ84c5syZwy233MKll14aPO7atSvNmjVj06ZNbN++HYA1a9YEcTRu3DjowvnRj37Etm3bWL9+PUVFRRQVFQXv3AcPHszs2bODuLZs2UKjRo2AmrUUog444AB69+6ddusHIq29F154gbZt23LOOecwdepULrjgguDnXlBQwB577MHFF1/MzJkz075uKkoKInls4cKFwcwhgDlz5nDooYcCkW22lyxZUuk1rr76aq6//nqKioq48847ufzyy3H3lNeOdcYZZ/D4448D8PjjjwfjGOvWrWPHjh1ApKtm8eLFtG/fPtiW+1//+hfuzhNPPBGcM2jQoKCuwvr161m0aBHt27ev9HswM0466aSgnsTEiRODlsWaNWuC1snMmTPZuXMnTZs2pWXLlrRu3TrYUfX111+nuLg4uOaiRYuCd/R33XVXkIRiP0aMGJEyrnXr1rFp0yYAvv32W/73f/+XI444otLvJ+rWW2+lvLyc5cuXM378ePr06cNTTz0FECRigAkTJqTd+qiMuo9E8thXX33FlVdeyaZNm2jQoAEdOnQI6iufdtppTJ8+nZNPPjnp+a+99horVqzg5z//OQADBgzg4Ycf5oknnqBLly5Jrx1rxIgRDBkyhEceeYQ2bdrwt7/9DYiU27zpppto0KABBQUFPPjgg8F4xQMPPMDQoUP59ttv6d+/P/379wfg1FNP5dVXX6W4uJiCggJuv/32oLVRmT/96U+cc8453HDDDXTo0IHBgwcD8Pzzz/PAAw/QoEEDGjV
qxPjx44PuqnvvvZfzzz+frVu30r59e8aOHQtEBoWXLFlCSUnC3aV3s2bNGkpKSti8eTN77LEHd999N/Pnz2f16tVceOGF7Nixg507dzJkyJBgWumDDz4IwKWXXpr0/MaNGye957XXXsucOXMwM9q2bctDDz2UVqyVUT0F1VOQNORjPYVvv/2Wk046iXfeeYeCgoKww6lVNa2nMGHCBGbPnh2ML+SzqtZTUPeRSB3VqFEjRo0aFczskfRt376da665JuwwQqHuI5E0uXveFZo/9dRTww4hL5111llhh5AR1ekJUktBJA0NGzZkw4YN1fpPJhIGd2fDhg00bNiwSueppSCShug0ynXr1oUdiqRhzZo1AOzcuTPkSMLVsGHDXRYKpiNnkoKZdQJ+CTQDXnf3B0IOSSQQnTcv+WH48OFA7iwyzCdZ7T4ys0fN7DMzmxt3vJ+ZLTSzJWY2AsDdF7j7pcAQIL15YCIiklHZHlN4DOgXe8DMCoC/AP2BYuBcMyuueO4M4G3g9SzHJSIiCWQ1Kbj7m8DGuMPHAEvcfam7bwXGAwMrXv+Cux8HnJ/smmY2zMzeN7P31b8rIpJZYYwptAI+jXlcDpSaWW/gx8DeQNJqEe4+BhgDkcVr2QtTRKT+CSMpJJro7e4+HZie1gXMBgADOnTokMGwREQkjHUK5UDrmMdFwKqqXMBVjlNEJCvCSArvAR3NrJ2Z7QWcA7wQQhwiIhIn21NSxwEzgMPNrNzMfu7u24ErgH8CC4Dn3H1eFa87wMzGfPHFF5kPWkSkHsvqmIK7n5vk+BRSDCancd3JwOSSkpKLq3sNERHZnfY+EhGRgJKCiIgE8jIpaExBRCQ78jIpaEqqiEh25GVSEBGR7MjLpKDuIxGR7MjLpKDuIxGR7MjLpCAiItmhpCAiIgElBRERCeRlUsjUQPP81Zs5+6EZPFO2IkORiYjkt7xMCpkYaB7YvRXFhY2Zv3ozk+aszGB0IiL5Ky+TQiacV9qGZy/pRXFh47BDERHJGfU2KYiIyO6UFEREJJCXSUErmkVEsiMvk4JWNIuIZEdeJoVM09RUEZGIrJbjzAcDu7cCIokBIrOSRETqq3rfUoidmqoWg4jUd/W+pRClFoOIiFoKAS1mExHJ06SgKakiItmRl91H7j4ZmFxSUnJxNq4fHVuIGti9lbqTRKReyMukkE3RsYUojTGISH2ipBDnvNI2uySAsx+aEbQc1GIQkbpOSaESmpUkIvVJpQPNZna8mb1mZovMbKmZLTOzpbURXC7QrCQRqU/SaSk8AlwNzAJ2ZDec3KZuJBGp69JJCl+4+8tZjyTHqRtJROqDdNYpTDOz282sl5n1jH5kPbIcE9uNVLZso7bCEJE6KZ2WQmnF55KYYw70yXw4uW9g91aULdvI9RM+YtKclepKEpE6pdKk4O4n1UYgVWFmA4ABHTp0qPV7RxPApDkr1ZUkInWOuXviJ8wucPenzOz/Jnre3e/MamRpKCkp8ffffz+0+0fXMBQXNlaLQSSH9O7dG4Dp06eHGkeuMrNZ7l6S6LlULYV9Kz7vn/mQ6obo4HPZso2ULduo7iQRyXtJk4K7P1TxeVTthZNfoqufnylboe4kEakTKh1TMLP2wP8AxxIZYJ4BXO3u9WYBW2WiySG6iV40ScRSC0JE8kE6U1KfAZ4DCoFDgL8B47IZVL6LbTVAZG1DfJIQEclF6SQFc/cn3X17xcdTRFoMkkJxYWOevaSXtsgQkbyStPvIzA6q+HKamY0AxhNJBmcDL9VCbCIiUstSjSnMIpIErOLxJTHPOfD7bAWVz6LdRmodiEg+SjX7qF1tBlIXxBboiS/WIyKSD1RPIYPiC/TESrTDanSWkmYmiUiuUFKoBfE7rEJkhlLZso3BYyUFEckFKZOCmRlQ5O6f1lI8dVLsOob5qzcHyaC03UG7JAYRkbClTAru7mY2ETiqNoIxs0HAaUAL4C/u/mpt3Le2xI85xC54ExHJBel
0H/3LzI529/eqcwMzexQ4HfjM3bvEHO9HZKV0AfBXdx/t7hOBiWZ2IHAHUKeSQrIxh+h4Q5TGGEQkLOksXjuJSGL4j5l9aGYfmdmHVbjHY0C/2ANmVgD8BegPFAPnmllxzEtuqHi+zhvYvdUu01e1+llEwpROS6F/TW7g7m+aWdu4w8cAS6L7J5nZeGCgmS0ARgMvu/vsRNczs2HAMIA2bfL/3XR86yE67qBa0CIShnSK7HxiZicAHd19rJk1B/ar4X1bAbGD1+VEKrxdCZwMNDGzDu7+YIJ4xgBjIFJPoYZx5JxkM5Vin4t9rKQhIpmUzi6pI4mU4jwcGAvsCTwFHF+D+1qCY+7u9wD31OC6eS/ZTKX9G/73nyo2YSgpiEgmpdN9dCbQA5gN4O6rzKymhXfKgdYxj4uAVemeHGY5ztoSP1MpdufV2DGI2G261XIQkZpKJylsrZia6gBmtm9lJ6ThPaCjmbUDVgLnAOele7K7TwYml5SUXJyBWHJSqtXR0SQBu27THa3+Fn2NEoSIVFU6SeE5M3sIOMDMLgYuAh5O9wZmNg7oDTQzs3JgpLs/YmZXAP8kMiX1UXefV+Xo65H4JJGo5VC2bKO6lkSkRtIZaL7DzPoCm4HvATe5+2vp3sDdz01yfAowJd3rxKoP3UeVSdS9BNqdVURqJt29jz4CGhHZMvuj7IWTnvrQfVSZZN1LsQlCRKSqKl28Zma/AGYCPwYGE1nIdlG2A5OqOa+0Dc9e0ktdRiJSI+m0FH4D9HD3DQBm1hR4F3g0m4Glou4jEZHsSCcplANfxjz+kl0XntU6dR+lR9NVRaSq0kkKK4EyM5tEZExhIDDTzP4vgLvfmcX4pAZiZyiBZiOJSOXSSQr/qfiImlTxuaYL2KQWJFvoFqUWhIjESmdK6qjaCKQqNKZQdfHFfaLHQC0IEfmvvCzHqTGFysUubEtU3AdQgR8R2U1eJgVJLVESSNUaULeSiEQpKdRBlSWBWOpWEpFY6WydfRvw/4BvgVeAI4FfuftTWY4tVUwaU8gAdSuJSLx0WgqnuPu1ZnYmkTULZwHTiNRUCIXGFDIjVYsitm50oq4krYEQqZvSSQp7Vnz+ETDO3TeaJaqRI3VFbAsiWVeS1kCI1E3pJIXJZvYxke6jyyrKcW7JblgSptgWRKquJO3IKlL3VLohnruPAHoBJe6+DfiayKpmqSeiXUnPlK0IOxQRybKkLQUz+3GCY7EP/5GNgNKhgebaE+1KKlu2MajsFtu9JCJ1S6ruowEpnnNCTAoaaK490a6k6MBy7DiCiNQ9SZOCu/+sNgOR3BZNDsnGGDQbSaRuSGvxmpmdBnQGGkaPufst2QpK8o9mI4nUDeksXnsQ2Ac4CfgrkeprM7Mcl+Sw2H2VEj0WkfyVTkvhOHfvZmYfuvsoM/szIY4nSLjiV0HHP5eJ+tDqihIJTzpJ4duKz9+Y2SHABqBd9kKSXJZoFXTs40wkBXVFiYQnnaTwopkdANwOzCYy8+ivWY2qEpqSWvepK0okHOksXvu9u29y978DhwJHuPuN2Q8tZUyT3X1YkyZNwgxDRKTOSbV4rY+7T022iM3dNa4gaYuOE2iMQCS3peo+OhGYSuJFbKEuXpPcFrvDKvx3ADpat0FJQSR3pVq8NtLM9gBedvfnajEmyWPxM5Ki22Ps31D1nETyQcr/qe6+08yuAJQUJC3xs5Nip5eWLdsYtCLUjSSSm9J5+/aamf0aeJbIDqkAuPvGrEUldUZskogmiPjN9ZQcRHJHOknhoorPl8ccc6B95sORuizZ5npKCiK5I52k0MnddymqY2YNk71YpDLJNteLTRZapyASjkrXKQDvpnlMpEZiE0Kimg3PlK3g7IdmqOCPSBalWqfQEmgFNDKzHkC0wk5jIhvkhUYrmuuu4sLGPHtJL2D3LTO0/YVI9qXqPjoVGAoUAX/mv0lhM3B9dsNKTUV26i91K4lkV6p1Co8Dj5vZ/6nY4kJEROq
4tAaao1+Y2d7u/l0W4xHJCG2/LVI9SQeazexaM+tFpKhOVOJajCI5Jjr+MH/15oxs5y1SX6RqKSwEzgLam9lbwAKgqZkd7u4LayU6kRrQ+INI1aVKCp8TGVDuXfHRicjg84iKxHBc1qOTeqE66xPKlm3kmbIVu62Wht33XxKR9KVap9APeAk4DLgTOAb42t1/poQgmVTZ+oR40dfEdgupu0gkM1LNProewMz+DTwF9ACam9nbwOfunmhLbZFqiV2fECt2A72o80rbJPzDn6yVkWgTPg1EiySWzuyjf7r7e8B7Zjbc3U8ws2bZDkzqh+hitER/0KOJIHb77aqOE0SvEb/PkhbCiSRWaVJw92tjHg6tOLY+WwFJ/RH77j9Rt1H8BnrJXpdKsn2WQAPRIolUqfKJu/87W4FI/RNfe6Gmr6tMoq4oEdmVymFJvRDfjSQiiaWzS2qtMLP2ZvaImT0fdiySH6Lv/NPZMfW80jY8e0mvhF1G0emtldEurVIfZDUpmNmjZvaZmc2NO97PzBaa2RIzGwHg7kvd/efZjEfqjoHdW1Fc2LjGU1ATTW9NRtNepT7IdkvhMSLrHQJmVgD8BegPFAPnmllxluOQOibVO/+qXqe03UFpv764sLEGqKVOy+qYgru/aWZt4w4fAyxx96UAZjYeGAjMz2YsUn+kmuaajmyuYdD6CMl1YQw0twI+jXlcDpSaWVPgD0APM/utu9+a6GQzGwYMA2jTRv+hZFeVTXNNRzbXMGh9hOS6MJKCJTjm7r4BuLSyk919DDAGoKSkxDMcm+S56k5fjQ5aR7/OZheRup8kl4Ux+6gcaB3zuAhYVZULmNkAMxvzxRdfZDQwqZ+ig9ZR6e7BJFIXhdFSeA/oaGbtgJXAOcB5VbmAynFKJiVrXWiGkdRHWU0KZjaOyLbbzcysHBjp7o+Y2RXAP4EC4FF3n5fNOKRuq+nAcnUl2n5j0pyVGkCWvJbt2UfnJjk+BZhS3eua2QBgQIcOHap7CakjMjGwXF3xg8YQWQgHGkCW/JWX21yo+0iiMrUvUnVp0FjqmrxMCiK1qaqV4RLVb8hkHKA1DpI9ObP3UVVo9pHUpqpWhsvE9hup4tA2G5JNeZkU3H2yuw9r0qRJ2KFIPRGtDFfZu/PSdgdlZPuNVHGoy0qyKS+TgoiIZIfGFERSyNR012TjErHjBFEaL5Aw5WVLQWMKUhuiK50zscI52bhE/LRWjRdI2PKypaApqVIbMj3dNTouAbuulo49nqiWtEhtysuWgoiIZIeSgkgWRUt41qQ2dKJrVKUUqUhV5GX3kba5kHxR1TUOVblGNEloUFoyKS+TgsYUJJ/Ejhlk6hrnlbbR+INkhbqPREQkoKQgIiIBJQUREQnk5ZiCSE1kYpVyWIV9Ekm2e2qq1dKV7biqHVnrr7xMCpp9JNWViaI88dcIewVy/Kro6B/w+G01YmcrJTunsmtK3ZeXSUGzj6S6MrFKOf4aYScFSN5iSbVaurJWTi60gqT2aUxBREQCSgoiIhJQUhARkYCSgoiIBPJyoFkkH2Ri2uozZSsoW7aR0nYHZSSmsmUbg831khX7STUrK/Z132zdwT57FQTn1OYMpcqm4X7Zohv7f/ZhrcVTl+RlUtCUVMl1mZj6CqT1h7qqYjfXi14/fgpqqniir/tyy3b2b/jfPyG1mRRSTcMtW7aRvZt1UlKoprxMCpqSKrkukwV6StsdVGvFftI9HyKtjjCnrWrKbHZoTEFERAJKCiIiElBSEBGRgJKCiIgElBRERCSgpCAiIoG8nJIqkmuqs1At2TnzV2/m7IdmpLU2If4aVTm3KnFFF4XFL3pLR3VqM6ieQ3jyMilo8ZrkkuosVEt2TvTrdBaSJbtGsnOjr0kVY7JaEbEJoao1JKpTm0H1HMKTl0lBi9ckl1RnoVqyc6LH42sfpHuNVOemE2eqWhHJFr2lozoLzbQ4LRwaUxARkYCSgoiIBJQUREQkoKQgIiIBJQU
REQkoKYiISEBJQUREAkoKIiISUFIQEZGAkoKIiASUFEREJKCkICIigZzZEM/M9gXuB7YC09396ZBDEhGpd8oxuBoAAAdaSURBVLLaUjCzR83sMzObG3e8n5ktNLMlZjai4vCPgefd/WLgjGzGJSIiiWW7pfAYcB/wRPSAmRUAfwH6AuXAe2b2AlAEfFTxsh1Zjksk51WncE91zq3JfWKvEbtld7QuQ6LCPPEFdKKvi79eosJBibb+TlRYaOs+LVhdfDbPlK0IzklWuCf2eGz88c+nKvaT7jXij1f2XKp7FR/SmJEDOqd8bXVkNSm4+5tm1jbu8DHAEndfCmBm44GBRBJEETCHFC0YMxsGDANo00aFN6Ruqk7hnuqcW937pCrYE1scJ1FhnvgCOrGvi4qeE3/N+D+YyYoSHdPxEOav3o9Jc1YG5yQr3BOfuOLvNWnOSsqWbUx4/6h0rpGsaFBVCwpF4yk+JDv1JsIYU2gFfBrzuBwoBe4B7jOz04DJyU529zHAGICSkhLPYpwioalO4Z7qnFvd+8SfF/t1bIshWWGe+FZJ7OsS3auywkGxz5e2O4hnL+mV8JxkraHY+6dT4Kg610jVEqtqK6203UFZaSVAOEnBEhxzd/8a+FlaF1A5ThGRrAhjSmo50DrmcRGwqioXcPfJ7j6sSZMmGQ1MRKS+CyMpvAd0NLN2ZrYXcA7wQghxiIhInGxPSR0HzAAON7NyM/u5u28HrgD+CSwAnnP3eVW87gAzG/PFF19kPmgRkXos27OPzk1yfAowpQbXnQxMLikpubi61xARkd1pmwsREQnkZVJQ95GISHbkZVLQ7CMRkeww9/xd/2Vm64BPqnFqM2B9hsPJBMVVNYqr6nI1NsVVNTWN61B3b57oibxOCtVlZu+7e0nYccRTXFWjuKouV2NTXFWTzbjysvtIRESyQ0lBREQC9TUpjAk7gCQUV9UorqrL1dgUV9VkLa56OaYgIiKJ1deWgoiIJKCkICIigXqXFJLUhw6VmbU2s2lmtsDM5pnZL8OOKZaZFZjZB2b2YtixRJnZAWb2vJl9XPFzS1yhpZaZ2dUV/4ZzzWycmTUMKY7d6qOb2UFm9pqZLa74fGCOxHV7xb/jh2Y2wcwOyIW4Yp77tZm5mTXLlbjM7MqKv2PzzOy2TN6zXiWFmPrQ/YFi4FwzKw43KgC2A9e4eyfgWODyHIkr6pdEdrTNJf8DvOLuRwBHkgPxmVkr4CqgxN27AAVEtoYPw2NAv7hjI4DX3b0j8HrF49r2GLvH9RrQxd27AYuA39Z2UCSOCzNrTaSe/IraDqjCY8TFZWYnESlh3M3dOwN3ZPKG9SopEFMf2t23AtH60KFy99XuPrvi6y+J/IGrWmHeLDGzIuA04K9hxxJlZo2BHwCPALj7VnffFG5UgQZAIzNrAOxDFQtIZYq7vwlsjDs8EHi84uvHgUG1GhSJ43L3Vyu21Af4F5HCW6HHVeEu4FoglBk5SeIaDox29+8qXvNZJu9Z35JCovrQOfHHN8rM2gI9gLJwIwncTeQ/xc6wA4nRHlgHjK3o1vqrme0bdlDuvpLIu7YVwGrgC3d/NdyodnGwu6+GyBsRoEXI8SRyEfBy2EEAmNkZwEp3/3fYscT5HvB9MyszszfM7OhMXry+JYWE9aFrPYokzGw/4O/Ar9x9cw7EczrwmbvPCjuWOA2AnsAD7t4D+JpwukJ2UdFHPxBoBxwC7GtmF4QbVf4ws98R6Up9Ogdi2Qf4HXBT2LEk0AA4kEhX82+A58ws0d+2aqlvSaHG9aGzxcz2JJIQnnb3f4QdT4XjgTPMbDmRrrY+ZvZUuCEBkX/HcnePtqaeJ5IkwnYysMzd17n7NuAfwHEhxxRrrZkVAlR8zmi3Q02Y2YXA6cD5nhuLpw4jktz/XfH7XwTMNrOWoUYVUQ78wyNmEmnFZ2wQvL4lhZysD12R5R8BFrj7nWHHE+Xuv3X3IndvS+RnNdXdQ3/n6+5rgE/
N7PCKQz8E5ocYUtQK4Fgz26fi3/SH5MAAeIwXgAsrvr4QmBRiLAEz6wdcB5zh7t+EHQ+Au3/k7i3cvW3F73850LPidy9sE4E+AGb2PWAvMriTa71KCpmoD50lxwM/IfJOfE7Fx4/CDirHXQk8bWYfAt2BP4YcDxUtl+eB2cBHRP5/hbJNQqL66MBooK+ZLSYyo2Z0jsR1H7A/8FrF7/6DORJX6JLE9SjQvmKa6njgwky2rrTNhYiIBOpVS0FERFJTUhARkYCSgoiIBJQUREQkoKQgIiKBBmEHIJLPzKwpkc3lAFoCO4hswQEwARhScWwncEnMgjuRnKQpqSIZYmY3A1+5+x0VW3nfCfR29+8qtl3ey91zYgW9SDJqKYhkRyGwPmYny4ytOBXJJo0piGTHq0BrM1tkZveb2YlhBySSDiUFkSxw96+Ao4BhRMYYnjWzoaEGJZIGdR+JZIm77wCmA9PN7CMim9A9FmZMIpVRS0EkC8zscDPrGHOoO/BJWPGIpEstBZHs2A+4t6II/XZgCZGuJJGcpimpIiISUPeRiIgElBRERCSgpCAiIgElBRERCSgpiIhIQElBREQCSgoiIhL4/9NYVDE+gbElAAAAAElFTkSuQmCC", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjoAAAGwCAYAAACgi8/jAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABGUElEQVR4nO3de1xUdf7H8fdwFVQQNG4qaLVSmOkGYqy2aVGolavdNPsVWdlWaLa01Vqmbqnd1nIzdt0uanupdd1da7OLm2hZLV7QrFzMsiUtE7wiCepwOb8/aCZmuM3ADANnXs/HYx5yLvM9n2E8Mx++V4thGIYAAABMKMDXAQAAAHgLiQ4AADAtEh0AAGBaJDoAAMC0SHQAAIBpkegAAADTItEBAACmFeTrAHyttrZW3377rbp37y6LxeLrcAAAgAsMw9B3332nhIQEBQQ0XW/j94nOt99+q759+/o6DAAA0Apff/21+vTp0+Rxv0108vLylJeXp+rqakl1v6iIiAgfRwUAAFxRXl6uvn37qnv37s2eZ/H3JSDKy8sVGRmpY8eOkegAANBJuPr9TWdkAABgWiQ6AADAtEh0AACAafltZ2QA/qGmpkZVVVW+DgOAm4KDgxUYGNjmcvw20bGNuqqpqfF1KAC8wDAMlZSUqKyszNehAGilHj16KC4urk3z3DHqilFXgCnt379fZWVliomJUXh4OBOCAp2IYRiqrKzUgQMH1KNHD8XHxzc4x9Xvb7+t0QFgXjU1NfYkp2fPnr4OB0ArhIWFSZIOHDigmJiYVjdj0RkZgOnY+uSEh4f7OBIAbWG7h9vSz45EB4Bp0VwFdG6euIdJdAAAgGmR6AAAANPy20QnLy9PKSkpGjp0qK9DAQC7kSNH6u677/Za+TfddJPGjx/vtfJ94auvvpLFYtH27dt9HQo6IL9NdHJyclRUVKQtW7Z4vGzDMFRprValtVp+PnofAACfYni5F5yoqlHK7DWSpLSkKK28PYNOkQBMy2q1KiQkxNdhAI3y2xqd9lK456hOVDH7MuBr9Wta2/vhbs1udXW1pk2bpsjISPXq1UsPPfSQvYw//elPSktLU/fu3RUXF6fJkyfrwIEDkqSaWkOffFOmf+YX6LLLLldERIS6d++uCy64QF9++WWj19qyZYtOO+00Pf744/Z98+bNU0xMjLp3765bb71Vv/rVrzRkyBD7cVvz1/z585WQkKDk5GRJ0qeffqqLLrpIYWFh6tmzp2677TYdP37c/rzGmuXGjx+vm266yb7dr18/LViwQDfffLO6d++uxMREPffccw7P2bx5s3784x+rS5cuSktL00cffeTW7xf+hRodLwgLDlTh
rEylzVvr61AAfK9+TWt7K3o4S+Ehrn/cvvTSS7rlllu0efNmFRYW6rbbblNiYqKmTp2qqqoqPfLII0pOTtaBAweUm5urm266SW+++aYkqXT/t7r56st00ahRWrdunSIiIvThhx+qurq6wXXWrVunK6+8Uk888YRuu+02SdJf/vIXzZ8/X7/73e80fPhw/fWvf9XChQvVv39/h+fm5+crIiJC77zzjiSpoqJCWVlZysjI0JYtW3TgwAHdeuutmjZtmpYvX+7W72vhwoV65JFH9MADD+jvf/+77rjjDl144YVKTk7W8ePHdfnll+uSSy7Rn//8ZxUXF2vGjBlulQ//QqLjBRaLReEhbV+IDIB/6tu3r55++mlZLBYlJyfr008/1dNPP62pU6fq5ptvtp93+umn65lnntHQoUN1/PhxhYV31YqXXlC3iAi9/Mor6hJa15w0YMCABtdYtWqVbrzxRr3wwguaOHGiff/ixYt1yy23aMqUKZKk2bNn69///rdDzYwkde3aVS+88IK9yer555/XyZMn9cc//lFdu3aVJD377LO64oor9Pjjjys2Ntbl1z927FjdeeedkqT7779fTz/9tNavX6/k5GS9/PLLqq2t1YsvvqguXbpo4MCB+uabb3THHXe4XD78C4kOAL8QFhyoooezfHZtd5x//vkO/foyMjK0cOFC1dTUaPv27Zo7d64+/vhjHT16VLW1tZKkvXv3Kvmss7Wr6FOdl56h4ODgJsvftGmTVq9erb///e8NRmDt2rXLnmTYpKena926dQ77Bg0a5NAvZ+fOnRo8eLA9yZGk4cOHq7a2Vrt27XIr0Tn33HPtP1ssFsXFxdmb53bu3Klzzz1XXbp0sZ+TkZHhctnwPyQ6APxCXU1r5/7IO3nypLKyspSVlaW//OUvOu2007R3715lZWXJarVKkkK7hLVYzhlnnKGePXtq6dKluuyyy5pNippSP6FxVUBAQIP+So1N7e8cj8VisSd0gLvojAwAHcymTZsctjdu3Kgf/ehH+uyzz3T48GE99thjuuCCC3TWWWfZazpsBpw9UNs2FzS7NlCvXr20bt067d69W9dee63DucnJyQ2m3XBlGo6zzz5bH3/8sSoqKuz7PvzwQwUEBNg7K5922mnav3+//XhNTY127NjRYtnO1/nkk0908uRJ+76NGze6VQb8i98mOkwYCKCj2rt3r3Jzc7Vr1y698sorWrx4sWbMmKHExESFhIRo8eLF+t///qd//etfeuSRRxyeO+mmqar47jtNvu46FRYW6osvvtCf/vQn7dq1y+G8mJgYrVu3Tp999pmuu+46e2fl6dOn68UXX9RLL72kL774QvPmzdMnn3zS4hQZ119/vbp06aLs7Gzt2LFD69ev1/Tp03XDDTfYm60uuugivfHGG3rjjTf02Wef6Y477lBZWZlbv5vJkyfLYrFo6tSpKioq0ptvvqnf/OY3bpUB/+K3iY43JwwEgLa48cYbdeLECaWnpysnJ0czZszQbbfdptNOO03Lly/XypUrlZKSoscee6zBl3yPqGg9v+I1HT9+XBdeeKFSU1P1/PPPN9o8FRcXp3Xr1unTTz/V9ddfr5qaGl1//fWaOXOmfvnLX+q8885TcXGxbrrpJoc+MY0JDw/XmjVrdOTIEQ0dOlRXX321Lr74Yj377LP2c26++WZlZ2frxhtv1IUXXqjTTz9do0aNcut3061bN73++uv69NNP9eMf/1gPPvigw9B4wJnF8POpe8vLyxUZGaljx44pIiLCY+VWWqvtQ1ndHVoKoG1Onjyp4uJi9e/fv8UvaDOpqTX032+PSZIGJkQqMMAzE5VecskliouL05/+9CePlAe4qrl72dXvb759AQB2lZWVWrJkibKyshQYGKhXXnlFa9eutc+XA3Q2JDoAADuLxaI333xT8+fP18mTJ5WcnKx//OMfyszM9HVoQKuQ6AAA7MLCwrR2LbO6wzz8tjMyAAAwPxIdAABg
WiQ6AADAtEh0AACAaZHoAAAA0yLRAQAApuW3iQ5rXQEwu5/+9Kd6+eWXfR0GXDRp0iQtXLjQ12GYjt8mOqx1BaCjsVgszT7mzp0rSVq1apXOP/98RUZGqnv37ho4cKDuvvtuh7Je/9e/VFpaqkmTJundd99tsex3331X999/v/r166fvvvvOoawrrrhCP/3pT1VbWytJ+vjjjzVu3DjFxMSoS5cu6tevnyZOnNhgJfX6DMPQ7NmzFR8fr7CwMGVmZuqLL75wOKdfv34N4nrssccczvnkk090wQUXqEuXLurbt6+eeOKJBtcqKytTTk6O4uPjFRoaqgEDBujNN9909W1o4MiRI7r++usVERGhHj166JZbbtHx48ftx7/66qtGf6fOq6q3FNesWbM0f/58HTt2rNWxbtiwQVdccYUSEhJksVj06quvNjhn7ty5Ouuss9S1a1dFRUUpMzNTmzZtanO59d1+++2yWCxatGiRw35X3mNP89tEBwA6mv3799sfixYtUkREhMO+X/7yl8rPz9fEiRN11VVXafPmzdq6davmz5+vqqoqh7IWP7tYU6ZMUUBAgH7yk584lHPttddq9OjRDvt+8pOf6OGHH1a3bt2Um5trL2fp0qVav369li1bpoCAAB08eFAXX3yxoqOjtWbNGu3cuVPLli1TQkKCKioqmnxtTzzxhJ555hktWbJEmzZtUteuXZWVlaWTJ086nPfwww87xDV9+nT7sfLycl166aVKSkrS1q1b9eSTT2ru3Ll67rnn7OdYrVZdcskl+uqrr/T3v/9du3bt0vPPP6/evXs3GdvIkSO1fPnyJo9ff/31+u9//6t33nlHq1ev1oYNG3Tbbbc1OG/t2rUOsaemproV1znnnKMzzjhDf/7zn5uMpSUVFRUaPHiw8vLymjxnwIABevbZZ/Xpp5/qgw8+UL9+/XTppZfq4MGDbSrXZtWqVdq4caMSEhIaPd7ce+wVhp87duyYIck4duyYR8utOFVlJN2/2ki6f7VRcarKo2UDaN6JEyeMoqIi48SJEw2OVZyqatdHay1btsyIjIxssH/GjBnGyJEjG31OdU2t8fHXR431278wLBaLsWPHjkbPy87ONn72s581eqywsNAIDg423nrrLWPPnj1GRESEkZeXZz++atUqIygoyKiqcv211dbWGnFxccaTTz5p31dWVmaEhoYar7zyin1fUlKS8fTTTzdZzu9+9zsjKirKOHXqlH3f/fffbyQnJ9u3f//73xunn366YbVaXY7vwgsvNJYtW9bosaKiIkOSsWXLFvu+t956y7BYLMa+ffsMwzCM4uJiQ5Lx0UcfNXkNV+P69a9/bYwYMcLl2JsjyVi1alWL59m+B9euXdvmcr/55hujd+/exo4dOxp9P1t6j501dy+7+v3NEhAA/ErK7DXter2vHrvMo+XFxcXp5Zdf1o4dO3TOOec0es5HmzcqPDxcZ599ttvlp6amaubMmbr11lt1xhlnKD09XXfccYfD9aurq7Vq1SpdffXVslhaXiG9uLhYJSUlDutlRUZGatiwYSooKNCkSZPs+x977DE98sgjSkxM1OTJk/WLX/xCQUF1X1UFBQX66U9/qpCQEPv5WVlZevzxx3X06FFFRUXpX//6lzIyMpSTk6PXXntNp512miZPnqz7779fgYGBbv8+CgoK1KNHD6Wlpdn3ZWZmKiAgQJs2bdKECRPs+8eNG6eTJ09qwIABuu+++zRu3Dj7MVfjSk9P1/z583Xq1CmFhoZq7969SklJaTbGBx54QA888IDbr02qq2l67rnnFBkZqcGDB7eqDJva2lrdcMMNuvfeezVw4MAmz2vuPfYGEh0A6ESmT5+u999/X4MGDVJSUpLOP/98XXrppbr++usVFFyXAOzf97ViY2MVENC63gmzZs3SsmXLtGnTJn3++ecOycz555+vBx54QJMnT9btt9+u9PR0XXTRRbrxxhsVGxvbaHklJSWS1OB4bGys
/Zgk3XXXXTrvvPMUHR2t//znP5o5c6b279+vp556yl5O//79G5RhOxYVFaX//e9/Wrduna6//nq9+eab2r17t+68805VVVVpzpw5bv8uSkpKFBMT47AvKChI0dHR9ti7deumhQsXavjw4QoICNA//vEPjR8/Xq+++qo92XE1roSEBFmtVpWUlCgpKUkJCQnavn17szFGR0e7/bpWr16tSZMmqbKyUvHx8XrnnXfUq1cvt8up7/HHH1dQUJDuuuuuJs9p6T32BhIdAH6l6OEsX4fQJl27dtUbb7yhL7/8UuvXr9fGjRt1zz336Le//a0++PA/kqRTJ0+oS5curb7GO++8Y/8S37JlixITEx2Oz58/X7m5uVq3bp02bdqkJUuWaMGCBdqwYYMGDRrU6uvW7xt07rnnKiQkRD//+c/16KOPKjQ01KUyamtrFRMTo+eee06BgYFKTU3Vvn379OSTT9oTigULFmjBggX255w4cUIbN27UtGnT7PuKiooavO6m9OrVyyH2oUOH6ttvv9WTTz5pT3RciUuqW1RVkiorKyXVJVVnnnmmS3G4Y9SoUdq+fbsOHTqk559/Xtdee602bdrUIKlz1datW/Xb3/5W27Zta7aWzxPvsbvojAzAr4SHBLXrw1vOOOMM3XrrrXrhhRe0bds2FRUV6W8rVkiSekT31NGjR1tV7tGjRzV16lTNmjVLDz74oO68804dOnSowXk9e/bUNddco9/85jfauXOnEhIS9Jvf/KbRMuPi4iRJpaWlDvtLS0vtxxozbNgwVVdX66uvvrKX01gZ9a8RHx+vAQMGODQHnX322SopKZHVapVUNyJo+/bt9kdaWpoefvhhh322jrRxcXENRpNVV1fryJEjLca+e/du+7YrcUl1I7wk6bTTTpMk7d27V926dWv2UT9pc1XXrl115pln6vzzz9eLL76ooKAgvfjii26XY/P+++/rwIEDSkxMVFBQkIKCgrRnzx7dc8896tevX5PPc36PvYEaHQDo5Pr166fw8HBVVNaNejpr4LkqKSmx91txx/Tp0xUXF2fv8/Haa68pJydHK75PohoTEhKiM844o8lRV/3791dcXJzy8/M1ZMgQSXUjqDZt2uTQ/8fZ9u3bFRAQYK9lyMjI0IMPPqiqqioFBwdLqqt9Sk5Otr/O4cOH6+WXX1Ztba296e7zzz9XfHy8vW9PdHS0Q3NPWFiYYmJiGq05ycjIUFlZmbZu3WofRbVu3TrV1tZq2LBhzcYeHx9v33YlLknasWOH+vTpY29G8lbTlbPa2lqdOnWq1c+/4YYbHPpgSXX9p2644QZNmTKlyec5v8feQKIDAJ3I3LlzVVlZqbFjxyopKUllZWV65plnVFVVpczMS1Qt6axzzlWvXr304Ycf6vLLL3e57FWrVmnlypXaunWrvXPoSy+9pLS0NP3jH//QVVddpdWrV+uvf/2rJk2apAEDBsgwDL3++ut68803tWzZskbLtVgsuvvuuzVv3jz96Ec/Uv/+/fXQQw8pISFB48ePl1TX6XfTpk0aNWqUunfvroKCAv3iF7/Q//3f/9mTmMmTJ+vXv/61brnlFt1///3asWOHfvvb3+rpp5+2X+uOO+7Qs88+qxkzZmj69On64osvtGDBgmb7jTTn7LPP1ujRozV16lQtWbJEVVVVmjZtmiZNmmSv9XnppZcUEhKiH//4x5Kkf/7zn1q6dKleeOEFt+N6//33demll9q33W26On78uENNUnFxsbZv367o6GglJiaqoqJC8+fP17hx4xQfH69Dhw4pLy9P+/bt0zXXXGN/3sUXX6wJEybYm/NaKrdnz57q2bOnQyzBwcGKi4tTcnKyJNfeY69weYyXSTG8HDCf5oakdhZNDS9ft26dcdVVVxl9+/Y1QkJCjNjYWGP06NHG+++/bx9e/vHXR41f3nuvMWnSpEbLbmx4+cGDB42YmBhj/vz5Dc6fP3++ERMTYxw8eND48ssvjalTpxoD
BgwwwsLCjB49ehhDhw5tcni2TW1trfHQQw8ZsbGxRmhoqHHxxRcbu3btsh/funWrMWzYMCMyMtLo0qWLcfbZZxsLFiwwTp486VDOxx9/bIwYMcIIDQ01evfubTz22GMNrvWf//zHGDZsmBEaGmqcfvrpxvz5843q6uomY2tueLlhGMbhw4eN6667zujWrZsRERFhTJkyxfjuu+/sx5cvX26cffbZRnh4uBEREWGkp6cbK1eudDuuEydOGJGRkUZBQUGTsbRk/fr1hqQGj+zsbPs1JkyYYCQkJBghISFGfHy8MW7cOGPz5s0O5SQlJRlz5sxxudzGOA8ld/U9rs8Tw8sthmEY3kujOr7y8nJFRkbq2LFjioiI8Fi5ldZq+zDWooezvNpWD8DRyZMnVVxcrP79+7epU25nU1Nr6L/f1s2q2zPghM4ddI62bdumpKQkH0cGV/z+97/XqlWr9O9//9vXoXQYzd3Lrn5/0xkZAEwoLi5OL774ovbu3evrUOCi4OBgLV682NdhmA7VDABgUrb+L+gcbr31Vl+HYErU6AAAANMi0QFgWn7eBRHo9DxxD/ttopOXl6eUlBQNHTrU16EA8DDbHCu22WUBdE62e9h2T7eG3/bRycnJUU5Ojr3XNgDzCAwMVI8ePewz2oaHh7u0+GRnV1NryKium2X35MmTCgww/2uGORmGocrKSh04cEA9evRo1YKsNn6b6AAwN9v0/M7T95tZrWHoQNlJSVJQZRcF+EFyB3Pr0aNHs0ttuIJEB4ApWSwWxcfHKyYmRlVVVb4Op12csFbrtlUfSJJWTx+hMObvQicWHBzcppocG+4CAKYWGBjokQ/LzqA2oFr7vquRJIV26aIuJDqA/3ZGBgAA5keiAwAATItEBwAAmBaJDgAAMC0SHQAAYFokOgAAwLRIdAAAgGmR6AAAANMi0QEAAKZFogMAAEyLRAcAAJgWiQ4AADAtEh0AAGBaJDoAAMC0SHQAAIBpkegAAADTItEBAACm1ekTnbKyMqWlpWnIkCE655xz9Pzzz/s6JAAA0EEE+TqAturevbs2bNig8PBwVVRU6JxzztGVV16pnj17+jo0AADgY52+RicwMFDh4eGSpFOnTskwDBmG4eOoAABAR+DzRGfDhg264oorlJCQIIvFoldffbXBOXl5eerXr5+6dOmiYcOGafPmzQ7Hy8rKNHjwYPXp00f33nuvevXq1U7RAwCAjszniU5FRYUGDx6svLy8Ro+vWLFCubm5mjNnjrZt26bBgwcrKytLBw4csJ/To0cPffzxxyouLtbLL7+s0tLSJq936tQplZeXOzwAAIA5+TzRGTNmjObNm6cJEyY0evypp57S1KlTNWXKFKWkpGjJkiUKDw/X0qVLG5wbGxurwYMH6/3332/yeo8++qgiIyPtj759+3rstQAAgI7F54lOc6xWq7Zu3arMzEz7voCAAGVmZqqgoECSVFpaqu+++06SdOzYMW3YsEHJyclNljlz5kwdO3bM/vj666+9+yIAAIDPdOhRV4cOHVJNTY1iY2Md9sfGxuqzzz6TJO3Zs0e33XabvRPy9OnTNWjQoCbLDA0NVWhoqFfjBgAAHUOHTnRckZ6eru3bt/s6DAAA0AF16KarXr16KTAwsEHn4tLSUsXFxfkoKgAA0Fl06EQnJCREqampys/Pt++rra1Vfn6+MjIy2lR2Xl6eUlJSNHTo0LaGCQAAOiifN10dP35cu3fvtm8XFxdr+/btio6OVmJionJzc5Wdna20tDSlp6dr0aJFqqio0JQpU9p03ZycHOXk5Ki8vFyRkZFtfRkAAKAD8nmiU1hYqFGjRtm3c3NzJUnZ2dlavny5Jk6cqIMHD2r27NkqKSnRkCFD9PbbbzfooAwAAODM54nOyJEjW1yyYdq0aZo2bVo7RQQAAMyiQ/fR8Sb66AAAYH5+m+jk5OSoqKhIW7Zs8XUoAADAS/w20QEAAOZHogMAAEyLRAcAAJgWiQ4A
ADAtv010GHUFAID5+W2iw6grAADMz28THQAAYH4kOgAAwLRIdAAAgGmR6AAAANPy20SHUVcAAJif3yY6jLoCAMD8/DbRAQAA5keiAwAATItEBwAAmBaJDgAAMC0SHQAAYFp+m+gwvBwAAPPz20SH4eUAAJif3yY6AADA/Eh0AACAaZHoAAAA0yLRAQAApkWiAwAATItEBwAAmBaJDgAAMC2/TXSYMBAAAPPz20SHCQMBADA/v010AACA+ZHoAAAA0yLRAQAApkWiAwAATItEBwAAmBaJDgAAMC0SHQAAYFokOgAAwLRIdAAAgGn5baLDEhAAAJif3yY67bkERKW1RoZheP06AADAkd8mOu0pbd5aXbOkgGQHAIB2RqLjJWHBgUpLirJvF+45qhNVNT6MCAAA/0Oi4yUWi0Urb89Q4axMX4cCAIDfItHxIovFovCQQF+HAQCA3yLRAQAApkWiAwAATCvI1wH4k0prXWfksOBAWSwWH0cDAID5kei0o7R5a+v+TYrSytszSHYAAPAymq68zHmYucRQcwAA2gs1Ol5mG2Z+oqpGldYae60OAADwPrcTnZqaGi1fvlz5+fk6cOCAamtrHY6vW7fOY8GZRd0wc3JKAADam9vfvjNmzNDy5ct12WWX6ZxzzqGfSStVWmvolAwAgJe5nej89a9/1d/+9jeNHTvWG/H4jbR5a+mUDACAl7ndGTkkJERnnnmmN2IxvcbWvzpcYVWltVqV1moW/QQAwMPcTnTuuece/fa3v+30X8p5eXlKSUnR0KFD2+2aja1/lTZvrVJmr1HK7DWscA4AgIe53XT1wQcfaP369Xrrrbc0cOBABQcHOxz/5z//6bHgvCknJ0c5OTkqLy9XZGRku13XYrGoZ9cQpSVFqXDPUYdjtmHndFwGAMAz3P5G7dGjhyZMmOCNWPxG/SHnkhh2DgCAl7id6CxbtswbcfgdhpwDAOB9zIzcwVRaa+z9dAzDoKMyAABt4FKVwnnnnaf8/HxFRUXpxz/+cbPDobdt2+ax4PyRbdj5336eoWv+UKCt3/fjse07Wc3CoAAAuMqlROdnP/uZQkNDJUnjx4/3Zjx+yTbs3NY5uXDPUe0rO2FPcmz7Llv8gXbuL5fEwqAAALjCYvh5m4ht1NWxY8cUERHhszgMw9DhCqtbnZKLHs6inw8Au0prtVJmr5HE5wPMz9Xv71bfBYWFhdq5c6ckKSUlRampqa0tCmp62HlKfISKvq/FAQAA7nE70fnmm2903XXX6cMPP1SPHj0kSWVlZfrJT36iv/71r+rTp4+nY/QbzsPOJckwpIFz1jR6PutlAQDQPLdHXd16662qqqrSzp07deTIER05ckQ7d+5UbW2tbr31Vm/E6Fdsw85tj+ZymLR5a5lNGQCAZrhdo/Pee+/pP//5j5KTk+37kpOTtXjxYl1wwQUeDQ6Nq9+cxWzKAAA0ze0anb59+6qqqqrB/pqaGiUkJHgkKPzAeSHQtKQorZ4+wmG9LMOQfb4d5twBAOAHblcDPPnkk5o+fbry8vKUlpYmqa5j8owZM/Sb3/zG4wH6O+d+O7Y+OeEhgfZzrl5SYB92LtXV+Ky8PUPhIfTfAQD4N5cSnaioKIcvzIqKCg0bNkxBQXVPr66uVlBQkG6++Wbm2fGClpaL2Ok0Kqtof7kGzlnDXDsAAL/nUqKzaNEiL4cBT9jyYKayl26m/w4AAN9z6RswOzvb23HAA7qGBuqNu0a4PfEgAABmxaKenVRjnZQb67/jrP5CoXRcBgCYHW0anVRTnZSd2UZkSVKXoECHhUIl1swCAJgbiU4n1lInZUm6fPEHKj5UIanx5SToxwMAMDOarkzOluRIckhy3r9vlP3n+vPw0JQFADATtxKdqqoqBQUFaceOHd6KBx7g3H+nf6+uDc9xmocnZfYapcxew5ISAABTcau9Ijg4WImJiaqpqWn5ZPiMc/+dLkGBuvYPBfZV0W0dl23qz8NDUxYAwEzc
/jZ78MEH9cADD+hPf/qToqOjvRGTW77++mvdcMMNOnDggIKCgvTQQw/pmmuu8XVYPufcf8e543L9FdIBADArtxOdZ599Vrt371ZCQoKSkpLUtatjs8i2bds8FpwrgoKCtGjRIg0ZMkQlJSVKTU3V2LFjG8Tl71zpuGxTaa1pchQXAACdiduJTkdb4iE+Pl7x8fGSpLi4OPXq1UtHjhwh0WmDtHlrGXYOADAFtxOdOXPmeDSADRs26Mknn9TWrVu1f/9+rVq1qkEylZeXpyeffFIlJSUaPHiwFi9erPT09AZlbd26VTU1Nerbt69HY/QX9Yef01cHAGAGrRpeXlZWphdeeEEzZ87UkSNHJNU1We3bt8/tsioqKjR48GDl5eU1enzFihXKzc3VnDlztG3bNg0ePFhZWVk6cOCAw3lHjhzRjTfeqOeee67Z6506dUrl5eUOD3/U2MzKq6ePUOGsTB9GBQCAZ7n95/onn3yizMxMRUZG6quvvtLUqVMVHR2tf/7zn9q7d6/++Mc/ulXemDFjNGbMmCaPP/XUU5o6daqmTJkiSVqyZIneeOMNLV26VL/61a8k1SUv48eP169+9Sv95Cc/afZ6jz76qH7961+7FaMZNTWzcv3lI+irAwDo7Nyu0cnNzdVNN92kL774Ql26dLHvHzt2rDZs2ODR4KxWq7Zu3arMzB9qGQICApSZmamCggJJdWs33XTTTbrooot0ww03tFjmzJkzdezYMfvj66+/9mjMnYmtg3J4SFCjyUzavLXMqwMA6NTcTnS2bNmin//85w329+7dWyUlJR4JyubQoUOqqalRbGysw/7Y2Fj7tT788EOtWLFCr776qoYMGaIhQ4bo008/bbLM0NBQRUREODzwA+cmLVtfHQAAOiO3m65CQ0Mb7dfy+eef67TTTvNIUO4YMWKEamtr2/26ZmVr0jpcYVXavLVNnmcYRosLigIA4Gtu1+iMGzdODz/8sKqqqiTVfTHu3btX999/v6666iqPBterVy8FBgaqtLTUYX9paani4uLaVHZeXp5SUlI0dOjQNpVjRo311am/DpZhGCwbAQDoFNxOdBYuXKjjx48rJiZGJ06c0IUXXqgzzzxT3bt31/z58z0aXEhIiFJTU5Wfn2/fV1tbq/z8fGVkZLSp7JycHBUVFWnLli1tDdP00uatdUhoTlTVaOv3y0lING8BADout5uuIiMj9c477+iDDz7QJ598ouPHj+u8885z6DDsjuPHj2v37t327eLiYm3fvl3R0dFKTExUbm6usrOzlZaWpvT0dC1atEgVFRX2UVjwDltfnUISGgBAJ9bq2eBGjBihESNGtDmAwsJCjRo1yr6dm5srScrOztby5cs1ceJEHTx4ULNnz1ZJSYmGDBmit99+u0EHZXhW/eHnldaaZvvrAADQUbUq0cnPz9fTTz+tnTt3SpLOPvts3X333a2q1Rk5cmSL/TumTZumadOmtSZUtIE762MBANARud1H53e/+51Gjx6t7t27a8aMGZoxY4YiIiI0duzYJmc37ojojAwAgPm5/ef6ggUL9PTTTzvUsNx1110aPny4FixYoJycHI8G6C05OTnKyclReXm5IiMjfR0OAADwArdrdMrKyjR69OgG+y+99FIdO3bMI0Gh4zKMuuHmAAB0Bq2aR2fVqlUN9r/22mu6/PLLPRIUOq6rlxQ02jHZNteO7cG8OgCAjsDtpquUlBTNnz9f7777rn0um40bN+rDDz/UPffco2eeecZ+7l133eW5SNEh7Nz/w6zYKfERKvp+2zn5SYmP0MrbM2SbMJnZkwEAvmAx3PzTu3///q4VbLHof//7X6uCag95eXnKy8tTTU2NPv/8cx07dox1r5pQaa1Wyuw1DvsKZ2UqOjxE1/6hwGGunaakJUV9n/iQ7ADeUv9eLXo4i1GTMDVbH9uWvr/dvguKi4vbFFhHQWdk1zlPHpiWFKWeXUMc5tqxMQzpmiUF9poeG9tkg3zw
AgDaE986aJFzQlO/GaqxuXbeuGuE/VwmGwQA+BKJDlzizuSBTDQIAOgo3B51BQAA0FnwZzc6FNvq6BIjtQAAbee3iU79UVfoGAzD0NVLCrS1XqdnRmoBANrC7aart99+Wx988IF9Oy8vT0OGDNHkyZN19GjLw4w7ipycHBUVFWnLli2+DgXfO1FVY09ypB9GagEA0FpuJzr33nuvysvrhg5/+umnuueeezR27FgVFxcrNzfX4wECAAC0Vqvm0UlJSZEk/eMf/9Dll1+uBQsWaNu2bRo7dqzHAwQAAGgtt2t0QkJCVFlZKUlau3atLr30UklSdHS0vaYHAACgI3C7RmfEiBHKzc3V8OHDtXnzZq1YsUKS9Pnnn6tPnz4eDxDmVX+ElcSq6AAAz3M70Xn22Wd155136u9//7t+//vfq3fv3pKkt956S6NHj/Z4gDCP+olMl6BAXfOHAofOxwAAeJrbiU5iYqJWr17dYP/TTz/tkYDaC8PL21/9pSDqr3zurLljAAC4w6VEx52+N51lBXAW9WwfzguC2tRPZApnZSo8JNC+bRjSwDmOq6UDANAaLiU6PXr0aHHSNsMwZLFYqCGBA+cFQRtb5DM8JNBhbaxKa3W7xggAMC+XEp3169d7Ow6YWHst8snyEQAAZy59+1x44YXejgN+wrkpKy0pSmHBgU2ebxg/1PA0l7ywfAQAoDGt/jO7srJSe/fuldVqddh/7rnntjkomJdzU1ZLNS9XLynQzu/78zSXvDS1fER71CQBADout78FDh48qClTpuitt95q9Dh9dNASd5qydtbvtLznqA5XWBUeEmivBbIlTIbh+TgBAJ2f24nO3XffrbKyMm3atEkjR47UqlWrVFpaqnnz5mnhwoXeiBGws3VkTk2KkiR7LU7/Xl19FhMAoONyO9FZt26dXnvtNaWlpSkgIEBJSUm65JJLFBERoUcffVSXXXaZN+KEn3OeW8d5osHiQxXtHRIAoBNwe62riooKxcTESJKioqJ08OBBSdKgQYO0bds2z0bnRXl5eUpJSdHQoUN9HQqc2Dos26QlRWn19BEqejhLhbMyG5xfvzYnJb5zzOMEAGgfbtfoJCcna9euXerXr58GDx6sP/zhD+rXr5+WLFmi+Ph4b8ToFUwY2HE11WG5qX49q6ePkK1/MpMNAgDqczvRmTFjhvbv3y9JmjNnjkaPHq2//OUvCgkJ0fLlyz0dH/yUOx2WLRbZz60/2aBtba3GRnYx5w4A+Ae3E53/+7//s/+cmpqqPXv26LPPPlNiYqJ69erl0eCAtrB1XHYels6cOwDgP9zuo+MsPDxc5513HkkOOgTn/j3SD3Pq2DQ15w4AwHxcqtHJzc3VI488oq5duyo3N7fZc5966imPBAa0Rv3+PY2tqwUA8C8uJTofffSRqqqqJEnbtm1rsoqfqn90BO21thYAoONze1HPd99911uxAC1yd60sAIB/c+vP3qqqKoWFhWn79u0655xzvBUT0CR318qyqbTWMLoKAPyQW52Rg4ODlZiYyHpW8Clb01R4SJDLiUvavLW6ZkmBDBbFAgC/4vaoqwcffFAPPPCAjhw54o14AI9xHoHl7ugqwzBUaa1WpbWaBAkAOim3e2w+++yz2r17txISEpSUlKSuXR0XU+xMy0DA3GzNXIcrrG6PvmKuHQAwB7cTnZ/97Gem+LDPy8tTXl4ezXAmV9fM5X5n5abm2mE0FwB0Lm5/as+dO9cLYbQ/1roCAMD83O6jc/rpp+vw4cMN9peVlen000/3SFAAAACe4Hai89VXXzXa3HPq1Cl98803HgkK8KW6Tsg0aQKAGbjcdPWvf/3L/vOaNWscmntqamqUn5+v/v37ezY6oJ05d0IGAHRuLic648ePl1TXuTM7O9vhWHBwsPr166eFCxd6NDigvdgmFHTuhJwSH6Gi/eU+jAwA0BYuJzq1tbWSpP79+2vLli2sVg5TSZu3VmlJUXrp5nT7vsJZmQoL
DtTAOWt8GBkAoC3c7qNTXFxMkgPTSImPsP/sPKFgeEigTDCTAgD4NbcTHUmaNm0aMyOjU6q01qj+JMcrb89Q4axM3wUEAPAqlxOd+iOqXn75ZR0/flySNGjQIH399deejwzwAtuaVzYWi1o1oSAAoHNwOdE566yzlJSUpMmTJ+vkyZP25Oarr75SVVWV1wIE2sp5zavmOhefYFg5AJiKy4lOWVmZVq5cqdTUVNXW1mrs2LEaMGCATp06pTVr1qi0tNSbcQKtZlvzypUmqgueWN8OEQEA2ovLiU5VVZXS09N1zz33KCwsTB999JGWLVumwMBALV26VP3791dycrI3YwVarbk1r5xrfKS6RTzDgmnSAoDOzuXh5T169NCQIUM0fPhwWa1WnThxQsOHD1dQUJBWrFih3r17a8uWLd6MFfAKW41P/RFXYcGBpli8FgD8ncs1Ovv27dOsWbMUGhqq6upqpaam6oILLpDVatW2bdtksVg0YsQIb8YKtIlzzU39Wpu6Gp8g+6OxJKduxJbRYD8AoOOyGK345I6KitKGDRu0c+dO3XjjjYqLi1NpaanS09P13nvveSNOr7GtXn7s2DFFRES0/AR0aoZh2GtuXKm1qbRWK2X2DxMGpiVFaeXtGdT2oEOq//+16OEshYe4XGkPdDqufn+3ah4dSYqMjNS1116r4OBgrVu3TsXFxbrzzjtbW1y7y8vLU0pKioYOHerrUNCO6tfcuJKsONcCOU8qCADo2FqV6HzyySfq06ePJCkpKUnBwcGKi4vTxIkTPRqcN+Xk5KioqIh+RWiWOyO2AAAdT6vqNfv27Wv/eceOHR4LBuiInEdsVTrNtdNYE5i7TWQAAO+gARdwU9q8tY7bTv12DMPQ1UsK7Kug068HAHyn1X10AH/S2Fw7NoV7jupwhdU+IutEVY09ybEdP1FVN2Kr0lqtSms1o7cAoJ1QowO4oLG5diqtNfbanbR5a+01N40xDFHLAwA+QI0O4CLnuXZ6dg1xeURWU7U8AADvItEBWokRWQDQ8ZHoAG3Q3BpaAADfI9EBPKhumYiG+09YaaYCAF+gMzLgQWnz1iolvuFU5Bc8sd4H0QAAqNEB2sh56HnR/nL7z85JT2NJEADAe6jRAdrI1in5cIW1wWSCdUPIf9g2DGngnDUCALQPEh3AA5rqlGyxyGEF6UprdXuGBQB+j6YrwEOcm7DSkqIUFsyILADwJWp0AA9xnj25My3mWX8RUpvOFD8ANIVEB/Ag2+zJnYnzIqQ2LFMBwAxougL8nPPyFDYsUwHADEh0ANgVzspkSQsAptK56tgBeBXLWQAwG2p0AACAaZHoAAAA0zJFojNhwgRFRUXp6quv9nUogMsqrTWqtFar0lotwzBkGIbDdnPqn1v/0Vg57pQLAGZjij46M2bM0M0336yXXnrJ16EALqu/XETq9xMN2kY/NTe0u6nh4I2V4065AGBGpkh0Ro4cqXfffdfXYQAtss2eXOiUpDgnLbah3Y3NydPUcPDGynGnXAAwI583XW3YsEFXXHGFEhISZLFY9OqrrzY4Jy8vT/369VOXLl00bNgwbd68uf0DBTzANnty0cNZKno4q8Wh3I03Q/0wt03hrEyXygEAf+XzP+sqKio0ePBg3XzzzbryyisbHF+xYoVyc3O1ZMkSDRs2TIsWLVJWVpZ27dqlmJgYt6936tQpnTp1yr5dXl7epvgBd7k6e7JhyKGJyrkZSqobDk7tDAA0zec1OmPGjNG8efM0YcKERo8/9dRTmjp1qqZMmaKUlBQtWbJE4eHhWrp0aauu9+ijjyoyMtL+6Nu3b1vCB9rEeSHQlPgI+8/OTVRb9xx12GbRUABoWYf+U9BqtWrr1q2aOXOmfV9AQIAyMzNVUFDQqjJnzpyp3Nxc+3Z5eTnJDnzGeSFQw5AGzlnT4vMKZ2WqZ9cQOhUDQAs6dKJz6NAh1dTUKDY21mF/bGysPvvsM/t2ZmamPv74Y1VUVKhP
nz5auXKlMjIyGi0zNDRUoaGhXo0bcEf9pqxKa7VLzwkPYWVxAHBFh050XLV27dqWTwLgNlvH57BgEisAnVOHTnR69eqlwMBAlZaWOuwvLS1VXFycj6IC/Idtrh/m3wHQWfm8M3JzQkJClJqaqvz8fPu+2tpa5efnN9k05aq8vDylpKRo6NChbQ0T6JDqd2yu/3NLnDtISz/MvwMAnY3Pa3SOHz+u3bt327eLi4u1fft2RUdHKzExUbm5ucrOzlZaWprS09O1aNEiVVRUaMqUKW26bk5OjnJyclReXq7IyMi2vgygw6mrgan72dVOzpJjB+lKa43DDM4A0Nn4PNEpLCzUqFGj7Nu2EVHZ2dlavny5Jk6cqIMHD2r27NkqKSnRkCFD9PbbbzfooAyYzQlr22pQLBY12cnZMAyHkV4Nn+vaXD8A0NH5/JNs5MiRLS40OG3aNE2bNq2dIgI6hgueWO+Vcp0nInSnWQsAOpsO3UfHm+ijg46osf4xriQizs9rbjJB54kIi/YzOzgA8/J5jY6v0EcHHZHzBIKSa/1rnJ/HcHAAqOO3iQ7QUTn3j3F1EkH61QBAQ37bdAUAAMyPP/8AtEr9kVveaCrzdvkA/AOJDgC3GYbhMHLL0zMne7t8AP7Db5uuGHWFzsKdEVXuqj+iy51ynUdueXrmZG+XD8B/+G2NDqOu0Fl4c0RV/dmTaR4CYEZ+m+gAnYm3RlTVnz0ZAMzIb5uuAACA+fGnHOBn2rqGlmEYqnSxDFdGTjG6CoA3kegAfqYta2g5j4Zy59zGRk41dQ4AeIrfNl0x6gr+pLE1tFozest5NFRz63C5MnKK0VUAvM1va3QYdQV/0tgaWm1tJiqclamw4MAW1+ECAF/y20QH8DeeHrkVHuKZuXwAwJv8tukKAACYH4kOAAAwLZquALjEMORyR2F3hqC35ToA0BISHQAuuXpJgXbuL2/xPHeGoLflOgDgCr9tumJ4OeCe+slHc0PT3RmC3tJ13H0uADjz2xodhpcDrVM4K1M9u4a4NDS9LUPQGb4OwBP8NtEB0DrhIa7Pv9OWIegMXwfgCX7bdAUAAMyPRAcAAJgWiQ4AADAtEh0AAGBadEYG4BGV1poWV0M3DKnSWu3wnLYwDMM+uWBbFykFYE4kOgA8Im3eWqUlRemlm9ObPMeTkwE6T0yYlhSllbdnkOwAcOC3TVdMGAi0XVhwoNKSouzbhXuONrt8Q1NJTmpSlFLrldPchIQ2zhMTtnRtAP7Jb2t0mDAQaDuLxaKVt2focIVVafPWuvy8wlmZDvPk2JIamqEAeJrfJjoAPMNisbg9uV94SKDCQxp+/DS2DwDawm+brgAAgPmR6AAAANMi0QEAAKZFogMAAEyLRAcAAJgWiQ4AADAtxnIC6DTqL/lgGG0vw3n+Hts+5vABzINEB0CnYBhyWPIhJT6iFWU4Lhthm425/gzLLCUBmIvfNl2xBATQMuclHlxZmsFbnJd8KGrFmlnOZWzdc9RhW2IpCcBs/LZGhyUggJbZlnjwh6UZ3r9vlC54Yr2vwwDgYX6b6ABwTd0SD+b/qAhzcxkLAJ2D3zZdAQAA8yPRAQAApkWiAwAATItEBwAAmBaJDgAAMC0SHQAAYFokOgAAwLRIdAAAgGmR6AAAANMi0QEAAKZFogMAAEzL/AvYAGhXJ6zeWfnblXINQ6q0Vkv6YQFSwzDsi5IahmvXqrTWNLqAaf2ybNeQ1OZFT+uXa+aFUwFfINEB4FHeWgHclXKvXlKgnfvLJUlpSVH6288zdM0fCrR1z1FJUkp8hEvXSpu3VmlJUVp5e4Y96TAMQ1cv+aEsSUpNipIk+z7n57jCudzWlAGgaX7bdJWXl6eUlBQNHTrU16EAnV5YcKDSvv/St0lLirLXeHiy3NSkKHuCITkmL7YkR5IK9xzVkUqrQ2JSVO94S9cq3HPUofbmRFWNQ1lSXYJTf5/zc1zhXG5rygDQNL+t0cnJyVFOTo7Ky8sV
GRnp63CATs1isWjl7RkNmnXaWivRVLmSHJqjBs5Z06br1F1LWnl7hg5XWJU2b22z575/3yiv1VwB8Cy/TXQAeJbFYlF4iOc/Upoq17bP1ifHc9dquRYqzIVzAHQMftt0BQAAzI9EBwAAmBaJDgAAMC0SHQAAYFokOgAAwLRIdAAAgGmR6AAAANMi0QEAAKZFogMAAEyLRAcAAJgWiQ4AADAtEh0AAGBaJDoAAMC0SHQAAIBpkegAAADTItEBAACmRaIDAABMi0QHAACYFokOAAAwLRIdAABgWqZIdFavXq3k5GT96Ec/0gsvvODrcAAAQAcR5OsA2qq6ulq5ublav369IiMjlZqaqgkTJqhnz56+Dg0AAPhYp6/R2bx5swYOHKjevXurW7duGjNmjP7973/7OiwAANAB+DzR2bBhg6644golJCTIYrHo1VdfbXBOXl6e+vXrpy5dumjYsGHavHmz/di3336r3r1727d79+6tffv2tUfoADq4E9aaNj2/0lojwzDa/BzDMFRprValtdrhWN1+12KsX4ZzWfWPuco5pua2mzqnteU39hrayp3Y2lq+J67h7Xg7go7yGn3edFVRUaHBgwfr5ptv1pVXXtng+IoVK5Sbm6slS5Zo2LBhWrRokbKysrRr1y7FxMS4fb1Tp07p1KlT9u3y8vI2xQ+g47rgifVten7avLVKS4rSytszWvUci8UiwzB09ZICbd1ztO54vfLq72+Ocxn2ayVF6W8/z9A1f3CtnKbKS02KkqQmtxvbV/81ulu+82toqpzWvh5PlNlc+W29hrfj7QicX2PRw1kKD/FNyuHzGp0xY8Zo3rx5mjBhQqPHn3rqKU2dOlVTpkxRSkqKlixZovDwcC1dulSSlJCQ4FCDs2/fPiUkJDR5vUcffVSRkZH2R9++fT37ggA4CAsOVNr3X3RS3Yd6WHCgV6+ZEh/R7LbzvvoxOcdbuOeoTlQ51ro4n5OaFGX/Mnd+zomqGocvSNsx5/2NxWjjfG79so5UWhsca+lLxbm8rXuONrvd2L7Gfi+ulu/8Gpoqx1VN/Y49pbHff1uu4e14O4Km/s/6gs9rdJpjtVq1detWzZw5074vICBAmZmZKigokCSlp6drx44d2rdvnyIjI/XWW2/poYcearLMmTNnKjc3175dXl5OsgN4kcVi0crbM+wf5GHBgV7/y7Xur+Mftg1DGjhnTZPn1I/JFu/hCqvS5q1ttHyLRQ1ek6Rmn9OcwlmZCgsObBBjU+dKatV1WuP9+0a1uWasvsJZmQoPCVSltabdXoMnefr34Q9s/799pUMnOocOHVJNTY1iY2Md9sfGxuqzzz6TJAUFBWnhwoUaNWqUamtrdd999zU74io0NFShoaFejRuAI4vF0q7V1haLHK7XWN8V53Mcj1kUHtL8B3Njr6ml5zTFnee19hqtFebh64WHBPqsCcMTPP378AfhId7/46Y5nfd/Wz3jxo3TuHHjfB0GAADoYHzeR6c5vXr1UmBgoEpLSx32l5aWKi4urk1l5+XlKSUlRUOHDm1TOQAAoOPq0IlOSEiIUlNTlZ+fb99XW1ur/Px8ZWS4PgqiMTk5OSoqKtKWLVvaGiYAAOigfN50dfz4ce3evdu+XVxcrO3btys6OlqJiYnKzc1Vdna20tLSlJ6erkWLFqmiokJTpkzxYdQAAKAz8HmiU1hYqFGjRtm3bSOisrOztXz5ck2cOFEHDx7U7NmzVVJSoiFDhujtt99u0EEZAADAmc8TnZEjR7Y4Y+K0adM0bdq0dooIAACYRYfuo+NNdEYGAMD8/DbRoTMyAADm57eJDgAAMD8SHQAAYFokOgAAwLT8NtGhMzIAAObnt4kOnZEBADA/n8+j42u2OXzKy8t9HAmA1qi0Vqv2VKXDvvLyclU7rV7e0jnNlWv7fKi/3dhznZ9THRLU5LWdy2uu/OZi+a68vE2vrSWNld/SdZorv7HfS0vxtqQ1729ry6//+2jtNbwd
b0fgyfe3KbZ7oaW5+CxGS2eY3DfffKO+ffv6OgwAANAKX3/9tfr06dPkcb9PdGpra/Xtt9+qe/fuslgsHimzvLxcffv21ddff62IiAiPlAnv4j3rnHjfOh/es86pI75vhmHou+++U0JCggICmu6JY556slYKCAhoNhNsi4iIiA7zHwKu4T3rnHjfOh/es86po71vkZGRLZ7jt52RAQCA+ZHoAAAA0yLR8YLQ0FDNmTNHoaGhvg4FLuI965x43zof3rPOqTO/b37fGRkAAJgXNToAAMC0SHQAAIBpkegAAADTItEBAACmRaLjBXl5eerXr5+6dOmiYcOGafPmzb4OCU2YO3euLBaLw+Oss87ydVhwsmHDBl1xxRVKSEiQxWLRq6++6nDcMAzNnj1b8fHxCgsLU2Zmpr744gvfBAtJLb9nN910U4N7b/To0b4JFpKkRx99VEOHDlX37t0VExOj8ePHa9euXQ7nnDx5Ujk5OerZs6e6deumq666SqWlpT6K2DUkOh62YsUK5ebmas6cOdq2bZsGDx6srKwsHThwwNehoQkDBw7U/v377Y8PPvjA1yHBSUVFhQYPHqy8vLxGjz/xxBN65plntGTJEm3atEldu3ZVVlaWTp482c6Rwqal90ySRo8e7XDvvfLKK+0YIZy99957ysnJ0caNG/XOO++oqqpKl156qSoqKuzn/OIXv9Drr7+ulStX6r333tO3336rK6+80odRu8CAR6Wnpxs5OTn27ZqaGiMhIcF49NFHfRgVmjJnzhxj8ODBvg4DbpBkrFq1yr5dW1trxMXFGU8++aR9X1lZmREaGmq88sorPogQzpzfM8MwjOzsbONnP/uZT+KBaw4cOGBIMt577z3DMOruq+DgYGPlypX2c3bu3GlIMgoKCnwVZouo0fEgq9WqrVu3KjMz074vICBAmZmZKigo8GFkaM4XX3yhhIQEnX766br++uu1d+9eX4cENxQXF6ukpMThvouMjNSwYcO47zq4d999VzExMUpOTtYdd9yhw4cP+zok1HPs2DFJUnR0tCRp69atqqqqcrjXzjrrLCUmJnboe41Ex4MOHTqkmpoaxcbGOuyPjY1VSUmJj6JCc4YNG6bly5fr7bff1u9//3sVFxfrggsu0Hfffefr0OAi273Ffde5jB49Wn/84x+Vn5+vxx9/XO+9957GjBmjmpoaX4cGSbW1tbr77rs1fPhwnXPOOZLq7rWQkBD16NHD4dyOfq/5/erl8G9jxoyx/3zuuedq2LBhSkpK0t/+9jfdcsstPowMMLdJkybZfx40aJDOPfdcnXHGGXr33Xd18cUX+zAySFJOTo527Nhhij6L1Oh4UK9evRQYGNigB3ppaani4uJ8FBXc0aNHDw0YMEC7d+/2dShwke3e4r7r3E4//XT16tWLe68DmDZtmlavXq3169erT58+9v1xcXGyWq0qKytzOL+j32skOh4UEhKi1NRU5efn2/fV1tYqPz9fGRkZPowMrjp+/Li+/PJLxcfH+zoUuKh///6Ki4tzuO/Ky8u1adMm7rtO5JtvvtHhw4e593zIMAxNmzZNq1at0rp169S/f3+H46mpqQoODna413bt2qW9e/d26HuNpisPy83NVXZ2ttLS0pSenq5FixapoqJCU6ZM8XVoaMQvf/lLXXHFFUpKStK3336rOXPmKDAwUNddd52vQ0M9x48fd/hLv7i4WNu3b1d0dLQSExN19913a968efrRj36k/v3766GHHlJCQoLGjx/vu6D9XHPvWXR0tH7961/rqquuUlxcnL788kvdd999OvPMM5WVleXDqP1bTk6OXn75Zb322mvq3r27vd9NZGSkwsLCFBkZqVtuuUW5ubmKjo5WRESEpk+froyMDJ1//vk+jr4Zvh72ZUaLFy82EhMTjZCQECM9Pd3YuHGjr0NCEyZOnGjEx8cbISEhRu/evY2JEycau3fv9nVYcLJ+/XpDUoNHdna2YRh1Q8wfeugh
IzY21ggNDTUuvvhiY9euXb4N2s81955VVlYal156qXHaaacZwcHBRlJSkjF16lSjpKTE12H7tcbeL0nGsmXL7OecOHHCuPPOO42oqCgjPDzcmDBhgrF//37fBe0Ci2EYRvunVwAAAN5HHx0AAGBaJDoAAMC0SHQAAIBpkegAAADTItEBAACmRaIDAABMi0QHAACYFokOAAAwLRIdAABgWiQ6ADoti8XS7GPu3LmSpFWrVun8889XZGSkunfvroEDB+ruu+/2aewA2geLegLotPbv32//ecWKFZo9e7Z27dpl39etWzfl5+dr4sSJmj9/vsaNGyeLxaKioiK98847vggZQDsj0QHQacXFxdl/joyMlMVicdgnSa+//rqGDx+ue++9175vwIABrGwO+AmargCYWlxcnP773/9qx44dvg4FgA+Q6AAwtenTp2vo0KEaNGiQ+vXrp0mTJmnp0qU6deqUr0MD0A5IdACYWteuXfXGG29o9+7dmjVrlrp166Z77rlH6enpqqys9HV4ALyMRAeAXzjjjDN066236oUXXtC2bdtUVFSkFStW+DosAF5GogPA7/Tr10/h4eGqqKjwdSgAvIxRVwBMbe7cuaqsrNTYsWOVlJSksrIyPfPMM6qqqtIll1zi6/AAeBk1OgBM7cILL9T//vc/3XjjjTrrrLM0ZswYlZSU6N///reSk5N9HR4AL7MYhmH4OggAAABvoEYHAACYFokOAAAwLRIdAABgWiQ6AADAtEh0AACAaZHoAAAA0yLRAQAApkWiAwAATItEBwAAmBaJDgAAMC0SHQAAYFr/D2Q++YNysY+8AAAAAElFTkSuQmCC", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], @@ -995,23 +1012,23 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ - "from skyllh.core.analysis_utils import extend_trial_data_file" + "from skyllh.core.utils.analysis import extend_trial_data_file" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Help on function extend_trial_data_file in module skyllh.core.analysis_utils:\n", + "Help on function extend_trial_data_file in module skyllh.core.utils.analysis:\n", "\n", "extend_trial_data_file(ana, rss, n_trials, trial_data, mean_n_sig=0, mean_n_sig_null=0, mean_n_bkg_list=None, bkg_kwargs=None, sig_kwargs=None, pathfilename=None, **kwargs)\n", " Appends to the trial data file `n_trials` generated trials for each\n", @@ -1078,14 +1095,14 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 34, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "[==========================================================] 100% ELT 0h:29m:56s\n" + "100%|██████████| 40001/40001 [1:33:15<00:00, 7.15it/s]\n" ] } ], @@ -1104,7 +1121,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -1112,26 +1129,23 @@ "output_type": "stream", "text": [ "TimeLord: Executed tasks:\n", - "[Generating background events for data set 0.] 0.002 sec/iter (40000)\n", - "[Generating background events for data set 1.] 0.003 sec/iter (40000)\n", - "[Generating background events for data set 2.] 0.003 sec/iter (40000)\n", - "[Generating background events for data set 3.] 0.005 sec/iter (40000)\n", - "[Generating background events for data set 4.] 0.019 sec/iter (40000)\n", - "[Generating pseudo data. 
] 0.027 sec/iter (40000)\n", - "[Initializing trial. ] 0.032 sec/iter (40000)\n", - "[Create fitparams dictionary. ] 1.1e-05 sec/iter (2375320)\n", - "[Calc fit param dep data fields. ] 3.3e-06 sec/iter (2375320)\n", - "[Get sig prob. ] 2.0e-04 sec/iter (2375320)\n", - "[Evaluating bkg log-spline. ] 2.8e-04 sec/iter (2375320)\n", - "[Get bkg prob. ] 3.5e-04 sec/iter (2375320)\n", - "[Calc PDF ratios. ] 6.8e-05 sec/iter (2375320)\n", - "[Calc pdfratio values. ] 8.5e-04 sec/iter (2375320)\n", - "[Calc pdfratio value product Ri ] 3.9e-05 sec/iter (2375320)\n", - "[Calc logLamds and grads ] 3.1e-04 sec/iter (2375320)\n", - "[Evaluate llh-ratio function. ] 0.005 sec/iter (475064)\n", - "[Minimize -llhratio function. ] 0.054 sec/iter (40000)\n", - "[Maximizing LLH ratio function. ] 0.054 sec/iter (40000)\n", - "[Calculating test statistic. ] 3.7e-05 sec/iter (40000)\n" + "[Generating background events for data set 0.] 0.003 sec/iter (40000)\n", + "[Generating background events for data set 1.] 0.005 sec/iter (40000)\n", + "[Generating background events for data set 2.] 0.004 sec/iter (40000)\n", + "[Generating background events for data set 3.] 0.008 sec/iter (40000)\n", + "[Generating background events for data set 4.] 0.029 sec/iter (40000)\n", + "[Generating pseudo data. ] 0.045 sec/iter (40000)\n", + "[Initializing trial. ] 0.126 sec/iter (40000)\n", + "[Get sig probability densities and grads. ] 2.6e-04 sec/iter (7959160)\n", + "[Evaluating bkg log-spline. ] 3.3e-04 sec/iter (7959160)\n", + "[Get bkg probability densities and grads. ] 4.0e-04 sec/iter (7959160)\n", + "[Calculate PDF ratios. ] 1.3e-04 sec/iter (7959160)\n", + "[Calc pdfratio value Ri ] 0.002 sec/iter (3979580)\n", + "[Calc logLamds and grads ] 4.4e-04 sec/iter (3979580)\n", + "[Evaluate llh-ratio function. ] 0.008 sec/iter (795916)\n", + "[Minimize -llhratio function. ] 0.166 sec/iter (40000)\n", + "[Maximizing LLH ratio function. ] 0.166 sec/iter (40000)\n", + "[Calculating test statistic. 
] 6.1e-05 sec/iter (40000)\n" ] } ], @@ -1149,14 +1163,14 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "-log10(p_local) = 2.93\n" + "-log10(p_local) = 2.89\n" ] } ], @@ -1167,12 +1181,12 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 37, "metadata": {}, "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3deXzU9bX/8dcxiOCCyqaRgARBSwANmBqp3opeUVAR6A9BsI9KbQUXtO1tq5YuiF2ktbW91VqFiksVxNqLiHKtbZHiQoNgc2WTpUAxECGAgBv7+f0x+X47mUySSchkZjLv5+PBI5nvzPc7J+M4Zz7rMXdHREQE4KhUByAiIulDSUFEREJKCiIiElJSEBGRkJKCiIiEWqQ6gCPRvn1779q1a6rDEBHJKEuXLt3u7h3i3ZfRSaFr164sWbIk1WGIiGQUM/tXTfep+0hEREJKCiIiElJSEBGRUEaPKYg0lQMHDlBWVsbevXtTHYpIwlq1akVeXh5HH310wucoKYgkoKysjBNOOIGuXbtiZqkOR6RO7s6OHTsoKysjPz8/4fPUfSSSgL1799KuXTslBMkYZka7du3q3brNyKRgZkPMbOru3btTHYpkESUEyTQNec9mZFJw97nuPu7EE09s0PmT565g1COLmFGyqZEjExHJbBmZFBpDyYadzCndnOowRBK2ceNGevfuHfe+1atXs3r16jqvsWDBAq666qrGDq1RjB07lueeey7VYWS9rBxonjSkFyu37El1GCIZxd1xd446Kmu/S2YF/dcVySAHDx7k+uuv5+yzz2bEiBF88skn3HPPPYwYMYIhQ4Ywbtw4gmqK69at49JLL+Wcc86hX79+/POf/6xyrbfeeou+ffuyfv16KioqGDhwIP369WP8+PGcfvrpbN++nY0bN9KzZ09uueUW+vXrx3vvvcfMmTPp06cPvXv35s477wyvd/zxx4e/P/fcc4wdOxaItABuv/12Pve5z9GtW7ewNeDuTJgwgYKCAq688kq2bduW5FdPEpGVLQWRIzF57opGb2kWnNaGSUN61fm41atX8+ijj3LBBRdwww038NBDDzFhwgRGjRoFwI9+9CNefPFFhgwZwnXXXcddd93F8OHD2bt3L4cPH+a9994D4M033+S2225jzpw5dOnShQkTJnDJJZfwne98h5dffpmpU6dWec7HHnuMhx56iC1btnDnnXeydOlSTj75ZC677DKef/55hg0bVmvc5eXlvP7667z77rtcffXVjBgxgtmzZ7N69WqWLVvG1q1bKSgo4IYbbjiCV1Eag1oKIhmkc+fOXHDBBQB88Ytf5PXXX+fVV19l5MiRDBkyhPnz57NixQo+/PBDNm/ezPDhw4HIIqZjjz0WgFWrVjFu3Djmzp1Lly5dAHj99de59tprARg0aBAnn3xy+Jynn346559/PhBpXQwYMIAOHTrQokULrrvuOhYuXFhn3MOGDeOoo46ioKCArVu3ArBw4UJGjx5NTk4Op512GpdcckkjvUpyJNRSEKmnRL7RJ0vsF
EMz45ZbbmHWrFnk5uYyc+ZM9u7dG3YhxZObm8vevXv5xz/+wWmnnQZQ6+OPO+648PfaHhcdW+zc+GOOOSbuNTTNN/2opSCSQTZt2sSiRYsAmDlzJhdeeCEAJ598Mh9//HHYX9+mTRvy8vJ4/vnnAdi3bx+ffPIJACeddBIvvfQSEydOZMGCBQBceOGFPPvsswC88sorfPDBB3Gfv7i4mL/97W9s376dQ4cOMXPmTC666CIATjnlFFatWsXhw4eZPXt2nX/L5z//eZ555hkOHTpEeXk5r776agNfFWlMaimIZJCePXvyxBNPMH78eHr06MHNN9/MBx98wNVXX02nTp347Gc/Gz7297//PePHj+cHP/gBRx99NH/4wx/C+0455RTmzp3L4MGDmT59OpMmTWL06NHMmjWLiy66iNzcXE444QQ++uijKs+fm5vLvffey8UXX4y7c8UVVzB06FAApkyZwlVXXUXnzp3p3bt3tXNjDR8+nPnz59OnTx/OPPPMMLlIalltzcF0V1RU5A0tsjPqkci3rVnj+zdmSNJMrVq1ip49e6Y6jBoFaxTOOuusBp2/b98+cnJyaNGiBYsWLeLmm2+mtLS0MUOUFIn33jWzpe5eFO/xadVSMLPjgIXAJHd/MdXxiGSLTZs2MXLkSA4fPkzLli2ZNm1aqkOSFElqUjCz6cBVwDZ37x11fBDw30AO8Dt3n1J5153As8mMSUSq69GjB//4xz9SHYakgWQPND8ODIo+YGY5wG+AwUABMNrMCszsUmAlsDXJMYmISA2S2lJw94Vm1jXm8HnAOndfD2BmzwBDgeOB44gkik/NbJ67H469ppmNA8YB4RxrERFpHKkYU+gEvBd1uwwodvcJAGY2FtgeLyEAuPtUYCpEBpqTG6qISHZJRVKIt1ol/HB398ebLhQREYmWisVrZUDnqNt5wJb6XEBFdkREkiMVSeEtoIeZ5ZtZS+Ba4IX6XOBIi+yIZJodO3ZQWFhIYWEhp556Kp06dQpvT548mauuuoqrr76awsJCSkpKwvNGjBjB+vXrKS4uprCwkC5dutChQ4fw3GXLlnHGGWewdu1aAA4cOECfPn3Ca/z4xz+mV69enH322dWuHdi5cycDBw6kR48eDBw4MFwNvXHjRlq3bh0+10033RSes3TpUvr06UP37t25/fbbq2x98eyzz1JQUECvXr0YM2ZMwq/Rhg0bKC4upkePHowaNYr9+/cDkRoSJ554YhjHPffcE56za9cuRowYwWc+8xl69uwZrhb/1re+xfz58xN+7nfffZf+/ftzzDHH8POf/zw8vnfvXs477zzOOeccevXqxaRJk+p1fuDQoUP07du3Si2Mu+++u8r7YN68eQnHW6tgj/Rk/ANmAuXAASIthK9UHr8CWAP8E/huA647BJjavXt3b6iRD7/pIx9+s8HnS3ZZuXJlqkMITZo0ye+77z53d3/zzTf9/PPP93feecffffddr6io8M2bN7u7+/Lly33YsGFVzn3sscf81ltvrXJs1qxZPnDgQHd3/8lPfuLjxo2rcu29e/e6u1e5drRvf/vbfu+997q7+7333ut33HGHu7tv2LDBe/XqFfdv+OxnP+tvvvmmHz582AcNGuTz5s1zd/c1a9Z4YWGh79y5093dt27dWu3cxx57zCdNmlTt+DXXXOMzZ850d/fx48f7Qw895O7ur776ql955ZVx4/jSl77k06ZNc3f3ffv2+QcffODu7hs3bgxfk0Rs3brVFy9e7BMnTgz/27i7Hz582D/88EN3d9+/f7+fd955vmjRooTPD/ziF7/w0aNHV/k7ot8HtYn33gWWeA2fr8mefTS6huPzgAanNXefC8wtKiq6saHXEDkSAwYMaNTrBXsQ1Vd5eTnt27enZcuWALRv3z687+mnnw63oKjNyJEjmT59Oj/72c94+OGHw/UKwbWDzeyirx1tzpw5YfzXX389AwYM4Kc//WmtMe/Zs4f+/SO7CXzpS
1/i+eefZ/DgwUybNo1bb7013KW1Y8eOdcYPkS+38+fPZ8aMGWEcd999NzfffHON5+zZs4eFCxfy+OOPA9CyZcvwdTz99NPZsWMH77//Pqeeemqdz9+xY0c6duzISy+9VOW4mYV1Jg4cOMCBAwfibgJY0/kAZWVlvPTSS3z3u9/l/vvvrzOWI6UN8UQy2GWXXcZ7773H5ZdfzuTJk/nb3/4W3vfGG29w7rnnJnSdX/3qV9x5551873vfo23btlWufeaZZ3LLLbdUuXa0rVu3kpubC0T2RooulrNhwwb69u3LRRddxGuvvQbA5s2bycvLCx+Tl5fH5s2R0rhr1qxhzZo1XHDBBZx//vm8/PLLCcW/Y8cOTjrpJFq0aFHtmgCLFi3inHPOYfDgwaxYsQKA9evX06FDB7785S/Tt29fvvrVr/Lxxx+H5/Tr14833ngDgG984xthN030vylTplCXQ4cOUVhYSMeOHRk4cCDFxcUJ/U2Br3/96/zsZz+LW/HuwQcf5Oyzz+aGG26ocRPD+kqrbS4SZWZDgCHdu3dPdSiSpRr6zb6xHX/88SxdupSnn36akpISRo0axZQpUxg7dizl5eV06NAhoeu8/PLL5Obmsnz58mrXfu2113j11VerXDsRubm5bNq0iXbt2rF06VKGDRvGihUr4m6/HXx7PnjwIGvXrmXBggWUlZXxH//xHyxfvpxDhw7xn//5n0BkDGP//v3hDrC///3v436bD67Zr18//vWvf3H88cczb948hg0bxtq1azl48CBvv/02DzzwAMXFxXzta19jypQp/PCHPwQi3963bInMgfnlL3+Z0N8cT05ODqWlpezatYvhw4ezfPnyGmttx3rxxRfp2LEj5557brX33M0338z3v/99zIzvf//7fPOb32T69OkNjjOQkS0F10CzSCgnJ4fi4mJuv/12HnzwQf74xz8C0Lp162p1DeLZsmULv/71r1m8eDHz5s3jnXfeqXLtAQMGMHny5CrXjnbKKadQXl4ORLqGgi6fY445hnbt2gFw7rnncsYZZ7BmzRry8vIoKysLzy8rKwvrOuTl5TF06FCOPvpo8vPzOeuss1i7di3t2rWjtLSU0tJS7rnnHm666abwdp8+fWjfvj27du3i4MGD1a7Zpk2bsAvniiuu4MCBA2zfvp28vDzy8vLCb+4jRozg7bffDuPau3cvrVu3Bo6spRA46aSTGDBgQMKtH4i09l544QW6du3Ktddey/z58/niF78Yvu45OTkcddRR3HjjjSxevDjh69YmI5OCiESsXr06nDkEUFpayumnnw5Ettlet25dndf4xje+wcSJE8nLy+P+++/n1ltvxd1rvXa0q6++mieeeAKAJ554IhzHqKio4NChQ0Ckq2bt2rV069Yt3Jb773//O+7Ok08+GZ4zbNiwsK7C9u3bWbNmDd26davzbzAzLr744rCeRHQc77//ftg6Wbx4MYcPH6Zdu3aceuqpdO7cOdxh9q9//SsFBQXhNdesWRN+o//lL38ZJqHof3fddVetcVVUVLBr1y4APv30U/7yl7/wmc98ps6/J3DvvfdSVlbGxo0beeaZZ7jkkkt46qmnAMJEDDB79uyEWx91UfeRSAb76KOPuO2229i2bRs5OTn06tUrrK985ZVXsmDBAi699NIaz//zn//Mpk2b+MpXvgLAkCFDmDZtGk8++SS9e/fmtttuY9euXbRo0YLu3btXqd0cuOuuuxg5ciSPPvooXbp0Ces2LFy4kB/84Ae0aNGCnJwcHn744XC84re//S1jx47l008/ZfDgwQwePBiAyy+/nFdeeYWCggJycnK47777wtZGXX76059y7bXX8r3vfY++ffuGf9Nzzz3Hb3/7W1q0aEHr1q155plnwq6lBx54gOuuu479+/fTrVs3HnvsMSAyKLxu3TqKiuLuLl3N+++/T1FREXv27OGoo47iV7/6FStXrqS8vJzrr7+eQ4cOcfjwYUaOHBlOK
3344YcBuOmmm2o8v02bNjU+5x133EFpaSlmRteuXXnkkUcSirUuqqegegqSgEysp/Dpp59y8cUX88Ybb5CTk5Oq0DLS7Nmzefvtt8PxhUxW33oK6j4SaaZat27N5MmTq8zCkcQcPHiQb37zm6kOIyXUfSSSIHfPuELzl19+eapDyEjXXHNNqkNoFA3pCcrIloJmH0lTa9WqFTt27GjQ/2QiqeDu7Nixg1atWtXrvIxsKYg0tWAaZUVFRapDiev9998H4PDhuDvOS5Zq1apVlYWCiVBSEElAMG8+XQXbOaTLojrJXBnZfSQiIsmRkUlB9RRERJIjI5OCBppFRJIjI5OCiIgkh5KCiIiElBRERCSkpCAiIqGMTAqafSQikhwZmRQ0+0hEJDkyMimIiEhyZHVSKNmwkxklm1IdhohI2sjapDC0sBMAc0q117yISCBrk8KY4i4U57dNdRgiImkla5OCiIhUl/VJYWX5HkY9skhjCyIiZGg9hcYqxxmMK6ws3wNEupRERLJZRrYUGmudwpjiLswa35+C3DaNFJmISGbLyKQgIiLJoaRQSWsWRESUFACtWRARCSgpoDULIiIBJYUo6kISkWynpFAp6EKaOHuZ1i2ISNbKyHUKyRCsUZhTulnrFkQka6mlECV63YK6kkQkGykpxKHZSCKSrdImKZhZTzN72MyeM7ObUxlLMBtJ+yKJSLZJ6piCmU0HrgK2uXvvqOODgP8GcoDfufsUd18F3GRmRwHTkhlXIoLWQsmGnZRs2AlojEFEmr9ktxQeBwZFHzCzHOA3wGCgABhtZgWV910NvA78Nclx1SkYX/jJ8D5AZFaSWgwi0twlNSm4+0JgZ8zh84B17r7e3fcDzwBDKx//grt/Driupmua2TgzW2JmSyoqKpIVemhMcZcqiUHdSSLSnKViSmon4L2o22VAsZkNAL4AHAPMq+lkd58KTAUoKiry5IX5b5quKiLZIhUDzRbnmLv7Ane/3d3Hu/tvar2A2RAzm7p79+4khVidpquKSDZIRVIoAzpH3c4DttTnAo1VT6EhNF1VRJqzVCSFt4AeZpZvZi2Ba4EX6nOBVLQUAto8T0Sas2RPSZ0JDADam1kZMMndHzWzCcCfiExJne7uK+pzXXefC8wtKiq6sbFjTlSwhiEwtLCTxhlEJOMlNSm4++gajs+jlsHkdBd0IQWCtQxzSjcrOYhIRqszKZjZBcDdwOmVjzciA8PdkhtarTENAYZ07949Jc8/prhLlQ/+GSWbNDNJRJqFRMYUHgXuBy4EPgsUVf5MmVQONMcTPTNJW2OISCZLpPtot7v/b9IjaQZit8ZQd5KIZJpEksKrZnYf8D/AvuCgu7+dtKjqkOruo5oE3UpBd5L2TRKRTJNI91ExkS6jnwC/qPz382QGVZd06z6KFbtvktY0iEimqLOl4O4XN0UgzdGY4i7hAPSoRxapK0lE0l6NScHMvujuT5nZf8W7393vT15YzYfGGUQkk9TWUjiu8ucJTRFIfaTrmEI8seMMmrYqIunM3Jtko9GkKCoq8iVLlqQ6jHoZ9cgiVpbvoSC3jVoM0mgGDBgAwIIFC1Iah2QGM1vq7kXx7ktk8Vo3IlXSzgccWAR8w93XN2qUWUIV3UQknSUy+2gG8CyQC5wG/AGYmcygmrN4Fd202E1E0kUi6xTM3X8fdfupyg3tUiaTxhRqEl24R4PQIpIuahxTMLNgf+g7gF1EymY6MAo4xt1/2CQR1iITxxTiiV7sBlCc31bJQepFYwpSHw0dU1hKJAkEldLGR93nQMqTQnOhGUoiki5qTArunt+Ugci/k8OoRxZRsmEnQx54nWNb5gCq1yAiTSOp9RSkYYYWdqJkw06Wbd7NCa3+/Z9ISUFEki0V5TilDtElPwty21CQ2ybFEYlItqi1pWBmBuS5+3tNFE9CmsPso7oE6xmGFnYKB6GDaauapSQiyVJrUnB3N7PngXObKJ6EpEON5mSLre4WTFkNf
tfCNxFJhkS6j/5uZimttJbtoruTomlLbhFpbIkMNF8M3GRmG4GP+XeN5rOTGZhUF0xVDRKEtuQWkcaWSFIYnPQopE7BGEPs7+pKEpHGlEiRnX+Z2YVAD3d/zMw6AMcnPzSJFjvGEBybUbKJibOXMad0s5KCiByxRHZJnUSkHOdZwGPA0cBTwAXJDU0SEVvdDbTQTUQaLpHuo+FAX+BtAHffYmZpV3gnm8XrTgoGoZUgRKQ+EkkK+yunpjqAmR1X1wnJlg3rFOojumsp2D8JIgPR2n1VROojkSmpz5rZI8BJZnYj8BdgWnLDqp27z3X3cSeeeGIqw0hLQb2GWeP7hyuhSzbsVN0GEUlIIgPNPzezgcAe4EzgB+7+56RHJo2qOL+tdl8VkToluvfRMuA1YGHl75JBivPbhi2H6O0yRERi1ZkUzOyrwGLgC8AIIiucb0h2YNL4ggHpOaWbmVGySd1JIlJNIgPN3wb6uvsOADNrB7wJTE9mYNL4gumrQJVKb+pOEpFAIkmhDPgw6vaHQFrtmirxRe+0KiKSiESSwmagxMzmECnDORRYbGb/BeDu9ycxPjkC8VZBw7/3UILIzCQtehORQCJJ4Z+V/wJzKn9qAVsGil3oBlWThJKCSHZLZErq5KYIRJpGdOshqAWtym4iElCNZglF75/0yf5DHNsyR11KIlkmrZKCmQ0DrgQ6Ar9x91dSHFKzVtNA9MryPXy492B4W0lBJHskPSmY2XTgKmCbu/eOOj4I+G8gB/idu09x9+eB583sZODngJJCEsUORMd2K4lI9klk6+yfAT8CPgVeBs4Bvu7uTyX4HI8DDwJPRl0zB/gNMJDIlNe3zOwFd19Z+ZDvVd4vKRC0HEo27IzbpRT9OLUiRJqXRLa5uMzd9xD5tl9GZP+jbyf6BO6+EIj92nkesM7d17v7fuAZYKhF/BT4X3d/O971zGycmS0xsyUVFRWJhiH1EGyq95PhfcJB6JXle1i2eXc4U2ll+R7ViBZphhJJCkdX/rwCmOnujdGv0ImqC+DKKo/dBlwKjDCzm+Kd6O5T3b3I3Ys6dOjQCKFITeLtuFqQ2ya8HbQitFWGSPORyJjCXDN7l0j30S2V5Tj3HuHzWpxj7u6/Bn5d58mqp5ByQReTdl4VaV7qbCm4+11Af6DI3Q8AHxNZ1XwkyoDOUbfzgC2Jnqx6Ck1vaGEnivPbhskgaEVojYNI81JjS8HMvhDnWPTN/zmC530L6GFm+US20bgWGHME15Mkq2nLDBFpXmrrPhpSy31OgknBzGYCA4D2ZlYGTHL3R81sAvAnIlNSp7v7isRCVveRiEiy1JgU3P3LjfEE7j66huPzgHkNvOZcYG5RUdGNRxKbNI6gcI9aEiKZL6HFa2Z2JdALaBUcc/d7khVUAvGopZAmhhZ2omTDTuaUbmZMcRdmlGxiTunmhNcwBI8PrqXEIpJaiSxeexg4FrgY+B2R6muLkxxXrdRSSB9B4Z6gtRBbvCc6SQDVEsac0s3apVUkjSTSUvicu59tZu+4+2Qz+wVHNsgszUzQWpg4exkntIq8peIlieA4VP3w1wwmkfSRSFL4tPLnJ2Z2GrADyE9eSHVT91F6if7WD5HtMJZt3l0lScQzo2QTJRt2UpzftkniFJG6JZIUXjSzk4D7gLeJzDz6XVKjqoO6j9JP7JTV6LGCmjbXix5L0JYZIukhkSI7P6z89Y9m9iLQyt13JzcsyXSxxXxqUpzfNhyXEJHUq23x2iXuPr+mRWzurnEFEZFmpraWwkXAfOIvYkt48VoyaEwhswUb6a0s36NBZpE0U9vitUlmdhSRbayfbcKY6qQxhcwWveNqbNU3rVsQSa1axxTc/XDldhRplRQkcxXnt2XW+P413h+7biE4BkoSIk0hkdlHfzazbwGziOyQCkAj1VUQCQWzlIIpqiUbdobHgqmtSgoiyZVIkZ0bgFuBhcDSyn9LkhmUND8ry/dUawFEi+1GihZdAS4wo
2STCvyIJEEiSaGnu+dH/wMKkh1YbcxsiJlN3b1bM2MzwdDCThTktok7hhAYU9wl7iK2YMpqrGCltKayijSuRLqP3gT6JXCsyWigObMkWoshSBhazCaSOrWtUziVSN3k1mbWl3+X0GxDZIM8kUYVnTyUFERSo7aWwuXAWCKlMn/Bv5PCHmBicsMSqa5kw85wdXRNW2eIyJGpbZ3CE8ATZvb/3P2PTRiTSI1iB6uDhXCgKasijSGRMYWewS9mdoy770tiPAnRiubmL3p8IVowCyloKQS3o5NFfYr8iEhVNc4+MrM7zKw/kaI6gZp3NmtC7j7X3cedeOKJqQ5FkmRMcRdmje8ffrAPLexEcX7bKkkiWAg3a3z/MDloVpLIkamtpbAauAboZmavAauAdmZ2lruvbpLoRCo1dBC6vuVBRbJdbesUPiAyoLwOGAD8uvL4XWb2ZpLjEjkisZXf1HIQSUxtLYVBwCTgDOB+4P+Aj939y00RmMiRUiIQqb/aZh9NBDCz/wOeAvoCHczsdeADd4+3pbZIysVbGR09nVVdSSI1S2T20Z/c/S3gLTO72d0vNLP2yQ5MpL6CGUjxZiTF3lZSEIkvkXKcd0TdHFt5bHuyAhJpiOhZSbHTWGOnr4pIzRJpKYTc/f+SFYjIkYi3v1J0jWitgBZJTCK7pKYd7ZIqIpIcGZkUtHgtu8VbyNaYjxfJZvXqPhJJB4luxR3v8UG5z/rORNIiOMkWGdlSEGmooOAPRGYjRa9lqK2amxbBSbZQS0GySnSrIRiAHvXIorCwT1AXOvjwV8tAso2SgmS92JlJweK3YF2DkoJkEyUFkSjBzqtAOO4gkk2UFCRrBbORSjbsrLYauibaLkOaOyUFyVrB+EIwswiqr4aOR9tlSHOmpCBZr75TXKOrv6nVIM1N2iQFM+sGfBc40d1H1PV4kXSgVoM0N0ldp2Bm081sm5ktjzk+yMxWm9k6M7sLwN3Xu/tXkhmPSGMryG1T5zhEbesfRNJNshevPU6kWE/IzHKA3wCDgQJgtJkVJDkOkZTRwjfJJElNCu6+EIjdnvI8YF1ly2A/8AwwNJlxiDRUsCVGbG2GWMH4gloDkulSsc1FJ+C9qNtlQCcza2dmDwN9zew7NZ1sZuPMbImZLamoqEh2rJLForfESKQWg1oD0hykYqDZ4hxzd98B3FTXye4+FZgKUFRU5I0cm0godlaS6jJINkhFS6EM6Bx1Ow/YUp8LqJ6CpIK24JZskIqWwltADzPLBzYD1wJj6nMBd58LzC0qKroxCfGJxBW7BXc8wRjEJ/sPcWzLHKD63koi6SypScHMZgIDgPZmVgZMcvdHzWwC8CcgB5ju7ivqed0hwJDu3bs3dsgiCQlaC8HuqoGC3DasLN/Dh3sPckKrtFkGJJKwZM8+Gu3uue5+tLvnufujlcfnufuZ7n6Gu/+4AddV5TVJqTHFXZg1vn+VMYdgM73owelEBqhF0om+yoikqXjV3mL3adIqamlsGZkU1H0k2SBY9AZUKycaUFKQxpaR5TjVfSTZTN1SkkwZ2VIQyXQN6UziTrAAAAgLSURBVAaaUbKJkg07w8pw0dcpLxjF8dtXJS1eyR4Z2VLQOgXJFMHahlhBN9DK8j0Jr4KOV/MhuM7+YzvyUfuejRO0ZLWMTArqPpJMEcxSipcYGtINVJzftlqroiC3DS0/2XZEcYoEMjIpiIhIcmhMQaSJBKuda9smI3oaalOIN+01kfuk+crIpKApqZJpgg/5urbgjp6G2hTiTXtN5D5pvjKy+0hjCpJpgrEFTSWVdJeRSUFERJIjI7uPRDJZ0CUTzEgKxhqC36N/BuKtUYi9jkhjyMikoDEFSWfRO6jGuy96zCD2MQW5beJuux27RiH2OiKNJSOTguopSDqLrdgWe1/sAG5tg7hBCwKqrlGIvY5IY9GYgoiIhJQUREQkpKQgIiKhjBxTEMkmwUykutY47GvThRklm4DIwHT0gHXwe0NWJ
0fPjgoGulXop/nKyKSg2UeSTmqbbXSkj49+TOzjo6+z7q8z2demS/hhHQxAB3WiP9x7MDyvvh/iQTKKniarQj/NV0YmBc0+knRS1wyiI3l8XTOZgvumbnuncuvsqmsWgg/0hs5SCupOQ9WZUFqZ3XxpTEFEREJKCiIiElJSEBGRkJKCiIiElBRERCSUkbOPRCQ9lGzYmZQ1DEHVt0D0tRtjbYSqytUsI5OC1imI1K04vy1DCztV+XBNhthtvhtjDcOc0s2sLN9DQW6bKtdrrEpwqipXs4zsPlLlNZHaBesLmuIDryC3TZV1C7G3j+S6qlbX9DIyKYiISHIoKYiISEhJQUREQkoKIiISUlIQEZGQkoKIiISUFEREJKSkICIiISUFEREJpc02F2Z2HPAQsB9Y4O5PpzgkEZGsk9SWgplNN7NtZrY85vggM1ttZuvM7K7Kw18AnnP3G4GrkxmXiIjEl+yWwuPAg8CTwQEzywF+AwwEyoC3zOwFIA9YVvmwQ0mOSyQrrSzfU6XWcuxmdnUJNpErzm9b530lG3Yyo2QTY4q71LrraXC7vvs0xV6zIdcJXo/6nJfqHVaD5y84rQ2ThvRq9OsntaXg7guB2Irh5wHr3H29u+8HngGGEkkQeXXFZWbjzGyJmS2pqKhIRtgiGWfBggUUnlMY976hhZ0ozm9Ln04nVttcrqGbzQUf6nXdDj60g11PIfJBPKd0c3gsuF1f0deMvm59BLuw1ue8YIfVZO8+W9fzJ0sqxhQ6Ae9F3S4DioFfAw+a2ZXA3JpOdvepwFSAoqIiT2KcIs3CmOIutX6jjW45JKI4vy1jirtU+VAMbkdvRx37oRnsehr9fEe6A2pwTWjY3xEbT6Yozm+blFYCpCYpWJxj7u4fA19O6AKqpyAikhSpmJJaBnSOup0HbKnPBVRPQUQkOVKRFN4CephZvpm1BK4FXqjPBcxsiJlN3b17d1ICFBHJVsmekjoTWAScZWZlZvYVdz8ITAD+BKwCnnX3FfW5rloKIiLJkdQxBXcfXcPxecC8ZD63iIjUX0Zuc6HuIxGR5MjIpKDuIxGR5MjIpCAiIslh7pm7/svMKoB/NfD09sD2RgynOdBrUp1ek+r0mlSXaa/J6e7eId4dGZ0UjoSZLXH3olTHkU70mlSn16Q6vSbVNafXRN1HIiISUlIQEZFQNieFqakOIA3pNalOr0l1ek2qazavSdaOKYiISHXZ3FIQEZEYSgoiIhLKuqRQQ33orGZmG81smZmVmtmSVMeTKvFqiptZWzP7s5mtrfx5cipjbGo1vCZ3m9nmyvdLqZldkcoYm5qZdTazV81slZmtMLOvVR5vFu+VrEoKUfWhBwMFwGgzK0htVGnjYncvbC5zrRvocWBQzLG7gL+6ew/gr5W3s8njVH9NAH5Z+X4prNzgMpscBL7p7j2B84FbKz9HmsV7JauSAjXXhxapqab4UOCJyt+fAIY1aVApVsNrktXcvdzd3678/UMiJQA60UzeK9mWFOLVh+5Uw2OziQOvmNlSMxuX6mDSzCnuXg6RDwOgY4rjSRcTzOydyu6ljOwmaQxm1hXoC5TQTN4r2ZYU4taHbvIo0s8F7t6PSLfarWb2+VQHJGntt8AZQCFQDvwiteGkhpkdD/wR+Lq770l1PI0l25LCEdeHbo7cfUvlz23AbCLdbBKx1cxyASp/bktxPCnn7lvd/ZC7HwamkYXvFzM7mkhCeNrd/6fycLN4r2RbUjji+tDNjZkdZ2YnBL8DlwHLaz8rq7wAXF/5+/XAnBTGkhaCD75Kw8my94uZGfAosMrd74+6q1m8V7JuRXPl9LlfATnAdHf/cYpDSikz60akdQCR8qwzsvU1qawpPoDINshbgUnA88CzQBdgE3CNu2fNwGsNr8kAIl1HDmwExgd96dnAzC4EXgOWAYcrD08kMq6Q8e+VrEsKIiJSs2zrPhIRkVooKYiISEhJQUREQkoKIiISUlIQEZFQi1QHI
JLJzKwdkc3PAE4FDgEVlbdnAyMrjx0mMnWzpMmDFKkHTUkVaSRmdjfwkbv/3Mz6A/cDA9x9n5m1B1oGq8dF0pVaCiLJkQtsd/d9AO6+PcXxiCREYwoiyfEK0NnM1pjZQ2Z2UaoDEkmEkoJIErj7R8C5wDgiYwyzzGxsSoMSSYC6j0SSxN0PAQuABWa2jMgmaY+nMiaRuqilIJIEZnaWmfWIOlQI/CtV8YgkSi0FkeQ4HnjAzE4iUtN3HZGuJJG0pimpIiISUveRiIiElBRERCSkpCAiIiElBRERCSkpiIhISElBRERCSgoiIhL6/ymxUAt6QB7JAAAAAElFTkSuQmCC", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3deXiU9bn/8fdtEMEFlE0jIQJCLUEwYDRup6IVhSoC/SGbtlItKEK3X1ulnLaovVqptupPrSJU3FpRjz2IKLX2KBQXCorNKZssBYqBgCxlsYps9++PyfM4GWYmE5LJZDKf13XlSuZ55nnmHhxz57vdX3N3REREAI7KdAAiItJwKCmIiEhISUFEREJKCiIiElJSEBGRUJNMB1Abbdq08Y4dO2Y6DBGRrLJ48eJt7t423rmsTgodO3bkvffey3QYIiJZxcz+meicuo9ERCSkpCAiIiElBRERCWXlmIKZDQAGdOnSJdOhSI7Yv38/5eXl7N27N9OhiKSsWbNmFBQUcPTRR6d8jWVz7aOSkhLXQLPUh3Xr1nHCCSfQunVrzCzT4YhUy93Zvn07e/bsoVOnTlXOmdlidy+Jd526j0RSsHfvXiUEySpmRuvWrWvculVSEEmREoJkmyP5zGblmEJt3TF7Gcs37WZgcXtGlhZmOhwRkQYjZ1sKC9ftYFbZxkyHIXKYlStXsnLlysOOr1+/njPPPLNW9543bx5XXXVVre6RLqNGjeKFF17IdBg5LydbCpMGdGf5pt2ZDkMkq7g77s5RR+Xs35I5Qf91RbLIgQMHuP766+nZsydDhgzhk08+4c477+Scc87hzDPPZMyYMQQzCtesWcNll13GWWedRe/evfnHP/5R5V7vvvsuvXr1Yu3atWzdupW+ffvSu3dvbrrpJk477TS2bdvG+vXr6datG7fccgu9e/fmww8/ZMaMGfTo0YMzzzyT2267Lbzf8ccfH/78wgsvMGrUKCDSAvj2t7/NBRdcQOfOncPWgLszfvx4ioqKuPLKK/noo4/S/K8nqcjKloLWKUgmBWNSdano1BZMGtC92uetXLmSxx57jAsvvJAbbriBhx9+mPHjx/PTn/4UgK997Wu8/PLLDBgwgGuvvZYJEyYwePBg9u7dy6FDh/jwww8BeOedd/jWt77FrFmzKCwsZPz48Vx66aX86Ec/4tVXX2Xq1KlVXvPxxx/n4YcfZtOmTdx2220sXryYk046icsvv5wXX3yRQYMGJY27oqKCt956iw8++ICrr76aIUOGMHPmTFauXMmSJUvYsmULRUVF3HDDDbX4V5S6kJUtBXef7e5jWrZsmelQROpVhw4duPDCCwG47rrreOutt5g7dy6lpaX06NGDN954g2XLlrFnzx42btzI4MGDgcgipmOPPRaAFStWMGbMGGbPnk1hYWSixVtvvcXw4cMB6NevHyeddFL4mqeddhrnnXceEGld9OnTh7Zt29KkSROuvfZa5s+fX23cgwYN4qijjqKoqIgtW7YAMH/+fEaMGEFeXh6nnnoql156aR39K0ltZGVLQSSTUvmLPl1ipxiaGbfccgvvvfceHTp04Pbbb2fv3r0kW5San5/P3r17+dvf/sapp54KkP
T5xx13XPhzsudFxxY7N/6YY46Jew9N8214srKlIJKrNmzYwIIFCwCYMWMGF110EQBt2rTh448/DvvrW7RoQUFBAS+++CIAn332GZ988gkAJ554Iq+88goTJ05k3rx5AFx00UU8//zzALz22mv861//ivv6paWl/OUvf2Hbtm0cPHiQGTNmcPHFFwNw8skns2LFCg4dOsTMmTOrfS9f+tKXePbZZzl48CAVFRXMnTv3CP9VpC4pKYhkkW7duvHkk0/Ss2dPduzYwdixYxk9ejQ9evRg0KBBnHPOOeFzn376aR544AF69uzJBRdcwObNm8NzJ598MrNnz2bcuHEsXLiQSZMm8dprr9G7d2/++Mc/kp+fzwknnHDY6+fn53PXXXdxySWXhAPYAwcOBGDy5MlcddVVXHrppeTn51f7XgYPHkzXrl3p0aMHY8eODZOLZFbO1j4a9mjkr63nbjq/LkOSRmrFihV069atXl4rWKNwxhln1MvrQaQlkZeXR5MmTViwYAFjx46lrKys3l5f0ifeZzdZ7SONKYgIGzZsYOjQoRw6dIimTZsybdq0TIckGaKkICJ07dqVv/3tb5kOQxoAjSmIiEhISUFEREINKimY2XFmttjMGmbFLhGRRi6tScHMppvZR2a2NOZ4PzNbaWZrzGxC1KnbgOfTGZOIiCSW7pbCE0C/6ANmlgf8BugPFAEjzKzIzC4DlgNb0hyTiIgkkNak4O7zgR0xh88F1rj7WnffBzwLDAQuAc4DRgKjzaxBdW2JZNL27dspLi6muLiYU045hfbt24eP77jjDrp3707Pnj0pLi5m4cKF4XVDhgxh7dq1lJaWUlxcTGFhIW3btg2vXbJkCaeffjqrV68GYP/+/fTo0SO8x89//vOE9w7s2LGDvn370rVrV/r27Ruuhl6/fj3NmzcPX+vmm28Or1m8eDE9evSgS5cufPvb365S+uL555+nqKiI7t27M3LkyJT/jdatW0dpaSldu3Zl2LBh7Nu3D4jsIdGyZcswjjvvvDO8ZufOnQwZMoQvfvGLdOvWLVwt/oMf/IA33ngj5df+4IMPOP/88znmmGP41a9+FR7fu3cv5557LmeddRbdu3dn0qRJNbo+cPDgQXr16lVlL4zbb7+9yudgzpw5KcebVFAjPV1fQEdgadTjIcBvox5/DXgo6vEo4Kok9xsDvAe8V1hY6Edq6JR3fOiUd474eskty5cvr7fX+uCDD/yDDz5IeH7SpEl+zz33uLv7O++84+edd57v3bvX3d23bt3qGzdudHf3pUuX+qBBg6pc+/jjj/u4ceOqHHvuuee8b9++7u7+i1/8wseMGVPtvaP98Ic/9Lvuusvd3e+66y6/9dZb3d193bp13r1797jv4ZxzzvF33nnHDx065P369fM5c+a4u/uqVau8uLjYd+zY4e7uW7ZsOezaxx9/3CdNmnTY8WuuucZnzJjh7u433XSTP/zww+7uPnfuXL/yyivjxvH1r3/dp02b5u7un332mf/rX/9yd/f169eH/yap2LJliy9atMgnTpwY/rdxdz906JDv2bPH3d337dvn5557ri9YsCDl6wO//vWvfcSIEVXeR/TnIJl4n13gPU/wOzYT6xTiVcAK/0xw9yeSXezuU4GpEFnRXKeRiaSoT58+dXq/oAZRTVVUVNCmTZuw4FybNm3Cc7///e/DEhTJDB06lOnTp3P33XczZcqUcL1CsntHmzVrVhj/9ddfT58+ffjlL3+ZNObdu3dz/vmRagJf//rXefHFF+nfvz/Tpk1j3LhxYZXWdu3aVRs/RP64feONN3jmmWfCOG6//XbGjh2b8Jrdu3czf/58nnjiCQCaNm1K06ZNgUhl2O3bt7N582ZOOeWUal+/Xbt2tGvXjldeeaXKcTML95nYv38/+/fvj1sEMNH1AOXl5bzyyiv853/+J/fee2+1sdRWJrpoyoEOUY8LgE01uYGZDTCzqbt27arTwESyzeWXX86HH3
7IF77wBW655Rb+8pe/hOfefvttzj777JTuc//993Pbbbfx4x//mFatWlV772hbtmwJax3l5+dX2Sxn3bp19OrVi4svvpg333wTgI0bN1JQUBA+p6CggI0bI1vjrlq1ilWrVnHhhRdy3nnn8eqrr6YU//bt2znxxBNp0qTJYfcEWLBgAWeddRb9+/dn2bJlAKxdu5a2bdvyjW98g169evHNb36Tf//73+E1vXv35u233wbge9/7XthNE/01efLkamM7ePAgxcXFtGvXjr59+1JaWprSewp897vf5e677467491DDz1Ez549ueGGGxIWMaypTLQU3gW6mlknYCMwnMg4QsrcfTYwu6SkZHQa4hOp1pH+ZV/Xjj/+eBYvXsybb77J3LlzGTZsGJMnT2bUqFFUVFTQtm3blO7z6quvkp+fz9Kln08UTHbvVOTn57NhwwZat27N4sWLGTRoEMuWLYtbfjv46/nAgQOsXr2aefPmUV5ezn/8x3+wdOlSDh48yJe//GUgMoaxb9++sALs008/Hfev+eCevXv35p///CfHH388c+bMYdCgQaxevZoDBw7w/vvv8+CDD1JaWsp3vvMdJk+ezM9+9jMg8tf7pk2Rv1fvu+++lN5zPHl5eZSVlbFz504GDx7M0qVLU95r++WXX6Zdu3acffbZh33mxo4dy09+8hPMjJ/85Cd8//vfZ/r06UccZyDdU1JnAAuAM8ys3MxudPcDwHjgT8AK4Hl3X1bD+6qlIFIpLy+PPn36cMcdd/DQQw/xhz/8AYDmzZsftq9BPJs2beKBBx5g0aJFzJkzh7///e/V3jvaySefTEVFBRDpGgq6fI455hhat24NwNlnn83pp5/OqlWrKCgooLy8PLy+vLw83NehoKCAgQMHcvTRR9OpUyfOOOMMVq9eTevWrSkrK6OsrIw777yTm2++OXzco0cP2rRpw86dOzlw4MBh92zRokXYhfOVr3yF/fv3s23bNgoKCigoKAj/ch8yZAjvv/9+GNfevXtp3rw5ULuWQuDEE0+kT58+Kbd+INLae+mll+jYsSPDhw/njTfe4Lrrrgv/3fPy8jjqqKMYPXo0ixYtSvm+yaR79tEId89396PdvcDdH6s8Psfdv+Dup7v7z4/gvtp5TYRIRdVg5hBAWVkZp512GhAps71mzZpq7/G9732PiRMnUlBQwL333su4ceNw96T3jnb11Vfz5JNPAvDkk0+G4xhbt27l4MGDQKSrZvXq1XTu3Dksy/3Xv/4Vd+epp54Krxk0aFC4r8K2bdtYtWoVnTt3rvY9mBmXXHJJuJ9EdBybN28OWyeLFi3i0KFDtG7dmlNOOYUOHTqEVWlff/11ioqKwnuuWrUq/Iv+vvvuC5NQ9NeECdHLrA63detWdu7cCcCnn37K//zP//DFL36x2vcTuOuuuygvL2f9+vU8++yzXHrppfzud78DCBMxwMyZM1NufVRHBfFEstjHH3/Mt771LXbu3EmTJk3o0qVLuL/ylVdeybx587jssssSXv/nP/+ZDRs2cOONNwIwYMAApk2bxlNPPcWZZ56Z8N7RJkyYwNChQ3nssccoLCzkv/7rv4DIdps//elPadKkCXl5eUyZMiUcr3jkkUcYNWoUn376Kf3796d///4AXHHFFbz22msUFRWRl5fHPffcE7Y2qvPLX/6S4cOH8+Mf/5hevXqF7+mFF17gkUceoUmTJjRv3pxnn3027Fp68MEHufbaa9m3bx+dO3fm8ccfByKDwmvWrKGkJG516cNs3ryZkpISdu/ezVFHHcX999/P8uXLqaio4Prrr+fgwYMcOnSIoUOHhtNKp0yZAsDNN9+c8PoWLVokfM1bb72VsrIyzIyOHTvy6KOPphRrdbJyPwUzGwAM6NKly+jov2RqQvspSE1k434Kn376KZdccglvv/02eXl5dRFazpg5cybvv/9+OL6QzWq6n0JWLhBT95FI9Zo3b84dd9xRZRaOpObAgQN8//
vfz3QYGaHuI5EUuXvWbTR/xRVXZDqErHTNNddkOoQ6cSQ9QVnZUtDsI6lvzZo1Y/v27Uf0P5lIJrg727dvp1mzZjW6LitbClqnIPUtmEa5devWtL/W5s2bATh06FDaX0sat2bNmlVZKJiKrEwKIvUtmDdfH4LSDA1lgZzkFnUfiYhIKCuTgmYfiYikR1YmBRERSQ8lBRERCSkpiIhIKCuTggaaRUTSIyuTggaaRUTSIyuTgoiIpIeSgoiIhJQUREQklJVJQQPNIiLpkZVJQQPNIiLpkZVJQURE0kNJQUREQkoKIiISUlIQEZGQkoKIiISUFEREJJSVSUHrFERE0iMrk4LWKYiIpEdWJgUREUkPJQUREQkpKYiISEhJQUREQkoKIiISUlIQEZFQTieFhet28MzCDZkOQ0SkwcjZpDCwuD0AE2cuUWIQEamUs0lhZGkhvxjcA4BZZRszHI2ISMPQYJKCmXUzsylm9oKZja2P1xxZWkhpp1Ysr9jNsEcXqMUgIjmvSTpvbmbTgauAj9z9zKjj/YD/B+QBv3X3ye6+ArjZzI4CpqUzrmhBN9LCdTtYuG4HEEkWIiK5KN0thSeAftEHzCwP+A3QHygCRphZUeW5q4G3gNfTHFdoZGkhz910ftiVpDEGEcllaW0puPt8M+sYc/hcYI27rwUws2eBgcByd38JeMnMXgGeiXdPMxsDjAEoLKy7v+iD1sHEmUuYOHMJs8o28sm+gxzbNI+Bxe3VehCRnJDWpJBAe+DDqMflQKmZ9QG+ChwDzEl0sbtPBaYClJSUeF0GFvzin1W2keUVu9mz98Bh50REGrNMJAWLc8zdfR4wL6UbmA0ABnTp0qUOw4oYWVrIyNJChj26IBxjEBHJFZmYfVQOdIh6XABsqskN6mM/hYHF7Snt1ArQIjcRyR3VJgUzu9DM/mxmq8xsrZmtM7O1tXjNd4GuZtbJzJoCw4GXanKD+th5Ld4AtKatikhjl0r30WPA94DFwMGa3NzMZgB9gDZmVg5McvfHzGw88CciU1Knu/uymtzX3WcDs0tKSkbX5LojET3OEExbnbFoA8c2zQPQILSINCqpJIVd7v7HI7m5u49IcHwOSQaTG5pgnOGZhRuYOHMJSzZGWignNGsSnhcRaQxSSQpzzewe4L+Bz4KD7v5+2qKqRjoHmpOJbjUMLG4fth6eWbhBiUFEGoVUkkJp5feSqGMOXFr34aSmPruPYgWthsDCdTuYVbZRSUFEGoVqk4K7X1IfgWSjkaWFYTG9ZxZuCH/WOIOIZKuEScHMrnP335nZ/4133t3vTV9YyWWq+yiR5RW7wzUNGmcQkWyWrKVwXOX3E+ojkJrIZPdRrKCgXvCzWg4iks0SJgV3f7Ty+x31F072iR1jCEpkqOUgItkolcVrnc1stpltNbOPzGyWmXWuj+Cy0cDi9hTlt6C0Uyt+MbgHRfkttF+DiGSNVGYfPUOk1PXgysfDgRl8Piup3jW0MYVosS2HwPKK3eF5EZGGKpXaR+buT7v7gcqv3xGZkpox9VH7qK4E5TKK8luohpKINHgJk4KZtTKzVkQWr00ws45mdpqZ3Qq8Un8hNg7BgLT2gxaRhixZ99FiIi2CoNT1TVHnHPhZuoJqjII1DcH4gmYkiUhDlGz2Uaf6DKQmGvKYQjKx+0EH5TKUHESkocjEfgq1lk1jCtGiy3GXdmrF8ordVbqTnlm4QbOURCSjsjIpZLvowedAUIE1aEGIiGRC0impZmZAgbt/mOx5cuSCMQZt/SkiDUHSpODubmYvAmfXUzw5JbpERmmnVlXKZIiIZEIqi9f+ambnuPu7aY8mx8Rb6KYZSiKSSakkhUuAm81sPfBvIlNU3d17pjOwZLJ19lEq4s1Qij2vRCEi6ZJKUuif9ihqqCFVSa1r0Vt/xiYElcoQkXRLZZOdf5rZRUBXd3/czNoCx6c/tNwWr2tp2KMLMhSNiOSKVKqkTgJuA35Ueeho4HfpDEoSU/
0kEUmnVLqPBgO9gPcB3H2TmTW4jXdywcDi9ixct4OJM5doAx8RSYtUksK+yqmpDmBmx1V3gaRHUD8pGISGSMthxqINHNs0D1CSEJHaSSUpPG9mjwInmtlo4AZgWnrDkkSC2UnB94kzl7Bk465whzfQQLSIHLlUBpp/ZWZ9gd3AF4Cfuvuf0x6ZxBVv+8+F63ZUKZkhInKkUq19tAR4E5hf+XNGmdkAM5u6a9euTIeScQOL24eroUED0SJSO6nMPvomsAj4KjCEyArnG9IdWDLZWiU1HYLieiNLC6t0KanaqogciVTGFH4I9HL37QBm1hp4B5iezsCk5oJupejBaO3ZICI1kUpSKAf2RD3eA6hqagMVuyJaq6BFpCZSSQobgYVmNovINpwDgUVm9n8B3P3eNMYnRyhIDloFLSI1kUpS+EflV2BW5XctYBMRaWRSmZJ6R30EIumzcN2Ow1oMGmcQkXi0HWeOiN7ZLXZvaBGRQCrdR5LFoldABy0DjTOISCJKCo1cvBLcgHZ3E5G4Ulm8dreZtTCzo83sdTPbZmbXpSMYMxtkZtPMbJaZXZ6O15BIq6Eov0VYcVUL3UQkkMqYwuXuvhu4isiahS8QWdCWEjObbmYfmdnSmOP9zGylma0xswkA7v6iu48GRgHDUn0NqZlgFfQvBvegtFMrjTGISCiVpHB05fevADPcfUeyJ8fxBNAv+oCZ5QG/IbLVZxEwwsyKop7y48rzkkZBcijKbxF2J6nFIJLbUhlTmG1mHwCfArdUbse5N9UXcPf5ZtYx5vC5wBp3XwtgZs8CA81sBTAZ+KO7vx/vfmY2BhgDUFiovvC6EF1MLyiNEX1OYw4iuaPaloK7TwDOB0rcfT/wbyKrmmujPVVLZZRXHvsWcBkwxMxuThDPVHcvcfeStm3b1jIMgcO7kwLR3UrPLNygloRIDkjYUjCzr8Y5Fv3wv2vxuhbnmLv7A8AD1V5sNgAY0KVLl1qEILFiZypFT10NiuwFzxORxilZ99GAJOec2iWFcqBD1OMCYFOqF7v7bGB2SUnJ6FrEICIiMRImBXf/Rhpf912gq5l1IlJwbzgwMo2vJ0coKJERVFsVkcYtpcVrZnYl0B1oFhxz9ztTvHYG0AdoY2blwCR3f8zMxgN/AvKA6e6+LNWg1X1Uv5ZX7A7XNYhI41ZtUjCzKcCxwCXAb4nsvrYo1Rdw9xEJjs8B5qR6n5hr1X1Uj4ryW/DcTecz7NEFVYrraWaSSOOTSkvhAnfvaWZ/d/c7zOzX1G48odbUUqgf0XWTogUtBg08izQ+qSSFTyu/f2JmpwLbgU7pC6l6ainUj9jZSLFJYuLMJUycuSR8rohkv1SSwstmdiJwD/A+kZlHv01rVNIgxSuuN3HmEmaVbayyBWjQrRT7WEQavlQ22flZ5Y9/MLOXgWbuviu9YSWn7qOGYWRpYbgPdDDeEHtOXUwi2SXZ4rVL3f2NRIvY3D1j4wrqPmo4oscbSju1CktlaOWzSHZK1lK4GHiD+IvYart4TRqJ2C6lZxZuCMcaTmim7TpEsk2yxWuTzOwoIsXpnq/HmCSLBQkiqJkU3XJQF5JIw5e0IJ67HwLG11MsKTOzAWY2ddeujA5tSAJBgb2gyB6g/RpEskQq+yn82cx+YGYdzKxV8JX2yJJw99nuPqZly5aZDENSMLK0MBxrCMYZVHFVpOFKpdP3hsrv46KOOdC57sORxmhgcftwn4boWUnRezdo2qpIw5BKUujm7lU21TGzZomeLBIr0dTVYO+GoNhebFLQOgeR+pdKUngH6J3CsXqjdQrZJ5i6GiSA0k6teO6m84Gq+zZE0zoHkfqXcEzBzE4xs7OB5mbWy8x6V371IVIgL2M0ppB9oveDjicotKdxBpHMStZSuAIYRWQDnF/z+W5pu4GJ6Q1LcpFaBSKZl2ydwpPAk2b2f9z9D/UYk+SYoGtJ+zWIZF4qU1K7BT+Y2TFpjEVyVNC1FAw8i0jmJBtTuN
XMzieyqU4g/oigSB0JZihpbEEkM5KNKawErgE6m9mbwAqgtZmd4e4r6yW6BDT7qHGKnqGUqCspmKYa/XxNWxWpO8mSwr+IDCj3qfzqRmTweUJlYrgg7dEloCqp2SvRbm7weXG92DLc0YL1DtE0QC1Sd5IlhX7AJOB04F7gf4F/u/s36iMwaZzibdSTSOwYwzMLN7Bw3Y6EYw9a7CZSewnHFNx9ort/GVgP/I5IAmlrZm+Z2ex6ik8kFNttFO98dOkMEam5VGYf/cnd33X3qUC5u18EqLUg9Sa6PEZpp1ZqBYikUSrbcd4a9XBU5bFt6QpIJFpseYxoWtcgUvdSaSmE3P1/0xWISDzVlceIJyiZoamtIjWn/RIlq8VWWg1EP47dLlSD0SKJZWVS0DqF3JNsKmv0saBLKVHLQpVXRZLLyqSgdQqNW7wEkGwqa/S5ZGscRKR6WZkUpHGryVqGuqAuJZHPKSlIoxYMOif7ha/tQUU+p6QgWSnRGEP08eAXfE3GEBINUIvkCiUFyUqJupiij0evbF64bkdK01NrMvVVpDGq0ToFkWwysLg9pZ1a0aN9ZNvWiTOXxF0EFyjt1CphCQ2RXKGWgjRa0a2G6JLb8WYnlXZqxXM3nQ+g2kmS05QUJCfETlsVkfiUFEQSiG5dgGYkSW5oMGMKZtbZzB4zsxcyHYsIVN3QZ3nFbnUrSU5Ia1Iws+lm9pGZLY053s/MVprZGjObAODua939xnTGI1JTRfktwoJ8wZoHFdmTxizdLYUniOzgFjKzPOA3QH+gCBhhZkVpjkOkTmgTH2ns0jqm4O7zzaxjzOFzgTXuvhbAzJ4FBgLL0xmLSLTgr36IdA3Frk8IZigl2vozED3uoDEHaQwyMabQHvgw6nE50N7MWpvZFKCXmf0o0cVmNsbM3jOz97Zu3ZruWKURC8YLivJbVFmfUJO1CsG4g8YcpLHIxOwji3PM3X07cHN1F1duCzoVoKSkxOs4NskhwXhBrJGlhVVKbKdyH5HGIhNJoRzoEPW4ANhUkxtoPwVJt2T7N4g0ZpnoPnoX6GpmncysKTAceKkmN3D32e4+pmXLlmkJUCTYBrQmYwSp1lcSacjSPSV1BrAAOMPMys3sRnc/AIwH/gSsAJ5392U1vO8AM5u6a9euug9a5AgELQqNK0i2S/fsoxEJjs8B5tTivtp5TRqUYBxCJNupzIXIEQimoi5ctyPptNXoXd0ATV+VBi8rk4IGmqW+Bb/UgxlJqc5Oin2eNvGRhq7B1D6qCQ00S30LBp6rW8xWnaL8FprCKg1aVrYURDIp2UY90XtCB49FsklWJgV1H0lt1GYNQvQ1iX7hKxFINsvKpKDZR1IbifZ3rum1wx5dUG0CqG13k0h9y8oxBRERSY+sTApavCYN0cDi9moZSNbLyqSg2UfSENXVDCWRTMrKpCAiIumhpCAiIqGsnH0kkm2iN/SBqju/fbLvIMc2zSwngxMAAAc+SURBVAufu6ddT4DwfEClMaQ+ZGVS0DoFaQhSXe8Qu6tbUP8oSBR79h7ghGZNKMpvwfKK3exr0y08HySR4LlKCpJuWZkUtE5BGoJU1zvEPi9ICtGthmAXuGGPLqDsI8Lzwc5wsa0GkXTRmIKIiISUFEREJJSV3UciuS7YpyF6kLq6gejovR1qOjZRm2slu2RlUtBAs+S66H0aTmj2+f/GyX5hR19T01/stblWsktWdh9pRbPI57RHg9SlrEwKIiKSHkoKIiISUlIQEZGQkoKIiISUFEREJJSVU1JFGrrYAng18VmL+FM+o4voBfdPdD6QyrqC6DUIgNYj5LisTApapyANWWwBvETn4z1vYHH7pPs+B0XyivJbhAvXYovs1bSIXvQaBEDrEXJcViYFFcSThqy6Qnmx52N/vuPJVxK2FqKL5EWLLrKnInpSGxpTEBGRkJKCiIiElBRERCSkpCAiIiElBRERCSkpiIhISElBRERCSgoiIhJSUhARkVCDWd
FsZscBDwP7gHnu/vsMhyQiknPS2lIws+lm9pGZLY053s/MVprZGjObUHn4q8AL7j4auDqdcYmISHzpbik8ATwEPBUcMLM84DdAX6AceNfMXgIKgCWVTzuY5rhEckJQOTW6+F5sNdWgcF505dXlFburPCe2SF90ZdWRpYXh42hBwb5AKlVYY+8b71xwr5oU7AuujS4iGFx/pPetTTy1Ebxu0aktmDSge53fP61Jwd3nm1nHmMPnAmvcfS2AmT0LDCSSIAqAMpK0YMxsDDAGoLBQVRyl8Zl0/ZVVSllD/Mqq0ZKdj1d1NbqaanTFVTj8l3m8Mt3RlVVHlhYyq2zjYRVa9+w9wAnNmlCU36LKPZJVYY29b+y56PvU5JdwbCXY6OuP9L61iac2gvdSdGrNy7KnIhNjCu2BD6MelwOlwAPAQ2Z2JTA70cXuPhWYClBSUuJpjFMkI+JVWa1p5dXqJKq2Gs+wRxckLecd757BNcGxuqrYeiT7U6TzvumKpzqlnVqlpZUAmUkKFueYu/u/gW+kdAPtpyAikhaZmJJaDnSIelwAbKrJDdx9truPadmyZZ0GJiKS6zKRFN4FuppZJzNrCgwHXqrJDcxsgJlN3bVrV1oCFBHJVemekjoDWACcYWblZnajux8AxgN/AlYAz7v7sprcVy0FEZH0SPfsoxEJjs8B5qTztUVEpOayssyFuo9ERNIjK5OCuo9ERNIjK5OCiIikh7ln7/ovM9sK/PMIL28DbKvDcLKN3r/ev95/7jrN3dvGO5HVSaE2zOw9dy/JdByZovev96/3n7vvPxl1H4mISEhJQUREQrmcFKZmOoAM0/vPbXr/ElfOjimIiMjhcrmlICIiMZQUREQklHNJIcH+0DnDzNab2RIzKzOz9zIdT32It1e4mbUysz+b2erK7ydlMsZ0SvD+bzezjZWfgzIz+0omY0wnM+tgZnPNbIWZLTOz71Qez5nPQE3kVFKI2h+6P1AEjDCzosxGlRGXuHtxDs3TfgLoF3NsAvC6u3cFXq983Fg9weHvH+C+ys9BcWWRysbqAPB9d+8GnAeMq/z/Ppc+AynLqaRA1P7Q7r4PCPaHlkbM3ecDsftJDgSerPz5SWBQvQZVjxK8/5zh7hXu/n7lz3uIlOxvTw59Bmoi15JCvP2h4++E3ng58JqZLTazMZkOJoNOdvcKiPzSANplOJ5MGG9mf6/sXsqJrhMz6wj0Ahaiz0BcuZYU4u4PXe9RZNaF7t6bSBfaODP7UqYDkox4BDgdKAYqgF9nNpz0M7PjgT8A33X33ZmOp6HKtaRQ6/2hs527b6r8/hEwk0iXWi7aYmb5AJXfP8pwPPXK3be4+0F3PwRMo5F/DszsaCIJ4ffu/t+Vh3P6M5BIriWFWu8Pnc3M7DgzOyH4GbgcWJr8qkbrJeD6yp+vB2ZlMJZ6F/wyrDSYRvw5MDMDHgNWuPu9Uady+jOQSM6taK6cenc/kAdMd/efZzikemNmnYm0DiCyFeszufD+K/cK70OkXPIWYBLwIvA8UAhsAK5x90Y5GJvg/fch0nXkwHrgpqB/vbExs4uAN4ElwKHKwxOJjCvkxGegJnIuKYiISGK51n0kIiJJKCmIiEhISUFEREJKCiIiElJSEBGRUJNMByCSzcysNZFiagCnAAeBrZWPZwJDK48dIjLtc2G9BylSA5qSKlJHzOx24GN3/5WZnQ/cC/Rx98/MrA3QNFhRLtJQqaUgkh75wDZ3/wzA3bdlOB6RlGhMQSQ9XgM6mNkqM3vYzC7OdEAiqVBSEEkDd/8YOBsYQ2SM4TkzG5XRoERSoO4jkTRx94PAPGCemS0hUnTtiUzGJFIdtRRE0sDMzjCzrlGHioF/ZioekVSppSCSHscDD5rZiUT2CF5DpCtJpEHTlFQREQmp+0hEREJKCiIiElJSEBGRkJKCiIiElBRERCSkpCAiIiElBRERCf1/8bTF09INVrAAAAAASUVORK5CYII=", 
"text/plain": [ "
" ] diff --git a/doc/sphinx/tutorials/publicdata_ps_timedep.ipynb b/doc/sphinx/tutorials/publicdata_ps_timedep.ipynb new file mode 100644 index 0000000000..b292272799 --- /dev/null +++ b/doc/sphinx/tutorials/publicdata_ps_timedep.ipynb @@ -0,0 +1,215 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Time dependent analyses with the public 10-year IceCube point-source data " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This tutorial shows how to use the public point-source data for a time dependent point-source analysis. The time fit is performed by the expectation maximization (EM) algorithm. " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from matplotlib import pyplot as plt\n", + "import numpy as np\n", + "\n", + "from skyllh.analyses.i3.publicdata_ps.time_dependent_ps import (\n", + " create_analysis,\n", + " do_trials_with_em,\n", + " unblind_single_flare,\n", + " TXS_0506_PLUS056_ALERT_TIME,\n", + " TXS_0506_PLUS056_SOURCE,\n", + ")\n", + "from skyllh.core.random import RandomStateService\n", + "from skyllh.datasets.i3.PublicData_10y_ps import create_dataset_collection\n", + "from skyllh.core.source_model import PointLikeSource" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "dsc = create_dataset_collection(\n", + " base_path=\"/home/mwolf/projects/publicdata_ps/\")\n", + "datasets = dsc.get_datasets([\"IC86_II-VII\"])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We create the ``Anaylsis`` instance for the TXS 0506+056 source." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 43/43 [00:04<00:00, 8.67it/s]\n", + "100%|██████████| 1/1 [00:10<00:00, 10.31s/it]\n", + "100%|██████████| 44/44 [00:00<00:00, 6942.64it/s]\n" + ] + } + ], + "source": [ + "ana = create_analysis(\n", + " datasets=datasets, \n", + " source=TXS_0506_PLUS056_SOURCE, \n", + " refplflux_gamma=2.0, \n", + " gauss={\"mu\":57000, \"sigma\": 65})" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 51/51 [00:06<00:00, 8.13it/s]\n" + ] + } + ], + "source": [ + "(best_ts, best_em_result, best_fitparam_values) = unblind_single_flare(\n", + " ana=ana, \n", + " remove_time=TXS_0506_PLUS056_ALERT_TIME)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "best TS = 15.4046\n", + "best em mu = 56972.7\n", + "best em sigma = 27.9716\n", + "best ns = 7.36603\n", + "best gamma = 2.20371\n" + ] + } + ], + "source": [ + "print(f'best TS = {best_ts:g}')\n", + "print(f'best em mu = {best_em_result[\"mu\"]:g}')\n", + "print(f'best em sigma = {best_em_result[\"sigma\"]:g}')\n", + "print(f'best ns = {best_fitparam_values[0]:g}')\n", + "print(f'best gamma = {best_fitparam_values[1]:g}')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run backgroud trials, i.e. 
``mean_n_sig=0``" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 100/100 [04:09<00:00, 2.50s/it]\n" + ] + } + ], + "source": [ + "bg_trials = do_trials_with_em(ana=ana, n=100, mean_n_sig=0, ncpu=4)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 3., 3., 17., 34., 20., 11., 8., 2., 1., 1.]),\n", + " array([ 0.45811669, 2.79832821, 5.13853973, 7.47875125, 9.81896277,\n", + " 12.15917429, 14.49938581, 16.83959733, 19.17980885, 21.52002037,\n", + " 23.86023189]),\n", + " )" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAicAAAGdCAYAAADJ6dNTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAd7UlEQVR4nO3df2xV9334/5ch4ECxzQwB42EIISk0IWYSTRwrLUsLBZwIhYZK+VGpJEJUyUw0sLoknpJQtk7OUmllnSiZtC20Upy0mUKiJAooIcWoGtCFDtFsixUQEURgWJmwgzMMwufzR7+937iQHxeue99cPx7SkbjnHp/78nWO/MzxufeWZVmWBQBAIoYVewAAgI8SJwBAUsQJAJAUcQIAJEWcAABJEScAQFLECQCQFHECACTlsmIP8Pv6+/vj8OHDUVFREWVlZcUeBwD4DLIsiw8++CBqa2tj2LCLO/eRXJwcPnw46urqij0GAHABDh06FJMnT76ofSQXJxUVFRHx22+usrKyyNMAAJ9FT09P1NXV5X6PX4zk4uR3f8qprKwUJwBwiSnEJRl5/VFow4YNUV9fnwuHxsbGeO2113L333LLLVFWVjZguf/++y96SABg6MjrzMnkyZPjiSeeiGuuuSayLIsf//jHcfvtt8d//Md/xHXXXRcREStWrIi/+qu/yn3N6NGjCzsxAFDS8oqTxYsXD7j9N3/zN7Fhw4bYuXNnLk5Gjx4dNTU1hZsQABhSLvi1PmfPno3nnnsuent7o7GxMbf+mWeeifHjx8esWbOitbU1Pvzww0/cT19fX/T09AxYAIChK+8LYn/9619HY2NjnDp1KsaMGRObNm2Ka6+9NiIi7rnnnpg6dWrU1tbG3r174+GHH47Ozs544YUXPnZ/bW1tsXbt2gv/DgCAklKWZVmWzxecPn06Dh48GN3d3fGv//qv8U//9E/R0dGRC5SPevPNN2PevHmxb9++mD59+nn319fXF319fbnbv3spUnd3t1frAMAloqenJ6qqqgry+zvvOPl98+fPj+nTp8c//uM/nnNfb29vjBkzJjZv3hwLFy78TPsr5DcHAPxhFPL390V/tk5/f/+AMx8ftWfPnoiImDRp0sU+DAAwROR1zUlra2s0NTXFlClT4oMPPoj29vbYtm1bbNmyJfbv3x/t7e1x66
23xrhx42Lv3r2xevXqmDt3btTX1w/W/ABAickrTo4dOxbf+ta34siRI1FVVRX19fWxZcuW+NrXvhaHDh2KN954I9atWxe9vb1RV1cXS5cujUcffXSwZgcAStBFX3NSaK45AYBLT1LXnAAAFJI4AQCSIk4AgKTk/Q6xUKqufOTVYo+Qt/eeuK3YIwAUnDMnAEBSxAkAkBRxAgAkRZwAAEkRJwBAUsQJAJAUcQIAJEWcAABJEScAQFLECQCQFHECACRFnAAASREnAEBSxAkAkBRxAgAkRZwAAEkRJwBAUsQJAJAUcQIAJEWcAABJEScAQFLECQCQFHECACRFnAAASREnAEBSxAkAkBRxAgAkRZwAAEkRJwBAUsQJAJAUcQIAJEWcAABJEScAQFLECQCQFHECACRFnAAASREnAEBS8oqTDRs2RH19fVRWVkZlZWU0NjbGa6+9lrv/1KlT0dzcHOPGjYsxY8bE0qVL4+jRowUfGgAoXXnFyeTJk+OJJ56I3bt3x1tvvRVf/epX4/bbb4///M//jIiI1atXx8svvxzPP/98dHR0xOHDh+OOO+4YlMEBgNJUlmVZdjE7qK6uju9///vxjW98I6644opob2+Pb3zjGxER8c4778QXvvCF2LFjR9x0002faX89PT1RVVUV3d3dUVlZeTGjQV6ufOTVYo+Qt/eeuK3YIwBERGF/f1/wNSdnz56N5557Lnp7e6OxsTF2794dZ86cifnz5+e2mTlzZkyZMiV27Njxsfvp6+uLnp6eAQsAMHTlHSe//vWvY8yYMVFeXh73339/bNq0Ka699tro6uqKkSNHxtixYwdsP3HixOjq6vrY/bW1tUVVVVVuqaury/ubAABKR95xMmPGjNizZ0/s2rUrHnjggVi2bFn813/91wUP0NraGt3d3bnl0KFDF7wvAODSd1m+XzBy5Mi4+uqrIyJizpw58e///u/x93//93HnnXfG6dOn48SJEwPOnhw9ejRqamo+dn/l5eVRXl6e/+QAQEm66Pc56e/vj76+vpgzZ06MGDEitm7dmruvs7MzDh48GI2NjRf7MADAEJHXmZPW1tZoamqKKVOmxAcffBDt7e2xbdu22LJlS1RVVcXy5cujpaUlqquro7KyMh588MFobGz8zK/UAQDIK06OHTsW3/rWt+LIkSNRVVUV9fX1sWXLlvja174WERE/+MEPYtiwYbF06dLo6+uLhQsXxo9+9KNBGRwAKE0X/T4nheZ9TigW73MCcOGSeJ8TAIDBIE4AgKSIEwAgKeIEAEiKOAEAkiJOAICkiBMAICniBABIijgBAJIiTgCApIgTACAp4gQASIo4AQCSIk4AgKSIEwAgKeIEAEiKOAEAkiJOAICkiBMAICniBABIijgBAJIiTgCApIgTACAp4gQASIo4AQCSIk4AgKSIEwAgKeIEAEiKOAEAkiJOAICkiBMAICniBABIijgBAJIiTgCApIgTACAp4gQASIo4AQCSIk4AgKSIEwAgKeIEAEhKXnHS1tYWN9xwQ1RUVMSECRNiyZIl0dnZOWCbW265JcrKygYs999/f0GHBgBKV15x0tHREc3NzbFz5854/fXX48yZM7FgwYLo7e0dsN2KFSviyJEjueXJJ58s6NAAQOm6LJ+NN2/ePOD2xo0bY8KECbF79+6YO3dubv3o0aOjpqamMBMCAEPKRV1z0t3dHRER1dXVA9Y/88wzMX78+Jg1a1a0trbGhx9++LH76Ovri56engELADB05XXm5KP6+/tj1apVcfPNN8esWbNy6++5556YOnVq1NbWxt69e+Phhx+Ozs7OeOGFF867n7a2tli7du2FjgEAlJiyLMuyC/nCBx54IF577bX4xS9+EZMnT/7Y7d58882YN29e7Nu3L6ZPn37O/X19fdHX15e73dPTE3V1ddHd3R2VlZUXMhpckCsfebXYI+TtvSduK/YIABHx29/fVVVVBfn9fUFnTlauXBmvvPJKbN++/R
PDJCKioaEhIuJj46S8vDzKy8svZAwAoATlFSdZlsWDDz4YmzZtim3btsW0adM+9Wv27NkTERGTJk26oAEBgKElrzhpbm6O9vb2eOmll6KioiK6uroiIqKqqipGjRoV+/fvj/b29rj11ltj3LhxsXfv3li9enXMnTs36uvrB+UbAABKS15xsmHDhoj47RutfdTTTz8d9957b4wcOTLeeOONWLduXfT29kZdXV0sXbo0Hn300YINDACUtrz/rPNJ6urqoqOj46IGAgCGNp+tAwAkRZwAAEkRJwBAUsQJAJAUcQIAJEWcAABJEScAQFLECQCQFHECACRFnAAASREnAEBSxAkAkBRxAgAkRZwAAEkRJwBAUsQJAJAUcQIAJEWcAABJEScAQFLECQCQFHECACRFnAAASREnAEBSxAkAkBRxAgAkRZwAAEkRJwBAUsQJAJAUcQIAJEWcAABJEScAQFLECQCQFHECACRFnAAASREnAEBSxAkAkBRxAgAkRZwAAEkRJwBAUsQJAJAUcQIAJCWvOGlra4sbbrghKioqYsKECbFkyZLo7OwcsM2pU6eiubk5xo0bF2PGjImlS5fG0aNHCzo0AFC68oqTjo6OaG5ujp07d8brr78eZ86ciQULFkRvb29um9WrV8fLL78czz//fHR0dMThw4fjjjvuKPjgAEBpuiyfjTdv3jzg9saNG2PChAmxe/fumDt3bnR3d8c///M/R3t7e3z1q1+NiIinn346vvCFL8TOnTvjpptuKtzkAEBJuqhrTrq7uyMiorq6OiIidu/eHWfOnIn58+fntpk5c2ZMmTIlduzYcd599PX1RU9Pz4AFABi68jpz8lH9/f2xatWquPnmm2PWrFkREdHV1RUjR46MsWPHDth24sSJ0dXVdd79tLW1xdq1ay90DBjSrnzk1WKPkLf3nrit2CMAibvgMyfNzc3x9ttvx3PPPXdRA7S2tkZ3d3duOXTo0EXtDwC4tF3QmZOVK1fGK6+8Etu3b4/Jkyfn1tfU1MTp06fjxIkTA86eHD16NGpqas67r/Ly8igvL7+QMQCAEpTXmZMsy2LlypWxadOmePPNN2PatGkD7p8zZ06MGDEitm7dmlvX2dkZBw8ejMbGxsJMDACUtLzOnDQ3N0d7e3u89NJLUVFRkbuOpKqqKkaNGhVVVVWxfPnyaGlpierq6qisrIwHH3wwGhsbvVIHAPhM8oqTDRs2RETELbfcMmD9008/Hffee29ERPzgBz+IYcOGxdKlS6Ovry8WLlwYP/rRjwoyLABQ+vKKkyzLPnWbyy+/PNavXx/r16+/4KEAgKHLZ+sAAEkRJwBAUsQJAJAUcQIAJEWcAABJEScAQFLECQCQFHECACRFnAAASREnAEBSxAkAkBRxAgAkRZwAAEkRJwBAUsQJAJAUcQIAJEWcAABJEScAQFLECQCQFHECACRFnAAASREnAEBSxAkAkBRxAgAk5bJiD0BpuvKRV4s9AgCXKGdOAICkiBMAICniBABIijgBAJIiTgCApIgTACAp4gQASIo4AQCSIk4AgKSIEwAgKeIEAEiKOAEAkiJOAICkiBMAICniBABIijgBAJKSd5xs3749Fi9eHLW1tVFWVhYvvvjigPvvvffeKCsrG7AsWrSoUPMCACUu7zjp7e2N2bNnx/r16z92m0WLFsWRI0dyy7PPPntRQwIAQ8dl+X5BU1NTNDU1feI25eXlUVNTc8FDAQBD16Bcc7Jt27aYMGFCzJgxIx544IE4fvz4x27b19cXPT09AxYAYOgqeJwsWrQofvKTn8TWrVvjb//2b6OjoyOampri7Nmz592+ra0tqqqqcktdXV2hRwIALiF5/1nn09x11125f19//fVRX18f06dPj23btsW8efPO2b61tTVaWlpyt3t6egQKAAxhg/5S4quuuirGjx8f+/btO+/95eXlUVlZOWABAIauQY+T999/P44fPx6TJk0a7IcCAEpA3n/WOXny5I
CzIAcOHIg9e/ZEdXV1VFdXx9q1a2Pp0qVRU1MT+/fvj4ceeiiuvvrqWLhwYUEHBwBKU95x8tZbb8VXvvKV3O3fXS+ybNmy2LBhQ+zduzd+/OMfx4kTJ6K2tjYWLFgQf/3Xfx3l5eWFmxoAKFl5x8ktt9wSWZZ97P1btmy5qIEAgKHNZ+sAAEkRJwBAUsQJAJAUcQIAJEWcAABJEScAQFLECQCQFHECACRFnAAASREnAEBSxAkAkBRxAgAkRZwAAEkRJwBAUsQJAJAUcQIAJEWcAABJEScAQFLECQCQFHECACRFnAAASREnAEBSxAkAkBRxAgAkRZwAAEkRJwBAUsQJAJAUcQIAJEWcAABJEScAQFLECQCQFHECACRFnAAASREnAEBSLiv2AMDQcuUjrxZ7hLy998RtxR4BhhRnTgCApIgTACAp4gQASIo4AQCSIk4AgKSIEwAgKXnHyfbt22Px4sVRW1sbZWVl8eKLLw64P8uyePzxx2PSpEkxatSomD9/frz77ruFmhcAKHF5x0lvb2/Mnj071q9ff977n3zyyfjhD38YTz31VOzatSs+97nPxcKFC+PUqVMXPSwAUPryfhO2pqamaGpqOu99WZbFunXr4tFHH43bb789IiJ+8pOfxMSJE+PFF1+Mu+666+KmBQBKXkGvOTlw4EB0dXXF/Pnzc+uqqqqioaEhduzYcd6v6evri56engELADB0FTROurq6IiJi4sSJA9ZPnDgxd9/va2tri6qqqtxSV1dXyJEAgEtM0V+t09raGt3d3bnl0KFDxR4JACiigsZJTU1NREQcPXp0wPqjR4/m7vt95eXlUVlZOWABAIaugsbJtGnToqamJrZu3Zpb19PTE7t27YrGxsZCPhQAUKLyfrXOyZMnY9++fbnbBw4ciD179kR1dXVMmTIlVq1aFd/73vfimmuuiWnTpsVjjz0WtbW1sWTJkkLODQCUqLzj5K233oqvfOUrudstLS0REbFs2bLYuHFjPPTQQ9Hb2xvf/va348SJE/GlL30pNm/eHJdffnnhpgYASlZZlmVZsYf4qJ6enqiqqoru7m7Xn1zCrnzk1WKPAAXz3hO3FXsESF4hf38X/dU6AAAfJU4AgKSIEwAgKeIEAEiKOAEAkiJOAICkiBMAICniBABIijgBAJIiTgCApIgTACAp4gQASIo4AQCSclmxBwBI3aX6Kds+TZlLlTMnAEBSxAkAkBRxAgAkRZwAAEkRJwBAUsQJAJAUcQIAJEWcAABJEScAQFLECQCQFHECACRFnAAASREnAEBSxAkAkBRxAgAkRZwAAEkRJwBAUsQJAJAUcQIAJEWcAABJEScAQFLECQCQFHECACRFnAAASREnAEBSxAkAkJSCx8l3v/vdKCsrG7DMnDmz0A8DAJSoywZjp9ddd1288cYb//+DXDYoDwMAlKBBqYbLLrssampqBmPXAECJG5RrTt59992ora2Nq666Kr75zW/GwYMHB+NhAIASVPAzJw0NDbFx48aYMWNGHDlyJNauXRtf/vKX4+23346Kiopztu/r64u+vr7c7Z6enkKPBABcQgoeJ01NTbl/19fXR0NDQ0ydOjV+9rOfxfLly8/Zvq2tLdauXVvoMQCAS9Sgv5R47Nix8fnPfz727dt33vtbW1uju7s7txw6dGiwRwIAEjbocXLy5MnYv39/TJo06bz3l5eXR2Vl5YAFABi6Ch4n3/nOd6KjoyPee++9+Ld/+7f4+te/HsOHD4+777670A8FAJSggl9z8v7778fdd98dx48fjyuuuCK+9KUvxc6dO+OKK64o9EMBACWo4HHy3HPPFXqXAMAQ4rN1AICkiBMAICniBABIijgBAJIiTgCApIgTACAp4gQASIo4AQCSIk4AgKSIEwAgKeIEAEiKOAEAkiJOAICkiBMAICmXFXuAP7QrH3m12CMAAJ/AmRMAICniBABIijgBAJIiTgCApIgTACAp4gQASIo4AQCSIk4AgK
SIEwAgKeIEAEjKkHv7eoCh4lL8uI73nrit2COQAGdOAICkiBMAICniBABIijgBAJIiTgCApIgTACAp4gQASIo4AQCSIk4AgKR4h1gAknEpvqvtpSj1d+J15gQASIo4AQCSIk4AgKSIEwAgKeIEAEjKoMXJ+vXr48orr4zLL788Ghoa4pe//OVgPRQAUEIGJU5++tOfRktLS6xZsyZ+9atfxezZs2PhwoVx7NixwXg4AKCEDEqc/N3f/V2sWLEi7rvvvrj22mvjqaeeitGjR8e//Mu/DMbDAQAlpOBvwnb69OnYvXt3tLa25tYNGzYs5s+fHzt27Dhn+76+vujr68vd7u7ujoiInp6eQo8WERH9fR8Oyn4B4FIxGL9jf7fPLMsuel8Fj5Pf/OY3cfbs2Zg4ceKA9RMnTox33nnnnO3b2tpi7dq156yvq6sr9GgAQERUrRu8fR8/fjyqqqouah9Ff/v61tbWaGlpyd3u7++P//3f/41x48ZFWVnZZ95PT09P1NXVxaFDh6KysnIwRuUTeP6Lz8+guDz/xeX5L77u7u6YMmVKVFdXX/S+Ch4n48ePj+HDh8fRo0cHrD969GjU1NScs315eXmUl5cPWDd27NgLfvzKykr/YRaR57/4/AyKy/NfXJ7/4hs27OIvZy34BbEjR46MOXPmxNatW3Pr+vv7Y+vWrdHY2FjohwMASsyg/FmnpaUlli1bFl/84hfjxhtvjHXr1kVvb2/cd999g/FwAEAJGZQ4ufPOO+N//ud/4vHHH4+urq74kz/5k9i8efM5F8kWUnl5eaxZs+acPxHxh+H5Lz4/g+Ly/BeX57/4CvkzKMsK8ZofAIAC8dk6AEBSxAkAkBRxAgAkRZwAAEkpmThZv359XHnllXH55ZdHQ0ND/PKXvyz2SEPCd7/73SgrKxuwzJw5s9hjlazt27fH4sWLo7a2NsrKyuLFF18ccH+WZfH444/HpEmTYtSoUTF//vx49913izNsifq0n8G99957zjGxaNGi4gxbgtra2uKGG26IioqKmDBhQixZsiQ6OzsHbHPq1Klobm6OcePGxZgxY2Lp0qXnvDEoF+azPP+33HLLOcfA/fffn9fjlESc/PSnP42WlpZYs2ZN/OpXv4rZs2fHwoUL49ixY8UebUi47rrr4siRI7nlF7/4RbFHKlm9vb0xe/bsWL9+/Xnvf/LJJ+OHP/xhPPXUU7Fr16743Oc+FwsXLoxTp079gSctXZ/2M4iIWLRo0YBj4tlnn/0DTljaOjo6orm5OXbu3Bmvv/56nDlzJhYsWBC9vb25bVavXh0vv/xyPP/889HR0RGHDx+OO+64o4hTl47P8vxHRKxYsWLAMfDkk0/m90BZCbjxxhuz5ubm3O2zZ89mtbW1WVtbWxGnGhrWrFmTzZ49u9hjDEkRkW3atCl3u7+/P6upqcm+//3v59adOHEiKy8vz5599tkiTFj6fv9nkGVZtmzZsuz2228vyjxD0bFjx7KIyDo6OrIs++1/8yNGjMief/753Db//d//nUVEtmPHjmKNWbJ+//nPsiz70z/90+zP//zPL2q/l/yZk9OnT8fu3btj/vz5uXXDhg2L+fPnx44dO4o42dDx7rvvRm1tbVx11VXxzW9+Mw4ePFjskYakAwcORFdX14BjoaqqKhoaGhwLf2Dbtm2LCRMmxIwZM+KBBx6I48ePF3ukktXd3R0Rkfuwud27d8eZM2cGHAczZ86MKVOmOA4Gwe8//7/zzDPPxPjx42PWrFnR2toaH374YV77LfqnEl+s3/zmN3H27Nlz3n124sSJ8c477xRpqqGjoaEhNm7cGDNmzIgjR47E2rVr48tf/nK8/fbbUVFRUezxhpSurq6IiPMeC7+7j8G3aNGiuOOOO2LatGmxf//++Mu//MtoamqKHTt2xPDhw4s9Xknp7++PVatWxc033xyzZs2KiN8eByNHjjznA2QdB4V3vuc/IuKee+6JqVOnRm1tbezduz
cefvjh6OzsjBdeeOEz7/uSjxOKq6mpKffv+vr6aGhoiKlTp8bPfvazWL58eREng+K46667cv++/vrro76+PqZPnx7btm2LefPmFXGy0tPc3Bxvv/2269yK5OOe/29/+9u5f19//fUxadKkmDdvXuzfvz+mT5/+mfZ9yf9ZZ/z48TF8+PBzrsQ+evRo1NTUFGmqoWvs2LHx+c9/Pvbt21fsUYac3/337lhIy1VXXRXjx493TBTYypUr45VXXomf//znMXny5Nz6mpqaOH36dJw4cWLA9o6Dwvq45/98GhoaIiLyOgYu+TgZOXJkzJkzJ7Zu3Zpb19/fH1u3bo3GxsYiTjY0nTx5Mvbv3x+TJk0q9ihDzrRp06KmpmbAsdDT0xO7du1yLBTR+++/H8ePH3dMFEiWZbFy5crYtGlTvPnmmzFt2rQB98+ZMydGjBgx4Djo7OyMgwcPOg4K4NOe//PZs2dPRERex0BJ/FmnpaUlli1bFl/84hfjxhtvjHXr1kVvb2/cd999xR6t5H3nO9+JxYsXx9SpU+Pw4cOxZs2aGD58eNx9993FHq0knTx5csD/fRw4cCD27NkT1dXVMWXKlFi1alV873vfi2uuuSamTZsWjz32WNTW1saSJUuKN3SJ+aSfQXV1daxduzaWLl0aNTU1sX///njooYfi6quvjoULFxZx6tLR3Nwc7e3t8dJLL0VFRUXuOpKqqqoYNWpUVFVVxfLly6OlpSWqq6ujsrIyHnzwwWhsbIybbrqpyNNf+j7t+d+/f3+0t7fHrbfeGuPGjYu9e/fG6tWrY+7cuVFfX//ZH+iiXuuTkH/4h3/IpkyZko0cOTK78cYbs507dxZ7pCHhzjvvzCZNmpSNHDky++M//uPszjvvzPbt21fssUrWz3/+8ywizlmWLVuWZdlvX0782GOPZRMnTszKy8uzefPmZZ2dncUdusR80s/gww8/zBYsWJBdccUV2YgRI7KpU6dmK1asyLq6uoo9dsk433MfEdnTTz+d2+b//u//sj/7sz/L/uiP/igbPXp09vWvfz07cuRI8YYuIZ/2/B88eDCbO3duVl1dnZWXl2dXX3119hd/8RdZd3d3Xo9T9v89GABAEi75a04AgNIiTgCApIgTACAp4gQASIo4AQCSIk4AgKSIEwAgKeIEAEiKOAEAkiJOAICkiBMAICniBABIyv8DUYXMv4kKHLAAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.hist(bg_trials[\"ts\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/user_manual.pdf b/doc/user_manual.pdf index fe2addda01..497a56b872 100644 Binary files a/doc/user_manual.pdf and b/doc/user_manual.pdf differ diff --git a/doc/user_manual.tex b/doc/user_manual.tex index f7a80bedee..ba98625fad 100644 --- a/doc/user_manual.tex +++ b/doc/user_manual.tex @@ -15,13 +15,19 @@ \newcommand{\psk}{\vec{p}_{\mathrm{s}_k}} \newcommand{\hatps}{\vec{\hat{p}}_{\mathrm{s}}} \newcommand{\xs}{\vec{x}_{\mathrm{s}}} -\newcommand{\xsk}{\vec{x}_{\mathrm{s}_k}} \newcommand{\dPhisdE}{\frac{\mathrm{d}\Phi_{\mathrm{s}}}{\mathrm{d}E}} \begin{document} +\section{SkyLLH and this document} + +This document describes the mathematical formalism used within the SkyLLH +source code. Because SkyLLH is structured around the mathematical objects of the +log-likelihood ratio function, it should help the user and developer to +understand SkyLLH's class structure and implementation. + \section{The Hypotheses} Before performing a statistical test, is is important to formulate the exact @@ -37,18 +43,16 @@ \section{The Hypotheses} for instance a neutrino or gamma-ray particle flux from a source. 
Such a general flux can be parameterized as \begin{equation} - \Phi_{\mathrm{S}}(\alpha,\delta,E,t|\xs, \ps), + \Phi_{\mathrm{S}}(\alpha,\delta,E,t|\ps), \label{eq:PhiS-general} \end{equation} which is a function of the celestial coordinates right-ascension, $\alpha$, and declination, $\delta$, as well as the energy and time of the signal -particle, given the source to be at location $\xs$ with source -parameters $\ps$. At this stage $\xs$ doesn't have to be a single coordinate, -but could also describe an extended source, e.g. the galactic plane. +particle, given the source parameters $\ps$. This flux is a differential flux, thus can be written as \begin{equation} - \frac{\mathrm{d}\Phi_{\mathrm{S}}(\alpha,\delta,E,t|\xs, \ps)}{\mathrm{d}A\mathrm{d}\Omega\mathrm{d}E\mathrm{d}t}, + \frac{\mathrm{d}\Phi_{\mathrm{S}}(\alpha,\delta,E,t|\ps)}{\mathrm{d}A\mathrm{d}\Omega\mathrm{d}E\mathrm{d}t}, \label{eq:differential-PhiS-general} \end{equation} where $A$ and $\Omega$ denotes area and solid-angle, respectively. @@ -71,7 +75,7 @@ \subsection{Factorized Flux Models} flux model is already an assumption on the source. Such a factorized flux model is of the form \begin{equation} - \Phi_{\mathrm{S}}(\alpha,\delta,E,t|\xs,\ps) = \Phi_0 \Psi_{\mathrm{S}}(\alpha,\delta|\xs,\ps) \epsilon_{\mathrm{S}}(E|\ps) T_{\mathrm{S}}(t|\ps), + \Phi_{\mathrm{S}}(\alpha,\delta,E,t|\ps) = \Phi_0 \Psi_{\mathrm{S}}(\alpha,\delta|\ps) \epsilon_{\mathrm{S}}(E|\ps) T_{\mathrm{S}}(t|\ps), \end{equation} where $\Phi_0$ is the flux normalization carrying the differential flux units, and $\Psi_{\mathrm{S}}$, $\epsilon_{\mathrm{S}}$, and $T_{\mathrm{S}}$ are the @@ -88,13 +92,13 @@ \subsection{Point-like Source Factorized Flux Models} flux model has been used as source hypothesis. 
Thus, the spatial profile is given by two delta functions: \begin{equation} - \Psi_{\mathrm{S}}(\alpha,\delta|\xs) = \delta(\alpha-\alpha_{\mathrm{s}})\delta(\delta-\delta_{\mathrm{s}}), + \Psi_{\mathrm{S}}(\alpha,\delta|\ps) \equiv \delta(\alpha-\alpha_{\mathrm{s}})\delta(\delta-\delta_{\mathrm{s}}), \end{equation} -where the source location $\xs$ is given by a single point in the sky in -equatorial coordinates: $\xs = (\alpha_{\mathrm{s}}, \delta_{\mathrm{s}})$. -Hence, the flux model can be formulated as: +where the source location is given by a single point in the sky in +equatorial coordinates right-ascension $\alpha_{\mathrm{s}}$ and declination +$\delta_{\mathrm{s}}$. Hence, the flux model can be formulated as: \begin{equation} - \Phi_{\mathrm{S}}(\alpha,\delta,E,t|\xs,\ps) = \Phi_0 \delta(\alpha-\alpha_{\mathrm{s}})\delta(\delta-\delta_{\mathrm{s}}) \epsilon_{\mathrm{S}}(E|\ps) T_{\mathrm{S}}(t|\ps). + \Phi_{\mathrm{S}}(\alpha,\delta,E,t|\ps) \equiv \Phi_0 \delta(\alpha-\alpha_{\mathrm{s}})\delta(\delta-\delta_{\mathrm{s}}) \epsilon_{\mathrm{S}}(E|\ps) T_{\mathrm{S}}(t|\ps). \end{equation} For such point-like source flux models SkyLLH provides the @@ -157,9 +161,10 @@ \subsection{The Log-Likelihood Approach} \end{equation} where $\ns$ is the number of signal events, hence, $(N-\ns)$ the number of background events in the data sample $D$ of $N$ total events. -The set of source model parameters is denoted as $\ps$. For a point-like source -model, the source model parameters include the source position $\xs$ and the -spectral index $\gamma$ of the source flux. +The set of source model parameters is denoted as $\ps$. For example for a +point-like source +model with a power-law energy emission profile, the source model parameters +include the source position and the spectral index $\gamma$ of the source flux. $S_i(\ps)$ and $B_i$ is the value of the signal and background PDF for the $i$th data event, respectively. 
@@ -573,10 +578,11 @@ \subsection{Gradients of the Log-Likelihood Ratio} (\ref{eq:SourceWeightCoefficient}), and (\ref{eq:SumOfSourceWeightCoefficients}), it is given by \begin{equation} -\mathcal{R}_i(\ps) = \frac{\mathcal{S}_i(\ps)}{\mathcal{B}_i} = \frac{1}{A(\ps)} \sum_{k=1}^{K} a_k(\psk) \frac{\mathcal{S}_{i}(\psk)}{\mathcal{B}_{i}}. +\mathcal{R}_i(\ps) = \frac{\mathcal{S}_i(\ps)}{\mathcal{B}_i} = \frac{1}{A(\ps)} \sum_{k=1}^{K} a_k(\psk) \mathcal{R}_{i,k}(\psk), \label{eq:RiStacking} \end{equation} -The signal over background ratio $\mathcal{S}_{i}(\psk) / \mathcal{B}_{i} \equiv \mathcal{R}_{i,k}(\psk)$ +where the signal over background PDF ratio +$\mathcal{R}_{i,k}(\psk) \equiv \mathcal{S}_{i}(\psk) / \mathcal{B}_{i}$ for the source $k$ is then given by equation (\ref{eq:Ri}). Using the same set of source fit parameters $\ps$ for all sources, the @@ -597,10 +603,8 @@ \subsection{Gradients of the Log-Likelihood Ratio} \end{equation} with \begin{equation} -\frac{\mathrm{d} a_k}{\mathrm{d} p_{\mathrm{s}}} = W_k \frac{\mathrm{d}\mathcal{Y}_{\mathrm{s}_{k}}(\psk)}{\mathrm{d}{p_s}_k}, +\frac{\mathrm{d} a_k}{\mathrm{d} p_{\mathrm{s}}} = W_k \frac{\mathrm{d}\mathcal{Y}_{\mathrm{s}_{k}}(\psk)}{\mathrm{d}p_{\mathrm{s}}}. \end{equation} -where the local source parameter $p_{\mathrm{s}_{k}}$ maps to the correct global -parameter $p_{\mathrm{s}}$. In case one would fit each source individually with its own set of source fit parameters, $\psk$, $\ps$ would be a set of $K$ sets @@ -613,8 +617,8 @@ \subsection{Gradients of the Log-Likelihood Ratio} \subsection{Multiple Datasets} -With SkyLLH a set of $J$ different data samples (datasets), $\mathrm{D}_j$, can be -analyzed at once. Each data sample has its own detector signal yield, +With SkyLLH a set of $J$ different data samples (datasets), $\mathrm{D}_j$, can +be analyzed at once. Each data sample has its own detector signal yield, $\mathcal{Y}_{\mathrm{s}_j}$. 
The composite likelihood function is the product of the individual dataset @@ -629,17 +633,17 @@ \subsection{Multiple Datasets} along the different datasets is based on the detector signal yield, $\mathcal{Y}_{\mathrm{s}_j}$, of each dataset. For a single source it is given by: \begin{equation} - n_{\mathrm{s}_j}(\ns,\ps) = \ns \frac{\mathcal{Y}_{\mathrm{s}_j}(\xs,\ps)}{\sum_{j'=1}^{J} \mathcal{Y}_{\mathrm{s}_{j'}}(\xs,\ps)}, + n_{\mathrm{s}_j}(\ns,\ps) = \ns \frac{\mathcal{Y}_{\mathrm{s}_j}(\ps)}{\sum_{j'=1}^{J} \mathcal{Y}_{\mathrm{s}_{j'}}(\ps)}, \label{eq:nsjy} \end{equation} -where $\xs$ and $\ps$ denote the source position and flux fit parameters, e.g. -the spectral index $\gamma$, respectively. The detector signal yield can be -calculated via the detector effective area and the source flux (\emph{c.f.} section -\ref{sec:detsigyield}). +where $\ps$ denotes the source flux fit parameters, e.g. +the spectral index $\gamma$. The detector signal yield can be +calculated via the detector effective area and the source flux (\emph{c.f.} +section \ref{sec:detsigyield}). By defining the dataset weight factor \begin{equation} - f_j(\ps) \equiv \frac{\mathcal{Y}_{\mathrm{s}_j}(\xs,\ps)}{\sum_{j'=1}^{J} \mathcal{Y}_{\mathrm{s}_{j'}}(\xs,\ps)} + f_j(\ps) \equiv \frac{\mathcal{Y}_{\mathrm{s}_j}(\ps)}{\sum_{j'=1}^{J} \mathcal{Y}_{\mathrm{s}_{j'}}(\ps)} \label{eq:dataset-weight-factor-single-source} \end{equation} with the property @@ -668,8 +672,8 @@ \subsection{Multiple Datasets} \label{eq:logLambdaOfLogLambdaj} \end{equation} -For multiple point sources, i.e. a stacking of $K$ point sources with positions -$\xsk$, the dataset weight factor of each single source needs +For multiple point sources, i.e. a stacking of $K$ point sources, the dataset +weight factor of each single source needs to be taken into account via Bayes' theorem. 
Thus, $f_{j}(\ps)$ can be written as the sum of the products of the dataset weight factor $f_{j}(\psk)$ for source $k$, as given by equation (\ref{eq:dataset-weight-factor-single-source}), @@ -680,25 +684,50 @@ \subsection{Multiple Datasets} \end{equation} The relative strength of source $k$ can be written as \begin{equation} - f_{k}(\psk) = \frac{\sum_{j=1}^{J} \mathcal{Y}_{\mathrm{s}_{j,k}}(\xsk,\psk)}{\sum_{\kappa=1}^{K} \sum_{j=1}^{J} \mathcal{Y}_{\mathrm{s}_{j,\kappa}}(\vec{x}_{\mathrm{s}_\kappa},\vec{p}_{\mathrm{s}_\kappa}) } + f_{k}(\psk) = \frac{\sum_{j=1}^{J} \mathcal{Y}_{\mathrm{s}_{j,k}}(\psk)}{ \sum_{j=1}^{J} \sum_{k'=1}^{K} \mathcal{Y}_{\mathrm{s}_{j,k'}}(\vec{p}_{\mathrm{s}_{k'}}) } \label{eq:fk} \end{equation} -By combining equation \ref{eq:dataset-weight-factor-single-source} with $\xs \equiv \xsk$ -and $\ps \equiv \psk$, and equation \ref{eq:fk}, the expression for +By combining equation \ref{eq:dataset-weight-factor-single-source} with +$\ps \equiv \psk$, and equation \ref{eq:fk}, the expression for $f_{j}(\ps)$ for multiple sources is given by: \begin{equation} f_{j}(\ps) = \sum_{k=1}^{K} - \frac{\left(\sum_{j'=1}^{J} \mathcal{Y}_{\mathrm{s}_{j',k}}(\xsk,\psk)\right) \mathcal{Y}_{\mathrm{s}_{j,k}}(\xsk,\psk)} - {\left(\sum_{\kappa=1}^{K} \sum_{j'=1}^{J} \mathcal{Y}_{\mathrm{s}_{j',\kappa}}(\vec{x}_{\mathrm{s}_\kappa},\vec{p}_{\mathrm{s}_\kappa})\right) \left( \sum_{j'=1}^{J} \mathcal{Y}_{\mathrm{s}_{j',k}}(\xsk,\psk) \right)} + \frac{\left(\sum_{j'=1}^{J} \mathcal{Y}_{\mathrm{s}_{j',k}}(\psk)\right) \mathcal{Y}_{\mathrm{s}_{j,k}}(\psk)} + {\left(\sum_{k'=1}^{K} \sum_{j'=1}^{J} \mathcal{Y}_{\mathrm{s}_{j',k'}}(\vec{p}_{\mathrm{s}_{k'}})\right) \left( \sum_{j'=1}^{J} \mathcal{Y}_{\mathrm{s}_{j',k}}(\psk) \right)} \end{equation} The sum over the datasets of the detector signal yield for source $k$ cancels out leaving the simplified equation \begin{equation} - f_{j}(\ps) = \frac{\sum_{k=1}^{K} \mathcal{Y}_{\mathrm{s}_{j,k}}(\xsk,\psk)} - 
{\sum_{k=1}^{K} \sum_{j'=1}^{J} \mathcal{Y}_{\mathrm{s}_{j',k}}(\xsk,\psk)}. + f_{j}(\ps) = \frac{\sum_{k=1}^{K} \mathcal{Y}_{\mathrm{s}_{j,k}}(\psk)} + {\sum_{k=1}^{K} \sum_{j'=1}^{J} \mathcal{Y}_{\mathrm{s}_{j',k}}(\psk)}. \label{eq:dataset-weight-factor-multiple-sources} \end{equation} +Equation \ref{eq:dataset-weight-factor-multiple-sources} assumes an equal +strength weighting of all sources, \emph{i.e.} $W_k = 1 \forall k$. In the case +of non-equal source strength weighting, the detector signal yield +$\mathcal{Y}_{j,k}$ needs to be multiplied with the source strength weight +$W_k$: +\begin{equation} + f_{j}(\ps) = \frac{\sum_{k=1}^{K} W_k \mathcal{Y}_{\mathrm{s}_{j,k}}(\psk)} + {\sum_{j'=1}^{J} \sum_{k=1}^{K} W_k \mathcal{Y}_{\mathrm{s}_{j',k}}(\psk)}. + \label{eq:dataset-weight-factor-multiple-sources-with-source-strength-weight} +\end{equation} + +Using the definition \ref{eq:SourceWeightCoefficient} for the source-weight +coefficient, \emph{i.e.} +\begin{equation} +a_{j,k}(\psk) \equiv W_k \mathcal{Y}_{\mathrm{s}_{j,k}}(\psk), +\end{equation} +equation +\ref{eq:dataset-weight-factor-multiple-sources-with-source-strength-weight} +reads +\begin{equation} + f_{j}(\ps) = \frac{\sum_{k=1}^{K} a_{j,k}(\psk)} + {\sum_{j'=1}^{J} \sum_{k=1}^{K} a_{j',k}(\psk)}. + \label{eq:dataset-weight-factor-multi-sources-with-source-weight-coefficient} +\end{equation} + \subsection{Gradients of the Multi-Dataset Log-Likelihood Ratio} By using equation (\ref{eq:logLambdaOfLogLambdaj}) for the combined log-likelihood @@ -713,7 +742,7 @@ \subsection{Gradients of the Multi-Dataset Log-Likelihood Ratio} Its second derivative w.r.t. 
$\ns$ is given by \begin{align} \frac{\mathrm{d}^2\log\Lambda(\ns,\ps)}{\mathrm{d}\ns^2} &= \sum_{j=1}^{J} \frac{\mathrm{d}}{\mathrm{d}\ns}\left( \frac{\mathrm{d}\log\Lambda_j(n_{\mathrm{s}_j},\ps)}{\mathrm{d}n_{\mathrm{s}_j}} \right) \frac{\mathrm{d} n_{\mathrm{s}_j}}{\mathrm{d} \ns}\\ - &= \sum_{j=1}^{J} \frac{\mathrm{d}^2\log\Lambda_j(n_{\mathrm{s}_j},\ps)}{\mathrm{d}n_{\mathrm{s}_j}^2} \left( \frac{\mathrm{d} n_{\mathrm{s}_j}}{\mathrm{d} \ns} \right)^2. +&= \sum_{j=1}^{J} \frac{\mathrm{d}^2\log\Lambda_j(n_{\mathrm{s}_j},\ps)}{\mathrm{d}n_{\mathrm{s}_j}^2} \left( \frac{\mathrm{d} n_{\mathrm{s}_j}}{\mathrm{d} \ns} \right)^2. \end{align} The derivative w.r.t. a single source fit parameter, $p_{\mathrm{s}}$, consists @@ -730,40 +759,45 @@ \subsection{Gradients of the Multi-Dataset Log-Likelihood Ratio} weight factor, where $f_j(\ps)$ is given by equation (\ref{eq:dataset-weight-factor-single-source}), reads via the quotient rule of differentiation: \begin{equation} -\frac{\mathrm{d}f_j(\ps)}{\mathrm{d}p_{\mathrm{s}}} = \frac{\frac{\mathrm{d}\mathcal{Y}_{\mathrm{s}_j}(\xs,\ps)}{\mathrm{d}p_{\mathrm{s}}} \sum_{j'=1}^{J}\mathcal{Y}_{\mathrm{s}_{j'}}(\xs,\ps) - \mathcal{Y}_{\mathrm{s}_j}(\xs,\ps) \sum_{j'=1}^{J} \frac{\mathrm{d}\mathcal{Y}_{\mathrm{s}_{j'}}(\xs,\ps)}{\mathrm{d}p_{\mathrm{s}}} }{\left( \sum_{j'=1}^{J} \mathcal{Y}_{\mathrm{s}_{j'}}(\xs,\ps) \right)^2}. +\frac{\mathrm{d}f_j(\ps)}{\mathrm{d}p_{\mathrm{s}}} = \frac{\frac{\mathrm{d}\mathcal{Y}_{\mathrm{s}_j}(\ps)}{\mathrm{d}p_{\mathrm{s}}} \sum_{j'=1}^{J}\mathcal{Y}_{\mathrm{s}_{j'}}(\ps) - \mathcal{Y}_{\mathrm{s}_j}(\ps) \sum_{j'=1}^{J} \frac{\mathrm{d}\mathcal{Y}_{\mathrm{s}_{j'}}(\ps)}{\mathrm{d}p_{\mathrm{s}}} }{\left( \sum_{j'=1}^{J} \mathcal{Y}_{\mathrm{s}_{j'}}(\ps) \right)^2}. 
\end{equation} In case of multiple sources (stacking), the expression for the derivative of the dataset weight factor, where $f_j(\ps)$ is given by equation -(\ref{eq:dataset-weight-factor-multiple-sources}) reads via the quotient rule of -differentiation: +(\ref{eq:dataset-weight-factor-multi-sources-with-source-weight-coefficient}) +reads via the quotient rule of differentiation: \begin{equation} \frac{\mathrm{d} f_j(\ps)}{\mathrm{d}p_{\mathrm{s}}} = - \frac{\left(\sum_{k=1}^{K} \frac{\mathrm{d}\mathcal{Y}_{\mathrm{s}_{j,k}}}{\mathrm{d}p_{\mathrm{s}}}\right) \left(\sum_{k=1}^{K}\sum_{j'=1}^{J}\mathcal{Y}_{\mathrm{s}_{j',k}}\right) - \left(\sum_{k=1}^{K}\mathcal{Y}_{\mathrm{s}_{j,k}}\right)\left(\sum_{k=1}^{K}\sum_{j'=1}^{J}\frac{\mathrm{d}\mathcal{Y}_{\mathrm{s}_{j',k}}}{\mathrm{d}p_{\mathrm{s}}}\right)} - {\left(\sum_{k=1}^{K}\sum_{j'=1}^{J}\mathcal{Y}_{\mathrm{s}_{j',k}} \right)^2} + \frac{\left(\sum_{k=1}^{K} \frac{\mathrm{d}a_{j,k}}{\mathrm{d}p_{\mathrm{s}}}\right) \left(\sum_{k=1}^{K}\sum_{j'=1}^{J}a_{j',k}\right) - \left(\sum_{k=1}^{K} a_{j,k}\right)\left(\sum_{k=1}^{K}\sum_{j'=1}^{J}\frac{\mathrm{d}a_{j',k}}{\mathrm{d}p_{\mathrm{s}}}\right)} + {\left(\sum_{k=1}^{K}\sum_{j'=1}^{J}a_{j',k} \right)^2} \end{equation} \section{Detector Signal Yield} \label{sec:detsigyield} -The detector signal yield, $\mathcal{Y}_{\mathrm{s}_{j,k}}(\xsk,\psk)$, +The detector signal yield, $\mathcal{Y}_{\mathrm{s}_{j,k}}(\psk)$, is the mean number of expected signal events in the detector from a given source $k$ in a given data sample $j$. 
For a differential source flux, $\mathrm{d}\Phi_{\mathrm{s}}/(\mathrm{d}A\mathrm{d}\Omega\mathrm{d}E\mathrm{d}t)$, it is the integral of the product of the detector effective area and this differential flux over the solid-angle, energy, and time of the source: \begin{equation} - \mathcal{Y}_{\mathrm{s}_{j,k}}(\xsk,\psk) \equiv \int_{\Omega_{\mathrm{s}_k}} \mathrm{d}\Omega \int_0^\infty \mathrm{d}E \int_{t_{\mathrm{start}_j}}^{t_{\mathrm{end}_j}}\mathrm{d}t A_{\mathrm{eff}_j}(E,t|\xsk) \frac{\mathrm{d}\Phi_{\mathrm{s}}(E,t|\psk)}{\mathrm{d}A\mathrm{d}\Omega\mathrm{d}E\mathrm{d}t} + \mathcal{Y}_{\mathrm{s}_{j,k}}(\psk) \equiv \int_{\Omega_{\mathrm{s}_k}} \mathrm{d}\Omega \int_0^\infty \mathrm{d}E \int_{t_{\mathrm{start}_j}}^{t_{\mathrm{end}_j}}\mathrm{d}t A_{\mathrm{eff}_j}(E,t|\psk) \frac{\mathrm{d}\Phi_{\mathrm{s}}(E,t|\psk)}{\mathrm{d}A\mathrm{d}\Omega\mathrm{d}E\mathrm{d}t} \label{eq:Ysj} \end{equation} -In the most-general case, the source position $\xs$ consists of three -quantities: right-ascension, declination, and observation time, i.e. -$\xs = (\alpha_{\mathrm{s}},\delta_{\mathrm{s}},t_{\mathrm{obs}})$. -The time-dependent effective area $A_{\mathrm{eff}_j}(E,t|\xsk)$ must account +In the most-general case, the source position $\xs$ consists of celestial +coordinates right-ascension and declination, which might be a function of the +observation time $t_{\mathrm{obs}}$ for very long data taking periods on the +order of several tens of years, i.e. due to the movement of the solar system +w.r.t. the source: +$\xs = \{\alpha_{\mathrm{s}}(t_{\mathrm{obs}}),\delta_{\mathrm{s}}(t_{\mathrm{obs}})\}$. + + +The time-dependent effective area $A_{\mathrm{eff}_j}(E,t|\psk)$ must account for the detector off-time intervals within the data sample $j$. 
In cases, where the effective area is constant within a data sample, it can be written as \begin{equation} - A_{\mathrm{eff}_j}(E,t|\xsk) = A_{\mathrm{eff}_j}(E|\xsk) T_{\mathrm{live}}(t) + A_{\mathrm{eff}_j}(E,t|\psk) \equiv A_{\mathrm{eff}_j}(E|\psk) T_{\mathrm{live}}(t) \end{equation} with $T_{\mathrm{live}}(t)$ is the detector live-time function as given by equation (\ref{eq:Tlive}). @@ -773,9 +807,9 @@ \subsection{Effective Area} In SkyLLH the effective area $A_{\mathrm{eff},j}$ of a data sample $j$ is not calculated separately in order to avoid binning effects. However, the effective area can be calculated using the Monte-Carlo weights, \code{mcweight}\footnote{In IceCube -known as ``OneWeight'', but which already includes the number of used MC files.}, +known as ``OneWeight'', which already includes the number of used MC files.}, of the simulation events. -The Monte-Carlo weights have the unit GeV~cm$^2$~sr. +In IceCube simulation the Monte-Carlo weights have the unit GeV~cm$^2$~sr. Using the Monte-Carlo weight, $w_{m,j}$, of the $m$th event of data sample $j$, that corresponds to a signal event, i.e. an event that has similar properties as a signal event (\emph{e.g.} same true direction), the effective area is given by @@ -786,99 +820,30 @@ \subsection{Effective Area} \end{equation} -\subsection{The DetSigYield Class} - -\class{DetSigYield} provides an abstract base class for a detector signal yield -class to compute the integral given in equation \eq{eq:Ysj}. The detector signal -yield depends on the flux model and its source parameters, which might -change during the likelihood maximization process. It is also dependent on the -detector effective area, hence is detector dependent. Thus, -\class{DetSigYield} must be provided with a detector signal -yield implementation method derived from the \class{DetSigYieldImplMethod} -class. 
- -Detector signal yield values can be retrieved via the call operator -\code{\_\_call\_\_(src, src\_flux\_params)}, which takes the celestial source -position(s), and the additional source flux parameters as arguments. - -\subsubsection{The DetSigYieldImplMethod Class} - -\class{DetSigYieldImplMethod} is an abstract base class and defines the interface -between the detector signal yield implementation method and the -\class{DetSigYield} class. - -% List of detector signal yield implementation methods. -Table \ref{tbl:I3DetSigYieldImplMethod} lists all available IceCube specific -detector signal yield implementation methods and their description. -\begin{table} -\caption{IceCube specific detector signal yield implementation methods.} -\label{tbl:I3DetSigYieldImplMethod} - -\begin{tabular}{p{.95\textwidth}} -\hline -Name of Class \& Description \\ -\hline -\textbf{FixedFluxPointLikeSourceI3DetSigYieldImplMethod} - -IceCube detector signal yield - implementation method for a fixed flux model and a point-like source. - The flux model might contain flux parameters, but these are not fit in the - likelihood maximization process. - This implementation assumes that the detector effective area depends solely - on the declination of the source. This method creates - a spline function of given order for the logarithmic values of the - $\sin(\delta)$-dependent detector signal yield. - - The constructor of this implementation method requires a $\sin(\delta)$ - binning definition for the Monte-Carlo events and the order of the spline - function. - - This implementation method create a detector signal yield instance of class - \class{FixedFluxPointLikeSourceI3DetSigYield}. -\\ -\hline -\textbf{PowerLawFluxPointLikeSourceI3DetSigYieldImplMethod} - -IceCube detector signal - yield implementation method for a - power law flux model, implemented by the \class{PowerLawFlux} class, an a - point-like source. 
- This method creates a 2D spline function of given orders for the logarithmic - values of the $\sin(\delta)$-dependent detector signal yield for a - range of $\gamma$ values. This implementation method supports - multi-processing. -\end{tabular} -\end{table} - \section{The Concept of Source Hypothesis Groups} The analyses in SkyLLH rely heavily on the calculation of detector signal -efficiencies. As seen in section \ref{sec:detsigyield}, the detector signal +yields. As seen in section \ref{sec:detsigyield}, the detector signal efficiency depends on the source hypothesis (spatial model and flux model) and the detector response (dataset). Hence, the analyses require detector signal -efficiency instances for each combination of source and dataset. +yield instances for each combination of source and dataset. However, the sources might be of the same kind, i.e. having the same spatial -model and the same flux model. For such sources detector signal efficiency +model and the same flux model. For such sources detector signal yield instances are needed only for each dataset. Thus, we define a group -of sources with the same spatial model and flux model as a \emph{source hypothesis group}, -$G_{\mathrm{s}}$. +of sources with the same spatial model and flux model as a +\emph{source hypothesis group}. -A source hypothesis group has a list of spatial source models, e.g. point-like source -locations in case of point-like sources, a flux model, and a detection signal -efficiency implementation method assigned. +A source hypothesis group has a list of spatial source models, e.g. point-like +sources, a flux model, and a detector signal yield builder assigned. SkyLLH provides the \class{SourceHypoGroupManager} class to define the groups of source hypotheses. \section{Implemented Log-Likelihood Models} -This section describes the implemented log-likelihood models. 
\cite{TimeDepPSSearchMethods2010} - -% \subsection{Time Dependent Point-Source Flare} -% -% The \class{TimeDepPSFlareLHModel} class provides the likelihood model for searching for a point source with unknown time-dependence. -% The search is based on the formulism described in \cite{TimeDepPSSearchMethods2010}. -% -% The model utilizes a two-component likelihood function with signal and background events. +This section describes the implemented log-likelihood models. +\cite{TimeDepPSSearchMethods2010} + + \appendix diff --git a/examples/multiproc.py b/examples/multiproc.py index afd5a8e026..367b0f425d 100644 --- a/examples/multiproc.py +++ b/examples/multiproc.py @@ -10,5 +10,13 @@ def f(x, c=0.): return x**2 + c -res = parallelize(f, [((x,),{'c':x}) for x in np.arange(1, 10, 1)], ncpu=3) -print(res) + +if __name__ == '__main__': + res = parallelize( + func=f, + args_list=[ + ((x,), {'c': x}) + for x in np.arange(1, 10, 1) + ], + ncpu=3) + print(res) diff --git a/examples/scrambling.py b/examples/scrambling.py index 2ef72ac7ed..69b0fc1488 100644 --- a/examples/scrambling.py +++ b/examples/scrambling.py @@ -1,11 +1,21 @@ -# Example how to use the data scrambling mechanism of skyllh. +# -*- coding: utf-8 -*- + +""" +Example how to use the data scrambling mechanism of skyllh. +""" import numpy as np -from skyllh.core.random import RandomStateService -from skyllh.core.scrambling import DataScrambler, RAScramblingMethod +from skyllh.core.random import ( + RandomStateService, +) +from skyllh.core.scrambling import ( + DataScrambler, + UniformRAScramblingMethod, +) + -def gen_data(rss, N=100, window=(0,365)): +def gen_data(rss, N=100, window=(0, 365)): """Create uniformly distributed data on sphere. """ arr = np.empty((N,), dtype=[("ra", np.float64), ("dec", np.float64)]) @@ -14,15 +24,19 @@ def gen_data(rss, N=100, window=(0,365)): return arr -rss = RandomStateService(seed=1) -# Generate some psydo data. 
-data = gen_data(rss, N=10) -print(data['ra']) +if __name__ == '__main__': + rss = RandomStateService(seed=1) + + # Generate some pseudo data. + data = gen_data(rss, N=10) + print(data['ra']) -# Create DataScrambler instance with RA scrambling. -scr = DataScrambler(method=RAScramblingMethod(rss=rss)) + # Create DataScrambler instance with uniform RA scrambling. + scr = DataScrambler(method=UniformRAScramblingMethod()) -# Scramble the data. -scr.scramble_data(data) -print(data['ra']) + # Scramble the data. + scr.scramble_data( + rss=rss, + data=data) + print(data['ra']) diff --git a/requirements.txt b/requirements.txt index 9fd02bc0d7..386f5a54a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ astropy numpy +pyarrow scipy iminuit -matplotlib \ No newline at end of file +matplotlib +tqdm diff --git a/skyllh/__init__.py b/skyllh/__init__.py index c32ef5e0db..55abd31aff 100644 --- a/skyllh/__init__.py +++ b/skyllh/__init__.py @@ -1,15 +1,19 @@ # -*- coding: utf-8 -*- +import logging +import multiprocessing as mp + +from . import _version + # Initialize top-level logger with a do-nothing NullHandler. It is required to # be able to log messages when user has not set up any handler for the logger. -import logging logging.getLogger(__name__).addHandler(logging.NullHandler()) # Change macOS default multiprocessing start method 'spawn' to 'fork'. -import multiprocessing as mp + try: mp.set_start_method("fork") -except: +except Exception: # It could be already set by another package. if mp.get_start_method() != "fork": logging.warning( @@ -17,5 +21,4 @@ "Parallel calculations using 'ncpu' argument != 1 may break." ) -from . 
import _version __version__ = _version.get_versions()['version'] diff --git a/skyllh/_version.py b/skyllh/_version.py index 702d15f438..1b2459c736 100644 --- a/skyllh/_version.py +++ b/skyllh/_version.py @@ -166,7 +166,7 @@ def git_get_keywords(versionfile_abs): @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): +def git_versions_from_keywords(keywords, tag_prefix, verbose): # noqa: C901 """Get version information from git keywords.""" if "refnames" not in keywords: raise NotThisMethod("Short version file found") @@ -230,7 +230,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): # noqa: C901 """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* @@ -576,7 +576,7 @@ def render_git_describe_long(pieces): return rendered -def render(pieces, style): +def render(pieces, style): # noqa: C901 """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", @@ -612,7 +612,7 @@ def render(pieces, style): "date": pieces.get("date")} -def get_versions(): +def get_versions(): # noqa: C901 """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. 
Some diff --git a/skyllh/analyses/i3/publicdata_ps/aeff.py b/skyllh/analyses/i3/publicdata_ps/aeff.py index 798b758983..923fb34502 100644 --- a/skyllh/analyses/i3/publicdata_ps/aeff.py +++ b/skyllh/analyses/i3/publicdata_ps/aeff.py @@ -2,16 +2,21 @@ import numpy as np -from scipy import interpolate -from scipy import integrate +from scipy import ( + integrate, + interpolate, +) +from skyllh.analyses.i3.publicdata_ps.utils import ( + FctSpline2D, +) from skyllh.core.binning import ( - get_bincenters_from_binedges, get_bin_indices_from_lower_and_upper_binedges, + get_bincenters_from_binedges, +) +from skyllh.core.storage import ( + create_FileLoader, ) -from skyllh.core.storage import create_FileLoader - -from skyllh.analyses.i3.publicdata_ps.utils import FctSpline2D def load_effective_area_array(pathfilenames): @@ -58,12 +63,14 @@ def load_effective_area_array(pathfilenames): decnu_binedges_lower = np.unique(data['decnu_min']) decnu_binedges_upper = np.unique(data['decnu_max']) - if(len(log10_enu_binedges_lower) != len(log10_enu_binedges_upper)): - raise ValueError('Cannot extract the log10(E/GeV) binning of the ' + if len(log10_enu_binedges_lower) != len(log10_enu_binedges_upper): + raise ValueError( + 'Cannot extract the log10(E/GeV) binning of the ' 'effective area from data file "{}". The number of lower and upper ' 'bin edges is not equal!'.format(str(loader.pathfilename_list))) - if(len(decnu_binedges_lower) != len(decnu_binedges_upper)): - raise ValueError('Cannot extract the dec_nu binning of the effective ' + if len(decnu_binedges_lower) != len(decnu_binedges_upper): + raise ValueError( + 'Cannot extract the dec_nu binning of the effective ' 'area from data file "{}". The number of lower and upper bin edges ' 'is not equal!'.format(str(loader.pathfilename_list))) @@ -140,7 +147,7 @@ def __init__( # Cut the energies where all effective areas are zero. 
m = np.sum(self._aeff_decnu_log10enu, axis=0) > 0 - self._aeff_decnu_log10enu = self._aeff_decnu_log10enu[:,m] + self._aeff_decnu_log10enu = self._aeff_decnu_log10enu[:, m] self._log10_enu_binedges_lower = self._log10_enu_binedges_lower[m] self._log10_enu_binedges_upper = self._log10_enu_binedges_upper[m] @@ -174,15 +181,16 @@ def __init__( (self.log10_enu_bincenters >= min_log10enu) & (self.log10_enu_bincenters < max_log10enu) ) - bin_centers = self.log10_enu_bincenters[m] low_bin_edges = self._log10_enu_binedges_lower[m] high_bin_edges = self._log10_enu_binedges_upper[m] # Get the detection probability P(E_nu | sin(dec)) per bin. self.det_prob = self.get_detection_prob_for_decnu( src_dec, - 10**low_bin_edges, 10**high_bin_edges, - 10**low_bin_edges[0], 10**high_bin_edges[-1] + 10**low_bin_edges, + 10**high_bin_edges, + 10**low_bin_edges[0], + 10**high_bin_edges[-1] ) @property @@ -219,6 +227,20 @@ def log10_enu_binedges(self): """ return self._log10_enu_binedges + @property + def log10_enu_binedges_lower(self): + """(read-only) The lower binedges of the log10(E_nu/GeV) neutrino energy + axis. + """ + return self._log10_enu_binedges_lower + + @property + def log10_enu_binedges_upper(self): + """(read-only) The upper binedges of the log10(E_nu/GeV) neutrino energy + axis. 
+ """ + return self._log10_enu_binedges_upper + @property def log10_enu_bincenters(self): """(read-only) The bin center values of the log10(E_nu/GeV) neutrino @@ -358,7 +380,7 @@ def _eval_spl_func(x): limit=200, full_output=1 )[0] - + enu_min = np.atleast_1d(enu_min) enu_max = np.atleast_1d(enu_max) @@ -375,4 +397,3 @@ def _eval_spl_func(x): det_prob[i] = integral / norm return det_prob - diff --git a/skyllh/analyses/i3/publicdata_ps/backgroundpdf.py b/skyllh/analyses/i3/publicdata_ps/backgroundpdf.py index 492914272e..ba09db8a0d 100644 --- a/skyllh/analyses/i3/publicdata_ps/backgroundpdf.py +++ b/skyllh/analyses/i3/publicdata_ps/backgroundpdf.py @@ -2,6 +2,10 @@ import numpy as np +from scipy.stats import ( + gaussian_kde, +) + from skyllh.core.binning import ( BinningDefinition, UsesBinning, @@ -9,26 +13,35 @@ from skyllh.core.pdf import ( EnergyPDF, IsBackgroundPDF, - PDFAxis + PDFAxis, +) +from skyllh.core.py import ( + classname, ) -from skyllh.core.storage import DataFieldRecordArray -from skyllh.core.timing import TaskTimer from skyllh.core.smoothing import ( UNSMOOTH_AXIS, SmoothingFilter, HistSmoothingMethod, NoHistSmoothingMethod, - NeighboringBinHistSmoothingMethod + NeighboringBinHistSmoothingMethod, +) +from skyllh.core.storage import ( + DataFieldRecordArray, +) +from skyllh.core.timing import ( + TaskTimer, ) -from skyllh.core.timing import TaskTimer -from scipy.stats import gaussian_kde +class PDBackgroundI3EnergyPDF( + EnergyPDF, + IsBackgroundPDF, + UsesBinning): + """This is the base class for an IceCube specific energy background PDF for + the public data. -class PDEnergyPDF(EnergyPDF, UsesBinning): - """This is the base class for IceCube specific energy PDF models. - IceCube energy PDFs depend solely on the energy and the - zenith angle, and hence, on the declination of the event. + IceCube energy PDFs depend solely on the energy and the zenith angle, and + hence, on the declination of the event. 
The IceCube energy PDF is modeled as a 1d histogram in energy, but for different sin(declination) bins, hence, stored as a 2d histogram. @@ -37,29 +50,38 @@ class PDEnergyPDF(EnergyPDF, UsesBinning): _KDE_BW_NORTH = 0.4 _KDE_BW_SOUTH = 0.32 - def __init__(self, data_logE, data_sinDec, data_mcweight, data_physicsweight, - logE_binning, sinDec_binning, smoothing_filter, kde_smoothing=False): - """Creates a new IceCube energy PDF object. + def __init__( + self, + data_logE, + data_sinDec, + data_mcweight, + data_physicsweight, + logE_binning, + sinDec_binning, + smoothing_filter, + kde_smoothing=False, + **kwargs): + """Creates a new IceCube energy PDF object for the public data. Parameters ---------- - data_logE : 1d ndarray - The array holding the log10(E) values of the events. - data_sinDec : 1d ndarray - The array holding the sin(dec) values of the events. - data_mcweight : 1d ndarray - The array holding the monte-carlo weights of the events. + data_logE : instance of ndarray + The 1d ndarray holding the log10(E) values of the events. + data_sinDec : instance of ndarray + The 1d ndarray holding the sin(dec) values of the events. + data_mcweight : instance of ndarray + The 1d ndarray holding the monte-carlo weights of the events. The final data weight will be the product of data_mcweight and data_physicsweight. - data_physicsweight : 1d ndarray - The array holding the physics weights of the events. + data_physicsweight : instance of ndarray + The 1d ndarray holding the physics weights of the events. The final data weight will be the product of data_mcweight and data_physicsweight. - logE_binning : BinningDefinition - The binning definition for the log(E) axis. - sinDec_binning : BinningDefinition + logE_binning : instance of BinningDefinition + The binning definition for the log10(E) axis. + sinDec_binning : instance of BinningDefinition The binning definition for the sin(declination) axis. 
- smoothing_filter : SmoothingFilter instance | None + smoothing_filter : instance of SmoothingFilter | None The smoothing filter to use for smoothing the energy histogram. If None, no smoothing will be applied. kde_smoothing : bool @@ -68,31 +90,44 @@ def __init__(self, data_logE, data_sinDec, data_mcweight, data_physicsweight, background is not zero when injecting high energy events. Default: False. """ - super(PDEnergyPDF, self).__init__() + super().__init__( + pmm=None, + **kwargs) # Define the PDF axes. - self.add_axis(PDFAxis(name='log_energy', - vmin=logE_binning.lower_edge, - vmax=logE_binning.upper_edge)) - self.add_axis(PDFAxis(name='sin_dec', - vmin=sinDec_binning.lower_edge, - vmax=sinDec_binning.upper_edge)) + self.add_axis( + PDFAxis( + name='log_energy', + vmin=logE_binning.lower_edge, + vmax=logE_binning.upper_edge)) + self.add_axis( + PDFAxis( + name='sin_dec', + vmin=sinDec_binning.lower_edge, + vmax=sinDec_binning.upper_edge)) self.add_binning(logE_binning, 'log_energy') self.add_binning(sinDec_binning, 'sin_dec') # Create the smoothing method instance tailored to the energy PDF. # We will smooth only the first axis (logE). - if((smoothing_filter is not None) and - (not isinstance(smoothing_filter, SmoothingFilter))): + if (smoothing_filter is not None) and\ + (not isinstance(smoothing_filter, SmoothingFilter)): raise TypeError( - 'The smoothing_filter argument must be None or an instance of SmoothingFilter!') - if(smoothing_filter is None): + 'The smoothing_filter argument must be None or an instance of ' + 'SmoothingFilter! ' + f'Its current type is {classname(smoothing_filter)}!') + if smoothing_filter is None: self.hist_smoothing_method = NoHistSmoothingMethod() else: self.hist_smoothing_method = NeighboringBinHistSmoothingMethod( (smoothing_filter.axis_kernel_array, UNSMOOTH_AXIS)) + if not isinstance(kde_smoothing, bool): + raise ValueError( + 'The kde_smoothing argument must be an instance of bool! 
' + f'Its current type is {classname(kde_smoothing)}!') + # We have to figure out, which histogram bins are zero due to no # monte-carlo coverage, and which due to zero physics model # contribution. @@ -100,11 +135,16 @@ def __init__(self, data_logE, data_sinDec, data_mcweight, data_physicsweight, # Create a 2D histogram with only the MC events to determine the MC # coverage. (h, bins_logE, bins_sinDec) = np.histogram2d( - data_logE, data_sinDec, + data_logE, + data_sinDec, bins=[ - logE_binning.binedges, sinDec_binning.binedges], + logE_binning.binedges, + sinDec_binning.binedges + ], range=[ - logE_binning.range, sinDec_binning.range], + logE_binning.range, + sinDec_binning.range + ], density=False) h = self._hist_smoothing_method.smooth(h) self._hist_mask_mc_covered = h > 0 @@ -117,40 +157,47 @@ def __init__(self, data_logE, data_sinDec, data_mcweight, data_physicsweight, # contribution. Note: By construction the zero physics contribution bins # are a subset of the MC covered bins. (h, bins_logE, bins_sinDec) = np.histogram2d( - data_logE[mask], data_sinDec[mask], + data_logE[mask], + data_sinDec[mask], bins=[ - logE_binning.binedges, sinDec_binning.binedges], + logE_binning.binedges, + sinDec_binning.binedges + ], range=[ - logE_binning.range, sinDec_binning.range], + logE_binning.range, + sinDec_binning.range + ], density=False) h = self._hist_smoothing_method.smooth(h) self._hist_mask_mc_covered_zero_physics = h > 0 if kde_smoothing: - # If a bandwidth is passed, apply a KDE-based smoothing with the given - # bw parameter as bandwidth for the fit. - if not isinstance(kde_smoothing, bool): - raise ValueError( - "The bandwidth parameter must be True or False!") - kde_pdf = np.empty( - (len(sinDec_binning.bincenters),), dtype=object) + # If a bandwidth is passed, apply a KDE-based smoothing with the + # given bandwidth parameter as bandwidth for the fit. 
+ kde_pdf_list = [] data_logE_masked = data_logE[~mask] data_sinDec_masked = data_sinDec[~mask] - for i in range(len(sinDec_binning.bincenters)): + + for (sindec_lower, sindec_upper) in zip( + sinDec_binning.binedges[:-1], + sinDec_binning.binedges[1:]): + sindec_mask = np.logical_and( - data_sinDec_masked >= sinDec_binning.binedges[i], - data_sinDec_masked < sinDec_binning.binedges[i+1] + data_sinDec_masked >= sindec_lower, + data_sinDec_masked < sindec_upper ) this_energy = data_logE_masked[sindec_mask] - if sinDec_binning.binedges[i] >= 0: - kde_pdf[i] = gaussian_kde( - this_energy, bw_method=self._KDE_BW_NORTH) + if sindec_lower >= 0: + kde = gaussian_kde( + this_energy, + bw_method=self._KDE_BW_NORTH) else: - kde_pdf[i] = gaussian_kde( - this_energy, bw_method=self._KDE_BW_SOUTH) - h = np.vstack( - [kde_pdf[i].evaluate(logE_binning.bincenters) - for i in range(len(sinDec_binning.bincenters))]).T + kde = gaussian_kde( + this_energy, + bw_method=self._KDE_BW_SOUTH) + kde_pdf_list.append(kde.evaluate(logE_binning.bincenters)) + + h = np.vstack(kde_pdf_list).T else: # Create a 2D histogram with only the data which has physics @@ -158,12 +205,17 @@ def __init__(self, data_logE, data_sinDec, data_mcweight, data_physicsweight, # axis manually. data_weights = data_mcweight[~mask] * data_physicsweight[~mask] (h, bins_logE, bins_sinDec) = np.histogram2d( - data_logE[~mask], data_sinDec[~mask], + data_logE[~mask], + data_sinDec[~mask], bins=[ - logE_binning.binedges, sinDec_binning.binedges], + logE_binning.binedges, + sinDec_binning.binedges + ], weights=data_weights, range=[ - logE_binning.range, sinDec_binning.range], + logE_binning.range, + sinDec_binning.range + ], density=False) # Calculate the normalization for each logE bin. 
Hence we need to sum @@ -179,16 +231,18 @@ def __init__(self, data_logE, data_sinDec, data_mcweight, data_physicsweight, @property def hist_smoothing_method(self): - """The HistSmoothingMethod instance defining the smoothing filter of the - energy PDF histogram. + """The instance of HistSmoothingMethod defining the smoothing filter of + the energy PDF histogram. """ return self._hist_smoothing_method @hist_smoothing_method.setter def hist_smoothing_method(self, method): - if(not isinstance(method, HistSmoothingMethod)): + if not isinstance(method, HistSmoothingMethod): raise TypeError( - 'The hist_smoothing_method property must be an instance of HistSmoothingMethod!') + 'The hist_smoothing_method property must be an instance of ' + 'HistSmoothingMethod! ' + f'Its current type is {classname(method)}!') self._hist_smoothing_method = method @property @@ -218,10 +272,89 @@ def hist_mask_mc_covered_with_physics(self): bins for which there is monte-carlo coverage and has physics contribution. """ - return self._hist_mask_mc_covered & ~self._hist_mask_mc_covered_zero_physics + return ( + self._hist_mask_mc_covered & + ~self._hist_mask_mc_covered_zero_physics) + + def initialize_for_new_trial( + self, + tdm, + tl=None, + **kwargs): + """Pre-compute the probability densitiy values of the trial data, + which has to be done only once for a particular trial data. + """ + + logE_binning = self.get_binning('log_energy') + sinDec_binning = self.get_binning('sin_dec') + + logE_idx = np.digitize( + tdm['log_energy'], logE_binning.binedges) - 1 + sinDec_idx = np.digitize( + tdm['sin_dec'], sinDec_binning.binedges) - 1 + + with TaskTimer(tl, 'Evaluating logE-sinDec histogram.'): + self._pd = self._hist_logE_sinDec[(logE_idx, sinDec_idx)] + + def assert_is_valid_for_trial_data( + self, + tdm, + tl=None): + """Checks if this energy PDF covers the entire value range of the trail + data events. 
+ + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial data events. + The following data fields need to exist: + + log_energy : float + The base-10 logarithm of the reconstructed energy value. + sin_dec : float + The sine of the declination value of the event. + tl : instance of TimeLord | None + The optional instance of TimeLord to measure timing information. + + Raises + ------ + ValueError + If parts of the trial data is outside the value range of this + PDF. + """ + log10emu = tdm['log_energy'] + log10emu_axis = self.axes['log_energy'] + if np.min(log10emu) < log10emu_axis.vmin: + raise ValueError( + f'The minimum log10emu value {np.min(log10emu):g} of the trial ' + 'data is lower than the minimum value of the PDF ' + f'{log10emu_axis.vmin:g}!') + if np.max(log10emu) > log10emu_axis.vmax: + raise ValueError( + f'The maximum log10emu value {np.max(log10emu):g} of the trial ' + 'data is larger than the maximum value of the PDF ' + f'{log10emu_axis.vmax}:g!') - def get_prob(self, tdm, fitparams=None, tl=None): - """Calculates the energy probability (in logE) of each event. + sindecmu = tdm['sin_dec'] + sindecmu_axis = self.axes['sin_dec'] + if np.min(sindecmu) < sindecmu_axis.vmin: + raise ValueError( + f'The minimum sindecmu value {np.min(sindecmu):g} of the trial ' + 'data is lower than the minimum value of the PDF ' + f'{sindecmu_axis.vmin:g}!') + if np.max(sindecmu) > sindecmu_axis.vmax: + raise ValueError( + f'The maximum sindecmu value {np.max(sindecmu):g} of the trial ' + 'data is larger than the maximum value of the PDF ' + f'{sindecmu_axis.vmax:g}!') + + def get_pd( + self, + tdm, + params_recarray=None, + tl=None): + """Calculates the energy probability density (in 1/log10(E/GeV)) of each + trial data event. Parameters ---------- @@ -230,45 +363,46 @@ def get_prob(self, tdm, fitparams=None, tl=None): probability should be calculated for. 
The following data fields must exist: - - 'log_energy' : float - The logarithm of the energy value of the event. - - 'sin_dec' : float + log_energy : float + The base-10 logarithm of the energy value of the event. + sin_dec : float The sin(declination) value of the event. - fitparams : None + params_recarray : None Unused interface parameter. - tl : TimeLord instance | None + tl : instance of TimeLord | None The optional TimeLord instance that should be used to measure timing information. Returns ------- - prob : 1D (N_events,) shaped ndarray - The array with the energy probability for each event. + pd : instance of ndarray + The (N_selected_events,)-shaped numpy ndarray holding the energy + probability density value for each trial data event. + grads : dict + The dictionary holding the gradients of the probability density + w.r.t. each global fit parameter. By definition this PDF does not + depend on any fit parameter, hence, this dictionary is empty. """ - get_data = tdm.get_data - - logE_binning = self.get_binning('log_energy') - sinDec_binning = self.get_binning('sin_dec') + grads = dict() - logE_idx = np.digitize( - get_data('log_energy'), logE_binning.binedges) - 1 - sinDec_idx = np.digitize( - get_data('sin_dec'), sinDec_binning.binedges) - 1 + return (self._pd, grads) - with TaskTimer(tl, 'Evaluating logE-sinDec histogram.'): - prob = self._hist_logE_sinDec[(logE_idx, sinDec_idx)] - return prob - - -class PDDataBackgroundI3EnergyPDF(PDEnergyPDF, IsBackgroundPDF): +class PDDataBackgroundI3EnergyPDF( + PDBackgroundI3EnergyPDF): """This is the IceCube energy background PDF, which gets constructed from - experimental data. This class is derived from I3EnergyPDF. + the experimental data of the public data. 
""" - def __init__(self, data_exp, logE_binning, sinDec_binning, - smoothing_filter=None, kde_smoothing=False): + def __init__( + self, + data_exp, + logE_binning, + sinDec_binning, + smoothing_filter=None, + kde_smoothing=False, + **kwargs): """Constructs a new IceCube energy background PDF from experimental data. @@ -278,23 +412,25 @@ def __init__(self, data_exp, logE_binning, sinDec_binning, The array holding the experimental data. The following data fields must exist: - - 'log_energy' : float - The logarithm of the reconstructed energy value of the data - event. - - 'sin_dec' : float + log_energy : float + The base-10 logarithm of the reconstructed energy value of the + data event. + sin_dec : float The sine of the reconstructed declination of the data event. - logE_binning : BinningDefinition + logE_binning : instance of BinningDefinition The binning definition for the binning in log10(E). - sinDec_binning : BinningDefinition + sinDec_binning : instance of BinningDefinition The binning definition for the sin(declination). - smoothing_filter : SmoothingFilter instance | None + smoothing_filter : instance of SmoothingFilter | None The smoothing filter to use for smoothing the energy histogram. If None, no smoothing will be applied. """ - if(not isinstance(data_exp, DataFieldRecordArray)): - raise TypeError('The data_exp argument must be an instance of ' - 'DataFieldRecordArray!') + if not isinstance(data_exp, DataFieldRecordArray): + raise TypeError( + 'The data_exp argument must be an instance of ' + 'DataFieldRecordArray! ' + f'Its current type is {classname(data_exp)}!') data_logE = data_exp['log_energy'] data_sinDec = data_exp['sin_dec'] @@ -302,34 +438,46 @@ def __init__(self, data_exp, logE_binning, sinDec_binning, data_mcweight = np.ones((len(data_exp),)) data_physicsweight = data_mcweight - # Create the PDF using the base class. 
- super(PDDataBackgroundI3EnergyPDF, self).__init__( - data_logE, data_sinDec, data_mcweight, data_physicsweight, - logE_binning, sinDec_binning, smoothing_filter, kde_smoothing - ) - - -class PDMCBackgroundI3EnergyPDF(EnergyPDF, IsBackgroundPDF, UsesBinning): + # Create the energy PDF using the base class. + super().__init__( + data_logE=data_logE, + data_sinDec=data_sinDec, + data_mcweight=data_mcweight, + data_physicsweight=data_physicsweight, + logE_binning=logE_binning, + sinDec_binning=sinDec_binning, + smoothing_filter=smoothing_filter, + kde_smoothing=kde_smoothing, + **kwargs) + + +class PDMCBackgroundI3EnergyPDF( + EnergyPDF, + IsBackgroundPDF, + UsesBinning): """This class provides a background energy PDF constructed from the public data and a monte-carlo background flux model. """ def __init__( - self, pdf_log10emu_sindecmu, log10emu_binning, sindecmu_binning, + self, + pdf_log10emu_sindecmu, + log10emu_binning, + sindecmu_binning, **kwargs): """Constructs a new background energy PDF with the given PDF data and binning. Parameters ---------- - pdf_log10emu_sindecmu : 2D numpy ndarray + pdf_log10emu_sindecmu : instance of numpy ndarray The (n_log10emu, n_sindecmu)-shaped 2D numpy ndarray holding the PDF values in unit 1/log10(E_mu/GeV). A copy of this data will be created and held within this class instance. - log10emu_binning : BinningDefinition + log10emu_binning : instance of BinningDefinition The binning definition for the binning in log10(E_mu/GeV). - sindecmu_binning : BinningDefinition + sindecmu_binning : instance of BinningDefinition The binning definition for the binning in sin(dec_mu). 
""" if not isinstance(pdf_log10emu_sindecmu, np.ndarray): @@ -345,7 +493,9 @@ def __init__( 'The log10emu_binning argument must be an instance of ' 'BinningDefinition!') - super().__init__(**kwargs) + super().__init__( + pmm=None, + **kwargs) self.add_axis(PDFAxis( log10emu_binning.name, @@ -363,19 +513,25 @@ def __init__( self.add_binning(log10emu_binning, name='log_energy') self.add_binning(sindecmu_binning, name='sin_dec') - def assert_is_valid_for_trial_data(self, tdm): - """Checks if this PDF covers the entire value range of the trail + def assert_is_valid_for_trial_data( + self, + tdm, + tl=None): + """Checks if this energy PDF covers the entire value range of the trail data events. Parameters ---------- - tdm : TrialDataManager instance - The TrialDataManager instance holding the data events. + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial data events. The following data fields need to exist: - 'sin_dec' - - 'log_energy' + log_energy : float + The base-10 logarithm of the reconstructed energy value. + sin_dec : float + The sine of the declination value of the event. + tl : instance of TimeLord | None + The optional instance of TimeLord to measure timing information. Raises ------ @@ -383,61 +539,68 @@ def assert_is_valid_for_trial_data(self, tdm): If parts of the trial data is outside the value range of this PDF. """ - sindecmu = tdm.get_data('sin_dec') - if np.min(sindecmu) < self.get_axis(0).vmin: + log10emu = tdm['log_energy'] + log10emu_axis = self.get_axis(0) + if np.min(log10emu) < log10emu_axis.vmin: raise ValueError( - 'The minimum sindecmu value %e of the trial data is lower ' - 'than the minimum value of the PDF %e!' 
% ( - np.min(sindecmu), self.get_axis(0).vmin)) - if np.max(sindecmu) > self.get_axis(0).vmax: + f'The minimum log10emu value {np.min(log10emu):g} of the trial ' + 'data is lower than the minimum value of the PDF ' + f'{log10emu_axis.vmin:g}!') + if np.max(log10emu) > log10emu_axis.vmax: raise ValueError( - 'The maximum sindecmu value %e of the trial data is larger ' - 'than the maximum value of the PDF %e!' % ( - np.max(sindecmu), self.get_axis(0).vmax)) + f'The maximum log10emu value {np.max(log10emu):g} of the trial ' + 'data is larger than the maximum value of the PDF ' + f'{log10emu_axis.vmax:g}!') - log10emu = tdm.get_data('log_energy') - if np.min(log10emu) < self.get_axis(1).vmin: + sindecmu = tdm['sin_dec'] + sindecmu_axis = self.get_axis(1) + if np.min(sindecmu) < sindecmu_axis.vmin: raise ValueError( - 'The minimum log10emu value %e of the trial data is lower ' - 'than the minimum value of the PDF %e!' % ( - np.min(log10emu), self.get_axis(1).vmin)) - if np.max(log10emu) > self.get_axis(1).vmax: + f'The minimum sindecmu value {np.min(sindecmu):g} of the trial ' + 'data is lower than the minimum value of the PDF ' + f'{sindecmu_axis.vmin:g}!') + if np.max(sindecmu) > sindecmu_axis.vmax: raise ValueError( - 'The maximum log10emu value %e of the trial data is larger ' - 'than the maximum value of the PDF %e!' % ( - np.max(log10emu), self.get_axis(1).vmax)) - - def get_prob(self, tdm, params=None, tl=None): + f'The maximum sindecmu value {np.max(sindecmu):g} of the trial ' + 'data is larger than the maximum value of the PDF ' + f'{sindecmu_axis.vmax:g}!') + + def get_pd( + self, + tdm, + params_recarray=None, + tl=None): """Gets the probability density for the given trial data events. Parameters ---------- - tdm : TrialDataManager instance - The TrialDataManager instance holding the data events. + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial data events. 
The following data fields need to exist: - 'sin_dec' - - 'log_energy' + log_energy : float + The base-10 logarithm of the reconstructed energy value. + sin_dec : float + The sine of the declination value of the event. - params : dict | None - The dictionary containing the parameter names and values for which - the probability should get calculated. - By definition of this PDF, this is ``Ǹone``, because this PDF does - not depend on any parameters. - tl : TimeLord instance | None - The optional TimeLord instance that should be used to measure + params_recarray : None + Unused interface argument. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to measure timing information. Returns ------- - prob : (N_events,)-shaped numpy ndarray - The 1D numpy ndarray with the probability density for each event. + pd : instance of ndarray + The (N_selected_events,)-shaped numpy ndarray holding the + probability density value for each event. + grads : dict + The dictionary holding the gradients of the probability density + w.r.t. each global fit parameter. By definition this PDF does not + depend on any fit parameter, hence, this dictionary is empty. 
""" - get_data = tdm.get_data - - log10emu = get_data('log_energy') - sindecmu = get_data('sin_dec') + log10emu = tdm['log_energy'] + sindecmu = tdm['sin_dec'] log10emu_idxs = np.digitize( log10emu, self.get_binning('log_energy').binedges) - 1 @@ -447,4 +610,6 @@ def get_prob(self, tdm, params=None, tl=None): with TaskTimer(tl, 'Evaluating sindecmu-log10emu PDF.'): pd = self._hist_logE_sinDec[(log10emu_idxs, sindecmu_idxs)] - return pd + grads = dict() + + return (pd, grads) diff --git a/skyllh/analyses/i3/publicdata_ps/bkg_flux.py b/skyllh/analyses/i3/publicdata_ps/bkg_flux.py index 731489f367..96e617e847 100644 --- a/skyllh/analyses/i3/publicdata_ps/bkg_flux.py +++ b/skyllh/analyses/i3/publicdata_ps/bkg_flux.py @@ -3,8 +3,13 @@ import numpy as np import pickle -from skyllh.physics.flux import PowerLawFlux -from skyllh.core.binning import get_bincenters_from_binedges +from skyllh.core.binning import ( + get_bincenters_from_binedges, +) +from skyllh.core.flux_model import ( + PowerLawEnergyFluxProfile, + SteadyPointlikeFFM, +) def get_dOmega(dec_min, dec_max): @@ -88,11 +93,11 @@ def get_flux_atmo_decnu_log10enu(flux_pathfilename, log10_enu_max=9): zero_zen_idx = np.digitize(0, zenith_angle_binedges) - 1 for (decnu_idx, decnu) in enumerate(decnu_angles): if decnu < 0: - fl = flux_def['numu_total'][:,decnu_idx][m_e_grid] + fl = flux_def['numu_total'][:, decnu_idx][m_e_grid] else: # For up-going we use the flux calculation from the streight # downgoing. 
- fl = flux_def['numu_total'][:,zero_zen_idx][m_e_grid] + fl = flux_def['numu_total'][:, zero_zen_idx][m_e_grid] f_atmo[decnu_idx] = fl return (f_atmo, decnu_binedges, log10_enu_binedges) @@ -122,14 +127,18 @@ def get_flux_astro_decnu_log10enu(decnu_binedges, log10_enu_binedges): ---------- [1] https://arxiv.org/pdf/2111.10299.pdf """ - fluxmodel = PowerLawFlux(Phi0=1.44e-18, E0=100e3, gamma=2.37) + fluxmodel = SteadyPointlikeFFM( + Phi0=1.44e-18, + energy_profile=PowerLawEnergyFluxProfile( + E0=100e3, + gamma=2.37)) n_decnu = len(decnu_binedges) - 1 enu_binedges = np.power(10, log10_enu_binedges) enu_bincenters = get_bincenters_from_binedges(enu_binedges) - fl = fluxmodel(enu_bincenters) + fl = fluxmodel(E=enu_bincenters).squeeze() f_astro = np.tile(fl, (n_decnu, 1)) return f_astro @@ -158,13 +167,14 @@ def convert_flux_bkg_to_pdf_bkg(f_bkg, decnu_binedges, log10_enu_binedges): d_decnu = np.diff(decnu_binedges) d_log10_enu = np.diff(log10_enu_binedges) - bin_area = d_decnu[:,np.newaxis] * d_log10_enu[np.newaxis,:] + bin_area = d_decnu[:, np.newaxis] * d_log10_enu[np.newaxis, :] p_bkg = f_bkg / np.sum(f_bkg*bin_area) # Cross-check the normalization of the PDF. if not np.isclose(np.sum(p_bkg*bin_area), 1): raise ValueError( - 'The background PDF is not normalized! The integral is %f!'%(np.sum(p_bkg*bin_area))) + 'The background PDF is not normalized! The integral is ' + f'{np.sum(p_bkg*bin_area)}!') return p_bkg @@ -216,20 +226,21 @@ def get_pd_atmo_decnu_Enu(flux_pathfilename, log10_true_e_max=9): pd_atmo = np.zeros((n_decnu, n_e_grid)) for (decnu_idx, decnu) in enumerate(decnu_angles): if decnu < 0: - fl = flux_def['numu_total'][:,decnu_idx][m_e_grid] + fl = flux_def['numu_total'][:, decnu_idx][m_e_grid] else: # For up-going we use the flux calculation from the streight # downgoing. - fl = flux_def['numu_total'][:,0][m_e_grid] + fl = flux_def['numu_total'][:, 0][m_e_grid] pd_atmo[decnu_idx] = fl # Normalize the PDF. 
- bin_area = d_decnu[:,np.newaxis] * np.diff(log10_e_grid_edges)[np.newaxis,:] + bin_area = d_decnu[:, np.newaxis] * np.diff(log10_e_grid_edges)[np.newaxis, :] pd_atmo /= np.sum(pd_atmo*bin_area) # Cross-check the normalization of the PDF. if not np.isclose(np.sum(pd_atmo*bin_area), 1): raise ValueError( - 'The atmospheric true energy PDF is not normalized! The integral is %f!'%(np.sum(pd_atmo*bin_area))) + 'The atmospheric true energy PDF is not normalized! The integral ' + f'is {np.sum(pd_atmo*bin_area)}!') return (pd_atmo, decnu_binedges, log10_e_grid_edges) @@ -283,15 +294,15 @@ def get_pd_atmo_E_nu_sin_dec_nu(flux_pathfilename): pd_atmo = np.zeros((n_sin_dec, n_e_grid)) for (sin_dec_idx, sin_dec) in enumerate(sin_dec_angles): if sin_dec < 0: - fl = flux_def['numu_total'][:,sin_dec_idx][m_e_grid] + fl = flux_def['numu_total'][:, sin_dec_idx][m_e_grid] else: # For up-going we use the flux calculation from the streight # downgoing. - fl = flux_def['numu_total'][:,0][m_e_grid] + fl = flux_def['numu_total'][:, 0][m_e_grid] pd_atmo[sin_dec_idx] = fl/np.sum(fl*dE) # Cross-check the normalization of the PDF. 
- if not np.all(np.isclose(np.sum(pd_atmo*dE[np.newaxis,:], axis=1), 1)): + if not np.all(np.isclose(np.sum(pd_atmo*dE[np.newaxis, :], axis=1), 1)): raise ValueError( 'The atmospheric true energy PDF is not normalized!') @@ -321,22 +332,25 @@ def get_pd_astro_E_nu_sin_dec_nu(sin_dec_binedges, log10_e_grid_edges): ---------- [1] https://arxiv.org/pdf/2111.10299.pdf """ - fluxmodel = PowerLawFlux(Phi0=1.44e-18, E0=100e3, gamma=2.37) + fluxmodel = SteadyPointlikeFFM( + Phi0=1.44e-18, + energy_profile=PowerLawEnergyFluxProfile( + E0=100e3, + gamma=2.37)) n_sin_dec = len(sin_dec_binedges) - 1 - n_e_grid = len(log10_e_grid_edges) - 1 e_grid_edges = 10**log10_e_grid_edges e_grid_bc = 0.5*(e_grid_edges[:-1] + e_grid_edges[1:]) dE = np.diff(e_grid_edges) - fl = fluxmodel(e_grid_bc) + fl = fluxmodel(E=e_grid_bc).squeeze() pd = fl / np.sum(fl*dE) pd_astro = np.tile(pd, (n_sin_dec, 1)) # Cross-check the normalization of the PDF. - if not np.all(np.isclose(np.sum(pd_astro*dE[np.newaxis,:], axis=1), 1)): + if not np.all(np.isclose(np.sum(pd_astro*dE[np.newaxis, :], axis=1), 1)): raise ValueError( 'The astrophysical energy PDF is not normalized!') @@ -369,13 +383,11 @@ def get_pd_bkg_E_nu_sin_dec_nu(pd_atmo, pd_astro, log10_e_grid_edges): dE = np.diff(10**log10_e_grid_edges) - s = np.sum(pd_bkg*dE[np.newaxis,:], axis=1, keepdims=True) + s = np.sum(pd_bkg*dE[np.newaxis, :], axis=1, keepdims=True) pd_bkg /= s - if not np.all(np.isclose(np.sum(pd_bkg*dE[np.newaxis,:], axis=1), 1)): + if not np.all(np.isclose(np.sum(pd_bkg*dE[np.newaxis, :], axis=1), 1)): raise ValueError( 'The background energy PDF is not normalized!') return pd_bkg - - diff --git a/skyllh/analyses/i3/publicdata_ps/detsigyield.py b/skyllh/analyses/i3/publicdata_ps/detsigyield.py index ad138b75c2..4ef2adc7d6 100644 --- a/skyllh/analyses/i3/publicdata_ps/detsigyield.py +++ b/skyllh/analyses/i3/publicdata_ps/detsigyield.py @@ -1,46 +1,48 @@ # -*- coding: utf-8 -*- +from astropy import units import numpy as np import 
scipy.interpolate -from skyllh.core import multiproc -from skyllh.core.binning import BinningDefinition -from skyllh.core.dataset import ( - Dataset, - DatasetData +from skyllh.analyses.i3.publicdata_ps.aeff import ( + load_effective_area_array, ) -from skyllh.core.livetime import Livetime -from skyllh.core.parameters import ParameterGrid -from skyllh.core.detsigyield import ( - get_integrated_livetime_in_days +from skyllh.core import ( + multiproc, ) -from skyllh.physics.flux import ( - PowerLawFlux, - get_conversion_factor_to_internal_flux_unit +from skyllh.core.binning import ( + BinningDefinition, ) -from skyllh.i3.detsigyield import ( - PowerLawFluxPointLikeSourceI3DetSigYieldImplMethod, - PowerLawFluxPointLikeSourceI3DetSigYield +from skyllh.core.config import ( + to_internal_time_unit, ) -from skyllh.analyses.i3.publicdata_ps.aeff import ( - load_effective_area_array +from skyllh.core.flux_model import ( + FactorizedFluxModel, +) +from skyllh.core.livetime import ( + Livetime, +) +from skyllh.core.py import ( + classname, + issequence, +) +from skyllh.i3.detsigyield import ( + SingleParamFluxPointLikeSourceI3DetSigYieldBuilder, + SingleParamFluxPointLikeSourceI3DetSigYield, ) -class PublicDataPowerLawFluxPointLikeSourceI3DetSigYieldImplMethod( - PowerLawFluxPointLikeSourceI3DetSigYieldImplMethod, - multiproc.IsParallelizable): - """This detector signal yield constructor class constructs a - detector signal yield instance for a variable power law flux model, which - has the spectral index gamma as fit parameter, assuming a point-like source. - It constructs a two-dimensional spline function in sin(dec) and gamma, using - a :class:`scipy.interpolate.RectBivariateSpline`. Hence, the detector signal - yield can vary with the declination and the spectral index, gamma, of the - source. - - This detector signal yield implementation method works with a - PowerLawFlux flux model. 
+class PDSingleParamFluxPointLikeSourceI3DetSigYieldBuilder( + SingleParamFluxPointLikeSourceI3DetSigYieldBuilder, +): + """This detector signal yield builder class constructs a + detector signal yield instance for a variable flux model of a single + parameter, assuming a point-like source. + It constructs a two-dimensional spline function in sin(dec) and the + parameter, using a :class:`scipy.interpolate.RectBivariateSpline`. + Hence, the detector signal yield can vary with the declination and the + parameter of the flux model. It is tailored to the IceCube detector at the South Pole, where the effective area depends solely on the zenith angle, and hence on the @@ -51,86 +53,109 @@ class PublicDataPowerLawFluxPointLikeSourceI3DetSigYieldImplMethod( """ def __init__( - self, gamma_grid, spline_order_sinDec=2, spline_order_gamma=2, - ncpu=None): - """Creates a new IceCube detector signal yield constructor instance for - a power law flux model. It requires the effective area from the public - data, and a gamma parameter grid to compute the gamma dependency of the - detector signal yield. + self, + param_grid, + spline_order_sinDec=2, + spline_order_param=2, + ncpu=None, + **kwargs, + ): + """Creates a new IceCube detector signal yield builder instance for + a flux model with a single parameter. + It requires the effective area from the public data, and a parameter + grid to compute the parameter dependency of the detector signal yield. Parameters ---------- - gamma_grid : ParameterGrid instance - The ParameterGrid instance which defines the grid of gamma values. + param_grid : instance of ParameterGrid + The instance of ParameterGrid which defines the grid of parameter + values. spline_order_sinDec : int The order of the spline function for the logarithmic values of the detector signal yield along the sin(dec) axis. The default is 2. 
- spline_order_gamma : int + spline_order_param : int The order of the spline function for the logarithmic values of the - detector signal yield along the gamma axis. + detector signal yield along the parameter axis. The default is 2. ncpu : int | None - The number of CPUs to utilize. Global setting will take place if - not specified, i.e. set to None. + The number of CPUs to utilize. If set to ``None``, global setting + will take place. """ super().__init__( - gamma_grid=gamma_grid, + param_grid=param_grid, sin_dec_binning=None, spline_order_sinDec=spline_order_sinDec, - spline_order_gamma=spline_order_gamma, - ncpu=ncpu) + spline_order_param=spline_order_param, + ncpu=ncpu, + **kwargs) + + def assert_types_of_construct_detsigyield_arguments( + self, + shgs, + **kwargs): + """Checks the correct types of the arguments for the + ``construct_detsigyield`` method. + """ + super().assert_types_of_construct_detsigyield_arguments( + shgs=shgs, + **kwargs) + + if not issequence(shgs): + shgs = [shgs] + for shg in shgs: + if not isinstance(shg.fluxmodel, FactorizedFluxModel): + raise TypeError( + 'The fluxmodel of the source hypothesis group must be an ' + 'instance of FactorizedFluxModel! ' + f'Its current type is {classname(shg.fluxmodel)}!') def construct_detsigyield( - self, dataset, data, fluxmodel, livetime, ppbar=None): + self, + dataset, + data, + shg, + ppbar=None): """Constructs a detector signal yield 2-dimensional log spline - function for the given power law flux model with varying gamma values. + function for the given flux model with varying parameter values. Parameters ---------- - dataset : Dataset instance + dataset : instance of Dataset The Dataset instance holding the sin(dec) binning definition. - data : DatasetData instance - The DatasetData instance holding the monte-carlo event data. + data : instance of DatasetData + The instance of DatasetData holding the monte-carlo event data. 
This implementation loads the effective area from the provided public data and hence does not need monte-carlo data. - fluxmodel : FluxModel - The flux model instance. Must be an instance of PowerLawFlux. - livetime : float | Livetime instance - The live-time in days or an instance of Livetime to use for the - detector signal yield. + shg : instance of SourceHypoGroup + The instance of SourceHypoGroup (i.e. sources and flux model) for + which the detector signal yield should get constructed. ppbar : ProgressBar instance | None The instance of ProgressBar of the optional parent progress bar. Returns ------- - detsigyield : PowerLawFluxPointLikeSourceI3DetSigYield instance - The DetSigYield instance for a point-like source with a power law - flux with variable gamma parameter. + detsigyield : instance of SingleParamFluxPointLikeSourceI3DetSigYield + The DetSigYield instance for a point-like source with a flux model + of a single parameter. """ - # Check for the correct data types of the input arguments. - if(not isinstance(dataset, Dataset)): - raise TypeError('The dataset argument must be an instance of ' - 'Dataset!') - if(not isinstance(data, DatasetData)): - raise TypeError('The data argument must be an instance of ' - 'DatasetData!') - if(not self.supports_fluxmodel(fluxmodel)): - raise TypeError('The DetSigYieldImplMethod "%s" does not support ' - 'the flux model "%s"!' % ( - self.__class__.__name__, - fluxmodel.__class__.__name__)) - if((not isinstance(livetime, float)) and - (not isinstance(livetime, Livetime))): - raise TypeError('The livetime argument must be an instance of ' - 'float or Livetime!') + self.assert_types_of_construct_detsigyield_arguments( + dataset=dataset, + data=data, + shgs=shg, + ppbar=ppbar, + ) # Get integrated live-time in days. 
- livetime_days = get_integrated_livetime_in_days(livetime) + livetime_days = Livetime.get_integrated_livetime(data.livetime) + + to_internal_time_unit_factor = to_internal_time_unit( + time_unit=units.day + ) # Calculate conversion factor from the flux model unit into the internal # flux unit GeV^-1 cm^-2 s^-1. - toGeVcm2s = get_conversion_factor_to_internal_flux_unit(fluxmodel) + to_internal_flux_unit_factor = shg.fluxmodel.get_conversion_factor_to_internal_flux_unit() # Load the effective area data from the public dataset. aeff_fnames = dataset.get_abs_pathfilename_list( @@ -144,9 +169,13 @@ def construct_detsigyield( ) = load_effective_area_array(aeff_fnames) # Calculate the detector signal yield in sin_dec vs gamma. - def hist( - energy_bin_edges_lower, energy_bin_edges_upper, - aeff, fluxmodel): + def _create_hist( + energy_bin_edges_lower, + energy_bin_edges_upper, + aeff, + fluxmodel, + to_internal_flux_unit_factor, + ): """Creates a histogram of the detector signal yield for the given sin(dec) binning. @@ -161,14 +190,15 @@ def hist( Returns ------- - h : (n_bins_sin_dec,)-shaped 1d ndarray - The numpy array containing the detector signal yield values for - the different sin_dec bins and the given flux model. + h : instance of ndarray + The (n_bins_sin_dec,)-shaped 1d numpy ndarray containing the + detector signal yield values for the different sin_dec bins and + the given flux model. """ - # Create histogram for the number of neutrinos with each energy - # bin. - h_phi = fluxmodel.get_integral( - energy_bin_edges_lower, energy_bin_edges_upper) + h_phi = fluxmodel.energy_profile.get_integral( + E1=energy_bin_edges_lower, + E2=energy_bin_edges_upper) + h_phi *= to_internal_flux_unit_factor # Sum over the enegry bins for each sin_dec row. 
h = np.sum(aeff*h_phi, axis=1) @@ -178,37 +208,53 @@ def hist( energy_bin_edges_lower = np.power(10, log_true_e_binedges_lower) energy_bin_edges_upper = np.power(10, log_true_e_binedges_upper) - # Make a copy of the gamma grid and extend the grid by one bin on each - # side. - gamma_grid = self._gamma_grid.copy() - gamma_grid.add_extra_lower_and_upper_bin() + # Make a copy of the parameter grid and extend the grid by one bin on + # each side. + param_grid = self.param_grid.copy() + param_grid.add_extra_lower_and_upper_bin() # Construct the arguments for the hist function to be used in the # multiproc.parallelize function. args_list = [ - ((energy_bin_edges_lower, - energy_bin_edges_upper, - aeff_arr, - fluxmodel.copy({'gamma': gamma})), {}) - for gamma in gamma_grid.grid + ( + ( + energy_bin_edges_lower, + energy_bin_edges_upper, + aeff_arr, + shg.fluxmodel.copy({param_grid.name: param_val}), + to_internal_flux_unit_factor, + ), + {} + ) + for param_val in param_grid.grid ] h = np.vstack( multiproc.parallelize( - hist, args_list, self.ncpu, ppbar=ppbar)).T - h *= toGeVcm2s * livetime_days * 86400. + _create_hist, args_list, self.ncpu, ppbar=ppbar)).T + h *= livetime_days*to_internal_time_unit_factor # Create a 2d spline in log of the detector signal yield. sin_dec_bincenters = 0.5*( sin_true_dec_binedges_lower + sin_true_dec_binedges_upper) - log_spl_sinDec_gamma = scipy.interpolate.RectBivariateSpline( - sin_dec_bincenters, gamma_grid.grid, np.log(h), - kx=self.spline_order_sinDec, ky=self.spline_order_gamma, s=0) + log_spl_sinDec_param = scipy.interpolate.RectBivariateSpline( + sin_dec_bincenters, + param_grid.grid, + np.log(h), + kx=self.spline_order_sinDec, + ky=self.spline_order_param, + s=0) # Construct the detector signal yield instance with the created spline. 
sin_dec_binedges = np.concatenate( (sin_true_dec_binedges_lower, [sin_true_dec_binedges_upper[-1]])) sin_dec_binning = BinningDefinition('sin_dec', sin_dec_binedges) - detsigyield = PowerLawFluxPointLikeSourceI3DetSigYield( - self, dataset, fluxmodel, livetime, sin_dec_binning, log_spl_sinDec_gamma) + + detsigyield = SingleParamFluxPointLikeSourceI3DetSigYield( + param_name=param_grid.name, + dataset=dataset, + fluxmodel=shg.fluxmodel, + livetime=data.livetime, + sin_dec_binning=sin_dec_binning, + log_spl_sinDec_param=log_spl_sinDec_param) return detsigyield diff --git a/skyllh/analyses/i3/publicdata_ps/mcbkg_ps.py b/skyllh/analyses/i3/publicdata_ps/mcbkg_ps.py index 5f98db4220..18ce225604 100644 --- a/skyllh/analyses/i3/publicdata_ps/mcbkg_ps.py +++ b/skyllh/analyses/i3/publicdata_ps/mcbkg_ps.py @@ -11,120 +11,105 @@ import numpy as np import pickle -from skyllh.core.progressbar import ProgressBar - -# Classes to define the source hypothesis. -from skyllh.physics.source import PointLikeSource -from skyllh.physics.flux import PowerLawFlux -from skyllh.core.source_hypo_group import SourceHypoGroup -from skyllh.core.source_hypothesis import SourceHypoGroupManager - -# Classes to define the fit parameters. -from skyllh.core.parameters import ( - SingleSourceFitParameterMapper, - FitParameter +from skyllh.analyses.i3.publicdata_ps.backgroundpdf import ( + PDMCBackgroundI3EnergyPDF, ) - -# Classes for the minimizer. -from skyllh.core.minimizer import Minimizer, LBFGSMinimizerImpl - -# Classes for utility functionality. -from skyllh.core.config import CFG -from skyllh.core.random import RandomStateService -from skyllh.core.optimize import SpatialBoxEventSelectionMethod -from skyllh.core.smoothing import BlockSmoothingFilter -from skyllh.core.timing import TimeLord -from skyllh.core.trialdata import TrialDataManager - -# Classes for defining the analysis. 
-from skyllh.core.test_statistic import TestStatisticWilks -from skyllh.core.analysis import ( - TimeIntegratedMultiDatasetSingleSourceAnalysis as Analysis +from skyllh.analyses.i3.publicdata_ps.detsigyield import ( + PDSingleParamFluxPointLikeSourceI3DetSigYieldBuilder, ) - -# Classes to define the background generation. -from skyllh.core.scrambling import DataScrambler, UniformRAScramblingMethod -from skyllh.i3.background_generation import FixedScrambledExpDataI3BkgGenMethod - -# Classes to define the signal and background PDFs. -from skyllh.core.signalpdf import RayleighPSFPointSourceSignalSpatialPDF -from skyllh.i3.signalpdf import SignalI3EnergyPDFSet -from skyllh.i3.backgroundpdf import ( - DataBackgroundI3SpatialPDF, - DataBackgroundI3EnergyPDF +from skyllh.analyses.i3.publicdata_ps.pdfratio import ( + PDSigSetOverBkgPDFRatio, ) -from skyllh.i3.pdfratio import ( - I3EnergySigSetOverBkgPDFRatioSpline +from skyllh.analyses.i3.publicdata_ps.signal_generator import ( + PDDatasetSignalGenerator, ) -# Classes to define the spatial and energy PDF ratios. -from skyllh.core.pdfratio import ( - SpatialSigOverBkgPDFRatio, - Skylab2SkylabPDFRatioFillMethod +from skyllh.analyses.i3.publicdata_ps.signalpdf import ( + PDSignalEnergyPDFSet, ) - -from skyllh.i3.signal_generation import PointLikeSourceI3SignalGenerationMethod - -# Analysis utilities. -from skyllh.core.analysis_utils import ( - pointlikesource_to_data_field_array +from skyllh.analyses.i3.publicdata_ps.utils import ( + create_energy_cut_spline, + tdm_field_func_psi, +) +from skyllh.core.analysis import ( + SingleSourceMultiDatasetLLHRatioAnalysis as Analysis, +) +from skyllh.core.config import ( + CFG, ) - -# Logging setup utilities. from skyllh.core.debugging import ( + get_logger, setup_logger, setup_console_handler, - setup_file_handler + setup_file_handler, ) - -# Pre-defined public IceCube data samples. 
-from skyllh.datasets.i3 import data_samples - -# Analysis specific classes for working with the public data. -from skyllh.analyses.i3.publicdata_ps.signal_generator import ( - PDSignalGenerator +from skyllh.core.event_selection import ( + SpatialBoxEventSelectionMethod, ) -from skyllh.analyses.i3.publicdata_ps.detsigyield import ( - PublicDataPowerLawFluxPointLikeSourceI3DetSigYieldImplMethod +from skyllh.core.flux_model import ( + PowerLawEnergyFluxProfile, + SteadyPointlikeFFM, ) -from skyllh.analyses.i3.publicdata_ps.signalpdf import ( - PDSignalEnergyPDFSet +from skyllh.core.minimizer import ( + LBFGSMinimizerImpl, + Minimizer, ) -from skyllh.analyses.i3.publicdata_ps.backgroundpdf import ( - PDMCBackgroundI3EnergyPDF +from skyllh.core.minimizers.iminuit import ( + IMinuitMinimizerImpl, ) -from skyllh.analyses.i3.publicdata_ps.pdfratio import ( - PDPDFRatio +from skyllh.core.model import ( + DetectorModel, +) +from skyllh.core.parameters import ( + Parameter, + ParameterModelMapper, +) +from skyllh.core.pdfratio import ( + SigOverBkgPDFRatio, +) +from skyllh.core.progressbar import ( + ProgressBar, +) +from skyllh.core.random import ( + RandomStateService, +) +from skyllh.core.scrambling import ( + DataScrambler, + UniformRAScramblingMethod, +) +from skyllh.core.signal_generator import ( + MultiDatasetSignalGenerator, +) +from skyllh.core.signalpdf import ( + RayleighPSFPointSourceSignalSpatialPDF, +) +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroup, + SourceHypoGroupManager, +) +from skyllh.core.source_model import ( + PointLikeSource, +) +from skyllh.core.test_statistic import ( + WilksTestStatistic, +) +from skyllh.core.timing import ( + TimeLord, +) +from skyllh.core.trialdata import ( + TrialDataManager, +) +from skyllh.core.utils.analysis import ( + pointlikesource_to_data_field_array, +) +from skyllh.datasets.i3 import ( + data_samples, +) +from skyllh.i3.background_generation import ( + FixedScrambledExpDataI3BkgGenMethod, +) +from 
skyllh.i3.backgroundpdf import ( + DataBackgroundI3SpatialPDF, ) - - -def psi_func(tdm, src_hypo_group_manager, fitparams): - """Function to calculate the opening angle between the source position - and the event's reconstructed position. - """ - ra = tdm.get_data('ra') - dec = tdm.get_data('dec') - - # Make the source position angles two-dimensional so the PDF value - # can be calculated via numpy broadcasting automatically for several - # sources. This is useful for stacking analyses. - src_ra = tdm.get_data('src_array')['ra'][:, np.newaxis] - src_dec = tdm.get_data('src_array')['dec'][:, np.newaxis] - - delta_dec = np.abs(dec - src_dec) - delta_ra = np.abs(ra - src_ra) - x = ( - (np.sin(delta_dec / 2.))**2. + np.cos(dec) * - np.cos(src_dec) * (np.sin(delta_ra / 2.))**2. - ) - - # Handle possible floating precision errors. - x[x < 0.] = 0. - x[x > 1.] = 1. - - psi = (2.0*np.arcsin(np.sqrt(x))) - - # For now we support only a single source, hence return psi[0]. - return psi[0, :] def TXS_location(): @@ -134,27 +119,33 @@ def TXS_location(): def create_analysis( - rss, datasets, source, refplflux_Phi0=1, refplflux_E0=1e3, refplflux_gamma=2, - ns_seed=10.0, + ns_seed=100, + ns_min=0, + ns_max=1e3, gamma_seed=3, - cache_dir='.', + gamma_min=1, + gamma_max=5, + minimizer_impl='LBFGS', + cut_sindec=None, + spl_smooth=None, cap_ratio=False, compress_data=False, keep_data_fields=None, - optimize_delta_angle=10, + evt_sel_delta_angle_deg=10, efficiency_mode=None, tl=None, - ppbar=None + ppbar=None, + logger_name=None, ): """Creates the Analysis instance for this particular analysis. - Parameters: - ----------- + Parameters + ---------- datasets : list of Dataset instances The list of Dataset instances, which should be used in the analysis. @@ -168,30 +159,47 @@ def create_analysis( The spectral index to use for the reference power law flux model. ns_seed : float Value to seed the minimizer with for the ns fit. + ns_min : float + Lower bound for ns fit. 
+ ns_max : float + Upper bound for ns fit. gamma_seed : float | None Value to seed the minimizer with for the gamma fit. If set to None, the refplflux_gamma value will be set as gamma_seed. - cache_dir : str - The cache directory where to look for cached data, e.g. signal PDFs. + gamma_min : float + Lower bound for gamma fit. + gamma_max : float + Upper bound for gamma fit. + minimizer_impl : str + Minimizer implementation to be used. Supported options are ``"LBFGS"`` + (L-BFG-S minimizer used from the :mod:`scipy.optimize` module), or + ``"minuit"`` (Minuit minimizer used by the :mod:`iminuit` module). + Default: "LBFGS". + cut_sindec : list of float | None + sin(dec) values at which the energy cut in the southern sky should + start. If None, np.sin(np.radians([-2, 0, -3, 0, 0])) is used. + spl_smooth : list of float + Smoothing parameters for the 1D spline for the energy cut. If None, + [0., 0.005, 0.05, 0.2, 0.3] is used. compress_data : bool Flag if the data should get converted from float64 into float32. keep_data_fields : list of str | None List of additional data field names that should get kept when loading the data. - optimize_delta_angle : float + evt_sel_delta_angle_deg : float The delta angle in degrees for the event selection optimization methods. efficiency_mode : str | None The efficiency mode the data should get loaded with. Possible values are: - - 'memory': - The data will be load in a memory efficient way. This will - require more time, because all data records of a file will - be loaded sequentially. - - 'time': - The data will be loaded in a time efficient way. This will - require more memory, because each data file gets loaded in - memory at once. + ``'memory'``: + The data will be load in a memory efficient way. This will + require more time, because all data records of a file will + be loaded sequentially. + ``'time'``: + The data will be loaded in a time efficient way. 
This will + require more memory, because each data file gets loaded in + memory at once. The default value is ``'time'``. If set to ``None``, the default value will be used. @@ -199,47 +207,81 @@ def create_analysis( The TimeLord instance to use to time the creation of the analysis. ppbar : ProgressBar instance | None The instance of ProgressBar for the optional parent progress bar. + logger_name : str | None + The name of the logger to be used. If set to ``None``, ``__name__`` will + be used. Returns ------- - analysis : SpatialEnergyTimeIntegratedMultiDatasetSingleSourceAnalysis + ana : instance of SingleSourceMultiDatasetLLHRatioAnalysis The Analysis instance for this analysis. """ + if logger_name is None: + logger_name = __name__ + logger = get_logger(logger_name) + + # Create the minimizer instance. + if minimizer_impl == 'LBFGS': + minimizer = Minimizer(LBFGSMinimizerImpl()) + elif minimizer_impl == 'minuit': + minimizer = Minimizer(IMinuitMinimizerImpl(ftol=1e-8)) + else: + raise NameError( + f"Minimizer implementation `{minimizer_impl}` is not supported " + "Please use `LBFGS` or `minuit`.") + # Define the flux model. - flux_model = PowerLawFlux( - Phi0=refplflux_Phi0, E0=refplflux_E0, gamma=refplflux_gamma) + fluxmodel = SteadyPointlikeFFM( + Phi0=refplflux_Phi0, + energy_profile=PowerLawEnergyFluxProfile( + E0=refplflux_E0, + gamma=refplflux_gamma)) # Define the fit parameter ns. - fitparam_ns = FitParameter('ns', 0, 1e3, ns_seed) - - # Define the gamma fit parameter. - fitparam_gamma = FitParameter( - 'gamma', valmin=1, valmax=5, initial=gamma_seed) + param_ns = Parameter( + name='ns', + initial=ns_seed, + valmin=ns_min, + valmax=ns_max) + + # Define the fit parameter gamma. + param_gamma = Parameter( + name='gamma', + initial=gamma_seed, + valmin=gamma_min, + valmax=gamma_max) # Define the detector signal efficiency implementation method for the # IceCube detector and this source and flux_model. 
# The sin(dec) binning will be taken by the implementation method # automatically from the Dataset instance. - gamma_grid = fitparam_gamma.as_linear_grid(delta=0.1) - detsigyield_implmethod = \ - PublicDataPowerLawFluxPointLikeSourceI3DetSigYieldImplMethod( - gamma_grid) - - # Define the signal generation method. - #sig_gen_method = PointLikeSourceI3SignalGenerationMethod() - sig_gen_method = None + gamma_grid = param_gamma.as_linear_grid(delta=0.1) + detsigyield_builder =\ + PDSingleParamFluxPointLikeSourceI3DetSigYieldBuilder( + param_grid=gamma_grid) # Create a source hypothesis group manager. - src_hypo_group_manager = SourceHypoGroupManager( + shg_mgr = SourceHypoGroupManager( SourceHypoGroup( - source, flux_model, detsigyield_implmethod, sig_gen_method)) + sources=source, + fluxmodel=fluxmodel, + detsigyield_builders=detsigyield_builder, + sig_gen_method=None)) + + # Define a detector model for the ns fit parameter. + detector_model = DetectorModel('IceCube') - # Create a source fit parameter mapper and define the fit parameters. - src_fitparam_mapper = SingleSourceFitParameterMapper() - src_fitparam_mapper.def_fit_parameter(fitparam_gamma) + # Define the parameter model mapper for the analysis, which will map global + # parameters to local source parameters. + pmm = ParameterModelMapper( + models=[detector_model, source]) + pmm.def_param(param_ns, models=detector_model) + pmm.def_param(param_gamma, models=source) + + logger.info(str(pmm)) # Define the test statistic. - test_statistic = TestStatisticWilks() + test_statistic = WilksTestStatistic() # Define the data scrambler with its data scrambling method, which is used # for background generation. @@ -252,24 +294,33 @@ def create_analysis( minimizer = Minimizer(LBFGSMinimizerImpl()) # Create the Analysis instance. 
- analysis = Analysis( - src_hypo_group_manager, - src_fitparam_mapper, - fitparam_ns, - test_statistic, - bkg_gen_method, - sig_generator_cls=PDSignalGenerator + ana = Analysis( + shg_mgr=shg_mgr, + pmm=pmm, + test_statistic=test_statistic, + bkg_gen_method=bkg_gen_method, + sig_generator_cls=MultiDatasetSignalGenerator, ) # Define the event selection method for pure optimization purposes. # We will use the same method for all datasets. event_selection_method = SpatialBoxEventSelectionMethod( - src_hypo_group_manager, delta_angle=np.deg2rad(optimize_delta_angle)) - #event_selection_method = None + shg_mgr=shg_mgr, + delta_angle=np.deg2rad(evt_sel_delta_angle_deg)) + + # Prepare the spline parameters for the signal generator. + if cut_sindec is None: + cut_sindec = np.sin(np.radians([-2, 0, -3, 0, 0])) + if spl_smooth is None: + spl_smooth = [0., 0.005, 0.05, 0.2, 0.3] + if len(spl_smooth) < len(datasets) or len(cut_sindec) < len(datasets): + raise AssertionError( + 'The length of the spl_smooth and of the cut_sindec must be equal ' + f'to the length of datasets: {len(datasets)}.') # Add the data sets to the analysis. pbar = ProgressBar(len(datasets), parent=ppbar).start() - for ds in datasets: + for (ds_idx, ds) in enumerate(datasets): # Load the data of the data set. data = ds.load_and_prepare_data( keep_fields=keep_data_fields, @@ -277,68 +328,92 @@ def create_analysis( efficiency_mode=efficiency_mode, tl=tl) - # Create a trial data manager and add the required data fields. - tdm = TrialDataManager() - tdm.add_source_data_field('src_array', - pointlikesource_to_data_field_array) - tdm.add_data_field('psi', psi_func) - sin_dec_binning = ds.get_binning_definition('sin_dec') - log_energy_binning = ds.get_binning_definition('log_energy') # Create the spatial PDF ratio instance for this dataset. 
spatial_sigpdf = RayleighPSFPointSourceSignalSpatialPDF( dec_range=np.arcsin(sin_dec_binning.range)) spatial_bkgpdf = DataBackgroundI3SpatialPDF( - data.exp, sin_dec_binning) - spatial_pdfratio = SpatialSigOverBkgPDFRatio( - spatial_sigpdf, spatial_bkgpdf) + data_exp=data.exp, + sin_dec_binning=sin_dec_binning) + spatial_pdfratio = SigOverBkgPDFRatio( + sig_pdf=spatial_sigpdf, + bkg_pdf=spatial_bkgpdf) # Create the energy PDF ratio instance for this dataset. energy_sigpdfset = PDSignalEnergyPDFSet( ds=ds, src_dec=source.dec, - flux_model=flux_model, - fitparam_grid_set=gamma_grid, + fluxmodel=fluxmodel, + param_grid_set=gamma_grid, ppbar=ppbar ) - #smoothing_filter = BlockSmoothingFilter(nbins=1) - #energy_bkgpdf = DataBackgroundI3EnergyPDF( - # data.exp, log_energy_binning, sin_dec_binning, smoothing_filter) - bkg_pdf_pathfilename = ds.get_abs_pathfilename_list( ds.get_aux_data_definition('pdf_bkg_datafile'))[0] with open(bkg_pdf_pathfilename, 'rb') as f: bkg_pdf_data = pickle.load(f) energy_bkgpdf = PDMCBackgroundI3EnergyPDF( pdf_log10emu_sindecmu=bkg_pdf_data['pdf'], + log10emu_binning=bkg_pdf_data['log10emu_binning'], sindecmu_binning=bkg_pdf_data['sindecmu_binning'], - log10emu_binning=bkg_pdf_data['log10emu_binning'] ) - energy_pdfratio = PDPDFRatio( + energy_pdfratio = PDSigSetOverBkgPDFRatio( sig_pdf_set=energy_sigpdfset, bkg_pdf=energy_bkgpdf, - cap_ratio=cap_ratio - ) + cap_ratio=cap_ratio) - pdfratios = [spatial_pdfratio, energy_pdfratio] + pdfratio = spatial_pdfratio * energy_pdfratio - analysis.add_dataset( - ds, data, pdfratios, tdm, event_selection_method) + # Create a trial data manager and add the required data fields. 
+ tdm = TrialDataManager() + tdm.add_source_data_field( + name='src_array', + func=pointlikesource_to_data_field_array) + tdm.add_data_field( + name='psi', + func=tdm_field_func_psi, + dt='dec', + is_srcevt_data=True) + + energy_cut_spline = create_energy_cut_spline( + ds, + data.exp, + spl_smooth[ds_idx]) + + sig_generator = PDDatasetSignalGenerator( + shg_mgr=shg_mgr, + ds=ds, + ds_idx=ds_idx, + energy_cut_spline=energy_cut_spline, + cut_sindec=cut_sindec[ds_idx], + ) + + ana.add_dataset( + dataset=ds, + data=data, + pdfratio=pdfratio, + tdm=tdm, + event_selection_method=event_selection_method, + sig_generator=sig_generator) pbar.increment() pbar.finish() - analysis.llhratio = analysis.construct_llhratio(minimizer, ppbar=ppbar) + ana.construct_services( + ppbar=ppbar) - # analysis.construct_signal_generator() + ana.llhratio = ana.construct_llhratio( + minimizer=minimizer, + ppbar=ppbar) - return analysis + ana.construct_signal_generator() + return ana -if(__name__ == '__main__'): + +if __name__ == '__main__': p = argparse.ArgumentParser( description='Calculates TS for a given source location using the ' '10-year public point source sample.', @@ -368,13 +443,6 @@ def create_analysis( type=str, help='The base path to the data samples (default=None)' ) - p.add_argument( - '--pdf-seed', - default=1, - type=int, - help='The random number generator seed for generating the ' - 'signal PDF.' - ) p.add_argument( '--seed', default=1, @@ -388,11 +456,6 @@ def create_analysis( type=int, help='The number of CPUs to utilize where parallelization is possible.' ) - p.add_argument( - '--cache-dir', - default='.', - type=str, - help='The cache directory to look for cached data, e.g. 
signal PDFs.') p.add_argument( '--cap-ratio', action='store_true', @@ -405,20 +468,24 @@ def create_analysis( setup_logger('skyllh', logging.DEBUG) log_format = '%(asctime)s %(processName)s %(name)s %(levelname)s: '\ '%(message)s' - setup_console_handler('skyllh', logging.INFO, log_format) - setup_file_handler('skyllh', 'debug.log', + setup_console_handler( + 'skyllh', + logging.INFO, log_format) + setup_file_handler( + 'skyllh', + 'debug.log', log_level=logging.DEBUG, log_format=log_format) CFG['multiproc']['ncpu'] = args.ncpu sample_seasons = [ - #('PublicData_10y_ps', 'IC40'), - #('PublicData_10y_ps', 'IC59'), - #('PublicData_10y_ps', 'IC79'), - #('PublicData_10y_ps', 'IC86_I'), + # ('PublicData_10y_ps', 'IC40'), + # ('PublicData_10y_ps', 'IC59'), + # ('PublicData_10y_ps', 'IC79'), + # ('PublicData_10y_ps', 'IC86_I'), ('PublicData_10y_ps', 'IC86_II'), - #('PublicData_10y_ps', 'IC86_II-VII') + # ('PublicData_10y_ps', 'IC86_II-VII') ] datasets = [] @@ -429,7 +496,6 @@ def create_analysis( datasets.append(dsc.get_dataset(season)) # Define a random state service. - rss_pdf = RandomStateService(args.pdf_seed) rss = RandomStateService(args.seed) # Define the point source. source = PointLikeSource(np.deg2rad(args.ra), np.deg2rad(args.dec)) @@ -439,10 +505,8 @@ def create_analysis( with tl.task_timer('Creating analysis.'): ana = create_analysis( - rss_pdf, - datasets, - source, - cache_dir=args.cache_dir, + datasets=datasets, + source=source, cap_ratio=args.cap_ratio, gamma_seed=args.gamma_seed, tl=tl) @@ -454,19 +518,4 @@ def create_analysis( print('ns_fit = %g' % (fitparam_dict['ns'])) print('gamma_fit = %g' % (fitparam_dict['gamma'])) - - # Generate some signal events. 
- #ana.construct_signal_generator() - #with tl.task_timer('Generating signal events.'): - # (n_sig, signal_events_dict) =\ - # ana.sig_generator.generate_signal_events(rss, 100) - - #trials = ana.do_trials( - # rss, 100, mean_n_sig=20 - #) - - #print('n_sig: %d'%n_sig) - #print('signal datasets: '+str(signal_events_dict.keys())) - - print(tl) diff --git a/skyllh/analyses/i3/publicdata_ps/pdfratio.py b/skyllh/analyses/i3/publicdata_ps/pdfratio.py index 961addf28f..7abdbb3dac 100644 --- a/skyllh/analyses/i3/publicdata_ps/pdfratio.py +++ b/skyllh/analyses/i3/publicdata_ps/pdfratio.py @@ -4,15 +4,28 @@ import numpy as np -from skyllh.core.py import module_classname -from skyllh.core.debugging import get_logger -from skyllh.core.parameters import make_params_hash -from skyllh.core.pdf import PDF -from skyllh.core.pdfratio import SigSetOverBkgPDFRatio - - -class PDPDFRatio(SigSetOverBkgPDFRatio): - def __init__(self, sig_pdf_set, bkg_pdf, cap_ratio=False, **kwargs): +from skyllh.core.debugging import ( + get_logger, +) +from skyllh.core.parameters import ( + ParameterModelMapper, +) +from skyllh.core.pdfratio import ( + SigSetOverBkgPDFRatio, +) +from skyllh.core.py import ( + module_class_method_name, +) + + +class PDSigSetOverBkgPDFRatio( + SigSetOverBkgPDFRatio): + def __init__( + self, + sig_pdf_set, + bkg_pdf, + cap_ratio=False, + **kwargs): """Creates a PDFRatio instance for the public data. It takes a signal PDF set for different discrete gamma values. @@ -28,17 +41,17 @@ def __init__(self, sig_pdf_set, bkg_pdf, cap_ratio=False, **kwargs): Switch whether the S/B PDF ratio should get capped where no background is available. Default is False. 
""" - self._logger = get_logger(module_classname(self)) + self._logger = get_logger(module_class_method_name(self, '__init__')) super().__init__( - pdf_type=PDF, - signalpdfset=sig_pdf_set, - backgroundpdf=bkg_pdf, + sig_pdf_set=sig_pdf_set, + bkg_pdf=bkg_pdf, **kwargs) # Construct the instance for the fit parameter interpolation method. - self._interpolmethod_instance = self.interpolmethod( - self._get_ratio_values, sig_pdf_set.fitparams_grid_set) + self._interpolmethod = self.interpolmethod_cls( + func=self._get_ratio_values, + param_grid_set=sig_pdf_set.param_grid_set) self.cap_ratio = cap_ratio if self.cap_ratio: @@ -73,9 +86,10 @@ def __init__(self, sig_pdf_set, bkg_pdf, cap_ratio=False, **kwargs): # order to avoid the recalculation of the ratio value when the # ``get_gradient`` method is called (usually after the ``get_ratio`` # method was called). + self._cache_tdm_trial_data_state_id = None self._cache_fitparams_hash = None self._cache_ratio = None - self._cache_gradients = None + self._cache_grads = None @property def cap_ratio(self): @@ -84,56 +98,122 @@ def cap_ratio(self): point number greater than zero as background pdf value (False). """ return self._cap_ratio + @cap_ratio.setter def cap_ratio(self, b): self._cap_ratio = b - def _get_signal_fitparam_names(self): - """This method must be re-implemented by the derived class and needs to - return the list of signal fit parameter names, this PDF ratio is a - function of. If it returns an empty list, the PDF ratio is independent - of any signal fit parameters. + def _is_cached( + self, + tdm, + fitparams_hash): + """Checks if the ratio and gradients for the given hash of local fit + parameters are already cached. + + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial data events. + fitparams_hach : int + The hash value of the local fit parameter values. 
Returns ------- - list of str - The list of the signal fit parameter names, this PDF ratio is a - function of. By default this method returns an empty list indicating - that the PDF ratio depends on no signal parameter. - """ - fitparam_names = self.signalpdfset.fitparams_grid_set.parameter_names - return fitparam_names - - def _is_cached(self, tdm, fitparams_hash): - """Checks if the ratio and gradients for the given set of fit parameters - are already cached. + check : bool + ``True`` if the ratio and gradient values are already cached, + ``False`` otherwise. """ - if((self._cache_fitparams_hash == fitparams_hash) and - (len(self._cache_ratio) == tdm.n_selected_events) - ): + if (self._cache_tdm_trial_data_state_id == tdm.trial_data_state_id) and\ + (self._cache_fitparams_hash == fitparams_hash) and\ + (self._cache_ratio is not None) and\ + (self._cache_grads is not None): return True - return False - - def _get_ratio_values(self, tdm, gridfitparams, eventdata): - """Select the signal PDF for the given fit parameter grid point and - evaluates the S/B ratio for all the given events. - """ - sig_pdf_key = self.signalpdfset.make_pdf_key(gridfitparams) - sig_prob = self.signalpdfset.get_pdf(sig_pdf_key).get_prob(tdm) - if isinstance(sig_prob, tuple): - (sig_prob, _) = sig_prob + return False - bkg_prob = self.backgroundpdf.get_prob(tdm) - if isinstance(bkg_prob, tuple): - (bkg_prob, _) = bkg_prob + def _get_hash_of_local_sig_fit_param_values( + self, + src_params_recarray): + """Gets the hash of the values of the local signal fit parameters from + the given ``src_params_recarray``. - if len(sig_prob) != len(bkg_prob): - raise ValueError( - f'The number of signal ({len(sig_prob)}) and background ' - f'({len(bkg_prob)}) probability values is not equal!') + Parameters + ---------- + src_params_recarray : instance of ndarray + The (N_sources,)-shaped structured numpy ndarray holding the local + parameter names and values of the sources. 
- m_nonzero_bkg = bkg_prob > 0 + Returns + ------- + hash : int + The hash of the (N_fitparams, N_sources)-shaped tuple of tuples + holding the values of the local signal fit parameters. + """ + values = [] + for param_name in self.sig_param_names: + if ParameterModelMapper.is_local_param_a_fitparam( + param_name, src_params_recarray): + values.append(tuple(src_params_recarray[param_name])) + + values = tuple(values) + + return hash(values) + + def _get_ratio_values( + self, + tdm, + eventdata, + gridparams_recarray, + n_values): + """Select the signal PDF for the given fit parameter grid point and + evaluates the S/B ratio for all the trial data events and sources. + """ + n_sources = len(gridparams_recarray) + + ratio = np.empty((n_values,), dtype=np.double) + + same_pdf_for_all_sources = True + if len(gridparams_recarray) > 1: + for pname in gridparams_recarray.dtype.fields.keys(): + if not np.all(np.isclose(np.diff(gridparams_recarray[pname]), 0)): + same_pdf_for_all_sources = False + break + if same_pdf_for_all_sources: + # Special case where the grid parameter values are the same for all + # sources for all grid parameters + gridparams = dict( + zip(gridparams_recarray.dtype.fields.keys(), + gridparams_recarray[0]) + ) + sig_pdf_key = self.sig_pdf_set.make_key(gridparams) + sig_pdf = self.sig_pdf_set.get_pdf(sig_pdf_key) + (ratio, sig_grads) = sig_pdf.get_pd( + tdm=tdm, + params_recarray=None) + else: + # General case, we need to loop over the sources. 
+ for (sidx, interpol_param_values) in enumerate(gridparams_recarray): + m_src = np.zeros((n_sources), dtype=np.bool_) + m_src[sidx] = True + m_values = tdm.get_values_mask_for_source_mask(m_src) + + gridparams = dict( + zip(gridparams_recarray.dtype.fields.keys(), + interpol_param_values) + ) + sig_pdf_key = self.sig_pdf_set.make_key(gridparams) + sig_pdf = self.sig_pdf_set.get_pdf(sig_pdf_key) + (sig_pd, sig_grads) = sig_pdf.get_pd( + tdm=tdm, + params_recarray=None) + + ratio[m_values] = sig_pd[m_values] + + (bkg_pd, bkg_grads) = self.bkg_pdf.get_pd(tdm=tdm) + (bkg_pd,) = tdm.broadcast_selected_events_arrays_to_values_arrays( + (bkg_pd,)) + + m_nonzero_bkg = bkg_pd > 0 m_zero_bkg = np.invert(m_nonzero_bkg) if np.any(m_zero_bkg): ev_idxs = np.where(m_zero_bkg)[0] @@ -141,14 +221,20 @@ def _get_ratio_values(self, tdm, gridfitparams, eventdata): f'For {len(ev_idxs)} events the background probability is ' f'zero. The event indices of these events are: {ev_idxs}') - ratio = np.empty((len(sig_prob),), dtype=np.double) - ratio[m_nonzero_bkg] = sig_prob[m_nonzero_bkg] / bkg_prob[m_nonzero_bkg] + np.divide( + ratio, + bkg_pd, + where=m_nonzero_bkg, + out=ratio) if self._cap_ratio: ratio[m_zero_bkg] = self.ratio_fill_value_dict[sig_pdf_key] else: - ratio[m_zero_bkg] = (sig_prob[m_zero_bkg] / - np.finfo(np.double).resolution) + np.divide( + ratio, + np.finfo(np.double).resolution, + where=m_zero_bkg, + out=ratio) # Check for positive inf values in the ratio and set the ratio to a # finite number. Here we choose the maximum value of float32 to keep @@ -158,75 +244,141 @@ def _get_ratio_values(self, tdm, gridfitparams, eventdata): return ratio - def _calculate_ratio_and_gradients(self, tdm, fitparams, fitparams_hash): - """Calculates the ratio values and ratio gradients for all the events - given the fit parameters using the interpolation method for the fit - parameter. It caches the results. 
+ def _calculate_ratio_and_grads( + self, + tdm, + src_params_recarray, + fitparams_hash): + """Calculates the ratio and ratio gradient values for all the trial data + events and sources given the fit parameters using the interpolation + method for the fit parameter. It caches the results. """ - (ratio, gradients) =\ - self._interpolmethod_instance.get_value_and_gradients( - tdm, eventdata=None, params=fitparams) + (ratio, grads) = self._interpolmethod( + tdm=tdm, + eventdata=None, + params_recarray=src_params_recarray) - # Cache the value and the gradients. + # Cache the ratio and gradient values. self._cache_fitparams_hash = fitparams_hash self._cache_ratio = ratio - self._cache_gradients = gradients + self._cache_grads = grads - def get_ratio(self, tdm, fitparams=None, tl=None): - """Calculates the PDF ratio values for all the events. + def get_ratio( + self, + tdm, + src_params_recarray, + tl=None): + """Calculates the PDF ratio values for all events and sources. Parameters ---------- tdm : instance of TrialDataManager - The TrialDataManager instance holding the trial data events for + The instance of TrialDataManager holding the trial data events for which the PDF ratio values should get calculated. - fitparams : dict | None - The dictionary with the parameter name-value pairs. - It can be ``None``, if the PDF ratio does not depend on any - parameters. - tl : TimeLord instance | None - The optional TimeLord instance that should be used to measure + src_params_recarray : instance of numpy structured ndarray | None + The (N_sources,)-shaped numpy structured ndarray holding the + parameter names and values of the sources. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to measure timing information. 
Returns ------- - ratios : (N_events,)-shaped 1d numpy ndarray of float - The PDF ratio value for each trial event. + ratios : instance of ndarray + The (N_values,)-shaped 1d numpy ndarray of float holding the PDF + ratio value for each trial event and source. """ - fitparams_hash = make_params_hash(fitparams) + fitparams_hash = self._get_hash_of_local_sig_fit_param_values( + src_params_recarray) # Check if the ratio value is already cached. - if self._is_cached(tdm, fitparams_hash): + if self._is_cached( + tdm=tdm, + fitparams_hash=fitparams_hash): return self._cache_ratio - self._calculate_ratio_and_gradients(tdm, fitparams, fitparams_hash) + self._calculate_ratio_and_grads( + tdm=tdm, + src_params_recarray=src_params_recarray, + fitparams_hash=fitparams_hash) return self._cache_ratio - def get_gradient(self, tdm, fitparams, fitparam_name): - """Retrieves the PDF ratio gradient for the pidx'th fit parameter. + def get_gradient( + self, + tdm, + src_params_recarray, + fitparam_id, + tl=None): + """Retrieves the PDF ratio gradient for the global fit parameter + ``fitparam_id`` for each trial data event and source, given the given + set of parameters ``src_params_recarray`` for each source. Parameters ---------- tdm : instance of TrialDataManager - The TrialDataManager instance holding the trial event data for which - the PDF ratio gradient values should get calculated. - fitparams : dict - The dictionary with the fit parameter values. - fitparam_name : str + The instance of TrialDataManager holding the trial data events for + which the PDF ratio gradient values should get calculated. + src_params_recarray : instance of numpy structured ndarray | None + The (N_sources,)-shaped numpy structured ndarray holding the + parameter names and values of the sources. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information. 
+ fitparam_id : int The name of the fit parameter for which the gradient should get calculated. - """ - fitparams_hash = make_params_hash(fitparams) - - # Convert the fit parameter name into the local fit parameter index. - pidx = self.convert_signal_fitparam_name_into_index(fitparam_name) - - # Check if the gradients have been calculated already. - if self._is_cached(tdm, fitparams_hash): - return self._cache_gradients[pidx] - - # The gradients have not been calculated yet. - self._calculate_ratio_and_gradients(tdm, fitparams, fitparams_hash) + tl : instance of TimeLord | None + The optional TimeLord instance that should be used to measure + timing information. - return self._cache_gradients[pidx] + Returns + ------- + grad : instance of ndarray + The (N_values,)-shaped numpy ndarray holding the gradient values + for all sources and trial events w.r.t. the given global fit + parameter. + """ + fitparams_hash = self._get_hash_of_local_sig_fit_param_values( + src_params_recarray) + + # Calculate the gradients if they are not calculated yet. + if not self._is_cached( + tdm=tdm, + fitparams_hash=fitparams_hash + ): + self._calculate_ratio_and_grads( + tdm=tdm, + src_params_recarray=src_params_recarray, + fitparams_hash=fitparams_hash) + + tdm_n_sources = tdm.n_sources + + grad = np.zeros((tdm.get_n_values(),), dtype=np.float64) + + # Loop through the parameters of the signal PDF set and match them with + # the global fit parameter. + for (pidx, pname) in enumerate( + self._sig_pdf_set.param_grid_set.params_name_list): + if pname not in src_params_recarray.dtype.fields: + continue + p_gpidxs = src_params_recarray[f'{pname}:gpidx'] + src_mask = p_gpidxs == (fitparam_id + 1) + n_sources = np.count_nonzero(src_mask) + if n_sources == 0: + continue + if n_sources == tdm_n_sources: + # This parameter applies to all sources, hence to all values, + # and hence it's the only local parameter contributing to the + # global parameter fitparam_id. 
+ return self._cache_grads[pidx] + + # The current parameter does not apply to all sources. + # Create a values mask that matches a given source mask. + m_values = tdm.get_values_mask_for_source_mask(src_mask) + grad[m_values] = self._cache_grads[pidx][m_values] + + return grad diff --git a/skyllh/analyses/i3/publicdata_ps/scripts/mceq_atm_bkg.py b/skyllh/analyses/i3/publicdata_ps/scripts/mceq_atm_bkg.py index 9f59c46e63..30aae2a89b 100644 --- a/skyllh/analyses/i3/publicdata_ps/scripts/mceq_atm_bkg.py +++ b/skyllh/analyses/i3/publicdata_ps/scripts/mceq_atm_bkg.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + import argparse import numpy as np import os.path @@ -5,11 +7,14 @@ import crflux.models as pm import mceq_config as config -from MCEq.core import MCEqRun +from MCEq.core import ( + MCEqRun, +) from skyllh.analyses.i3.publicdata_ps.aeff import PDAeff from skyllh.datasets.i3 import PublicData_10y_ps + def create_flux_file(save_path, ds): """Creates a pickle file containing the flux for the given dataset. """ @@ -21,7 +26,7 @@ def create_flux_file(save_path, ds): output_pathfilename = os.path.join( save_path, output_filename) - print('Output path filename: %s'%(output_pathfilename)) + print(f'Output path filename: {output_pathfilename}') # Load the effective area instance to get the binning information. aeff = PDAeff( @@ -37,8 +42,8 @@ def create_flux_file(save_path, ds): config.e_max = float( 10**(np.min([aeff._log10_enu_binedges_upper[-1], 9])+0.05)) - print('E_min = %s'%(config.e_min)) - print('E_max = %s'%(config.e_max)) + print(f'E_min = {config.e_min}') + print(f'E_max = {config.e_max}') mceq = MCEqRun( interaction_model="SIBYLL2.3c", @@ -47,17 +52,17 @@ def create_flux_file(save_path, ds): density_model=("MSIS00_IC", ("SouthPole", "January")), ) - print('MCEq log10(e_grid) = %s'%(str(np.log10(mceq.e_grid)))) + print(f'MCEq log10(e_grid) = {np.log10(mceq.e_grid)}') mag = 0 # Use the same binning as for the effective area. 
# theta = delta + pi/2 - print('sin_true_dec_binedges: %s'%(str(aeff.sin_decnu_binedges))) + print(f'sin_true_dec_binedges: {aeff.sin_decnu_binedges}') theta_angles_binedges = np.rad2deg( np.arcsin(aeff.sin_decnu_binedges) + np.pi/2 ) theta_angles = 0.5*(theta_angles_binedges[:-1] + theta_angles_binedges[1:]) - print('Theta angles = %s'%(str(theta_angles))) + print(f'Theta angles = {theta_angles}') flux_def = dict() @@ -142,9 +147,8 @@ def create_flux_file(save_path, ds): # Save the result to the output file. with open(output_pathfilename, 'wb') as f: pickle.dump(((mceq.e_grid, theta_angles_binedges), flux_def), f) - print('Saved fluxes for dataset %s to: %s'%(ds.name, output_pathfilename)) + print(f'Saved fluxes for dataset {ds.name} to: {output_pathfilename}') -#------------------------------------------------------------------------------- if __name__ == '__main__': @@ -173,6 +177,6 @@ def create_flux_file(save_path, ds): for ds_name in dataset_names: ds = dsc.get_dataset(ds_name) create_flux_file( - save_path = args.save_path, + save_path=args.save_path, ds=ds ) diff --git a/skyllh/analyses/i3/publicdata_ps/signal_generator.py b/skyllh/analyses/i3/publicdata_ps/signal_generator.py index a36ac2a673..ea2395c3c1 100644 --- a/skyllh/analyses/i3/publicdata_ps/signal_generator.py +++ b/skyllh/analyses/i3/publicdata_ps/signal_generator.py @@ -1,102 +1,188 @@ # -*- coding: utf-8 -*- import numpy as np -from scipy import interpolate -import scipy.stats +from scipy import ( + interpolate, +) +from skyllh.analyses.i3.publicdata_ps.aeff import ( + PDAeff, +) +from skyllh.analyses.i3.publicdata_ps.smearing_matrix import ( + PDSmearingMatrix, +) +from skyllh.analyses.i3.publicdata_ps.utils import ( + psi_to_dec_and_ra, +) +from skyllh.core.debugging import ( + get_logger, +) +from skyllh.core.flux_model import ( + TimeFluxProfile, +) +from skyllh.core.livetime import ( + Livetime, +) from skyllh.core.py import ( - issequenceof, + classname, float_cast, - int_cast + int_cast, 
+ module_classname, ) -from skyllh.core.py import module_classname -from skyllh.core.debugging import get_logger -from skyllh.core.signal_generator import SignalGeneratorBase -from skyllh.core.llhratio import LLHRatio -from skyllh.core.dataset import Dataset -from skyllh.core.source_hypothesis import SourceHypoGroupManager -from skyllh.core.storage import DataFieldRecordArray - -from skyllh.analyses.i3.publicdata_ps.utils import psi_to_dec_and_ra -from skyllh.analyses.i3.publicdata_ps.smearing_matrix import ( - PDSmearingMatrix +from skyllh.core.signal_generator import ( + SignalGenerator, +) +from skyllh.core.storage import ( + DataFieldRecordArray, +) +from skyllh.core.utils.flux_model import ( + create_scipy_stats_rv_continuous_from_TimeFluxProfile, ) -from skyllh.analyses.i3.publicdata_ps.aeff import PDAeff -class PDDatasetSignalGenerator(object): - """This class provides a signal generation method for a point-like source - seen in the IceCube detector using one dataset of the 10 years public data - release. It is used by the PDSignalGenerator class in a loop over all the - datasets that have been added to the analysis. +class PDDatasetSignalGenerator( + SignalGenerator): + """This class implements a signal generator for a single public data + dataset. """ - - def __init__(self, ds, src_dec, effA=None, sm=None, **kwargs): + def __init__( + self, + shg_mgr, + ds, + ds_idx, + energy_cut_spline=None, + cut_sindec=None, + **kwargs): """Creates a new instance of the signal generator for generating signal events from a specific public data dataset. - Parameters: - ----------- - ds : Dataset instance - Dataset instance for which signal events should get - generated for. - src_dec : float - The declination of the source in radians. - effA : PDAeff | None - Representation of the effective area provided by the public data. - sm : PDSmearingMatrix | None - Representation of the smearing matrix provided by the public data. 
+ Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager defining the source + hypothesis groups. + ds : instance of Dataset + The instance of Dataset for which signal events should get + generated. + ds_idx : int + The index of the dataset. + energy_cut_spline : scipy.interpolate.UnivariateSpline + A spline of E(sin_dec) that defines the declination + dependent energy cut in the IceCube southern sky. + cut_sindec : float + The sine of the declination to start applying the energy cut. + The cut will be applied from this declination down. """ - super().__init__(**kwargs) + super().__init__( + shg_mgr=shg_mgr, + **kwargs) self._logger = get_logger(module_classname(self)) - if sm is None: - self.smearing_matrix = PDSmearingMatrix( - pathfilenames=ds.get_abs_pathfilename_list( + self.ds = ds + self.ds_idx = ds_idx + self.energy_cut_spline = energy_cut_spline + self.cut_sindec = cut_sindec + + self.sm = PDSmearingMatrix( + pathfilenames=ds.get_abs_pathfilename_list( ds.get_aux_data_definition('smearing_datafile'))) - else: - self.smearing_matrix = sm - if effA is None: - dec_idx = self.smearing_matrix.get_true_dec_idx(src_dec) + self._create_source_dependent_data_structures() + + def _create_source_dependent_data_structures(self): + """Creates the source dependent data structures needed by this signal + generator. 
These are: + + - source location in ra and dec + - effective area + - log10 true energy inv CDF spline + + """ + n_sources = self.shg_mgr.n_sources + + self._src_ra_arr = np.empty( + (n_sources,), + dtype=np.float64) + self._src_dec_arr = np.empty( + (n_sources,), + dtype=np.float64) + self._effA_arr = np.empty( + (n_sources,), + dtype=np.object_) + self._log10_true_e_inv_cdf_spl_arr = np.empty( + (n_sources,), + dtype=np.object_) + + for (src_idx, src) in enumerate(self._shg_mgr.source_list): + self._src_ra_arr[src_idx] = src.ra + self._src_dec_arr[src_idx] = src.dec + + dec_idx = self.sm.get_true_dec_idx(src.dec) (min_log_true_e, - max_log_true_e) = \ - self.smearing_matrix.get_true_log_e_range_with_valid_log_e_pdfs( + max_log_true_e) =\ + self.sm.get_true_log_e_range_with_valid_log_e_pdfs( dec_idx) - kwargs = { - 'src_dec': src_dec, - 'min_log10enu': min_log_true_e, - 'max_log10enu': max_log_true_e - } - self.effA = PDAeff( - pathfilenames=ds.get_abs_pathfilename_list( - ds.get_aux_data_definition('eff_area_datafile')), - **kwargs) - - else: - self.effA = effA - - def _generate_inv_cdf_spline(self, flux_model, log_e_min, - log_e_max): - """Sample the true neutrino energy from the power-law - re-weighted with the detection probability. + + self._effA_arr[src_idx] = PDAeff( + pathfilenames=self.ds.get_abs_pathfilename_list( + self.ds.get_aux_data_definition('eff_area_datafile')), + src_dec=src.dec, + min_log10enu=min_log_true_e, + max_log10enu=max_log_true_e) + + # Build the spline for the inverse CDF of the source flux's true + # energy probability distribution. + fluxmodel = self.shg_mgr.get_fluxmodel_by_src_idx(src_idx=src_idx) + self._log10_true_e_inv_cdf_spl_arr[src_idx] =\ + self._create_inv_cdf_spline( + src_idx=src_idx, + fluxmodel=fluxmodel, + log_e_min=min_log_true_e, + log_e_max=max_log_true_e) + + @staticmethod + def _eval_spline(x, spl): + """Evaluates the given spline at the given coordinates. 
+ """ + x = np.asarray(x) + if (x.any() < 0) or (x.any() > 1): + raise ValueError( + f'{x} is outside of the valid spline range. ' + 'The valid range is [0,1].') + + values = interpolate.splev(x, spl, ext=3) + + return values + + def _create_inv_cdf_spline( + self, + src_idx, + fluxmodel, + log_e_min, + log_e_max): + """Creates a spline for the inverse cumulative distribution function of + the detectable true energy probability distribution. """ - m = (self.effA.log10_enu_bincenters >= log_e_min) & ( - self.effA.log10_enu_bincenters < log_e_max) - bin_centers = self.effA.log10_enu_bincenters[m] - low_bin_edges = self.effA._log10_enu_binedges_lower[m] - high_bin_edges = self.effA._log10_enu_binedges_upper[m] + effA = self._effA_arr[src_idx] + + m = (effA.log10_enu_bincenters >= log_e_min) & ( + effA.log10_enu_bincenters < log_e_max) + bin_centers = effA.log10_enu_bincenters[m] + low_bin_edges = effA.log10_enu_binedges_lower[m] + high_bin_edges = effA.log10_enu_binedges_upper[m] # Flux probability P(E_nu | gamma) per bin. - flux_prob = flux_model.get_integral( - 10**low_bin_edges, 10**high_bin_edges - ) / flux_model.get_integral( - 10**low_bin_edges[0], 10**high_bin_edges[-1] + flux_prob = fluxmodel.energy_profile.get_integral( + E1=10**low_bin_edges, + E2=10**high_bin_edges + ) / fluxmodel.energy_profile.get_integral( + E1=10**low_bin_edges[0], + E2=10**high_bin_edges[-1] ) # Do the product and normalize again to a probability per bin. - product = flux_prob * self.effA.det_prob + product = flux_prob * effA.det_prob prob_per_bin = product / np.sum(product) # The probability per bin cannot be zero, otherwise the cumulative @@ -104,13 +190,15 @@ def _generate_inv_cdf_spline(self, flux_model, log_e_min, # 1000 times smaller than the smallest non-zero bin. m = prob_per_bin == 0 prob_per_bin[m] = np.min(prob_per_bin[np.invert(m)]) / 1000 - to_keep = np.where(prob_per_bin > 1e-15)[0] # For numerical stability + to_keep = prob_per_bin > 1e-15 # For numerical stability. 
prob_per_bin = prob_per_bin[to_keep]
         prob_per_bin /= np.sum(prob_per_bin)
 
         # Compute the cumulative distribution CDF.
-        cum_per_bin = [np.sum(prob_per_bin[:i])
-                       for i in range(prob_per_bin.size+1)]
+        cum_per_bin = [
+            np.sum(prob_per_bin[:i])
+            for i in range(prob_per_bin.size+1)
+        ]
         if np.any(np.diff(cum_per_bin) == 0):
             raise ValueError(
                 'The cumulative sum of the true energy probability is not '
@@ -123,21 +211,17 @@ def _generate_inv_cdf_spline(self, flux_model, log_e_min,
         # Build a spline for the inverse CDF.
         return interpolate.splrep(cum_per_bin, bin_centers, k=1, s=0)
 
-    @staticmethod
-    def _eval_spline(x, spl):
-        x = np.asarray(x)
-        if (x.any() < 0 or x.any() > 1):
-            raise ValueError(
-                f'{x} is outside of the valid spline range. '
-                'The valid range is [0,1].')
-        values = interpolate.splev(x, spl, ext=3)
-        return values
-
-    def _generate_events(
-        self, rss, src_dec, src_ra, dec_idx,
-        log_true_e_inv_cdf_spl, n_events):
+    def _draw_signal_events_for_source(
+            self,
+            rss,
+            src_dec,
+            src_ra,
+            dec_idx,
+            log10_true_e_inv_cdf_spl,
+            n_events):
         """Generates `n_events` signal events for the given source location
-        and flux model.
+        using the given inverse cumulative density function for the
+        log10(E_true/GeV) distribution.
 
         Note:
             Some values can be NaN in cases where a PDF was not available!
@@ -151,19 +235,33 @@ def _generate_events(
             The declination of the source in radians.
         src_ra : float
             The right-ascention of the source in radians.
+        dec_idx : int
+            The SM's declination bin index of the source's declination.
+        log10_true_e_inv_cdf_spl : instance of scipy.interpolate.splrep
+            The linear spline interpolation representation of the inverse
+            cumulative density function of the log10(E_true/GeV) distribution.
+        n_events : int
+            The number of events to generate.
 
         Returns
         -------
-        events : numpy record array of size `n_events`
-            The numpy record array holding the event data.
- It contains the following data fields: - - 'isvalid' - - 'log_true_energy' - - 'log_energy' - - 'sin_dec' + events : instance of DataFieldRecordArray of size `n_events` + The instance of DataFieldRecordArray of length `n_events` holding + the event data. It contains the following data fields: + + ``'isvalid'`` + ``'log_true_energy'`` + ``'log_energy'`` + ``'dec'`` + ``'sin_dec'`` + ``'ang_err'`` + ``'time'`` + ``'azi'`` + ``'zen'`` + ``'run'`` + Single values can be NaN in cases where a pdf was not available. """ - # Create the output event DataFieldRecordArray. out_dtype = [ ('isvalid', np.bool_), @@ -188,10 +286,10 @@ def _generate_events( events = DataFieldRecordArray(data, copy=False) - sm = self.smearing_matrix + sm = self.sm log_true_e = self._eval_spline( - rss.random.uniform(size=n_events), log_true_e_inv_cdf_spl) + rss.random.uniform(size=n_events), log10_true_e_inv_cdf_spl) events['log_true_energy'] = log_true_e @@ -227,116 +325,137 @@ def _generate_events( # Add an angular error. Only use non-nan values. events['ang_err'][isvalid] = ang_err[isvalid] - # Add fields required by the framework - events['time'] = np.ones(n_events) - events['azi'] = np.ones(n_events) - events['zen'] = np.ones(n_events) - events['run'] = -1 * np.ones(n_events) + # Add fields required by the framework. + events['time'] = np.full((n_events,), np.nan, dtype=np.float64) + events['azi'] = np.full((n_events,), np.nan, dtype=np.float64) + events['zen'] = np.full((n_events,), np.nan, dtype=np.float64) + events['run'] = np.full((n_events,), -1, dtype=np.int64) return events + def change_shg_mgr( + self, + shg_mgr): + """Changes the source hypothesis group manager. This will recreate the + internal source dependent data structures. 
+ """ + super().change_shg_mgr( + shg_mgr=shg_mgr) + + self._create_source_dependent_data_structures() + @staticmethod - @np.vectorize - def energy_filter(events, spline, cut_sindec, logger): - """The energy filter will select all events below `cut_sindec` + def create_energy_filter_mask( + events, + spline, + cut_sindec, + logger): + """Creates a mask for cutting all events below ``cut_sindec`` that have an energy smaller than the energy spline at their declination. - Paramters - --------- - events : numpy record array - Numpy record array with the generated signal events. - energy_cut_splines : scipy.interpolate.UnivariateSpline + Parameters + ---------- + events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray holding the generated signal + events. + spline : instance of scipy.interpolate.UnivariateSpline A spline of E(sin_dec) that defines the declination dependent energy cut in the IceCube southern sky. cut_sindec : float - The sine of the declination to start applying the energy cut. + The sine of the declination to start applying the energy cut. The cut will be applied from this declination down. - logger : logging.Logger + logger : instance of logging.Logger The Logger instance. Returns - energy_filter : (len(events),)-shaped numpy ndarray - A mask of shape `len(events)` of the events to be cut. + ------- + filter_mask : instance of numpy ndarray + The (len(events),)-shaped numpy ndarray with the mask of the events + to cut. """ if cut_sindec is None: logger.warn( 'No `cut_sindec` has been specified. The energy cut will be ' 'applied in [-90, 0] deg.') cut_sindec = 0. 
- energy_filter = np.logical_and( + + filter_mask = np.logical_and( events['sin_dec'] < cut_sindec, events['log_energy'] < spline(events['sin_dec'])) - return energy_filter + return filter_mask - def generate_signal_events( - self, rss, src_dec, src_ra, flux_model, n_events, - energy_cut_spline=None, cut_sindec=None): + def generate_signal_events_for_source( + self, + rss, + src_idx, + n_events): """Generates ``n_events`` signal events for the given source location and flux model. - Paramters - --------- - rss : RandomStateService - src_dec : float - Declination coordinate of the injection point. - src_ra : float - Right ascension coordinate of the injection point. - flux_model : FluxModel - Instance of the `FluxModel` class. + Parameters + ---------- + rss : instance of RandomStateService + The instance of RandomStateService providing the random number + generator state. + src_idx : int + The index of the source. n_events : int Number of signal events to be generated. - energy_cut_splines : scipy.interpolate.UnivariateSpline - A spline of E(sin_dec) that defines the declination - dependent energy cut in the IceCube southern sky. - cut_sindec : float - The sine of the declination to start applying the energy cut. - The cut will be applied from this declination down. Returns ------- - events : numpy record array + events : instance of DataFieldRecordArray The numpy record array holding the event data. It contains the following data fields: + - 'isvalid' - 'log_true_energy' - 'log_energy' - 'dec' - 'ra' - 'ang_err' + """ - sm = self.smearing_matrix + sm = self.sm + + src_dec = self._src_dec_arr[src_idx] + src_ra = self._src_ra_arr[src_idx] + + log10_true_e_inv_cdf_spl = self._log10_true_e_inv_cdf_spl_arr[src_idx] # Find the declination bin index. dec_idx = sm.get_true_dec_idx(src_dec) - # Determine the true energy range for which log_e PDFs are available. 
- (min_log_true_e, - max_log_true_e) = sm.get_true_log_e_range_with_valid_log_e_pdfs( - dec_idx) - # Build the spline for the inverse CDF and draw a true neutrino - # energy from the hypothesis spectrum. - log_true_e_inv_cdf_spl = self._generate_inv_cdf_spline( - flux_model, min_log_true_e, max_log_true_e) - events = None - n_evt_generated = 0 - while n_evt_generated != n_events: - n_evt = n_events - n_evt_generated - events_ = self._generate_events( - rss, src_dec, src_ra, dec_idx, log_true_e_inv_cdf_spl, n_evt) + n_events_generated = 0 + while n_events_generated < n_events: + n_evt = n_events - n_events_generated + + events_ = self._draw_signal_events_for_source( + rss=rss, + src_dec=src_dec, + src_ra=src_ra, + dec_idx=dec_idx, + log10_true_e_inv_cdf_spl=log10_true_e_inv_cdf_spl, + n_events=n_evt) # Cut events that failed to be generated due to missing PDFs. # Also cut low energy events if generating in the southern sky. events_ = events_[events_['isvalid']] - if energy_cut_spline is not None: - to_cut = self.energy_filter( - events_, energy_cut_spline, cut_sindec, self._logger) - events_ = events_[~to_cut] - if not len(events_) == 0: - n_evt_generated += len(events_) + + if self.energy_cut_spline is not None: + cut_mask = self.create_energy_filter_mask( + events=events_, + spline=self.energy_cut_spline, + cut_sindec=self.cut_sindec, + logger=self._logger) + events_ = events_[~cut_mask] + + if len(events_) > 0: + n_events_generated += len(events_) if events is None: events = events_ else: @@ -344,315 +463,232 @@ def generate_signal_events( return events - -class PDSignalGenerator(SignalGeneratorBase): - """This class provides a signal generation method for a point-like source - seen in the IceCube detector using the 10 years public data release. - """ - - def __init__(self, src_hypo_group_manager, dataset_list, data_list=None, - llhratio=None, energy_cut_splines=None, cut_sindec=None): - """Constructs a new signal generator instance. 
+ def generate_signal_events( + self, + rss, + mean, + poisson=True, + src_detsigyield_weights_service=None, + **kwargs): + """Generates ``mean`` number of signal events. Parameters ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The SourceHypoGroupManager instance defining the source hypothesis - groups. - dataset_list : list of Dataset instances - The list of Dataset instances for which signal events should get - generated for. - data_list : list of DatasetData instances - The list of DatasetData instances holding the actual data of each - dataset. The order must match the order of ``dataset_list``. - llhratio : LLHRatio - The likelihood ratio object contains the datasets signal weights - needed for distributing the event generation among the different - datasets. - energy_cut_splines : list of UnivariateSpline - A list of splines of E(sin_dec) used to define the declination - dependent energy cut in the IceCube southern sky. - cut_sindec : list of float - The sine of the declination to start applying the energy cut. - The cut will be applied from this declination down. - """ - self.src_hypo_group_manager = src_hypo_group_manager - self.dataset_list = dataset_list - self.data_list = data_list - self.llhratio = llhratio - self.effA = [None] * len(self._dataset_list) - self.sm = [None] * len(self._dataset_list) - self.splines = energy_cut_splines - self.cut_sindec = cut_sindec + rss : instance of RandomStateService + The instance of RandomStateService providing the random number + generator state. + mean : int | float + The mean number of signal events. If the ``poisson`` argument is set + to True, the actual number of generated signal events will be drawn + from a Poisson distribution with this given mean value of signal + events. + poisson : bool + If set to True, the actual number of generated signal events will + be drawn from a Poisson distribution with the given mean value of + signal events. 
+ If set to False, the argument ``mean`` specifies the actual number + of generated signal events. + src_detsigyield_weights_service : instance of SrcDetSigYieldWeightsService + The instance of SrcDetSigYieldWeightsService providing the weighting + of the sources within the detector. - @property - def src_hypo_group_manager(self): - """The SourceHypoGroupManager instance defining the source groups with - their spectra. + Returns + ------- + n_signal : int + The number of generated signal events. + signal_events_dict : dict of DataFieldRecordArray + The dictionary holding the DataFieldRecordArray instances with the + generated signal events. Each key of this dictionary represents the + dataset index for which the signal events have been generated. """ - return self._src_hypo_group_manager + if poisson: + n_events = rss.random.poisson( + float_cast( + mean, + 'The `mean` argument must be castable to type of float!')) - @src_hypo_group_manager.setter - def src_hypo_group_manager(self, manager): - if(not isinstance(manager, SourceHypoGroupManager)): - raise TypeError('The src_hypo_group_manager property must be an ' - 'instance of SourceHypoGroupManager!') - self._src_hypo_group_manager = manager + n_events = int_cast( + mean, + 'The `mean` argument must be castable to type of int!') - @property - def dataset_list(self): - """The list of Dataset instances for which signal events should get - generated for. 
- """ - return self._dataset_list + if src_detsigyield_weights_service is None: + raise ValueError( + 'The src_detsigyield_weights_service argument must be provided ' + f'for the signal generator {classname(self)}!') - @dataset_list.setter - def dataset_list(self, datasets): - if(not issequenceof(datasets, Dataset)): - raise TypeError('The dataset_list property must be a sequence of ' - 'Dataset instances!') - self._dataset_list = list(datasets) + (a_jk, a_jk_grads) = src_detsigyield_weights_service.get_weights() - @property - def llhratio(self): - """The log-likelihood ratio function for the analysis. - """ - return self._llhratio - - @llhratio.setter - def llhratio(self, llhratio): - if llhratio is not None: - if(not isinstance(llhratio, LLHRatio)): - raise TypeError('The llratio property must be an instance of ' - 'LLHRatio!') - self._llhratio = llhratio - - def generate_signal_events(self, rss, mean, poisson=True): - shg_list = self._src_hypo_group_manager.src_hypo_group_list - # Only supports a single source hypothesis group. Raise an error - # if more than one shg is in the source hypo group manager. - if len(shg_list) > 1: - raise RuntimeError( - 'Signal injection for multiple source hypothesis groups is ' - 'not supported yet.') - - tot_n_events = 0 + a_k = np.copy(a_jk[self.ds_idx]) + a_k /= np.sum(a_k) + + n_signal = 0 signal_events_dict = {} - for shg in shg_list: - # Only supports single point source signal injection. Raise - # an error if more than one source is in the source hypo group. - if len(shg.source_list) > 1: - raise RuntimeError( - 'Signal injection for multiple sources within a source ' - 'hypothesis group is not supported yet.') - # This only works with power-laws for now. 
- # Each source hypo group can have a different power-law - gamma = shg.fluxmodel.gamma - weights, _ = self.llhratio.dataset_signal_weights([mean, gamma]) - for (ds_idx, w) in enumerate(weights): - w_mean = mean * w - if(poisson): - n_events = rss.random.poisson( - float_cast( - w_mean, - '`mean` must be castable to type of float!' - ) - ) - else: - n_events = int_cast( - w_mean, - '`mean` must be castable to type of int!' - ) - tot_n_events += n_events - - events_ = None - for (shg_src_idx, src) in enumerate(shg.source_list): - ds = self._dataset_list[ds_idx] - sig_gen = PDDatasetSignalGenerator( - ds, src.dec, self.effA[ds_idx], self.sm[ds_idx]) - if self.effA[ds_idx] is None: - self.effA[ds_idx] = sig_gen.effA - if self.sm[ds_idx] is None: - self.sm[ds_idx] = sig_gen.smearing_matrix - # ToDo: here n_events should be split according to some - # source weight - events_ = sig_gen.generate_signal_events( - rss, - src.dec, - src.ra, - shg.fluxmodel, - n_events, - energy_cut_spline=self.splines[ds_idx], - cut_sindec=self.cut_sindec[ds_idx] - ) - if events_ is None: - continue - - if shg_src_idx == 0: - signal_events_dict[ds_idx] = events_ - else: - signal_events_dict[ds_idx].append(events_) - - return tot_n_events, signal_events_dict - - -class PDTimeDependentSignalGenerator(PDSignalGenerator): - """ The time dependent signal generator works so far only for one single - dataset. For multi datasets one needs to adjust the dataset weights - accordingly (scaling of the effective area with livetime of the flare in - the dataset). - """ + # Loop over the sources and generate signal events according to the + # weights of the sources. 
+ for src_idx in range(self.shg_mgr.n_sources): + n_events_src = int(np.round(n_events * a_k[src_idx], 0)) - def __init__(self, src_hypo_group_manager, dataset_list, data_list=None, - llhratio=None, energy_cut_splines=None, cut_sindec=None, - gauss=None, box=None): - """ - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of SourceHypoGroupManager that defines the list of - sources, i.e. the list of SourceModel instances. - dataset_list : list of Dataset instances - The list of Dataset instances for which signal events should get - generated for. - data_list : list of DatasetData instances - The list of DatasetData instances holding the actual data of each - dataset. The order must match the order of ``dataset_list``. - llhratio : LLHRatio - The likelihood ratio object contains the datasets signal weights - needed for distributing the event generation among the different - datsets. - energy_cut_splines : list of UnivariateSpline - cut_sindec : float - gauss : dict | None - None or dictionary with {"mu": float, "sigma": float}. - box : dict | None - None or dictionary with {"start": float, "end": float}. - """ - if gauss is None and box is None: - raise ValueError( - "Either box or gauss keywords must define the neutrino flare.") - if gauss is not None and box is not None: - raise ValueError( - "Either box or gauss keywords must define the neutrino flare, " - "cannot use both.") - - super().__init__(src_hypo_group_manager, dataset_list, data_list, - llhratio, energy_cut_splines, cut_sindec) - self.box = box - self.gauss = gauss + src_events = self.generate_signal_events_for_source( + rss=rss, + src_idx=src_idx, + n_events=n_events_src, + ) + if src_events is None: + continue - self.time_pdf = self._get_time_pdf() + n_signal += len(src_events) - def _get_time_pdf(self): - """Get the neutrino flare time pdf given parameters. - Will be used to generate random numbers by calling `rvs()` method. 
+ if self.ds_idx not in signal_events_dict: + signal_events_dict[self.ds_idx] = src_events + else: + signal_events_dict[self.ds_idx].append(src_events) - Returns - ------- - time_pdf : instance of scipy.stats.rv_continuous base class - Has to base scipy.stats.rv_continuous. - """ - # Make sure flare is in dataset. - for data_list in self.data_list: - grl = data_list.grl - - if self.gauss is not None: - if (self.gauss["mu"] - 4 * self.gauss["sigma"] > grl["stop"][-1]) or ( - self.gauss["mu"] + 4 * self.gauss["sigma"] < grl["start"][0]): - raise ValueError( - f"Gaussian {str(self.gauss)} flare is not in dataset.") - - if self.box is not None: - if (self.box["start"] > grl["stop"][-1]) or ( - self.box["end"] < grl["start"][0]): - raise ValueError( - f"Box {str(self.box)} flare is not in dataset.") - - # Create `time_pdf`. - if self.gauss is not None: - time_pdf = scipy.stats.norm(self.gauss["mu"], self.gauss["sigma"]) - if self.box is not None: - time_pdf = scipy.stats.uniform( - self.box["start"], - self.box["end"] - self.box["start"] - ) + return (n_signal, signal_events_dict) - return time_pdf - def set_flare(self, gauss=None, box=None): - """Set the neutrino flare given parameters. +class TimeDependentPDDatasetSignalGenerator( + PDDatasetSignalGenerator): + """This time dependent signal generator for a public PS dataset generates + events using the + :class:`~skyllh.analyses.i3.publicdata_ps.signal_generator.PDDatasetSignalGenerator` + class. It then draws times for each event and adds them to the event array. + """ + def __init__( + self, + shg_mgr, + ds, + ds_idx, + livetime, + time_flux_profile, + energy_cut_spline=None, + cut_sindec=None, + **kwargs): + """ Parameters ---------- - gauss : dict | None - None or dictionary with {"mu": float, "sigma": float}. - box : dict | None - None or dictionary with {"start": float, "end": float}. 
+        shg_mgr : instance of SourceHypoGroupManager
+            The instance of SourceHypoGroupManager that defines the list of
+            source hypothesis groups, i.e. the list of sources.
+        ds : instance of Dataset
+            The instance of Dataset for which signal events should get
+            generated.
+        ds_idx : int
+            The index of the dataset.
+        livetime : instance of Livetime
+            The instance of Livetime providing the live-time information of the
+            dataset.
+        time_flux_profile : instance of TimeFluxProfile
+            The instance of TimeFluxProfile providing the time profile of the
+            source(s).
+
+            Note:
+
+            At this time the same time profile will be used for all
+            sources!
+
+        energy_cut_spline : scipy.interpolate.UnivariateSpline
+            A spline of E(sin_dec) that defines the declination
+            dependent energy cut in the IceCube southern sky.
+        cut_sindec : float
+            The sine of the declination to start applying the energy cut.
+            The cut will be applied from this declination down.
         """
-        if gauss is None and box is None:
-            raise ValueError(
-                "Either box or gauss keywords must define the neutrino flare.")
-        if gauss is not None and box is not None:
-            raise ValueError(
-                "Either box or gauss keywords must define the neutrino flare, "
-                "cannot use both.")
+        super().__init__(
+            shg_mgr=shg_mgr,
+            ds=ds,
+            ds_idx=ds_idx,
+            energy_cut_spline=energy_cut_spline,
+            cut_sindec=cut_sindec,
+            **kwargs)
+
+        if not isinstance(time_flux_profile, TimeFluxProfile):
+            raise TypeError(
+                'The time_flux_profile argument must be an instance of '
+                'TimeFluxProfile! '
+                f'Its current type is {classname(time_flux_profile)}!')
+
+        self.livetime = livetime
+        self._time_flux_profile = time_flux_profile
 
-        self.box = box
-        self.gauss = gauss
+    @property
+    def livetime(self):
+        """The instance of Livetime providing the live-time information of the
+        dataset.
+ """ + return self._livetime - self.time_pdf = self._get_time_pdf() + @livetime.setter + def livetime(self, lt): + if not isinstance(lt, Livetime): + raise TypeError( + 'The livetime property must be an instance of Livetime! ' + f'Its current type is {classname(lt)}!') - def is_in_grl(self, time, grl): - """Helper function to check if given times are in the grl ontime. + def generate_signal_events( + self, + rss, + mean, + poisson=True, + src_detsigyield_weights_service=None, + **kwargs): + """Generates ``mean`` number of signal events with times. Parameters ---------- - time : 1d ndarray - Time values. - grl : ndarray - Array of the detector good run list. + rss : instance of RandomStateService + The instance of RandomStateService providing the random number + generator state. + mean : int | float + The mean number of signal events. If the ``poisson`` argument is set + to True, the actual number of generated signal events will be drawn + from a Poisson distribution with this given mean value of signal + events. + poisson : bool + If set to True, the actual number of generated signal events will + be drawn from a Poisson distribution with the given mean value of + signal events. + If set to False, the argument ``mean`` specifies the actual number + of generated signal events. + src_detsigyield_weights_service : instance of SrcDetSigYieldWeightsService + The instance of SrcDetSigYieldWeightsService providing the weighting + of the sources within the detector. Returns ------- - is_in_grl : 1d ndarray - Boolean mask of `time` in grl ontime. - """ - def f(time, grl): - return np.any((grl["start"] <= time) & (time <= grl["stop"])) - - # Vectorize `f`, but exclude `grl` argument from vectorization. - # This is needed to support `time` as an array argument. - f_v = np.vectorize(f, excluded=[1]) - is_in_grl = f_v(time, grl) - - return is_in_grl - - def generate_signal_events(self, rss, mean, poisson=True): - """Same as in PDSignalGenerator, but we assign times here. 
+ n_signal : int + The number of generated signal events. + signal_events_dict : dict of DataFieldRecordArray + The dictionary holding the DataFieldRecordArray instances with the + generated signal events. Each key of this dictionary represents the + dataset index for which the signal events have been generated. """ - # Call method from the parent class to generate signal events. - (tot_n_events, signal_events_dict) = super().generate_signal_events( - rss, mean, poisson=poisson) - - # Assign times for flare. We can also use inverse transform - # sampling instead of the lazy version implemented here. - for (ds_idx, events_) in signal_events_dict.items(): - grl = self.data_list[ds_idx].grl - - # Optimized time injection version, based on csky implementation. - # https://github.com/icecube/csky/blob/7e969639c5ef6dbb42872dac9b761e1e8b0ccbe2/csky/inj.py#L1122 - times = np.array([]) - n_events = len(events_) - while len(times) < n_events: - times = np.concatenate( - (times, self.time_pdf.rvs(n_events - len(times), - random_state=rss.random)) - ) - # Check if times is in grl. - is_in_grl_mask = self.is_in_grl(times, grl) - times = times[is_in_grl_mask] - - events_["time"] = times - return tot_n_events, signal_events_dict + (n_signal, signal_events_dict) = super().generate_signal_events( + rss=rss, + mean=mean, + poisson=poisson, + src_detsigyield_weights_service=src_detsigyield_weights_service, + **kwargs) + + # Create a scipy.stats.rv_continuous instance for the time flux profile. + time_rv = create_scipy_stats_rv_continuous_from_TimeFluxProfile( + profile=self._time_flux_profile) + + # Optimized time injection version, based on csky implementation. 
+ # https://github.com/icecube/csky/blob/7e969639c5ef6dbb42872dac9b761e1e8b0ccbe2/csky/inj.py#L1122 + events = signal_events_dict[self.ds_idx] + times = np.array([], dtype=np.float64) + n_events = len(events) + while len(times) < n_events: + new_times = time_rv.rvs( + size=(n_events - len(times)), + random_state=rss.random) + mask = self._livetime.is_on( + mjd=new_times) + new_times = new_times[mask] + + times = np.concatenate((times, new_times)) + events['time'] = times + + return (n_signal, signal_events_dict) diff --git a/skyllh/analyses/i3/publicdata_ps/signalpdf.py b/skyllh/analyses/i3/publicdata_ps/signalpdf.py index 474e2a976c..131796ce44 100644 --- a/skyllh/analyses/i3/publicdata_ps/signalpdf.py +++ b/skyllh/analyses/i3/publicdata_ps/signalpdf.py @@ -1,56 +1,79 @@ # -*- coding: utf-8 -*- import numpy as np + from scipy import integrate -from skyllh.core.py import module_classname -from skyllh.core.debugging import get_logger -from skyllh.core.timing import TaskTimer -from skyllh.core.binning import get_bincenters_from_binedges -from skyllh.core.pdf import ( - PDF, - PDFAxis, - PDFSet, - IsSignalPDF, +from skyllh.analyses.i3.publicdata_ps.aeff import ( + PDAeff, +) +from skyllh.analyses.i3.publicdata_ps.utils import ( + FctSpline1D, +) +from skyllh.analyses.i3.publicdata_ps.smearing_matrix import ( + PDSmearingMatrix, +) +from skyllh.core.binning import ( + get_bincenters_from_binedges, +) +from skyllh.core.debugging import ( + get_logger, +) +from skyllh.core.flux_model import ( + FactorizedFluxModel, ) from skyllh.core.multiproc import ( IsParallelizable, - parallelize + parallelize, ) from skyllh.core.parameters import ( ParameterGrid, - ParameterGridSet + ParameterGridSet, ) -from skyllh.i3.dataset import I3Dataset -from skyllh.physics.flux import FluxModel - -from skyllh.analyses.i3.publicdata_ps.aeff import PDAeff -from skyllh.analyses.i3.publicdata_ps.utils import ( - FctSpline1D, +from skyllh.core.pdf import ( + IsSignalPDF, + PDF, + PDFAxis, + 
PDFSet, ) -from skyllh.analyses.i3.publicdata_ps.smearing_matrix import ( - PDSmearingMatrix +from skyllh.core.py import ( + classname, + module_classname, +) +from skyllh.core.timing import ( + TaskTimer, +) +from skyllh.i3.dataset import ( + I3Dataset, ) -class PDSignalEnergyPDF(PDF, IsSignalPDF): +class PDSignalEnergyPDF( + PDF, + IsSignalPDF): """This class provides a signal energy PDF for a spectrial index value. """ def __init__( - self, f_e_spl, **kwargs): + self, + f_e_spl, + **kwargs): """Creates a new signal energy PDF instance for a particular spectral index value. Parameters ---------- - f_e_spl : FctSpline1D instance - The FctSpline1D instance representing the spline of the energy PDF. + f_e_spl : instance of FctSpline1D + The instance of FctSpline1D representing the spline of the energy + PDF. """ - super().__init__(**kwargs) + super().__init__( + pmm=None, + **kwargs) if not isinstance(f_e_spl, FctSpline1D): raise TypeError( - 'The f_e_spl argument must be an instance of FctSpline1D!') + 'The f_e_spl argument must be an instance of FctSpline1D! ' + f'Its current type is {classname(f_e_spl)}!') self.f_e_spl = f_e_spl @@ -61,11 +84,11 @@ def __init__( self.log10_reco_e_max = self.log10_reco_e_upper_binedges[-1] # Add the PDF axes. - self.add_axis(PDFAxis( - name='log_energy', - vmin=self.log10_reco_e_min, - vmax=self.log10_reco_e_max) - ) + self.add_axis( + PDFAxis( + name='log_energy', + vmin=self.log10_reco_e_min, + vmax=self.log10_reco_e_max)) # Check integrity. integral = integrate.quad( @@ -78,31 +101,39 @@ def __init__( if not np.isclose(integral, 1): raise ValueError( 'The integral over log10_reco_e of the energy term must be ' - 'unity! But it is {}!'.format(integral)) + f'unity! 
But it is {integral}!') - def assert_is_valid_for_trial_data(self, tdm): + def assert_is_valid_for_trial_data( + self, + tdm, + tl=None): pass - def get_pd_by_log10_reco_e(self, log10_reco_e, tl=None): + def get_pd_by_log10_reco_e( + self, + log10_reco_e, + tl=None): """Calculates the probability density for the given log10(E_reco/GeV) values using the spline representation of the PDF. Parameters ---------- - log10_reco_e : (n_log10_reco_e,)-shaped 1D numpy ndarray - The numpy ndarray holding the log10(E_reco/GeV) values for which - the energy PDF should get evaluated. - tl : TimeLord instance | None - The optional TimeLord instance that should be used to measure + log10_reco_e : instance of ndarray + The (n_log10_reco_e,)-shaped numpy ndarray holding the + log10(E_reco/GeV) values for which the energy PDF should get + evaluated. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to measure timing information. Returns ------- - pd : (N_events,)-shaped numpy ndarray - The 1D numpy ndarray with the probability density for each event. + pd : instance of numpy ndarray + The (n_log10_reco_e,)-shaped numpy ndarray with the probability + density for each energy value. """ - # Select events that actually have a signal energy PDF. - # All other events will get zero signal probability density. + # Select energy values that actually have a signal energy PDF. + # All other energy values will get zero signal probability density. m = ( (log10_reco_e >= self.log10_reco_e_min) & (log10_reco_e < self.log10_reco_e_max) @@ -114,46 +145,58 @@ def get_pd_by_log10_reco_e(self, log10_reco_e, tl=None): return pd - def get_prob(self, tdm, params=None, tl=None): - """Calculates the probability density for the events given by the - TrialDataManager. + def get_pd( + self, + tdm, + params_recarray=None, + tl=None): + """Calculates the probability density for all given trial data events + and sources. 
Parameters ---------- - tdm : TrialDataManager instance - The TrialDataManager instance holding the data events for which the - probability should be looked up. The following data fields are - required: - - 'log_energy' - The log10 of the reconstructed energy. - params : dict | None - The dictionary containing the parameter names and values for which - the probability should get calculated. - By definition this PDF does not depend on parameters. - tl : TimeLord instance | None - The optional TimeLord instance that should be used to measure + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial data events for + which the probability density should be looked up. + The following data fields must be present: + + log_energy : float + The base-10 logarithm of the reconstructed energy. + + params_recarray : None + Unused interface argument. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to measure timing information. Returns ------- - pd : (N_events,)-shaped numpy ndarray - The 1D numpy ndarray with the probability density for each event. - grads : (N_fitparams,N_events)-shaped ndarray | None - The 2D numpy ndarray holding the gradients of the PDF w.r.t. - each fit parameter for each event. The order of the gradients - is the same as the order of floating parameters specified through - the ``param_set`` property. - It is ``None``, if this PDF does not depend on any parameters. + pd : instance of ndarray + The (N_values,)-shaped numpy ndarray holding the probability density + for each trial data event and source. + grads : dict + The dictionary holding the gradient values for each global fit + parameter. By definition this PDF does not depend on any fit + parameters, hence, this is an empty dictionary. 
""" - log10_reco_e = tdm.get_data('log_energy') + evt_idxs = tdm.src_evt_idxs[1] + + log10_reco_e = np.take(tdm['log_energy'], evt_idxs) - pd = self.get_pd_by_log10_reco_e(log10_reco_e, tl=tl) + pd = self.get_pd_by_log10_reco_e( + log10_reco_e=log10_reco_e, + tl=tl) - return (pd, None) + grads = dict() + return (pd, grads) -class PDSignalEnergyPDFSet(PDFSet, IsSignalPDF, IsParallelizable): - """This class provides a signal energy PDF set for the public data. + +class PDSignalEnergyPDFSet( + PDFSet, + IsSignalPDF, + IsParallelizable): + """This class provides a signal energy PDF set using the public data. It creates a set of PDSignalEnergyPDF instances, one for each spectral index value on a grid. """ @@ -161,8 +204,8 @@ def __init__( self, ds, src_dec, - flux_model, - fitparam_grid_set, + fluxmodel, + param_grid_set, ncpu=None, ppbar=None, **kwargs): @@ -170,18 +213,20 @@ def __init__( Parameters ---------- - ds : I3Dataset instance - The I3Dataset instance that defines the dataset of the public data. + ds : instance of Dataset + The instance of Dataset that defines the dataset of the public data. src_dec : float The declination of the source in radians. - flux_model : FluxModel instance - The FluxModel instance that defines the source's flux model. - fitparam_grid_set : ParameterGrid | ParameterGridSet instance - The parameter grid set defining the grids of the fit parameters. + fluxmodel : instance of FactorizedFluxModel + The instance of FactorizedFluxModel that defines the source's flux + model. + param_grid_set : instance of ParameterGrid | instance of ParameterGridSet + The parameter grid set defining the grids of the parameters this + energy PDF set depends on. ncpu : int | None The number of CPUs to utilize. Global setting will take place if not specified, i.e. set to None. - ppbar : ProgressBar instance | None + ppbar : instance of ProgressBar | None The instance of ProgressBar for the optional parent progress bar. 
""" self._logger = get_logger(module_classname(self)) @@ -191,26 +236,28 @@ def __init__( raise TypeError( 'The ds argument must be an instance of I3Dataset!') - if not isinstance(flux_model, FluxModel): + if not isinstance(fluxmodel, FactorizedFluxModel): raise TypeError( - 'The flux_model argument must be an instance of FluxModel!') + 'The fluxmodel argument must be an instance of ' + 'FactorizedFluxModel! ' + f'Its current type is {classname(fluxmodel)}') - if (not isinstance(fitparam_grid_set, ParameterGrid)) and\ - (not isinstance(fitparam_grid_set, ParameterGridSet)): + if (not isinstance(param_grid_set, ParameterGrid)) and\ + (not isinstance(param_grid_set, ParameterGridSet)): raise TypeError( - 'The fitparam_grid_set argument must be an instance of type ' - 'ParameterGrid or ParameterGridSet!') + 'The param_grid_set argument must be an instance of type ' + 'ParameterGrid or ParameterGridSet! ' + f'Its current type is {classname(param_grid_set)}!') - # Extend the fitparam_grid_set to allow for parameter interpolation + # Extend the param_grid_set to allow for parameter interpolation # values at the grid edges. - fitparam_grid_set = fitparam_grid_set.copy() - fitparam_grid_set.add_extra_lower_and_upper_bin() + param_grid_set = param_grid_set.copy() + param_grid_set.add_extra_lower_and_upper_bin() super().__init__( - pdf_type=PDF, - fitparams_grid_set=fitparam_grid_set, - ncpu=ncpu - ) + param_grid_set=param_grid_set, + ncpu=ncpu, + **kwargs) # Load the smearing matrix. 
sm = PDSmearingMatrix( @@ -234,11 +281,12 @@ def __init__( 10, sm.log10_true_enu_binedges[log_true_e_mask]) true_enu_binedges_lower = true_enu_binedges[:-1] true_enu_binedges_upper = true_enu_binedges[1:] - valid_true_e_idxs = [sm.get_log10_true_e_idx(0.5 * (he + le)) - for he,le in zip( + valid_true_e_idxs = [ + sm.get_log10_true_e_idx(0.5 * (he + le)) + for (he, le) in zip( sm.log10_true_enu_binedges[log_true_e_mask][1:], sm.log10_true_enu_binedges[log_true_e_mask][:-1]) - ] + ] xvals_binedges = ds.get_binning_definition('log_energy').binedges xvals = get_bincenters_from_binedges(xvals_binedges) @@ -276,26 +324,24 @@ def __init__( ang_err_bw = sm.ang_err_upper_edges - sm.ang_err_lower_edges # Create the energy pdf for different gamma values. - def create_energy_pdf(sm_pdf, flux_model, gridfitparams): + def create_energy_pdf(sm_pdf, fluxmodel, gridparams): """Creates an energy pdf for a specific gamma value. """ # Create a copy of the FluxModel with the given flux parameters. # The copy is needed to not interfer with other CPU processes. - my_flux_model = flux_model.copy(newprop=gridfitparams) + my_fluxmodel = fluxmodel.copy(newparams=gridparams) self._logger.debug( - 'Generate signal energy PDF for parameters {} in {} E_nu ' - 'bins.'.format( - gridfitparams, len(valid_true_e_idxs)) - ) + f'Generate signal energy PDF for parameters {gridparams} in ' + f'{len(valid_true_e_idxs)} E_nu bins.') # Calculate the flux probability p(E_nu|gamma). flux_prob = ( - my_flux_model.get_integral( + my_fluxmodel.energy_profile.get_integral( true_enu_binedges_lower, true_enu_binedges_upper ) / - my_flux_model.get_integral( + my_fluxmodel.energy_profile.get_integral( true_enu_binedges[0], true_enu_binedges[-1] ) @@ -343,10 +389,12 @@ def create_reco_e_pdf_for_true_e(idx, true_e_idx): return spline(xvals) # Integrate over the true neutrino energy and spline the output. 
- sum_pdf = np.sum([ - create_reco_e_pdf_for_true_e(i, true_e_idx) - for i,true_e_idx in enumerate(valid_true_e_idxs) - ], axis=0) + sum_pdf = np.sum( + [ + create_reco_e_pdf_for_true_e(i, true_e_idx) + for (i, true_e_idx) in enumerate(valid_true_e_idxs) + ], + axis=0) spline = FctSpline1D(sum_pdf, xvals_binedges, norm=True) @@ -355,8 +403,8 @@ def create_reco_e_pdf_for_true_e(idx, true_e_idx): return pdf args_list = [ - ((sm_pdf, flux_model, gridfitparams), {}) - for gridfitparams in self.gridfitparams_list + ((sm_pdf, fluxmodel, gridparams), {}) + for gridparams in self.gridparams_list ] pdf_list = parallelize( @@ -365,57 +413,9 @@ def create_reco_e_pdf_for_true_e(idx, true_e_idx): ncpu=self.ncpu, ppbar=ppbar) - del(sm_pdf) + del sm_pdf # Save all the energy PDF objects in the PDFSet PDF registry with # the hash of the individual parameters as key. - for (gridfitparams, pdf) in zip(self.gridfitparams_list, pdf_list): - self.add_pdf(pdf, gridfitparams) - - def get_prob(self, tdm, gridfitparams, tl=None): - """Calculates the signal probability density of each event for the - given set of signal fit parameters on a grid. - - Parameters - ---------- - tdm : instance of TrialDataManager - The TrialDataManager instance holding the data events for which the - probability should be calculated for. The following data fields must - exist: - - - 'log_energy' - The log10 of the reconstructed energy. - - 'psi' - The opening angle from the source to the event in radians. - - 'ang_err' - The angular error of the event in radians. - gridfitparams : dict - The dictionary holding the signal parameter values for which the - signal energy probability should be calculated. Note, that the - parameter values must match a set of parameter grid values for which - a PDSignalPDF object has been created at construction time of this - PDSignalPDFSet object. - tl : TimeLord instance | None - The optional TimeLord instance that should be used to measure time. 
- - Returns - ------- - prob : 1d ndarray - The array with the signal energy probability for each event. - grads : (N_fitparams,N_events)-shaped ndarray | None - The 2D numpy ndarray holding the gradients of the PDF w.r.t. - each fit parameter for each event. The order of the gradients - is the same as the order of floating parameters specified through - the ``param_set`` property. - It is ``None``, if this PDF does not depend on any parameters. - - Raises - ------ - KeyError - If no energy PDF can be found for the given signal parameter values. - """ - pdf = self.get_pdf(gridfitparams) - - (prob, grads) = pdf.get_prob(tdm, tl=tl) - - return (prob, grads) + for (gridparams, pdf) in zip(self.gridparams_list, pdf_list): + self.add_pdf(pdf, gridparams) diff --git a/skyllh/analyses/i3/publicdata_ps/smearing_matrix.py b/skyllh/analyses/i3/publicdata_ps/smearing_matrix.py index 7b801855cd..046eea9c48 100644 --- a/skyllh/analyses/i3/publicdata_ps/smearing_matrix.py +++ b/skyllh/analyses/i3/publicdata_ps/smearing_matrix.py @@ -4,7 +4,9 @@ from skyllh.core.storage import create_FileLoader -def load_smearing_histogram(pathfilenames): + +def load_smearing_histogram( + pathfilenames): """Loads the 5D smearing histogram from the given data file. Parameters @@ -79,10 +81,10 @@ def _get_nbins_from_edges(lower_edges, upper_edges): # bin edge values. v0 = None for v in data: - if(v0 is not None and v < v0): + if (v0 is not None) and (v < v0): # Reached the end of the edges block. break - if(v0 is None or v > v0): + if (v0 is None) or (v > v0): v0 = v n += 1 return n @@ -172,12 +174,15 @@ def _get_nbins_from_edges(lower_edges, upper_edges): ) -class PDSmearingMatrix(object): +class PDSmearingMatrix( + object): """This class is a helper class for dealing with the smearing matrix provided by the public data. """ def __init__( - self, pathfilenames, **kwargs): + self, + pathfilenames, + **kwargs): """Creates a smearing matrix instance by loading the smearing matrix from the given file. 
""" @@ -202,22 +207,22 @@ def __init__( s = np.array(self.reco_e_lower_edges.shape) s[-1] += 1 self.log10_reco_e_binedges = np.empty(s, dtype=np.double) - self.log10_reco_e_binedges[...,:-1] = self.reco_e_lower_edges - self.log10_reco_e_binedges[...,-1] = self.reco_e_upper_edges[...,-1] + self.log10_reco_e_binedges[..., :-1] = self.reco_e_lower_edges + self.log10_reco_e_binedges[..., -1] = self.reco_e_upper_edges[..., -1] # Create bin edges array for psi. s = np.array(self.psi_lower_edges.shape) s[-1] += 1 self.psi_binedges = np.empty(s, dtype=np.double) - self.psi_binedges[...,:-1] = self.psi_lower_edges - self.psi_binedges[...,-1] = self.psi_upper_edges[...,-1] + self.psi_binedges[..., :-1] = self.psi_lower_edges + self.psi_binedges[..., -1] = self.psi_upper_edges[..., -1] # Create bin edges array for ang_err. s = np.array(self.ang_err_lower_edges.shape) s[-1] += 1 self.ang_err_binedges = np.empty(s, dtype=np.double) - self.ang_err_binedges[...,:-1] = self.ang_err_lower_edges - self.ang_err_binedges[...,-1] = self.ang_err_upper_edges[...,-1] + self.ang_err_binedges[..., :-1] = self.ang_err_lower_edges + self.ang_err_binedges[..., -1] = self.ang_err_upper_edges[..., -1] @property def n_log10_true_e_bins(self): @@ -345,7 +350,9 @@ def pdf(self): return pdf - def get_true_dec_idx(self, true_dec): + def get_true_dec_idx( + self, + true_dec): """Returns the true declination index for the given true declination value. 
@@ -361,14 +368,17 @@ def get_true_dec_idx(self, true_dec): """ if (true_dec < self.true_dec_bin_edges[0]) or\ (true_dec > self.true_dec_bin_edges[-1]): - raise ValueError('The declination {} degrees is not supported by ' - 'the smearing matrix!'.format(true_dec)) + raise ValueError( + f'The declination {true_dec} degrees is not supported by the ' + 'smearing matrix!') true_dec_idx = np.digitize(true_dec, self.true_dec_bin_edges) - 1 return true_dec_idx - def get_log10_true_e_idx(self, log10_true_e): + def get_log10_true_e_idx( + self, + log10_true_e): """Returns the bin index for the given true log10 energy value. Parameters @@ -384,16 +394,20 @@ def get_log10_true_e_idx(self, log10_true_e): """ if (log10_true_e < self.true_e_bin_edges[0]) or\ (log10_true_e > self.true_e_bin_edges[-1]): - raise ValueError( - 'The log10 true energy value {} is not supported by the ' - 'smearing matrix!'.format(log10_true_e)) + raise ValueError( + f'The log10 true energy value {log10_true_e} is not supported ' + 'by the smearing matrix!') log10_true_e_idx = np.digitize( log10_true_e, self._true_e_bin_edges) - 1 return log10_true_e_idx - def get_reco_e_idx(self, true_e_idx, true_dec_idx, reco_e): + def get_reco_e_idx( + self, + true_e_idx, + true_dec_idx, + reco_e): """Returns the bin index for the given reco energy value given the given true energy and true declination bin indices. @@ -412,19 +426,24 @@ def get_reco_e_idx(self, true_e_idx, true_dec_idx, reco_e): The index of the reco energy bin the given reco energy value falls into. It returns None if the value is out of range. 
""" - lower_edges = self.reco_e_lower_edges[true_e_idx,true_dec_idx] - upper_edges = self.reco_e_upper_edges[true_e_idx,true_dec_idx] + lower_edges = self.reco_e_lower_edges[true_e_idx, true_dec_idx] + upper_edges = self.reco_e_upper_edges[true_e_idx, true_dec_idx] m = (lower_edges <= reco_e) & (upper_edges > reco_e) idxs = np.nonzero(m)[0] - if(len(idxs) == 0): + if (len(idxs) == 0): return None reco_e_idx = idxs[0] return reco_e_idx - def get_psi_idx(self, true_e_idx, true_dec_idx, reco_e_idx, psi): + def get_psi_idx( + self, + true_e_idx, + true_dec_idx, + reco_e_idx, + psi): """Returns the bin index for the given psi value given the true energy, true declination and reco energy bin indices. @@ -446,12 +465,12 @@ def get_psi_idx(self, true_e_idx, true_dec_idx, reco_e_idx, psi): The index of the psi bin the given psi value falls into. It returns None if the value is out of range. """ - lower_edges = self.psi_lower_edges[true_e_idx,true_dec_idx,reco_e_idx] - upper_edges = self.psi_upper_edges[true_e_idx,true_dec_idx,reco_e_idx] + lower_edges = self.psi_lower_edges[true_e_idx, true_dec_idx, reco_e_idx] + upper_edges = self.psi_upper_edges[true_e_idx, true_dec_idx, reco_e_idx] m = (lower_edges <= psi) & (upper_edges > psi) idxs = np.nonzero(m)[0] - if(len(idxs) == 0): + if len(idxs) == 0: return None psi_idx = idxs[0] @@ -459,7 +478,12 @@ def get_psi_idx(self, true_e_idx, true_dec_idx, reco_e_idx, psi): return psi_idx def get_ang_err_idx( - self, true_e_idx, true_dec_idx, reco_e_idx, psi_idx, ang_err): + self, + true_e_idx, + true_dec_idx, + reco_e_idx, + psi_idx, + ang_err): """Returns the bin index for the given angular error value given the true energy, true declination, reco energy, and psi bin indices. @@ -484,20 +508,22 @@ def get_ang_err_idx( falls into. It returns None if the value is out of range. 
""" lower_edges = self.ang_err_lower_edges[ - true_e_idx,true_dec_idx,reco_e_idx,psi_idx] + true_e_idx, true_dec_idx, reco_e_idx, psi_idx] upper_edges = self.ang_err_upper_edges[ - true_e_idx,true_dec_idx,reco_e_idx,psi_idx] + true_e_idx, true_dec_idx, reco_e_idx, psi_idx] m = (lower_edges <= ang_err) & (upper_edges > ang_err) idxs = np.nonzero(m)[0] - if(len(idxs) == 0): + if len(idxs) == 0: return None ang_err_idx = idxs[0] return ang_err_idx - def get_true_log_e_range_with_valid_log_e_pdfs(self, dec_idx): + def get_true_log_e_range_with_valid_log_e_pdfs( + self, + dec_idx): """Determines the true log energy range for which log_e PDFs are available for the given declination bin. @@ -514,8 +540,8 @@ def get_true_log_e_range_with_valid_log_e_pdfs(self, dec_idx): The maximum true log energy value. """ m = np.sum( - (self.reco_e_upper_edges[:,dec_idx] - - self.reco_e_lower_edges[:,dec_idx] > 0), + (self.reco_e_upper_edges[:, dec_idx] - + self.reco_e_lower_edges[:, dec_idx] > 0), axis=1) != 0 min_log_true_e = np.min(self.true_e_bin_edges[:-1][m]) max_log_true_e = np.max(self.true_e_bin_edges[1:][m]) @@ -523,7 +549,9 @@ def get_true_log_e_range_with_valid_log_e_pdfs(self, dec_idx): return (min_log_true_e, max_log_true_e) def get_log_e_pdf( - self, log_true_e_idx, dec_idx): + self, + log_true_e_idx, + dec_idx): """Retrieves the log_e PDF from the given true energy bin index and source bin index. Returns (None, None, None, None) if any of the bin indices are less then @@ -571,7 +599,10 @@ def get_log_e_pdf( return (pdf, lower_bin_edges, upper_bin_edges, bin_widths) def get_psi_pdf( - self, log_true_e_idx, dec_idx, log_e_idx): + self, + log_true_e_idx, + dec_idx, + log_e_idx): """Retrieves the psi PDF from the given true energy bin index, the source bin index, and the log_e bin index. 
Returns (None, None, None, None) if any of the bin indices are less then @@ -621,7 +652,11 @@ def get_psi_pdf( return (pdf, lower_bin_edges, upper_bin_edges, bin_widths) def get_ang_err_pdf( - self, log_true_e_idx, dec_idx, log_e_idx, psi_idx): + self, + log_true_e_idx, + dec_idx, + log_e_idx, + psi_idx): """Retrieves the angular error PDF from the given true energy bin index, the source bin index, the log_e bin index, and the psi bin index. Returns (None, None, None, None) if any of the bin indices are less then @@ -680,7 +715,10 @@ def get_ang_err_pdf( return (pdf, lower_bin_edges, upper_bin_edges, bin_widths) def sample_log_e( - self, rss, dec_idx, log_true_e_idxs): + self, + rss, + dec_idx, + log_true_e_idxs): """Samples log energy values for the given source declination and true energy bins. @@ -703,7 +741,7 @@ def sample_log_e( The sampled log_e values. """ n_evt = len(log_true_e_idxs) - log_e_idx = np.empty((n_evt,), dtype=np.int_) + log_e_idx = np.empty((n_evt,), dtype=np.int64) log_e = np.empty((n_evt,), dtype=np.double) unique_log_true_e_idxs = np.unique(log_true_e_idxs) @@ -739,7 +777,11 @@ def sample_log_e( return (log_e_idx, log_e) def sample_psi( - self, rss, dec_idx, log_true_e_idxs, log_e_idxs): + self, + rss, + dec_idx, + log_true_e_idxs, + log_e_idxs): """Samples psi values for the given source declination, true energy bins, and log_e bins. @@ -763,12 +805,12 @@ def sample_psi( psi : 1d ndarray of float The sampled psi values in radians. 
""" - if(len(log_true_e_idxs) != len(log_e_idxs)): + if len(log_true_e_idxs) != len(log_e_idxs): raise ValueError( 'The lengths of log_true_e_idxs and log_e_idxs must be equal!') n_evt = len(log_true_e_idxs) - psi_idx = np.empty((n_evt,), dtype=np.int_) + psi_idx = np.empty((n_evt,), dtype=np.int64) psi = np.empty((n_evt,), dtype=np.double) unique_log_true_e_idxs = np.unique(log_true_e_idxs) @@ -808,7 +850,12 @@ def sample_psi( return (psi_idx, psi) def sample_ang_err( - self, rss, dec_idx, log_true_e_idxs, log_e_idxs, psi_idxs): + self, + rss, + dec_idx, + log_true_e_idxs, + log_e_idxs, + psi_idxs): """Samples ang_err values for the given source declination, true energy bins, log_e bins, and psi bins. @@ -841,7 +888,7 @@ def sample_ang_err( 'be equal!') n_evt = len(log_true_e_idxs) - ang_err_idx = np.empty((n_evt,), dtype=np.int_) + ang_err_idx = np.empty((n_evt,), dtype=np.int64) ang_err = np.empty((n_evt,), dtype=np.double) unique_log_true_e_idxs = np.unique(log_true_e_idxs) diff --git a/skyllh/analyses/i3/publicdata_ps/time_dependent_ps.py b/skyllh/analyses/i3/publicdata_ps/time_dependent_ps.py index a6e6daabcd..0d5cab44ca 100644 --- a/skyllh/analyses/i3/publicdata_ps/time_dependent_ps.py +++ b/skyllh/analyses/i3/publicdata_ps/time_dependent_ps.py @@ -1,220 +1,368 @@ # -*- coding: utf-8 -*- -"""Setup the time-dependent analysis. For now this works on a single dataset. +"""Setup the time-dependent analysis. For now this only works on a single +dataset. """ -import argparse -import logging import numpy as np -from skyllh.core.progressbar import ProgressBar - -# Classes to define the source hypothesis. -from skyllh.physics.source import PointLikeSource -from skyllh.physics.flux import PowerLawFlux -from skyllh.core.source_hypo_group import SourceHypoGroup -from skyllh.core.source_hypothesis import SourceHypoGroupManager - -# Classes to define the fit parameters. 
-from skyllh.core.parameters import ( - SingleSourceFitParameterMapper, - FitParameter +from skyllh.analyses.i3.publicdata_ps.backgroundpdf import ( + PDDataBackgroundI3EnergyPDF, +) +from skyllh.analyses.i3.publicdata_ps.detsigyield import ( + PDSingleParamFluxPointLikeSourceI3DetSigYieldBuilder, +) +from skyllh.analyses.i3.publicdata_ps.pdfratio import ( + PDSigSetOverBkgPDFRatio, +) +from skyllh.analyses.i3.publicdata_ps.signal_generator import ( + TimeDependentPDDatasetSignalGenerator, +) +from skyllh.analyses.i3.publicdata_ps.signalpdf import ( + PDSignalEnergyPDFSet, +) +from skyllh.analyses.i3.publicdata_ps.utils import ( + clip_grl_start_times, + create_energy_cut_spline, + get_tdm_field_func_psi, ) -# Classes for the minimizer. -from skyllh.core.minimizer import Minimizer, LBFGSMinimizerImpl -from skyllh.core.minimizers.iminuit import IMinuitMinimizerImpl - -# Classes for utility functionality. -from skyllh.core.config import CFG -from skyllh.core.random import RandomStateService -from skyllh.core.optimize import SpatialBoxEventSelectionMethod -from skyllh.core.smoothing import BlockSmoothingFilter -from skyllh.core.timing import TimeLord -from skyllh.core.trialdata import TrialDataManager - -# Classes for defining the analysis. 
-from skyllh.core.test_statistic import TestStatisticWilks from skyllh.core.analysis import ( - TimeIntegratedMultiDatasetSingleSourceAnalysis, + SingleSourceMultiDatasetLLHRatioAnalysis as Analysis, +) +from skyllh.core.backgroundpdf import ( + BackgroundTimePDF, +) +from skyllh.core.config import ( + CFG, + set_enable_tracing, + set_n_cpu, +) +from skyllh.core.debugging import ( + get_logger, +) +from skyllh.core.event_selection import ( + SpatialBoxEventSelectionMethod, +) +from skyllh.core.expectation_maximization import ( + em_fit, +) +from skyllh.core.flux_model import ( + BoxTimeFluxProfile, + GaussianTimeFluxProfile, + PowerLawEnergyFluxProfile, + SteadyPointlikeFFM, +) +from skyllh.core.minimizer import ( + LBFGSMinimizerImpl, + Minimizer, +) +from skyllh.core.minimizers.iminuit import ( + IMinuitMinimizerImpl, +) +from skyllh.core.model import ( + DetectorModel, +) +from skyllh.core.multiproc import ( + get_ncpu, + parallelize, +) +from skyllh.core.parameters import ( + Parameter, + ParameterModelMapper, +) +from skyllh.core.pdfratio import ( + SigOverBkgPDFRatio, +) +from skyllh.core.progressbar import ( + ProgressBar, +) +from skyllh.core.random import ( + RandomStateService, +) +from skyllh.core.scrambling import ( + DataScrambler, +) +from skyllh.core.signal_generator import ( + MultiDatasetSignalGenerator, ) - -# Classes to define the background generation. -from skyllh.core.scrambling import DataScrambler -from skyllh.i3.scrambling import I3SeasonalVariationTimeScramblingMethod -from skyllh.i3.background_generation import FixedScrambledExpDataI3BkgGenMethod - -# Classes to define the signal and background PDFs. 
from skyllh.core.signalpdf import ( RayleighPSFPointSourceSignalSpatialPDF, - SignalBoxTimePDF, - SignalGaussTimePDF + SignalTimePDF, ) -from skyllh.core.backgroundpdf import BackgroundUniformTimePDF -from skyllh.i3.backgroundpdf import ( - DataBackgroundI3SpatialPDF +from skyllh.core.smoothing import ( + BlockSmoothingFilter, ) -from skyllh.core.pdf import TimePDF - -# Classes to define the spatial and energy PDF ratios. -from skyllh.core.pdfratio import ( - SpatialSigOverBkgPDFRatio, - SigOverBkgPDFRatio +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroup, + SourceHypoGroupManager, ) - -# Analysis utilities. -from skyllh.core.analysis_utils import ( +from skyllh.core.source_model import ( + PointLikeSource, +) +from skyllh.core.test_statistic import ( + WilksTestStatistic, +) +from skyllh.core.timing import ( + TimeLord, +) +from skyllh.core.trialdata import ( + TrialDataManager, +) +from skyllh.core.utils.analysis import ( pointlikesource_to_data_field_array ) -from skyllh.core.expectation_maximization import em_fit - -# Analysis specific classes for working with the public data. 
def create_signal_time_pdf(
        grl,
        gauss=None,
        box=None,
):
    """Constructs the signal time PDF, which is either gaussian or box
    shaped.

    Parameters
    ----------
    grl : instance of numpy structured ndarray
        The structured numpy ndarray holding the good-run-list data.
    gauss : dict | None
        None or dictionary with {"mu": float, "sigma": float}.
    box : dict | None
        None or dictionary with {"start": float, "stop": float}.

    Returns
    -------
    pdf : instance of PDF
        The created time PDF instance.
    """
    if gauss is None and box is None:
        raise TypeError(
            'Either gauss or box have to be specified as time pdf.')

    # The guard above ensures that at least one of the two shapes was
    # requested; gauss takes precedence when both are given.
    if gauss is not None:
        profile = GaussianTimeFluxProfile(
            t0=gauss['mu'],
            sigma_t=gauss['sigma'])
    else:
        profile = BoxTimeFluxProfile.from_start_and_stop_time(
            start=box['start'],
            stop=box['stop'])

    detector_livetime = I3Livetime.from_grl_data(
        grl_data=grl)

    return SignalTimePDF(
        livetime=detector_livetime,
        time_flux_profile=profile,
    )
def get_energy_spatial_signal_over_background(
        ana,
        fitparam_values,
        tl=None,
):
    """Returns the signal over background ratio for
    (spatial_signal * energy_signal) / (spatial_background * energy_background).

    Parameters
    ----------
    ana : instance of SingleSourceMultiDatasetLLHRatioAnalysis
        The analysis that should be used.
    fitparam_values : instance of ndarray
        The (N_fitparams,)-shaped numpy ndarray holding the values of the global
        fit parameters, e.g. ns and gamma.
    tl : instance of TimeLord | None
        The optional instance of TimeLord that should be used to measure
        timing information.

    Returns
    -------
    ratio : 1d ndarray
        Product of spatial and energy signal over background pdfs.
    """
    tdm = ana.tdm_list[0]

    pdfratio = ana.llhratio.llhratio_list[0].pdfratio

    # pdfratio is an instance of PDFRatioProduct.
    # The first item is the PDF ratio product of the spatial and energy PDF
    # ratios. The second item is the time PDF ratio.
    pdfratio = pdfratio.pdfratio1

    # Translate the global fit parameter values into per-source local
    # parameter values required by the PDF ratio evaluation.
    src_params_recarray = ana.pmm.create_src_params_recarray(
        gflp_values=fitparam_values)

    ratio = pdfratio.get_ratio(
        tdm=tdm,
        src_params_recarray=src_params_recarray,
        tl=tl)

    return ratio
- Parameter - --------- + Parameters + ---------- + ana : instance of SingleSourceMultiDatasetLLHRatioAnalysis + The analysis that should be used. gamma : float - Spectral index for flux model. + Spectral index for the flux model. """ + ana.shg_mgr.shg_list[0].fluxmodel.set_params({'gamma': gamma}) + ana.change_shg_mgr(shg_mgr=ana.shg_mgr) - analysis.src_hypo_group_manager.src_hypo_group_list[0].fluxmodel.gamma = gamma - -def change_signal_time(analysis, gauss=None, box=None): - """Change the signal injection to gauss or box. +def change_time_flux_profile_params( + ana, + params, +): + """Changes the parameters of the source's time flux profile. Parameters ---------- - gauss : dict | None - None or dictionary {"mu": float, "sigma": float}. - box : dict | None - None or dictionary {"start" : float, "end" : float}. + ana : instance of SingleSourceMultiDatasetLLHRatioAnalysis + The analysis that should be used. + params : dict + The dictionary with the parameter names and values to be set. """ - - analysis.sig_generator.set_flare(box=box, gauss=gauss) + # Note: In the future the primary storage place for the time flux profile + # will be within the flux model. + ana.sig_generator_list[0]._time_flux_profile.set_params(params) -def calculate_TS(analysis, em_results, rss): - """Calculate the best TS value for the expectation maximization gamma scan. +def calculate_TS( + ana, + em_results, + rss, +): + """Calculate the best TS value from the expectation maximization gamma scan + results. Parameters ---------- - em_results : 1d ndarray of tuples - Gamma scan result. + ana : instance of SingleSourceMultiDatasetLLHRatioAnalysis + The analysis that should be used. + em_results : instance of structured ndarray + The numpy structured ndarray holding the EM results (from the gamma + scan). rss : instance of RandomStateService The instance of RandomStateService that should be used to generate random numbers from. 
Returns ------- - float maximized TS value - tuple(gamma from em scan [float], best fit mean time [float], best fit width [float]) - (float ns, float gamma) fitparams from TS optimization + max_TS : float + The maximal TS value of all maximized time hypotheses. + best_em_result : instance of numpy structured ndarray + The row of ``em_results`` that corresponds to the best fit. + best_fitparam_values : instance of numpy ndarray + The instance of numpy ndarray holding the fit parameter values of the + overall best fit result. """ - - max_TS = 0 - best_time = None - best_fitparams = None - for index, result in enumerate(em_results): - change_time_pdf(analysis, gauss={"mu": result["mu"], "sigma": result["sigma"]}) - (fitparamset, log_lambda_max, fitparam_values, status) = analysis.maximize_llhratio(rss) - TS = analysis.calculate_test_statistic(log_lambda_max, fitparam_values) - if TS > max_TS: + max_TS = None + best_em_result = None + best_fitparam_values = None + for em_result in em_results: + change_signal_time_pdf_of_llhratio_function( + ana=ana, + gauss={ + 'mu': em_result['mu'], + 'sigma': em_result['sigma']}) + + (log_lambda_max, fitparam_values, status) = ana.llhratio.maximize( + rss=rss) + + TS = ana.calculate_test_statistic( + log_lambda=log_lambda_max, + fitparam_values=fitparam_values) + + if (max_TS is None) or (TS > max_TS): max_TS = TS - best_time = result - best_fitparams = fitparam_values + best_em_result = em_result + best_fitparam_values = fitparam_values - return max_TS, best_time, best_fitparams + return (max_TS, best_em_result, best_fitparam_values) -def run_gamma_scan_single_flare(analysis, remove_time=None, gamma_min=1, gamma_max=5, n_gamma=51): - """Run em for different gammas in the signal energy pdf +def run_gamma_scan_for_single_flare( + ana, + remove_time=None, + gamma_min=1, + gamma_max=5, + n_gamma=51, + ppbar=None, +): + """Runs ``em_fit`` for different gamma values in the signal energy PDF. 
Parameters
     ----------
+    ana : instance of SingleSourceMultiDatasetLLHRatioAnalysis
+        The analysis that should be used.
     remove_time : float
         Time information of event that should be removed.
     gamma_min : float
@@ -223,84 +371,415 @@ def run_gamma_scan_single_flare(analysis, remove_time=None, gamma_min=1, gamma_m
         Upper bound for gamma scan.
     n_gamma : int
         Number of steps for gamma scan.
+    ppbar : instance of ProgressBar | None
+        The optional parent instance of ProgressBar.
 
     Returns
     -------
-    array with "gamma", "mu", "sigma", and scaling factor for flare "ns_em"
+    em_results : instance of numpy structured ndarray
+        The numpy structured ndarray with fields
+
+        gamma : float
+            The spectral index value.
+        mu : float
+            The determined mean value of the gauss curve.
+        sigma : float
+            The determined standard deviation of the gauss curve.
+        ns_em : float
+            The scaling factor of the flare.
     """
-    dtype = [("gamma", "f8"), ("mu", "f8"), ("sigma", "f8"), ("ns_em", "f8")]
-    results = np.empty(n_gamma, dtype=dtype)
+    em_results_dt = [
+        ('gamma', np.float64),
+        ('mu', np.float64),
+        ('sigma', np.float64),
+        ('ns_em', np.float64),
+    ]
+    em_results = np.empty(n_gamma, dtype=em_results_dt)
+
+    time = ana._tdm_list[0].get_data('time')
+
+    gamma_values = np.linspace(gamma_min, gamma_max, n_gamma)
+
+    pbar = ProgressBar(len(gamma_values), parent=ppbar).start()
+    for (i, gamma) in enumerate(gamma_values):
+        fitparam_values = np.array([0, gamma], dtype=np.float64)
+        ratio = get_energy_spatial_signal_over_background(ana, fitparam_values)
+        (mu, sigma, ns) = em_fit(
+            time,
+            ratio,
+            n=1,
+            tol=1.e-200,
+            iter_max=500,
+            weight_thresh=0,
+            initial_width=5000,
+            remove_x=remove_time)
+        em_results[i] = (
+            gamma,
+            mu[0],
+            sigma[0],
+            ns[0],
+        )
+        pbar.increment()
+    pbar.finish()
 
-    time = analysis._tdm_list[0].get_data("time")
+    return em_results
 
-    for index, g in enumerate(np.linspace(gamma_min, gamma_max, n_gamma)):
-        ratio = get_energy_spatial_signal_over_background(analysis, {"gamma": g})
-        
mu, sigma, ns = em_fit(time, ratio, n=1, tol=1.e-200, iter_max=500, weight_thresh=0, - initial_width=5000, remove_x=remove_time) - results[index] = (g, mu[0], sigma[0], ns[0]) - return results +def unblind_single_flare( + ana, + remove_time=None, +): + """Run EM for a single flare on unblinded data. + Similar to the original analysis, remove the alert event. + + Parameters + ---------- + ana : instance of SingleSourceMultiDatasetLLHRatioAnalysis + The analysis that should be used. + remove_time : float + Time of the event that should be removed. + In the case of the TXS analysis: + ``remove_time=TXS_0506_PLUS056_ALERT_TIME``. + Returns + ------- + max_TS : float + The maximal TS value of all maximized time hypotheses. + best_em_result : instance of numpy structured ndarray + The EM result from the gamma scan corresponding to the best fit. + best_fitparam_values : instance of numpy ndarray + The instance of numpy ndarray holding the fit parameter values of the + overall best fit result. + """ + rss = RandomStateService(seed=1) -def unblind_flare(analysis, remove_time=None): - """Run EM on unscrambled data. Similar to the original analysis, remove the alert event. + ana.unblind( + rss=rss) + + em_results = run_gamma_scan_for_single_flare( + ana=ana, + remove_time=remove_time) + + (max_ts, best_em_result, best_fitparam_values) = calculate_TS( + ana=ana, + em_results=em_results, + rss=rss) + + return (max_ts, best_em_result, best_fitparam_values) + + +def do_trial_with_em( + ana, + rss, + mean_n_sig=0, + gamma_src=2, + gamma_min=1, + gamma_max=5, + n_gamma=21, + gauss=None, + box=None, + tl=None, + ppbar=None, +): + """Performs a trial using the expectation maximization algorithm. + It runs a gamma scan and does the EM for each gamma value. Parameters ---------- - remove_time : float - Time information of event that should be removed. 
- In the case of the TXS analysis: remove_time=58018.8711856 + ana : instance of SingleSourceMultiDatasetLLHRatioAnalysis + The anaylsis instance that should be used to perform the trial. + rss : instance of RandomStateService + The instance of RandomStateService that should be used to generate + random numbers. + mean_n_sig : float + The mean number of signal events that should be generated. + gamma_src : float + The spectral index of the source. + gamma_min : float + Lower bound of the gamma scan. + gamma_max : float + Upper bound of the gamma scan. + n_gamma : int + Number of steps of the gamma scan. + gauss : dict | None + Properties of the Gaussian time PDF. + None or dictionary with {"mu": float, "sigma": float}. + box : dict | None + Properties of the box time PDF. + None or dictionary with {"start": float, "stop": float}. + tl : instance of TimeLord | None + The optional instance of TimeLord to measure timing information. + ppbar : instance of ProgressBar | None + The optional parent instance of ProgressBar. Returns ------- - array with "gamma", "mu", "sigma", and scaling factor for flare "ns_em" + trial : instance of structured ndarray + The numpy structured ndarray of length 1 with the following fields: + + seed : numpy.int64 + The seed value used to generate the trial. + mean_n_sig : numpy.float64 + The mean number of signal events of the trial. + n_sig : numpy.int64 + The actual number of signal events in the trial. + gamma_src : numpy.float64 + The spectral index of the source. + mu_sig : numpy.float64 + The mean value of the Gaussian time PDF of the source. + sigma_sig : numpy.float64 + The sigma value of the Gaussian time PDF of the source. + start_sig : numpy.float64 + The start time of the box time PDF of the source. + stop_sig : numpy.float64 + The stop time of the box time PDF of the source. + ts : numpy.float64 + The test-statistic value of the trial. + ns_fit : numpy.float64 + The fitted number of signal events. 
+        ns_em : numpy.float64
+            The scaling factor of the flare.
+        gamma_fit : numpy.float64
+            The fitted spectral index of the trial.
+        gamma_em : numpy.float64
+            The spectral index of the best EM trial.
+        mu_fit : numpy.float64
+            The fitted mean value of the Gaussian time PDF.
+        sigma_fit : numpy.float64
+            The fitted sigma value of the Gaussian time PDF.
     """
+    trial_dt = [
+        ('seed', np.int64),
+        ('mean_n_sig', np.float64),
+        ('n_sig', np.int64),
+        ('gamma_src', np.float64),
+        ('mu_sig', np.float64),
+        ('sigma_sig', np.float64),
+        ('start_sig', np.float64),
+        ('stop_sig', np.float64),
+        ('ts', np.float64),
+        ('ns_fit', np.float64),
+        ('ns_em', np.float64),
+        ('gamma_fit', np.float64),
+        ('gamma_em', np.float64),
+        ('mu_fit', np.float64),
+        ('sigma_fit', np.float64)
+    ]
+
+    trial = np.empty((1,), dtype=trial_dt)
+
+    (n_sig, n_events_list, events_list) = ana.generate_pseudo_data(
+        rss=rss,
+        mean_n_sig=mean_n_sig,
+        tl=tl)
+    ana.initialize_trial(events_list, n_events_list)
+
+    em_results = run_gamma_scan_for_single_flare(
+        ana=ana,
+        gamma_min=gamma_min,
+        gamma_max=gamma_max,
+        n_gamma=n_gamma,
+        ppbar=ppbar)
+
+    (max_ts, best_em_result, best_fitparams) = calculate_TS(
+        ana=ana,
+        em_results=em_results,
+        rss=rss)
+
+    trial[0] = (
+        rss.seed,
+        mean_n_sig,
+        n_sig,
+        gamma_src,
+        gauss['mu'] if gauss is not None else -1,
+        gauss['sigma'] if gauss is not None else -1,
+        box['start'] if box is not None else -1,
+        box['stop'] if box is not None else -1,
+        max_ts,
+        best_fitparams[0],
+        best_em_result['ns_em'],
+        best_fitparams[1],
+        best_em_result['gamma'],
+        best_em_result['mu'],
+        best_em_result['sigma']
+    )
 
-    # get the original unblinded data
-    rss = RandomStateService(seed=1)
-    analysis.unblind(rss)
-    time_results = run_gamma_scan_single_flare(analysis, remove_time=remove_time)
-    return time_results
-
-
-def create_analysis(
-    datasets,
-    source,
-    gauss=None,
-    box=None,
-    refplflux_Phi0=1,
-    refplflux_E0=1e3,
-    refplflux_gamma=2.0,
-    ns_seed=100.0,
-    ns_min=0.,
-    
ns_max=1e3, - gamma_seed=3.0, - gamma_min=1., - gamma_max=5., - kde_smoothing=False, - minimizer_impl="LBFGS", - cut_sindec=None, - spl_smooth=None, - cap_ratio=False, - compress_data=False, - keep_data_fields=None, - optimize_delta_angle=10, - tl=None, - ppbar=None + return trial + + +def do_trials_with_em( + ana, + n=1000, + ncpu=None, + seed=1, + mean_n_sig=0, + gamma_src=2, + gamma_min=1, + gamma_max=5, + n_gamma=21, + gauss=None, + box=None, + tl=None, + ppbar=None, ): + """Performs ``n_trials`` trials using the expectation maximization + algorithm. For each trial it runs a gamma scan and does the EM for each + gamma value. + + Parameters + ---------- + ana : instance of SingleSourceMultiDatasetLLHRatioAnalysis + The anaylsis instance that should be used to perform the trials. + n : int + The number of trials to generate. + ncpu : int | None + The number of CPUs to use to generate the trials. If set to ``None`` + the configured default value will be used. + mean_n_sig : float + The mean number of signal events that should be generated. + gamma_src : float + The spectral index of the source. + gamma_min : float + Lower bound of the gamma scan. + gamma_max : float + Upper bound of the gamma scan. + n_gamma : int + Number of steps of the gamma scan. + seed : int + The seed for the random number generator. + gauss : dict | None + Properties of the Gaussian time PDF. + None or dictionary with {"mu": float, "sigma": float}. + box : dict | None + Properties of the box time PDF. + None or dictionary with {"start": float, "stop": float}. + tl : instance of TimeLord | None + The optional instance of TimeLord to measure timing information. + ppbar : instance of ProgressBar | None + The optional parent instance of ProgressBar. + + Returns + ------- + trials : instance of numpy structured ndarray + The numpy structured ndarray of length ``n_trials`` with the results for + each trial. 
The array has the following fields:
+
+        seed : numpy.int64
+            The seed value used to generate the trial.
+        mean_n_sig : numpy.float64
+            The mean number of signal events of the trial.
+        n_sig : numpy.int64
+            The actual number of signal events in the trial.
+        gamma_src : numpy.float64
+            The spectral index of the source.
+        mu_sig : numpy.float64
+            The mean value of the Gaussian time PDF of the source.
+        sigma_sig : numpy.float64
+            The sigma value of the Gaussian time PDF of the source.
+        start_sig : numpy.float64
+            The start time of the box time PDF of the source.
+        stop_sig : numpy.float64
+            The stop time of the box time PDF of the source.
+        ts : numpy.float64
+            The test-statistic value of the trial.
+        ns_fit : numpy.float64
+            The fitted number of signal events.
+        ns_em : numpy.float64
+            The scaling factor of the flare.
+        gamma_fit : numpy.float64
+            The fitted spectral index of the trial.
+        gamma_em : numpy.float64
+            The spectral index of the best EM trial.
+        mu_fit : numpy.float64
+            The fitted mean value of the Gaussian time PDF.
+        sigma_fit : numpy.float64
+            The fitted sigma value of the Gaussian time PDF.
+ """ + rss = RandomStateService(seed=seed) + + if mean_n_sig > 0: + change_fluxmodel_gamma( + ana=ana, + gamma=gamma_src) + + args_list = [ + ( + tuple(), + dict( + ana=ana, + mean_n_sig=mean_n_sig, + gamma_src=gamma_src, + gamma_min=gamma_min, + gamma_max=gamma_max, + n_gamma=n_gamma, + gauss=gauss, + box=box, + ppbar=False, + ) + ) + for i in range(n) + ] + + result_list = parallelize( + func=do_trial_with_em, + args_list=args_list, + ncpu=get_ncpu(ncpu), + rss=rss, + tl=tl, + ppbar=ppbar, + ) + + trials = None + for (i, result) in enumerate(result_list): + if trials is None: + trials = np.empty((n,), dtype=result.dtype) + trials[i] = result[0] + + return trials + + +def create_analysis( # noqa: C901 + datasets, + source, + box=None, + gauss=None, + refplflux_Phi0=1, + refplflux_E0=1e3, + refplflux_gamma=2.0, + ns_seed=100.0, + ns_min=0., + ns_max=1e3, + gamma_seed=3.0, + gamma_min=1., + gamma_max=5., + kde_smoothing=False, + minimizer_impl="LBFGS", + cut_sindec=None, + spl_smooth=None, + cap_ratio=False, + compress_data=False, + keep_data_fields=None, + evt_sel_delta_angle_deg=10, + construct_bkg_generator=True, + construct_sig_generator=True, + tl=None, + ppbar=None, + logger_name=None): """Creates the Analysis instance for this particular analysis. - Parameters: - ----------- + Parameters + ---------- datasets : list of Dataset instances The list of Dataset instances, which should be used in the analysis. source : PointLikeSource instance The PointLikeSource instance defining the point source position. - gauss : None or dictionary with mu, sigma - None if no Gaussian time pdf. Else dictionary with {"mu": float, "sigma": float} of Gauss box : None or dictionary with start, end - None if no Box shaped time pdf. Else dictionary with {"start": float, "end": float} of box. + None if no box shaped time pdf, else dictionary of the format + ``{'start': float, 'stop': float}``. 
+ gauss : None or dictionary with mu, sigma + None if no gaussian time pdf, else dictionary of the format + ``{'mu': float, 'sigma': float}``. refplflux_Phi0 : float The flux normalization to use for the reference power law flux model. refplflux_E0 : float @@ -346,35 +825,48 @@ def create_analysis( keep_data_fields : list of str | None List of additional data field names that should get kept when loading the data. - optimize_delta_angle : float + evt_sel_delta_angle_deg : float The delta angle in degrees for the event selection optimization methods. + construct_bkg_generator : bool + Flag if the background generator should be constructed (``True``) or not + (``False``). + construct_sig_generator : bool + Flag if the signal generator should be constructed (``True``) or not + (``False``). tl : TimeLord instance | None The TimeLord instance to use to time the creation of the analysis. ppbar : ProgressBar instance | None The instance of ProgressBar for the optional parent progress bar. + logger_name : str | None + The name of the logger to be used. If set to ``None``, ``__name__`` will + be used. Returns ------- - analysis : TimeIntegratedMultiDatasetSingleSourceAnalysis + ana : instance of SingleSourceMultiDatasetLLHRatioAnalysis The Analysis instance for this analysis. """ + if logger_name is None: + logger_name = __name__ + logger = get_logger(logger_name) if len(datasets) != 1: raise RuntimeError( 'This analysis supports only analyses with only single datasets ' 'at the moment!') - if gauss is None and box is None: - raise ValueError("No time pdf specified (box or gauss)") - if gauss is not None and box is not None: + if (gauss is None) and (box is None): + raise ValueError( + 'No time pdf specified (box or gauss)!') + if (gauss is not None) and (box is not None): raise ValueError( - "Time PDF cannot be both Gaussian and box shaped. " - "Please specify only one shape.") + 'Time PDF cannot be both gaussian and box shaped. 
' + 'Please specify only one shape.') # Create the minimizer instance. - if minimizer_impl == "LBFGS": + if minimizer_impl == 'LBFGS': minimizer = Minimizer(LBFGSMinimizerImpl()) - elif minimizer_impl == "minuit": + elif minimizer_impl == 'minuit': minimizer = Minimizer(IMinuitMinimizerImpl(ftol=1e-8)) else: raise NameError( @@ -382,82 +874,112 @@ def create_analysis( "Please use `LBFGS` or `minuit`.") # Define the flux model. - flux_model = PowerLawFlux( - Phi0=refplflux_Phi0, E0=refplflux_E0, gamma=refplflux_gamma) + fluxmodel = SteadyPointlikeFFM( + Phi0=refplflux_Phi0, + energy_profile=PowerLawEnergyFluxProfile( + E0=refplflux_E0, + gamma=refplflux_gamma)) + + # Define the time flux profile of the source. + time_flux_profile = None + if box is not None: + time_flux_profile = BoxTimeFluxProfile.from_start_and_stop_time( + start=box['start'], + stop=box['stop']) + elif gauss is not None: + time_flux_profile = GaussianTimeFluxProfile( + t0=gauss['mu'], + sigma_t=gauss['sigma']) # Define the fit parameter ns. - fitparam_ns = FitParameter('ns', ns_min, ns_max, ns_seed) - - # Define the gamma fit parameter. - fitparam_gamma = FitParameter( - 'gamma', valmin=gamma_min, valmax=gamma_max, initial=gamma_seed) - - # Define the detector signal efficiency implementation method for the - # IceCube detector and this source and flux_model. - # The sin(dec) binning will be taken by the implementation method - # automatically from the Dataset instance. - gamma_grid = fitparam_gamma.as_linear_grid(delta=0.1) - detsigyield_implmethod = \ - PublicDataPowerLawFluxPointLikeSourceI3DetSigYieldImplMethod( - gamma_grid) + param_ns = Parameter( + name='ns', + initial=ns_seed, + valmin=ns_min, + valmax=ns_max) + + # Define the fit parameter gamma. + param_gamma = Parameter( + name='gamma', + initial=gamma_seed, + valmin=gamma_min, + valmax=gamma_max) + + # Define the detector signal yield builder for the IceCube detector and this + # source and flux model. 
+ # The sin(dec) binning will be taken by the builder automatically from the + # Dataset instance. + gamma_grid = param_gamma.as_linear_grid(delta=0.1) + detsigyield_builder =\ + PDSingleParamFluxPointLikeSourceI3DetSigYieldBuilder( + param_grid=gamma_grid) # Define the signal generation method. - #sig_gen_method = PointLikeSourceI3SignalGenerationMethod() sig_gen_method = None - # Create a source hypothesis group manager. - src_hypo_group_manager = SourceHypoGroupManager( + # Create a source hypothesis group manager with a single source hypothesis + # group for the single source. + shg_mgr = SourceHypoGroupManager( SourceHypoGroup( - source, flux_model, detsigyield_implmethod, sig_gen_method)) + sources=source, + fluxmodel=fluxmodel, + detsigyield_builders=detsigyield_builder, + sig_gen_method=sig_gen_method)) + + # Define a detector model for the ns fit parameter. + detector_model = DetectorModel('IceCube') - # Create a source fit parameter mapper and define the fit parameters. - src_fitparam_mapper = SingleSourceFitParameterMapper() - src_fitparam_mapper.def_fit_parameter(fitparam_gamma) + # Define the parameter model mapper for the analysis, which will map global + # parameters to local source parameters. + pmm = ParameterModelMapper( + models=[detector_model, source]) + pmm.def_param(param_ns, models=detector_model) + pmm.def_param(param_gamma, models=source) + + logger.info(str(pmm)) # Define the test statistic. - test_statistic = TestStatisticWilks() + test_statistic = WilksTestStatistic() # Create the Analysis instance. - analysis = TimeIntegratedMultiDatasetSingleSourceAnalysis( - src_hypo_group_manager, - src_fitparam_mapper, - fitparam_ns, - test_statistic, - sig_generator_cls=PDTimeDependentSignalGenerator + ana = Analysis( + shg_mgr=shg_mgr, + pmm=pmm, + test_statistic=test_statistic, + sig_generator_cls=MultiDatasetSignalGenerator, ) # Define the event selection method for pure optimization purposes. # We will use the same method for all datasets. 
event_selection_method = SpatialBoxEventSelectionMethod( - src_hypo_group_manager, delta_angle=np.deg2rad(optimize_delta_angle)) + shg_mgr=shg_mgr, + delta_angle=np.deg2rad(evt_sel_delta_angle_deg)) - # Prepare the spline parameters. + # Prepare the spline parameters for the signal generator. if cut_sindec is None: cut_sindec = np.sin(np.radians([-2, 0, -3, 0, 0])) if spl_smooth is None: spl_smooth = [0., 0.005, 0.05, 0.2, 0.3] if len(spl_smooth) < len(datasets) or len(cut_sindec) < len(datasets): raise AssertionError( - "The length of the spl_smooth and of the cut_sindec must be equal " - f"to the length of datasets: {len(datasets)}.") + 'The length of the spl_smooth and of the cut_sindec must be equal ' + f'to the length of datasets: {len(datasets)}.') # Add the data sets to the analysis. pbar = ProgressBar(len(datasets), parent=ppbar).start() data_list = [] - energy_cut_splines = [] - for idx, ds in enumerate(datasets): - # Load the data of the data set. + for (ds_idx, ds) in enumerate(datasets): data = ds.load_and_prepare_data( keep_fields=keep_data_fields, compress=compress_data, tl=tl) data_list.append(data) - # Create a trial data manager and add the required data fields. - tdm = TrialDataManager() - tdm.add_source_data_field('src_array', - pointlikesource_to_data_field_array) - tdm.add_data_field('psi', psi_func) + # Some runs might overlap slightly. So we need to clip those runs. 
+ clip_grl_start_times(grl_data=data.grl) + + livetime = I3Livetime.from_grl_data( + grl_data=data.grl) sin_dec_binning = ds.get_binning_definition('sin_dec') log_energy_binning = ds.get_binning_definition('log_energy') @@ -466,72 +988,193 @@ def create_analysis( spatial_sigpdf = RayleighPSFPointSourceSignalSpatialPDF( dec_range=np.arcsin(sin_dec_binning.range)) spatial_bkgpdf = DataBackgroundI3SpatialPDF( - data.exp, sin_dec_binning) - spatial_pdfratio = SpatialSigOverBkgPDFRatio( - spatial_sigpdf, spatial_bkgpdf) + data_exp=data.exp, + sin_dec_binning=sin_dec_binning) + spatial_pdfratio = SigOverBkgPDFRatio( + sig_pdf=spatial_sigpdf, + bkg_pdf=spatial_bkgpdf) # Create the energy PDF ratio instance for this dataset. energy_sigpdfset = PDSignalEnergyPDFSet( ds=ds, src_dec=source.dec, - flux_model=flux_model, - fitparam_grid_set=gamma_grid, + fluxmodel=fluxmodel, + param_grid_set=gamma_grid, ppbar=ppbar ) smoothing_filter = BlockSmoothingFilter(nbins=1) energy_bkgpdf = PDDataBackgroundI3EnergyPDF( - data.exp, log_energy_binning, sin_dec_binning, - smoothing_filter, kde_smoothing) + data_exp=data.exp, + logE_binning=log_energy_binning, + sinDec_binning=sin_dec_binning, + smoothing_filter=smoothing_filter, + kde_smoothing=kde_smoothing) - energy_pdfratio = PDPDFRatio( + energy_pdfratio = PDSigSetOverBkgPDFRatio( sig_pdf_set=energy_sigpdfset, bkg_pdf=energy_bkgpdf, - cap_ratio=cap_ratio - ) + cap_ratio=cap_ratio) - pdfratios = [spatial_pdfratio, energy_pdfratio] + pdfratio = spatial_pdfratio * energy_pdfratio # Create the time PDF ratio instance for this dataset. 
- if gauss is not None or box is not None: - time_bkgpdf = BackgroundUniformTimePDF(data.grl) - if gauss is not None: - time_sigpdf = SignalGaussTimePDF( - data.grl, gauss['mu'], gauss['sigma']) - elif box is not None: - time_sigpdf = SignalBoxTimePDF( - data.grl, box["start"], box["end"]) + if (gauss is not None) or (box is not None): + time_bkgpdf = BackgroundTimePDF( + livetime=livetime, + time_flux_profile=BoxTimeFluxProfile.from_start_and_stop_time( + start=livetime.time_start, + stop=livetime.time_stop)) + time_sigpdf = create_signal_time_pdf( + grl=data.grl, + gauss=gauss, + box=box) time_pdfratio = SigOverBkgPDFRatio( sig_pdf=time_sigpdf, bkg_pdf=time_bkgpdf, - pdf_type=TimePDF + same_axes=False, ) - pdfratios.append(time_pdfratio) - analysis.add_dataset( - ds, data, pdfratios, tdm, event_selection_method) + pdfratio = pdfratio * time_pdfratio + + # Create a trial data manager and add the required data fields. + tdm = TrialDataManager() + tdm.add_source_data_field( + name='src_array', + func=pointlikesource_to_data_field_array) + tdm.add_data_field( + name='psi', + func=get_tdm_field_func_psi(), + dt='dec', + is_srcevt_data=True) energy_cut_spline = create_energy_cut_spline( - ds, data.exp, spl_smooth[idx]) - energy_cut_splines.append(energy_cut_spline) + ds, + data.exp, + spl_smooth[ds_idx]) + + sig_generator = TimeDependentPDDatasetSignalGenerator( + shg_mgr=shg_mgr, + ds=ds, + ds_idx=ds_idx, + livetime=livetime, + time_flux_profile=time_flux_profile, + energy_cut_spline=energy_cut_spline, + cut_sindec=cut_sindec[ds_idx], + ) + + ana.add_dataset( + ds, + data, + pdfratio=pdfratio, + tdm=tdm, + event_selection_method=event_selection_method, + sig_generator=sig_generator) pbar.increment() pbar.finish() - analysis.llhratio = analysis.construct_llhratio(minimizer, ppbar=ppbar) + ana.construct_services( + ppbar=ppbar) + + ana.llhratio = ana.construct_llhratio( + minimizer=minimizer, + ppbar=ppbar) # Define the data scrambler with its data scrambling method, 
which is used # for background generation. # FIXME: Support multiple datasets for the DataScrambler. - data_scrambler = DataScrambler(I3SeasonalVariationTimeScramblingMethod(data_list[0])) - # Create background generation method. + data_scrambler = DataScrambler( + I3SeasonalVariationTimeScramblingMethod( + data_list[0])) bkg_gen_method = FixedScrambledExpDataI3BkgGenMethod(data_scrambler) + ana.bkg_gen_method = bkg_gen_method + + if construct_bkg_generator is True: + ana.construct_background_generator() + + if construct_sig_generator is True: + ana.construct_signal_generator() + + return ana + + +if __name__ == '__main__': + parser = create_argparser( + description='Calculates TS for a given source location using the ' + '10-year public point source sample assuming a signal ' + 'time PDF.', + ) + + parser.add_argument( + '--dec', + dest='dec', + default=5.7, + type=float, + help='The source declination in degrees.' + ) + parser.add_argument( + '--ra', + dest='ra', + default=77.35, + type=float, + help='The source right-ascention in degrees.' + ) + parser.add_argument( + '--gamma-seed', + dest='gamma_seed', + default=3, + type=float, + help='The seed value of the gamma fit parameter.' + ) + + args = parser.parse_args() + + CFG.from_yaml(args.config) + + setup_logging( + script_logger_name=__name__, + debug_pathfilename=args.debug_logfile) + + set_enable_tracing(args.enable_tracing) + set_n_cpu(args.n_cpu) + + sample_seasons = [ + ('PublicData_10y_ps', 'IC86_II-VII'), + ] + + datasets = [] + for (sample, season) in sample_seasons: + # Get the dataset from the correct dataset collection. + dsc = data_samples[sample].create_dataset_collection( + args.data_basepath) + datasets.append(dsc.get_dataset(season)) + + # Define a random state service. + rss = RandomStateService(args.seed) + + # Define the point source. 
+ source = PointLikeSource( + ra=np.deg2rad(args.ra), + dec=np.deg2rad(args.dec)) + print(f'source: {source}') + + tl = TimeLord() + + with tl.task_timer('Creating analysis.'): + ana = create_analysis( + datasets=datasets, + source=source, + gamma_seed=args.gamma_seed, + gauss={'mu': 57000, 'sigma': 62}, + tl=tl) - analysis.bkg_gen_method = bkg_gen_method - analysis.construct_background_generator() + with tl.task_timer('Unblinding data.'): + (TS, param_dict, status) = ana.unblind(rss) - analysis.construct_signal_generator( - llhratio=analysis.llhratio, energy_cut_splines=energy_cut_splines, - cut_sindec=cut_sindec, box=box, gauss=gauss) + print(f'TS = {TS:g}') + print(f'ns_fit = {param_dict["ns"]:g}') + print(f'gamma_fit = {param_dict["gamma"]:g}') + print(f'minimizer status = {status}') - return analysis + print(tl) diff --git a/skyllh/analyses/i3/publicdata_ps/time_integrated_ps.py b/skyllh/analyses/i3/publicdata_ps/time_integrated_ps.py index a111c59567..8a47ca55fc 100644 --- a/skyllh/analyses/i3/publicdata_ps/time_integrated_ps.py +++ b/skyllh/analyses/i3/publicdata_ps/time_integrated_ps.py @@ -1,126 +1,124 @@ # -*- coding: utf-8 -*- -"""The time_integrated_ps analysis is a multi-dataset time-integrated single source -analysis with a two-component likelihood function using a spacial and an energy -event PDF. +"""The time_integrated_ps analysis is a multi-dataset time-integrated single +source analysis with a two-component likelihood function using a spacial and an +energy event PDF. """ -import argparse -import logging import numpy as np -from skyllh.core.progressbar import ProgressBar - -# Classes to define the source hypothesis. -from skyllh.physics.source import PointLikeSource -from skyllh.physics.flux import PowerLawFlux -from skyllh.core.source_hypo_group import SourceHypoGroup -from skyllh.core.source_hypothesis import SourceHypoGroupManager - -# Classes to define the fit parameters. 
-from skyllh.core.parameters import ( - SingleSourceFitParameterMapper, - FitParameter +from skyllh.analyses.i3.publicdata_ps.backgroundpdf import ( + PDDataBackgroundI3EnergyPDF, ) - -# Classes for the minimizer. -from skyllh.core.minimizer import Minimizer, LBFGSMinimizerImpl -from skyllh.core.minimizers.iminuit import IMinuitMinimizerImpl - -# Classes for utility functionality. -from skyllh.core.config import CFG -from skyllh.core.random import RandomStateService -from skyllh.core.optimize import SpatialBoxEventSelectionMethod -from skyllh.core.smoothing import BlockSmoothingFilter -from skyllh.core.timing import TimeLord -from skyllh.core.trialdata import TrialDataManager - -# Classes for defining the analysis. -from skyllh.core.test_statistic import TestStatisticWilks -from skyllh.core.analysis import ( - TimeIntegratedMultiDatasetSingleSourceAnalysis as Analysis +from skyllh.analyses.i3.publicdata_ps.detsigyield import ( + PDSingleParamFluxPointLikeSourceI3DetSigYieldBuilder, ) - -# Classes to define the background generation. -from skyllh.core.scrambling import DataScrambler, UniformRAScramblingMethod -from skyllh.i3.background_generation import FixedScrambledExpDataI3BkgGenMethod - -# Classes to define the signal and background PDFs. -from skyllh.core.signalpdf import RayleighPSFPointSourceSignalSpatialPDF -from skyllh.i3.backgroundpdf import ( - DataBackgroundI3SpatialPDF +from skyllh.analyses.i3.publicdata_ps.pdfratio import ( + PDSigSetOverBkgPDFRatio, ) - -# Classes to define the spatial and energy PDF ratios. -from skyllh.core.pdfratio import SpatialSigOverBkgPDFRatio - -# Analysis utilities. 
-from skyllh.core.analysis_utils import ( - pointlikesource_to_data_field_array +from skyllh.analyses.i3.publicdata_ps.signal_generator import ( + PDDatasetSignalGenerator, +) +from skyllh.analyses.i3.publicdata_ps.signalpdf import ( + PDSignalEnergyPDFSet, +) +from skyllh.analyses.i3.publicdata_ps.utils import ( + create_energy_cut_spline, + get_tdm_field_func_psi, ) -# Logging setup utilities. +from skyllh.core.analysis import ( + SingleSourceMultiDatasetLLHRatioAnalysis as Analysis, +) +from skyllh.core.config import ( + CFG, + set_enable_tracing, + set_n_cpu, +) from skyllh.core.debugging import ( - setup_logger, - setup_console_handler, - setup_file_handler + get_logger, ) - -# Pre-defined public IceCube data samples. -from skyllh.datasets.i3 import data_samples - -# Analysis specific classes for working with the public data. -from skyllh.analyses.i3.publicdata_ps.signal_generator import PDSignalGenerator - -from skyllh.analyses.i3.publicdata_ps.detsigyield import ( - PublicDataPowerLawFluxPointLikeSourceI3DetSigYieldImplMethod +from skyllh.core.event_selection import ( + SpatialBoxEventSelectionMethod, ) -from skyllh.analyses.i3.publicdata_ps.signalpdf import ( - PDSignalEnergyPDFSet +from skyllh.core.flux_model import ( + PowerLawEnergyFluxProfile, + SteadyPointlikeFFM, ) -from skyllh.analyses.i3.publicdata_ps.pdfratio import ( - PDPDFRatio +from skyllh.core.minimizer import ( + Minimizer, + LBFGSMinimizerImpl, ) -from skyllh.analyses.i3.publicdata_ps.backgroundpdf import ( - PDDataBackgroundI3EnergyPDF +from skyllh.core.minimizers.iminuit import ( + IMinuitMinimizerImpl, +) +from skyllh.core.model import ( + DetectorModel, +) +from skyllh.core.parameters import ( + Parameter, + ParameterModelMapper, +) +from skyllh.core.pdfratio import ( + SigOverBkgPDFRatio, +) +from skyllh.core.progressbar import ( + ProgressBar, +) +from skyllh.core.random import ( + RandomStateService, +) +from skyllh.core.scrambling import ( + DataScrambler, + 
UniformRAScramblingMethod, +) +from skyllh.core.signal_generator import ( + MultiDatasetSignalGenerator, +) +from skyllh.core.signalpdf import ( + RayleighPSFPointSourceSignalSpatialPDF, +) +from skyllh.core.smoothing import ( + BlockSmoothingFilter, +) +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroup, + SourceHypoGroupManager, +) +from skyllh.core.source_model import ( + PointLikeSource, +) +from skyllh.core.test_statistic import ( + WilksTestStatistic, +) +from skyllh.core.timing import ( + TimeLord, +) +from skyllh.core.trialdata import ( + TrialDataManager, +) +from skyllh.core.utils.analysis import ( + create_trial_data_file, + pointlikesource_to_data_field_array, ) -from skyllh.analyses.i3.publicdata_ps.utils import create_energy_cut_spline - - -def psi_func(tdm, src_hypo_group_manager, fitparams): - """Function to calculate the opening angle between the source position - and the event's reconstructed position. - """ - ra = tdm.get_data('ra') - dec = tdm.get_data('dec') - - # Make the source position angles two-dimensional so the PDF value - # can be calculated via numpy broadcasting automatically for several - # sources. This is useful for stacking analyses. - src_ra = tdm.get_data('src_array')['ra'][:, np.newaxis] - src_dec = tdm.get_data('src_array')['dec'][:, np.newaxis] - - delta_dec = np.abs(dec - src_dec) - delta_ra = np.abs(ra - src_ra) - x = ( - (np.sin(delta_dec / 2.))**2. + np.cos(dec) * - np.cos(src_dec) * (np.sin(delta_ra / 2.))**2. - ) - - # Handle possible floating precision errors. - x[x < 0.] = 0. - x[x > 1.] = 1. - - psi = (2.0*np.arcsin(np.sqrt(x))) - # For now we support only a single source, hence return psi[0]. 
- return psi[0, :] +from skyllh.datasets.i3 import ( + data_samples, +) +from skyllh.i3.background_generation import ( + FixedScrambledExpDataI3BkgGenMethod, +) +from skyllh.i3.backgroundpdf import ( + DataBackgroundI3SpatialPDF, +) -def TXS_location(): - src_ra = np.radians(77.358) - src_dec = np.radians(5.693) - return (src_ra, src_dec) +from skyllh.scripting.argparser import ( + create_argparser, +) +from skyllh.scripting.logging import ( + setup_logging, +) def create_analysis( @@ -136,20 +134,22 @@ def create_analysis( gamma_min=1., gamma_max=5., kde_smoothing=False, - minimizer_impl="LBFGS", + minimizer_impl='LBFGS', cut_sindec=None, spl_smooth=None, cap_ratio=False, compress_data=False, keep_data_fields=None, - optimize_delta_angle=10, + evt_sel_delta_angle_deg=10, + construct_sig_generator=True, tl=None, - ppbar=None + ppbar=None, + logger_name=None, ): """Creates the Analysis instance for this particular analysis. - Parameters: - ----------- + Parameters + ---------- datasets : list of Dataset instances The list of Dataset instances, which should be used in the analysis. @@ -177,10 +177,10 @@ def create_analysis( kde_smoothing : bool Apply a KDE-based smoothing to the data-driven background pdf. Default: False. - minimizer_impl : str | "LBFGS" - Minimizer implementation to be used. Supported options are "LBFGS" + minimizer_impl : str + Minimizer implementation to be used. Supported options are ``"LBFGS"`` (L-BFG-S minimizer used from the :mod:`scipy.optimize` module), or - "minuit" (Minuit minimizer used by the :mod:`iminuit` module). + ``"minuit"`` (Minuit minimizer used by the :mod:`iminuit` module). Default: "LBFGS". cut_sindec : list of float | None sin(dec) values at which the energy cut in the southern sky should @@ -200,18 +200,27 @@ def create_analysis( keep_data_fields : list of str | None List of additional data field names that should get kept when loading the data. 
- optimize_delta_angle : float + evt_sel_delta_angle_deg : float The delta angle in degrees for the event selection optimization methods. + construct_sig_generator : bool + Flag if the signal generator should be constructed (``True``) or not + (``False``). tl : TimeLord instance | None The TimeLord instance to use to time the creation of the analysis. ppbar : ProgressBar instance | None The instance of ProgressBar for the optional parent progress bar. + logger_name : str | None + The name of the logger to be used. If set to ``None``, ``__name__`` will + be used. Returns ------- - analysis : TimeIntegratedMultiDatasetSingleSourceAnalysis + ana : instance of SingleSourceMultiDatasetLLHRatioAnalysis The Analysis instance for this analysis. """ + if logger_name is None: + logger_name = __name__ + logger = get_logger(logger_name) # Create the minimizer instance. if minimizer_impl == "LBFGS": @@ -224,40 +233,61 @@ def create_analysis( "Please use `LBFGS` or `minuit`.") # Define the flux model. - flux_model = PowerLawFlux( - Phi0=refplflux_Phi0, E0=refplflux_E0, gamma=refplflux_gamma) + fluxmodel = SteadyPointlikeFFM( + Phi0=refplflux_Phi0, + energy_profile=PowerLawEnergyFluxProfile( + E0=refplflux_E0, + gamma=refplflux_gamma)) # Define the fit parameter ns. - fitparam_ns = FitParameter('ns', ns_min, ns_max, ns_seed) - - # Define the gamma fit parameter. - fitparam_gamma = FitParameter( - 'gamma', valmin=gamma_min, valmax=gamma_max, initial=gamma_seed) - - # Define the detector signal efficiency implementation method for the - # IceCube detector and this source and flux_model. - # The sin(dec) binning will be taken by the implementation method - # automatically from the Dataset instance. 
- gamma_grid = fitparam_gamma.as_linear_grid(delta=0.1) - detsigyield_implmethod = \ - PublicDataPowerLawFluxPointLikeSourceI3DetSigYieldImplMethod( - gamma_grid) + param_ns = Parameter( + name='ns', + initial=ns_seed, + valmin=ns_min, + valmax=ns_max) + + # Define the fit parameter gamma. + param_gamma = Parameter( + name='gamma', + initial=gamma_seed, + valmin=gamma_min, + valmax=gamma_max) + + # Define the detector signal yield builder for the IceCube detector and this + # source and flux model. + # The sin(dec) binning will be taken by the builder automatically from the + # Dataset instance. + gamma_grid = param_gamma.as_linear_grid(delta=0.1) + detsigyield_builder =\ + PDSingleParamFluxPointLikeSourceI3DetSigYieldBuilder( + param_grid=gamma_grid) # Define the signal generation method. - #sig_gen_method = PointLikeSourceI3SignalGenerationMethod() sig_gen_method = None - # Create a source hypothesis group manager. - src_hypo_group_manager = SourceHypoGroupManager( + # Create a source hypothesis group manager with a single source hypothesis + # group for the single source. + shg_mgr = SourceHypoGroupManager( SourceHypoGroup( - source, flux_model, detsigyield_implmethod, sig_gen_method)) + sources=source, + fluxmodel=fluxmodel, + detsigyield_builders=detsigyield_builder, + sig_gen_method=sig_gen_method)) - # Create a source fit parameter mapper and define the fit parameters. - src_fitparam_mapper = SingleSourceFitParameterMapper() - src_fitparam_mapper.def_fit_parameter(fitparam_gamma) + # Define a detector model for the ns fit parameter. + detector_model = DetectorModel('IceCube') + + # Define the parameter model mapper for the analysis, which will map global + # parameters to local source parameters. + pmm = ParameterModelMapper( + models=[detector_model, source]) + pmm.def_param(param_ns, models=detector_model) + pmm.def_param(param_gamma, models=source) + + logger.info(str(pmm)) # Define the test statistic. 
- test_statistic = TestStatisticWilks() + test_statistic = WilksTestStatistic() # Define the data scrambler with its data scrambling method, which is used # for background generation. @@ -267,46 +297,38 @@ def create_analysis( bkg_gen_method = FixedScrambledExpDataI3BkgGenMethod(data_scrambler) # Create the Analysis instance. - analysis = Analysis( - src_hypo_group_manager, - src_fitparam_mapper, - fitparam_ns, - test_statistic, - bkg_gen_method, - sig_generator_cls=PDSignalGenerator + ana = Analysis( + shg_mgr=shg_mgr, + pmm=pmm, + test_statistic=test_statistic, + bkg_gen_method=bkg_gen_method, + sig_generator_cls=MultiDatasetSignalGenerator, ) # Define the event selection method for pure optimization purposes. # We will use the same method for all datasets. event_selection_method = SpatialBoxEventSelectionMethod( - src_hypo_group_manager, delta_angle=np.deg2rad(optimize_delta_angle)) + shg_mgr=shg_mgr, + delta_angle=np.deg2rad(evt_sel_delta_angle_deg)) - # Prepare the spline parameters. + # Prepare the spline parameters for the signal generator. if cut_sindec is None: cut_sindec = np.sin(np.radians([-2, 0, -3, 0, 0])) if spl_smooth is None: spl_smooth = [0., 0.005, 0.05, 0.2, 0.3] if len(spl_smooth) < len(datasets) or len(cut_sindec) < len(datasets): raise AssertionError( - "The length of the spl_smooth and of the cut_sindec must be equal " - f"to the length of datasets: {len(datasets)}.") + 'The length of the spl_smooth and of the cut_sindec must be equal ' + f'to the length of datasets: {len(datasets)}.') # Add the data sets to the analysis. pbar = ProgressBar(len(datasets), parent=ppbar).start() - energy_cut_splines = [] - for idx, ds in enumerate(datasets): - # Load the data of the data set. + for (ds_idx, ds) in enumerate(datasets): data = ds.load_and_prepare_data( keep_fields=keep_data_fields, compress=compress_data, tl=tl) - # Create a trial data manager and add the required data fields. 
- tdm = TrialDataManager() - tdm.add_source_data_field('src_array', - pointlikesource_to_data_field_array) - tdm.add_data_field('psi', psi_func) - sin_dec_binning = ds.get_binning_definition('sin_dec') log_energy_binning = ds.get_binning_definition('log_energy') @@ -314,110 +336,128 @@ def create_analysis( spatial_sigpdf = RayleighPSFPointSourceSignalSpatialPDF( dec_range=np.arcsin(sin_dec_binning.range)) spatial_bkgpdf = DataBackgroundI3SpatialPDF( - data.exp, sin_dec_binning) - spatial_pdfratio = SpatialSigOverBkgPDFRatio( - spatial_sigpdf, spatial_bkgpdf) + data_exp=data.exp, + sin_dec_binning=sin_dec_binning) + spatial_pdfratio = SigOverBkgPDFRatio( + sig_pdf=spatial_sigpdf, + bkg_pdf=spatial_bkgpdf) # Create the energy PDF ratio instance for this dataset. energy_sigpdfset = PDSignalEnergyPDFSet( ds=ds, src_dec=source.dec, - flux_model=flux_model, - fitparam_grid_set=gamma_grid, + fluxmodel=fluxmodel, + param_grid_set=gamma_grid, ppbar=ppbar ) smoothing_filter = BlockSmoothingFilter(nbins=1) energy_bkgpdf = PDDataBackgroundI3EnergyPDF( - data.exp, log_energy_binning, sin_dec_binning, - smoothing_filter, kde_smoothing) + data_exp=data.exp, + logE_binning=log_energy_binning, + sinDec_binning=sin_dec_binning, + smoothing_filter=smoothing_filter, + kde_smoothing=kde_smoothing) - energy_pdfratio = PDPDFRatio( + energy_pdfratio = PDSigSetOverBkgPDFRatio( sig_pdf_set=energy_sigpdfset, bkg_pdf=energy_bkgpdf, - cap_ratio=cap_ratio - ) + cap_ratio=cap_ratio) - pdfratios = [spatial_pdfratio, energy_pdfratio] + pdfratio = spatial_pdfratio * energy_pdfratio - analysis.add_dataset( - ds, data, pdfratios, tdm, event_selection_method) + # Create a trial data manager and add the required data fields. 
+ tdm = TrialDataManager() + tdm.add_source_data_field( + name='src_array', + func=pointlikesource_to_data_field_array) + tdm.add_data_field( + name='psi', + func=get_tdm_field_func_psi(), + dt='dec', + is_srcevt_data=True) energy_cut_spline = create_energy_cut_spline( - ds, data.exp, spl_smooth[idx]) - energy_cut_splines.append(energy_cut_spline) + ds, + data.exp, + spl_smooth[ds_idx]) + + sig_generator = PDDatasetSignalGenerator( + shg_mgr=shg_mgr, + ds=ds, + ds_idx=ds_idx, + energy_cut_spline=energy_cut_spline, + cut_sindec=cut_sindec[ds_idx], + ) + + ana.add_dataset( + dataset=ds, + data=data, + pdfratio=pdfratio, + tdm=tdm, + event_selection_method=event_selection_method, + sig_generator=sig_generator) pbar.increment() pbar.finish() - analysis.llhratio = analysis.construct_llhratio(minimizer, ppbar=ppbar) - analysis.construct_signal_generator( - llhratio=analysis.llhratio, energy_cut_splines=energy_cut_splines, - cut_sindec=cut_sindec) + ana.construct_services( + ppbar=ppbar) + + ana.llhratio = ana.construct_llhratio( + minimizer=minimizer, + ppbar=ppbar) + + if construct_sig_generator is True: + ana.construct_signal_generator() - return analysis + return ana -if(__name__ == '__main__'): - p = argparse.ArgumentParser( +if __name__ == '__main__': + parser = create_argparser( description='Calculates TS for a given source location using the ' - '10-year public point source sample.', - formatter_class=argparse.RawTextHelpFormatter + '10-year public point source sample.', ) - p.add_argument( + + parser.add_argument( '--dec', - default=23.8, + dest='dec', + default=5.7, type=float, help='The source declination in degrees.' ) - p.add_argument( + parser.add_argument( '--ra', - default=216.76, + dest='ra', + default=77.35, type=float, help='The source right-ascention in degrees.' ) - p.add_argument( + parser.add_argument( '--gamma-seed', + dest='gamma_seed', default=3, type=float, help='The seed value of the gamma fit parameter.' 
) - p.add_argument( - '--data_base_path', - default=None, - type=str, - help='The base path to the data samples (default=None)' - ) - p.add_argument( - '--seed', - default=1, - type=int, - help='The random number generator seed for the likelihood ' - 'minimization.' - ) - p.add_argument( - '--ncpu', - default=1, - type=int, - help='The number of CPUs to utilize where parallelization is possible.' - ) - p.add_argument( + + parser.add_argument( '--cap-ratio', + dest='cap_ratio', + default=False, action='store_true', help='Switch to cap the energy PDF ratio.') - p.set_defaults(cap_ratio=False) - args = p.parse_args() - # Setup `skyllh` package logging. - # To optimize logging set the logging level to the lowest handling level. - setup_logger('skyllh', logging.DEBUG) - log_format = '%(asctime)s %(processName)s %(name)s %(levelname)s: '\ - '%(message)s' - setup_console_handler('skyllh', logging.INFO, log_format) - setup_file_handler('skyllh', 'debug.log', - log_level=logging.DEBUG, - log_format=log_format) + args = parser.parse_args() + + CFG.from_yaml(args.config) + + setup_logging( + script_logger_name=__name__, + debug_pathfilename=args.debug_logfile) - CFG['multiproc']['ncpu'] = args.ncpu + set_enable_tracing(args.enable_tracing) + set_n_cpu(args.n_cpu) sample_seasons = [ ('PublicData_10y_ps', 'IC40'), @@ -431,31 +471,46 @@ def create_analysis( for (sample, season) in sample_seasons: # Get the dataset from the correct dataset collection. dsc = data_samples[sample].create_dataset_collection( - args.data_base_path) + args.data_basepath) datasets.append(dsc.get_dataset(season)) # Define a random state service. rss = RandomStateService(args.seed) # Define the point source. 
- source = PointLikeSource(np.deg2rad(args.ra), np.deg2rad(args.dec)) - print('source: ', str(source)) + source = PointLikeSource( + ra=np.deg2rad(args.ra), + dec=np.deg2rad(args.dec)) + print(f'source: {source}') tl = TimeLord() with tl.task_timer('Creating analysis.'): ana = create_analysis( - datasets, - source, - cap_ratio=args.cap_ratio, + datasets=datasets, + source=source, gamma_seed=args.gamma_seed, + cap_ratio=args.cap_ratio, tl=tl) with tl.task_timer('Unblinding data.'): - (TS, fitparam_dict, status) = ana.unblind(rss) + (TS, param_dict, status) = ana.unblind(rss) - print('TS = %g' % (TS)) - print('ns_fit = %g' % (fitparam_dict['ns'])) - print('gamma_fit = %g' % (fitparam_dict['gamma'])) + print(f'TS = {TS:g}') + print(f'ns_fit = {param_dict["ns"]:g}') + print(f'gamma_fit = {param_dict["gamma"]:g}') + print(f'minimizer status = {status}') print(tl) + + tl = TimeLord() + rss = RandomStateService(seed=1) + (_, _, _, trials) = create_trial_data_file( + ana=ana, + rss=rss, + n_trials=1e3, + mean_n_sig=0, + pathfilename=None, + ncpu=1, + tl=tl) + print(tl) diff --git a/skyllh/analyses/i3/publicdata_ps/utils.py b/skyllh/analyses/i3/publicdata_ps/utils.py index 00c381f37b..272aea8730 100644 --- a/skyllh/analyses/i3/publicdata_ps/utils.py +++ b/skyllh/analyses/i3/publicdata_ps/utils.py @@ -2,10 +2,17 @@ import numpy as np -from scipy import interpolate -from scipy import integrate +from scipy import ( + integrate, + interpolate, +) -from skyllh.core.binning import get_bincenters_from_binedges +from skyllh.core.binning import ( + get_bincenters_from_binedges, +) +from skyllh.core.utils.coords import ( + angular_separation, +) class FctSpline1D(object): @@ -15,7 +22,12 @@ class from scipy. The evaluate the spline, use the ``__call__`` method. """ - def __init__(self, f, x_binedges, norm=False, **kwargs): + def __init__( + self, + f, + x_binedges, + norm=False, + **kwargs): """Creates a new 1D function spline using the PchipInterpolator class from scipy. 
@@ -51,7 +63,10 @@ class from scipy. full_output=1 )[0] - def __call__(self, x, oor_value=0): + def __call__( + self, + x, + oor_value=0): """Evaluates the spline at the given x values. For x-values outside the spline's range, the oor_value is returned. @@ -69,11 +84,14 @@ def __call__(self, x, oor_value=0): The numpy ndarray holding the evaluated values of the spline. """ f = self.spl_f(x) - f = np.nan_to_num(f, nan=oor_value) + f = np.where(np.isnan(f), oor_value, f) return f - def evaluate(self, *args, **kwargs): + def evaluate( + self, + *args, + **kwargs): """Alias for the __call__ method. """ return self(*args, **kwargs) @@ -89,7 +107,12 @@ class from scipy. The evaluate the spline, use the ``__call__`` method. """ - def __init__(self, f, x_binedges, y_binedges, **kwargs): + def __init__( + self, + f, + x_binedges, + y_binedges, + **kwargs): """Creates a new 2D function spline using the RectBivariateSpline class from scipy. @@ -126,7 +149,11 @@ class from scipy. self.spl_log10_f = interpolate.RectBivariateSpline( x, y, z, kx=3, ky=3, s=0) - def __call__(self, x, y, oor_value=0): + def __call__( + self, + x, + y, + oor_value=0): """Evaluates the spline at the given coordinates. For coordinates outside the spline's range, the oor_value is returned. @@ -158,7 +185,35 @@ def __call__(self, x, y, oor_value=0): return f -def psi_to_dec_and_ra(rss, src_dec, src_ra, psi): +def clip_grl_start_times(grl_data): + """Make sure that the start time of a run is not smaller than the stop time + of the previous run. + + Parameters + ---------- + grl_data : instance of numpy structured ndarray + The numpy structured ndarray of length N_runs, with the following + fields: + + start : float + The start time of the run. + stop : float + The stop time of the run. 
+ """ + start = grl_data['start'] + stop = grl_data['stop'] + + m = (start[1:] - stop[:-1]) < 0 + new_start = np.where(m, stop[:-1], start[1:]) + + grl_data['start'][1:] = new_start + + +def psi_to_dec_and_ra( + rss, + src_dec, + src_ra, + psi): """Generates random declinations and right-ascension coordinates for the given source location and opening angle `psi`. @@ -218,8 +273,10 @@ def psi_to_dec_and_ra(rss, src_dec, src_ra, psi): return (dec, ra) -def create_energy_cut_spline(ds, exp_data, spl_smooth): - +def create_energy_cut_spline( + ds, + exp_data, + spl_smooth): """Create the spline for the declination-dependent energy cut that the signal generator needs for injection in the southern sky Some special conditions are needed for IC79 and IC86_I, because @@ -253,3 +310,47 @@ def create_energy_cut_spline(ds, exp_data, spl_smooth): sindec_centers, min_log_e, k=2, s=spl_smooth) return spline + + +def get_tdm_field_func_psi(psi_floor=None): + """Returns the TrialDataManager (TDM) field function for psi with an + optional psi value floor. + + Parameters + ---------- + psi_floor : float | None + The optional floor value for psi. This should be ``None`` for a standard + point-source analysis that uses an analytic function for the detector's + point-spread-function (PSF). + + Returns + ------- + tdm_field_func_psi : function + TrialDataManager (TDM) field function for psi. + """ + def tdm_field_func_psi( + tdm, + shg_mgr, + pmm): + """TDM data field function to calculate the opening angle between the + source positions and the event's reconstructed position. 
+ """ + (src_idxs, evt_idxs) = tdm.src_evt_idxs + + ra = np.take(tdm.get_data('ra'), evt_idxs) + dec = np.take(tdm.get_data('dec'), evt_idxs) + + src_array = tdm.get_data('src_array') + src_ra = np.take(src_array['ra'], src_idxs) + src_dec = np.take(src_array['dec'], src_idxs) + + psi = angular_separation( + ra1=ra, + dec1=dec, + ra2=src_ra, + dec2=src_dec, + psi_floor=psi_floor) + + return psi + + return tdm_field_func_psi diff --git a/skyllh/core/README.txt b/skyllh/core/README.txt index a53ce80bfc..ae80f6382c 100644 --- a/skyllh/core/README.txt +++ b/skyllh/core/README.txt @@ -1,3 +1,3 @@ -The `core` module holds all the code that defines the framework of skyllh. +The ``core`` module holds all the code that defines the framework of SkyLLH. It should not be neccessary for users to change code of this module, unless -there is a need to change a overall framework design. \ No newline at end of file +there is a need to change the overall framework design. \ No newline at end of file diff --git a/skyllh/core/__init__.py b/skyllh/core/__init__.py index 135d702d86..513d91c69b 100644 --- a/skyllh/core/__init__.py +++ b/skyllh/core/__init__.py @@ -2,12 +2,16 @@ import os.path -from skyllh.core import session -from skyllh.core.config import CFG +from skyllh.core import ( + session, +) +from skyllh.core.config import ( + CFG, +) # Automatically enable interactive mode, if the Python interpreter is in # interactive mode. 
-if(session.is_python_interpreter_in_interactive_mode()): +if session.is_python_interpreter_in_interactive_mode(): session.enable_interactive_session() else: session.disable_interactive_session() diff --git a/skyllh/core/analysis.py b/skyllh/core/analysis.py index 0c1dae27d4..cad03724d4 100644 --- a/skyllh/core/analysis.py +++ b/skyllh/core/analysis.py @@ -4,93 +4,109 @@ """ import abc +from astropy import units import numpy as np -from skyllh.core.py import ( - classname, - issequenceof + +from skyllh.core.background_generation import ( + BackgroundGenerationMethod, +) +from skyllh.core.background_generator import ( + BackgroundGenerator, + BackgroundGeneratorBase, ) -from skyllh.core.debugging import get_logger -from skyllh.core.storage import DataFieldRecordArray from skyllh.core.dataset import ( Dataset, DatasetData, ) -from skyllh.core.parameters import ( - FitParameter, - SourceFitParameterMapper, - SingleSourceFitParameterMapper, +from skyllh.core.debugging import ( + get_logger, +) +from skyllh.core.event_selection import ( + EventSelectionMethod, ) -from skyllh.core.pdfratio import PDFRatio -from skyllh.core.progressbar import ProgressBar -from skyllh.core.random import RandomStateService from skyllh.core.llhratio import ( LLHRatio, MultiDatasetTCLLHRatio, - SingleSourceDatasetSignalWeights, - SingleSourceZeroSigH0SingleDatasetTCLLHRatio, - MultiSourceZeroSigH0SingleDatasetTCLLHRatio, - MultiSourceDatasetSignalWeights, + ZeroSigH0SingleDatasetTCLLHRatio, +) +from skyllh.core.multiproc import ( + get_ncpu, + parallelize, +) +from skyllh.core.parameters import ( + ParameterModelMapper, +) +from skyllh.core.pdfratio import ( + PDFRatio, + SourceWeightedPDFRatio, +) +from skyllh.core.py import ( + classname, + issequenceof, +) +from skyllh.core.random import ( + RandomStateService, +) +from skyllh.core.services import ( + DatasetSignalWeightFactorsService, + DetSigYieldService, + SrcDetSigYieldWeightsService, ) -from skyllh.core.timing import TaskTimer -from 
skyllh.core.trialdata import TrialDataManager -from skyllh.core.optimize import EventSelectionMethod -from skyllh.core.source_hypothesis import SourceHypoGroupManager -from skyllh.core.test_statistic import TestStatistic -from skyllh.core.multiproc import get_ncpu, parallelize -from skyllh.core.background_generation import BackgroundGenerationMethod -from skyllh.core.background_generator import BackgroundGenerator from skyllh.core.signal_generator import ( - SignalGeneratorBase, SignalGenerator, + MultiDatasetSignalGenerator, +) +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroupManager, +) +from skyllh.core.source_model import ( + SourceModel, +) +from skyllh.core.storage import ( + DataFieldRecordArray, +) +from skyllh.core.test_statistic import ( + TestStatistic, +) +from skyllh.core.timing import ( + TaskTimer, +) +from skyllh.core.trialdata import ( + TrialDataManager, ) -from skyllh.physics.source import SourceModel logger = get_logger(__name__) -class Analysis(object, metaclass=abc.ABCMeta): - """This is the abstract base class for all analysis classes. It contains - common properties required by all analyses and defines the overall analysis - interface howto set-up and run an analysis. - - Note: This analysis base class assumes the analysis to be a log-likelihood - ratio test, i.e. requires a mathematical log-likelihood ratio - function. - - To set-up and run an analysis the following procedure applies: - - 1. Create an analysis instance. - 2. Add the datasets and their PDF ratio instances via the - :meth:`.add_dataset` method. - 3. Construct the log-likelihood ratio function via the - :meth:`.construct_llhratio` method. - 4. Call the :meth:`do_trial` or :meth:`unblind` method to perform a - random trial or to unblind the data. Both methods will fit the global - fit parameters using the set up data. Finally, the test statistic - is calculated via the :meth:`.calculate_test_statistic` method. 
- - In order to calculate sensitivities and discovery potentials, analysis - trials have to be performed on random data samples with injected signal - events. To perform a trial with injected signal events, the signal generator - has to be constructed via the ``construct_signal_generator`` method before - any random trial data can be generated. +class Analysis( + object, + metaclass=abc.ABCMeta): + """This is the abstract base class for all analysis classes. + It contains common properties required by all analyses and defines the + overall analysis interface how to setup and run an analysis. """ - - def __init__(self, src_hypo_group_manager, src_fitparam_mapper, - test_statistic, bkg_gen_method=None, sig_generator_cls=None): + def __init__( + self, + shg_mgr, + pmm, + test_statistic, + bkg_gen_method=None, + bkg_generator_cls=None, + sig_generator_cls=None, + **kwargs): """Constructor of the analysis base class. Parameters ---------- - src_hypo_group_manager : instance of SourceHypoGroupManager + shg_mgr : instance of SourceHypoGroupManager The instance of SourceHypoGroupManager, which defines the groups of source hypotheses, their flux model, and their detector signal - efficiency implementation method. - src_fitparam_mapper : instance of SourceFitParameterMapper - The SourceFitParameterMapper instance managing the global fit - parameters and their relation to the individual sources. + yield implementation method. + pmm : instance of ParameterModelMapper + The ParameterModelMapper instance managing the global set of + parameters and their relation to individual models, e.g. sources. test_statistic : TestStatistic instance The TestStatistic instance that defines the test statistic function of the analysis. @@ -98,18 +114,27 @@ def __init__(self, src_hypo_group_manager, src_fitparam_mapper, The instance of BackgroundGenerationMethod that should be used to generate background events for pseudo data. 
This can be set to None, if there is no need to generate background events. - sig_generator_cls : SignalGeneratorBase class | None + bkg_generator_cls : class of BackgroundGeneratorBase | None + The background generator class used to create the background + generator instance. + If set to ``None``, the + :class:`skyllh.core.background_generator.BackgroundGenerator` class + is used. + sig_generator_cls : class of MultiDatasetSignalGenerator | None The signal generator class used to create the signal generator - instance. - If set to None, the `SignalGenerator` class is used. + instance for multiple datasets. + If set to ``None``, the + :class:`~skyllh.core.signal_generator.MultiDatasetSignalGenerator` + class is used. """ - # Call the super function to allow for multiple class inheritance. - super(Analysis, self).__init__() + super().__init__( + **kwargs) - self.src_hypo_group_manager = src_hypo_group_manager - self.src_fitparam_mapper = src_fitparam_mapper + self.shg_mgr = shg_mgr + self.pmm = pmm self.test_statistic = test_statistic self.bkg_gen_method = bkg_gen_method + self.bkg_generator_cls = bkg_generator_cls self.sig_generator_cls = sig_generator_cls self._dataset_list = [] @@ -117,43 +142,46 @@ def __init__(self, src_hypo_group_manager, src_fitparam_mapper, self._tdm_list = [] self._event_selection_method_list = [] - # Predefine the variable for the global fit parameter set, which holds - # all the global fit parameters. - self._fitparamset = None - - # Predefine the variable for the log-likelihood ratio function. - self._llhratio = None + self._detsigyield_service = None + self._src_detsigyield_weights_service = None + self._ds_sig_weight_factors_service = None - # Predefine the variable for the background and signal generators. 
self._bkg_generator = None + self._sig_generator_list = [] self._sig_generator = None @property - def src_hypo_group_manager(self): + def shg_mgr(self): """The SourceHypoGroupManager instance, which defines the groups of source hypothesis, their flux model, and their detector signal - efficiency implementation method. + yield implementation method. """ - return self._src_hypo_group_manager - @src_hypo_group_manager.setter - def src_hypo_group_manager(self, manager): - if(not isinstance(manager, SourceHypoGroupManager)): - raise TypeError('The src_hypo_group_manager property must be an ' - 'instance of SourceHypoGroupManager!') - self._src_hypo_group_manager = manager + return self._shg_mgr + + @shg_mgr.setter + def shg_mgr(self, mgr): + if not isinstance(mgr, SourceHypoGroupManager): + raise TypeError( + 'The shg_mgr property must be an instance of ' + 'SourceHypoGroupManager! ' + f'Its current type is {classname(mgr)}.') + self._shg_mgr = mgr @property - def src_fitparam_mapper(self): - """The SourceFitParameterMapper instance that manages the global fit - parameters and their relation to the sources. + def pmm(self): + """The ParameterModelMapper instance that manages the global set of + parameters and their relation to individual models, e.g. sources. """ - return self._src_fitparam_mapper - @src_fitparam_mapper.setter - def src_fitparam_mapper(self, mapper): - if(not isinstance(mapper, SourceFitParameterMapper)): - raise TypeError('The src_fitparam_mapper property must be an ' - 'instance of SourceFitParameterMapper!') - self._src_fitparam_mapper = mapper + return self._pmm + + @pmm.setter + def pmm(self, mapper): + if not isinstance(mapper, ParameterModelMapper): + raise TypeError( + 'The pmm property must be an instance of ' + 'ParameterModelMapper! ' + f'Its current type is {classname(mapper)}.') + self._pmm = mapper @property def test_statistic(self): @@ -161,11 +189,14 @@ def test_statistic(self): of the analysis. 
""" return self._test_statistic + @test_statistic.setter def test_statistic(self, ts): - if(not isinstance(ts, TestStatistic)): - raise TypeError('The test_statistic property must be an instance ' - 'of TestStatistic, but is %s!'%(classname(ts))) + if not isinstance(ts, TestStatistic): + raise TypeError( + 'The test_statistic property must be an instance of ' + 'TestStatistic! ' + f'Its current type is {classname(ts)}.') self._test_statistic = ts @property @@ -175,24 +206,48 @@ def bkg_gen_method(self): generation method has been defined. """ return self._bkg_gen_method + @bkg_gen_method.setter def bkg_gen_method(self, method): - if(method is not None): - if(not isinstance(method, BackgroundGenerationMethod)): - raise TypeError('The bkg_gen_method property must be an ' - 'instance of BackgroundGenerationMethod!') + if method is not None: + if not isinstance(method, BackgroundGenerationMethod): + raise TypeError( + 'The bkg_gen_method property must be an instance of ' + 'BackgroundGenerationMethod! ' + f'Its current type is {classname(method)}.') self._bkg_gen_method = method + @property + def bkg_generator_cls(self): + """The background generator class that should be used to construct the + background generator instance. + """ + return self._bkg_generator_cls + + @bkg_generator_cls.setter + def bkg_generator_cls(self, cls): + if cls is None: + cls = BackgroundGenerator + if not issubclass(cls, BackgroundGeneratorBase): + raise TypeError( + 'The bkg_generator_cls property must be a subclass of ' + 'BackgroundGeneratorBase! ' + f'Its current type is {classname(cls)}.') + self._bkg_generator_cls = cls + @property def dataset_list(self): """The list of Dataset instances. 
""" return self._dataset_list + @dataset_list.setter def dataset_list(self, datasets): - if(not issequenceof(datasets, Dataset)): - raise TypeError('The dataset_list property must be a sequence ' - 'of Dataset instances!') + if not issequenceof(datasets, Dataset): + raise TypeError( + 'The dataset_list property must be a sequence of Dataset ' + 'instances! ' + f'Its current type is {classname(datasets)}.') self._dataset_list = list(datasets) @property @@ -201,11 +256,14 @@ def data_list(self): dataset. """ return self._data_list + @data_list.setter def data_list(self, datas): - if(not issequenceof(datas, DatasetData)): - raise TypeError('The data_list property must be a sequence ' - 'of DatasetData instances!') + if not issequenceof(datas, DatasetData): + raise TypeError( + 'The data_list property must be a sequence of DatasetData ' + 'instances! ' + f'Its current type is {classname(datas)}.') self._data_list = list(datas) @property @@ -215,50 +273,81 @@ def n_datasets(self): return len(self._dataset_list) @property - def fitparamset(self): - """(read-only) The instance of FitParameterSet holding all the global - fit parameters of the log-likelihood ratio function. + def bkg_generator(self): + """(read-only) The background generator instance. Is None of the + background generator has not been constructed via the + `construct_background_generator` method. """ - return self._fitparamset + return self._bkg_generator @property - def llhratio(self): - """The log-likelihood ratio function instance. It is None, if it has - not been constructed yet. + def detsigyield_service(self): + """The instance of DetSigYieldService for the analysis. """ - if(self._llhratio is None): - raise RuntimeError('The log-likelihood ratio function is not ' - 'defined yet. 
Call the construct_analysis method first!') - return self._llhratio - @llhratio.setter - def llhratio(self, obj): - if(not isinstance(obj, LLHRatio)): - raise TypeError('The llhratio property must be an instance of ' - 'LLHRatio!') - self._llhratio = obj + return self._detsigyield_service + + @detsigyield_service.setter + def detsigyield_service(self, service): + if not isinstance(service, DetSigYieldService): + raise TypeError( + 'The detsigyield_service property must be an instance of ' + 'DetSigYieldService! ' + f'Its current type is {classname(service)}!') + self._detsigyield_service = service @property - def bkg_generator(self): - """(read-only) The background generator instance. Is None of the - background generator has not been constructed via the - `construct_background_generator` method. + def src_detsigyield_weights_service(self): + """The instance of SrcDetSigYieldWeightsService for the analysis. """ - return self._bkg_generator + return self._src_detsigyield_weights_service + + @src_detsigyield_weights_service.setter + def src_detsigyield_weights_service(self, service): + if not isinstance(service, SrcDetSigYieldWeightsService): + raise TypeError( + 'The src_detsigyield_weights_service property must be an ' + 'instance of SrcDetSigYieldWeightsService! ' + f'Its current type is {classname(service)}!') + self._src_detsigyield_weights_service = service + + @property + def ds_sig_weight_factors_service(self): + """The instance of DatasetSignalWeightFactorsService for the analysis. + """ + return self._ds_sig_weight_factors_service + + @ds_sig_weight_factors_service.setter + def ds_sig_weight_factors_service(self, service): + if not isinstance(service, DatasetSignalWeightFactorsService): + raise TypeError( + 'The ds_sig_weight_factors_service property must be an ' + 'instance of DatasetSignalWeightFactorsService! 
' + f'Its current type is {classname(service)}!') + self._ds_sig_weight_factors_service = service + + @property + def sig_generator_list(self): + """(read-only) The list of instance of SignalGenerator, one for each + dataset. + """ + return self._sig_generator_list @property def sig_generator_cls(self): """The signal generator class that should be used to construct the - signal generator instance. + signal generator instance handling all datasets. """ return self._sig_generator_cls + @sig_generator_cls.setter def sig_generator_cls(self, cls): if cls is None: - cls = SignalGenerator - if not issubclass(cls, SignalGeneratorBase): + cls = MultiDatasetSignalGenerator + if not issubclass(cls, SignalGenerator): raise TypeError( - 'The sig_generator_cls property must be an subclass of ' - 'SignalGeneratorBase!') + 'The sig_generator_cls property must be a subclass of ' + 'SignalGenerator! ' + f'Its current type is {classname(cls)}.') self._sig_generator_cls = cls @property @@ -269,6 +358,21 @@ def sig_generator(self): """ return self._sig_generator + @property + def tdm_list(self): + """The list of instance of TrialDataManager. One for each dataset. + """ + return self._tdm_list + + @tdm_list.setter + def tdm_list(self, tdms): + if not issequenceof(tdms, TrialDataManager): + raise TypeError( + 'The tdm_list property must be a sequence of TrialDataManager ' + 'instances! ' + f'Its current type is {classname(tdms)}.') + self._tdm_list = list(tdms) + @property def total_livetime(self): """(read-only) The total live-time in days of the loaded data. 
@@ -278,17 +382,52 @@ def total_livetime(self): livetime += data.livetime return livetime - def add_dataset(self, dataset, data, tdm=None, event_selection_method=None): + def construct_services( + self, + ppbar=None): + """Constructs the following services: + + - detector signal yield service + - source detector signal yield weights service + - dataset signal weight factors service + + Parameters + ---------- + ppbar : instance of ProgressBar | None + The instance of ProgressBar of the optional parent progress bar. + """ + self.detsigyield_service = DetSigYieldService( + shg_mgr=self._shg_mgr, + dataset_list=self._dataset_list, + data_list=self._data_list, + ppbar=ppbar, + ) + + self.src_detsigyield_weights_service = SrcDetSigYieldWeightsService( + detsigyield_service=self.detsigyield_service, + ) + + self.ds_sig_weight_factors_service = DatasetSignalWeightFactorsService( + src_detsigyield_weights_service=self.src_detsigyield_weights_service, + ) + + def add_dataset( + self, + dataset, + data, + tdm=None, + event_selection_method=None, + sig_generator=None): """Adds the given dataset to the list of datasets for this analysis. Parameters ---------- - dataset : Dataset instance + dataset : instance of Dataset The Dataset instance that should get added. - data : DatasetData instance + data : instance of DatasetData The DatasetData instance holding the original (prepared) data of the dataset. - tdm : TrialDataManager instance | None + tdm : instance of TrialDataManager | None The TrialDataManager instance managing the trial data and additional data fields of the data set. If set to None, it means that no additional data fields are defined. @@ -298,35 +437,96 @@ def add_dataset(self, dataset, data, tdm=None, event_selection_method=None): will be treated as pure background events. This reduces the amount of log-likelihood-ratio function evaluations. If set to None, all events will be evaluated. 
+ sig_generator : instance of SignalGenerator | None + The optional instance of SignalGenerator, which should be used + to generate signal events for this particular dataset. """ - if(not isinstance(dataset, Dataset)): + if not isinstance(dataset, Dataset): raise TypeError( 'The dataset argument must be an instance of Dataset!') - if(not isinstance(data, DatasetData)): + if not isinstance(data, DatasetData): raise TypeError( 'The data argument must be an instance of DatasetData!') - if(tdm is None): + if tdm is None: tdm = TrialDataManager() - if(not isinstance(tdm, TrialDataManager)): + if not isinstance(tdm, TrialDataManager): raise TypeError( 'The tdm argument must be None or an instance of ' - 'TrialDataManager!') + 'TrialDataManager! ' + f'Its current type is {classname(tdm)}!') - if(event_selection_method is not None): - if(not isinstance(event_selection_method, EventSelectionMethod)): + if event_selection_method is not None: + if not isinstance(event_selection_method, EventSelectionMethod): raise TypeError( 'The event_selection_method argument must be None or an ' - 'instance of EventSelectionMethod!') + 'instance of EventSelectionMethod! ' + f'Its current type is {classname(event_selection_method)}!') + + if sig_generator is not None: + if not isinstance(sig_generator, SignalGenerator): + raise TypeError( + 'The sig_generator argument must be None or an instance of ' + 'SignalGenerator! ' + f'Its current type is {classname(sig_generator)}!') self._dataset_list.append(dataset) self._data_list.append(data) self._tdm_list.append(tdm) self._event_selection_method_list.append(event_selection_method) + self._sig_generator_list.append(sig_generator) + + def get_livetime( + self, + dataset_key=None, + unit=None): + """Retrieves the numeric livetime of the given dataset in the specified + unit. The dataset can be specified either through its index or its name. + If no dataset is specified, the total livetime, i.e. 
the sum of the + livetime of all datasets, is returned. + + Parameters + ---------- + dataset_key : int | str | None + The index or name of the dataset for which the livetime should get + retrieved. If set to ``None``, the total livetime of all datasets + will be returned. + unit : instance of astropy.units.Unit | None + The time unit in which the livetime should be returned. If set to + ``None``, ``astropy.units.day`` will be used. + """ + if dataset_key is None: + livetime = np.sum([data.livetime for data in self._data_list]) + else: + if isinstance(dataset_key, int): + dataset_idx = dataset_key + elif isinstance(dataset_key, str): + dataset_idx = None + for (idx, ds) in enumerate(self._dataset_list): + if ds.name == dataset_key: + dataset_idx = idx + break + if dataset_idx is None: + raise KeyError( + f'The dataset of name "{dataset_key}" does not exist!') + else: + raise TypeError( + 'The dataset_key argument must be an instance of int, str, ' + 'or None! ' + f'Its current type is {classname(dataset_key)}.') + livetime = self._data_list[dataset_idx].livetime + + if isinstance(unit, units.Unit): + livetime *= units.day.to(unit) + + return livetime def calculate_test_statistic( - self, log_lambda, fitparam_values, *args, **kwargs): + self, + log_lambda, + fitparam_values, + **kwargs): """Calculates the test statistic value by calling the ``evaluate`` method of the TestStatistic class with the given log_lambda value and fit parameter values. @@ -336,29 +536,24 @@ def calculate_test_statistic( log_lambda : float The value of the log-likelihood ratio function. Usually, this is its maximum. - fitparam_values : (N_fitparam+1)-shaped 1D ndarray - The 1D ndarray holding the fit parameter values of the - log-likelihood ratio function for the given log_lambda value. 
- - Additional arguments and keyword arguments - ------------------------------------------ - Any additional arguments and keyword arguments are passed to the - evaluate method of the TestStatistic class instance. + fitparam_values : instance of numpy ndarray + The (N_fitparam,)-shaped 1D ndarray holding the global + fit parameter values of the log-likelihood ratio function for + the given log_lambda value. + **kwargs + Any additional keyword arguments are passed to the + ``__call__`` method of the TestStatistic instance. Returns ------- TS : float The calculated test-statistic value. """ - return self._test_statistic.evaluate( - self._llhratio, log_lambda, fitparam_values, *args, **kwargs) - - @abc.abstractmethod - def construct_llhratio(self): - """This method is supposed to construct the log-likelihood ratio - function and sets it as the _llhratio property. - """ - pass + return self._test_statistic( + pmm=self._pmm, + log_lambda=log_lambda, + fitparam_values=fitparam_values, + **kwargs) def construct_background_generator(self, **kwargs): """Constructs the background generator for all added datasets. @@ -366,12 +561,15 @@ def construct_background_generator(self, **kwargs): add_dataset method. It sets the `bkg_generator` property of this Analysis class instance. """ - if(self._bkg_gen_method is None): - raise RuntimeError('No background generation method has been ' - 'defined for this analysis!') + if self._bkg_gen_method is None: + raise RuntimeError( + 'No background generation method has been ' + f'defined for this analysis ({classname(self)})!') - self._bkg_generator = BackgroundGenerator( - self._bkg_gen_method, self._dataset_list, self._data_list, + self._bkg_generator = self.bkg_generator_cls( + bkg_gen_method=self._bkg_gen_method, + dataset_list=self._dataset_list, + data_list=self._data_list, **kwargs) def construct_signal_generator(self, **kwargs): @@ -382,13 +580,18 @@ def construct_signal_generator(self, **kwargs): through the source hypothesis group. 
""" self._sig_generator = self.sig_generator_cls( - src_hypo_group_manager=self._src_hypo_group_manager, + shg_mgr=self._shg_mgr, dataset_list=self._dataset_list, data_list=self._data_list, + sig_generator_list=self._sig_generator_list, + ds_sig_weight_factors_service=self.ds_sig_weight_factors_service, **kwargs) @abc.abstractmethod - def initialize_trial(self, events_list, n_events_list=None): + def initialize_trial( + self, + events_list, + n_events_list=None): """This method is supposed to initialize the log-likelihood ratio function with a new set of given trial data. This is a low-level method. For convenient methods see the `unblind` and `do_trial` methods. @@ -409,150 +612,359 @@ def initialize_trial(self, events_list, n_events_list=None): pass @abc.abstractmethod - def maximize_llhratio(self, rss, tl=None): - """This method is supposed to maximize the log-likelihood ratio - function, by calling the ``maximize`` method of the LLHRatio class. + def unblind( + self, + rss, + tl=None): + """This method is supposed to run the analysis on the experimental data, + i.e. unblinds the data. Parameters ---------- - rss : RandomStateService instance - The RandomStateService instance to draw random numbers from. - tl : TimeLord instance | None - The optional TimeLord instance that should be used to time the + rss : instance of RandomStateService + The instance of RandomStateService that should be used draw random + numbers from. It can be used to generate random initial values for + fit parameters. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to time the maximization of the LLH ratio function. Returns ------- - fitparamset : FitParameterSet instance - The instance of FitParameterSet holding the global fit parameter - definitions used in the maximization process. - log_lambda_max : float - The value of the log-likelihood ratio function at its maximum. 
- fitparam_values : (N_fitparam,)-shaped 1D ndarray - The ndarray holding the global fit parameter values. - By definition, the first element is the value of the fit parameter - ns. + TS : float + The test-statistic value. + global_params_dict : dict + The dictionary holding the global parameter names and their + best fit values. It includes fixed and floating parameters. status : dict - The dictionary with status information about the maximization - process, i.e. from the minimizer. + The status dictionary with information about the performed + minimization process of the analysis. """ pass - def unblind(self, rss): - """Evaluates the unscrambled data, i.e. unblinds the data. + @abc.abstractmethod + def do_trial_with_given_pseudo_data( + self, + rss, + mean_n_sig, + n_sig, + n_events_list, + events_list, + minimizer_status_dict=None, + tl=None, + **kwargs): + """This method is supposed to perform an analysis trial on a given + pseudo data. Parameters ---------- - rss : RandomStateService instance - The RandomStateService instance that should be used draw random - numbers from. + rss : instance of RandomStateService + The instance of RandomStateService to use for generating random + numbers. + mean_n_sig : float + The mean number of signal events the pseudo data was generated with. + n_sig : int + The total number of actual signal events in the pseudo data. + n_events_list : list of int + The total number of events for each data set of the pseudo data. + events_list : list of instance of DataFieldRecordArray + The list of instance of DataFieldRecordArray containing the pseudo + data events for each data sample. The number of events for each + data sample can be less than the number of events given by + ``n_events_list`` if an event selection method was already utilized + when generating background events. + minimizer_status_dict : dict | None + If a dictionary is provided, it will be updated with the minimizer + status dictionary. 
+ tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to time + individual tasks. Returns ------- - TS : float - The test-statistic value. - fitparam_dict : dict - The dictionary holding the global fit parameter names and their best - fit values. - status : dict - The status dictionary with information about the performed - minimization process of the negative of the log-likelihood ratio - function. + recarray : instance of numpy record ndarray + The numpy record ndarray holding the result of the trial. It must + contain the following data fields: + + rss_seed : int + The RandomStateService seed. + mean_n_sig : float + The mean number of signal events. + n_sig : int + The actual number of injected signal events. + ts : float + The test-statistic value. + [ : float ] + Any additional parameters of the analysis. """ - events_list = [ data.exp for data in self._data_list ] - self.initialize_trial(events_list) - (fitparamset, log_lambda_max, fitparam_values, status) = self.maximize_llhratio(rss) - TS = self.calculate_test_statistic(log_lambda_max, fitparam_values) + pass + + def change_shg_mgr( + self, + shg_mgr): + """If the SourceHypoGroupManager instance changed, this method needs to + be called to propagate the change to all components of the analysis. + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The new instance of SourceHypoGroupManager. 
+ """ + for evt_selection_method in self._event_selection_method_list: + if evt_selection_method is not None: + evt_selection_method.change_shg_mgr( + shg_mgr=shg_mgr) + + for tdm in self._tdm_list: + tdm.change_shg_mgr( + shg_mgr=shg_mgr) + + if self._detsigyield_service is not None: + self._detsigyield_service.change_shg_mgr( + shg_mgr=shg_mgr) + + if self._src_detsigyield_weights_service is not None: + self._src_detsigyield_weights_service.change_shg_mgr( + shg_mgr=shg_mgr) + + if self._bkg_generator is not None: + self._bkg_generator.change_shg_mgr( + shg_mgr=shg_mgr) + + if self._sig_generator is not None: + self._sig_generator.change_shg_mgr( + shg_mgr=shg_mgr) + + def do_trial_with_given_bkg_and_sig_pseudo_data( + self, + rss, + mean_n_sig, + n_sig, + n_bkg_events_list, + n_sig_events_list, + bkg_events_list, + sig_events_list, + minimizer_status_dict=None, + tl=None, + **kwargs): + """Performs an analysis trial on the given background and signal pseudo + data. This method merges the background and signal pseudo events and + calls the ``do_trial_with_given_pseudo_data`` method of this class. + + Note + ---- + This method alters the DataFieldRecordArray instances of the + bkg_events_list argument! - fitparam_dict = fitparamset.fitparam_values_to_dict(fitparam_values) + Parameters + ---------- + rss : instance of RandomStateService + The instance of RandomStateService instance to use for generating + random numbers. + mean_n_sig : float + The mean number of signal events the pseudo data was generated with. + n_sig : int + The total number of actual signal events in the pseudo data. + n_bkg_events_list : list of int + The total number of background events for each data set of the + pseudo data. + n_sig_events_list : list of int + The total number of signal events for each data set of the + pseudo data. 
+ bkg_events_list : list of instance of DataFieldRecordArray + The list of instance of DataFieldRecordArray containing the + background pseudo data events for each data set. + sig_events_list : list of instance of DataFieldRecordArray | None + The list of instance of DataFieldRecordArray containing the signal + pseudo data events for each data set. If a particular dataset has + no signal events, the entry for that dataset can be ``None``. + minimizer_status_dict : dict | None + If a dictionary is provided, it will be updated with the minimizer + status dictionary. + tl : instance of TimeLord | None + The instance of TimeLord that should be used to time individual + tasks. + **kwargs : dict + Additional keyword arguments are passed to the + :meth:`~skyllh.core.analysis.Analysis.do_trial_with_given_pseudo_data` + method. + + Returns + ------- + recarray : instance of numpy record ndarray + The numpy record ndarray holding the result of the trial. + See the documentation of the + :meth:`~skyllh.core.analysis.Analysis.do_trial_with_given_pseudo_data` + method for further information. + """ + n_events_list = list( + np.array(n_bkg_events_list) + + np.array(n_sig_events_list) + ) + + events_list = bkg_events_list + + # Add potential signal events to the background events. 
+ for ds_idx in range(len(events_list)): + if sig_events_list[ds_idx] is not None: + if events_list[ds_idx] is None: + events_list[ds_idx] = sig_events_list[ds_idx] + else: + events_list[ds_idx].append(sig_events_list[ds_idx]) + + recarray = self.do_trial_with_given_pseudo_data( + rss=rss, + mean_n_sig=mean_n_sig, + n_sig=n_sig, + n_events_list=n_events_list, + events_list=events_list, + minimizer_status_dict=minimizer_status_dict, + tl=tl, + **kwargs) - return (TS, fitparam_dict, status) + return recarray def generate_background_events( - self, rss, mean_n_bkg_list=None, bkg_kwargs=None, tl=None): + self, + rss, + mean_n_bkg_list=None, + bkg_kwargs=None, + tl=None): """Generates background events utilizing the background generator. Parameters ---------- - rss : RandomStateService - The RandomStateService instance to use for generating random + rss : instance of RandomStateService + The instance of RandomStateService to use for generating random numbers. mean_n_bkg_list : list of float | None The mean number of background events that should be generated for each dataset. If set to None (the default), the background generation method needs to obtain this number itself. + bkg_kwargs : dict | None + Optional keyword arguments for the ``generate_background_events`` + method of the background generator. tl : instance of TimeLord | None - The instance of TimeLord that should be used to time individual - tasks of this method. + The optional instance of TimeLord that should be used to time + individual tasks of this method. Returns ------- n_events_list : list of int The list of the number of events that have been generated for each pseudo data set. - events_list : list of DataFieldRecordArray instances - The list of DataFieldRecordArray instances containing the pseudo + events_list : list of instance of DataFieldRecordArray + The list of instance of DataFieldRecordArray containing the pseudo data events for each data sample. 
The number of events for each data set can be less than the number of events given by - `n_events_list` if an event selection method was already utilized + ``n_events_list`` if an event selection method was already utilized when generating background events. """ n_datasets = self.n_datasets - if(not isinstance(rss, RandomStateService)): + if not isinstance(rss, RandomStateService): raise TypeError( - 'The rss argument must be an instance of RandomStateService!') + 'The rss argument must be an instance of RandomStateService! ' + f'Its current type is {classname(rss)}.') - if(mean_n_bkg_list is None): - mean_n_bkg_list = [ None ] * n_datasets - if(not issequenceof(mean_n_bkg_list, (type(None), float))): + if mean_n_bkg_list is None: + mean_n_bkg_list = [None] * n_datasets + if not issequenceof(mean_n_bkg_list, (type(None), float)): raise TypeError( 'The mean_n_bkg_list argument must be a sequence of None ' - 'and/or floats!') + 'and/or floats! ' + f'Its current type is {classname(mean_n_bkg_list)}.') - if(bkg_kwargs is None): + if bkg_kwargs is None: bkg_kwargs = dict() # Construct the background event generator in case it's not constructed # yet. 
- if(self._bkg_generator is None): + if self._bkg_generator is None: self.construct_background_generator() n_events_list = [] events_list = [] for ds_idx in range(n_datasets): bkg_kwargs.update(mean=mean_n_bkg_list[ds_idx]) - with TaskTimer(tl, 'Generating background events for data set ' - '{:d}.'.format(ds_idx)): - (n_bkg, bkg_events) = self._bkg_generator.generate_background_events( - rss, ds_idx, tl=tl, **bkg_kwargs) + with TaskTimer( + tl, + f'Generating background events for data set {ds_idx}.'): + (n_bkg, bkg_events) =\ + self._bkg_generator.generate_background_events( + rss=rss, + dataset_idx=ds_idx, + tl=tl, + **bkg_kwargs) n_events_list.append(n_bkg) events_list.append(bkg_events) return (n_events_list, events_list) + def _assert_input_arguments_of_generate_signal_events( + self, + rss, + n_events_list, + events_list): + """Checks the input arguments of the ``generate_signal_events`` method + for correct type and value. + """ + n_datasets = self.n_datasets + + if not isinstance(rss, RandomStateService): + raise TypeError( + 'The rss argument must be an instance of RandomStateService! ' + f'Its current type is {classname(rss)}.') + + if not issequenceof(n_events_list, int): + raise TypeError( + 'The n_events_list argument must be a sequence of ' + 'instances of type int! ' + f'Its current type is {classname(n_events_list)}.') + if len(n_events_list) != n_datasets: + raise ValueError( + 'The n_events_list argument must be a list of int of ' + f'length {n_datasets}! Currently it is of length ' + f'{len(n_events_list)}.') + + if not issequenceof(events_list, (type(None), DataFieldRecordArray)): + raise TypeError( + 'The events_list argument must be a sequence of ' + 'instances of type DataFieldRecordArray! ' + f'Its current type is {classname(events_list)}.') + if len(events_list) != n_datasets: + raise ValueError( + 'The events_list argument must be a list of instances of ' + f'type DataFieldRecordArray with a length of {n_datasets}! 
' + f'Currently it is of length {len(events_list)}.') + def generate_signal_events( - self, rss, mean_n_sig, sig_kwargs=None, n_events_list=None, - events_list=None, tl=None): + self, + rss, + mean_n_sig, + sig_kwargs=None, + n_events_list=None, + events_list=None, + tl=None): """Generates signal events utilizing the signal generator. Parameters ---------- - rss : RandomStateService - The RandomStateService instance to use for generating random + rss : instance of RandomStateService + The instance of RandomStateService to use for generating random numbers. mean_n_sig : float The mean number of signal events that should be generated for the trial. The actual number of generated events will be drawn from a Poisson distribution with this given signal mean as mean. sig_kwargs : dict | None - Additional keyword arguments for the `generate_signal_events` method - of the `sig_generator_cls` class. An usual keyword argument is - `poisson`. + Additional keyword arguments for the ``generate_signal_events`` + method of the ``sig_generator_cls`` class. An usual keyword argument + is ``poisson``. n_events_list : list of int | None If given, it specifies the number of events of each data set already present and the number of signal events will be added. - events_list : list of DataFieldRecordArray instances | None + events_list : list of instance of DataFieldRecordArray | None If given, it specifies the events of each data set already present and the signal events will be added. tl : instance of TimeLord | None @@ -564,69 +976,50 @@ def generate_signal_events( n_sig : int The actual number of injected signal events. n_events_list : list of int - The list of the number of events that have been generated for each - pseudo data set. - events_list : list of DataFieldRecordArray instances - The list of DataFieldRecordArray instances containing the pseudo + The list of the number of signal events that have been generated for + each data set. 
+ events_list : list of instance of DataFieldRecordArray + The list of instance of DataFieldRecordArray containing the signal data events for each data set. An entry is None, if no signal events were generated for this particular data set. """ - n_datasets = self.n_datasets + if sig_kwargs is None: + sig_kwargs = dict() - if(not isinstance(rss, RandomStateService)): - raise TypeError( - 'The rss argument must be an instance of RandomStateService!') + if n_events_list is None: + n_events_list = [0] * self.n_datasets - if(sig_kwargs is None): - sig_kwargs = dict() + if events_list is None: + events_list = [None] * self.n_datasets - if(n_events_list is None): - n_events_list = [0] * n_datasets - else: - if(not issequenceof(n_events_list, int)): - raise TypeError( - 'The n_events_list argument must be a sequence of ' - 'instances of type int!') - if(len(n_events_list) != n_datasets): - raise ValueError( - 'The n_events_list argument must be a list of int of ' - 'length {:d}! Currently it is of length {:d}.'.format( - n_datasets, len(n_events_list))) - - if(events_list is None): - events_list = [None] * n_datasets - else: - if(not issequenceof( - events_list, (type(None), DataFieldRecordArray))): - raise TypeError( - 'The events_list argument must be a sequence of ' - 'instances of type DataFieldRecordArray!') - if(len(events_list) != n_datasets): - raise ValueError( - 'The events_list argument must be a list of instances of ' - 'type DataFieldRecordArray with a length of {:d}! ' - 'Currently it is of length {:d}.'.format( - n_datasets, len(events_list))) + self._assert_input_arguments_of_generate_signal_events( + rss=rss, + n_events_list=n_events_list, + events_list=events_list) n_sig = 0 - if(mean_n_sig == 0): + if mean_n_sig == 0: return (n_sig, n_events_list, events_list) # Construct the signal generator if not done yet. 
- if(self._sig_generator is None): + if self._sig_generator is None: with TaskTimer(tl, 'Constructing signal generator.'): self.construct_signal_generator() + # Generate signal events with the given mean number of signal # events. sig_kwargs.update(mean=mean_n_sig) with TaskTimer(tl, 'Generating signal events.'): - (n_sig, ds_sig_events_dict) = self._sig_generator.generate_signal_events( - rss, **sig_kwargs) + (n_sig, ds_sig_events_dict) =\ + self._sig_generator.generate_signal_events( + rss=rss, + **sig_kwargs) + # Inject the signal events to the generated background data. for (ds_idx, sig_events) in ds_sig_events_dict.items(): n_events_list[ds_idx] += len(sig_events) - if(events_list[ds_idx] is None): + if events_list[ds_idx] is None: events_list[ds_idx] = sig_events else: events_list[ds_idx].append(sig_events) @@ -634,16 +1027,21 @@ def generate_signal_events( return (n_sig, n_events_list, events_list) def generate_pseudo_data( - self, rss, mean_n_bkg_list=None, mean_n_sig=0, bkg_kwargs=None, - sig_kwargs=None, tl=None): + self, + rss, + mean_n_bkg_list=None, + mean_n_sig=0, + bkg_kwargs=None, + sig_kwargs=None, + tl=None): """Generates pseudo data with background and possible signal events for each data set using the background and signal generation methods of the analysis. Parameters ---------- - rss : RandomStateService - The RandomStateService instance to use for generating random + rss : instance of RandomStateService + The instance of RandomStateService to use for generating random numbers. mean_n_bkg_list : list of float | None The mean number of background events that should be generated for @@ -662,8 +1060,8 @@ def generate_pseudo_data( of the `SignalGenerator` class. An usual keyword argument is `poisson`. tl : instance of TimeLord | None - The instance of TimeLord that should be used to time individual - tasks of this method. + The optional instance of TimeLord that should be used to time + individual tasks of this method. 
Returns ------- @@ -672,7 +1070,7 @@ def generate_pseudo_data( n_events_list : list of int The list of the number of events that have been generated for each pseudo data set. - events_list : list of DataFieldRecordArray instances + events_list : list of instance of DataFieldRecordArray The list of DataFieldRecordArray instances containing the pseudo data events for each data sample. The number of events for each data set can be less than the number of events given by @@ -698,204 +1096,34 @@ def generate_pseudo_data( return (n_sig, n_events_list, events_list) - def do_trial_with_given_pseudo_data( - self, rss, mean_n_sig, n_sig, n_events_list, events_list, - mean_n_sig_0=None, - minimizer_status_dict=None, - tl=None): - """Performs an analysis trial on the given pseudo data. - - Parameters - ---------- - rss : RandomStateService - The RandomStateService instance to use for generating random - numbers. - mean_n_sig : float - The mean number of signal events the pseudo data was generated with. - n_sig : int - The total number of actual signal events in the pseudo data. - n_events_list : list of int - The total number of events for each data set of the pseudo data. - events_list : list of DataFieldRecordArray instances - The list of DataFieldRecordArray instances containing the pseudo - data events for each data sample. The number of events for each - data sample can be less than the number of events given by - `n_events_list` if an event selection method was already utilized - when generating background events. - mean_n_sig_0 : float | None - The fixed mean number of signal events for the null-hypothesis, - when using a ns-profile log-likelihood-ratio function. - If set to None, this argument is interpreted as 0. - minimizer_status_dict : dict | None - If a dictionary is provided, it will be updated with the minimizer - status dictionary. - tl : instance of TimeLord | None - The instance of TimeLord that should be used to time individual - tasks. 
- - Returns - ------- - result : structured numpy ndarray - The structured numpy ndarray holding the result of the trial. It - contains the following data fields: - - rss_seed : int - The RandomStateService seed. - mean_n_sig : float - The mean number of signal events. - n_sig : int - The actual number of injected signal events. - mean_n_sig_0 : float - The fixed mean number of signal events for the null-hypothesis. - ts : float - The test-statistic value. - [ ... : float ] - Any additional fit parameters of the LLH function. - """ - if(mean_n_sig_0 is not None): - self._llhratio.mean_n_sig_0 = mean_n_sig_0 - else: - mean_n_sig_0 = 0 - - with TaskTimer(tl, 'Initializing trial.'): - self.initialize_trial(events_list, n_events_list) - - with TaskTimer(tl, 'Maximizing LLH ratio function.'): - (fitparamset, log_lambda_max, fitparam_values, status) = self.maximize_llhratio(rss, tl=tl) - if(isinstance(minimizer_status_dict, dict)): - minimizer_status_dict.update(status) - - with TaskTimer(tl, 'Calculating test statistic.'): - ts = self.calculate_test_statistic(log_lambda_max, fitparam_values) - - # Create the structured array data type for the result array. 
- result_dtype = [ - ('seed', np.int64), - ('mean_n_sig', np.float64), - ('n_sig', np.int64), - ('mean_n_sig_0', np.float64), - ('ts', np.float64) - ] + [ - (fitparam_name, np.float64) - for fitparam_name in fitparamset.fitparam_name_list - ] - result = np.empty((1,), dtype=result_dtype) - result['seed'] = rss.seed - result['mean_n_sig'] = mean_n_sig - result['n_sig'] = n_sig - result['mean_n_sig_0'] = mean_n_sig_0 - result['ts'] = ts - for (idx, fitparam_name) in enumerate(fitparamset.fitparam_name_list): - result[fitparam_name] = fitparam_values[idx] - - return result - - def do_trial_with_given_bkg_and_sig_pseudo_data( - self, rss, mean_n_sig, n_sig, n_bkg_events_list, n_sig_events_list, - bkg_events_list, sig_events_list, - mean_n_sig_0=None, - minimizer_status_dict=None, - tl=None): - """Performs an analysis trial on the given background and signal pseudo - data. This method merges the background and signal pseudo events and - calls the ``do_trial_with_given_pseudo_data`` method of this class. - - Note - ---- - This method alters the DataFieldRecordArray instances of the - bkg_events_list argument! - - Parameters - ---------- - rss : RandomStateService - The RandomStateService instance to use for generating random - numbers. - mean_n_sig : float - The mean number of signal events the pseudo data was generated with. - n_sig : int - The total number of actual signal events in the pseudo data. - n_bkg_events_list : list of int - The total number of background events for each data set of the - pseudo data. - n_sig_events_list : list of int - The total number of signal events for each data set of the - pseudo data. - bkg_events_list : list of DataFieldRecordArray instances - The list of DataFieldRecordArray instances containing the background - pseudo data events for each data set. - sig_events_list : list of DataFieldRecordArray instances or None - The list of DataFieldRecordArray instances containing the signal - pseudo data events for each data set. 
If a particular dataset has - no signal events, the entry for that dataset can be None. - mean_n_sig_0 : float | None - The fixed mean number of signal events for the null-hypothesis, - when using a ns-profile log-likelihood-ratio function. - If set to None, this argument is interpreted as 0. - minimizer_status_dict : dict | None - If a dictionary is provided, it will be updated with the minimizer - status dictionary. - tl : instance of TimeLord | None - The instance of TimeLord that should be used to time individual - tasks. - - Returns - ------- - result : structured numpy ndarray - The structured numpy ndarray holding the result of the trial. - See the documentation of the ``do_trial_with_given_pseudo_data`` - method for further information. - """ - n_events_list = list( - np.array(n_bkg_events_list) + - np.array(n_sig_events_list) - ) - - events_list = bkg_events_list - - # Add potential signal events to the background events. - for ds_idx in range(len(events_list)): - if(sig_events_list[ds_idx] is not None): - if(events_list[ds_idx] is None): - events_list[ds_idx] = sig_events_list[ds_idx] - else: - events_list[ds_idx].append(sig_events_list[ds_idx]) - - return self.do_trial_with_given_pseudo_data( - rss = rss, - mean_n_sig = mean_n_sig, - n_sig = n_sig, - n_events_list = n_events_list, - events_list = events_list, - mean_n_sig_0 = mean_n_sig_0, - minimizer_status_dict = minimizer_status_dict, - tl = tl - ) - def do_trial( - self, rss, mean_n_bkg_list=None, mean_n_sig=0, mean_n_sig_0=None, - bkg_kwargs=None, sig_kwargs=None, minimizer_status_dict=None, - tl=None): - """Performs an analysis trial by generating a pseudo data sample with - background events and possible signal events, and performs the LLH - analysis on that random pseudo data sample. 
+ self,
+ rss,
+ mean_n_bkg_list=None,
+ mean_n_sig=0,
+ bkg_kwargs=None,
+ sig_kwargs=None,
+ minimizer_status_dict=None,
+ tl=None,
+ **kwargs):
+ """This method performs an analysis trial by generating a
+ pseudo data sample with background events and possible signal events
+ via the :meth:`generate_pseudo_data` method, and performs the analysis
+ on that random pseudo data sample by calling the
+ :meth:`do_trial_with_given_pseudo_data` method.

 Parameters
 ----------
- rss : RandomStateService
- The RandomStateService instance to use for generating random
- numbers.
+ rss : instance of RandomStateService
+ The instance of RandomStateService to use for generating
+ random numbers.
 mean_n_bkg_list : list of float | None
 The mean number of background events that should be generated for
 each dataset. If set to None (the default), the background
 generation method needs to obtain this number itself.
 mean_n_sig : float
 The mean number of signal events that should be generated for the
- trial. The actual number of generated events will be drawn from a
- Poisson distribution with this given signal mean as mean.
- mean_n_sig_0 : float | None
- The fixed mean number of signal events for the null-hypothesis,
- when using a ns-profile log-likelihood-ratio function.
- If set to None, this argument is interpreted as 0.
+ trial.
 bkg_kwargs : dict | None
 Additional keyword arguments for the `generate_events` method of the
 background generation method class. An usual keyword argument is
@@ -908,218 +1136,237 @@ def do_trial(
 If a dictionary is provided, it will be updated with the minimizer
 status dictionary.
 tl : instance of TimeLord | None
- The instance of TimeLord that should be used to time individual
- tasks.
+ The optional instance of TimeLord that should be used to time
+ individual tasks.
+ **kwargs : dict
+ Additional keyword arguments are passed to the
+ :meth:`do_trial_with_given_pseudo_data` method.
Returns ------- - result : structured numpy ndarray - The structured numpy ndarray holding the result of the trial. It - contains the following data fields: - - mean_n_sig : float - The mean number of signal events. - n_sig : int - The actual number of injected signal events. - mean_n_sig_0 : float - The fixed mean number of signal events for the null-hypothesis. - ts : float - The test-statistic value. - [ ... : float ] - Any additional fit parameters of the LLH function. + recarray : instance of numpy record ndarray + The numpy record ndarray holding the result of the trial. + See the documentation of the + :py:meth:`~skyllh.core.analysis.Analysis.do_trial_with_given_pseudo_data` + method for further information. """ - if(mean_n_sig_0 is not None): - self._llhratio.mean_n_sig_0 = mean_n_sig_0 - else: - mean_n_sig_0 = 0 - with TaskTimer(tl, 'Generating pseudo data.'): (n_sig, n_events_list, events_list) = self.generate_pseudo_data( - rss=rss, mean_n_bkg_list=mean_n_bkg_list, mean_n_sig=mean_n_sig, - bkg_kwargs=bkg_kwargs, sig_kwargs=sig_kwargs, tl=tl) + rss=rss, + mean_n_bkg_list=mean_n_bkg_list, + mean_n_sig=mean_n_sig, + bkg_kwargs=bkg_kwargs, + sig_kwargs=sig_kwargs, + tl=tl) - result = self.do_trial_with_given_pseudo_data( + recarray = self.do_trial_with_given_pseudo_data( rss=rss, mean_n_sig=mean_n_sig, n_sig=n_sig, n_events_list=n_events_list, events_list=events_list, - mean_n_sig_0=mean_n_sig_0, minimizer_status_dict=minimizer_status_dict, - tl=tl - ) + tl=tl, + **kwargs) - return result + return recarray def do_trials( - self, rss, n, mean_n_bkg_list=None, mean_n_sig=0, mean_n_sig_0=None, - bkg_kwargs=None, sig_kwargs=None, ncpu=None, tl=None, ppbar=None): - """Executes `do_trial` method `N` times with possible multi-processing. - One trial performs an analysis trial by generating a pseudo data sample - with background events and possible signal events, and performs the LLH - analysis on that random pseudo data sample. 
+ self, + rss, + n, + ncpu=None, + tl=None, + ppbar=None, + **kwargs): + """Executes the :meth:`do_trial` method ``n`` times with possible + multi-processing. Parameters ---------- - rss : RandomStateService + rss : instance of RandomStateService The RandomStateService instance to use for generating random numbers. n : int Number of trials to generate using the `do_trial` method. - mean_n_bkg_list : list of float | None - The mean number of background events that should be generated for - each dataset. If set to None (the default), the number of data - events of each data sample will be used as mean. - mean_n_sig : float - The mean number of signal events that should be generated for the - trial. The actual number of generated events will be drawn from a - Poisson distribution with this given signal mean as mean. - mean_n_sig_0 : float | None - The fixed mean number of signal events for the null-hypothesis, - when using a ns-profile log-likelihood-ratio function. - bkg_kwargs : dict | None - Additional keyword arguments for the `generate_events` method of the - background generation method class. An usual keyword argument is - `poisson`. - sig_kwargs : dict | None - Additional keyword arguments for the `generate_signal_events` method - of the `SignalGenerator` class. An usual keyword argument is - `poisson`. If `poisson` is set to True, the actual number of - generated signal events will be drawn from a Poisson distribution - with the given mean number of signal events. - If set to False, the argument ``mean_n_sig`` specifies the actual - number of generated signal events. ncpu : int | None The number of CPUs to use, i.e. the number of subprocesses to spawn. If set to None, the global setting will be used. tl : instance of TimeLord | None - The instance of TimeLord that should be used to time individual - tasks. + The optional instance of TimeLord that should be used to time + individual tasks. 
ppbar : instance of ProgressBar | None The possible parent ProgressBar instance. + **kwargs + Additional keyword arguments are passed to the :meth:`do_trial` + method. See the documentation of that method for allowed keyword + arguments. Returns ------- - result : structured numpy ndarray - The structured numpy ndarray holding the result of the trial. It - contains the following data fields: - - n_sig : int - The actual number of injected signal events. - ts : float - The test-statistic value. - [ ... : float ] - Any additional fit parameters of the LLH function. + recarray : numpy record ndarray + The numpy record ndarray holding the result of all trials. + See the documentation of the + :py:meth:`~skyllh.core.analysis.Analysis.do_trial` method for the + list of data fields. """ ncpu = get_ncpu(ncpu) - args_list = [((), { - 'mean_n_bkg_list': mean_n_bkg_list, - 'mean_n_sig': mean_n_sig, - 'mean_n_sig_0': mean_n_sig_0, - 'bkg_kwargs': bkg_kwargs, - 'sig_kwargs': sig_kwargs - }) for i in range(n) - ] + + args_list = [((), kwargs) for i in range(n)] result_list = parallelize( - self.do_trial, args_list, ncpu, rss=rss, tl=tl, ppbar=ppbar) + func=self.do_trial, + args_list=args_list, + ncpu=ncpu, + rss=rss, + tl=tl, + ppbar=ppbar) + + recarray_dtype = result_list[0].dtype + recarray = np.empty(n, dtype=recarray_dtype) + recarray[:] = np.array(result_list)[:, 0] - result_dtype = result_list[0].dtype - result = np.empty(n, dtype=result_dtype) - result[:] = np.array(result_list)[:,0] - return result + return recarray -class TimeIntegratedMultiDatasetSingleSourceAnalysis(Analysis): - """This is an analysis class that implements a time-integrated LLH ratio - analysis for multiple datasets assuming a single source. +class LLHRatioAnalysis( + Analysis, + metaclass=abc.ABCMeta): + """This is the abstract base class for all log-likelihood ratio analysis + classes. It requires a mathematical log-likelihood ratio function. 
- To run this analysis the following procedure applies: + To set-up and run an analysis the following procedure applies: - 1. Add the datasets and their spatial and energy PDF ratio instances - via the :meth:`.add_dataset` method. - 2. Construct the log-likelihood ratio function via the + 1. Create an Analysis instance. + 2. Add the datasets and their PDF ratio instances via the + :meth:`skyllh.core.analysis.Analysis.add_dataset` method. + 3. Construct the log-likelihood ratio function via the :meth:`construct_llhratio` method. - 3. Initialize a trial via the :meth:`initialize_trial` method. - 4. Fit the global fit parameters to the trial data via the - :meth:`maximize_llhratio` method. + 4. Initialize a trial via the :meth:`initialize_trial` method. + 5. Fit the global fit parameters to the trial data via the + :meth:`maximize` method of the ``llhratio`` property. + + Alternatively, one can use the convenient methods :meth:`do_trial` or + :meth:`unblind` to perform a random trial or to unblind the data, + respectively. Both methods will fit the global fit parameters using the set + up data. Finally, the test statistic is calculated via the + :meth:`calculate_test_statistic` method. + + In order to calculate sensitivities and discovery potentials, analysis + trials have to be performed on random data samples with injected signal + events. To perform a trial with injected signal events, the signal generator + has to be constructed via the :meth:`construct_signal_generator` method + before any random trial data can be generated. """ - def __init__(self, src_hypo_group_manager, src_fitparam_mapper, fitparam_ns, - test_statistic, bkg_gen_method=None, sig_generator_cls=None): - """Creates a new time-integrated point-like source analysis assuming a - single source. + + def __init__( + self, + shg_mgr, + pmm, + test_statistic, + bkg_gen_method=None, + bkg_generator_cls=None, + sig_generator_cls=None, + **kwargs): + """Constructs a new instance of LLHRatioAnalysis. 
Parameters ---------- - src_hypo_group_manager : instance of SourceHypoGroupManager + shg_mgr : instance of SourceHypoGroupManager The instance of SourceHypoGroupManager, which defines the groups of source hypotheses, their flux model, and their detector signal - efficiency implementation method. - src_fitparam_mapper : instance of SingleSourceFitParameterMapper - The instance of SingleSourceFitParameterMapper defining the global - fit parameters and their mapping to the source fit parameters. - fitparam_ns : FitParameter instance - The FitParameter instance defining the fit parameter ns. + yield implementation method. + pmm : instance of ParameterModelMapper + The ParameterModelMapper instance managing the global set of + parameters and their relation to individual models, e.g. sources. test_statistic : TestStatistic instance The TestStatistic instance that defines the test statistic function of the analysis. bkg_gen_method : instance of BackgroundGenerationMethod | None - The instance of BackgroundGenerationMethod that will be used to - generate background events for a new analysis trial. This can be set - to None, if no background events have to get generated. - sig_generator_cls : SignalGeneratorBase class | None + The instance of BackgroundGenerationMethod that should be used to + generate background events for pseudo data. This can be set to None, + if there is no need to generate background events. + bkg_generator_cls : class of BackgroundGeneratorBase | None + The background generator class used to create the background + generator instance. + If set to ``None``, the + :class:`skyllh.core.background_generator.BackgroundGenerator` class + is used. + sig_generator_cls : class of SignalGenerator | None The signal generator class used to create the signal generator instance. - If set to None, the `SignalGenerator` class is used. + If set to None, the + :class:`~skyllh.core.signal_generator.MultiDatasetSignalGenerator` + class is used. 
""" - if(not isinstance(src_fitparam_mapper, SingleSourceFitParameterMapper)): - raise TypeError('The src_fitparam_mapper argument must be an ' - 'instance of SingleSourceFitParameterMapper!') - super().__init__( - src_hypo_group_manager=src_hypo_group_manager, - src_fitparam_mapper=src_fitparam_mapper, + shg_mgr=shg_mgr, + pmm=pmm, test_statistic=test_statistic, bkg_gen_method=bkg_gen_method, - sig_generator_cls=sig_generator_cls) - - self.fitparam_ns = fitparam_ns + bkg_generator_cls=bkg_generator_cls, + sig_generator_cls=sig_generator_cls, + **kwargs) - # Define the member for the list of PDF ratio lists. Each list entry is - # a list of PDF ratio instances for each data set. - self._pdfratio_list_list = [] + # Define the member variable for the list of PDFRatio instances, one for + # each dataset. + self._pdfratio_list = [] - # Create the FitParameterSet instance holding the fit parameter ns and - # all the other additional fit parameters. This set is used by the - # ``maximize_llhratio`` method. - self._fitparamset = self._src_fitparam_mapper.fitparamset.copy() - self._fitparamset.add_fitparam(self._fitparam_ns, atfront=True) + self._llhratio = None @property - def fitparam_ns(self): - """The FitParameter instance for the fit parameter ns. + def llhratio(self): + """The log-likelihood ratio function instance. It is None, if it has + not been constructed yet. + """ + if self._llhratio is None: + raise RuntimeError( + 'The log-likelihood ratio function is not defined yet. ' + 'Call the "construct_llhratio" method first!') + return self._llhratio + + @llhratio.setter + def llhratio(self, obj): + if not isinstance(obj, LLHRatio): + raise TypeError( + 'The llhratio property must be an instance of LLHRatio! ' + f'Its current type is {classname(obj)}.') + self._llhratio = obj + + @abc.abstractmethod + def construct_llhratio( + self, + minimizer, + ppbar=None): + """This method is supposed to construct the LLH ratio function. 
+ + Returns + ------- + llhratio : instance of LLHRatio + The instance of LLHRatio that implements the + log-likelihood-ratio function of this LLH ratio analysis. """ - return self._fitparam_ns - @fitparam_ns.setter - def fitparam_ns(self, fitparam): - if(not isinstance(fitparam, FitParameter)): - raise TypeError('The fitparam_ns property must be an instance of FitParameter!') - self._fitparam_ns = fitparam - - def add_dataset(self, dataset, data, pdfratios, tdm=None, - event_selection_method=None): + pass + + def add_dataset( + self, + dataset, + data, + pdfratio, + tdm=None, + event_selection_method=None, + sig_generator=None): """Adds a dataset with its PDF ratio instances to the analysis. Parameters ---------- - dataset : Dataset instance - The Dataset instance that should get added. - data : DatasetData instance - The DatasetData instance holding the original (prepared) data of the - dataset. - pdfratios : PDFRatio instance | sequence of PDFRatio instances - The PDFRatio instance or the sequence of PDFRatio instances for the - to-be-added data set. - tdm : TrialDataManager instance | None + dataset : instance of Dataset + The instance of Dataset that should get added. + data : instance of DatasetData + The instance of DatasetData holding the original (prepared) data of + the dataset. + pdfratio : instance of PDFRatio + The instance of PDFRatio for the to-be-added data set. + tdm : instance of TrialDataManager | None The TrialDataManager instance that manages the trial data and additional data fields for this data set. event_selection_method : instance of EventSelectionMethod | None @@ -1128,251 +1375,374 @@ def add_dataset(self, dataset, data, pdfratios, tdm=None, will be treated as pure background events. This reduces the amount of log-likelihood-ratio function evaluations. If set to None, all events will be evaluated. 
+ sig_generator : instance of SignalGenerator | None + The optional instance of SignalGenerator, which should be used + to generate signal events for this particular dataset. """ - super(TimeIntegratedMultiDatasetSingleSourceAnalysis, self).add_dataset( - dataset, data, tdm, event_selection_method) - - if(isinstance(pdfratios, PDFRatio)): - pdfratios = [pdfratios] - if(not issequenceof(pdfratios, PDFRatio)): - raise TypeError('The pdfratios argument must be an instance of ' - 'PDFRatio or a sequence of PDFRatio!') + super().add_dataset( + dataset=dataset, + data=data, + tdm=tdm, + event_selection_method=event_selection_method, + sig_generator=sig_generator) + + if not isinstance(pdfratio, PDFRatio): + raise TypeError( + 'The pdfratio argument must be an instance of PDFRatio! ' + f'Its current type is {classname(pdfratio)}') - self._pdfratio_list_list.append(list(pdfratios)) + self._pdfratio_list.append(pdfratio) - def construct_llhratio(self, minimizer, ppbar=None): - """Constructs the log-likelihood-ratio (LLH-ratio) function of the - analysis. This setups all the necessary analysis - objects like detector signal efficiencies and dataset signal weights, - constructs the log-likelihood ratio functions for each dataset and the - final composite llh ratio function. + def change_shg_mgr( + self, + shg_mgr): + """If the SourceHypoGroupManager instance changed, this method needs to + be called to propagate the change to all components of the analysis. Parameters ---------- - minimizer : instance of Minimizer - The instance of Minimizer that should be used to minimize the - negative of the log-likelihood ratio function. - ppbar : ProgressBar instance | None - The instance of ProgressBar of the optional parent progress bar. - - Returns - ------- - llhratio : instance of MultiDatasetTCLLHRatio - The instance of MultiDatasetTCLLHRatio that implements the - log-likelihood-ratio function of the analysis. 
+ shg_mgr : instance of SourceHypoGroupManager + The new instance of SourceHypoGroupManager. """ - # Create the detector signal yield instances for each dataset. - # Since this is for a single source, we don't have to have individual - # detector signal yield instances for each source as well. - detsigyield_list = [] - fluxmodel = self._src_hypo_group_manager.get_fluxmodel_by_src_idx(0) - detsigyield_implmethod_list = self._src_hypo_group_manager.get_detsigyield_implmethod_list_by_src_idx(0) - if((len(detsigyield_implmethod_list) != 1) and - (len(detsigyield_implmethod_list) != self.n_datasets)): - raise ValueError('The number of detector signal yield ' - 'implementation methods is not 1 and does not match the number ' - 'of used datasets in the analysis!') - pbar = ProgressBar(len(self.dataset_list), parent=ppbar).start() - for (j, (dataset, data)) in enumerate(zip(self.dataset_list, - self.data_list)): - if(len(detsigyield_implmethod_list) == 1): - # Only one detsigyield implementation method was defined, so we - # use it for all datasets. - detsigyield_implmethod = detsigyield_implmethod_list[0] - else: - detsigyield_implmethod = detsigyield_implmethod_list[j] + if self._llhratio is None: + raise RuntimeError( + 'The LLH ratio function has to be constructed ' + 'before the `change_shg_mgr` method can be called!') - detsigyield = detsigyield_implmethod.construct_detsigyield( - dataset, data, fluxmodel, data.livetime, ppbar=pbar) - detsigyield_list.append(detsigyield) - pbar.increment() - pbar.finish() + super().change_shg_mgr( + shg_mgr=shg_mgr) - # For multiple datasets we need a dataset signal weights instance in - # order to distribute ns over the different datasets. - dataset_signal_weights = SingleSourceDatasetSignalWeights( - self._src_hypo_group_manager, self._src_fitparam_mapper, - detsigyield_list) + # Change the source hypo group manager of the LLH ratio function + # instance. 
+ self._llhratio.change_shg_mgr( + shg_mgr=shg_mgr) - # Create the list of log-likelihood ratio functions, one for each - # dataset. - llhratio_list = [] - for j in range(self.n_datasets): - tdm = self._tdm_list[j] - pdfratio_list = self._pdfratio_list_list[j] - llhratio = SingleSourceZeroSigH0SingleDatasetTCLLHRatio( - minimizer, - self._src_hypo_group_manager, - self._src_fitparam_mapper, - tdm, - pdfratio_list - ) - llhratio_list.append(llhratio) + def initialize_trial( + self, + events_list, + n_events_list=None, + tl=None): + """This method initializes the log-likelihood ratio + function with a new set of given trial data. This is a low-level method. + For convenient methods see the ``unblind`` and ``do_trial`` methods. - # Create the final multi-dataset log-likelihood ratio function. - llhratio = MultiDatasetTCLLHRatio( - minimizer, dataset_signal_weights, llhratio_list) + Parameters + ---------- + events_list : list of DataFieldRecordArray instances + The list of DataFieldRecordArray instances holding the data events + to use for the log-likelihood function evaluation. The data arrays + for the datasets must be in the same order than the added datasets. + n_events_list : list of int | None + The list of the number of events of each data set. If set to None, + the number of events is taken from the size of the given events + arrays. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used for timing + measurements. + """ + if n_events_list is None: + n_events_list = [None] * len(events_list) - return llhratio + for (tdm, events, n_events, evt_sel_method) in zip( + self._tdm_list, + events_list, + n_events_list, + self._event_selection_method_list): - def change_source(self, source): - """Changes the source of the analysis to the given source. It makes the - necessary changes to all the objects of the analysis. + # Initialize the trial data manager with the given raw events. 
+ tdm.initialize_trial( + shg_mgr=self._shg_mgr, + pmm=self._pmm, + events=events, + n_events=n_events, + evt_sel_method=evt_sel_method, + tl=tl) + + self._llhratio.initialize_for_new_trial( + tl=tl) + + def unblind( + self, + rss, + tl=None): + """Evaluates the unscrambled data, i.e. unblinds the data. Parameters ---------- - source : SourceModel instance - The SourceModel instance describing the new source. - """ - if(not isinstance(source, SourceModel)): - raise TypeError('The source argument must be an instance of SourceModel') - - if(self._llhratio is None): - raise RuntimeError('The LLH ratio function has to be constructed, ' - 'before the `change_source` method can be called!') + rss : instance of RandomStateService + The instance of RandomStateService that should be used draw random + numbers from. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to time the + maximization of the LLH ratio function. - # Change the source in the SourceHypoGroupManager instance. - # Because this is a single source analysis, there can only be one source - # hypothesis group defined. - self._src_hypo_group_manager.src_hypo_group_list[0].source_list[0] = source + Returns + ------- + TS : float + The test-statistic value. + global_params_dict : dict + The dictionary holding the global parameter names and their + best fit values. It includes fixed and floating parameters. + status : dict + The status dictionary with information about the performed + minimization process of the negative of the log-likelihood ratio + function. + """ + events_list = [data.exp for data in self._data_list] + self.initialize_trial(events_list) - # Change the source hypo group manager of the EventSelectionMethod - # instance. 
- for event_selection_method in self._event_selection_method_list: - if(event_selection_method is not None): - event_selection_method.change_source_hypo_group_manager( - self._src_hypo_group_manager) + (log_lambda, fitparam_values, status) = self._llhratio.maximize( + rss=rss, + tl=tl) - # Change the source hypo group manager of the LLH ratio function - # instance. - self._llhratio.change_source_hypo_group_manager(self._src_hypo_group_manager) + TS = self.calculate_test_statistic( + log_lambda=log_lambda, + fitparam_values=fitparam_values) - # Change the source hypo group manager of the background generator - # instance. - if(self._bkg_generator is not None): - self._bkg_generator.change_source_hypo_group_manager( - self._src_hypo_group_manager) + global_params_dict = self._pmm.create_global_params_dict( + gflp_values=fitparam_values) - # Change the source hypo group manager of the signal generator instance. - if(self._sig_generator is not None): - self._sig_generator.change_source_hypo_group_manager( - self._src_hypo_group_manager) + return (TS, global_params_dict, status) - def change_sources(self, sources): - """Changes the sources of the analysis to the given source list. It - makes the necessary changes to all the objects of the analysis. + def do_trial_with_given_pseudo_data( + self, + rss, + mean_n_sig, + n_sig, + n_events_list, + events_list, + minimizer_status_dict=None, + tl=None, + mean_n_sig_0=None): + """Performs an analysis trial on the given pseudo data. Parameters ---------- - sources : list of SourceModel instances - The SourceModel instances describing new sources. - """ - if(isinstance(sources, SourceModel)): - sources = [sources] - if(not issequenceof(sources, SourceModel)): - raise TypeError('The sources argument must be a list of instances ' - 'of SourceModel') + rss : instance of RandomStateService + The instance of RandomStateService to use for generating random + numbers. 
+ mean_n_sig : float + The mean number of signal events the pseudo data was generated with. + n_sig : int + The total number of actual signal events in the pseudo data. + n_events_list : list of int + The total number of events for each data set of the pseudo data. + events_list : list of instance of DataFieldRecordArray + The list of instance of DataFieldRecordArray containing the pseudo + data events for each data sample. The number of events for each + data sample can be less than the number of events given by + ``n_events_list`` if an event selection method was already utilized + when generating background events. + minimizer_status_dict : dict | None + If a dictionary is provided, it will be updated with the minimizer + status dictionary. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to time + individual tasks. + mean_n_sig_0 : float | None + The fixed mean number of signal events for the null-hypothesis, + when using a ns-profile log-likelihood-ratio function. + If set to None, this argument is interpreted as 0. - if(self._llhratio is None): - raise RuntimeError( - 'The LLH ratio function has to be constructed, ' - 'before the `change_source` method can be called!') + Returns + ------- + recarray : instance of numpy record ndarray + The numpy record ndarray holding the result of the trial. It + contains the following data fields: - # Change the source in the SourceHypoGroupManager instance. - # Because this is a single type sources analysis, there can only be one - # source hypothesis group defined. - self._src_hypo_group_manager.src_hypo_group_list[0].source_list = sources + rss_seed : int + The RandomStateService seed. + mean_n_sig : float + The mean number of signal events. + n_sig : int + The actual number of injected signal events. + mean_n_sig_0 : float + The fixed mean number of signal events for the null-hypothesis. + ts : float + The test-statistic value. 
+ [ : float ] + Any additional parameters of the LLH ratio function. + """ + if mean_n_sig_0 is None: + mean_n_sig_0 = 0 - # Change the source hypo group manager of the EventSelectionMethod - # instance. - for event_selection_method in self._event_selection_method_list: - if(event_selection_method is not None): - event_selection_method.change_source_hypo_group_manager( - self._src_hypo_group_manager) + self._llhratio.mean_n_sig_0 = mean_n_sig_0 - # Change the source hypo group manager of the LLH ratio function - # instance. - self._llhratio.change_source_hypo_group_manager( - self._src_hypo_group_manager) + with TaskTimer(tl, 'Initializing trial.'): + self.initialize_trial(events_list, n_events_list) - # Change the source hypo group manager of the background generator - # instance. - if(self._bkg_generator is not None): - self._bkg_generator.change_source_hypo_group_manager( - self._src_hypo_group_manager) + with TaskTimer(tl, 'Maximizing LLH ratio function.'): + (log_lambda, fitparam_values, status) = self._llhratio.maximize( + rss=rss, + tl=tl) + if isinstance(minimizer_status_dict, dict): + minimizer_status_dict.update(status) - # Change the source hypo group manager of the signal generator instance. - if(self._sig_generator is not None): - self._sig_generator.change_source_hypo_group_manager( - self._src_hypo_group_manager) + with TaskTimer(tl, 'Calculating test statistic.'): + ts = self.calculate_test_statistic( + log_lambda=log_lambda, + fitparam_values=fitparam_values) - def initialize_trial(self, events_list, n_events_list=None, tl=None): - """This method initializes the multi-dataset log-likelihood ratio - function with a new set of given trial data. This is a low-level method. - For convenient methods see the `unblind` and `do_trial` methods. + # Get the dictionary holding all floating and fixed parameter names + # and values. 
+ global_params_dict = self._pmm.create_global_params_dict( + gflp_values=fitparam_values) + + # Create the structured array data type for the result array. + recarray_dtype = [ + ('seed', np.int64), + ('mean_n_sig', np.float64), + ('n_sig', np.int64), + ('mean_n_sig_0', np.float64), + ('ts', np.float64) + ] + [ + (param_name, np.float64) + for param_name in global_params_dict.keys() + ] + recarray = np.empty((1,), dtype=recarray_dtype) + recarray['seed'] = rss.seed + recarray['mean_n_sig'] = mean_n_sig + recarray['n_sig'] = n_sig + recarray['mean_n_sig_0'] = mean_n_sig_0 + recarray['ts'] = ts + for (param_name, param_value) in global_params_dict.items(): + recarray[param_name] = param_value + + return recarray + + +class SingleSourceMultiDatasetLLHRatioAnalysis( + LLHRatioAnalysis): + """This is an analysis class that implements a log-likelihood ratio analysis + for multiple datasets assuming a single source. + It is a special case of the multi-source analysis. + + For more information how to construct and run the analysis see the + documentation of the :class:`~skyllh.core.analysis.LLHRatioAnalysis` class. + """ + def __init__( + self, + shg_mgr, + pmm, + test_statistic, + bkg_gen_method=None, + bkg_generator_cls=None, + sig_generator_cls=None, + **kwargs): + """Creates a new time-integrated point-like source analysis assuming a + single source. Parameters ---------- - events_list : list of DataFieldRecordArray instances - The list of DataFieldRecordArray instances holding the data events - to use for the log-likelihood function evaluation. The data arrays - for the datasets must be in the same order than the added datasets. - n_events_list : list of int | None - The list of the number of events of each data set. If set to None, - the number of events is taken from the size of the given events - arrays. - tl : TimeLord | None - The optional TimeLord instance that should be used for timing - measurements. 
+ shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager, which defines the groups of + source hypotheses, their flux model, and their detector signal + efficiency implementation method. + pmm : instance of ParameterModelMapper + The ParameterModelMapper instance managing the global set of + parameters and their relation to individual models, e.g. sources. + test_statistic : TestStatistic instance + The TestStatistic instance that defines the test statistic function + of the analysis. + bkg_gen_method : instance of BackgroundGenerationMethod | None + The instance of BackgroundGenerationMethod that will be used to + generate background events for a new analysis trial. This can be set + to None, if no background events have to get generated. + bkg_generator_cls : class of BackgroundGeneratorBase | None + The background generator class used to create the background + generator instance. + If set to ``None``, the + :class:`skyllh.core.background_generator.BackgroundGenerator` class + is used. + sig_generator_cls : SignalGenerator class | None + The signal generator class that should be used to create the signal + generator instance for multiple datasets. If set to None, the + :class:`~skyllh.core.signal_generator.MultiDatasetSignalGenerator` + class is used. """ - if(n_events_list is None): - n_events_list = [None] * len(events_list) - - for (idx, (tdm, events, n_events, evt_sel_method)) in enumerate(zip( - self._tdm_list, events_list, n_events_list, - self._event_selection_method_list)): - - # Initialize the trial data manager with the given raw events. 
- self._tdm_list[idx].initialize_trial( - self._src_hypo_group_manager, events, n_events, evt_sel_method, - tl=tl) - - self._llhratio.initialize_for_new_trial(tl=tl) + super().__init__( + shg_mgr=shg_mgr, + pmm=pmm, + test_statistic=test_statistic, + bkg_gen_method=bkg_gen_method, + bkg_generator_cls=bkg_generator_cls, + sig_generator_cls=sig_generator_cls, + **kwargs) - def maximize_llhratio(self, rss, tl=None): - """Maximizes the log-likelihood ratio function, by minimizing its - negative. + def construct_llhratio( + self, + minimizer, + ppbar=None): + """Constructs the log-likelihood (LLH) ratio function of the analysis. + This setups all the necessary analysis objects like detector signal + yields and dataset signal weights, constructs the log-likelihood ratio + functions for each dataset and the final composite LLH ratio function. Parameters ---------- - rss : RandomStateService instance - The RandomStateService instance that should be used to draw random - numbers from. It is used by the minimizer to generate random - fit parameter initial values. - tl : TimeLord instance | None - The optional TimeLord instance that should be used to time the - maximization of the LLH ratio function. + minimizer : instance of Minimizer + The instance of Minimizer that should be used to minimize the + negative of the log-likelihood ratio function. + ppbar : instance of ProgressBar | None + The instance of ProgressBar of the optional parent progress bar. Returns ------- - fitparamset : FitParameterSet instance - The instance of FitParameterSet holding the global fit parameter - definitions used in the maximization process. - log_lambda_max : float - The value of the log-likelihood ratio function at its maximum. - fitparam_values : (N_fitparam,)-shaped 1D ndarray - The ndarray holding the global fit parameter values. - By definition, the first element is the value of the fit parameter - ns. 
- status : dict - The dictionary with status information about the maximization - process, i.e. from the minimizer. + llhratio : instance of MultiDatasetTCLLHRatio + The instance of MultiDatasetTCLLHRatio that implements the + log-likelihood-ratio function of the analysis. + """ + # Create the list of log-likelihood ratio functions, one for each + # dataset. + llhratio_list = [ + ZeroSigH0SingleDatasetTCLLHRatio( + pmm=self._pmm, + minimizer=minimizer, + shg_mgr=self._shg_mgr, + tdm=tdm, + pdfratio=pdfratio + ) + for (tdm, pdfratio) in zip(self._tdm_list, self._pdfratio_list) + ] + + # Create the final multi-dataset log-likelihood ratio function. + llhratio = MultiDatasetTCLLHRatio( + pmm=self._pmm, + minimizer=minimizer, + src_detsigyield_weights_service=self.src_detsigyield_weights_service, + ds_sig_weight_factors_service=self.ds_sig_weight_factors_service, + llhratio_list=llhratio_list) + + return llhratio + + def change_source( + self, + source): + """Changes the source of the analysis to the given source. It makes the + necessary changes to all the objects of the analysis. + + Parameters + ---------- + source : instance of SourceModel + The instance of SourceModel describing the new source. """ - (log_lambda_max, fitparam_values, status) = self._llhratio.maximize( - rss, self._fitparamset, tl=tl) - return (self._fitparamset, log_lambda_max, fitparam_values, status) + if not isinstance(source, SourceModel): + raise TypeError( + 'The source argument must be an instance of SourceModel! ' + f'Its current type is {classname(source)}.') - def calculate_fluxmodel_scaling_factor(self, mean_ns, fitparam_values): + # Change the source in the SourceHypoGroupManager instance. + # Because this is a single source analysis, there can only be one source + # hypothesis group defined. 
+ self._shg_mgr.shg_list[0].source_list[0] = source + + self.change_shg_mgr( + shg_mgr=self._shg_mgr) + + def calculate_fluxmodel_scaling_factor( + self, + mean_ns, + fitparam_values): """Calculates the factor the source's fluxmodel has to be scaled in order to obtain the given mean number of signal events in the detector. @@ -1382,30 +1752,33 @@ def calculate_fluxmodel_scaling_factor(self, mean_ns, fitparam_values): mean_ns : float The mean number of signal events in the detector for which the scaling factor is calculated. - fitparam_values : (N_fitparams,)-shaped 1D ndarray - The ndarray holding the fit parameter values that should be used for - the flux calculation. + fitparam_values : instance of numpy ndarray + The (N_fitparam,)-shaped 1D ndarray holding the values of the global + fit parameters, that should be used for the flux calculation. + The order of the values must match the order the fit parameters were + defined in the parameter model mapper. Returns ------- factor : float - The factor the given fluxmodel needs to be scaled in order to obtain - the given mean number of signal events in the detector. + The factor the source's fluxmodel needs to be scaled in order to + obtain the given mean number of signal events in the detector. """ - fitparams_arr = self._src_fitparam_mapper.get_fitparams_array( - fitparam_values) - - # We use the DatasetSignalWeights class instance of this analysis to - # calculate the detector signal yield for all datasets. - dataset_signal_weights = self._llhratio.dataset_signal_weights + src_params_recarray =\ + self._pmm.create_src_params_recarray( + gflp_values=fitparam_values) # Calculate the detector signal yield, i.e. the mean number of signal # events in the detector, for the given reference flux model. 
         mean_ns_ref = 0
-        detsigyields = dataset_signal_weights.detsigyield_arr[0]
-        for detsigyield in detsigyields:
+
+        detsigyields = self.detsigyield_service.arr[:, 0]
+        for (j, detsigyield) in enumerate(detsigyields):
+            src_recarray =\
+                self.src_detsigyield_weights_service.src_recarray_list_list[j][0]
             (Yj, Yj_grads) = detsigyield(
-                dataset_signal_weights._src_arr_list[0], fitparams_arr)
+                src_recarray=src_recarray,
+                src_params_recarray=src_params_recarray)
             mean_ns_ref += Yj[0]
 
         factor = mean_ns / mean_ns_ref
@@ -1413,71 +1786,78 @@ def calculate_fluxmodel_scaling_factor(self, mean_ns, fitparam_values):
         return factor
 
 
-class TimeIntegratedMultiDatasetMultiSourceAnalysis(
-        TimeIntegratedMultiDatasetSingleSourceAnalysis):
-    """This is an analysis class that implements a time-integrated LLH ratio
-    analysis for multiple datasets assuming multiple sources.
+class MultiSourceMultiDatasetLLHRatioAnalysis(
+        LLHRatioAnalysis):
+    """This is an analysis class that implements a log-likelihood ratio analysis
+    for multiple datasets assuming multiple sources.
 
-    To run this analysis the following procedure applies:
-
-        1. Add the datasets and their spatial and energy PDF ratio instances
-           via the :meth:`.add_dataset` method.
-        2. Construct the log-likelihood ratio function via the
-           :meth:`construct_llhratio` method.
-        3. Initialize a trial via the :meth:`initialize_trial` method.
-        4. Fit the global fit parameters to the trial data via the
-           :meth:`maximize_llhratio` method.
+    For more information how to construct and run the analysis see the
+    documentation of the :class:`~skyllh.core.analysis.LLHRatioAnalysis` class.
    """
    def __init__(
-            self, src_hypo_group_manager, src_fitparam_mapper, fitparam_ns,
-            test_statistic, bkg_gen_method=None, sig_generator_cls=None):
-        """Creates a new time-integrated point-like source analysis assuming
-        multiple sources.
+            self,
+            shg_mgr,
+            pmm,
+            test_statistic,
+            bkg_gen_method=None,
+            bkg_generator_cls=None,
+            sig_generator_cls=None,
+            **kwargs):
+        """Constructs a new instance of MultiSourceMultiDatasetLLHRatioAnalysis.
 
         Parameters
         ----------
-        src_hypo_group_manager : instance of SourceHypoGroupManager
+        shg_mgr : instance of SourceHypoGroupManager
             The instance of SourceHypoGroupManager, which defines the groups of
             source hypotheses, their flux model, and their detector signal
-            efficiency implementation method.
-        src_fitparam_mapper : instance of SingleSourceFitParameterMapper
-            The instance of SingleSourceFitParameterMapper defining the global
-            fit parameters and their mapping to the source fit parameters.
-        fitparam_ns : FitParameter instance
-            The FitParameter instance defining the fit parameter ns.
+            yield implementation method.
+        pmm : instance of ParameterModelMapper
+            The ParameterModelMapper instance managing the global set of
+            parameters and their relation to individual models, e.g. sources.
         test_statistic : TestStatistic instance
             The TestStatistic instance that defines the test statistic function
             of the analysis.
         bkg_gen_method : instance of BackgroundGenerationMethod | None
-            The instance of BackgroundGenerationMethod that will be used to
-            generate background events for a new analysis trial. This can be set
-            to None, if no background events have to get generated.
-        sig_generator_cls : SignalGeneratorBase class | None
-            The signal generator class used to create the signal generator
-            instance.
-            If set to None, the `SignalGenerator` class is used.
+            The instance of BackgroundGenerationMethod that should be used to
+            generate background events for pseudo data. This can be set to None,
+            if there is no need to generate background events.
+        bkg_generator_cls : class of BackgroundGeneratorBase | None
+            The background generator class used to create the background
+            generator instance.
+ If set to ``None``, the + :class:`skyllh.core.background_generator.BackgroundGenerator` class + is used. + sig_generator_cls : subclass of SignalGenerator| None + The signal generator class that should be used to create the signal + generator instance handling multiple datasets. + If set to None, the + :class:`~skyllh.core.signal_generator.MultiDatasetSignalGenerator` + class is used. """ super().__init__( - src_hypo_group_manager=src_hypo_group_manager, - src_fitparam_mapper=src_fitparam_mapper, - fitparam_ns=fitparam_ns, + shg_mgr=shg_mgr, + pmm=pmm, test_statistic=test_statistic, bkg_gen_method=bkg_gen_method, - sig_generator_cls=sig_generator_cls) + bkg_generator_cls=bkg_generator_cls, + sig_generator_cls=sig_generator_cls, + **kwargs) - def construct_llhratio(self, minimizer, ppbar=None): - """Constructs the log-likelihood-ratio (LLH-ratio) function of the - analysis. This setups all the necessary analysis - objects like detector signal efficiencies and dataset signal weights, - constructs the log-likelihood ratio functions for each dataset and the - final composite llh ratio function. + def construct_llhratio( + self, + minimizer, + ppbar=None): + """Constructs the log-likelihood (LLH) ratio function of the analysis. + This setups all the necessary analysis objects like detector signal + yields and dataset signal weights, constructs the log-likelihood ratio + functions for each dataset and the final composite LLH ratio function. Parameters ---------- minimizer : instance of Minimizer The instance of Minimizer that should be used to minimize the negative of the log-likelihood ratio function. - ppbar : ProgressBar instance | None + ppbar : instance of ProgressBar | None The instance of ProgressBar of the optional parent progress bar. Returns @@ -1486,99 +1866,79 @@ def construct_llhratio(self, minimizer, ppbar=None): The instance of MultiDatasetTCLLHRatio that implements the log-likelihood-ratio function of the analysis. 
""" - # Create the detector signal yield instances for each dataset. - # Multi source analysis has to also support multiple source hypothesis - # groups. - # Initialize empty (N_source_hypo_groups, N_datasets)-shaped ndarray. - detsigyield_array = np.empty( - (self._src_hypo_group_manager.n_src_hypo_groups, - len(self.dataset_list)), - dtype=object - ) - - for (g, shg) in enumerate(self._src_hypo_group_manager._src_hypo_group_list): - fluxmodel = shg.fluxmodel - detsigyield_implmethod_list = shg.detsigyield_implmethod_list - - if((len(detsigyield_implmethod_list) != 1) and - (len(detsigyield_implmethod_list) != self.n_datasets)): - raise ValueError( - 'The number of detector signal yield ' - 'implementation methods is not 1 and does not match the number ' - 'of used datasets in the analysis!') - pbar = ProgressBar(len(self.dataset_list), parent=ppbar).start() - for (j, (dataset, data)) in enumerate(zip(self.dataset_list, - self.data_list)): - if(len(detsigyield_implmethod_list) == 1): - # Only one detsigyield implementation method was defined, so we - # use it for all datasets. - detsigyield_implmethod = detsigyield_implmethod_list[0] - else: - detsigyield_implmethod = detsigyield_implmethod_list[j] - - detsigyield = detsigyield_implmethod.construct_detsigyield( - dataset, data, fluxmodel, data.livetime, ppbar=pbar) - detsigyield_array[g, j] = detsigyield - pbar.increment() - pbar.finish() - - # For multiple datasets we need a dataset signal weights instance in - # order to distribute ns over the different datasets. - dataset_signal_weights = MultiSourceDatasetSignalWeights( - self._src_hypo_group_manager, self._src_fitparam_mapper, - detsigyield_array) - # Create the list of log-likelihood ratio functions, one for each # dataset. 
- llhratio_list = [] - for j in range(self.n_datasets): - tdm = self._tdm_list[j] - pdfratio_list = self._pdfratio_list_list[j] - llhratio = MultiSourceZeroSigH0SingleDatasetTCLLHRatio( - minimizer, - self._src_hypo_group_manager, - self._src_fitparam_mapper, - tdm, - pdfratio_list, - detsigyield_array[:, j] + llhratio_list = [ + ZeroSigH0SingleDatasetTCLLHRatio( + pmm=self._pmm, + minimizer=minimizer, + shg_mgr=self._shg_mgr, + tdm=tdm, + pdfratio=SourceWeightedPDFRatio( + dataset_idx=dataset_idx, + src_detsigyield_weights_service=self.src_detsigyield_weights_service, + pdfratio=pdfratio) ) - llhratio_list.append(llhratio) + for (dataset_idx, (tdm, pdfratio)) in enumerate( + zip(self._tdm_list, self._pdfratio_list)) + ] # Create the final multi-dataset log-likelihood ratio function. llhratio = MultiDatasetTCLLHRatio( - minimizer, dataset_signal_weights, llhratio_list) + pmm=self._pmm, + minimizer=minimizer, + src_detsigyield_weights_service=self.src_detsigyield_weights_service, + ds_sig_weight_factors_service=self.ds_sig_weight_factors_service, + llhratio_list=llhratio_list) return llhratio - def initialize_trial(self, events_list, n_events_list=None, tl=None): - """This method initializes the multi-dataset log-likelihood ratio - function with a new set of given trial data. This is a low-level method. - For convenient methods see the `unblind` and `do_trial` methods. + def calculate_fluxmodel_scaling_factors( + self, + mean_ns, + fitparam_values): + """Calculates the factors the source's fluxmodel has to be scaled + in order to obtain the given mean number of signal events in the + detector. Parameters ---------- - events_list : list of DataFieldRecordArray instances - The list of DataFieldRecordArray instances holding the data events - to use for the log-likelihood function evaluation. The data arrays - for the datasets must be in the same order than the added datasets. - n_events_list : list of int | None - The list of the number of events of each data set. 
If set to None, - the number of events is taken from the size of the given events - arrays. - tl : TimeLord | None - The optional TimeLord instance that should be used for timing - measurements. + mean_ns : float + The mean number of signal events in the detector for which the + scaling factors will be calculated. + fitparam_values : instance of numpy ndarray + The (N_fitparam,)-shaped 1D ndarray holding the values of the global + fit parameters, which should be used for the flux calculation. + The order of the values must match the order the fit parameters were + defined in the parameter model mapper. + + Returns + ------- + factors : instance of numpy ndarray + The (N_sources,)-shaped numpy ndarray of float holding the factors + the fluxmodels of the sources need to be scaled in order to obtain + the given mean number of signal events in the detector. """ - if(n_events_list is None): - n_events_list = [None] * len(events_list) + src_params_recarray =\ + self._pmm.create_src_params_recarray( + gflp_values=fitparam_values) - for (idx, (tdm, events, n_events, evt_sel_method)) in enumerate(zip( - self._tdm_list, events_list, n_events_list, - self._event_selection_method_list)): + # Calculate the detector signal yield, i.e. the mean number of signal + # events in the detector, for the given reference flux model. + mean_ns_ref = np.zeros((self._shg_mgr.n_sources,), dtype=np.float64) - # Initialize the trial data manager with the given raw events. 
- self._tdm_list[idx].initialize_trial( - self._src_hypo_group_manager, events, n_events, evt_sel_method, - store_src_ev_idxs=True, tl=tl) + for (g, shg) in enumerate(self._shg_mgr.shg_list): + shg_src_mask = self._shg_mgr.get_src_mask_of_shg(shg_idx=g) + + detsigyields = self.detsigyield_service.arr[:, g] + for (j, detsigyield) in enumerate(detsigyields): + src_recarray =\ + self.src_detsigyield_weights_service.src_recarray_list_list[j][g] + (Yj, Yj_grads) = detsigyield( + src_recarray=src_recarray, + src_params_recarray=src_params_recarray) + mean_ns_ref[shg_src_mask] += Yj + + factors = mean_ns / mean_ns_ref - self._llhratio.initialize_for_new_trial(tl=tl) + return factors diff --git a/skyllh/core/background_generation.py b/skyllh/core/background_generation.py index bda4ec42fd..b9b7d99029 100644 --- a/skyllh/core/background_generation.py +++ b/skyllh/core/background_generation.py @@ -3,47 +3,66 @@ import abc import numpy as np -from skyllh.core.optimize import ( + +from skyllh.core.config import ( + CFG, +) +from skyllh.core.debugging import ( + get_logger, +) +from skyllh.core.event_selection import ( AllEventSelectionMethod, - EventSelectionMethod + EventSelectionMethod, ) from skyllh.core.py import ( + classname, float_cast, func_has_n_args, - issequenceof + issequenceof, +) +from skyllh.core.scrambling import ( + DataScrambler, +) +from skyllh.core.timing import ( + TaskTimer, ) -from skyllh.core.debugging import get_logger -from skyllh.core.scrambling import DataScrambler -from skyllh.core.timing import TaskTimer -from skyllh.core.config import CFG logger = get_logger(__name__) -class BackgroundGenerationMethod(object, metaclass=abc.ABCMeta): +class BackgroundGenerationMethod( + object, + metaclass=abc.ABCMeta): """This is the abstract base class for a detector specific background generation method. """ - def __init__(self): + def __init__(self, **kwargs): """Constructs a new background generation method instance. 
""" - super(BackgroundGenerationMethod, self).__init__() + super().__init__(**kwargs) - def change_source_hypo_group_manager(self, src_hypo_group_manager): + def change_shg_mgr(self, shg_mgr): """Notifies the background generation method about an updated SourceHypoGroupManager instance. Parameters ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The new SourceHypoGroupManager instance. + shg_mgr : instance of SourceHypoGroupManager + The new instance of SourceHypoGroupManager. """ pass @abc.abstractmethod - def generate_events(self, rss, dataset, data, mean, tl=None, **kwargs): + def generate_events( + self, + rss, + dataset, + data, + mean, + tl=None, + **kwargs): """This method is supposed to generate a `mean` number of background events for the given dataset and its data. @@ -71,18 +90,19 @@ def generate_events(self, rss, dataset, data, mean, tl=None, **kwargs): ------- n_bkg : int The number of generated background events. - bkg_events : DataFieldRecordArray + bkg_events : instance of DataFieldRecordArray The instance of DataFieldRecordArray holding the generated background events. The number of events in this array might be less - than `n_bkg` if an event selection method was used for optimization - purposes. The difference `n_bkg - len(bkg_events)` is then the - number of pure background events in the generated background event - sample. + than ``n_bkg`` if an event selection method was used for + optimization purposes. The difference ``n_bkg - len(bkg_events)`` is + then the number of pure background events in the generated + background event sample. """ pass -class MCDataSamplingBkgGenMethod(BackgroundGenerationMethod): +class MCDataSamplingBkgGenMethod( + BackgroundGenerationMethod): """This class implements the method to generate background events from monte-carlo (MC) data by sampling events from the MC data set according to a probability value given for each event. 
Functions can be provided to get the @@ -90,9 +110,15 @@ class MCDataSamplingBkgGenMethod(BackgroundGenerationMethod): event. """ def __init__( - self, get_event_prob_func, get_mean_func=None, unique_events=False, - data_scrambler=None, mc_inplace_scrambling=False, - keep_mc_data_fields=None, pre_event_selection_method=None): + self, + get_event_prob_func, + get_mean_func=None, + unique_events=False, + data_scrambler=None, + mc_inplace_scrambling=False, + keep_mc_data_fields=None, + pre_event_selection_method=None, + **kwargs): """Creates a new instance of the MCDataSamplingBkgGenMethod class. Parameters @@ -100,26 +126,32 @@ def __init__( get_event_prob_func : callable The function to get the background probability of each monte-carlo event. The call signature of this function must be - `__call__(dataset, data, events)`, - where `dataset` and `data` are `Dataset` and `DatasetData` instances - of the data set for which background events needs to get generated. - The `events` argument holds the actual set of events, for which the - background event probabilities need to get calculated. + + __call__(dataset, data, events) + + where ``dataset`` is an instance of Dataset and ``data`` is an + instance of DatasetData of the data set for which background events + needs to get generated. The ``events`` argument holds the actual + set of events, for which the background event probabilities need to + get calculated. get_mean_func : callable | None The function to get the mean number of background events. The call signature of this function must be - `__call__(dataset, data, events)`, - where `dataset` and `data` are `Dataset` and `DatasetData` instances - of the data set for which background events needs to get generated. - The `events` argument holds the actual set of events, for which the - mean number of background events should get calculated. 
- This argument can be `None`, which means that the mean number of - background events to generate needs to get specified through the - `generate_events` method. However, if an event selection method is - provided, this argument cannot be None. + + __call__(dataset, data, events) + + where ``dataset`` is an instance of Dataset and ``data`` is an + instance of DatasetData of the data set for which background events + needs to get generated. The `events` argument holds the actual set + of events, for which the mean number of background events should get + calculated. This argument can be `None`, which means that the mean + number of background events to generate needs to get specified + through the ``generate_events`` method. However, if an event + selection method is provided, this argument cannot be ``None``! unique_events : bool - Flag if unique events should be drawn from the monte-carlo (True), - or if events can be drawn several times (False). Default is False. + Flag if unique events should be drawn from the monte-carlo + (``True``), or if events can be drawn several times (``False``). + Default is ``False``. data_scrambler : instance of DataScrambler | None If set to an instance of DataScrambler, the drawn monte-carlo background events will get scrambled. This can ensure more @@ -140,7 +172,7 @@ def __init__( event generation. Using this pre-selection a large portion of the MC data can be reduced prior to background event generation. 
""" - super(MCDataSamplingBkgGenMethod, self).__init__() + super().__init__(**kwargs) self.get_event_prob_func = get_event_prob_func self.get_mean_func = get_mean_func @@ -150,9 +182,10 @@ def __init__( self.keep_mc_data_field_names = keep_mc_data_fields self.pre_event_selection_method = pre_event_selection_method - if((pre_event_selection_method is not None) and (get_mean_func is None)): - raise ValueError('If an event pre-selection method is provided, a ' - 'get_mean_func needs to be provided as well!') + if (pre_event_selection_method is not None) and (get_mean_func is None): + raise ValueError( + 'If an event pre-selection method is provided, a ' + 'get_mean_func needs to be provided as well!') # Define cache members to cache the background probabilities for each # monte-carlo event. The probabilities change only if the data changes. @@ -168,14 +201,17 @@ def get_event_prob_func(self): monte-carlo event of the data set. """ return self._get_event_prob_func + @get_event_prob_func.setter def get_event_prob_func(self, func): - if(not callable(func)): - raise TypeError('The get_event_prob_func property must be a ' - 'callable!') - if(not func_has_n_args(func, 3)): - raise TypeError('The function provided for the get_event_prob_func ' - 'property must have 3 arguments!') + if not callable(func): + raise TypeError( + 'The get_event_prob_func property must be a callable! ' + f'Its current type is {classname(func)}.') + if not func_has_n_args(func, 3): + raise TypeError( + 'The function provided for the get_event_prob_func property ' + 'must have 3 arguments!') self._get_event_prob_func = func @property @@ -186,15 +222,21 @@ def get_mean_func(self): `generate_events` method. 
""" return self._get_mean_func + @get_mean_func.setter def get_mean_func(self, func): - if(func is not None): - if(not callable(func)): - raise TypeError('The get_mean_func property must be a ' - 'callable!') - if(not func_has_n_args(func, 3)): - raise TypeError('The function provided for the get_mean_func ' - 'property must have 3 arguments!') + if func is None: + self._get_mean_func = None + return + + if not callable(func): + raise TypeError( + 'The get_mean_func property must be a callable! ' + f'Its current type is {classname(func)}.') + if not func_has_n_args(func, 3): + raise TypeError( + 'The function provided for the get_mean_func property must ' + 'have 3 arguments!') self._get_mean_func = func @property @@ -203,10 +245,13 @@ def unique_events(self): or if the same event can be drawn multiple times from the monte-carlo. """ return self._unique_events + @unique_events.setter def unique_events(self, b): - if(not isinstance(b, bool)): - raise TypeError('The unique_events property must be of type bool!') + if not isinstance(b, bool): + raise TypeError( + 'The unique_events property must be of type bool! ' + f'Its current type is {classname(b)}.') self._unique_events = b @property @@ -217,12 +262,17 @@ def data_scrambler(self): `None`, if no data scrambling should be used. """ return self._data_scrambler + @data_scrambler.setter def data_scrambler(self, scrambler): - if(scrambler is not None): - if(not isinstance(scrambler, DataScrambler)): - raise TypeError('The data_scrambler property must be an instance ' - 'of DataScrambler!') + if scrambler is None: + self._data_scrambler = None + + if not isinstance(scrambler, DataScrambler): + raise TypeError( + 'The data_scrambler property must be an instance of ' + 'DataScrambler! ' + f'Its current type is {classname(scrambler)}.') self._data_scrambler = scrambler @property @@ -231,11 +281,13 @@ def mc_inplace_scrambling(self): inplace, i.e. without creating a copy of the MC data first. 
""" return self._mc_inplace_scrambling + @mc_inplace_scrambling.setter def mc_inplace_scrambling(self, b): - if(not isinstance(b, bool)): - raise TypeError('The mc_inplace_scrambling property must be of ' - 'type bool!') + if not isinstance(b, bool): + raise TypeError( + 'The mc_inplace_scrambling property must be of type bool! ' + f'Its current type is {classname(b)}.') self._mc_inplace_scrambling = b @property @@ -246,15 +298,17 @@ def keep_mc_data_field_names(self): will get droped due to computational efficiency reasons. """ return self._keep_mc_data_field_names + @keep_mc_data_field_names.setter def keep_mc_data_field_names(self, names): - if(names is None): + if names is None: names = [] - elif(isinstance(names, str)): - names = [ names ] - elif(not issequenceof(names, str)): - raise TypeError('The keep_mc_data_field_names must be None, an ' - 'instance of type str, or a sequence of objects of type str!') + elif isinstance(names, str): + names = [names] + elif not issequenceof(names, str): + raise TypeError( + 'The keep_mc_data_field_names must be None, an instance of ' + 'type str, or a sequence of instances of type str!') self._keep_mc_data_field_names = names @property @@ -263,40 +317,53 @@ def pre_event_selection_method(self): which can be considered for background event generation. """ return self._pre_event_selection_method + @pre_event_selection_method.setter def pre_event_selection_method(self, method): - if(method is not None): - if(not isinstance(method, EventSelectionMethod)): - raise TypeError('The pre_event_selection_method property must ' - 'be None, or an instance of EventSelectionMethod!') - # If the event selection method selects all events, it's equivalent - # to have it set to None, because then no operation has to be - # performed. 
- if(isinstance(method, AllEventSelectionMethod)): - method = None + if method is None: + self._pre_event_selection_method = None + return + + if not isinstance(method, EventSelectionMethod): + raise TypeError( + 'The pre_event_selection_method property must be None, or an ' + 'instance of EventSelectionMethod!') + + # If the event selection method selects all events, it's equivalent + # to have it set to None, because then no operation has to be + # performed. + if isinstance(method, AllEventSelectionMethod): + method = None + self._pre_event_selection_method = method - def change_source_hypo_group_manager(self, src_hypo_group_manager): - """Changes the SourceHypoGroupManager instance of the - pre-event-selection method. Also it invalides the data cache of this + def change_shg_mgr(self, shg_mgr): + """Changes the instance of SourceHypoGroupManager of the + pre-event-selection method. Also it invalidates the data cache of this background generation method. Parameters ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The new SourceHypoGroupManager instance. + shg_mgr : instance of SourceHypoGroupManager + The new instance of SourceHypoGroupManager. """ - if(self._pre_event_selection_method is not None): - self._pre_event_selection_method.change_source_hypo_group_manager( - src_hypo_group_manager) + if self._pre_event_selection_method is not None: + self._pre_event_selection_method.change_shg_mgr( + shg_mgr=shg_mgr) # Invalidate the data cache. self._cache_data_id = None def generate_events( - self, rss, dataset, data, mean=None, poisson=True, tl=None): - """Generates a `mean` number of background events for the given dataset - and its data. + self, + rss, + dataset, + data, + mean=None, + poisson=True, + tl=None): + """Generates a ``mean`` number of background events for the given + dataset and its data. Parameters ---------- @@ -314,11 +381,11 @@ def generate_events( Can be `None`. 
In that case the mean number of background events is obtained through the `get_mean_func` function. poisson : bool - If set to True (default), the actual number of generated background - events will be drawn from a Poisson distribution with the given mean - value of background events. - If set to False, the argument ``mean`` specifies the actual number - of generated background events. + If set to ``True`` (default), the actual number of generated + background events will be drawn from a Poisson distribution with the + given mean number of background events. + If set to ``False``, the argument ``mean`` specifies the actual + number of generated background events. tl : instance of TimeLord | None The optional instance of TimeLord that should be used to collect timing information about this method. @@ -332,7 +399,7 @@ def generate_events( background events. The number of events can be less than `n_bkg` if an event selection method is used. """ - tracing_enabled = CFG['debugging']['enable_tracing'] + tracing = CFG['debugging']['enable_tracing'] # Create aliases to avoid dot-lookup. self__pre_event_selection_method = self._pre_event_selection_method @@ -341,8 +408,8 @@ def generate_events( # background probabilities for each monte-carlo event and a new mean # number of background events. 
data_id = id(data) - if(self._cache_data_id != data_id): - if(tracing_enabled): + if self._cache_data_id != data_id: + if tracing: logger.debug( f'DatasetData instance id of dataset "{dataset.name}" ' f'changed from {self._cache_data_id} to {data_id}') @@ -359,35 +426,51 @@ def generate_events( )) data_mc = data.mc.copy(keep_fields=keep_field_names) - if(self._get_mean_func is not None): - with TaskTimer(tl, 'Calculate total MC background mean.'): + if self._get_mean_func is not None: + with TaskTimer( + tl, + 'Calculate total MC background mean.'): self._cache_mean = self._get_mean_func( - dataset, data, data_mc) + dataset=dataset, + data=data, + events=data_mc) - with TaskTimer(tl, 'Calculate MC background event probability cache.'): + with TaskTimer( + tl, + 'Calculate MC background event probability cache.'): self._cache_mc_event_bkg_prob = self._get_event_prob_func( - dataset, data, data_mc) - - if(self__pre_event_selection_method is not None): - with TaskTimer(tl, 'Pre-select MC events.'): + dataset=dataset, + data=data, + events=data_mc) + + if self__pre_event_selection_method is not None: + with TaskTimer( + tl, + 'Pre-select MC events.'): (self._cache_mc_pre_selected, - mc_pre_selected_mask_idxs) =\ - self__pre_event_selection_method.select_events( - data_mc, ret_mask_idxs=True, tl=tl) - self._cache_mc_event_bkg_prob_pre_selected = self._cache_mc_event_bkg_prob[mc_pre_selected_mask_idxs] + mc_pre_selected_src_evt_idxs, + mc_pre_selected_idxs) =\ + self__pre_event_selection_method.select_events( + events=data_mc, + ret_original_evt_idxs=True, + tl=tl) + self._cache_mc_event_bkg_prob_pre_selected = np.take( + self._cache_mc_event_bkg_prob, mc_pre_selected_idxs) else: self._cache_mc_pre_selected = data_mc - - if(mean is None): - if(self._cache_mean is None): - raise ValueError('No mean number of background events and no ' + if mean is None: + if self._cache_mean is None: + raise ValueError( + 'No mean number of background events and no ' 'get_mean_func were 
specified! One of the two must be ' 'specified!') mean = self._cache_mean else: - mean = float_cast(mean, 'The mean number of background events must ' - 'be castable to type float!') + mean = float_cast( + mean, + 'The mean number of background events must be castable to type ' + 'float!') # Draw the number of background events from a poisson distribution with # the given mean number of background events. This will be the number of @@ -400,19 +483,21 @@ def generate_events( # Calculate the mean number of background events for the pre-selected # MC events. - if(self__pre_event_selection_method is None): + if self__pre_event_selection_method is None: # No selection at all, use the total mean. mean_selected = mean else: with TaskTimer(tl, 'Calculate selected MC background mean.'): mean_selected = self._get_mean_func( - dataset, data, data_mc_selected) + dataset=dataset, + data=data, + events=data_mc_selected) # Calculate the actual number of background events for the selected # events. p_binomial = mean_selected / mean with TaskTimer(tl, 'Get p array.'): - if(self__pre_event_selection_method is None): + if self__pre_event_selection_method is None: p = self._cache_mc_event_bkg_prob else: # Pre-selection. @@ -431,10 +516,12 @@ def generate_events( bkg_events = data_mc_selected[bkg_event_indices] # Scramble the drawn MC events if requested. - if(self._data_scrambler is not None): + if self._data_scrambler is not None: with TaskTimer(tl, 'Scramble MC background data.'): bkg_events = self._data_scrambler.scramble_data( - rss, bkg_events, copy=False) + rss=rss, + data=bkg_events, + copy=False) # Remove MC specific data fields from the background events record # array. So the result contains only experimental data fields. 
The list diff --git a/skyllh/core/background_generator.py b/skyllh/core/background_generator.py index bb7b38fa10..734a333ef6 100644 --- a/skyllh/core/background_generator.py +++ b/skyllh/core/background_generator.py @@ -1,19 +1,30 @@ # -*- coding: utf-8 -*- -from skyllh.core.background_generation import BackgroundGenerationMethod -from skyllh.core.dataset import Dataset, DatasetData -from skyllh.core.py import issequenceof -from skyllh.core.random import RandomStateService +from skyllh.core.background_generation import ( + BackgroundGenerationMethod, +) +from skyllh.core.dataset import ( + Dataset, + DatasetData, +) +from skyllh.core.py import ( + classname, + issequenceof, +) -class BackgroundGenerator(object): - """This is the general background generator class, which provides a method - to generate background events. It does not depend on the detector or - background hypothesis. These dependencies are out-sourced to a class derived - from the BackgroundGenerationMethod class. +class BackgroundGeneratorBase( + object): + """This is the abstract base class for all background generator classes in + SkyLLH. It defines the interface for background generators. """ - def __init__(self, bkg_gen_method, dataset_list, data_list): - """Constructs a new background generator instance. + def __init__( + self, + bkg_gen_method, + dataset_list, + data_list, + **kwargs): + """Constructs a new background generator base instance. Parameters ---------- @@ -27,7 +38,7 @@ def __init__(self, bkg_gen_method, dataset_list, data_list): The list of DatasetData instances holding the actual data of each dataset. The order must match the order of ``dataset_list``. """ - super(BackgroundGenerator, self).__init__() + super().__init__(**kwargs) self.bkg_gen_method = bkg_gen_method self.dataset_list = dataset_list @@ -39,11 +50,14 @@ def bkg_gen_method(self): generate background events. 
""" return self._bkg_gen_method + @bkg_gen_method.setter def bkg_gen_method(self, method): - if(not isinstance(method, BackgroundGenerationMethod)): - raise TypeError('The bkg_gen_method property must be an instance ' - 'of BackgroundGenerationMethod!') + if not isinstance(method, BackgroundGenerationMethod): + raise TypeError( + 'The bkg_gen_method property must be an instance of ' + 'BackgroundGenerationMethod! ' + f'Its current type is {classname(method)}.') self._bkg_gen_method = method @property @@ -52,11 +66,14 @@ def dataset_list(self): generated for. """ return self._dataset_list + @dataset_list.setter def dataset_list(self, datasets): - if(not issequenceof(datasets, Dataset)): - raise TypeError('The dataset_list property must be a sequence of ' - 'Dataset instances!') + if not issequenceof(datasets, Dataset): + raise TypeError( + 'The dataset_list property must be a sequence of Dataset ' + 'instances! ' + f'Its current type is {classname(datasets)}.') self._dataset_list = list(datasets) @property @@ -66,26 +83,34 @@ def data_list(self): property. """ return self._data_list + @data_list.setter def data_list(self, datas): - if(not issequenceof(datas, DatasetData)): - raise TypeError('The data_list property must be a sequence of ' - 'DatasetData instances!') + if not issequenceof(datas, DatasetData): + raise TypeError( + 'The data_list property must be a sequence of DatasetData ' + 'instances! ' + f'Its current type is {classname(datas)}.') self._data_list = datas - def change_source_hypo_group_manager(self, src_hypo_group_manager): + def change_shg_mgr(self, shg_mgr): """Changes the SourceHypoGroupManager instance of the background generation method. Parameters ---------- - src_hypo_group_manager : instance of SourceHypoGroupManager - The new SourceHypoGroupManager instance. + shg_mgr : instance of SourceHypoGroupManager + The new instance of SourceHypoGroupManager. 
""" - self._bkg_gen_method.change_source_hypo_group_manager( - src_hypo_group_manager) + self._bkg_gen_method.change_shg_mgr( + shg_mgr=shg_mgr) - def generate_background_events(self, rss, dataset_idx, tl=None, **kwargs): + def generate_background_events( + self, + rss, + dataset_idx, + tl=None, + **kwargs): """Generates a mean number of background events for the given dataset. Parameters @@ -120,6 +145,44 @@ def generate_background_events(self, rss, dataset_idx, tl=None, **kwargs): data = self._data_list[dataset_idx] (n_bkg, bkg_events) = self._bkg_gen_method.generate_events( - rss, ds, data, tl=tl, **kwargs) + rss=rss, + dataset=ds, + data=data, + tl=tl, + **kwargs) return (n_bkg, bkg_events) + + +class BackgroundGenerator( + BackgroundGeneratorBase): + """This is the general background generator class, which provides a method + to generate background events. It does not depend on the detector or + background hypothesis. These dependencies are out-sourced to a class derived + from the BackgroundGenerationMethod class. + """ + def __init__( + self, + bkg_gen_method, + dataset_list, + data_list, + **kwargs): + """Constructs a new background generator instance. + + Parameters + ---------- + bkg_gen_method : instance of BackgroundGenerationMethod + The background event generation method, which should be used to + generate events. + dataset_list : list of Dataset instances + The list of Dataset instances for which background events should get + generated for. + data_list : list of DatasetData instances + The list of DatasetData instances holding the actual data of each + dataset. The order must match the order of ``dataset_list``. 
+ """ + super().__init__( + bkg_gen_method=bkg_gen_method, + dataset_list=dataset_list, + data_list=data_list, + **kwargs) diff --git a/skyllh/core/backgroundpdf.py b/skyllh/core/backgroundpdf.py index cb41ff7eb4..6d72f2f0f6 100644 --- a/skyllh/core/backgroundpdf.py +++ b/skyllh/core/backgroundpdf.py @@ -1,191 +1,143 @@ # -*- coding: utf-8 -*- -"""The ``backgroundpdf`` module contains possible background PDF models for the +"""The ``backgroundpdf`` module contains background PDF classes for the likelihood function. """ +import numpy as np + from skyllh.core.pdf import ( IsBackgroundPDF, MultiDimGridPDF, - NDPhotosplinePDF, TimePDF, ) - -import numpy as np +from skyllh.core.py import ( + classname, +) -class BackgroundMultiDimGridPDF(MultiDimGridPDF, IsBackgroundPDF): - """This class provides a multi-dimensional background PDF. The PDF is - created from pre-calculated PDF data on a grid. The grid data is +class BackgroundMultiDimGridPDF( + MultiDimGridPDF, + IsBackgroundPDF): + """This class provides a multi-dimensional background PDF defined on a grid. + The PDF is created from pre-calculated PDF data on a grid. The grid data is interpolated using a :class:`scipy.interpolate.RegularGridInterpolator` instance. """ def __init__( self, - axis_binnings, - path_to_pdf_splinetable=None, - pdf_grid_data=None, - norm_factor_func=None): - """Creates a new background PDF instance for a multi-dimensional PDF - given as PDF values on a grid. The grid data is interpolated with a - :class:`scipy.interpolate.RegularGridInterpolator` instance. As grid - points the bin edges of the axis binning definitions are used. + *args, + **kwargs): + """Creates a new :class:`~skyllh.core.pdf.MultiDimGridPDF` instance that + is also derived from :class:`~skyllh.core.pdf.IsBackgroundPDF`. - Parameters - ---------- - axis_binnings : sequence of BinningDefinition - The sequence of BinningDefinition instances defining the binning of - the PDF axes. 
The name of each BinningDefinition instance defines - the event field name that should be used for querying the PDF. - path_to_pdf_splinetable : str - The path to the file containing the spline table. - The spline table contains a pre-computed fit to pdf_grid_data. - pdf_grid_data : n-dimensional numpy ndarray - The n-dimensional numpy ndarray holding the PDF values at given grid - points. The grid points must match the bin edges of the given - BinningDefinition instances of the `axis_binnings` argument. - norm_factor_func : callable | None - The function that calculates a possible required normalization - factor for the PDF value based on the event properties. - The call signature of this function - must be `__call__(pdf, events, fitparams)`, where `pdf` is this PDF - instance, `events` is a numpy record ndarray holding the events for - which to calculate the PDF values, and `fitparams` is a dictionary - with the current fit parameter names and values. + For the documentation of arguments see the documentation of the + :meth:`~skyllh.core.pdf.MultiDimGridPDF.__init__` method. """ - super(BackgroundMultiDimGridPDF, self).__init__( - axis_binnings, path_to_pdf_splinetable, pdf_grid_data, norm_factor_func) + super().__init__(*args, **kwargs) -class BackgroundNDPhotosplinePDF(NDPhotosplinePDF, IsBackgroundPDF): - """This class provides a multi-dimensional background PDF created from a - n-dimensional photospline fit. The photospline package is used to evaluate - the PDF fit. +class BackgroundTimePDF( + TimePDF, + IsBackgroundPDF): + """This class provides a background time PDF class. """ def __init__( self, - axis_binnings, - param_set, - path_to_pdf_splinefit, - norm_factor_func=None): - """Creates a new background PDF instance for a n-dimensional photospline - PDF fit. + livetime, + time_flux_profile, + **kwargs): + """Creates a new signal time PDF instance for a given time flux profile + and detector live time. 
Parameters ---------- - axis_binnings : BinningDefinition | sequence of BinningDefinition - The sequence of BinningDefinition instances defining the binning of - the PDF axes. The name of each BinningDefinition instance defines - the event field name that should be used for querying the PDF. - param_set : Parameter | ParameterSet - The Parameter instance or ParameterSet instance defining the - parameters of this PDF. The ParameterSet holds the information - which parameters are fixed and which are floating (i.e. fitted). - path_to_pdf_splinefit : str - The path to the file containing the photospline fit. - norm_factor_func : callable | None - The function that calculates a possible required normalization - factor for the PDF value based on the event properties. - The call signature of this function must be - `__call__(pdf, tdm, params)`, where `pdf` is this PDF - instance, `tdm` is an instance of TrialDataManager holding the - event data for which to calculate the PDF values, and `params` is a - dictionary with the current parameter names and values. + livetime : instance of Livetime + An instance of Livetime, which provides the detector live-time + information. + time_flux_profile : instance of TimeFluxProfile + The signal's time flux profile. """ - super(BackgroundNDPhotosplinePDF, self).__init__( - axis_binnings=axis_binnings, - param_set=param_set, - path_to_pdf_splinefit=path_to_pdf_splinefit, - norm_factor_func=norm_factor_func - ) - + super().__init__( + pmm=None, + livetime=livetime, + time_flux_profile=time_flux_profile, + **kwargs) -class BackgroundUniformTimePDF(TimePDF, IsBackgroundPDF): + self._pd = None - def __init__(self, grl): - """Creates a new background time PDF instance as uniform background + def initialize_for_new_trial( + self, + tdm, + tl=None, + **kwargs): + """Initializes the background time PDF with new trial data. Because this + PDF does not depend on any parameters, the probability density values + can be pre-computed here. 
Parameters ---------- - grl : ndarray - Array of the detector good run list - - """ - super(BackgroundUniformTimePDF, self).__init__() - self.start = grl["start"][0] - self.end = grl["stop"][-1] - self.grl = grl - + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial event data for + which to calculate the PDF value. The following data fields must + exist: - def cdf(self, t): - """Compute the cumulative density function for the box pdf. This is - needed for normalization. - - Parameters - ---------- - t : float, ndarray - MJD times + ``'time'`` : float + The MJD time of the event. - Returns - ------- - cdf : float, ndarray - Values of cumulative density function evaluated at t + tl : instance of TimeLord | None + The optional TimeLord instance that should be used to measure + timing information. """ - t_start = self.grl["start"][0] - t_end = self.grl["stop"][-1] - t = np.atleast_1d(t) - - cdf = np.zeros(t.size, float) + times = tdm.get_data('time') - # values between start and stop times - mask = (t_start <= t) & (t <= t_end) - cdf[mask] = (t[mask] - t_start) / [t_end - t_start] + self._pd = np.zeros((len(times),), dtype=np.float64) - # take care of values beyond stop time in sample + # Get a mask of the event times which fall inside a detector on-time + # interval. + on = self._livetime.is_on(times) - return cdf + self._pd[on] = self._time_flux_profile(t=times[on]) / self._S - def norm_uptime(self): - """Compute the normalization with the dataset uptime. Distributions like - scipy.stats.norm are normalized (-inf, inf). - These must be re-normalized such that the function sums to 1 over the - finite good run list domain. 
- - Returns - ------- - norm : float - Normalization such that cdf sums to 1 over good run list domain + def get_pd( + self, + tdm, + params_recarray=None, + tl=None): """ + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial event data for + which to calculate the PDF value. The following data fields must + exist: - integral = (self.cdf(self.grl["stop"]) - self.cdf(self.grl["start"])).sum() - - if np.isclose(integral, 0): - return 0 - - return 1. / integral - - def get_prob(self, tdm, fitparams=None, tl=None): - """Calculates the background time probability density of each event. + ``'time'`` : float + The MJD time of the event. - tdm : TrialDataManager - Unused interface argument. - fitparams : None + params_recarray : None Unused interface argument. tl : instance of TimeLord | None - The optional instance of TimeLord that should be used to collect - timing information about this method. + The optional TimeLord instance that should be used to measure + timing information. Returns ------- - pd : array of float - The (N,)-shaped ndarray holding the probability density for each event. - grads : empty array of float - Does not depend on fit parameter, so no gradient. + pd : instance of numpy ndarray + The (N_events,)-shaped numpy ndarray holding the background + probability density value for each event. + grads : dict + The dictionary holding the gradients of the probability density + w.r.t. each global fit parameter. + The background PDF does not depend on any global fit parameter, + hence, this is an empty dictionary. 
""" - livetime = self.grl["stop"][-1] - self.grl["start"][0] - pd = 1./livetime - grads = np.array([], dtype=np.double) + if self._pd is None: + raise RuntimeError( + f'The {classname(self)} was not initialized with trial data!') + + grads = dict() - return (pd, grads) + return (self._pd, grads) diff --git a/skyllh/core/binning.py b/skyllh/core/binning.py index f9918f9f65..7b1de98c5b 100644 --- a/skyllh/core/binning.py +++ b/skyllh/core/binning.py @@ -2,9 +2,9 @@ import numpy as np -from scipy.linalg import solve - -from skyllh.core.py import classname +from skyllh.core.py import ( + classname, +) def get_bincenters_from_binedges(edges): @@ -22,6 +22,7 @@ def get_bincenters_from_binedges(edges): """ return 0.5*(edges[:-1] + edges[1:]) + def get_binedges_from_bincenters(centers): """Calculates the bin edges from the given bin center values. The bin center values must be evenly spaced. @@ -47,6 +48,7 @@ def get_binedges_from_bincenters(centers): return edges + def get_bin_indices_from_lower_and_upper_binedges(le, ue, values): """Returns the bin indices for the given values which must fall into bins defined by the given lower and upper bin edges. @@ -86,8 +88,8 @@ def get_bin_indices_from_lower_and_upper_binedges(le, ue, values): len(invalid_values), str(invalid_values), ue[-1])) m = ( - (values[:,np.newaxis] >= le[np.newaxis,:]) & - (values[:,np.newaxis] < ue[np.newaxis,:]) + (values[:, np.newaxis] >= le[np.newaxis, :]) & + (values[:, np.newaxis] < ue[np.newaxis, :]) ) idxs = np.nonzero(m)[1] @@ -98,14 +100,17 @@ class BinningDefinition(object): """The BinningDefinition class provides a structure to hold histogram binning definitions for an analyis. """ - def __init__(self, name, binedges): + def __init__( + self, + name, + binedges): """Creates a new binning definition object. Parameters ---------- name : str The name of the binning definition. - binedges : sequence + binedges : sequence of float The sequence of the bin edges, which should be used for the binning. 
""" self.name = name @@ -114,20 +119,23 @@ def __init__(self, name, binedges): def __str__(self): """Pretty string representation. """ - s = '%s: %s\n'%(classname(self), self._name) + s = f'{classname(self)}: {self._name}\n' s += str(self._binedges) return s def __eq__(self, other): """Checks if object ``other`` is equal to this BinningDefinition object. """ - if(not isinstance(other, BinningDefinition)): - raise TypeError('The other object in the equal comparison must be ' - 'an instance of BinningDefinition!') - if(self.name != other.name): + if not isinstance(other, BinningDefinition): + raise TypeError( + 'The other object in the equal comparison must be an instance ' + 'of BinningDefinition! ' + f'Its current type is {classname(other)}.') + if self.name != other.name: return False - if(np.any(self.binedges != other.binedges)): + if np.any(self.binedges != other.binedges): return False + return True @property @@ -136,10 +144,13 @@ def name(self): for all the different binning settings used within a season. """ return self._name + @name.setter def name(self, name): - if(not isinstance(name, str)): - raise TypeError("The name must be of type 'str'!") + if not isinstance(name, str): + raise TypeError( + 'The name must be of type str! ' + f'Its current type is {classname(name)}.') self._name = name @property @@ -147,6 +158,7 @@ def binedges(self): """The numpy.ndarray holding the bin edges. """ return self._binedges + @binedges.setter def binedges(self, arr): arr = np.atleast_1d(arr) @@ -188,13 +200,14 @@ def range(self): """ return (self.lower_edge, self.upper_edge) - def any_data_out_of_binning_range(self, data): - """Checks if any of the given data is outside of the binning range. + def any_data_out_of_range(self, data): + """Checks if any of the given data is outside the range of this binning + definition. Parameters ---------- - data : 1d ndarray - The array with the data values to check. 
+ data : instance of ndarray + The 1D ndarray with the data values to check. Returns ------- @@ -229,8 +242,8 @@ def get_subset(self, lower_edge, upper_edge): Returns ------- - new_binning : BinningDefinition instance - The new BinningDefinition instance holding the binning subset. + binning : instance of BinningDefinition + The new instance of BinningDefinition holding the binning subset. """ idxs = np.indices((len(self._binedges),))[0] @@ -238,12 +251,12 @@ def get_subset(self, lower_edge, upper_edge): idx_lower = np.min(idxs[m]) # Include the lower edge of the bin the lower_edge value falls into. - if(self._binedges[idx_lower] > lower_edge): + if self._binedges[idx_lower] > lower_edge: idx_lower -= 1 idx_upper = np.max(idxs[m]) # Include the upper edge of the bin the upper_edge value falls into. - if(self._binedges[idx_upper] < upper_edge): + if self._binedges[idx_upper] < upper_edge: idx_upper += 1 new_binedges = self._binedges[idx_lower:idx_upper+1] @@ -262,8 +275,7 @@ class UsesBinning(object): a given object (that also uses binning) has the same binning. """ def __init__(self, *args, **kwargs): - # Make sure that multiple inheritance can be used. - super(UsesBinning, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) # Define the list of binning definition objects and a name->list_index # mapping for faster access. @@ -288,7 +300,7 @@ def has_same_binning_as(self, obj): Parameters ---------- - obj : class instance derived from UsesBinning + obj : instance of UsesBinning The object that should be checked for same binning. Returns @@ -296,13 +308,15 @@ def has_same_binning_as(self, obj): check : bool True if ``obj`` uses the same binning, False otherwise. """ - if(not isinstance(obj, UsesBinning)): - raise TypeError('The obj argument must be an instance of ' - 'UsesBinning!') + if not isinstance(obj, UsesBinning): + raise TypeError( + 'The obj argument must be an instance of UsesBinning! 
' + f'Its current type is {classname(obj)}.') for (self_binning, obj_binning) in zip(self.binnings, obj.binnings): - if(not (self_binning == obj_binning)): + if self_binning != obj_binning: return False + return True def add_binning(self, binning, name=None): @@ -310,22 +324,26 @@ def add_binning(self, binning, name=None): Parameters ---------- - binning : BinningDefinition + binning : instance of BinningDefinition The binning definition to add. name : str | (default) None The name of the binning. If not None and it's different to the name of the given binning definition, a copy of the BinningDefinition object is made and the new name is set. """ - if(not isinstance(binning, BinningDefinition)): - raise TypeError('The binning argument must be an instance of ' - 'BinningDefinition!') + if not isinstance(binning, BinningDefinition): + raise TypeError( + 'The binning argument must be an instance of ' + 'BinningDefinition! ' + f'Its current type is {classname(binning)}.') # Create a copy of the BinningDefinition object if the name differs. - if(name is not None): - if(not isinstance(name, str)): - raise TypeError('The name argument must be of type str!') - if(name != binning.name): + if name is not None: + if not isinstance(name, str): + raise TypeError( + 'The name argument must be of type str! ' + f'Its current type is {classname(name)}.') + if name != binning.name: binning = BinningDefinition(name, binning.binedges) self._binnings.append(binning) @@ -342,17 +360,19 @@ def get_binning(self, name): Returns ------- - binning : BinningDefinition + binning : instance of BinningDefinition The binning definition of the given name. 
""" - if(isinstance(name, str)): - if(name not in self._binning_name2idx): - raise KeyError('The binning definition "%s" is not defined!'%( - name)) + if isinstance(name, str): + if name not in self._binning_name2idx: + raise KeyError( + f'The binning definition "{name}" is not defined!') binning = self._binnings[self._binning_name2idx[name]] - elif(isinstance(name, int)): + elif isinstance(name, int): binning = self._binnings[name] else: - raise TypeError('The name argument must be of type str or int!') + raise TypeError( + 'The name argument must be of type str or int! ' + f'Its current type is {classname(name)}.') return binning diff --git a/skyllh/core/catalog.py b/skyllh/core/catalog.py new file mode 100644 index 0000000000..0e59124c17 --- /dev/null +++ b/skyllh/core/catalog.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +# Author: Dr. Martin Wolf + +"""This module provides classes for defining source catalogs. +""" + +from skyllh.core.py import ( + str_cast, +) +from skyllh.core.source_model import ( + SourceModelCollection, +) + + +class SourceCatalog( + SourceModelCollection): + """This class describes a catalog of sources. It is derived from + SourceModelCollection. A catalog has a name. + """ + def __init__( + self, + name, + sources=None, + source_type=None, + **kwargs): + """Creates a new source catalog. + + Parameters + ---------- + name : str + The name of the catalog. + sources : sequence of source_type | None + The sequence of sources this catalog should be initalized with. + source_type : type | None + The type of the source class. If set to None (default), the + default type defined by SourceCollection will be used. + """ + super().__init__( + sources=sources, + source_type=source_type, + **kwargs) + + self.name = name + + @property + def name(self): + """The name of the catalog. 
+ """ + return self._name + + @name.setter + def name(self, name): + name = str_cast( + name, + 'The name property must be castable to type str!') + self._name = name + + def __str__(self): + s = f'"{self.name}" {super().__str__()}' + return s + + def as_SourceModelCollection(self): + """Creates a SourceModelCollection object for this catalog and + returns it. + + Returns + ------- + source_model_collection : instance of SourceModelCollection + The created instance of SourceModelCollection. + """ + return SourceModelCollection( + sources=self.sources, + source_type=self.source_type) diff --git a/skyllh/core/config.py b/skyllh/core/config.py index f6638ef6d7..1a50a2329d 100644 --- a/skyllh/core/config.py +++ b/skyllh/core/config.py @@ -4,10 +4,19 @@ convenience utility functions to set different configuration settings. """ -from astropy import units # type: ignore +from astropy import ( + units, +) import os.path import sys -from typing import Any, Dict, Iterator, KeysView, ItemsView, ValuesView +from typing import ( + Any, + Dict, +) + +from skyllh.core.py import ( + issequenceof, +) # Try to load the yaml package. YAML_LOADED = True @@ -16,8 +25,6 @@ except ImportError: YAML_LOADED = False -from skyllh.core.py import issequenceof - _BASECONFIG = { 'multiproc': { @@ -84,48 +91,65 @@ } -class CFGClass(dict): - - """ - This class holds the global config state. +class CFGClass( + dict): + """This class holds the global configuration state. The class behaves like a dict, delegating all methods of the dict - interface to the underlying config dictionary. + interface to the underlying configuration dictionary. """ # Keep track of whether this class has been instantiated. - _is_instantiated = False - def __init__(self, *args, **kwargs) -> None: + def __init__( + self, + *args, + **kwargs) -> None: + """Initializes a new CFGClass instance. Such a instance can be + initialized only once! 
+ """ if CFGClass._is_instantiated: - raise RuntimeError("Can only instantiate CFGClass once") + raise RuntimeError( + 'Can instantiate CFGClass only once!') super().__init__(*args, **kwargs) - CFGClass._is_instantiated = True - def from_yaml(self, yaml_file: str) -> None: - """ - Update config with yaml file. + CFGClass._is_instantiated = True - Parameters: - yaml_file: str - Path to yaml file. + def from_yaml( + self, + yaml_file: str) -> None: + """Updates the configuration with the configuration items contained in + the yaml file. This calls ``dict.update``. + + Parameters + ---------- + yaml_file: str | None + Path to yaml file containing the to-be-updated configuration items. + If set to ``None``, nothing is done. """ - if(YAML_LOADED): - yaml_config = yaml.load(open(yaml_file), Loader=yaml.SafeLoader) - self.update(yaml_config) - else: - raise ImportError(f'Could not import yaml package. Thus can not' - f'import config from yaml file {yaml_file}') - - def from_dict(self, user_dict: Dict[Any, Any]) -> None: - """ - Creates a config from dictionary. - - Parameters: - user_dict: dict - + if yaml_file is None: + return + + if not YAML_LOADED: + raise ImportError( + f'Could not import yaml package. Thus cannot ' + f'import config from yaml file {yaml_file}!') + + yaml_config = yaml.load(open(yaml_file), Loader=yaml.SafeLoader) + self.update(yaml_config) + + def from_dict( + self, + user_dict: Dict[Any, Any]) -> None: + """Updates the configuration with the given configuration + dictionary. This calls ``dict.update``. + + Parameters + ---------- + user_dict: dict + The dictionary containing the to-be-updated configuration items. """ self.update(user_dict) @@ -133,8 +157,54 @@ def from_dict(self, user_dict: Dict[Any, Any]) -> None: CFG = CFGClass(_BASECONFIG) +def to_internal_time_unit( + time_unit, +): + """Calculates the conversion factor from the given time unit to the internal + time unit.
+ + Parameters + ---------- + time_unit : instance of astropy.units.UnitBase + The time unit from which to convert to the internal time unit. + """ + internal_time_unit = CFG['internal_units']['time'] + factor = time_unit.to(internal_time_unit) + + return factor + + +def set_enable_tracing( + flag): + """Sets the global setting for tracing. + + Parameters + ---------- + flag : bool + The flag if tracing should be enabled (``True``) or disabled + (``False``). + """ + CFG['debugging']['enable_tracing'] = flag + + +def set_n_cpu( + n_cpu): + """Sets the global setting for the number of CPUs to use, when + parallelization is available. + + Parameters + ---------- + n_cpu : int + The number of CPUs. + """ + CFG['multiproc']['ncpu'] = n_cpu + + def set_internal_units( - angle_unit=None, energy_unit=None, length_unit=None, time_unit=None): + angle_unit=None, + energy_unit=None, + length_unit=None, + time_unit=None): """Sets the units used internally to compute quantities. These units must match the units used in the monte-carlo files. @@ -153,36 +223,37 @@ def set_internal_units( The internal unit that should be used for time. If set to ``None``, the unit is not changed. 
""" - if(angle_unit is not None): - if(not isinstance(angle_unit, units.UnitBase)): + if angle_unit is not None: + if not isinstance(angle_unit, units.UnitBase): raise TypeError( 'The angle_unit argument must be an instance of ' 'astropy.units.UnitBase!') CFG['internal_units']['angle'] = angle_unit - if(energy_unit is not None): - if(not isinstance(energy_unit, units.UnitBase)): + if energy_unit is not None: + if not isinstance(energy_unit, units.UnitBase): raise TypeError( 'The energy_unit argument must be an instance of ' 'astropy.units.UnitBase!') CFG['internal_units']['energy'] = energy_unit - if(length_unit is not None): - if(not isinstance(length_unit, units.UnitBase)): + if length_unit is not None: + if not isinstance(length_unit, units.UnitBase): raise TypeError( 'The length_unit argument must be an instance of ' 'astropy.units.UnitBase!') CFG['internal_units']['length'] = length_unit - if(time_unit is not None): - if(not isinstance(time_unit, units.UnitBase)): + if time_unit is not None: + if not isinstance(time_unit, units.UnitBase): raise TypeError( 'The time_unit argument must be an instance of ' 'astropy.units.UnitBase!') CFG['internal_units']['time'] = time_unit -def set_wd(path): +def set_wd( + path): """Sets the project's working directory configuration variable and adds it to the Python path variable. @@ -198,7 +269,7 @@ def set_wd(path): wd : str The project's working directory. """ - if(CFG['project']['working_directory'] in sys.path): + if CFG['project']['working_directory'] in sys.path: sys.path.remove(CFG['project']['working_directory']) wd = os.path.abspath(path) @@ -208,7 +279,8 @@ def set_wd(path): return wd -def add_analysis_required_exp_data_field_names(fieldnames): +def add_analysis_required_exp_data_field_names( + fieldnames): """Adds the given data field names to the set of data field names of the experimental data that are required by the analysis. 
@@ -218,9 +290,9 @@ def add_analysis_required_exp_data_field_names(fieldnames): The field name or sequence of field names that should get added for the experimental data. """ - if(isinstance(fieldnames, str)): + if isinstance(fieldnames, str): fieldnames = [fieldnames] - elif(not issequenceof(fieldnames, str)): + elif not issequenceof(fieldnames, str): raise TypeError( 'The fieldnames argument must be an instance of str ' 'or a sequence of type str instances!') @@ -229,7 +301,30 @@ CFG['dataset']['analysis_required_exp_field_names'] + fieldnames)) -def set_analysis_required_exp_data_field_names(fieldnames): +def add_analysis_required_mc_data_field_names( + fieldnames): + """Adds the given data field names to the set of data field names of the + monte-carlo data that are required by the analysis. + + Parameters + ---------- + fieldnames : str | sequence of str + The field name or sequence of field names that should get added for the + monte-carlo data. + """ + if isinstance(fieldnames, str): + fieldnames = [fieldnames] + elif not issequenceof(fieldnames, str): + raise TypeError( + 'The fieldnames argument must be an instance of str ' + 'or a sequence of type str instances!') + + CFG['dataset']['analysis_required_mc_field_names'] = list(set( + CFG['dataset']['analysis_required_mc_field_names'] + fieldnames)) + + +def set_analysis_required_exp_data_field_names( + fieldnames): """Sets the data field names of the experimental data that are required by the analysis. @@ -238,9 +333,9 @@ fieldnames : str | sequence of str The field name or sequence of field names for the experimental data.
""" - if(isinstance(fieldnames, str)): + if isinstance(fieldnames, str): fieldnames = [fieldnames] - elif(not issequenceof(fieldnames, str)): + elif not issequenceof(fieldnames, str): raise TypeError( 'The fieldnames argument must be an instance of str ' 'or a sequence of type str instances!') @@ -248,7 +343,8 @@ def set_analysis_required_exp_data_field_names(fieldnames): CFG['dataset']['analysis_required_exp_field_names'] = list(set(fieldnames)) -def set_analysis_required_mc_data_field_names(fieldnames): +def set_analysis_required_mc_data_field_names( + fieldnames): """Sets the data field names of the monte-carlo data that are required by the analysis. @@ -257,9 +353,9 @@ def set_analysis_required_mc_data_field_names(fieldnames): fieldnames : str | sequence of str The field name or sequence of field names for the monte-carlo data. """ - if(isinstance(fieldnames, str)): + if isinstance(fieldnames, str): fieldnames = [fieldnames] - elif(not issequenceof(fieldnames, str)): + elif not issequenceof(fieldnames, str): raise TypeError( 'The fieldnames argument must be an instance of str ' 'or a sequence of type str instances!') diff --git a/skyllh/core/dataset.py b/skyllh/core/dataset.py index ecefc308bd..5992b43bfe 100644 --- a/skyllh/core/dataset.py +++ b/skyllh/core/dataset.py @@ -1,31 +1,49 @@ # -*- coding: utf-8 -*- +from copy import ( + deepcopy, +) import os import os.path import numpy as np -from copy import deepcopy -from skyllh.core.binning import BinningDefinition -from skyllh.core.config import CFG -from skyllh.core.livetime import Livetime -from skyllh.core.progressbar import ProgressBar +from skyllh.core import ( + display, +) +from skyllh.core.binning import ( + BinningDefinition, +) +from skyllh.core.config import ( + CFG, +) +from skyllh.core.display import ( + ANSIColors, +) +from skyllh.core.livetime import ( + Livetime, +) +from skyllh.core.progressbar import ( + ProgressBar, +) from skyllh.core.py import ( + classname, float_cast, issequence, 
issequenceof, list_of_cast, - str_cast + str_cast, ) -from skyllh.core import display -from skyllh.core.display import ANSIColors from skyllh.core.storage import ( DataFieldRecordArray, - create_FileLoader + create_FileLoader, +) +from skyllh.core.timing import ( + TaskTimer, ) -from skyllh.core.timing import TaskTimer -class Dataset(object): +class Dataset( + object): """The Dataset class describes a set of self-consistent experimental and simulated detector data. Usually this is for a certain time period, i.e. a season. @@ -34,7 +52,8 @@ class Dataset(object): through a DatasetCollection object. """ @staticmethod - def get_combined_exp_pathfilenames(datasets): + def get_combined_exp_pathfilenames( + datasets): """Creates the combined list of exp pathfilenames of all the given datasets. @@ -48,8 +67,10 @@ def get_combined_exp_pathfilenames(datasets): exp_pathfilenames : list The combined list of exp pathfilenames. """ - if(not issequenceof(datasets, Dataset)): - raise TypeError('The datasets argument must be a sequence of Dataset instances!') + if not issequenceof(datasets, Dataset): + raise TypeError( + 'The datasets argument must be a sequence of Dataset ' + 'instances!') exp_pathfilenames = [] for ds in datasets: @@ -58,7 +79,8 @@ def get_combined_exp_pathfilenames(datasets): return exp_pathfilenames @staticmethod - def get_combined_mc_pathfilenames(datasets): + def get_combined_mc_pathfilenames( + datasets): """Creates the combined list of mc pathfilenames of all the given datasets. @@ -72,8 +94,10 @@ def get_combined_mc_pathfilenames(datasets): mc_pathfilenames : list The combined list of mc pathfilenames. 
""" - if(not issequenceof(datasets, Dataset)): - raise TypeError('The datasets argument must be a sequence of Dataset instances!') + if not issequenceof(datasets, Dataset): + raise TypeError( + 'The datasets argument must be a sequence of Dataset ' + 'instances!') mc_pathfilenames = [] for ds in datasets: @@ -82,7 +106,8 @@ def get_combined_mc_pathfilenames(datasets): return mc_pathfilenames @staticmethod - def get_combined_livetime(datasets): + def get_combined_livetime( + datasets): """Sums the live-time of all the given datasets. Parameters @@ -95,10 +120,15 @@ def get_combined_livetime(datasets): livetime : float The sum of all the individual live-times. """ - if(not issequenceof(datasets, Dataset)): - raise TypeError('The datasets argument must be a sequence of Dataset instances!') + if not issequenceof(datasets, Dataset): + raise TypeError( + 'The datasets argument must be a sequence of Dataset ' + 'instances!') - livetime = np.sum([ ds.livetime for ds in datasets ]) + livetime = np.sum([ + ds.livetime + for ds in datasets + ]) return livetime @@ -172,6 +202,7 @@ def name(self): all the different datasets. """ return self._name + @name.setter def name(self, name): self._name = name @@ -181,11 +212,12 @@ def description(self): """The (longer) description of the dataset. """ return self._description + @description.setter def description(self, description): - if(not isinstance(description, str)): - raise TypeError('The description of the dataset must be of ' - 'type str!') + if not isinstance(description, str): + raise TypeError( + 'The description of the dataset must be of type str!') self._description = description @property @@ -196,15 +228,17 @@ def exp_pathfilename_list(self): root_dir property of this Dataset instance. 
""" return self._exp_pathfilename_list + @exp_pathfilename_list.setter def exp_pathfilename_list(self, pathfilenames): - if(pathfilenames is None): + if pathfilenames is None: pathfilenames = [] - if(isinstance(pathfilenames, str)): + if isinstance(pathfilenames, str): pathfilenames = [pathfilenames] - if(not issequenceof(pathfilenames, str)): - raise TypeError('The exp_pathfilename_list property must be of ' - 'type str or a sequence of str!') + if not issequenceof(pathfilenames, str): + raise TypeError( + 'The exp_pathfilename_list property must be of type str or a ' + 'sequence of str!') self._exp_pathfilename_list = list(pathfilenames) @property @@ -222,15 +256,17 @@ def mc_pathfilename_list(self): root_dir property of this Dataset instance. """ return self._mc_pathfilename_list + @mc_pathfilename_list.setter def mc_pathfilename_list(self, pathfilenames): - if(pathfilenames is None): + if pathfilenames is None: pathfilenames = [] - if(isinstance(pathfilenames, str)): + if isinstance(pathfilenames, str): pathfilenames = [pathfilenames] - if(not issequenceof(pathfilenames, str)): - raise TypeError('The mc_pathfilename_list property must be of ' - 'type str or a sequence of str!') + if not issequenceof(pathfilenames, str): + raise TypeError( + 'The mc_pathfilename_list property must be of type str or a ' + 'sequence of str!') self._mc_pathfilename_list = list(pathfilenames) @property @@ -246,10 +282,12 @@ def livetime(self): cases where the livetime is retrieved directly from the data files. """ return self._lifetime + @livetime.setter def livetime(self, lt): - if(lt is not None): - lt = float_cast(lt, + if lt is not None: + lt = float_cast( + lt, 'The lifetime property of the dataset must be castable to ' 'type float!') self._lifetime = lt @@ -259,10 +297,12 @@ def version(self): """The main version (int) of the dataset. 
""" return self._version + @version.setter def version(self, version): - if(not isinstance(version, int)): - raise TypeError('The version of the dataset must be of type int!') + if not isinstance(version, int): + raise TypeError( + 'The version of the dataset must be of type int!') self._version = version @property @@ -272,18 +312,21 @@ def verqualifiers(self): The dictionary must have the type form of str:int. """ return self._verqualifiers + @verqualifiers.setter def verqualifiers(self, verqualifiers): - if(verqualifiers is None): + if verqualifiers is None: verqualifiers = dict() - if(not isinstance(verqualifiers, dict)): + if not isinstance(verqualifiers, dict): raise TypeError('The version qualifiers must be of type dict!') # Check if the dictionary has format str:int. - for (q,v) in verqualifiers.items(): - if(not isinstance(q, str)): - raise TypeError('The version qualifier "%s" must be of type str!'%(q)) - if(not isinstance(v, int)): - raise TypeError('The version for the qualifier "%s" must be of type int!'%(q)) + for (q, v) in verqualifiers.items(): + if not isinstance(q, str): + raise TypeError( + f'The version qualifier "{q}" must be of type str!') + if not isinstance(v, int): + raise TypeError( + f'The version for the qualifier "{q}" must be of type int!') # We need to take a deep copy in order to make sure that two datasets # don't share the same version qualifier dictionary. self._verqualifiers = deepcopy(verqualifiers) @@ -293,14 +336,16 @@ def base_path(self): """The base path of the data set. This can be ``None``. 
""" return self._base_path + @base_path.setter def base_path(self, path): - if(path is not None): - path = str_cast(path, 'The base_path property must be castable to ' - 'type str!') - if(not os.path.isabs(path)): - raise ValueError('The base_path property must be an absolute ' - 'path!') + if path is not None: + path = str_cast( + path, + 'The base_path property must be castable to type str!') + if not os.path.isabs(path): + raise ValueError( + 'The base_path property must be an absolute path!') self._base_path = path @property @@ -310,10 +355,12 @@ def default_sub_path_fmt(self): class. """ return self._default_sub_path_fmt + @default_sub_path_fmt.setter def default_sub_path_fmt(self, fmt): - fmt = str_cast(fmt, 'The default_sub_path_fmt property must be ' - 'castable to type str!') + fmt = str_cast( + fmt, + 'The default_sub_path_fmt property must be castable to type str!') self._default_sub_path_fmt = fmt @property @@ -323,14 +370,17 @@ def sub_path_fmt(self): If set to ``None``, this property will return the ``default_sub_path_fmt`` property. """ - if(self._sub_path_fmt is None): + if self._sub_path_fmt is None: return self._default_sub_path_fmt return self._sub_path_fmt + @sub_path_fmt.setter def sub_path_fmt(self, fmt): - if(fmt is not None): - fmt = str_cast(fmt, 'The sub_path_fmt property must be None, or ' - 'castable to type str!') + if fmt is not None: + fmt = str_cast( + fmt, + 'The sub_path_fmt property must be None, or castable to type ' + 'str!') self._sub_path_fmt = fmt @property @@ -355,14 +405,15 @@ def loading_extra_exp_field_name_list(self): during the data preparation of this specific data set. 
""" return self._loading_extra_exp_field_name_list + @loading_extra_exp_field_name_list.setter def loading_extra_exp_field_name_list(self, fieldnames): - if(isinstance(fieldnames, str)): - fieldnames = [ fieldnames ] - elif(not issequenceof(fieldnames, str)): - raise TypeError('The loading_extra_exp_field_name_list property ' - 'must be an instance of str or a sequence of str type ' - 'instances!') + if isinstance(fieldnames, str): + fieldnames = [fieldnames] + elif not issequenceof(fieldnames, str): + raise TypeError( + 'The loading_extra_exp_field_name_list property must be an ' + 'instance of str or a sequence of str type instances!') self._loading_extra_exp_field_name_list = list(fieldnames) @property @@ -372,14 +423,15 @@ def loading_extra_mc_field_name_list(self): during the data preparation of this specific data set. """ return self._loading_extra_mc_field_name_list + @loading_extra_mc_field_name_list.setter def loading_extra_mc_field_name_list(self, fieldnames): - if(isinstance(fieldnames, str)): - fieldnames = [ fieldnames ] - elif(not issequenceof(fieldnames, str)): - raise TypeError('The loading_extra_mc_field_name_list property ' - 'must be an instance of str or a sequence of str type ' - 'instances!') + if isinstance(fieldnames, str): + fieldnames = [fieldnames] + elif not issequenceof(fieldnames, str): + raise TypeError( + 'The loading_extra_mc_field_name_list property must be an ' + 'instance of str or a sequence of str type instances!') self._loading_extra_mc_field_name_list = list(fieldnames) @property @@ -389,11 +441,13 @@ def exp_field_name_renaming_dict(self): values are the new names. 
""" return self._exp_field_name_renaming_dict + @exp_field_name_renaming_dict.setter def exp_field_name_renaming_dict(self, d): - if(not isinstance(d, dict)): - raise TypeError('The exp_field_name_renaming_dict property must ' - 'be an instance of dict!') + if not isinstance(d, dict): + raise TypeError( + 'The exp_field_name_renaming_dict property must be an instance ' + 'of dict!') self._exp_field_name_renaming_dict = d @property @@ -403,11 +457,13 @@ def mc_field_name_renaming_dict(self): values are the new names. """ return self._mc_field_name_renaming_dict + @mc_field_name_renaming_dict.setter def mc_field_name_renaming_dict(self, d): - if(not isinstance(d, dict)): - raise TypeError('The mc_field_name_renaming_dict property must ' - 'be an instance of dict!') + if not isinstance(d, dict): + raise TypeError( + 'The mc_field_name_renaming_dict property must be an instance ' + 'of dict!') self._mc_field_name_renaming_dict = d @property @@ -417,7 +473,7 @@ def exists(self): """ for pathfilename in (self.exp_abs_pathfilename_list + self.mc_abs_pathfilename_list): - if(not os.path.exists(pathfilename)): + if not os.path.exists(pathfilename): return False return True @@ -426,9 +482,9 @@ def version_str(self): """The version string of the dataset. This combines all the version information about the dataset. """ - s = '%03d'%(self._version) - for (q,v) in self._verqualifiers.items(): - s += q+'%02d'%(v) + s = f'{self._version:03d}' + for (q, v) in self._verqualifiers.items(): + s += f'{q}{v:02d}' return s @property @@ -452,7 +508,7 @@ def _gen_datafile_pathfilename_entry(self, pathfilename): s : str The generated string. """ - if(os.path.exists(pathfilename)): + if os.path.exists(pathfilename): s = '['+ANSIColors.OKGREEN+'FOUND'+ANSIColors.ENDC+']' else: s = '['+ANSIColors.FAIL+'NOT FOUND'+ANSIColors.ENDC+']' @@ -477,11 +533,11 @@ def __gt__(self, ds): dataset. """ # Datasets of different names cannot be compared usefully. 
- if(self._name != ds._name): + if self._name != ds._name: return False # Larger main version numbers indicate newer datasets. - if(self._version > ds._version): + if self._version > ds._version: return True # Look for version qualifiers that make this dataset older than the @@ -495,37 +551,37 @@ def __gt__(self, ds): # If a qualifier is present in self but not in ds, self is considered # newer. for q in qs1: - if(q in qs2 and qs1[q] <= qs2[q]): + if q in qs2 and qs1[q] <= qs2[q]: return False # If there is a qualifier in ds but not in self, self is considered # older. for q in qs2: - if(q not in qs1): + if q not in qs1: return False return True - def __str__(self): + def __str__(self): # noqa: C901 """Implementation of the pretty string representation of the Dataset object. """ - s = 'Dataset "%s": v%s\n'%(self.name, self.version_str) + s = f'Dataset "{self.name}": v{self.version_str}\n' s1 = '' - if(self.livetime is None): + if self.livetime is None: s1 += '{ livetime = UNDEFINED }' else: - s1 += '{ livetime = %.3f days }'%(self.livetime) + s1 += '{ 'f'livetime = {self.livetime:.3f} days'' }' s1 += '\n' - if(self.description != ''): + if self.description != '': s1 += 'Description:\n' + self.description + '\n' s1 += 'Experimental data:\n' s2 = '' for (idx, pathfilename) in enumerate(self.exp_abs_pathfilename_list): - if(idx > 0): + if idx > 0: s2 += '\n' s2 += self._gen_datafile_pathfilename_entry(pathfilename) s1 += display.add_leading_text_line_padding( @@ -535,19 +591,19 @@ def __str__(self): s1 += 'MC data:\n' s2 = '' for (idx, pathfilename) in enumerate(self.mc_abs_pathfilename_list): - if(idx > 0): + if idx > 0: s2 += '\n' s2 += self._gen_datafile_pathfilename_entry(pathfilename) s1 += display.add_leading_text_line_padding( display.INDENTATION_WIDTH, s2) s1 += '\n' - if(len(self._aux_data_definitions) > 0): + if len(self._aux_data_definitions) > 0: s1 += 'Auxiliary data:\n' s2 = '' - for (idx,(name, pathfilename_list)) in enumerate( - 
self._aux_data_definitions.items()): - if(idx > 0): + for (idx, (name, pathfilename_list)) in enumerate( + self._aux_data_definitions.items()): + if idx > 0: s2 += '\n' s2 += name+':' @@ -566,7 +622,10 @@ def __str__(self): return s - def get_abs_pathfilename_list(self, pathfilename_list): + def get_abs_pathfilename_list( + self, + pathfilename_list, + ): """Returns a list where each entry of the given pathfilename_list is an absolute path. Relative paths will be prefixed with the root_dir property of this Dataset instance. @@ -585,7 +644,7 @@ def get_abs_pathfilename_list(self, pathfilename_list): abs_pathfilename_list = [] for pathfilename in pathfilename_list: - if(os.path.isabs(pathfilename)): + if os.path.isabs(pathfilename): abs_pathfilename_list.append( pathfilename) else: @@ -594,7 +653,10 @@ def get_abs_pathfilename_list(self, pathfilename_list): return abs_pathfilename_list - def update_version_qualifiers(self, verqualifiers): + def update_version_qualifiers( + self, + verqualifiers, + ): """Updates the version qualifiers of the dataset. The update can only be done by increasing the version qualifier integer or by adding new version qualifiers. @@ -613,29 +675,36 @@ def update_version_qualifiers(self, verqualifiers): got_new_verqualifiers = False verqualifiers_keys = verqualifiers.keys() self_verqualifiers_keys = self._verqualifiers.keys() - if(len(verqualifiers_keys) > len(self_verqualifiers_keys)): + if len(verqualifiers_keys) > len(self_verqualifiers_keys): # New version qualifiers must be a subset of the old version # qualifiers. 
for q in self_verqualifiers_keys: - if(not q in verqualifiers_keys): - raise ValueError('The version qualifier {} has been ' - 'dropped!'.format(q)) + if q not in verqualifiers_keys: + raise ValueError( + f'The version qualifier {q} has been dropped!') got_new_verqualifiers = True existing_verqualifiers_incremented = False for q in verqualifiers: - if((q in self._verqualifiers) and - (verqualifiers[q] > self._verqualifiers[q])): + if (q in self._verqualifiers) and\ + (verqualifiers[q] > self._verqualifiers[q]): existing_verqualifiers_incremented = True self._verqualifiers[q] = verqualifiers[q] - if(not (got_new_verqualifiers or existing_verqualifiers_incremented)): - raise ValueError('Version qualifier values did not increment and ' - 'no new version qualifiers were added!') + if not (got_new_verqualifiers or existing_verqualifiers_incremented): + raise ValueError( + 'Version qualifier values did not increment and no new version ' + 'qualifiers were added!') def load_data( - self, keep_fields=None, livetime=None, dtc_dict=None, - dtc_except_fields=None, efficiency_mode=None, tl=None): + self, + keep_fields=None, + livetime=None, + dtc_dict=None, + dtc_except_fields=None, + efficiency_mode=None, + tl=None, + ): """Loads the data, which is described by the dataset. Note: This does not call the ``prepare_data`` method! It only loads @@ -662,49 +731,52 @@ def load_data( The efficiency mode the data should get loaded with. Possible values are: - - 'memory': + ``'memory'`` The data will be load in a memory efficient way. This will require more time, because all data records of a file will be loaded sequentially. - - 'time' + ``'time'`` The data will be loaded in a time efficient way. This will require more memory, because each data file gets loaded in memory at once. The default value is ``'time'``. If set to ``None``, the default value will be used. 
- tl : TimeLord instance | None + tl : instance of TimeLord | None The TimeLord instance to use to time the data loading procedure. Returns ------- - data : DatasetData - A DatasetData instance holding the experimental and monte-carlo + data : instance of DatasetData + An instance of DatasetData holding the experimental and monte-carlo data. """ - def _conv_new2orig_field_names(new_field_names, orig2new_renaming_dict): + def _conv_new2orig_field_names( + new_field_names, + orig2new_renaming_dict, + ): """Converts the given ``new_field_names`` into their original name given the original-to-new field name renaming dictionary. """ - if(new_field_names is None): + if new_field_names is None: return None new2orig_renaming_dict = dict() - for (k,v) in orig2new_renaming_dict.items(): + for (k, v) in orig2new_renaming_dict.items(): new2orig_renaming_dict[v] = k orig_field_names = [ new2orig_renaming_dict.get(new_field_name, new_field_name) - for new_field_name in new_field_names + for new_field_name in new_field_names ] return orig_field_names - if(keep_fields is None): + if keep_fields is None: keep_fields = [] # Load the experimental data if there is any. - if(len(self._exp_pathfilename_list) > 0): + if len(self._exp_pathfilename_list) > 0: with TaskTimer(tl, 'Loading exp data from disk.'): fileloader_exp = create_FileLoader( self.exp_abs_pathfilename_list) @@ -730,7 +802,7 @@ def _conv_new2orig_field_names(new_field_names, orig2new_renaming_dict): data_exp = None # Load the monte-carlo data if there is any.
- if(len(self._mc_pathfilename_list) > 0): + if len(self._mc_pathfilename_list) > 0: with TaskTimer(tl, 'Loading mc data from disk.'): fileloader_mc = create_FileLoader( self.mc_abs_pathfilename_list) @@ -762,21 +834,28 @@ def _conv_new2orig_field_names(new_field_names, orig2new_renaming_dict): else: data_mc = None - if(livetime is None): + if livetime is None: livetime = self.livetime - data = DatasetData(data_exp, data_mc, livetime) + data = DatasetData( + data_exp=data_exp, + data_mc=data_mc, + livetime=livetime) return data - def load_aux_data(self, name, tl=None): + def load_aux_data( + self, + name, + tl=None, + ): """Loads the auxiliary data for the given auxiliary data definition. Parameters ---------- name : str The name of the auxiliary data. - tl : TimeLord instance | None + tl : instance of TimeLord | None The TimeLord instance to use to time the data loading procedure. Returns @@ -784,28 +863,32 @@ def load_aux_data(self, name, tl=None): data : unspecified The loaded auxiliary data. """ - name = str_cast(name, + name = str_cast( + name, 'The name argument must be castable to type str!') # Check if the data was defined in memory. 
- if(name in self._aux_data): - with TaskTimer(tl, 'Loaded aux data "%s" from memory.'%(name)): + if name in self._aux_data: + with TaskTimer(tl, f'Loaded aux data "{name}" from memory.'): data = self._aux_data[name] return data - if(name not in self._aux_data_definitions): - raise KeyError('The auxiliary data named "%s" does not exist!'%( - name)) + if name not in self._aux_data_definitions: + raise KeyError( + f'The auxiliary data named "{name}" does not exist!') aux_pathfilename_list = self._aux_data_definitions[name] - with TaskTimer(tl, 'Loaded aux data "%s" from disk.'%(name)): + with TaskTimer(tl, f'Loaded aux data "{name}" from disk.'): fileloader_aux = create_FileLoader(self.get_abs_pathfilename_list( aux_pathfilename_list)) data = fileloader_aux.load_data() return data - def add_data_preparation(self, func): + def add_data_preparation( + self, + func, + ): """Adds the given data preparation function to the dataset. Parameters @@ -817,11 +900,16 @@ def add_data_preparation(self, func): must alter the properties of the DatasetData instance. """ - if(not callable(func)): - raise TypeError('The argument "func" must be a callable object with call signature __call__(data)!') + if not callable(func): + raise TypeError( + 'The argument "func" must be a callable object with call ' + 'signature __call__(data)!') self._data_preparation_functions.append(func) - def remove_data_preparation(self, key=-1): + def remove_data_preparation( + self, + key=-1, + ): """Removes a data preparation function from the dataset. Parameters @@ -839,45 +927,57 @@ def remove_data_preparation(self, key=-1): KeyError If the data preparation function cannot be found. 
""" - if(isinstance(key, int)): + if isinstance(key, int): n = len(self._data_preparation_functions) - if((key < -n) or (key >= n)): - raise IndexError('The given index (%d) for the data ' - 'preparation function is out of range (%d,%d)!'%( - key, -n, n-1)) + if (key < -n) or (key >= n): + raise IndexError( + f'The given index ({key}) for the data preparation ' + f'function is out of range ({-n},{n-1})!') del self._data_preparation_functions[key] return - elif(isinstance(key, str)): - for (i,func) in enumerate(self._data_preparation_functions): - if(func.__name__ == key): + elif isinstance(key, str): + for (i, func) in enumerate(self._data_preparation_functions): + if func.__name__ == key: del self._data_preparation_functions[i] return - raise KeyError('The data preparation function "%s" was not found ' - 'in the dataset "%s"!'%(key, self._name)) + raise KeyError( + f'The data preparation function "{key}" was not found in the ' + f'dataset "{self._name}"!') - TypeError('The key argument must be an instance of int or str!') + TypeError( + 'The key argument must be an instance of int or str!') - def prepare_data(self, data, tl=None): + def prepare_data( + self, + data, + tl=None, + ): """Prepares the data by calling the data preparation callback functions of this dataset. Parameters ---------- - data : DatasetData instance - The DatasetData instance holding the data. - tl : TimeLord instance | None - The TimeLord instance that should be used to time the data + data : instance of DatasetData + The instance of DatasetData holding the data. + tl : instance of TimeLord | None + The instance TimeLord that should be used to time the data preparation. """ for data_prep_func in self._data_preparation_functions: - task = 'Preparing data of dataset "'+self.name+'" by '\ - '"'+data_prep_func.__name__+'".' 
- with TaskTimer(tl, task): + with TaskTimer( + tl, + f'Preparing data of dataset "{self.name}" by ' + f'"{data_prep_func.__name__}".'): data_prep_func(data) def load_and_prepare_data( - self, livetime=None, keep_fields=None, compress=False, - efficiency_mode=None, tl=None): + self, + livetime=None, + keep_fields=None, + compress=False, + efficiency_mode=None, + tl=None, + ): """Loads and prepares the experimental and monte-carlo data of this dataset by calling its ``load_data`` and ``prepare_data`` methods. After loading the data it drops all unnecessary data fields if they are @@ -905,39 +1005,39 @@ def load_and_prepare_data( The efficiency mode the data should get loaded with. Possible values are: - - 'memory': + ``'memory'`` The data will be load in a memory efficient way. This will require more time, because all data records of a file will be loaded sequentially. - - 'time' + ``'time'`` The data will be loaded in a time efficient way. This will require more memory, because each data file gets loaded in memory at once. The default value is ``'time'``. If set to ``None``, the default value will be used. - tl : TimeLord instance | None - The TimeLord instance that should be used to time the data loading - and preparation. + tl : instance of TimeLord | None + The instance of TimeLord that should be used to time the data + loading and preparation. Returns ------- - data : DatasetData - The DatasetData instance holding the experimental and monte-carlo + data : instance of DatasetData + The instance of DatasetData holding the experimental and monte-carlo data. 
""" - if(keep_fields is None): + if keep_fields is None: keep_fields = list() - elif(not issequenceof(keep_fields, str)): - raise TypeError('The keep_fields argument must be None, or a ' - 'sequence of str!') + elif not issequenceof(keep_fields, str): + raise TypeError( + 'The keep_fields argument must be None, or a sequence of str!') keep_fields = list(keep_fields) dtc_dict = None dtc_except_fields = None - if(compress): - dtc_dict = { np.dtype(np.float64): np.dtype(np.float32) } - dtc_except_fields = [ 'mcweight' ] + if compress: + dtc_dict = {np.dtype(np.float64): np.dtype(np.float32)} + dtc_except_fields = ['mcweight'] data = self.load_data( keep_fields=keep_fields, @@ -950,7 +1050,7 @@ def load_and_prepare_data( self.prepare_data(data, tl=tl) # Drop unrequired data fields. - if(data.exp is not None): + if data.exp is not None: with TaskTimer(tl, 'Cleaning exp data.'): keep_fields_exp = ( CFG['dataset']['analysis_required_exp_field_names'] + @@ -958,7 +1058,7 @@ def load_and_prepare_data( ) data.exp.tidy_up(keep_fields=keep_fields_exp) - if(data.mc is not None): + if data.mc is not None: with TaskTimer(tl, 'Cleaning MC data.'): keep_fields_mc = ( CFG['dataset']['analysis_required_exp_field_names'] + @@ -972,7 +1072,10 @@ def load_and_prepare_data( return data - def add_binning_definition(self, binning): + def add_binning_definition( + self, + binning, + ): """Adds a binning setting to this dataset. Parameters @@ -980,16 +1083,20 @@ def add_binning_definition(self, binning): binning : BinningDefinition The BinningDefinition object holding the binning information. 
""" - if(not isinstance(binning, BinningDefinition)): - raise TypeError('The "binning" argument must be of type ' - 'BinningDefinition!') - if(binning.name in self._binning_definitions): - raise KeyError('The binning definition "%s" is already defined for ' - 'dataset "%s"!'%(binning.name, self._name)) + if not isinstance(binning, BinningDefinition): + raise TypeError( + 'The "binning" argument must be of type BinningDefinition!') + if binning.name in self._binning_definitions: + raise KeyError( + f'The binning definition "{binning.name}" is already defined ' + f'for dataset "{self._name}"!') self._binning_definitions[binning.name] = binning - def get_binning_definition(self, name): + def get_binning_definition( + self, + name, + ): """Gets the BinningDefinition object for the given binning name. Parameters @@ -999,15 +1106,19 @@ def get_binning_definition(self, name): Returns ------- - binning_definition : BinningDefinition instance - The requested BinningDefinition instance. + binning_definition : instance of BinningDefinition + The requested instance of BinningDefinition. """ - if(name not in self._binning_definitions): - raise KeyError('The given binning name "%s" has not been added to ' - 'the dataset yet!'%(name)) + if name not in self._binning_definitions: + raise KeyError( + f'The given binning name "{name}" has not been added to the ' + 'dataset yet!') return self._binning_definitions[name] - def remove_binning_definition(self, name): + def remove_binning_definition( + self, + name, + ): """Removes the BinningDefinition object from the dataset. Parameters @@ -1016,15 +1127,17 @@ def remove_binning_definition(self, name): The name of the binning definition. """ - if(name not in self._binning_definitions): + if name not in self._binning_definitions: raise KeyError( f'The given binning name "{name}" does not exist in the ' - f'dataset "{self.name}", nothing to remove!' 
- ) + f'dataset "{self.name}", nothing to remove!') self._binning_definitions.pop(name) - def has_binning_definition(self, name): + def has_binning_definition( + self, + name, + ): """Checks if the dataset has a defined binning definition with the given name. @@ -1038,11 +1151,15 @@ def has_binning_definition(self, name): check : bool True if the binning definition exists, False otherwise. """ - if(name in self._binning_definitions): + if name in self._binning_definitions: return True return False - def define_binning(self, name, binedges): + def define_binning( + self, + name, + binedges, + ): """Defines a binning for ``name``, and adds it as binning definition. Parameters @@ -1055,9 +1172,9 @@ def define_binning(self, name, binedges): Returns ------- - binning : BinningDefinition - The BinningDefinition object which was created and added to this - season. + binning : instance of BinningDefinition + The instance of BinningDefinition which was created and added to + this dataset. """ binning = BinningDefinition(name, binedges) self.add_binning_definition(binning) @@ -1069,50 +1186,64 @@ def replace_binning_definition(self, binning): Parameters ---------- - binning : BinningDefinition instance - The instance of BinningDefinition that will replace the data set's + binning : instance of BinningDefinition + The instance of BinningDefinition that will replace the dataset's BinningDefinition instance of the same name. 
""" - if(not isinstance(binning, BinningDefinition)): - raise TypeError('The "binning" argument must be of type ' - 'BinningDefinition!') - if(binning.name not in self._binning_definitions): - raise KeyError('The given binning definition "%s" has not been ' - 'added to the dataset yet!'%(binning.name)) + if not isinstance(binning, BinningDefinition): + raise TypeError( + 'The "binning" argument must be of type BinningDefinition!') + if binning.name not in self._binning_definitions: + raise KeyError( + f'The given binning definition "{binning.name}" has not been ' + 'added to the dataset yet!') self._binning_definitions[binning.name] = binning - def add_aux_data_definition(self, name, pathfilenames): + def add_aux_data_definition( + self, + name, + pathfilenames, + ): """Adds the given data files as auxiliary data definition to the dataset. Parameters ---------- name : str - The name of the auxiliary data. The name is used as identifier for - the data within SkyLLH. + The name of the auxiliary data definition. The name is used as + identifier for the data within SkyLLH. pathfilenames : str | sequence of str The file name(s) (including paths) of the data file(s). """ - name = str_cast(name, - 'The name argument must be castable to type str!') - pathfilenames = list_of_cast(str, pathfilenames, + name = str_cast( + name, + 'The name argument must be castable to type str! ' + f'Its current type is {classname(name)}.') + + pathfilenames = list_of_cast( + str, + pathfilenames, 'The pathfilenames argument must be of type str or a sequence ' - 'of str!') + f'of str! 
Its current type is {classname(pathfilenames)}.') - if(name in self._aux_data_definitions): - raise KeyError('The auxiliary data definition "%s" is already ' - 'defined for dataset "%s"!'%(name, self.name)) + if name in self._aux_data_definitions: + raise KeyError( + f'The auxiliary data definition "{name}" is already defined ' + f'for dataset "{self.name}"!') self._aux_data_definitions[name] = pathfilenames - def get_aux_data_definition(self, name): + def get_aux_data_definition( + self, + name, + ): """Returns the auxiliary data definition from the dataset. Parameters ---------- name : str - The name of the auxiliary data. + The name of the auxiliary data definition. Raises ------ @@ -1122,17 +1253,54 @@ def get_aux_data_definition(self, name): Returns ------- aux_data_definition : list of str - The locations (pathfilenames) of the files defined in the auxiliary data - as auxiliary data definition. + The locations (pathfilenames) of the files defined in the auxiliary + data as auxiliary data definition. """ - - if(not name in self._aux_data_definitions): - raise KeyError('The auxiliary data definition "{}" does not ' - 'exist in dataset "{}"!'.format(name, self.name)) + if name not in self._aux_data_definitions: + raise KeyError( + f'The auxiliary data definition "{name}" does not exist in ' + f'dataset "{self.name}"!') return self._aux_data_definitions[name] - def remove_aux_data_definition(self, name): + def set_aux_data_definition( + self, + name, + pathfilenames, + ): + """Sets the files of the auxiliary data definition, which has the given + name. + + Parameters + ---------- + name : str + The name of the auxiliary data definition. + pathfilenames : str | sequence of str + The file name(s) (including paths) of the data file(s). + """ + name = str_cast( + name, + 'The name argument must be castable to type str! 
' + f'Its current type is {classname(name)}.') + + pathfilenames = list_of_cast( + str, + pathfilenames, + 'The pathfilenames argument must be of type str or a sequence ' + f'of str! Its current type is {classname(pathfilenames)}.') + + if name not in self._aux_data_definitions: + raise KeyError( + f'The auxiliary data definition "{name}" is not defined ' + f'for dataset "{self.name}"! Use add_aux_data_definition ' + 'instead!') + + self._aux_data_definitions[name] = pathfilenames + + def remove_aux_data_definition( + self, + name, + ): """Removes the auxiliary data definition from the dataset. Parameters @@ -1140,15 +1308,18 @@ def remove_aux_data_definition(self, name): name : str The name of the dataset that should get removed. """ - if(name not in self._aux_data_definitions): + if name not in self._aux_data_definitions: raise KeyError( f'The auxiliary data definition "{name}" does not exist in ' - f'dataset "{self.name}", nothing to remove!' - ) + f'dataset "{self.name}", nothing to remove!') self._aux_data_definitions.pop(name) - def add_aux_data(self, name, data): + def add_aux_data( + self, + name, + data, + ): """Adds the given data as auxiliary data to this data set. Parameters @@ -1163,16 +1334,21 @@ def add_aux_data(self, name, data): KeyError If auxiliary data is already stored under the given name. """ - name = str_cast(name, + name = str_cast( + name, 'The name argument must be castable to type str!') - if(name in self._aux_data): - raise KeyError('The auxiliary data "%s" is already defined for ' - 'dataset "%s"!'%(name, self.name)) + if name in self._aux_data: + raise KeyError( + f'The auxiliary data "{name}" is already defined for dataset ' + f'"{self.name}"!') self._aux_data[name] = data - def get_aux_data(self, name): + def get_aux_data( + self, + name, + ): """Retrieves the auxiliary data that is stored in this data set under the given name. 
@@ -1191,16 +1367,21 @@ def get_aux_data(self, name): KeyError If no auxiliary data is stored with the given name. """ - name = str_cast(name, + name = str_cast( + name, 'The name argument must be castable to type str!') - if(name not in self._aux_data): - raise KeyError('The auxiliary data "%s" is not defined for ' - 'dataset "%s"!'%(name, self.name)) + if name not in self._aux_data: + raise KeyError( + f'The auxiliary data "{name}" is not defined for dataset ' + f'"{self.name}"!') return self._aux_data[name] - def remove_aux_data(self, name): + def remove_aux_data( + self, + name, + ): """Removes the auxiliary data that is stored in this data set under the given name. @@ -1209,22 +1390,25 @@ def remove_aux_data(self, name): name : str The name of the dataset that should get removed. """ - if(name not in self._aux_data): + if name not in self._aux_data: raise KeyError( f'The auxiliary data "{name}" is not defined for dataset ' - f'"{self.name}", nothing to remove!' - ) + f'"{self.name}", nothing to remove!') self._aux_data.pop(name) -class DatasetCollection(object): +class DatasetCollection( + object): """The DatasetCollection class describes a collection of different datasets. New datasets can be added via the add-assign operator (+=), which calls the ``add_datasets`` method. """ - def __init__(self, name, description=''): + def __init__( + self, + name, + description=''): """Creates a new DatasetCollection instance. Parameters @@ -1244,10 +1428,12 @@ def name(self): """The name (str) of the dataset collection. """ return self._name + @name.setter def name(self, name): - if(not isinstance(name, str)): - raise TypeError('The name of the dataset collection must be of type str!') + if not isinstance(name, str): + raise TypeError( + 'The name of the dataset collection must be of type str!') self._name = name @property @@ -1255,10 +1441,13 @@ def description(self): """The (longer) description of the dataset collection. 
""" return self._description + @description.setter def description(self, description): - if(not isinstance(description, str)): - raise TypeError('The description of the dataset collection must be of type str!') + if not isinstance(description, str): + raise TypeError( + 'The description of the dataset collection must be of type ' + 'str!') self._description = description @property @@ -1287,8 +1476,9 @@ def __iadd__(self, ds): """Implementation of the ``self += dataset`` operation to add a Dataset object to this dataset collection. """ - if(not isinstance(ds, Dataset)): - raise TypeError('The dataset object must be a subclass of Dataset!') + if not isinstance(ds, Dataset): + raise TypeError( + 'The dataset object must be a subclass of Dataset!') self.add_datasets(ds) @@ -1298,48 +1488,56 @@ def __str__(self): """Implementation of the pretty string representation of the DatasetCollection instance. It shows the available datasets. """ - lines = 'DatasetCollection "%s"\n'%(self.name) + lines = f'DatasetCollection "{self.name}"\n' lines += "-"*display.PAGE_WIDTH + "\n" lines += "Description:\n" + self.description + "\n" lines += "Available datasets:\n" for name in self.dataset_names: lines += '\n' - lines += display.add_leading_text_line_padding(2, str(self._datasets[name])) + lines += display.add_leading_text_line_padding( + 2, str(self._datasets[name])) return lines - def add_datasets(self, datasets): + def add_datasets( + self, + datasets, + ): """Adds the given Dataset object(s) to this dataset collection. Parameters ---------- - datasets : Dataset | sequence of Dataset - The Dataset object or the sequence of Dataset objects that should be - added to the dataset collection. + datasets : instance of Dataset | sequence of instance of Dataset + The instance of Dataset or the sequence of instance of Dataset that + should be added to the dataset collection. 
Returns ------- - self : DatasetCollection - This DatasetCollection object in order to be able to chain several - add_dataset calls. + self : instance of DatasetCollection + This instance of DatasetCollection in order to be able to chain + several ``add_datasets`` calls. """ - if(not issequence(datasets)): + if not issequence(datasets): datasets = [datasets] for dataset in datasets: - if(not isinstance(dataset, Dataset)): - raise TypeError('The dataset object must be a sub-class of ' - 'Dataset!') + if not isinstance(dataset, Dataset): + raise TypeError( + 'The dataset object must be a sub-class of Dataset!') - if(dataset.name in self._datasets): - raise KeyError('Dataset "%s" already exists!'%(dataset.name)) + if dataset.name in self._datasets: + raise KeyError( + f'Dataset "{dataset.name}" already exists!') self._datasets[dataset.name] = dataset return self - def remove_dataset(self, name): + def remove_dataset( + self, + name, + ): """Removes the given dataset from the collection. Parameters @@ -1347,13 +1545,17 @@ def remove_dataset(self, name): name : str The name of the dataset that should get removed. """ - if(name not in self._datasets): - raise KeyError('Dataset "%s" is not part of the dataset ' - 'collection "%s", nothing to remove!'%(name, self.name)) + if name not in self._datasets: + raise KeyError( + f'Dataset "{name}" is not part of the dataset collection ' + f'"{self.name}", nothing to remove!') self._datasets.pop(name) - def get_dataset(self, name): + def get_dataset( + self, + name, + ): """Retrieves a Dataset object from this dataset collection. Parameters @@ -1372,15 +1574,19 @@ def get_dataset(self, name): If the data set of the given name is not present in this data set collection. """ - if(name not in self._datasets): + if name not in self._datasets: ds_names = '", "'.join(self.dataset_names) ds_names = '"'+ds_names+'"' - raise KeyError('The dataset "%s" is not part of the dataset ' - 'collection "%s"! 
Possible dataset names are: %s!'%( - name, self.name, ds_names)) + raise KeyError( + f'The dataset "{name}" is not part of the dataset collection ' + f'"{self.name}"! Possible dataset names are: {ds_names}!') + return self._datasets[name] - def get_datasets(self, names): + def get_datasets( + self, + names, + ): """Retrieves a list of Dataset objects from this dataset collection. Parameters @@ -1399,11 +1605,12 @@ def get_datasets(self, names): If one of the requested data sets is not present in this data set collection. """ - if(not issequence(names)): + if not issequence(names): names = [names] - if(not issequenceof(names, str)): - raise TypeError('The names argument must be an instance of str or ' - 'a sequence of str instances!') + if not issequenceof(names, str): + raise TypeError( + 'The names argument must be an instance of str or a sequence ' + 'of str instances!') datasets = [] for name in names: @@ -1411,7 +1618,10 @@ def get_datasets(self, names): return datasets - def set_exp_field_name_renaming_dict(self, d): + def set_exp_field_name_renaming_dict( + self, + d, + ): """Sets the dictionary with the data field names of the experimental data that needs to be renamed just after loading the data. The dictionary will be set to all added data sets. @@ -1425,7 +1635,10 @@ def set_exp_field_name_renaming_dict(self, d): for (dsname, dataset) in self._datasets.items(): dataset.exp_field_name_renaming_dict = d - def set_mc_field_name_renaming_dict(self, d): + def set_mc_field_name_renaming_dict( + self, + d, + ): """Sets the dictionary with the data field names of the monte-carlo data that needs to be renamed just after loading the data. The dictionary will be set to all added data sets. 
@@ -1439,7 +1652,11 @@ def set_mc_field_name_renaming_dict(self, d): for (dsname, dataset) in self._datasets.items(): dataset.mc_field_name_renaming_dict = d - def set_dataset_prop(self, name, value): + def set_dataset_prop( + self, + name, + value, + ): """Sets the given property to the given name for all data sets of this data set collection. @@ -1456,12 +1673,17 @@ def set_dataset_prop(self, name, value): If the given property does not exist in the data sets. """ for (dsname, dataset) in self._datasets.items(): - if(not hasattr(dataset, name)): - raise KeyError('The data set "%s" does not have a property ' - 'named "%s"!'%(dsname, name)) + if not hasattr(dataset, name): + raise KeyError( + f'The dataset "{dsname}" does not have a property named ' + f'"{name}"!') setattr(dataset, name, value) - def define_binning(self, name, binedges): + def define_binning( + self, + name, + binedges, + ): """Defines a binning definition and adds it to all the datasets of this dataset collection. @@ -1472,25 +1694,31 @@ def define_binning(self, name, binedges): binedges : sequence The sequence of the bin edges, that should be used for the binning. """ - for (dsname, dataset) in self._datasets.items(): + for dataset in self._datasets.values(): dataset.define_binning(name, binedges) - def add_data_preparation(self, func): + def add_data_preparation( + self, + func, + ): """Adds the data preparation function to all the datasets of this dataset collection. Parameters ---------- func : callable - The object with call signature __call__(data) that will prepare + The object with call signature ``__call__(data)`` that will prepare the data after it was loaded. The argument 'data' is the DatasetData instance holding the experimental and monte-carlo data. This function must alter the properties of the DatasetData instance. 
""" - for (dsname, dataset) in self._datasets.items(): + for dataset in self._datasets.values(): dataset.add_data_preparation(func) - def remove_data_preparation(self, key=-1): + def remove_data_preparation( + self, + key=-1, + ): """Removes data preparation function from all the datasets of this dataset collection. @@ -1509,17 +1737,26 @@ def remove_data_preparation(self, key=-1): KeyError If the data preparation function cannot be found. """ - for (dsname, dataset) in self._datasets.items(): + for dataset in self._datasets.values(): dataset.remove_data_preparation(key=key) - def update_version_qualifiers(self, verqualifiers): + def update_version_qualifiers( + self, + verqualifiers, + ): """Updates the version qualifiers of all datasets of this dataset collection. """ - for (dsname, dataset) in self._datasets.items(): + for dataset in self._datasets.values(): dataset.update_version_qualifiers(verqualifiers) - def load_data(self, livetime=None, tl=None, ppbar=None): + def load_data( + self, + livetime=None, + tl=None, + ppbar=None, + **kwargs, + ): """Loads the data of all data sets of this data set collection. Parameters @@ -1529,11 +1766,15 @@ def load_data(self, livetime=None, tl=None, ppbar=None): DatasetData instances, otherwise uses the live time from the Dataset instance. If a dictionary of data set names and floats is given, it defines the livetime for the individual data sets. - tl : TimeLord instance | None - The TimeLord instance that should be used to time the data load + tl : instance of TimeLord | None + The instance of TimeLord that should be used to time the data load operation. ppbar : instance of ProgressBar | None The optional parent progress bar. + **kwargs + Additional keyword arguments are passed to the + :meth:`~skyllh.core.dataset.Dataset.load_data` method of the + individual datasets. 
Returns ------- @@ -1541,35 +1782,43 @@ def load_data(self, livetime=None, tl=None, ppbar=None): The dictionary with the DatasetData instance holding the data of an individual data set as value and the data set's name as key. """ - if(not isinstance(livetime, dict)): + if not isinstance(livetime, dict): livetime_dict = dict() for (dsname, dataset) in self._datasets.items(): livetime_dict[dsname] = livetime livetime = livetime_dict - if(len(livetime) != len(self._datasets)): - raise ValueError('The livetime argument must be None, a single ' - 'float, or a dictionary with %d str:float entries! Currently ' - 'the dictionary has %d entries.'%( - len(self._datasets), len(livetime))) + if len(livetime) != len(self._datasets): + raise ValueError( + 'The livetime argument must be None, a single float, or a ' + f'dictionary with {len(self._datasets)} str:float entries! ' + f'Currently the dictionary has {len(livetime)} entries.') pbar = ProgressBar(len(self._datasets), parent=ppbar).start() data_dict = dict() for (dsname, dataset) in self._datasets.items(): data_dict[dsname] = dataset.load_data( - livetime=livetime[dsname], tl=tl) + livetime=livetime[dsname], + tl=tl, + **kwargs) pbar.increment() pbar.finish() return data_dict -class DatasetData(object): +class DatasetData( + object): """This class provides the container for the actual experimental and - monto-carlo data. It also holds a reference to the Dataset instance, which - holds the data's meta information. + monto-carlo data. """ - def __init__(self, data_exp, data_mc, livetime): + def __init__( + self, + data_exp, + data_mc, + livetime, + **kwargs, + ): """Creates a new DatasetData instance. Parameters @@ -1582,7 +1831,7 @@ def __init__(self, data_exp, data_mc, livetime): livetime : float The integrated livetime in days of the data. 
""" - super(DatasetData, self).__init__() + super().__init__(**kwargs) self.exp = data_exp self.mc = data_mc @@ -1594,11 +1843,13 @@ def exp(self): This is None, if there is no experimental data available. """ return self._exp + @exp.setter def exp(self, data): - if(data is not None): - if(not isinstance(data, DataFieldRecordArray)): - raise TypeError('The exp property must be an instance of ' + if data is not None: + if not isinstance(data, DataFieldRecordArray): + raise TypeError( + 'The exp property must be an instance of ' 'DataFieldRecordArray!') self._exp = data @@ -1608,11 +1859,13 @@ def mc(self): This is None, if there is no monte-carlo data available. """ return self._mc + @mc.setter def mc(self, data): - if(data is not None): - if(not isinstance(data, DataFieldRecordArray)): - raise TypeError('The mc property must be an instance of ' + if data is not None: + if not isinstance(data, DataFieldRecordArray): + raise TypeError( + 'The mc property must be an instance of ' 'DataFieldRecordArray!') self._mc = data @@ -1622,10 +1875,12 @@ def livetime(self): This is None, if there is no live-time provided. """ return self._livetime + @livetime.setter def livetime(self, lt): - if(lt is not None): - lt = float_cast(lt, + if lt is not None: + lt = float_cast( + lt, 'The livetime property must be castable to type float!') self._livetime = lt @@ -1634,7 +1889,7 @@ def exp_field_names(self): """(read-only) The list of field names present in the experimental data. This is an empty list if there is no experimental data available. """ - if(self._exp is None): + if self._exp is None: return [] return self._exp.field_name_list @@ -1645,7 +1900,10 @@ def mc_field_names(self): return self._mc.field_name_list -def assert_data_format(dataset, data): +def assert_data_format( + dataset, + data, +): """Checks the format of the experimental and monte-carlo data. 
     Raises
@@ -1656,76 +1914,91 @@ def _get_missing_keys(keys, required_keys):
         missing_keys = []
         for reqkey in required_keys:
-            if(reqkey not in keys):
+            if reqkey not in keys:
                 missing_keys.append(reqkey)
         return missing_keys
 
-    if(data.exp is not None):
-        # Check experimental data keys.
+    if data.exp is not None:
         missing_exp_keys = _get_missing_keys(
             data.exp.field_name_list,
             CFG['dataset']['analysis_required_exp_field_names'])
-        if(len(missing_exp_keys) != 0):
-            raise KeyError('The following data fields are missing for the '
-                'experimental data of dataset "%s": '%(dataset.name)+
+        if len(missing_exp_keys) != 0:
+            raise KeyError(
+                'The following data fields are missing for the experimental '
+                f'data of dataset "{dataset.name}": ' +
                 ', '.join(missing_exp_keys))
 
-    if(data.mc is not None):
-        # Check monte-carlo data keys.
+    if data.mc is not None:
         missing_mc_keys = _get_missing_keys(
             data.mc.field_name_list,
             CFG['dataset']['analysis_required_exp_field_names'] +
             CFG['dataset']['analysis_required_mc_field_names'])
-        if(len(missing_mc_keys) != 0):
-            raise KeyError('The following data fields are missing for the '
-                'monte-carlo data of dataset "%s": '%(dataset.name)+
+        if len(missing_mc_keys) != 0:
+            raise KeyError(
+                'The following data fields are missing for the monte-carlo '
+                f'data of dataset "{dataset.name}": ' +
                 ', '.join(missing_mc_keys))
 
-    if(data.livetime is None):
-        raise ValueError('No livetime was specified for dataset "{}"!'.format(
-            dataset.name))
+    if data.livetime is None:
+        raise ValueError(
+            f'No livetime was specified for dataset "{dataset.name}"!')
 
 
-def remove_events(data_exp, mjds):
+def remove_events(
+        data_exp,
+        mjds,
+):
     """Utility function to remove events having the specified MJD time stamps.
 
     Parameters
     ----------
-    data_exp : numpy record ndarray
-        The numpy record ndarray holding the experimental data events.
+ data_exp : instance of DataFieldRecordArray + The instance of DataFieldRecordArray holding the experimental data + events. mjds : float | array of floats The MJD time stamps of the events, that should get removed from the experimental data array. Returns ------- - data_exp : numpy record ndarray - The array holding the experimental data events with the specified events - removed. + data_exp : instance of DataFieldRecordArray + The instance of DataFieldRecordArray holding the experimental data + events with the specified events removed. """ mjds = np.atleast_1d(mjds) + mask = np.zeros((len(data_exp)), dtype=np.bool_) for time in mjds: - mask = data_exp['time'] == time - if(np.sum(mask) > 1): - raise LookupError('The MJD time stamp %f is not unique!'%(time)) - data_exp = data_exp[~mask] + m = data_exp['time'] == time + if np.count_nonzero(m) > 1: + raise LookupError( + f'The MJD time stamp {time} is not unique!') + mask |= m + data_exp = data_exp[~mask] return data_exp + def generate_data_file_root_dir( - default_base_path, default_sub_path_fmt, - version, verqualifiers, - base_path=None, sub_path_fmt=None + default_base_path, + default_sub_path_fmt, + version, + verqualifiers, + base_path=None, + sub_path_fmt=None, ): """Generates the root directory of the data files based on the given base path and sub path format. If base_path is None, default_base_path is used. If sub_path_fmt is None, default_sub_path_fmt is used. - The default_sub_path_fmt and sub_path_fmt arguments can contain the + The ``default_sub_path_fmt`` and ``sub_path_fmt`` arguments can contain the following wildcards: - - '{version:d}' - - '{:d}' + + ``{version:d}`` + The version integer number of the dataset. + ``{:d}`` + The integer number of the specific version qualifier + ``'verqualifiers_key'``. Parameters ---------- @@ -1747,25 +2020,34 @@ def generate_data_file_root_dir( root_dir : str The generated root directory of the data files. 
""" - if(base_path is None): - if(default_base_path is None): - raise ValueError('The default_base_path argument must not be None, ' - 'when the base_path argument is set to None!') + if base_path is None: + if default_base_path is None: + raise ValueError( + 'The default_base_path argument must not be None, when the ' + 'base_path argument is set to None!') base_path = default_base_path - if(sub_path_fmt is None): + if sub_path_fmt is None: sub_path_fmt = default_sub_path_fmt - fmtdict = dict( [('version', version)] + list(verqualifiers.items()) ) + fmtdict = dict( + [('version', version)] + list(verqualifiers.items()) + ) sub_path = sub_path_fmt.format(**fmtdict) root_dir = os.path.join(base_path, sub_path) return root_dir -def get_data_subset(data, livetime, t_start, t_end): - """Gets DatasetData and Livetime objects with data subsets between the given - time range from t_start to t_end. + +def get_data_subset( + data, + livetime, + t_start, + t_stop, +): + """Gets instance of DatasetData and instance of Livetime with data subsets + between the given time range from ``t_start`` to ``t_stop``. Parameters ---------- @@ -1780,29 +2062,37 @@ def get_data_subset(data, livetime, t_start, t_end): Returns ------- - dataset_data_subset : DatasetData - DatasetData object with subset of the data between the given time range - from t_start to t_end. - livetime_subset : Livetime - Livetime object with subset of the data between the given time range - from t_start to t_end. + data_subset : instance of DatasetData + The instance of DatasetData with subset of the data between the given + time range from ``t_start`` to ``t_stop``. + livetime_subset : instance of Livetime + The instance of Livetime for a subset of the data between the given + time range from ``t_start`` to ``t_stop``. 
""" - if(not isinstance(data, DatasetData)): - raise TypeError('The "data" argument must be of type DatasetData!') - if(not isinstance(livetime, Livetime)): - raise TypeError('The "livetime" argument must be of type Livetime!') - - exp_slice = np.logical_and(data.exp['time'] >= t_start, - data.exp['time'] < t_end) - mc_slice = np.logical_and(data.mc['time'] >= t_start, - data.mc['time'] < t_end) + if not isinstance(data, DatasetData): + raise TypeError( + 'The "data" argument must be of type DatasetData!') + if not isinstance(livetime, Livetime): + raise TypeError( + 'The "livetime" argument must be of type Livetime!') + + exp_slice = np.logical_and( + data.exp['time'] >= t_start, + data.exp['time'] < t_stop) + mc_slice = np.logical_and( + data.mc['time'] >= t_start, + data.mc['time'] < t_stop) data_exp = data.exp[exp_slice] data_mc = data.mc[mc_slice] - uptime_mjd_intervals_arr = livetime.get_ontime_intervals_between(t_start, t_end) + uptime_mjd_intervals_arr = livetime.get_uptime_intervals_between( + t_start, t_stop) livetime_subset = Livetime(uptime_mjd_intervals_arr) - dataset_data_subset = DatasetData(data_exp, data_mc, livetime_subset.livetime) + data_subset = DatasetData( + data_exp=data_exp, + data_mc=data_mc, + livetime=livetime_subset.livetime) - return (dataset_data_subset, livetime_subset) + return (data_subset, livetime_subset) diff --git a/skyllh/core/debugging.py b/skyllh/core/debugging.py index 11367beaea..a67e187af9 100644 --- a/skyllh/core/debugging.py +++ b/skyllh/core/debugging.py @@ -7,6 +7,10 @@ from skyllh.core.config import CFG +# Initialize the root logger. +logging.root.setLevel(logging.NOTSET) + + def enable_tracing(): """Enables the tracing log debug messages of SkyLLH. """ @@ -25,7 +29,8 @@ def is_tracing_enabled(): return CFG['debugging']['enable_tracing'] -def get_logger(name): +def get_logger( + name): """Retrieves the logger with the given name from the Python logging system. 
Parameters @@ -43,7 +48,9 @@ def get_logger(name): return logger -def setup_logger(name, log_level): +def setup_logger( + name, + log_level): """Initializes logger with a given name and a log level. Parameters @@ -58,7 +65,11 @@ def setup_logger(name, log_level): logger.setLevel(log_level) -def setup_console_handler(name, log_level=None, log_format=None, stream=None): +def setup_console_handler( + name, + log_level=None, + log_format=None, + stream=None): """Initializes `StreamHandler` for a logger with a given name and sets its handling level. @@ -78,13 +89,13 @@ def setup_console_handler(name, log_level=None, log_format=None, stream=None): """ logger = logging.getLogger(name) - if(log_level is None): + if log_level is None: log_level = logger.level - if(log_format is None): + if log_format is None: log_format = CFG['debugging']['log_format'] - if(stream is None): + if stream is None: stream = sys.stderr # Create and add `StreamHandler` to the logger. @@ -95,7 +106,12 @@ def setup_console_handler(name, log_level=None, log_format=None, stream=None): def setup_file_handler( - name, filename, log_level=None, path=None, log_format=None, mode='a'): + name, + filename, + log_level=None, + path=None, + log_format=None, + mode='a'): """Initializes `FileHandler` for a logger with a given name and sets its handling level. @@ -120,13 +136,13 @@ def setup_file_handler( """ logger = logging.getLogger(name) - if(log_level is None): + if log_level is None: log_level = logger.level - if(path is None): + if path is None: path = CFG['project']['working_directory'] - if(log_format is None): + if log_format is None: log_format = CFG['debugging']['log_format'] pathfilename = os.path.join(path, filename) @@ -136,67 +152,3 @@ def setup_file_handler( fh.setLevel(log_level) fh.setFormatter(logging.Formatter(log_format)) logger.addHandler(fh) - - -class QueueHandler(logging.Handler): - """ - This handler sends events to a queue. 
Typically, it would be used together - with a multiprocessing Queue to centralise logging to file in one process - (in a multi-process application), so as to avoid file write contention - between processes. - - This code is new in Python 3.2, but this class can be copy pasted into - user code for use with earlier Python versions. - """ - def __init__(self, queue): - """ - Initialise an instance, using the passed queue. - """ - logging.Handler.__init__(self) - self.queue = queue - - def enqueue(self, record): - """ - Enqueue a record. - - The base implementation uses put_nowait. You may want to override - this method if you want to use blocking, timeouts or custom queue - implementations. - """ - self.queue.put_nowait(record) - - def prepare(self, record): - """ - Prepares a record for queuing. The object returned by this method is - enqueued. - - The base implementation formats the record to merge the message - and arguments, and removes unpickleable items from the record - in-place. - - You might want to override this method if you want to convert - the record to a dict or JSON string, or send a modified copy - of the record while leaving the original intact. - """ - # The format operation gets traceback text into record.exc_text - # (if there's exception data), and also puts the message into - # record.message. We can then use this to replace the original - # msg + args, as these might be unpickleable. We also zap the - # exc_info attribute, as it's no longer needed and, if not None, - # will typically not be pickleable. - self.format(record) - record.msg = record.message - record.args = None - record.exc_info = None - return record - - def emit(self, record): - """ - Emit a record. - - Writes the LogRecord to the queue, preparing it for pickling first. 
- """ - try: - self.enqueue(self.prepare(record)) - except Exception: - self.handleError(record) diff --git a/skyllh/core/detsigyield.py b/skyllh/core/detsigyield.py index 6e4b781859..c3fb837215 100644 --- a/skyllh/core/detsigyield.py +++ b/skyllh/core/detsigyield.py @@ -1,64 +1,66 @@ # -*- coding: utf-8 -*- import abc -import numpy as np -from astropy import units - -from skyllh.core.py import issequenceofsubclass -from skyllh.core.dataset import Dataset, DatasetData -from skyllh.core.livetime import Livetime -from skyllh.physics.source import SourceModel -from skyllh.physics.flux import FluxModel - - -def get_integrated_livetime_in_days(livetime): - """Gets the integrated live-time in days from the given livetime argument. - - Parameters - ---------- - livetime : float | Livetime instance - The live-time in days as float, or an instance of Livetime. - - Returns - ------- - livetime_days : float - The integrated live-time in days. - """ - livetime_days = livetime - if(isinstance(livetime, Livetime)): - livetime_days = livetime.livetime - return livetime_days - - -class DetSigYield(object, metaclass=abc.ABCMeta): +from skyllh.core.py import ( + classname, + issequence, + issequenceof, +) +from skyllh.core.dataset import ( + Dataset, + DatasetData, +) +from skyllh.core.flux_model import ( + FluxModel, +) +from skyllh.core.livetime import ( + Livetime, +) +from skyllh.core.progressbar import ( + ProgressBar, +) +from skyllh.core.types import ( + SourceHypoGroup_t, +) + + +class DetSigYield( + object, + metaclass=abc.ABCMeta, +): """This is the abstract base class for a detector signal yield. - The detector signal yield, Y_s(x_s,p_s), is defined as the expected mean - number of signal events detected by the detector from a given source at - position x_s with flux fit parameters p_s. + The detector signal yield, Y_s(p_s), is defined as the expected mean + number of signal events detected by the detector from a given source with + source parameters p_s. 
To construct a detector signal yield object, four ingredients are needed: the dataset holding the monte-carlo data events, a signal flux - model, the live time, and an implementation method that knows howto contruct + model, the livetime, and a builder instance that knows how to construct the actual detector yield in an efficient way. In general, the implementation method depends on the detector, the source, the flux model with its flux model's signal parameters, and the dataset. Hence, for a given detector, source, flux model, and dataset, an appropriate implementation method needs to be chosen. """ - - def __init__(self, implmethod, dataset, fluxmodel, livetime): + def __init__( + self, + param_names, + dataset, + fluxmodel, + livetime, + **kwargs, + ): """Constructs a new detector signal yield object. It takes the monte-carlo data events, a flux model of the signal, and the live time to compute the detector signal yield. Parameters ---------- - implmethod : instance of DetSigYieldImplMethod - The implementation method to use for constructing and receiving - the detector signal yield. The appropriate method depends on - the used flux model. + param_names : sequence of str + The sequence of parameter names this detector signal yield depends + on. These are either fixed or floating parameters. dataset : Dataset instance The Dataset instance holding the monte-carlo event data. fluxmodel : FluxModel @@ -66,22 +68,30 @@ def __init__(self, implmethod, dataset, fluxmodel, livetime): livetime : float | Livetime The live-time in days to use for the detector signal yield. 
""" - super(DetSigYield, self).__init__() + super().__init__(**kwargs) - self.implmethod = implmethod + self.param_names = param_names self.dataset = dataset self.fluxmodel = fluxmodel self.livetime = livetime @property - def implmethod(self): - return self._implmethod - @implmethod.setter - def implmethod(self, method): - if(not isinstance(method, DetSigYieldImplMethod)): - raise TypeError('The implmethod property must be an instance of ' - 'DetSigYieldImplMethod!') - self._implmethod = method + def param_names(self): + """The tuple of parameter names this detector signal yield instance + is a function of. + """ + return self._param_names + + @param_names.setter + def param_names(self, names): + if not issequence(names): + names = [names] + if not issequenceof(names, str): + raise TypeError( + 'The param_names property must be a sequence of str ' + 'instances! ' + f'Its current type is {classname(names)}.') + self._param_names = tuple(names) @property def dataset(self): @@ -89,11 +99,13 @@ def dataset(self): for. """ return self._dataset + @dataset.setter def dataset(self, ds): - if(not isinstance(ds, Dataset)): - raise TypeError('The dataset property must be an instance of ' - 'Dataset!') + if not isinstance(ds, Dataset): + raise TypeError( + 'The dataset property must be an instance of Dataset! ' + f'Its current type is {classname(ds)}.') self._dataset = ds @property @@ -102,11 +114,13 @@ def fluxmodel(self): signal yield. """ return self._fluxmodel + @fluxmodel.setter def fluxmodel(self, model): - if(not isinstance(model, FluxModel)): - raise TypeError('The fluxmodel property must be an instance of ' - 'FluxModel!') + if not isinstance(model, FluxModel): + raise TypeError( + 'The fluxmodel property must be an instance of FluxModel! ' + f'Its current type is {classname(model)}.') self._fluxmodel = model @property @@ -114,172 +128,169 @@ def livetime(self): """The live-time in days. 
""" return self._livetime + @livetime.setter def livetime(self, lt): - if(not (isinstance(lt, float) or isinstance(lt, Livetime))): - raise TypeError('The livetime property must be of type float or ' - 'an instance of Livetime!') + if not (isinstance(lt, float) or isinstance(lt, Livetime)): + raise TypeError( + 'The livetime property must be of type float or an instance ' + 'of Livetime! ' + f'Its current type is {classname(lt)}.') self._livetime = lt - @property - def n_fitparams(self): - """(read-only) The number of fit parameters this detector signal yield - depends on. - """ - return self._implmethod.n_fitparams - - @property - def fitparam_names(self): - """(read-only) The list of fit parameter names this detector signal - yield depends on. - """ - return self._implmethod.fitparam_names - - def source_to_array(self, source): - """Converts the (sequence of) source(s) into a numpy record array needed - for the __call__ method. This convertion is intrinsic to the - implementation method. + @abc.abstractmethod + def sources_to_recarray( + self, + sources, + ): + """This method is supposed to convert a (list of) source model(s) into + a numpy record array that is understood by the detector signal yield + class. + This is for efficiency reasons only. This way the user code can + pre-convert the list of sources into a numpy record array and cache the + array. + The fields of the array are detector signal yield implementation + dependent, i.e. what kind of sources: point-like source, or extended + source for instance. Because the sources usually don't change their + position in the sky, this has to be done only once. Parameters ---------- - source : SourceModel | sequence of SourceModel - The source model containing the spatial information of the source. + sources : SourceModel | sequence of SourceModel + The source model(s) containing the information of the source(s). 
Returns ------- - arr : numpy record ndarray - The generated numpy record ndarray holding the spatial information - for each source. + recarr : numpy record ndarray + The generated (N_sources,)-shaped 1D numpy record ndarray holding + the information for each source. """ - return self._implmethod.source_to_array(source) + pass @abc.abstractmethod - def __call__(self, src, src_flux_params): + def __call__( + self, + src_recarray, + src_params_recarray, + ): """Abstract method to retrieve the detector signal yield for the given - sources and source flux parameters. + sources and source parameter values. Parameters ---------- - src : numpy record ndarray - The numpy record array containing the spatial information of the - signal sources. The required fields of this record array are - implementation method dependent. In the most generic case for a - point-like source, it must contain the following three fields: - ra, dec, time. - src_flux_params : numpy record ndarray - The numpy record ndarray containing the flux parameters of the - sources. The flux parameters can be different for the different + src_recarray : (N_sources,)-shaped numpy record ndarray + The numpy record array containing the information of the sources. + The required fields of this record array are implementation + dependent. In the most generic case for a point-like source, it + must contain the following three fields: ra, dec. + src_params_recarray : (N_sources,)-shaped numpy record ndarray + The numpy record ndarray containing the parameter values of the + sources. The parameter values can be different for the different sources. + The record array must contain two fields for each source parameter, + one named with the source's local parameter name + holding the source's local parameter value, and one named + holding the global parameter index plus one for each + source value. For values mapping to non-fit parameters, the index + should be negative. 
Returns ------- detsigyield : (N_sources,)-shaped 1D ndarray of float The array with the mean number of signal in the detector for each given source. - grads : None | (N_sources,N_fitparams)-shaped 2D ndarray - The gradient of the detector signal yield w.r.t. each fit - parameter for each source. If the detector signal yield depends - on no fit parameter, None is returned. + grads : dict + The dictionary holding the gradient values for each global fit + parameter. The key is the global fit parameter index and the value + is the (N_sources,)-shaped numpy ndarray holding the gradient value + dY_k/dp_s. """ pass -class DetSigYieldImplMethod(object, metaclass=abc.ABCMeta): - """Abstract base class for an implementation method of a detector signal - yield. Via the ``construct_detsigyield`` method it creates a DetSigYield - instance holding the internal objects to calculate the detector signal - yield. +class DetSigYieldBuilder( + object, + metaclass=abc.ABCMeta, +): + """Abstract base class for a builder of a detector signal yield. Via the + ``construct_detsigyield`` method it creates a DetSigYield instance holding + the internal objects to calculate the detector signal yield. """ - def __init__(self, **kwargs): - super(DetSigYieldImplMethod, self).__init__(**kwargs) - - self.supported_sourcemodels = () - self.supported_fluxmodels = () - - @property - def supported_sourcemodels(self): - """The tuple with the SourceModel classes, which are supported by this - detector signal yield implementation method. + def __init__( + self, + **kwargs, + ): + """Constructor. 
""" - return self._supported_sourcemodels - @supported_sourcemodels.setter - def supported_sourcemodels(self, models): - if(not isinstance(models, tuple)): - raise TypeError('The supported_sourcemodels property must be of ' - 'type tuple!') - if(not issequenceofsubclass(models, SourceModel)): - raise TypeError('The supported_sourcemodels property must be a ' - 'sequence of SourceModel classes!') - self._supported_sourcemodels = models - - @property - def supported_fluxmodels(self): - """The tuple with the FluxModel classes, which are supported by this - detector signal yield implementation method. - """ - return self._supported_fluxmodels - @supported_fluxmodels.setter - def supported_fluxmodels(self, models): - if(not isinstance(models, tuple)): - raise TypeError('The supported_fluxmodels property must be of ' - 'type tuple!') - if(not issequenceofsubclass(models, FluxModel)): - raise TypeError('The supported_fluxmodels property must be a ' - 'sequence of FluxModel instances!') - self._supported_fluxmodels = models - - @property - def n_signal_fitparams(self): - """(read-only) The number of signal fit parameters the detector signal - yield depends on. + super().__init__( + **kwargs) + + def assert_types_of_construct_detsigyield_arguments( + self, + dataset, + data, + shgs, + ppbar, + ): + + """Checks the types of the arguments for the ``construct_detsigyield`` + method. It raises errors if the arguments have the wrong type. """ - return len(self._get_signal_fitparam_names()) - - @property - def signal_fitparam_names(self): - """(read-only) The list of fit parameter names the detector signal - yield depends on. An empty list indicates that it does not depend - on any fit parameter. - """ - return self._get_signal_fitparam_names() - - def _get_signal_fitparam_names(self): - """This method must be re-implemented by the derived class and needs to - return the list of fit parameter names, this detector signal yield - is a function of. 
If it returns an empty list, the detector signal
-        yield is independent of any fit parameters.
+        if not isinstance(dataset, Dataset):
+            raise TypeError(
+                'The dataset argument must be an instance of Dataset! '
+                f'Its current type is {classname(dataset)}.')
+
+        if not isinstance(data, DatasetData):
+            raise TypeError(
+                'The data argument must be an instance of DatasetData! '
+                f'Its current type is {classname(data)}.')
+
+        if (not isinstance(shgs, SourceHypoGroup_t)) and\
+           (not issequenceof(shgs, SourceHypoGroup_t)):
+            raise TypeError(
+                'The shgs argument must be an instance of SourceHypoGroup '
+                'or a sequence of SourceHypoGroup instances! '
+                f'Its current type is {classname(shgs)}.')
+
+        if ppbar is not None:
+            if not isinstance(ppbar, ProgressBar):
+                raise TypeError(
+                    'The ppbar argument must be an instance of ProgressBar! '
+                    f'Its current type is {classname(ppbar)}.')
+
+    def get_detsigyield_construction_factory(self):
+        """This method is supposed to return a callable with the call-signature
+
+            __call__(
+                dataset,
+                data,
+                shgs,
+                ppbar,
+            )
+
+        to construct several DetSigYield instances, one for each provided
+        source hypo group (i.e. sources and fluxmodel).
+        The return value of this callable must be a sequence of DetSigYield
+        instances of the same length as the sequence of ``shgs``.
 
         Returns
         -------
-        list of str
-            The list of the fit parameter names, this detector signal yield
-            is a function of. By default this method returns an empty list
-            indicating that the detector signal yield depends on no fit
-            parameter.
+        factory : callable | None
+            This default implementation returns ``None``, indicating that a
+            factory is not supported by this builder.
         """
-        return []
-
-    def supports_sourcemodel(self, sourcemodel):
-        """Checks if the given source model is supported by this detected signal
-        yield implementation method.
- """ - for ssm in self._supported_sourcemodels: - if(isinstance(sourcemodel, ssm)): - return True - return False - - def supports_fluxmodel(self, fluxmodel): - """Checks if the given flux model is supported by this detector signal - yield implementation method. - """ - for sfm in self._supported_fluxmodels: - if(isinstance(fluxmodel, sfm)): - return True - return False + return None @abc.abstractmethod - def construct_detsigyield(self, dataset, data, fluxmodel, livetime): + def construct_detsigyield( + self, + dataset, + data, + shg, + ppbar=None, + ): """Abstract method to construct the DetSigYield instance. This method must be called by the derived class method implementation to ensure the compatibility check of the given flux model with the @@ -287,57 +298,44 @@ def construct_detsigyield(self, dataset, data, fluxmodel, livetime): Parameters ---------- - dataset : Dataset - The Dataset instance holding possible dataset specific settings. - data : DatasetData - The DatasetData instance holding the monte-carlo event data. - fluxmodel : FluxModel - The flux model instance. Must be an instance of FluxModel. - livetime : float | Livetime - The live-time in days to use for the detector signal yield. + dataset : instance of Dataset + The instance of Dataset holding possible dataset specific settings. + data : instance of DatasetData + The instance of DatasetData holding the monte-carlo event data. + shg : instance of SourceHypoGroup + The instance of SourceHypoGroup (i.e. sources and flux model) for + which the detector signal yield should be constructed. + ppbar : instance of ProgressBar | None + The instance of ProgressBar of the optional parent progress bar. Returns ------- - detsigyield : DetSigYield instance + detsigyield : instance of DetSigYield An instance derived from DetSigYield. 
""" - if(not isinstance(dataset, Dataset)): - raise TypeError('The dataset argument must be an instance of ' - 'Dataset!') - if(not isinstance(data, DatasetData)): - raise TypeError('The data argument must be an instance of ' - 'DatasetData!') - if(not self.supports_fluxmodel(fluxmodel)): - raise TypeError('The DetSigYieldImplMethod "%s" does not support ' - 'the flux model "%s"!'%( - self.__class__.__name__, - fluxmodel.__class__.__name__)) - if((not isinstance(livetime, float)) and - (not isinstance(livetime, Livetime))): - raise TypeError('The livetime argument must be an instance of ' - 'float or Livetime!') - - @abc.abstractmethod - def source_to_array(self, source): - """This method is supposed to convert a (list of) source model(s) into - a numpy record array that is understood by the implementation method. - This is for efficiency reasons only. This way the user code can - pre-convert the list of sources into a numpy record array and cache the - array. - The fields of the array are detector signal yield implementation - dependent, i.e. what kind of sources: point-like source, or extended - source for instance. Because the sources usually don't change their - position in the sky, this has to be done only once. + pass - Parameters - ---------- - source : SourceModel | sequence of SourceModel - The source model containing the spatial information of the source. - Returns - ------- - arr : numpy record ndarray - The generated numpy record ndarray holding the spatial information - for each source. +class NullDetSigYieldBuilder( + DetSigYieldBuilder): + """This class provides a dummy detector signal yield builder, which can + be used for testing purposes, when an actual builder is not required. + """ + def __init__( + self, + **kwargs, + ): + super().__init__( + **kwargs) + + def construct_detsigyield( + self, + *args, + **kwargs, + ): + """Since this is a dummy detector signal yield builder, calling this + method will raise a NotImplementedError! 
""" - pass + raise NotImplementedError( + f'The {classname(self)} detector signal yield builder cannot ' + 'actually build a DetSigYield instance!') diff --git a/skyllh/core/display.py b/skyllh/core/display.py index 16b2e544d0..27b546b203 100644 --- a/skyllh/core/display.py +++ b/skyllh/core/display.py @@ -40,4 +40,4 @@ def add_leading_text_line_padding(padwidth, text): padded_text : str The text where each line is padded with the given number of whitespaces. """ - return '\n'.join([ ' '*padwidth + line for line in text.split('\n') ]) + return '\n'.join([' '*padwidth + line for line in text.split('\n')]) diff --git a/skyllh/core/event_selection.py b/skyllh/core/event_selection.py new file mode 100644 index 0000000000..4056e22671 --- /dev/null +++ b/skyllh/core/event_selection.py @@ -0,0 +1,1258 @@ +# -*- coding: utf-8 -*- + +import abc +import inspect +import numpy as np +import scipy.sparse + +from skyllh.core.py import ( + classname, + float_cast, + issequenceof, +) +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroupManager, +) +from skyllh.core.source_model import ( + SourceModel, +) +from skyllh.core.timing import ( + TaskTimer, +) +from skyllh.core.utils.coords import ( + angular_separation, +) + + +class EventSelectionMethod( + object, + metaclass=abc.ABCMeta): + """This is the abstract base class for all event selection method classes. + The idea is to pre-select only events that contribute to the likelihood + function, i.e. are more signal than background like. The different methods + are implemented through derived classes of this base class. + """ + + def __init__( + self, + shg_mgr, + **kwargs): + """Creates a new event selection method instance. + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager | None + The instance of SourceHypoGroupManager that defines the list of + sources, i.e. the list of SourceModel instances. + It can be ``None`` if the event selection method does not depend on + the sources. 
+ """ + super().__init__( + **kwargs) + + self._src_arr = None + + self._shg_mgr = shg_mgr + if self._shg_mgr is not None: + if not isinstance(self._shg_mgr, SourceHypoGroupManager): + raise TypeError( + 'The shg_mgr argument must be None or an instance of ' + 'SourceHypoGroupManager! ' + f'Its current type is {classname(self._shg_mgr)}.') + + # The _src_arr variable holds a numpy record array with the + # necessary source information needed for the event selection + # method. + self._src_arr = self.sources_to_array( + sources=self._shg_mgr.source_list) + + @property + def shg_mgr(self): + """(read-only) The instance of SourceHypoGroupManager, which defines the + list of sources. + """ + return self._shg_mgr + + def __and__(self, other): + """Implements the AND operator (&) for creating an event selection + method, which is the intersection of this event selection method and + another one using the expression ``intersection = self & other``. + + Parameters + ---------- + other : instance of EventSelectionMethod + The instance of EventSelectionMethod that is the other event + selection method. + + Returns + ------- + intersection : instance of IntersectionEventSelectionMethod + The instance of IntersectionEventSelectionMethod that creates the + intersection of this event selection method and the other. + """ + return IntersectionEventSelectionMethod(self, other) + + def change_shg_mgr(self, shg_mgr): + """Changes the SourceHypoGroupManager instance of the event selection + method. This will also recreate the internal source numpy record array. + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager | None + The new SourceHypoGroupManager instance, that should be used for + this event selection method. + It can be ``None`` if the event selection method does not depend on + the sources. 
+ """ + self._shg_mgr = shg_mgr + self._src_arr = None + + if self._shg_mgr is not None: + if not isinstance(self._shg_mgr, SourceHypoGroupManager): + raise TypeError( + 'The shg_mgr argument must be None or an instance of ' + 'SourceHypoGroupManager! ' + f'Its current type is {classname(self._shg_mgr)}.') + + self._src_arr = self.sources_to_array( + sources=self._shg_mgr.source_list) + + def sources_to_array(self, sources): + """This method is supposed to convert a sequence of SourceModel + instances into a structured numpy ndarray with the source information + in a format that is best understood by the actual event selection + method. + + Parameters + ---------- + sources : sequence of SourceModel + The sequence of source models containing the necessary information + of the source. + + Returns + ------- + arr : numpy record ndarray | None + The generated numpy record ndarray holding the necessary information + for each source. + By default ``None`` is returned. + """ + return None + + @abc.abstractmethod + def select_events( + self, + events, + src_evt_idxs=None, + ret_original_evt_idxs=False, + tl=None): + """This method selects the events, which will contribute to the + log-likelihood ratio function. + + Parameters + ---------- + events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray of length N_events, holding the + events. + src_evt_idxs : 2-tuple of 1d ndarrays of ints | None + The 2-element tuple holding the two 1d ndarrays of int of length + N_values, specifying to which sources the given events belong to. + ret_original_evt_idxs : bool + Flag if the original indices of the selected events should get + returned as well. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to collect + timing information about this method. 
+ + Returns + ------- + selected_events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray of length N_selected_events, + holding the selected events, i.e. a subset of the ``events`` + argument. + (src_idxs, evt_idxs) : 1d ndarrays of ints + The two 1d ndarrays of int of length N_values, holding the indices + of the sources and the selected events. + original_evt_idxs : 1d ndarray of ints + The (N_selected_events,)-shaped numpy ndarray holding the original + indices of the selected events, if ``ret_original_evt_idxs`` is set + to ``True``. + """ + pass + + +class IntersectionEventSelectionMethod( + EventSelectionMethod): + """This class provides an event selection method for the intersection of two + event selection methods. It can be created using the ``&`` operator: + ``evt_sel_method1 & evt_sel_method2``. + """ + def __init__( + self, + evt_sel_method1, + evt_sel_method2, + **kwargs): + """Creates a compounded event selection method of two given event + selection methods. + + Parameters + ---------- + evt_sel_method1 : instance of EventSelectionMethod + The instance of EventSelectionMethod for the first event selection + method. + evt_sel_method2 : instance of EventSelectionMethod + The instance of EventSelectionMethod for the second event selection + method. + """ + super().__init__( + shg_mgr=None, + **kwargs) + + self.evt_sel_method1 = evt_sel_method1 + self.evt_sel_method2 = evt_sel_method2 + + @property + def evt_sel_method1(self): + """The instance of EventSelectionMethod for the first event selection + method. + """ + return self._evt_sel_method1 + + @evt_sel_method1.setter + def evt_sel_method1(self, method): + if not isinstance(method, EventSelectionMethod): + raise TypeError( + 'The evt_sel_method1 property must be an instance of ' + 'EventSelectionMethod!' 
+ f'Its current type is {classname(method)}.') + self._evt_sel_method1 = method + + @property + def evt_sel_method2(self): + """The instance of EventSelectionMethod for the second event selection + method. + """ + return self._evt_sel_method2 + + @evt_sel_method2.setter + def evt_sel_method2(self, method): + if not isinstance(method, EventSelectionMethod): + raise TypeError( + 'The evt_sel_method2 property must be an instance of ' + 'EventSelectionMethod!' + f'Its current type is {classname(method)}.') + self._evt_sel_method2 = method + + def change_shg_mgr(self, shg_mgr): + """Changes the SourceHypoGroupManager instance of the event selection + method. This will call the ``change_shg_mgr`` of the individual event + selection methods. + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager | None + The new SourceHypoGroupManager instance, that should be used for + this event selection method. + It can be ``None`` if the event selection method does not depend on + the sources. + """ + self._evt_sel_method1.change_shg_mgr(shg_mgr=shg_mgr) + self._evt_sel_method2.change_shg_mgr(shg_mgr=shg_mgr) + + def select_events( + self, + events, + src_evt_idxs=None, + ret_original_evt_idxs=False, + tl=None): + """Selects events by calling the ``select_events`` methods of the + individual event selection methods. + + Parameters + ---------- + events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray holding the events. + src_evt_idxs : 2-tuple of 1d ndarrays of ints | None + The 2-element tuple holding the two 1d ndarrays of int of length + N_values, specifying to which sources the given events belong to. + ret_original_evt_idxs : bool + Flag if the original indices of the selected events should get + returned as well. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to collect + timing information about this method. 
+ + Returns + ------- + selected_events : DataFieldRecordArray + The instance of DataFieldRecordArray holding the selected events, + i.e. a subset of the `events` argument. + (src_idxs, evt_idxs) : 1d ndarrays of ints + The indices of the sources and the selected events. + original_evt_idxs : 1d ndarray of ints + The (N_selected_events,)-shaped numpy ndarray holding the original + indices of the selected events, if ``ret_original_evt_idxs`` is set + to ``True``. + """ + if ret_original_evt_idxs: + (events, src_evt_idxs, org_evt_idxs1) =\ + self._evt_sel_method1.select_events( + events=events, + src_evt_idxs=src_evt_idxs, + ret_original_evt_idxs=True) + + (events, src_evt_idxs, org_evt_idxs2) =\ + self._evt_sel_method2.select_events( + events=events, + src_evt_idxs=src_evt_idxs, + ret_original_evt_idxs=True) + + org_evt_idxs = np.take(org_evt_idxs1, org_evt_idxs2) + + return (events, src_evt_idxs, org_evt_idxs) + + (events, src_evt_idxs) = self._evt_sel_method1.select_events( + events=events, + src_evt_idxs=src_evt_idxs) + + (events, src_evt_idxs) = self._evt_sel_method2.select_events( + events=events, + src_evt_idxs=src_evt_idxs) + + return (events, src_evt_idxs) + + +class AllEventSelectionMethod( + EventSelectionMethod): + """This event selection method selects all events. + """ + def __init__(self, shg_mgr): + """Creates a new event selection method instance. + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager that defines the list of + sources, i.e. the list of SourceModel instances. For this particular + event selection method it has no meaning, but it is an interface + parameter. + """ + super().__init__( + shg_mgr=shg_mgr) + + def sources_to_array(self, sources): + """Creates the source array from the given list of sources. This event + selection method does not depend on the sources. Hence, ``None`` is + returned. 
+ + Returns + ------- + arr : None + The generated numpy record ndarray holding the necessary information + for each source. Since this event selection method does not depend + on any source, ``None`` is returned. + """ + return None + + def select_events( + self, + events, + src_evt_idxs=None, + ret_original_evt_idxs=False, + tl=None): + """Selects all of the given events. Hence, the returned event array is + the same as the given array. + + Parameters + ---------- + events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray holding the events, for which + the selection method should get applied. + src_evt_idxs : 2-tuple of 1d ndarrays of ints | None + The 2-element tuple holding the two 1d ndarrays of int of length + N_values, specifying to which sources the given events belong to. + ret_original_evt_idxs : bool + Flag if the original indices of the selected events should get + returned as well. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to collect + timing information about this method. + + Returns + ------- + selected_events : DataFieldRecordArray + The instance of DataFieldRecordArray holding the selected events, + i.e. a subset of the `events` argument. + (src_idxs, evt_idxs) : 1d ndarrays of ints + The indices of sources and the selected events. + original_evt_idxs : 1d ndarray of ints + The (N_selected_events,)-shaped numpy ndarray holding the original + indices of the selected events, if ``ret_original_evt_idxs`` is set + to ``True``. 
+ """ + with TaskTimer(tl, 'ESM: Calculate indices of selected events.'): + if src_evt_idxs is None: + n_sources = self.shg_mgr.n_sources + src_idxs = np.repeat(np.arange(n_sources), len(events)) + evt_idxs = np.tile(events.indices, n_sources) + else: + (src_idxs, evt_idxs) = src_evt_idxs + + if ret_original_evt_idxs: + return (events, (src_idxs, evt_idxs), events.indices) + + return (events, (src_idxs, evt_idxs)) + + +class SpatialEventSelectionMethod( + EventSelectionMethod, + metaclass=abc.ABCMeta): + """This abstract base class defines the base class for all spatial event + selection methods. + """ + + def __init__( + self, + shg_mgr, + **kwargs): + """Creates a new event selection method instance. + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager that defines the list of + sources, i.e. the list of SourceModel instances. + """ + super().__init__( + shg_mgr=shg_mgr, + **kwargs) + + def sources_to_array(self, sources): + """Converts the given sequence of SourceModel instances into a + structured numpy ndarray holding the necessary source information needed + for this event selection method. + + Parameters + ---------- + sources : sequence of SourceModel + The sequence of source models containing the necessary information + of the source. + + Returns + ------- + arr : numpy record ndarray + The generated numpy record ndarray holding the necessary information + for each source. It contains the following data fields: 'ra', 'dec'. + """ + if not issequenceof(sources, SourceModel): + raise TypeError( + 'The sources argument must be a sequence of SourceModel ' + 'instances! 
' + f'Its current type is {classname(sources)}.') + + arr = np.empty( + (len(sources),), + dtype=[ + ('ra', np.float64), + ('dec', np.float64) + ], + order='F') + + for (i, src) in enumerate(sources): + arr['ra'][i] = src.ra + arr['dec'][i] = src.dec + + return arr + + +class DecBandEventSectionMethod( + SpatialEventSelectionMethod): + """This event selection method selects events within a declination band + around a list of point-like source positions. + """ + def __init__( + self, + shg_mgr, + delta_angle): + """Creates and configures a spatial declination band event selection + method object. + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager that defines the list of + sources, i.e. the list of SourceModel instances. + delta_angle : float + The half-opening angle around the source in declination for which + events should get selected. + """ + super().__init__( + shg_mgr=shg_mgr) + + self.delta_angle = delta_angle + + @property + def delta_angle(self): + """The half-opening angle around the source in declination and + right-ascention for which events should get selected. + """ + return self._delta_angle + + @delta_angle.setter + def delta_angle(self, angle): + angle = float_cast( + angle, + 'The delta_angle property must be castable to type float!') + self._delta_angle = angle + + def select_events( + self, + events, + src_evt_idxs=None, + ret_original_evt_idxs=False, + tl=None): + """Selects the events within the declination band. + + Parameters + ---------- + events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray that holds the event data. + The following data fields must exist: + + ``'dec'`` : float + The declination of the event. + + src_evt_idxs : 2-tuple of 1d ndarrays of ints | None + The 2-element tuple holding the two 1d ndarrays of int of length + N_values, specifying to which sources the given events belong to. 
+ ret_original_evt_idxs : bool + Flag if the original indices of the selected events should get + returned as well. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to collect + timing information about this method. + + Returns + ------- + selected_events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray holding only the selected + events. + (src_idxs, evt_idxs) : 1d ndarrays of ints + The indices of sources and the selected events. + original_evt_idxs : 1d ndarray of ints + The (N_selected_events,)-shaped numpy ndarray holding the original + indices of the selected events, if ``ret_original_evt_idxs`` is set + to ``True``. + """ + delta_angle = self._delta_angle + src_arr = self._src_arr + + # Calculates the minus and plus declination around each source and + # bound it to -90deg and +90deg, respectively. + src_dec_minus = np.maximum(-np.pi/2, src_arr['dec'] - delta_angle) + src_dec_plus = np.minimum(src_arr['dec'] + delta_angle, np.pi/2) + + # Determine the mask for the events which fall inside the declination + # window. + # mask_dec is a (N_sources,N_events)-shaped ndarray. + with TaskTimer(tl, 'ESM-DecBand: Calculate mask_dec.'): + mask_dec = ( + (events['dec'] > src_dec_minus[:, np.newaxis]) & + (events['dec'] < src_dec_plus[:, np.newaxis]) + ) + + # Determine the mask for the events that fall inside at least one + # source declination band. + # mask is a (N_events,)-shaped ndarray. + with TaskTimer(tl, 'ESM-DecBand: Calculate mask.'): + mask = np.any(mask_dec, axis=0) + + # Reduce the events according to the mask. + with TaskTimer(tl, 'ESM-DecBand: Create selected_events.'): + # Using an integer indices array for data selection is several + # factors faster than using a boolean array. + selected_events_idxs = events.indices[mask] + selected_events = events[selected_events_idxs] + + # Get selected events indices. 
+ idxs = np.argwhere(mask_dec[:, mask]) + src_idxs = idxs[:, 0] + evt_idxs = idxs[:, 1] + + if ret_original_evt_idxs: + return (selected_events, (src_idxs, evt_idxs), selected_events_idxs) + + return (selected_events, (src_idxs, evt_idxs)) + + +class RABandEventSectionMethod( + SpatialEventSelectionMethod): + """This event selection method selects events within a right-ascension band + around a list of point-like source positions. + """ + def __init__( + self, + shg_mgr, + delta_angle): + """Creates and configures a right-ascension band event selection + method object. + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager that defines the list of + sources, i.e. the list of SourceModel instances. + delta_angle : float + The half-opening angle around the source in right-ascension for + which events should get selected. + """ + super().__init__( + shg_mgr=shg_mgr) + + self.delta_angle = delta_angle + + @property + def delta_angle(self): + """The half-opening angle around the source in declination and + right-ascention for which events should get selected. + """ + return self._delta_angle + + @delta_angle.setter + def delta_angle(self, angle): + angle = float_cast( + angle, + 'The delta_angle property must be castable to type float!') + self._delta_angle = angle + + def select_events( + self, + events, + src_evt_idxs=None, + ret_original_evt_idxs=False, + tl=None): + """Selects the events within the right-ascention band. + + The solid angle dOmega = dRA * dSinDec = dRA * dDec * cos(dec) is a + function of declination, i.e. for a constant dOmega, the right-ascension + value has to change with declination. + + Parameters + ---------- + events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray that holds the event data. + The following data fields must exist: + + ``'ra'`` : float + The right-ascention of the event. + ``'dec'`` : float + The declination of the event. 
+ + src_evt_idxs : 2-tuple of 1d ndarrays of ints | None + The 2-element tuple holding the two 1d ndarrays of int of length + N_values, specifying to which sources the given events belong to. + ret_original_evt_idxs : bool + Flag if the original indices of the selected events should get + returned as well. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to collect + timing information about this method. + + Returns + ------- + selected_events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray holding only the selected + events. + (src_idxs, evt_idxs) : 1d ndarrays of ints + The indices of the sources and the selected events. + original_evt_idxs : 1d ndarray of ints + The (N_selected_events,)-shaped numpy ndarray holding the original + indices of the selected events, if ``ret_original_evt_idxs`` is set + to ``True``. + """ + delta_angle = self._delta_angle + src_arr = self._src_arr + + # Get the minus and plus declination around the sources. + src_dec_minus = np.maximum(-np.pi/2, src_arr['dec'] - delta_angle) + src_dec_plus = np.minimum(src_arr['dec'] + delta_angle, np.pi/2) + + # Calculate the cosine factor for the largest declination distance from + # the source. We use np.amin here because smaller cosine values are + # larger angles. + # cosfact is a (N_sources,)-shaped ndarray. + cosfact = np.amin(np.cos([src_dec_minus, src_dec_plus]), axis=0) + + # Calculate delta RA, which is a function of declination. + # dRA is a (N_sources,)-shaped ndarray. + dRA_half = np.amin( + [np.repeat(2*np.pi, len(src_arr['ra'])), + np.fabs(delta_angle / cosfact)], axis=0) + + # Calculate the right-ascension distance of the events w.r.t. the + # source. We make sure to use the smaller distance on the circle, thus + # the maximal distance is 180deg, i.e. pi. + # ra_dist is a (N_sources,N_events)-shaped 2D ndarray. 
+ with TaskTimer(tl, 'ESM-RaBand: Calculate ra_dist.'): + ra_dist = np.fabs( + np.mod( + events['ra'] - src_arr['ra'][:, np.newaxis] + np.pi, + 2*np.pi) - np.pi) + + # Determine the mask for the events which fall inside the + # right-ascention window. + # mask_ra is a (N_sources,N_events)-shaped ndarray. + with TaskTimer(tl, 'ESM-RaBand: Calculate mask_ra.'): + mask_ra = ra_dist < dRA_half[:, np.newaxis] + + # Determine the mask for the events that fall inside at least one + # source sky window. + # mask is a (N_events,)-shaped ndarray. + with TaskTimer(tl, 'ESM-RaBand: Calculate mask.'): + mask = np.any(mask_ra, axis=0) + + # Reduce the events according to the mask. + with TaskTimer(tl, 'ESM-RaBand: Create selected_events.'): + # Using an integer indices array for data selection is several + # factors faster than using a boolean array. + selected_events_idxs = events.indices[mask] + selected_events = events[selected_events_idxs] + + # Get selected events indices. + idxs = np.argwhere(mask_ra[:, mask]) + src_idxs = idxs[:, 0] + evt_idxs = idxs[:, 1] + + if ret_original_evt_idxs: + return (selected_events, (src_idxs, evt_idxs), selected_events_idxs) + + return (selected_events, (src_idxs, evt_idxs)) + + +class SpatialBoxEventSelectionMethod( + SpatialEventSelectionMethod): + """This event selection method selects events within a spatial box in + right-ascention and declination around a list of point-like source + positions. + """ + def __init__( + self, + shg_mgr, + delta_angle): + """Creates and configures a spatial box event selection method object. + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager that defines the list of + sources, i.e. the list of SourceModel instances. + delta_angle : float + The half-opening angle around the source for which events should + get selected. 
+ """ + super().__init__( + shg_mgr=shg_mgr) + + self.delta_angle = delta_angle + + @property + def delta_angle(self): + """The half-opening angle around the source in declination and + right-ascention for which events should get selected. + """ + return self._delta_angle + + @delta_angle.setter + def delta_angle(self, angle): + angle = float_cast( + angle, + 'The delta_angle property must be castable to type float!') + self._delta_angle = angle + + def select_events( + self, + events, + src_evt_idxs=None, + ret_original_evt_idxs=False, + tl=None): + """Selects the events within the spatial box in right-ascention and + declination. + + The solid angle dOmega = dRA * dSinDec = dRA * dDec * cos(dec) is a + function of declination, i.e. for a constant dOmega, the right-ascension + value has to change with declination. + + Parameters + ---------- + events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray that holds the event data. + The following data fields must exist: + + ``'ra'`` : float + The right-ascention of the event. + ``'dec'`` : float + The declination of the event. + + src_evt_idxs : 2-tuple of 1d ndarrays of ints | None + The 2-element tuple holding the two 1d ndarrays of int of length + N_values, specifying to which sources the given events belong to. + ret_original_evt_idxs : bool + Flag if the original indices of the selected events should get + returned as well. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to collect + timing information about this method. + + Returns + ------- + selected_events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray holding only the selected + events. + (src_idxs, evt_idxs) : 1d ndarrays of ints | None + The indices of sources and the selected events. 
+ original_evt_idxs : 1d ndarray of ints + The (N_selected_events,)-shaped numpy ndarray holding the original + indices of the selected events, if ``ret_original_evt_idxs`` is set + to ``True``. + """ + delta_angle = self._delta_angle + src_arr = self._src_arr + n_sources = len(src_arr) + + srcs_ra = src_arr['ra'] + srcs_dec = src_arr['dec'] + + # Get the minus and plus declination around the sources. + src_dec_minus = np.maximum(-np.pi/2, srcs_dec - delta_angle) + src_dec_plus = np.minimum(srcs_dec + delta_angle, np.pi/2) + + # Calculate the cosine factor for the largest declination distance from + # the source. We use np.amin here because smaller cosine values are + # larger angles. + # cosfact is a (N_sources,)-shaped ndarray. + cosfact = np.amin(np.cos([src_dec_minus, src_dec_plus]), axis=0) + + # Calculate delta RA, which is a function of declination. + # dRA is a (N_sources,)-shaped ndarray. + dRA_half = np.amin( + [np.repeat(2*np.pi, n_sources), + np.fabs(delta_angle / cosfact)], axis=0) + + # Determine the mask for the events which fall inside the + # right-ascention window. + # mask_ra is a (N_sources,N_events)-shaped ndarray. + with TaskTimer(tl, 'ESM: Calculate mask_ra.'): + evts_ra = events['ra'] + # Fill in batch sizes of 128 maximum to save memory. + batch_size = 128 + if n_sources > batch_size: + mask_ra = np.zeros((n_sources, len(evts_ra)), dtype=bool) + n_batches = int(np.ceil(n_sources / float(batch_size))) + for bi in range(n_batches): + if bi == n_batches-1: + # We got the last batch of sources. 
+ srcs_slice = slice(bi*batch_size, None) + else: + srcs_slice = slice(bi*batch_size, (bi+1)*batch_size) + + ra_diff = np.fabs( + evts_ra - srcs_ra[srcs_slice][:, np.newaxis]) + ra_mod = np.where( + ra_diff >= np.pi, 2*np.pi - ra_diff, ra_diff) + mask_ra[srcs_slice, :] = ( + ra_mod < dRA_half[srcs_slice][:, np.newaxis] + ) + else: + ra_diff = np.fabs(evts_ra - srcs_ra[:, np.newaxis]) + ra_mod = np.where(ra_diff >= np.pi, 2*np.pi-ra_diff, ra_diff) + mask_ra = ra_mod < dRA_half[:, np.newaxis] + + # Determine the mask for the events which fall inside the declination + # window. + # mask_dec is a (N_sources,N_events)-shaped ndarray. + with TaskTimer(tl, 'ESM: Calculate mask_dec.'): + mask_dec = ( + (events['dec'] > src_dec_minus[:, np.newaxis]) & + (events['dec'] < src_dec_plus[:, np.newaxis]) + ) + + # Determine the mask for the events which fall inside the + # right-ascension and declination window. + # mask_sky is a (N_sources,N_events)-shaped ndarray. + with TaskTimer(tl, 'ESM: Calculate mask_sky.'): + mask_sky = mask_ra & mask_dec + del mask_ra + del mask_dec + + # Determine the mask for the events that fall inside at least one + # source sky window. + # mask is a (N_events,)-shaped ndarray. + with TaskTimer(tl, 'ESM: Calculate mask.'): + mask = np.any(mask_sky, axis=0) + + # Reduce the events according to the mask. + with TaskTimer(tl, 'ESM: Create selected_events.'): + # Using an integer indices array for data selection is several + # factors faster than using a boolean array. + selected_events_idxs = events.indices[mask] + selected_events = events[selected_events_idxs] + + # Get selected events indices. 
+ idxs = np.argwhere(mask_sky[:, mask]) + src_idxs = idxs[:, 0] + evt_idxs = idxs[:, 1] + + if ret_original_evt_idxs: + return (selected_events, (src_idxs, evt_idxs), selected_events_idxs) + + return (selected_events, (src_idxs, evt_idxs)) + + +class PsiFuncEventSelectionMethod( + EventSelectionMethod): + """This event selection method selects events whose psi value, i.e. the + great circle distance of the event to the source, is smaller than the value + of the provided function. + """ + def __init__( + self, + shg_mgr, + psi_name, + func, + axis_name_list): + """Creates a new PsiFuncEventSelectionMethod instance. + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager that defines the list of + sources, i.e. the list of SourceModel instances. + psi_name : str + The name of the data field that provides the psi value of the event. + func : callable + The function that should get evaluated for each event. The call + signature must be + + ``func(*axis_data)``, + + where ``*axis_data`` is the event data of each required axis. The + number of axes must match the provided axis names through the + ``axis_name_list``. + axis_name_list : list of str + The list of data field names for each axis of the function ``func``. + All field names must be valid field names of the trial data's + DataFieldRecordArray instance. + """ + super().__init__( + shg_mgr=shg_mgr) + + self.psi_name = psi_name + self.func = func + self.axis_name_list = axis_name_list + + n_func_args = len(inspect.signature(self._func).parameters) + if n_func_args < len(self._axis_name_list): + raise TypeError( + 'The func argument must be a callable instance with at least ' + f'{len(self._axis_name_list)} arguments! Its current number ' + f'of arguments is {n_func_args}.') + + n_sources = self.shg_mgr.n_sources + if n_sources != 1: + raise ValueError( + 'The `PsiFuncEventSelectionMethod.select_events` currently ' + 'supports only a single source. 
It was called with ' + f'{n_sources} sources.') + + @property + def psi_name(self): + """The name of the data field that provides the psi value of the event. + """ + return self._psi_name + + @psi_name.setter + def psi_name(self, name): + if not isinstance(name, str): + raise TypeError( + 'The psi_name property must be an instance of type str! ' + f'Its current type is {classname(name)}.') + self._psi_name = name + + @property + def func(self): + """The function that should get evaluated for each event. The call + signature must be ``func(*axis_data)``, where ``*axis_data`` is the + event data of each required axis. The number of axes must match the + provided axis names through the ``axis_name_list`` property. + """ + return self._func + + @func.setter + def func(self, f): + if not callable(f): + raise TypeError( + 'The func property must be a callable instance! ' + f'Its current type is {classname(f)}.') + self._func = f + + @property + def axis_name_list(self): + """The list of data field names for each axis of the function defined + through the ``func`` property. + """ + return self._axis_name_list + + @axis_name_list.setter + def axis_name_list(self, names): + if not issequenceof(names, str): + raise TypeError( + 'The axis_name_list property must be a sequence of str ' + 'instances! ' + f'Its current type is {classname(names)}.') + self._axis_name_list = list(names) + + def select_events( + self, + events, + src_evt_idxs=None, + ret_original_evt_idxs=False, + tl=None): + """Selects the events whose psi value is smaller than the value of the + predefined function. + + Parameters + ---------- + events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray that holds the event data. + The following data fields must exist: + + : float + The great circle distance of the event with the source. + : float + The name of the axis required for the function ``func`` to be + evaluated. 
+ + src_evt_idxs : 2-tuple of 1d ndarrays of ints | None + The 2-element tuple holding the two 1d ndarrays of int of length + N_values, specifying to which sources the given events belong to. + ret_original_evt_idxs : bool + Flag if the original indices of the selected events should get + returned as well. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to collect + timing information about this method. + + Returns + ------- + selected_events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray holding only the selected + events. + (src_idxs, evt_idxs) : 1d ndarrays of ints + The indices of the sources and the selected events. + original_evt_idxs : 1d ndarray of ints + The (N_selected_events,)-shaped numpy ndarray holding the original + indices of the selected events, if ``ret_original_evt_idxs`` is set + to ``True``. + """ + cls_name = classname(self) + + with TaskTimer(tl, f'{cls_name}: Get psi values.'): + psi = events[self._psi_name] + + with TaskTimer(tl, f'{cls_name}: Get axis data values.'): + func_args = [events[axis] for axis in self._axis_name_list] + + with TaskTimer(tl, f'{cls_name}: Creating mask.'): + mask = psi < self._func(*func_args) + + with TaskTimer(tl, f'{cls_name}: Create selected_events.'): + # Using an integer indices array for data selection is several + # factors faster than using a boolean array. + selected_events_idxs = events.indices[mask] + selected_events = events[selected_events_idxs] + + # Get selected events indices. 
+ idxs = np.argwhere(np.atleast_2d(mask)) + src_idxs = idxs[:, 0] + evt_idxs = idxs[:, 1] + + if ret_original_evt_idxs: + return (selected_events, (src_idxs, evt_idxs), selected_events_idxs) + + return (selected_events, (src_idxs, evt_idxs)) + + +class AngErrOfPsiEventSelectionMethod( + SpatialEventSelectionMethod): + """This event selection method selects events within a spatial box in + right-ascention and declination around a list of point-like source + positions and performs an additional selection of events whose ang_err value + is larger than the value of the provided function at a given psi value. + """ + def __init__( + self, + shg_mgr, + func, + psi_floor=None, + **kwargs): + """Creates and configures a spatial box and psi func event selection + method object. + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager that defines the list of + sources, i.e. the list of SourceModel instances. + delta_angle : float + The half-opening angle around the source for which events should + get selected. + psi_name : str | None + The name of the data field that provides the psi value of the event. + If set to ``None``, the psi value will be calculated automatically. + func : callable + The function that should get evaluated for each event. The call + signature must be + + ``func(psi)``, + + where ``psi`` is the opening angle between the source and the event. + psi_floor : float | None + The psi func event selection is excluded for events having psi value + below the ``psi_floor``. If None, set it to default 5 degrees. + """ + super().__init__( + shg_mgr=shg_mgr, + **kwargs) + + self.func = func + + if psi_floor is None: + psi_floor = np.deg2rad(5) + self.psi_floor = psi_floor + + @property + def func(self): + """The function that should get evaluated for each event. The call + signature must be ``func(*axis_data)``, where ``*axis_data`` is the + event data of each required axis. 
The number of axes must match the + provided axis names through the ``axis_name_list`` property. + """ + return self._func + + @func.setter + def func(self, f): + if not callable(f): + raise TypeError( + 'The func property must be a callable instance! ' + f'Its current type is {classname(f)}.') + self._func = f + + @property + def psi_floor(self): + """The psi func event selection is excluded for events having psi value + below the `psi_floor`. + """ + return self._psi_floor + + @psi_floor.setter + def psi_floor(self, psi): + psi = float_cast( + psi, + 'The psi_floor property must be castable to type float!') + self._psi_floor = psi + + def select_events( + self, + events, + src_evt_idxs=None, + ret_original_evt_idxs=False, + tl=None): + """Selects the events within the spatial box in right-ascention and + declination and performs an additional selection of events whose ang_err + value is larger than the value of the provided function at a given psi + value. + + The solid angle dOmega = dRA * dSinDec = dRA * dDec * cos(dec) is a + function of declination, i.e. for a constant dOmega, the right-ascension + value has to change with declination. + + Parameters + ---------- + events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray that holds the event data. + The following data fields must exist: + + ``'ra'`` : float + The right-ascention of the event. + ``'dec'`` : float + The declination of the event. + + src_evt_idxs : 2-tuple of 1d ndarrays of ints | None + The 2-element tuple holding the two 1d ndarrays of int of length + N_values, specifying to which sources the given events belong to. + If set to ``None`` all given events will be considered to for all + sources. + ret_original_evt_idxs : bool + Flag if the original indices of the selected events should get + returned as well. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to collect + timing information about this method. 
+ + Returns + ------- + selected_events : instance of DataFieldRecordArray + The instance of DataFieldRecordArray holding only the selected + events. + (src_idxs, evt_idxs) : 1d ndarrays of ints + The indices of the sources and the selected events. + original_evt_idxs : 1d ndarray of ints + The (N_selected_events,)-shaped numpy ndarray holding the original + indices of the selected events, if ``ret_original_evt_idxs`` is set + to ``True``. + """ + if src_evt_idxs is None: + n_sources = len(self._src_arr) + n_events = len(events) + src_idxs = np.repeat(np.arange(n_sources), n_events) + evt_idxs = np.tile(np.arange(n_events), n_sources) + else: + (src_idxs, evt_idxs) = src_evt_idxs + + # Perform selection based on psi values. + with TaskTimer(tl, 'ESM: Calculate psi values.'): + psi = angular_separation( + ra1=np.take(self._src_arr['ra'], src_idxs), + dec1=np.take(self._src_arr['dec'], src_idxs), + ra2=np.take(events['ra'], evt_idxs), + dec2=np.take(events['dec'], evt_idxs), + ) + + with TaskTimer(tl, 'ESM: Create mask_psi.'): + mask_psi = ( + (events['ang_err'][evt_idxs] >= self._func(psi)) | + (psi < self.psi_floor) + ) + + with TaskTimer(tl, 'ESM: Create selected_events.'): + # Have to define the shape argument in order to not truncate + # the mask in case last events are not selected. + mask_sky = scipy.sparse.csr_matrix( + (mask_psi, (src_idxs, evt_idxs)), + shape=(len(self._src_arr), len(events)) + ).toarray() + mask = np.any(mask_sky, axis=0) + + # Using an integer indices array for data selection is several + # factors faster than using a boolean array. + selected_events_idxs = events.indices[mask] + selected_events = events[selected_events_idxs] + + # Get final selected events indices. 
+ idxs = np.argwhere(mask_sky[:, mask]) + src_idxs = idxs[:, 0] + evt_idxs = idxs[:, 1] + + if ret_original_evt_idxs: + return (selected_events, (src_idxs, evt_idxs), selected_events_idxs) + + return (selected_events, (src_idxs, evt_idxs)) diff --git a/skyllh/core/expectation_maximization.py b/skyllh/core/expectation_maximization.py index c82db849a3..5247dd31e5 100644 --- a/skyllh/core/expectation_maximization.py +++ b/skyllh/core/expectation_maximization.py @@ -1,64 +1,71 @@ import numpy as np from scipy.stats import norm -from skyllh.core.analysis import TimeIntegratedMultiDatasetSingleSourceAnalysis -from skyllh.core.backgroundpdf import BackgroundUniformTimePDF -from skyllh.core.pdf import TimePDF -from skyllh.core.pdfratio import SigOverBkgPDFRatio -from skyllh.core.random import RandomStateService -from skyllh.core.signalpdf import ( - SignalBoxTimePDF, - SignalGaussTimePDF, -) - - -def expectation_em(ns, mu, sigma, t, sob): - """ - Expectation step of expectation maximization. + +def em_expectation_step( + ns, + mu, + sigma, + t, + sob, +): + """Expectation step of expectation maximization algorithm. Parameters ---------- - ns : float | 1d ndarray of float - The number of signal neutrinos, as weight for the gaussian flare. - mu : float | 1d ndarray of float - The mean time of the gaussian flare. - sigma: float | 1d ndarray of float - Sigma of the gaussian flare. - t : 1d ndarray of float - Times of the events. - sob : 1d ndarray of float - The signal over background values of events, or weights of events + ns : instance of ndarray + The (n_flares,)-shaped numpy ndarray holding the number of signal + neutrinos, as weight for each gaussian flare. + mu : instance of ndarray + The (n_flares,)-shaped numpy ndarray holding the mean for each gaussian + flare. + sigma: instance of ndarray + The (n_flares,)-shaped numpy ndarray holding the sigma for each gaussian + flare. + t : instance of ndarray + The (n_events,)-shaped numpy ndarray holding the time of each event. 
+ sob : instance of ndarray + The (n_events,)-shaped numpy ndarray holding the signal-over-background + values of each event. Returns ------- - expectation : list of 1d ndarray of float - Weighted "responsibility" function of each event to belong to the flare. - sum_log_denom : float - Sum of log of denominators. + expectations : instane of ndarray + The (n_flares, n_events)-shaped numpy ndarray holding the expectation + of each flare and event. + llh : float + The log-likelihood value, which is the sum of log of the signal and + background expectations. """ - ns = np.atleast_1d(ns) - mu = np.atleast_1d(mu) - sigma = np.atleast_1d(sigma) + n_flares = len(ns) b_term = (1 - np.cos(10 / 180 * np.pi)) / 2 N = len(t) - e_sig = [] - for i in range(len(ns)): - e_sig.append(norm(loc=mu[i], scale=sigma[i]).pdf(t) * sob * ns[i]) - e_bg = (N - np.sum(ns)) / (np.max(t) - np.min(t)) / b_term - denom = sum(e_sig) + e_bg + e_sig = np.empty((n_flares, N), dtype=np.float64) + for i in range(n_flares): + e_sig[i] = norm(loc=mu[i], scale=sigma[i]).pdf(t) + e_sig[i] *= sob + e_sig[i] *= ns[i] + e_bkg = (N - np.sum(ns)) / (np.max(t) - np.min(t)) / b_term + denom = np.sum(e_sig, axis=0) + e_bkg - return [e / denom for e in e_sig], np.sum(np.log(denom)) + expectations = e_sig / denom + llh = np.sum(np.log(denom)) + return (expectations, llh) -def maximization_em(e_sig, t): - """ - Maximization step of expectation maximization. + +def em_maximization_step( + e, + t, +): + """The maximization step of the expectation maximization algorithm. Parameters ---------- - e_sig : list of 1d ndarray of float - The weights for each event from the expectation step. + e : instance of ndarray + The (n_flares, n_events)-shaped numpy ndarray holding the expectation + for each event and flare. t : 1d ndarray of float The times of each event. 
@@ -74,79 +81,101 @@ def maximization_em(e_sig, t): mu = [] sigma = [] ns = [] - for i in range(len(e_sig)): - mu.append(np.average(t, weights=e_sig[i])) - sigma.append(np.sqrt(np.average(np.square(t - mu[i]), weights=e_sig[i]))) - ns.append(np.sum(e_sig[i])) + for i in range(e.shape[0]): + mu.append(np.average(t, weights=e[i])) + sigma.append(np.sqrt(np.average(np.square(t - mu[i]), weights=e[i]))) + ns.append(np.sum(e[i])) sigma = [max(1, s) for s in sigma] - return mu, sigma, ns + return (mu, sigma, ns) + +def em_fit( + x, + weights, + n=1, + tol=1.e-200, + iter_max=500, + weight_thresh=0, + initial_width=5000, + remove_x=None, +): + """Perform the expectation maximization fit. -def em_fit(x, weights, n=1, tol=1.e-200, iter_max=500, weight_thresh=0, initial_width=5000, - remove_x=None): - """Run expectation maximization. - Parameters ---------- - x : array[float] - Quantity to run EM on (e.g. the time if EM should find time flares) - weights : - weights for each event (e.g. the signal over background ratio) - fitparams : dict - Dictionary with value for gamma, e.g. {'gamma': 2}. + x : array of float + The quantity to run EM on (e.g. the time if EM should find time flares). + weights : array of float + The weights for each x value (e.g. the signal over background ratio). n : int How many Gaussians flares we are looking for. tol : float - the stopping criteria for expectation maximization. This is the difference in the normalized likelihood over the - last 20 iterations. + The stopping criteria for the expectation maximization. This is the + difference in the normalized likelihood over the last 20 iterations. iter_max : int - The maximum number of iterations, even if stopping criteria tolerance (`tol`) is not yet reached. + The maximum number of iterations, even if stopping criteria tolerance + (``tol``) is not yet reached. weight_thresh : float - Set a minimum threshold for event weights. Events with smaller weights will be removed. 
+ Set a minimum threshold for event weights. Events with smaller weights + will be removed. initial_width : float - Starting width for the gaussian flare in days. + The starting width for the gaussian flare in days. remove_x : float | None Specific x of event that should be removed. - + Returns ------- - Mean, width, normalization factor + mu : list of float + The list of size ``n`` with the determined mean values. + sigma : list of float + The list of size ``n`` with the standard deviation values. + ns : list of float + The list of size ``n`` with the normalization factor values. """ - - if weight_thresh > 0: # remove events below threshold + if weight_thresh > 0: + # Remove events below threshold. for i in range(len(weights)): mask = weights > weight_thresh weights[i] = weights[i][mask] x[i] = x[i][mask] - # in case, remove event if remove_x is not None: + # Remove data point. mask = x == remove_x weights = weights[~mask] x = x[~mask] - # expectation maximization + # Do the expectation maximization. mu = np.linspace(x[0], x[-1], n+2)[1:-1] - sigma = np.ones(n) * initial_width - ns = np.ones(n) * 10 + sigma = np.full((n,), initial_width) + ns = np.full((n,), 10) + llh_diff = 100 llh_old = 0 llh_diff_list = [100] * 20 + # Run until convergence or maximum number of iterations is reached. 
iteration = 0 - - while iteration < iter_max and llh_diff > tol: # run until convergence or maximum number of iterations + while (iteration < iter_max) and (llh_diff > tol): iteration += 1 - e, logllh = expectation_em(ns, mu, sigma, x, weights) + (e, llh_new) = em_expectation_step( + ns=ns, + mu=mu, + sigma=sigma, + t=x, + sob=weights) - llh_new = np.sum(logllh) tmp_diff = np.abs(llh_old - llh_new) / llh_new llh_diff_list = llh_diff_list[:-1] llh_diff_list.insert(0, tmp_diff) llh_diff = np.max(llh_diff_list) + llh_old = llh_new - mu, sigma, ns = maximization_em(e, x) - return mu, sigma, ns + (mu, sigma, ns) = em_maximization_step( + e=e, + t=x) + + return (mu, sigma, ns) diff --git a/skyllh/core/flux_model.py b/skyllh/core/flux_model.py new file mode 100644 index 0000000000..19d422e627 --- /dev/null +++ b/skyllh/core/flux_model.py @@ -0,0 +1,2443 @@ +# -*- coding: utf-8 -*- + +r"""The `flux_model` module contains classes for different flux models. The +class for the most generic flux model is `FluxModel`, which is an abstract base +class. It describes a mathematical function for the differential flux: + +.. math:: + + \frac{d^4\Phi(\alpha,\delta,E,t | \vec{p}_{\mathrm{s}})}{\mathrm{d}A + \mathrm{d}\Omega \mathrm{d}E \mathrm{d}t} + +""" + +import abc +from astropy import ( + units, +) +import numpy as np +from scipy.integrate import ( + quad, +) +import scipy.special +import scipy.stats + +from skyllh.core import ( + tool, +) +from skyllh.core.config import ( + CFG, +) +from skyllh.core.math import ( + MathFunction, +) +from skyllh.core.model import ( + Model, +) +from skyllh.core.py import ( + classname, + float_cast, +) +from skyllh.core.source_model import ( + IsPointlike, +) + + +class FluxProfile( + MathFunction, + metaclass=abc.ABCMeta): + """The abstract base class for a flux profile math function. 
+    """
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+
+class SpatialFluxProfile(
+        FluxProfile,
+        metaclass=abc.ABCMeta):
+    """The abstract base class for a spatial flux profile function.
+    """
+    def __init__(
+            self,
+            angle_unit=None,
+            **kwargs):
+        """Creates a new SpatialFluxProfile instance.
+
+        Parameters
+        ----------
+        angle_unit : instance of astropy.units.UnitBase | None
+            The used unit for angles.
+            If set to ``None``, the configured default angle unit for fluxes is
+            used.
+        """
+        super().__init__(
+            **kwargs)
+
+        self.angle_unit = angle_unit
+
+    @property
+    def angle_unit(self):
+        """The set unit of angle used for this spatial flux profile.
+        If set to ``None`` the configured default angle unit for fluxes is used.
+        """
+        return self._angle_unit
+
+    @angle_unit.setter
+    def angle_unit(self, unit):
+        if unit is None:
+            unit = CFG['units']['defaults']['fluxes']['angle']
+        if not isinstance(unit, units.UnitBase):
+            raise TypeError(
+                'The property angle_unit must be of type '
+                'astropy.units.UnitBase!')
+        self._angle_unit = unit
+
+    @abc.abstractmethod
+    def __call__(
+            self,
+            ra,
+            dec,
+            unit=None):
+        """This method is supposed to return the spatial profile value for the
+        given celestrial coordinates.
+
+        Parameters
+        ----------
+        ra : float | 1d numpy ndarray of float
+            The right-ascention coordinate.
+        dec : float | 1d numpy ndarray of float
+            The declination coordinate.
+        unit : instance of astropy.units.UnitBase | None
+            The unit of the given celestrial angles.
+            If ``None``, the set angle unit of this SpatialFluxProfile is
+            assumed.
+
+        Returns
+        -------
+        values : 1D numpy ndarray
+            The spatial profile values.
+        """
+        pass
+
+
+class UnitySpatialFluxProfile(
+        SpatialFluxProfile):
+    """Spatial flux profile for the constant profile function 1 for any spatial
+    coordinates.
+    """
+    def __init__(
+            self,
+            angle_unit=None,
+            **kwargs):
+        """Creates a new UnitySpatialFluxProfile instance.
+
+        Parameters
+        ----------
+        angle_unit : instance of astropy.units.UnitBase | None
+            The used unit for angles.
+            If set to ``None``, the configured default angle unit for fluxes is
+            used.
+        """
+        super().__init__(
+            angle_unit=angle_unit,
+            **kwargs)
+
+    @property
+    def math_function_str(self):
+        """(read-only) The string representation of the mathematical function of
+        this spatial flux profile instance.
+        """
+        return '1'
+
+    def __call__(
+            self,
+            ra,
+            dec,
+            unit=None):
+        """Returns 1 as numpy ndarray in same shape as ra and dec.
+
+        Parameters
+        ----------
+        ra : float | 1d numpy ndarray of float
+            The right-ascention coordinate.
+        dec : float | 1d numpy ndarray of float
+            The declination coordinate.
+        unit : instance of astropy.units.UnitBase | None
+            The unit of the given celestrial angles.
+            By the definition of this class this argument is ignored.
+
+        Returns
+        -------
+        values : 1D numpy ndarray
+            1 in same shape as ra and dec.
+        """
+        (ra, dec) = np.atleast_1d(ra, dec)
+        if ra.shape != dec.shape:
+            raise ValueError(
+                'The ra and dec arguments must be of the same shape!')
+
+        return np.ones_like(ra)
+
+
+class PointSpatialFluxProfile(
+        SpatialFluxProfile):
+    """Spatial flux profile for a delta function at the celestrical coordinate
+    (ra, dec).
+    """
+    def __init__(
+            self,
+            ra,
+            dec,
+            angle_unit=None,
+            **kwargs):
+        """Creates a new spatial flux profile for a point at equatorial
+        coordinate (ra, dec).
+
+        Parameters
+        ----------
+        ra : float | None
+            The right-ascention of the point.
+            In case it is None, the evaluation of this spatial flux profile will
+            return zero, unless evaluated for ra=None.
+        dec : float | None
+            The declination of the point.
+            In case it is None, the evaluation of this spatial flux profile will
+            return zero, unless evaluated for dec=None.
+        angle_unit : instance of astropy.units.UnitBase | None
+            The used unit for angles.
+            If set to ``None``, the configured default angle unit for fluxes is
+            used.
+ """ + super().__init__( + angle_unit=angle_unit, + **kwargs) + + self.ra = ra + self.dec = dec + + # Define the names of the parameters, which can be updated. + self.param_names = ('ra', 'dec') + + @property + def ra(self): + """The right-ascention of the point. + The unit is the set angle unit of this SpatialFluxProfile instance. + """ + return self._ra + + @ra.setter + def ra(self, v): + v = float_cast( + v, + 'The ra property must be castable to type float!', + allow_None=True) + self._ra = v + + @property + def dec(self): + """The declination of the point. + The unit is the set angle unit of this SpatialFluxProfile instance. + """ + return self._dec + + @dec.setter + def dec(self, v): + v = float_cast( + v, + 'The dec property must be castable to type float!', + allow_None=True) + self._dec = v + + @property + def math_function_str(self): + """(read-only) The string representation of the mathematical function of + this spatial flux profile instance. It is None, if the right-ascention + or declination property is set to None. + """ + if (self._ra is None) or (self._dec is None): + return None + + s = (f'delta(ra-{self._ra:g}{self._angle_unit})*' + f'delta(dec-{self._dec:g}{self._angle_unit})') + + return s + + def __call__( + self, + ra, + dec, + unit=None): + """Returns a numpy ndarray in same shape as ra and dec with 1 if + `ra` equals `self.ra` and `dec` equals `self.dec`, and 0 otherwise. + + Parameters + ---------- + ra : float | 1d numpy ndarray of float + The right-ascention coordinate at which to evaluate the spatial flux + profile. The unit must be the internally used angle unit. + dec : float | 1d numpy ndarray of float + The declination coordinate at which to evaluate the spatial flux + profile. The unit must be the internally used angle unit. + unit : instance of astropy.units.UnitBase | None + The unit of the given celestrial angles. + If set to ``None``, the set angle unit of this SpatialFluxProfile + instance is assumed. 
+ + Returns + ------- + value : 1D numpy ndarray of int8 + A numpy ndarray in same shape as ra and dec with 1 if `ra` + equals `self.ra` and `dec` equals `self.dec`, and 0 otherwise. + """ + (ra, dec) = np.atleast_1d(ra, dec) + if ra.shape != dec.shape: + raise ValueError( + 'The ra and dec arguments must be of the same shape!') + + if (unit is not None) and (unit != self._angle_unit): + angle_unit_conv_factor = unit.to(self._angle_unit) + ra = ra * angle_unit_conv_factor + dec = dec * angle_unit_conv_factor + + value = ( + (ra == self._ra) & + (dec == self._dec) + ).astype(np.int8, copy=False) + + return value + + +class EnergyFluxProfile( + FluxProfile, + metaclass=abc.ABCMeta): + """The abstract base class for an energy flux profile function. + """ + def __init__( + self, + energy_unit=None, + **kwargs): + """Creates a new energy flux profile with a given energy unit to be used + for flux calculation. + + Parameters + ---------- + energy_unit : instance of astropy.units.UnitBase | None + The used unit for energy. + If set to ``None``, the configured default energy unit for fluxes is + used. + """ + super().__init__( + **kwargs) + + # Set the energy unit. + self.energy_unit = energy_unit + + @property + def energy_unit(self): + """The unit of energy used for the flux profile calculation. + """ + return self._energy_unit + + @energy_unit.setter + def energy_unit(self, unit): + if unit is None: + unit = CFG['units']['defaults']['fluxes']['energy'] + if not isinstance(unit, units.UnitBase): + raise TypeError( + 'The property energy_unit must be of type ' + 'astropy.units.UnitBase!') + self._energy_unit = unit + + @abc.abstractmethod + def __call__( + self, + E, + unit=None): + """This method is supposed to return the energy profile value for the + given energy value. + + Parameters + ---------- + E : float | 1d numpy ndarray of float + The energy value for which to retrieve the energy profile value. 
+ unit : instance of astropy.units.UnitBase | None + The unit of the given energy. + If set to ``None``, the set energy unit of this EnergyFluxProfile + is assumed. + + Returns + ------- + values : 1D numpy ndarray of float + The energy profile values for the given energies. + """ + pass + + def get_integral( + self, + E1, + E2, + unit=None, + ): + """This is the default implementation for calculating the integral value + of this energy flux profile in the range ``[E1, E2]``. + + .. note:: + + This implementation utilizes the ``scipy.integrate.quad`` function + to perform a generic numeric integration. Hence, this implementation + is slow and should be reimplemented by the derived class if an + analytic integral form is available. + + Parameters + ---------- + E1 : float | 1d numpy ndarray of float + The lower energy bound of the integration. + E2 : float | 1d numpy ndarray of float + The upper energy bound of the integration. + unit : instance of astropy.units.UnitBase | None + The unit of the given energies. + If set to ``None``, the set energy unit of this EnergyFluxProfile + instance is assumed. + + Returns + ------- + integral : instance of ndarray + The (n,)-shaped numpy ndarray holding the integral values of the + given integral ranges. + """ + E1 = np.atleast_1d(E1) + E2 = np.atleast_1d(E2) + + if (unit is not None) and (unit != self._energy_unit): + time_unit_conv_factor = unit.to(self._energy_unit) + E1 = E1 * time_unit_conv_factor + E2 = E2 * time_unit_conv_factor + + integral = np.empty((len(E1),), dtype=np.float64) + + for (i, (E1_i, E2_i)) in enumerate(zip(E1, E2)): + integral[i] = quad(self, E1_i, E2_i, full_output=True)[0] + + return integral + + +class UnityEnergyFluxProfile( + EnergyFluxProfile): + """Energy flux profile for the constant function 1. + """ + def __init__( + self, + energy_unit=None, + **kwargs): + """Creates a new UnityEnergyFluxProfile instance. 
+
+        Parameters
+        ----------
+        energy_unit : instance of astropy.units.UnitBase | None
+            The used unit for energy.
+            If set to ``None``, the configured default energy unit for fluxes is
+            used.
+        """
+        super().__init__(
+            energy_unit=energy_unit,
+            **kwargs)
+
+    @property
+    def math_function_str(self):
+        """(read-only) The string representation of the mathematical function of
+        this energy flux profile.
+        """
+        return '1'
+
+    def __call__(
+            self,
+            E,
+            unit=None):
+        """Returns 1 as numpy ndarray in same shape as E.
+
+        Parameters
+        ----------
+        E : float | 1D numpy ndarray of float
+            The energy value for which to retrieve the energy profile value.
+        unit : instance of astropy.units.UnitBase | None
+            The unit of the given energies.
+            By definition of this specific class, this argument is ignored.
+
+        Returns
+        -------
+        values : 1D numpy ndarray of int8
+            1 in same shape as E.
+        """
+        E = np.atleast_1d(E)
+
+        values = np.ones_like(E, dtype=np.int8)
+
+        return values
+
+    def get_integral(
+            self,
+            E1,
+            E2,
+            unit=None):
+        """Computes the integral of this energy flux profile in the range
+        [``E1``, ``E2``], which by definition is ``E2 - E1``.
+
+        Parameters
+        ----------
+        E1 : float | 1d numpy ndarray of float
+            The lower energy bound of the integration.
+        E2 : float | 1d numpy ndarray of float
+            The upper energy bound of the integration.
+        unit : instance of astropy.units.UnitBase | None
+            The unit of the given energies.
+            If set to ``None``, the set energy unit of this EnergyFluxProfile
+            instance is assumed.
+
+        Returns
+        -------
+        integral : 1d ndarray of float
+            The integral values of the given integral ranges.
+ """ + E1 = np.atleast_1d(E1) + E2 = np.atleast_1d(E2) + + if (unit is not None) and (unit != self._energy_unit): + time_unit_conv_factor = unit.to(self._energy_unit) + E1 = E1 * time_unit_conv_factor + E2 = E2 * time_unit_conv_factor + + integral = E2 - E1 + + return integral + + +class PowerLawEnergyFluxProfile( + EnergyFluxProfile): + r"""Energy flux profile for a power law profile with a reference energy + ``E0`` and a spectral index ``gamma``. + + .. math:: + + (E / E_0)^{-\gamma} + + """ + def __init__( + self, + E0, + gamma, + energy_unit=None, + **kwargs): + """Creates a new power law flux profile with the reference energy ``E0`` + and spectral index ``gamma``. + + Parameters + ---------- + E0 : castable to float + The reference energy. + gamma : castable to float + The spectral index. + energy_unit : instance of astropy.units.UnitBase | None + The used unit for energy. + If set to ``None``, the configured default energy unit for fluxes is + used. + """ + super().__init__( + energy_unit=energy_unit, + **kwargs) + + self.E0 = E0 + self.gamma = gamma + + # Define the parameters which can be set via the `set_params` + # method. + self.param_names = ('E0', 'gamma',) + + @property + def E0(self): + """The reference energy in the set energy unit of this EnergyFluxProfile + instance. + """ + return self._E0 + + @E0.setter + def E0(self, v): + v = float_cast( + v, + 'Property E0 must be castable to type float!') + self._E0 = v + + @property + def gamma(self): + """The spectral index. + """ + return self._gamma + + @gamma.setter + def gamma(self, v): + v = float_cast( + v, + 'Property gamma must be castable to type float!') + self._gamma = v + + @property + def math_function_str(self): + """(read-only) The string representation of this energy flux profile + instance. 
+ """ + s = f'(E / ({self._E0:g} {self._energy_unit}))^-{self._gamma:g}' + + return s + + def __call__( + self, + E, + unit=None): + """Returns the power law values for the given energies as numpy ndarray + in same shape as E. + + Parameters + ---------- + E : float | 1D numpy ndarray of float + The energy value for which to retrieve the energy profile value. + unit : instance of astropy.units.UnitBase | None + The unit of the given energies. + If set to ``None``, the set energy unit of this EnergyFluxProfile + instance is assumed. + + Returns + ------- + values : 1D numpy ndarray of float + The energy profile values for the given energies. + """ + E = np.atleast_1d(E) + + if (unit is not None) and (unit != self._energy_unit): + E = E * unit.to(self._energy_unit) + + value = np.power(E / self._E0, -self._gamma) + + return value + + def get_integral( + self, + E1, + E2, + unit=None): + """Computes the integral value of this power-law energy flux profile in + the range ``[E1, E2]``. + + Parameters + ---------- + E1 : float | 1d numpy ndarray of float + The lower energy bound of the integration. + E2 : float | 1d numpy ndarray of float + The upper energy bound of the integration. + unit : instance of astropy.units.UnitBase | None + The unit of the given energies. + If set to ``None``, the set energy unit of this EnergyFluxProfile + instance is assumed. + + Returns + ------- + integral : 1d ndarray of float + The integral values of the given integral ranges. + """ + E1 = np.atleast_1d(E1) + E2 = np.atleast_1d(E2) + + if (unit is not None) and (unit != self._energy_unit): + time_unit_conv_factor = unit.to(self._energy_unit) + E1 = E1 * time_unit_conv_factor + E2 = E2 * time_unit_conv_factor + + gamma = self._gamma + + # Handle special case for gamma = 1. 
+ if gamma == 1: + integral = self._E0 * np.log(E2/E1) + return integral + + integral = ( + np.power(self._E0, gamma) / (1-gamma) * + (np.power(E2, 1-gamma) - np.power(E1, 1-gamma)) + ) + + return integral + + +class CutoffPowerLawEnergyFluxProfile( + PowerLawEnergyFluxProfile +): + r"""Cut-off power law energy flux profile of the form + + .. math:: + + (E / E_0)^{-\gamma} \exp(-E/E_{\mathrm{cut}}) + + """ + def __init__( + self, + E0, + gamma, + Ecut, + energy_unit=None, + **kwargs, + ): + """Creates a new cut-off power law flux profile with the reference + energy ``E0``, spectral index ``gamma``, and cut-off energy ``Ecut``. + + Parameters + ---------- + E0 : castable to float + The reference energy. + gamma : castable to float + The spectral index. + Ecut : castable to float + The cut-off energy. + energy_unit : instance of astropy.units.UnitBase | None + The used unit for energy. + If set to ``None``, the configured default energy unit for fluxes is + used. + """ + super().__init__( + E0=E0, + gamma=gamma, + energy_unit=energy_unit, + **kwargs) + + self.Ecut = Ecut + + @property + def Ecut(self): + """The energy cut value. + """ + return self._Ecut + + @Ecut.setter + def Ecut(self, v): + v = float_cast( + v, + 'The Property Ecut must be castable to type float!') + self._Ecut = v + + @property + def math_function_str(self): + """(read-only) The string representation of this energy flux profile + instance. + """ + s = (f'(E / ({self._E0:g} {self._energy_unit}))^-{self._gamma:g} ' + f'exp(-E / ({self._Ecut:g} {self._energy_unit}))') + + return s + + def __call__( + self, + E, + unit=None, + ): + """Returns the cut-off power law values for the given energies as + numpy ndarray in the same shape as E. + + Parameters + ---------- + E : float | instance of numpy ndarray + The energy value(s) for which to retrieve the energy profile value. + unit : instance of astropy.units.UnitBase | None + The unit of the given energies. 
+ If set to ``None``, the set energy unit of this EnergyFluxProfile + instance is assumed. + + Returns + ------- + values : instance of numpy ndarray + The energy profile values for the given energies. + """ + E = np.atleast_1d(E) + + if (unit is not None) and (unit != self._energy_unit): + E = E * unit.to(self._energy_unit) + + values = super().__call__(E=E, unit=None) + values *= np.exp(-E / self._Ecut) + + return values + + +class LogParabolaPowerLawEnergyFluxProfile( + PowerLawEnergyFluxProfile +): + r"""This class provides an energy flux profile for a power-law with a + spectral index that varies as a log parabola in energy of the form + + .. math:: + + \frac{E}{E_0}^{-\left(\alpha + \beta\log(\frac{E}{E_0})\right)} + + """ + def __init__( + self, + E0, + alpha, + beta, + energy_unit=None, + **kwargs, + ): + super().__init__( + E0=E0, + gamma=np.nan, + energy_unit=energy_unit, + **kwargs) + + self.alpha = alpha + self.beta = beta + + @property + def alpha(self): + """The alpha parameter of the log-parabola spectral index. + """ + return self._alpha + + @alpha.setter + def alpha(self, v): + v = float_cast( + v, + 'Property alpha must be castable to type float!') + self._alpha = v + + @property + def beta(self): + """The beta parameter of the log-parabola spectral index. + """ + return self._beta + + @beta.setter + def beta(self, v): + v = float_cast( + v, + 'Property beta must be castable to type float!') + self._beta = v + + @property + def math_function_str(self): + """(read-only) The string representation of this energy flux profile + instance. + """ + s_E0 = f'{self._E0:g} {self._energy_unit}' + s = ( + f'(E / {s_E0})' + f'^(-({self._alpha:g} + {self._beta:g} log(E / {s_E0})))' + ) + + return s + + def __call__( + self, + E, + unit=None, + ): + """Returns the log-parabola power-law values for the given energies as + numpy ndarray in the same shape as E. 
+ + Parameters + ---------- + E : float | instance of numpy ndarray + The energy value(s) for which to retrieve the energy profile value. + unit : instance of astropy.units.UnitBase | None + The unit of the given energies. + If set to ``None``, the set energy unit of this EnergyFluxProfile + instance is assumed. + + Returns + ------- + values : instance of numpy ndarray + The energy profile values for the given energies. + """ + E = np.atleast_1d(E) + + if (unit is not None) and (unit != self._energy_unit): + E = E * unit.to(self._energy_unit) + + values = np.power( + E / self._E0, + -self._alpha - self._beta * np.log(E / self._E0) + ) + + return values + + +class PhotosplineEnergyFluxProfile( + EnergyFluxProfile, +): + """The abstract base class for an energy flux profile based on a + photospline. + """ + @tool.requires('photospline') + def __init__( + self, + splinetable, + crit_log10_energy_lower, + crit_log10_energy_upper, + energy_unit=None, + **kwargs, + ): + """Creates a new instance of PhotosplineEnergyFluxProfile. + + Parameters + ---------- + splinetable : instance of photospline.SplineTable + The instance of photospline.SplineTable representing the energy flux + profile as a spline. + crit_log10_energy_lower : float + The lower edge of the spline's supported energy range in log10(E). + crit_log10_energy_upper : float + The upper edge of the spline's supported energy range in log10(E). + energy_unit : instance of astropy.units.UnitBase | None + The used unit for energy. + If set to ``None``, the configured default energy unit for fluxes is + used. 
+ """ + super().__init__( + energy_unit=energy_unit, + **kwargs) + + self.photospline = tool.get('photospline') + + self.splinetable = splinetable + self.crit_log10_energy_lower = crit_log10_energy_lower + self.crit_log10_energy_upper = crit_log10_energy_upper + + @property + def splinetable(self): + """The instance of photospline.SplineTable that describes the neutrino + energy flux profile as function of neutrino energy via B-spline + interpolation. + """ + return self._splinetable + + @splinetable.setter + def splinetable(self, table): + if not isinstance(table, self.photospline.SplineTable): + raise TypeError( + 'The splinetable property must be an instance of ' + 'photospline.SplineTable! ' + f'Its current type is {classname(table)}!') + self._splinetable = table + + @property + def crit_log10_energy_lower(self): + """The lower energy bound of the spline's support. + """ + return self._crit_log10_energy_lower + + @crit_log10_energy_lower.setter + def crit_log10_energy_lower(self, v): + v = float_cast( + v, + 'The property crit_log10_energy_lower must be castable to type ' + 'float!') + self._crit_log10_energy_lower = v + + @property + def crit_log10_energy_upper(self): + """The upper energy bound of the spline's support. + """ + return self._crit_log10_energy_upper + + @crit_log10_energy_upper.setter + def crit_log10_energy_upper(self, v): + v = float_cast( + v, + 'The property crit_log10_energy_upper must be castable to type ' + 'float!') + self._crit_log10_energy_upper = v + + +class TimeFluxProfile( + FluxProfile, + metaclass=abc.ABCMeta): + """The abstract base class for a time flux profile function. + """ + def __init__( + self, + t_start=-np.inf, + t_stop=np.inf, + time_unit=None, + **kwargs): + """Creates a new time flux profile instance. + + Parameters + ---------- + t_start : float + The start time of the time profile. + If set to -inf, it means, that the profile starts at the beginning + of the entire time-span of the dataset. 
+ t_stop : float + The stop time of the time profile. + If set to +inf, it means, that the profile ends at the end of the + entire time-span of the dataset. + time_unit : instance of astropy.units.UnitBase | None + The used unit for time. + If set to ``None``, the configured default time unit for fluxes is + used. + """ + super().__init__( + **kwargs) + + self.time_unit = time_unit + + self.t_start = t_start + self.t_stop = t_stop + + # Define the parameters which can be set via the `set_params` + # method. + self.param_names = ('t_start', 't_stop') + + @property + def t_start(self): + """The start time of the time profile. Can be -inf which means, that + the profile starts at the beginning of the entire dataset. + """ + return self._t_start + + @t_start.setter + def t_start(self, t): + t = float_cast( + t, + 'The t_start property must be castable to type float! ' + f'Its current type is {classname(t)}!') + self._t_start = t + + @property + def t_stop(self): + """The stop time of the time profile. Can be +inf which means, that + the profile ends at the end of the entire dataset. + """ + return self._t_stop + + @t_stop.setter + def t_stop(self, t): + t = float_cast( + t, + 'The t_stop property must be castable to type float! ' + f'Its current type is {classname(t)}!') + self._t_stop = t + + @property + def duration(self): + """(read-only) The duration of the time profile. + """ + return self._t_stop - self._t_start + + @property + def time_unit(self): + """The unit of time used for the flux profile calculation. + """ + return self._time_unit + + @time_unit.setter + def time_unit(self, unit): + if unit is None: + unit = CFG['units']['defaults']['fluxes']['time'] + if not isinstance(unit, units.UnitBase): + raise TypeError( + 'The property time_unit must be of type ' + 'astropy.units.UnitBase! 
'
+ f'Its current type is {classname(unit)}!')
+ self._time_unit = unit
+
+ def get_total_integral(self):
+ """Calculates the total integral of the time profile from t_start to
+ t_stop.
+
+ Returns
+ -------
+ integral : float
+ The integral value of the entire time profile.
+ The value is in the set time unit of this TimeFluxProfile instance.
+ """
+ integral = self.get_integral(self._t_start, self._t_stop).squeeze()
+
+ return integral
+
+ @abc.abstractmethod
+ def __call__(
+ self,
+ t,
+ unit=None):
+ """This method is supposed to return the time profile value for the
+ given times.
+
+ Parameters
+ ----------
+ t : float | 1D numpy ndarray of float
+ The time(s) for which to get the time flux profile values.
+ unit : instance of astropy.units.UnitBase | None
+ The unit of the given times.
+ If set to ``None``, the set time unit of this TimeFluxProfile
+ instance is assumed.
+
+ Returns
+ -------
+ values : 1D numpy ndarray of float
+ The time profile values.
+ """
+ pass
+
+ @abc.abstractmethod
+ def move(
+ self,
+ dt,
+ unit=None):
+ """Abstract method to move the time profile by the given amount of time.
+
+ Parameters
+ ----------
+ dt : float
+ The time difference of how far to move the time profile in time.
+ This can be a positive or negative time shift value.
+ unit : instance of astropy.units.UnitBase | None
+ The unit of the given time difference.
+ If set to ``None``, the set time unit of this TimeFluxProfile
+ instance is assumed.
+ """
+ pass
+
+ @abc.abstractmethod
+ def get_integral(
+ self,
+ t1,
+ t2,
+ unit=None):
+ """This method is supposed to calculate the integral of the time profile
+ from time ``t1`` to time ``t2``.
+
+ Parameters
+ ----------
+ t1 : float | array of float
+ The start time of the integration.
+ t2 : float | array of float
+ The end time of the integration.
+ unit : instance of astropy.units.UnitBase | None
+ The unit of the given times. 
+ If set to ``None``, the set time unit of this TimeFluxProfile
+ instance is assumed.
+
+ Returns
+ -------
+ integral : array of float
+ The integral value(s) of the time profile. The values are in the
+ set time unit of this TimeFluxProfile instance.
+ """
+ pass
+
+
+class UnityTimeFluxProfile(
+ TimeFluxProfile):
+ """Time flux profile for the constant profile function ``1``.
+ """
+ def __init__(self, time_unit=None, **kwargs):
+ super().__init__(
+ time_unit=time_unit,
+ **kwargs)
+
+ @property
+ def math_function_str(self):
+ return '1'
+
+ def __call__(self, t, unit=None):
+ """Returns 1 as numpy ndarray in same shape as t.
+
+ Parameters
+ ----------
+ t : float | 1D numpy ndarray of float
+ The time(s) for which to get the time flux profile values.
+ unit : instance of astropy.units.UnitBase | None
+ The unit of the given times.
+ By definition of this specific class, this argument is ignored.
+
+ Returns
+ -------
+ values : 1D numpy ndarray of int8
+ 1 in same shape as ``t``.
+ """
+ t = np.atleast_1d(t)
+
+ values = np.ones_like(t, dtype=np.int8)
+
+ return values
+
+ def move(self, dt, unit=None):
+ """Moves the time profile by the given amount of time. By definition
+ this method does nothing, because the profile is 1 over the entire
+ dataset time range.
+
+ Parameters
+ ----------
+ dt : float
+ The time difference of how far to move the time profile in time.
+ This can be a positive or negative time shift value.
+ unit : instance of astropy.units.UnitBase | None
+ The unit of the given time difference.
+ If set to ``None``, the set time unit of this TimeFluxProfile
+ instance is assumed.
+ """
+ pass
+
+ def get_integral(self, t1, t2, unit=None):
+ """Calculates the integral of the time profile from time t1 to time t2.
+
+ Parameters
+ ----------
+ t1 : float | array of float
+ The start time of the integration.
+ t2 : float | array of float
+ The end time of the integration. 
+ unit : instance of astropy.units.UnitBase | None + The unit of the given times. + If set to ``None``, the set time unit of this TimeFluxProfile + instance is assumed. + + Returns + ------- + integral : array of float + The integral value(s) of the time profile. The values are in the + set time unit of this TimeFluxProfile instance. + """ + if (unit is not None) and (unit != self._time_unit): + time_unit_conv_factor = unit.to(self._time_unit) + t1 = t1 * time_unit_conv_factor + t2 = t2 * time_unit_conv_factor + + integral = t2 - t1 + + return integral + + +class BoxTimeFluxProfile( + TimeFluxProfile): + """This class describes a box-shaped time flux profile. + It has the following parameters: + + t0 : float + The mid time of the box profile. + tw : float + The width of the box profile. + + The box is centered at ``t0`` and extends to +/-``tw``/2 around ``t0``. + """ + + @classmethod + def from_start_and_stop_time( + cls, + start, + stop, + time_unit=None, + **kwargs): + """Constructs a BoxTimeFluxProfile instance from the given start and + stop time. + + Parameters + ---------- + start : float + The start time of the box profile. + stop : float + The stop time of the box profile. + time_unit : instance of astropy.units.UnitBase | None + The used unit for time. + If set to ``None``, the configured default time unit for fluxes is + used. + + Returns + ------- + profile : instance of BoxTimeFluxProfile + The newly created instance of BoxTimeFluxProfile. + """ + t0 = 0.5*(start + stop) + tw = stop - start + + profile = cls( + t0=t0, + tw=tw, + time_unit=time_unit, + **kwargs) + + return profile + + def __init__( + self, + t0, + tw, + time_unit=None, + **kwargs): + """Creates a new box-shaped time profile instance. + + Parameters + ---------- + t0 : float + The mid time of the box profile. + tw : float + The width of the box profile. + time_unit : instance of astropy.units.UnitBase | None + The used unit for time. 
+ If set to ``None``, the configured default time unit for fluxes is + used. + """ + t_start = t0 - tw/2. + t_stop = t0 + tw/2. + + super().__init__( + t_start=t_start, + t_stop=t_stop, + time_unit=time_unit, + **kwargs) + + # Define the parameters which can be set via the `set_params` + # method. + self.param_names = ('t0', 'tw') + + @property + def t0(self): + """The time of the mid point of the box. + The value is in the set time unit of this TimeFluxProfile instance. + """ + return 0.5*(self._t_start + self._t_stop) + + @t0.setter + def t0(self, t): + old_t0 = self.t0 + dt = t - old_t0 + self.move(dt) + + @property + def tw(self): + """The time width of the box. + The value is in the set time unit of this TimeFluxProfile instance. + """ + return self._t_stop - self._t_start + + @tw.setter + def tw(self, w): + t0 = self.t0 + self._t_start = t0 - 0.5*w + self._t_stop = t0 + 0.5*w + + @property + def math_function_str(self): + """The string representation of the mathematical function of this + TimeFluxProfile instance. + """ + t0 = self.t0 + tw = self.tw + + s = f'1 for t in [{t0:g}-{tw:g}/2; {t0:g}+{tw:g}/2], 0 otherwise' + + return s + + def __call__( + self, + t, + unit=None): + """Returns 1 for all t within the interval [t0-tw/2; t0+tw/2], and 0 + otherwise. + + Parameters + ---------- + t : float | 1D numpy ndarray of float + The time(s) for which to get the time flux profile values. + unit : instance of astropy.units.UnitBase | None + The unit of the given times. + If set to ``None``, the set time unit of this TimeFluxProfile + instance is assumed. + + Returns + ------- + values : 1D numpy ndarray of int8 + The value(s) of the time flux profile for the given time(s). 
+ """ + t = np.atleast_1d(t) + + if (unit is not None) and (unit != self._time_unit): + t = t * unit.to(self._time_unit) + + values = np.zeros((t.shape[0],), dtype=np.int8) + m = (t >= self._t_start) & (t <= self._t_stop) + values[m] = 1 + + return values + + def cdf( + self, + t, + unit=None): + """Calculates the cumulative distribution function value for the given + time values ``t``. + + Parameters + ---------- + t : float | instance of numpy ndarray + The (N_times,)-shaped numpy ndarray holding the time values for + which to calculate the CDF values. + unit : instance of astropy.units.UnitBase | None + The unit of the given times. + If set to ``None``, the set time unit of this TimeFluxProfile is + assumed. + + Returns + ------- + values : instance of numpy ndarray + The (N_times,)-shaped numpy ndarray holding the cumulative + distribution function values for each time ``t``. + """ + t = np.atleast_1d(t) + + if (unit is not None) and (unit != self._time_unit): + t = t * unit.to(self._time_unit) + + t_start = self._t_start + t_stop = self._t_stop + + values = np.zeros(t.size, dtype=np.float64) + + m = (t_start <= t) & (t <= t_stop) + values[m] = (t[m] - t_start) / (t_stop - t_start) + + m = (t > t_stop) + values[m] = 1 + + return values + + def move( + self, + dt, + unit=None): + """Moves the box-shaped time profile by the time difference dt. + + Parameters + ---------- + dt : float + The time difference of how far to move the time profile in time. + This can be a positive or negative time shift value. + unit : instance of astropy.units.UnitBase | None + The unit of ``dt``. + If set to ``None``, the set time unit of this TimeFluxProfile + instance is assumed. + """ + if (unit is not None) and (unit != self._time_unit): + dt = dt * unit.to(self._time_unit) + + self._t_start += dt + self._t_stop += dt + + def get_integral( + self, + t1, + t2, + unit=None): + """Calculates the integral of the box-shaped time flux profile from + time t1 to time t2. 
+ + Parameters + ---------- + t1 : float | array of float + The start time(s) of the integration. + t2 : float | array of float + The end time(s) of the integration. + unit : instance of astropy.units.UnitBase | None + The unit of the given times. + If set to ``None``, the set time unit of this TimeFluxProfile + instance is assumed. + + Returns + ------- + integral : array of float + The integral value(s). The values are in the set time unit of this + TimeFluxProfile instance. + """ + t1 = np.atleast_1d(t1) + t2 = np.atleast_1d(t2) + + if (unit is not None) and (unit != self._time_unit): + time_unit_conv_factor = unit.to(self._time_unit) + t1 = t1 * time_unit_conv_factor + t2 = t2 * time_unit_conv_factor + + integral = np.zeros((t1.shape[0],), dtype=np.float64) + + m = (t2 >= self._t_start) & (t1 <= self._t_stop) + N = np.count_nonzero(m) + + t1 = np.max(np.vstack((t1[m], np.repeat(self._t_start, N))).T, axis=1) + t2 = np.min(np.vstack((t2[m], np.repeat(self._t_stop, N))).T, axis=1) + + integral[m] = t2 - t1 + + return integral + + +class GaussianTimeFluxProfile( + TimeFluxProfile): + """This class describes a gaussian-shaped time flux profile. + It has the following parameters: + + t0 : float + The mid time of the gaussian profile. + sigma_t : float + The one-sigma width of the gaussian profile. + """ + + def __init__( + self, + t0, + sigma_t, + tol=1e-12, + time_unit=None, + **kwargs): + """Creates a new gaussian-shaped time flux profile instance. + + Parameters + ---------- + t0 : float + The mid time of the gaussian profile. + sigma_t : float + The one-sigma width of the gaussian profile. + tol : float + The tolerance of the gaussian value. This defines the start and end + time of the gaussian profile. + time_unit : instance of astropy.units.UnitBase | None + The used unit for time. + If set to ``None``, the configured default time unit for fluxes is + used. 
+ """ + # Calculate the start and end time of the gaussian profile, such that + # at those times the gaussian values obey the given tolerance. + dt = np.sqrt(-2 * sigma_t**2 * np.log(tol)) + t_start = t0 - dt + t_stop = t0 + dt + + super().__init__( + t_start=t_start, + t_stop=t_stop, + time_unit=time_unit, + **kwargs) + + self.t0 = t0 + self.sigma_t = sigma_t + + # Define the parameters which can be set via the `set_params` + # method. + self.param_names = ('t0', 'sigma_t') + + @property + def math_function_str(self): + return 'exp(-(t-t0)^2/(2 sigma_t^2))' + + @property + def t0(self): + """The time of the mid point of the gaussian profile. + The unit of the value is the set time unit of this TimeFluxProfile + instance. + """ + return 0.5*(self._t_start + self._t_stop) + + @t0.setter + def t0(self, t): + t = float_cast( + t, + 'The t0 property must be castable to type float!') + old_t0 = self.t0 + dt = t - old_t0 + self.move(dt) + + @property + def sigma_t(self): + """The one-sigma width of the gaussian profile. + The unit of the value is the set time unit of this TimeFluxProfile + instance. + """ + return self._sigma_t + + @sigma_t.setter + def sigma_t(self, sigma): + sigma = float_cast( + sigma, + 'The sigma_t property must be castable to type float!') + self._sigma_t = sigma + + def __call__( + self, + t, + unit=None): + """Returns the gaussian profile value for the given time ``t``. + + Parameters + ---------- + t : float | 1D numpy ndarray of float + The time(s) for which to get the time flux profile values. + unit : instance of astropy.units.UnitBase | None + The unit of the given times. + If set to ``None``, the set time unit of this TimeFluxProfile is + assumed. + + Returns + ------- + values : 1D numpy ndarray of float + The value(s) of the time flux profile for the given time(s). 
+ """ + t = np.atleast_1d(t) + + if (unit is not None) and (unit != self._time_unit): + time_unit_conv_factor = unit.to(self._time_unit) + t = t * time_unit_conv_factor + + m = (t >= self.t_start) & (t < self.t_stop) + + s = self._sigma_t + twossq = 2*s*s + t0 = 0.5*(self._t_stop + self._t_start) + dt = t[m] - t0 + + values = np.zeros_like(t) + values[m] = np.exp(-dt*dt/twossq) + + return values + + def cdf( + self, + t, + unit=None): + """Calculates the cumulative distribution function values for the given + time values ``t``. + + Parameters + ---------- + t : float | instance of numpy ndarray + The (N_times,)-shaped numpy ndarray holding the time values for + which to calculate the CDF values. + unit : instance of astropy.units.UnitBase | None + The unit of the given times. + If set to ``None``, the set time unit of this TimeFluxProfile is + assumed. + + Returns + ------- + values : instance of numpy ndarray + The (N_times,)-shaped numpy ndarray holding the cumulative + distribution function values for each time ``t``. + """ + t = np.atleast_1d(t) + + if (unit is not None) and (unit != self._time_unit): + t = t * unit.to(self._time_unit) + + t_start = self._t_start + t_stop = self._t_stop + + values = np.zeros(t.size, dtype=np.float64) + + m = (t_start <= t) & (t <= t_stop) + values[m] = ( + self.get_integral(t1=t_start, t2=t[m]) / self.get_total_integral() + ) + + m = (t > t_stop) + values[m] = 1 + + return values + + def move( + self, + dt, + unit=None): + """Moves the gaussian time profile by the given amount of time. + + Parameters + ---------- + dt : float + The time difference of how far to move the time profile in time. + This can be a positive or negative time shift value. + unit : instance of astropy.units.UnitBase | None + The unit of the given time difference. + If set to ``None``, the set time unit of this TimeFluxProfile is + assumed. 
+ """ + if (unit is not None) and (unit != self._time_unit): + dt = dt * unit.to(self._time_unit) + + self._t_start += dt + self._t_stop += dt + + def get_integral( + self, + t1, + t2, + unit=None): + """Calculates the integral of the gaussian time profile from time ``t1`` + to time ``t2``. + + Parameters + ---------- + t1 : float | array of float + The start time(s) of the integration. + t2 : float | array of float + The end time(s) of the integration. + unit : instance of astropy.units.UnitBase | None + The unit of the given times. + If set to ``None``, the set time unit of this TimeFluxProfile + instance is assumed. + + Returns + ------- + integral : array of float + The integral value(s). The values are in the set time unit of + this TimeFluxProfile instance. + """ + if (unit is not None) and (unit != self._time_unit): + time_unit_conv_factor = unit.to(self._time_unit) + t1 = t1 * time_unit_conv_factor + t2 = t2 * time_unit_conv_factor + + t0 = 0.5*(self._t_stop + self._t_start) + sigma_t = self._sigma_t + + c1 = np.sqrt(np.pi/2) * sigma_t + c2 = np.sqrt(2) * sigma_t + i1 = c1 * scipy.special.erf((t1 - t0)/c2) + i2 = c1 * scipy.special.erf((t2 - t0)/c2) + + integral = i2 - i1 + + return integral + + +class FluxModel( + MathFunction, + Model, + metaclass=abc.ABCMeta): + r"""Abstract base class for all flux models of the form + + .. math:: + + \Phi_S(\alpha,\delta,E,t | \vec{x}_s,\vec{p}_s). + + This base class defines the units used for the flux calculation. The unit + of the flux is ([angle]^{-2} [energy]^{-1} [length]^{-2} [time]^{-1}). + + At this point the functional form of the flux model is not yet defined. + """ + @staticmethod + def get_default_units(): + """Returns the configured default units for flux models. + + Returns + ------- + units_dict : dict + The dictionary holding the configured default units used for flux + models. 
+ """ + return CFG['units']['defaults']['fluxes'] + + def __init__( + self, + angle_unit=None, + energy_unit=None, + length_unit=None, + time_unit=None, + **kwargs): + """Creates a new FluxModel instance and defines the user-defined units. + + Parameters + ---------- + angle_unit : instance of astropy.units.UnitBase | None + The used unit for angles. + If set to ``None``, the configured default angle unit for fluxes is + used. + energy_unit : instance of astropy.units.UnitBase | None + The used unit for energy. + If set to ``None``, the configured default energy unit for fluxes is + used. + length_unit : instance of astropy.units.UnitBase | None + The used unit for length. + If set to ``None``, the configured default length unit for fluxes is + used. + time_unit : instance of astropy.units.UnitBase | None + The used unit for time. + If set to ``None``, the configured default time unit for fluxes is + used. + """ + super().__init__( + **kwargs) + + # Define the units. + self.angle_unit = angle_unit + self.energy_unit = energy_unit + self.length_unit = length_unit + self.time_unit = time_unit + + @property + def angle_unit(self): + """The unit of angle used for the flux calculation. + """ + return self._angle_unit + + @angle_unit.setter + def angle_unit(self, unit): + if unit is None: + unit = CFG['units']['defaults']['fluxes']['angle'] + if not isinstance(unit, units.UnitBase): + raise TypeError( + 'The property angle_unit must be of type ' + 'astropy.units.UnitBase!') + self._angle_unit = unit + + @property + def energy_unit(self): + """The unit of energy used for the flux calculation. 
+ """ + return self._energy_unit + + @energy_unit.setter + def energy_unit(self, unit): + if unit is None: + unit = CFG['units']['defaults']['fluxes']['energy'] + if not isinstance(unit, units.UnitBase): + raise TypeError( + 'The property energy_unit must be of type ' + 'astropy.units.UnitBase!') + self._energy_unit = unit + + @property + def length_unit(self): + """The unit of length used for the flux calculation. + """ + return self._length_unit + + @length_unit.setter + def length_unit(self, unit): + if unit is None: + unit = CFG['units']['defaults']['fluxes']['length'] + if not isinstance(unit, units.UnitBase): + raise TypeError( + 'The property length_unit must be of type ' + 'astropy.units.UnitBase!') + self._length_unit = unit + + @property + def time_unit(self): + """The unit of time used for the flux calculation. + """ + return self._time_unit + + @time_unit.setter + def time_unit(self, unit): + if unit is None: + unit = CFG['units']['defaults']['fluxes']['time'] + if not isinstance(unit, units.UnitBase): + raise TypeError( + 'The property time_unit must be of type ' + 'astropy.units.UnitBase!') + self._time_unit = unit + + @property + def unit_str(self): + """The string representation of the flux unit. + """ + if self.angle_unit == units.radian: + angle_unit_sq = units.steradian + else: + angle_unit_sq = self.angle_unit**2 + + s = (f'({self.energy_unit.to_string()}' + f' {angle_unit_sq.to_string()}' + f' {self.length_unit.to_string()}^2' + f' {self.time_unit.to_string()})^-1') + + return s + + @property + def unit_latex_str(self): + """The latex string representation of the flux unit. 
+ """
+ if self.angle_unit == units.radian:
+ angle_unit_sq = units.steradian
+ else:
+ angle_unit_sq = self.angle_unit**2
+
+ s = (f'{self.energy_unit.to_string()}''$^{-1}$ '
+ f'{angle_unit_sq.to_string()}''$^{-1}$ '
+ f'{self.length_unit.to_string()}''$^{-2}$ '
+ f'{self.time_unit.to_string()}''$^{-1}$')
+
+ return s
+
+ def __str__(self):
+ """Pretty string representation of this class.
+ """
+ return f'{self.math_function_str} {self.unit_str}'
+
+ @abc.abstractmethod
+ def __call__(
+ self,
+ ra=None,
+ dec=None,
+ E=None,
+ t=None,
+ angle_unit=None,
+ energy_unit=None,
+ time_unit=None):
+ """The call operator to retrieve a flux value for a given celestial
+ position, energy, and observation time.
+
+ Parameters
+ ----------
+ ra : float | (Ncoord,)-shaped 1D numpy ndarray of float
+ The right-ascension coordinate for which to retrieve the flux value.
+ dec : float | (Ncoord,)-shaped 1D numpy ndarray of float
+ The declination coordinate for which to retrieve the flux value.
+ E : float | (Nenergy,)-shaped 1D numpy ndarray of float
+ The energy for which to retrieve the flux value.
+ t : float | (Ntime,)-shaped 1D numpy ndarray of float
+ The observation time for which to retrieve the flux value.
+ angle_unit : instance of astropy.units.UnitBase | None
+ The unit of the given angles.
+ If ``None``, the set angle unit of the flux model is assumed.
+ energy_unit : instance of astropy.units.UnitBase | None
+ The unit of the given energies.
+ If ``None``, the set energy unit of the flux model is assumed.
+ time_unit : instance of astropy.units.UnitBase | None
+ The unit of the given times.
+ If ``None``, the set time unit of the flux model is assumed.
+
+ Returns
+ -------
+ flux : (Ncoord,Nenergy,Ntime)-shaped ndarray of float
+ The flux values are in unit of the set flux model units
+ [energy]^{-1} [angle]^{-2} [length]^{-2} [time]^{-1}. 
+ """ + pass + + def get_conversion_factor_to_internal_flux_unit(self): + """Calculates the conversion factor to convert the flux unit of this + flux model instance to the SkyLLH internally used flux unit. + + Returns + ------- + factor : float + The conversion factor. + """ + self_flux_unit = 1 / ( + self.angle_unit**2 * + self.energy_unit * + self.length_unit**2 * + self.time_unit) + + internal_units = CFG['internal_units'] + internal_flux_unit = 1 / ( + internal_units['angle']**2 * + internal_units['energy'] * + internal_units['length']**2 * + internal_units['time']) + + factor = (self_flux_unit).to(internal_flux_unit).value + + return factor + + +class NullFluxModel( + FluxModel): + """This class provides a dummy flux model class, which can be used for + testing purposes, in cases where an actual flux model is not required but + the framework interface requires one. + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def math_function_str(self): + """Since this is a dummy flux model, calling this method will raise a + NotImplementedError. + """ + raise NotImplementedError( + f'The {classname(self)} flux model is a dummy flux model which has ' + 'no math function prepresentation!') + + def __call__(self, *args, **kwargs): + """Since this is a dummy flux model, calling this method will raise a + NotImplementedError. + """ + raise NotImplementedError( + f'The {classname(self)} flux model is a dummy flux model and ' + 'cannot be called!') + + +class FactorizedFluxModel( + FluxModel): + r"""This class describes a flux model where the spatial, energy, and time + profiles of the source factorize. That means the flux can be written as: + + .. 
math:: + + \Phi(\alpha,\delta,E,t | \vec{p}_\mathrm{s}) = + \Phi_0 + \Psi(\alpha,\delta|\vec{p}_\mathrm{s}) + \epsilon(E|\vec{p}_\mathrm{s}) + T(t|\vec{p}_\mathrm{s}) + + where, :math:`\Phi_0` is the normalization constant of the flux, and + :math:`\Psi`, :math:`\epsilon`, and :math:`T` are the spatial, energy, and + time profiles of the flux given the source parameters + :math:`\vec{p}_\mathrm{s}`, respectively. + """ + def __init__( + self, + Phi0, + spatial_profile, + energy_profile, + time_profile, + length_unit=None, + **kwargs): + """Creates a new factorized flux model. + + Parameters + ---------- + Phi0 : float + The flux normalization constant. + spatial_profile : instance of SpatialFluxProfile | None + The SpatialFluxProfile instance providing the spatial profile + function of the flux. + If set to None, an instance of UnitySpatialFluxProfile will be used, + which represents the constant function 1. + energy_profile : instance of EnergyFluxProfile | None + The EnergyFluxProfile instance providing the energy profile + function of the flux. + If set to None, an instance of UnityEnergyFluxProfile will be used, + which represents the constant function 1. + time_profile : instance of TimeFluxProfile | None + The TimeFluxProfile instance providing the time profile function + of the flux. + If set to None, an instance of UnityTimeFluxProfile will be used, + which represents the constant function 1. + length_unit : instance of astropy.units.UnitBase | None + The used unit for length. + If set to ``None``, the configured default length unit for fluxes is + used. + """ + self.Phi0 = Phi0 + self.spatial_profile = spatial_profile + self.energy_profile = energy_profile + self.time_profile = time_profile + + # The base class will set the default (internally used) flux unit, which + # will be set automatically to the particular profile. 
+ super().__init__( + angle_unit=self._spatial_profile.angle_unit, + energy_unit=self._energy_profile.energy_unit, + time_unit=self._time_profile.time_unit, + length_unit=length_unit, + **kwargs + ) + + # Define the parameters which can be set via the `set_params` + # method. + self.param_names = ('Phi0',) + + @property + def Phi0(self): + """The flux normalization constant. + The unit of this normalization constant is + ([angle]^{-2} [energy]^{-1} [length]^{-2} [time]^{-1}). + """ + return self._Phi0 + + @Phi0.setter + def Phi0(self, v): + v = float_cast( + v, + 'The Phi0 property must be castable to type float!') + self._Phi0 = v + + @property + def spatial_profile(self): + """Instance of SpatialFluxProfile describing the spatial profile of the + flux. + """ + return self._spatial_profile + + @spatial_profile.setter + def spatial_profile(self, profile): + if profile is None: + profile = UnitySpatialFluxProfile() + if not isinstance(profile, SpatialFluxProfile): + raise TypeError( + 'The spatial_profile property must be None, or an ' + 'instance of SpatialFluxProfile!') + self._spatial_profile = profile + + @property + def energy_profile(self): + """Instance of EnergyFluxProfile describing the energy profile of the + flux. + """ + return self._energy_profile + + @energy_profile.setter + def energy_profile(self, profile): + if profile is None: + profile = UnityEnergyFluxProfile() + if not isinstance(profile, EnergyFluxProfile): + raise TypeError( + 'The energy_profile property must be None, or an ' + 'instance of EnergyFluxProfile!') + self._energy_profile = profile + + @property + def time_profile(self): + """Instance of TimeFluxProfile describing the time profile of the flux. 
+ """ + return self._time_profile + + @time_profile.setter + def time_profile(self, profile): + if profile is None: + profile = UnityTimeFluxProfile() + if not isinstance(profile, TimeFluxProfile): + raise TypeError( + 'The time_profile property must be None, or an ' + 'instance of TimeFluxProfile!') + self._time_profile = profile + + @property + def math_function_str(self): + """The string showing the mathematical function of the flux. + """ + s = f'{self._Phi0:.3e}' + + spatial_str = self._spatial_profile.math_function_str + if spatial_str is not None: + s += f' * {spatial_str}' + energy_str = self._energy_profile.math_function_str + if energy_str is not None: + s += f' * {energy_str}' + + time_str = self._time_profile.math_function_str + if time_str is not None: + s += f' * {time_str}' + + return s + + @property + def angle_unit(self): + """The unit of angle used for the flux calculation. The unit is + taken and set from and to the set spatial flux profile, respectively. + """ + return self._spatial_profile.angle_unit + + @angle_unit.setter + def angle_unit(self, unit): + self._spatial_profile.angle_unit = unit + + @property + def energy_unit(self): + """The unit of energy used for the flux calculation. The unit is + taken and set from and to the set energy flux profile, respectively. + """ + return self._energy_profile.energy_unit + + @energy_unit.setter + def energy_unit(self, unit): + self._energy_profile.energy_unit = unit + + @property + def time_unit(self): + """The unit of time used for the flux calculation. The unit is + taken and set from and to the set time flux profile, respectively. + """ + return self._time_profile.time_unit + + @time_unit.setter + def time_unit(self, unit): + self._time_profile.time_unit = unit + + @property + def param_names(self): + """The tuple holding the names of the math function's parameters. This + is the total set of parameter names for all flux profiles of this + FactorizedFluxModel instance. 
+ """ + pnames = list(super(FactorizedFluxModel, type(self)).param_names) + pnames += self._spatial_profile.param_names + pnames += self._energy_profile.param_names + pnames += self._time_profile.param_names + + return tuple(pnames) + + @param_names.setter + def param_names(self, names): + super(FactorizedFluxModel, type(self)).param_names.fset(self, names) + + def __call__( + self, + ra=None, + dec=None, + E=None, + t=None, + angle_unit=None, + energy_unit=None, + time_unit=None): + """Calculates the flux values for the given celestrial positions, + energies, and observation times. + + Parameters + ---------- + ra: float | (Ncoord,)-shaped 1d numpy ndarray of float | None + The right-ascention coordinate for which to retrieve the flux value. + dec : float | (Ncoord,)-shaped 1d numpy ndarray of float | None + The declination coordinate for which to retrieve the flux value. + E : float | (Nenergy,)-shaped 1d numpy ndarray of float | None + The energy for which to retrieve the flux value. + t : float | (Ntime,)-shaped 1d numpy ndarray of float | None + The observation time for which to retrieve the flux value. + angle_unit : instance of astropy.units.UnitBase | None + The unit of the given angles. + If ``None``, the set angle unit of the spatial flux profile is + assumed. + energy_unit : instance of astropy.units.UnitBase | None + The unit of the given energies. + If ``None``, the set energy unit of the energy flux profile is + assumed. + time_unit : instance of astropy.units.UnitBase | None + The unit of the given times. + If ``None``, the set time unit of the time flux profile is + assumed. + + Returns + ------- + flux : (Ncoord,Nenergy,Ntime)-shaped ndarray of float + The flux values are in unit + [energy]^{-1} [angle]^{-2} [length]^{-2} [time]^{-1}. 
+ """ + if (ra is not None) and (dec is not None): + spatial_profile_values = self._spatial_profile( + ra, dec, unit=angle_unit) + else: + spatial_profile_values = np.array([1]) + + if E is not None: + energy_profile_values = self._energy_profile( + E, unit=energy_unit) + else: + energy_profile_values = np.array([1]) + + if t is not None: + time_profile_values = self._time_profile( + t, unit=time_unit) + else: + time_profile_values = np.array([1]) + + flux = ( + self._Phi0 * + spatial_profile_values[:, np.newaxis, np.newaxis] * + energy_profile_values[np.newaxis, :, np.newaxis] * + time_profile_values[np.newaxis, np.newaxis, :] + ) + + return flux + + def get_param(self, name): + """Retrieves the value of the given parameter. It returns ``np.nan`` if + the parameter does not exist. + + Parameters + ---------- + name : str + The name of the parameter. + + Returns + ------- + value : float | np.nan + The value of the parameter. + """ + for obj in ( + super(), + self._spatial_profile, + self._energy_profile, + self._time_profile): + value = obj.get_param(name=name) + if not np.isnan(value): + return value + + return np.nan + + def set_params(self, pdict): + """Sets the parameters of the flux model. For this factorized flux model + it means that it sets the parameters of the spatial, energy, and time + profiles. + + Parameters + ---------- + pdict : dict + The flux parameter dictionary. + + Returns + ------- + updated : bool + Flag if parameter values were actually updated. + """ + updated = False + + updated |= super().set_params(pdict) + + updated |= self._spatial_profile.set_params(pdict) + updated |= self._energy_profile.set_params(pdict) + updated |= self._time_profile.set_params(pdict) + + return updated + + +class PointlikeFFM( + FactorizedFluxModel, + IsPointlike): + """This class describes a factorized flux model (FFM), where the spatial + profile is modeled as a point. This class provides the base class for a flux + model of a point-like source. 
+ """ + def __init__( + self, + Phi0, + energy_profile, + time_profile, + ra=None, + dec=None, + angle_unit=None, + length_unit=None, + **kwargs): + """Creates a new factorized flux model for a point-like source. + + Parameters + ---------- + Phi0 : float + The flux normalization constant in unit of flux. + energy_profile : instance of EnergyFluxProfile | None + The EnergyFluxProfile instance providing the energy profile + function of the flux. + If set to None, an instance of UnityEnergyFluxProfile will be used, + which represents the constant function 1. + time_profile : instance of TimeFluxProfile | None + The TimeFluxProfile instance providing the time profile function + of the flux. + If set to None, an instance of UnityTimeFluxProfile will be used, + which represents the constant function 1. + ra : float | None + The right-ascention of the point. + dec : float | None + The declination of the point. + angle_unit : instance of astropy.units.UnitBase | None + The unit for angles used for the flux unit. + If set to ``None``, the configured internal angle unit is used. + length_unit : instance of astropy.units.UnitBase | None + The unit for length used for the flux unit. + If set to ``None``, the configured internal length unit is used. + """ + spatial_profile = PointSpatialFluxProfile( + ra=ra, + dec=dec, + angle_unit=angle_unit) + + super().__init__( + Phi0=Phi0, + spatial_profile=spatial_profile, + energy_profile=energy_profile, + time_profile=time_profile, + length_unit=length_unit, + ra_func_instance=spatial_profile, + get_ra_func=type(spatial_profile).ra.fget, + set_ra_func=type(spatial_profile).ra.fset, + dec_func_instance=spatial_profile, + get_dec_func=type(spatial_profile).dec.fget, + set_dec_func=type(spatial_profile).dec.fset, + **kwargs + ) + + @property + def unit_str(self): + """The string representation of the flux unit. + """ + # Note: + # For a point-like differential flux, there is no solid-angle + # element. 
+ s = (f'({self.energy_unit.to_string()}' + f' {self.length_unit.to_string()}^2' + f' {self.time_unit.to_string()})^-1') + + return s + + @property + def unit_latex_str(self): + """The latex string representation of the flux unit. + """ + # Note: + # For a point-like differential flux, there is no solid-angle + # element. + s = (f'{self.energy_unit.to_string()}''$^{-1}$ ' + f'{self.length_unit.to_string()}''$^{-2}$ ' + f'{self.time_unit.to_string()}''$^{-1}$') + + return s + + +class SteadyPointlikeFFM( + PointlikeFFM): + """This class describes a factorized flux model (FFM), where the spatial + profile is modeled as a point and the time profile as constant 1. It is + derived from the ``PointlikeFFM`` class. + """ + def __init__( + self, + Phi0, + energy_profile, + ra=None, + dec=None, + angle_unit=None, + length_unit=None, + time_unit=None, + **kwargs): + """Creates a new factorized flux model for a point-like source with no + time dependence. + + Parameters + ---------- + Phi0 : float + The flux normalization constant. + energy_profile : instance of EnergyFluxProfile | None + The EnergyFluxProfile instance providing the energy profile + function of the flux. + If set to None, an instance of UnityEnergyFluxProfile will be used, + which represents the constant function 1. + ra : float | None + The right-ascension of the point. + dec : float | None + The declination of the point. + angle_unit : instance of astropy.units.UnitBase | None + The unit for angles used for the flux unit. + If set to ``None``, the configured default angle unit for fluxes + is used. + length_unit : instance of astropy.units.UnitBase | None + The unit for length used for the flux unit. + If set to ``None``, the configured default length unit for fluxes + is used. + time_unit : instance of astropy.units.UnitBase | None + The used unit for time. + If set to ``None``, the configured default time unit for fluxes + is used. 
+ """ + time_profile = UnityTimeFluxProfile( + time_unit=time_unit) + + super().__init__( + Phi0=Phi0, + ra=ra, + dec=dec, + energy_profile=energy_profile, + time_profile=time_profile, + angle_unit=angle_unit, + length_unit=length_unit, + **kwargs + ) diff --git a/skyllh/core/interpolate.py b/skyllh/core/interpolate.py index 60efd7bfe6..fe2a391ec6 100644 --- a/skyllh/core/interpolate.py +++ b/skyllh/core/interpolate.py @@ -10,10 +10,14 @@ ParameterGrid, ParameterGridSet ) -from skyllh.core.py import classname +from skyllh.core.py import ( + classname, +) -class GridManifoldInterpolationMethod(object, metaclass=abc.ABCMeta): +class GridManifoldInterpolationMethod( + object, + metaclass=abc.ABCMeta): """This is an abstract base class for implementing a method to interpolate a manifold that is defined on a grid of parameter values. In general the number of parameters can be arbitrary and hence the manifold's @@ -23,58 +27,78 @@ class GridManifoldInterpolationMethod(object, metaclass=abc.ABCMeta): different dimensionality. """ - def __init__(self, f, param_grid_set): + def __init__(self, func, param_grid_set, **kwargs): """Constructor for a GridManifoldInterpolationMethod object. It must be called by the derived class. Parameters ---------- - f : callable R^d -> R - The function that takes d parameters as input and returns the - value of the d-dimensional manifold at this point for each given - event. - The call signature of f must be: - - ``__call__(tdm, gridparams, eventdata)`` - - where ``tdm`` is the TrialDataManager instance holding the trial - data, ``gridparams`` is the dictionary with the parameter values - on the grid, and ``eventdata`` is a 2-dimensional (N,V)-shaped numpy - ndarray holding the event data, where N is the number of events, and - V the dimensionality of the event data. - param_grid_set : ParameterGrid instance | ParameterGridSet instance - The set of d parameter grids. 
This defines the grid of the + func : callable R^D -> R + The function that takes D parameter grid values as input and returns + the value of the D-dimensional manifold at this point for each given + trial event and source. + The call signature of func must be: + + ``__call__(tdm, eventdata, gridparams_recarray, n_values)`` + + The arguments are as follows: + + tdm : instance of TrialDataManager + The TrialDataManager instance holding the trial event data. + eventdata : instance of numpy ndarray + A two-dimensional (N_events,V)-shaped numpy ndarray holding + the event data, where N_events is the number of trial + events, and V the dimensionality of the event data. + gridparams_recarray : instance of numpy record ndarray + The numpy record ndarray of length ``len(src_idxs)`` with + the D parameter names and values on the grid for all + sources. + n_values : int + The length of the output numpy ndarray of shape (n_values,). + + The return value of ``func`` should be the (n_values,)-shaped + one-dimensional ndarray holding the values for each set of parameter + values of the sources given via the ``gridparams_recarray``. + The length of the array, i.e. n_values, depends on the + ``src_evt_idx`` property of the TrialDataManager. In the worst case + n_values is N_sources * N_events. + param_grid_set : instance of ParameterGrid | instance of ParameterGridSet + The set of D parameter grids. This defines the grid of the manifold. """ - super(GridManifoldInterpolationMethod, self).__init__() + super().__init__(**kwargs) - self.f = f + self.func = func self.param_grid_set = param_grid_set @property - def f(self): + def func(self): """The R^d -> R manifold function. 
""" - return self._f - @f.setter - def f(self, func): - if(not callable(func)): - raise TypeError('The f property must be a callable object!') - self._f = func + return self._func + + @func.setter + def func(self, f): + if not callable(f): + raise TypeError( + 'The func property must be a callable object!') + self._func = f @property def param_grid_set(self): - """The ParameterGridSet object defining the set of d parameter grids. + """The ParameterGridSet instance defining the set of d parameter grids. This defines the grid of the manifold. """ return self._param_grid_set + @param_grid_set.setter def param_grid_set(self, obj): - if(isinstance(obj, ParameterGrid)): + if isinstance(obj, ParameterGrid): obj = ParameterGridSet([obj]) - elif(not isinstance(obj, ParameterGridSet)): - raise TypeError('The param_grid_set property must be an instance ' - 'of ParameterGridSet!') + elif not isinstance(obj, ParameterGridSet): + raise TypeError( + 'The param_grid_set property must be an instance ' + 'of ParameterGrid or ParameterGridSet!') self._param_grid_set = obj @property @@ -84,384 +108,515 @@ def ndim(self): return len(self._param_grid_set) @abc.abstractmethod - def get_value_and_gradients(self, tdm, eventdata, params): - """Retrieves the interpolated value of the manifold at the d-dimensional - point ``params`` for all given events, along with the d gradients, - i.e. partial derivatives. + def __call__(self, tdm, eventdata, params_recarray): + """Retrieves the interpolated value of the manifold at the D-dimensional + point ``params_recarray`` for all given events and sources, along with + the D gradients, i.e. partial derivatives. Parameters ---------- - tdm : TrialDataManager + tdm : instance of TrialDataManager The TrialDataManager instance holding the trial data. 
- eventdata : numpy (N_events,V)-shaped 2D ndarray + eventdata : numpy ndarray The 2D (N_events,V)-shaped numpy ndarray holding the event data, - where N_events is the number of events, and V the dimensionality of - the event data. - params : dict - The dictionary with the parameter values, defining the point on the - manifold for which the value should get calculated. + where N_events is the number of trial events, and V the + dimensionality of the event data. + params_recarray : instance of numpy record ndarray + The numpy record ndarray holding the N_sources set of parameter + names and values, that define the point (for each source) on the + manifold for which the value should get calculated for each event. Returns ------- - value : (N,) ndarray of float - The interpolated manifold value for the N given events. - gradients : (D,N) ndarray of float - The D manifold gradients for the N given events, where D is the - number of parameters. The order of the D parameters is defined - by the ParameterGridSet that has been provided at construction time - of this interpolation method object. + values : ndarray of float + The (N,)-shaped numpy ndarray holding the interpolated manifold + values for the given events and sources. + grads : ndarray of float + The (D,N)-shaped numpy ndarray holding the D manifold gradients for + the N given values, where D is the number of parameters. + The order of the D parameters is defined by the ParameterGridSet + that has been provided at construction time of this interpolation + method object. """ pass -class NullGridManifoldInterpolationMethod(GridManifoldInterpolationMethod): +class NullGridManifoldInterpolationMethod( + GridManifoldInterpolationMethod): """This grid manifold interpolation method performes no interpolation. When - the ``get_value_and_gradients`` method is called, it rounds the parameter - values to their nearest grid point values. All gradients are set to zero. 
+ the + :meth:`~skyllh.core.interpolate.NullGridManifoldInterpolationMethod.__call__` + method is called, it rounds the parameter values to their nearest grid + point values. All gradients are set to zero. """ - def __init__(self, f, param_grid_set): + def __init__( + self, + func, + param_grid_set, + **kwargs): """Creates a new NullGridManifoldInterpolationMethod instance. Parameters ---------- - f : callable R^D -> R - The function that takes the parameter grid value as input and - returns the value of the n-dimensional manifold at this point for - each given event. - - ``__call__(tdm, gridparams, eventdata)`` - - where ``gridparams`` is the dictionary with the parameter names and - values on the grid, and ``eventdata`` is a 2-dimensional - (N,V)-shaped numpy ndarray holding the event data, where N is the - number of events, and V the dimensionality of the event data. - The return value of ``f`` must be a (N,)-shaped 1d ndarray of float. - param_grid_set : ParameterGrid instance | ParameterGridSet instance - The set of parameter grids. This defines the grid of the - D-dimensional manifold. + func : callable R^d -> R + The function that takes d parameter grid values as input and returns + the value of the d-dimensional manifold at this point for each given + trial event and source. + See the documentation of the + :class:`~skyllh.core.interpolate.GridManifoldInterpolationMethod` + class for more details. + param_grid_set : instance of ParameterGrid | instance of ParameterGridSet + The set of d parameter grids. This defines the grid of the + manifold. """ - super(NullGridManifoldInterpolationMethod, self).__init__( - f, param_grid_set) + super().__init__( + func=func, + param_grid_set=param_grid_set, + **kwargs) - def get_value_and_gradients(self, tdm, eventdata, params): + def __call__(self, tdm, eventdata, params_recarray): """Calculates the non-interpolated manifold value and its gradient - (zero) for each given event at the point ``params``. 
- By definition the D values of ``params`` must coincide with the - parameter grid values. + (zero) for each given event and source at the points given by + ``params_recarray``. Parameters ---------- - tdm : TrialDataManager + tdm : instance of TrialDataManager The TrialDataManager instance holding the trial data. - eventdata : numpy (N_events,V)-shaped 2D ndarray - The 2D (N_events,V)-shaped numpy ndarray holding the event data, + eventdata : instance of numpy.ndarray + The (N_events,V)-shaped numpy ndarray holding the event data, where N_events is the number of events, and V the dimensionality of the event data. - params : dict - The dictionary with the D parameter values, defining the point on - the manifold for which the value should get calculated. + params_recarray : instance of numpy.ndarray + The structured numpy ndarray of length N_sources holding the + parameter names and values of the sources, defining the point on the + manifold for which the values should get calculated. Returns ------- - value : (N,) ndarray of float - The interpolated manifold value for the N given events. - gradients : (D,N) ndarray of float - The D manifold gradients for the N given events, where D is the - number of parameters. + values : instance of numpy.ndarray + The (N,)-shaped numpy ndarray holding the interpolated manifold + values for the given events and sources. + grads : instance of numpy.ndarray + The (D,N)-shaped ndarray of float holding the D manifold gradients + for the N values, where D is the number of parameters of the + manifold. By definition, all gradients are zero. """ # Round the given parameter values to their nearest grid values. 
- gridparams = dict() - for (pname,pvalue) in params.items(): - p_grid = self._param_grid_set[pname] - gridparams[pname] = p_grid.round_to_nearest_grid_point(pvalue) + gridparams_recarray_dtype = [ + (p_grid.name, np.float64) + for p_grid in self._param_grid_set + ] + + gridparams_recarray = np.empty( + params_recarray.shape, + dtype=gridparams_recarray_dtype) + + for p_grid in self._param_grid_set: + pname = p_grid.name + pvalues = params_recarray[pname] + gridparams_recarray[pname] = p_grid.round_to_nearest_grid_point( + pvalues) - value = self._f(tdm, gridparams, eventdata) - gradients = np.zeros( - (len(params), tdm.n_selected_events), dtype=np.float64) + values = self._func( + tdm=tdm, + eventdata=eventdata, + gridparams_recarray=gridparams_recarray, + n_values=tdm.get_n_values()) - return (value, gradients) + grads = np.zeros( + (len(self.param_grid_set), len(values)), + dtype=np.float64) + return (values, grads) -class Linear1DGridManifoldInterpolationMethod(GridManifoldInterpolationMethod): + +class Linear1DGridManifoldInterpolationMethod( + GridManifoldInterpolationMethod): """This grid manifold interpolation method interpolates the 1-dimensional grid manifold using a line. """ - def __init__(self, f, param_grid_set): + def __init__( + self, + func, + param_grid_set, + **kwargs): """Creates a new Linear1DGridManifoldInterpolationMethod instance. Parameters ---------- - f : callable R -> R + func : callable R -> R The function that takes the parameter grid value as input and returns the value of the 1-dimensional manifold at this point for - each given event. - - ``__call__(tdm, gridparams, eventdata)`` - - where ``gridparams`` is the dictionary with the parameter names and - values on the grid, and ``eventdata`` is a 2-dimensional - (N,V)-shaped numpy ndarray holding the event data, where N is the - number of events, and V the dimensionality of the event data. - The return value of ``f`` must be a (N,)-shaped 1d ndarray of float. 
- param_grid_set : ParameterGrid instance | ParameterGridSet instance - The set of parameter grids. This defines the grid of the - 1-dimensional manifold. By definition, only the first parameter grid - is considered. + each given source and trial event. + See the documentation of the + :class:`~skyllh.core.interpolate.GridManifoldInterpolationMethod` + class for more details. + param_grid_set : instance of ParameterGrid | instance of ParameterGridSet + The one parameter grid. This defines the grid of the manifold. """ - super(Linear1DGridManifoldInterpolationMethod, self).__init__( - f, param_grid_set) - - if(len(self._param_grid_set) != 1): - raise ValueError('The %s class supports only 1D grid manifolds. ' + super().__init__( + func=func, + param_grid_set=param_grid_set, + **kwargs) + + if len(self._param_grid_set) != 1: + raise ValueError( + f'The {classname(self)} class supports only 1D grid manifolds. ' 'The param_grid_set argument must contain 1 ParameterGrid ' - 'instance! Currently it has %d!'%( - classname(self), len(self._param_grid_set))) - self.p_grid = self._param_grid_set[0] + f'instance! Currently it has {len(self._param_grid_set)}!') + self._p_grid = self._param_grid_set[0] # Create a cache for the line parameterization for the last # manifold grid point for the different events. - self._create_cache(None, np.array([]), np.array([])) - self._cache_tdm_trial_data_state_id = None + self._cache = self._create_cache( + trial_data_state_id=None, + x0=None, + m=None, + b=None) - def _create_cache(self, x0, m, b): + def _create_cache(self, trial_data_state_id, x0, m, b): """Creates a cache for the line parameterization for the last manifold grid point for the nevents different events. Parameters ---------- - x0 : float | None - The parameter grid value for the lower point of the grid manifold - used to estimate the line. - m : 1d ndarray - The slope of the line for each event. - b : 1d ndarray - The offset coefficient of the line for each event. 
+ trial_data_state_id : int | None + The trial data state id of the TrialDataManager. + x0 : instance of ndarray | None + The (N_sources,)-shaped numpy ndarray holding the parameter grid + value of the lower point of the grid manifold for each source used + to estimate the line. + m : instance of ndarray | None + The (N_values,)-shaped numpy ndarray holding the slope of the line + for each trial event and source. + b : instance of ndarray | None + The (N_values,)-shaped numpy ndarray holding the offset coefficient + of the line for each trial event and source. """ - self._cache = { + cache = { + 'trial_data_state_id': trial_data_state_id, 'x0': x0, 'm': m, 'b': b } - def get_value_and_gradients(self, tdm, eventdata, params): + return cache + + def _is_cached(self, trial_data_state_id, x0): + """Checks if the given line parametrization is already cached for the + given x0 values. + + Returns + ------- + check : bool + ``True`` if the line parametrization for x0 is already cached, + ``False`` otherwise. + """ + self__cache = self._cache + if (self__cache['trial_data_state_id'] is not None) and\ + (self__cache['trial_data_state_id'] == trial_data_state_id) and\ + (np.all(np.isclose(self__cache['x0'], x0))): + return True + + return False + + def __call__(self, tdm, eventdata, params_recarray): """Calculates the interpolated manifold value and its gradient for each - given event at the point ``params``. + given source and trial event at the point ``params_recarray``. Parameters ---------- - tdm : TrialDataManager + tdm : instance of TrialDataManager The TrialDataManager instance holding the trial data. - eventdata : numpy (N_events,V)-shaped 2D ndarray - The 2D (N_events,V)-shaped numpy ndarray holding the event data, + eventdata : instance of numpy ndarray + The (N_events,V)-shaped numpy ndarray holding the event data, where N_events is the number of events, and V the dimensionality of the event data. 
- params : dict - The dictionary with the parameter values, defining the point on the - manifold for which the value should get calculated. + params_recarray : numpy record ndarray + The numpy record ndarray of length N_sources holding the parameter + names and values for each source, defining the point on the manifold + for which the value should get calculated. + This record ndarray can be of length 1. In that case the single set + of parameters is used for all sources. Returns ------- - value : (N,) ndarray of float - The interpolated manifold value for the N given events. - gradients : (D,N) ndarray of float - The D manifold gradients for the N given events, where D is the - number of parameters. + values : instance of numpy ndarray + The (N_values,)-shaped numpy ndarray of float holding the + interpolated manifold values for all sources and trial events. + grads : ndarray of float + The (D,N_values)-shaped numpy ndarray of float holding the D + manifold gradients for the N_values values for all sources and trial + events, where D is the number of interpolation parameters. """ - (xname, x) = tuple(params.items())[0] + xname = self._p_grid.name - self__p_grid = self.p_grid + x = params_recarray[xname] # Determine the nearest grid point that is lower than x and use that as # x0. - x0 = self__p_grid.round_to_lower_grid_point(x) + x0 = self._p_grid.round_to_lower_grid_point(x) # Check if the line parametrization for x0 is already cached. 
- self__cache = self._cache + if self._is_cached(tdm.trial_data_state_id, x0): + m = self._cache['m'] + b = self._cache['b'] - tdm_trial_data_state_id = tdm.trial_data_state_id - cache_tdm_trial_data_state_id = self._cache_tdm_trial_data_state_id + (x,) = tdm.broadcast_sources_arrays_to_values_arrays((x,)) - if((self__cache['x0'] == x0) and - (tdm.n_selected_events == len(self__cache['m'])) and - (cache_tdm_trial_data_state_id is not None) and - (cache_tdm_trial_data_state_id == tdm_trial_data_state_id) - ): - m = self__cache['m'] - b = self__cache['b'] - else: - # Calculate the line parametrization for all the given events. - self__f = self.f + values = m*x + b - # Calculate the upper grid point of x. - x1 = self__p_grid.round_to_upper_grid_point(x) + return (values, np.atleast_2d(m)) - # Check if x was on a grid point. In that case x0 and x1 are equal. - # The value will be of that grid point x0, but the gradient is - # calculated based on the two neighboring grid points of x0. - if(x1 == x0): - value = self__f(tdm, {xname:x0}, eventdata) - x0 = self__p_grid.round_to_nearest_grid_point( - x0 - self__p_grid.delta) - x1 = self__p_grid.round_to_nearest_grid_point( - x1 + self__p_grid.delta) + # The line parametrization is not cached. + # Calculate the line parametrization for all the given events. + self__func = self._func - M0 = self__f(tdm, {xname:x0}, eventdata) - M1 = self__f(tdm, {xname:x1}, eventdata) - m = (M1 - M0) / (x1 - x0) - return (value, np.atleast_2d(m)) + x1 = self._p_grid.round_to_upper_grid_point(x) - M0 = self__f(tdm, {xname:x0}, eventdata) - M1 = self__f(tdm, {xname:x1}, eventdata) + n_values = tdm.get_n_values() - m = (M1 - M0) / (x1 - x0) - b = M0 - m*x0 + values = np.empty((n_values,), dtype=np.float64) + m = np.empty((n_values,), dtype=np.float64) - # Cache the line parametrization. 
- self._create_cache(x0, m, b) - self._cache_tdm_trial_data_state_id = tdm_trial_data_state_id + gridparams_recarray = np.array( + x0, + dtype=[(xname, np.float64)]) + M0 = self__func( + tdm=tdm, + eventdata=eventdata, + gridparams_recarray=gridparams_recarray, + n_values=n_values) - # Calculate the interpolated manifold value. The gradient is m. - value = m*x + b + gridparams_recarray = np.array( + x1, + dtype=[(xname, np.float64)]) + M1 = self__func( + tdm=tdm, + eventdata=eventdata, + gridparams_recarray=gridparams_recarray, + n_values=n_values) - return (value, np.atleast_2d(m)) + # Broadcast x0 and x1 to the values array. + (x, v_x0, v_x1) = tdm.broadcast_sources_arrays_to_values_arrays( + (x, x0, x1)) + m = (M1 - M0) / (v_x1 - v_x0) + b = M0 - m*v_x0 -class Parabola1DGridManifoldInterpolationMethod(GridManifoldInterpolationMethod): + # Cache the line parametrization. + self._cache = self._create_cache( + trial_data_state_id=tdm.trial_data_state_id, + x0=x0, + m=m, + b=b) + + # Calculate the interpolated manifold values. The gradient is m. + values = m*x + b + + return (values, np.atleast_2d(m)) + + +class Parabola1DGridManifoldInterpolationMethod( + GridManifoldInterpolationMethod): """This grid manifold interpolation method interpolates the 1-dimensional grid manifold using a parabola. """ - def __init__(self, f, param_grid_set): + def __init__( + self, + func, + param_grid_set, + **kwargs): """Creates a new Parabola1DGridManifoldInterpolationMethod instance. Parameters ---------- - f : callable R -> R + func : callable R -> R The function that takes the parameter grid value as input and returns the value of the 1-dimensional manifold at this point for - each given event. 
- The call signature of f must be: - - ``__call__(tdm, gridparams, eventdata)`` - - where ``gridparams`` is the dictionary with the parameter names and - values on the grid, and ``eventdata`` is a 2-dimensional - (N,V)-shaped numpy ndarray holding the event data, where N is the - number of events, and V the dimensionality of the event data. - param_grid_set : instance of ParameterGridSet - The set of parameter grids. This defines the grid of the - 1-dimensional manifold. By definition, only the first parameter grid - is considered. + each given source and trial event. + See the documentation of the + :class:`~skyllh.core.interpolate.GridManifoldInterpolationMethod` + class for more details. + param_grid_set : instance of ParameterGrid | instance of ParameterGridSet + The one parameter grid. This defines the grid of the manifold. """ - super(Parabola1DGridManifoldInterpolationMethod, self).__init__( - f, param_grid_set) - - if(len(self._param_grid_set) != 1): - raise ValueError('The %s class supports only 1D grid manifolds. ' + super().__init__( + func=func, + param_grid_set=param_grid_set, + **kwargs) + + if len(self._param_grid_set) != 1: + raise ValueError( + f'The {classname(self)} class supports only 1D grid manifolds. ' 'The param_grid_set argument must contain 1 ParameterGrid ' - 'instance! Currently it has %d!'%( - classname(self), len(self._param_grid_set))) + f'instance! Currently it has {len(self._param_grid_set)}!') self._p_grid = self._param_grid_set[0] # Create a cache for the parabola parameterization for the last # manifold grid point for the different events. 
- self._create_cache(None, np.array([]), np.array([]), np.array([])) - self._cache_tdm_trial_data_state_id = None - - def _create_cache(self, x1, M1, a, b): + self._cache = self._create_cache( + trial_data_state_id=None, + x1=None, + M1=None, + a=None, + b=None) + + def _create_cache( + self, + trial_data_state_id, + x1, + M1, + a, + b): """Creates a cache for the parabola parameterization for the last manifold grid point for the nevents different events. Parameters ---------- - x1 : float | None - The parameter grid value for middle point of the grid manifold used - to estimate the parabola. - M1 : 1d ndarray - The grid manifold value for each event of the middle point (x1,). - a : 1d ndarray - The parabola coefficient ``a`` for each event. - b : 1d ndarray - The parabola coefficient ``b`` for each event. + trial_data_state_id : int | None + The trial data state ID of the TrialDataManager. + x1 : instance of numpy ndarray | None + The (N_sources,)-shaped numpy ndarray of float holding the parameter + grid value for the middle point of the grid manifold for all sources + used to estimate the parabola. + M1 : instance of numpy ndarray + The (N_values,)-shaped numpy ndarray of float holding the grid + manifold value for each source and trial event of the middle point + (x1,). + a : instance of numpy ndarray + The (N_values,)-shaped numpy ndarray of float holding the parabola + coefficient ``a`` for each source and trial event. + b : instance of numpy ndarray + The (N_values,)-shaped numpy ndarray of float holding the parabola + coefficient ``b`` for each source and trial event. + + Returns + ------- + cache : dict + The dictionary holding the cache data. 
""" - self._cache = { + cache = { + 'trial_data_state_id': trial_data_state_id, 'x1': x1, 'M1': M1, 'a': a, 'b': b } - def get_value_and_gradients(self, tdm, eventdata, params): + return cache + + def _is_cached(self, trial_data_state_id, x1): + """Checks if the parabola parametrization is already cached for the + given x1 values. + """ + self__cache = self._cache + if (self__cache['trial_data_state_id'] is not None) and\ + (self__cache['trial_data_state_id'] == trial_data_state_id): + if np.any(np.not_equal(self__cache['x1'], x1)): + return False + return True + + return False + + def __call__(self, tdm, eventdata, params_recarray): """Calculates the interpolated manifold value and its gradient for each - given event at the point ``params``. + given source and trial event at the point ``params_recarray``. Parameters ---------- - tdm : TrialDataManager + tdm : instance of TrialDataManager The TrialDataManager instance holding the trial data. - eventdata : numpy (N_events,V)-shaped 2D ndarray - The 2D (N_events,V)-shaped numpy ndarray holding the event data, + eventdata : instance of numpy ndarray + The (N_events,V)-shaped numpy ndarray holding the event data, where N_events is the number of events, and V the dimensionality of the event data. - params : dict - The dictionary with the parameter values, defining the point on the - manifold for which the value should get calculated. + params_recarray : numpy record ndarray + The numpy record ndarray of length N_sources holding the parameter + names and values for each source, defining the point on the manifold + for which the value should get calculated. + This record ndarray can be of length 1. In that case the single set + of parameters is used for all sources. Returns ------- - value : (N,) ndarray of float + values : (N_values,) ndarray of float The interpolated manifold value for the N given events. 
- gradients : (D,N) ndarray of float + grads : (D,N_values) ndarray of float The D manifold gradients for the N given events, where D is the number of parameters. """ - (xname, x) = tuple(params.items())[0] - - # Create local variable name alias to avoid Python dot lookups. - self__p_grid = self._p_grid - self__p_grid__round_to_nearest_grid_point = \ - self__p_grid.round_to_nearest_grid_point - self__cache = self._cache + xname = self._p_grid.name - tdm_trial_data_state_id = tdm.trial_data_state_id - cache_tdm_trial_data_state_id = self._cache_tdm_trial_data_state_id + x = params_recarray[xname] # Determine the nearest grid point x1. - x1 = self__p_grid__round_to_nearest_grid_point(x) + x1 = self._p_grid.round_to_nearest_grid_point(x) # Check if the parabola parametrization for x1 is already cached. - if((self__cache['x1'] == x1) and - (tdm.n_selected_events == len(self__cache['M1'])) and - (cache_tdm_trial_data_state_id is not None) and - (cache_tdm_trial_data_state_id == tdm_trial_data_state_id) - ): - M1 = self__cache['M1'] - a = self__cache['a'] - b = self__cache['b'] + if self._is_cached(tdm.trial_data_state_id, x1): + M1 = self._cache['M1'] + a = self._cache['a'] + b = self._cache['b'] else: - dx = self__p_grid.delta + dx = self._p_grid.delta # Calculate the neighboring grid points to x1: x0 and x2. - x0 = self__p_grid__round_to_nearest_grid_point(x1 - dx) - x2 = self__p_grid__round_to_nearest_grid_point(x1 + dx) + x0 = self._p_grid.round_to_nearest_grid_point(x1 - dx) + x2 = self._p_grid.round_to_nearest_grid_point(x1 + dx) # Parameterize the parabola with parameters a, b, and M1. 
- self__f = self.f - M0 = self__f(tdm, {xname:x0}, eventdata) - M1 = self__f(tdm, {xname:x1}, eventdata) - M2 = self__f(tdm, {xname:x2}, eventdata) + self__func = self._func + + n_values = tdm.get_n_values() + + gridparams_recarray = np.array( + x0, + dtype=[(xname, np.float64)]) + M0 = self__func( + tdm=tdm, + eventdata=eventdata, + gridparams_recarray=gridparams_recarray, + n_values=n_values) + + gridparams_recarray = np.array( + x1, + dtype=[(xname, np.float64)]) + M1 = self__func( + tdm=tdm, + eventdata=eventdata, + gridparams_recarray=gridparams_recarray, + n_values=n_values) + + gridparams_recarray = np.array( + x2, + dtype=[(xname, np.float64)]) + M2 = self__func( + tdm=tdm, + eventdata=eventdata, + gridparams_recarray=gridparams_recarray, + n_values=n_values) a = 0.5*(M0 - 2.*M1 + M2) / dx**2 b = 0.5*(M2 - M0) / dx # Cache the parabola parametrization. - self._create_cache(x1, M1, a, b) - self._cache_tdm_trial_data_state_id = tdm_trial_data_state_id - - # Calculate the interpolated manifold value. - value = a * (x - x1)**2 + b * (x - x1) + M1 - # Calculate the gradient of the manifold. - gradients = 2. * a * (x - x1) + b - - return (value, np.atleast_2d(gradients)) - + self._cache = self._create_cache( + trial_data_state_id=tdm.trial_data_state_id, + x1=x1, + M1=M1, + a=a, + b=b) + + # Broadcast x, x1, and (x-x1) to the values array. + (x, x1, x_minus_x1) = tdm.broadcast_sources_arrays_to_values_arrays( + (x, x1, x-x1)) + + # Calculate the interpolated manifold values. + values = a * x_minus_x1**2 + b * x_minus_x1 + M1 + # Calculate the gradient of the manifold for all values. + grads = 2. * a * x_minus_x1 + b + + return (values, np.atleast_2d(grads)) diff --git a/skyllh/core/livetime.py b/skyllh/core/livetime.py index d783046912..acd71df787 100644 --- a/skyllh/core/livetime.py +++ b/skyllh/core/livetime.py @@ -2,16 +2,47 @@ """The livetime module provides general functionality for detector up-time. 
""" + import numpy as np -from skyllh.core.random import RandomStateService -from skyllh.core.py import issequence, classname +from skyllh.core.py import ( + classname, + issequence, +) + -class Livetime(object): +class Livetime( + object): """The ``Livetime`` class defines an interface to query the up-time of the detector. """ - def __init__(self, uptime_mjd_intervals_arr): + + @staticmethod + def get_integrated_livetime(livetime): + """Gets the integrated live-time from the given livetime argument, which + is either a scalar value or an instance of Livetime. + + Parameters + ---------- + livetime : float | Livetime instance + The live-time in days as float, or an instance of Livetime. + + Returns + ------- + intgrated_livetime : float + The integrated live-time. + """ + intgrated_livetime = livetime + + if isinstance(livetime, Livetime): + intgrated_livetime = livetime.livetime + + return intgrated_livetime + + def __init__( + self, + uptime_mjd_intervals_arr, + **kwargs): """Creates a new Livetime object from a (N,2)-shaped ndarray holding the uptime intervals. @@ -32,33 +63,62 @@ def __init__(self, uptime_mjd_intervals_arr): the property setter method of ``uptime_mjd_intervals_arr`` by calling the ``assert_mjd_intervals_integrity`` method. """ - # The internal Nx2 numpy ndarray holding the MJD intervals when the - # detector was taking data. + super().__init__(**kwargs) + self.uptime_mjd_intervals_arr = uptime_mjd_intervals_arr - def assert_mjd_intervals_integrity(self): - """Checks if the internal MJD interval array conforms with all its + def assert_mjd_intervals_integrity( + self, + arr): + """Checks if the given MJD interval array conforms with all its data requirements. - Raises TypeError if the data array is not a float64 array. - Raises ValueError if the data integrity is broken. + Parameters + ---------- + arr : instance of numpy ndarray + The (N,2)-shaped numpy ndarray holding the up-time intervals. 
+ + Raises + ------ + TypeError + If the data array is not a float64 array. + ValueError + If the data integrity is broken. """ - if(not isinstance(self._uptime_mjd_intervals_arr, np.ndarray)): - raise TypeError('The internal MJD interval array must be of type ndarray!') + if not isinstance(arr, np.ndarray): + raise TypeError( + 'The internal MJD interval array must be of type ndarray! ' + 'Its current type is ' + f'{classname(arr)}!') - if(self._uptime_mjd_intervals_arr.dtype != np.float64): - raise TypeError('The type of the internal MJD interval array is not float64!') + if arr.dtype != np.float64: + raise TypeError( + 'The type of the internal MJD interval array is not float64!') # Check the shape of the array. - if(self._uptime_mjd_intervals_arr.ndim != 2): - raise ValueError('The dimensionality of the internel MJD interval array must be 2!') - if(self._uptime_mjd_intervals_arr.shape[1] != 2): - raise ValueError('The length of the second axis of the internal MJD interval array must be 2!') + if arr.ndim != 2: + raise ValueError( + 'The dimensionality of the internel MJD interval array must ' + 'be 2! Its current dimensionality is ' + f'{arr.ndim}!') + if arr.shape[1] != 2: + raise ValueError( + 'The length of the second axis of the internal MJD interval ' + 'array must be 2! Its current length is ' + f'{arr.shape[1]}!') - bins = self._onoff_intervals # Check if the bin edges are monotonically non decreasing. 
- if(not np.all(np.diff(bins) >= 0)): - raise ValueError('The interval edges of the internal MJD interval array are not monotonically non decreasing!') + diff = np.diff(arr.flat) + if not np.all(diff >= 0): + info = '' + for i in range(len(diff)-1): + if diff[i] < 0: + info += f'i={int(i/2)}: {arr[int(i/2)]}\n' + info += f'i={int(i/2)+1}: {arr[int(i/2)+1]}\n' + raise ValueError( + 'The interval edges of the internal MJD interval array are not ' + 'monotonically non-decreasing!\n' + f'{info}') @property def uptime_mjd_intervals_arr(self): @@ -67,10 +127,11 @@ def uptime_mjd_intervals_arr(self): time of the up-time interval, respectively. """ return self._uptime_mjd_intervals_arr + @uptime_mjd_intervals_arr.setter def uptime_mjd_intervals_arr(self, arr): + self.assert_mjd_intervals_integrity(arr) self._uptime_mjd_intervals_arr = arr - self.assert_mjd_intervals_integrity() @property def n_uptime_mjd_intervals(self): @@ -91,36 +152,45 @@ def time_window(self): spanned by all the MJD uptime intervals. By definition this included possible detector down-time periods. """ - return (self._uptime_mjd_intervals_arr[0,0], - self._uptime_mjd_intervals_arr[-1,1]) + return (self._uptime_mjd_intervals_arr[0, 0], + self._uptime_mjd_intervals_arr[-1, 1]) @property def time_start(self): - """(read-only) The start of the detector live-time. + """(read-only) The start time of the detector live-time. """ - return self._uptime_mjd_intervals_arr[0,0] + return self._uptime_mjd_intervals_arr[0, 0] @property - def time_end(self): - """(read-only) The end of the detector live-time. - """ - return self._uptime_mjd_intervals_arr[-1,1] - - @property - def _onoff_intervals(self): - """A view on the mjd intervals where each time is a lower bin edge. - Hence, odd array elements (bins) are on-time intervals, and even array - elements are off-time intervals. + def time_stop(self): + """(read-only) The stop time of the detector live-time. 
""" - return np.reshape(self._uptime_mjd_intervals_arr, (self._uptime_mjd_intervals_arr.size,)) + return self._uptime_mjd_intervals_arr[-1, 1] def __str__(self): """Pretty string representation of the Livetime class instance. """ - s = '%s(time_window=(%.6f, %.6f))'%( - classname(self), self.time_window[0], self.time_window[1]) + s = (f'{classname(self)}(time_window=(' + f'{self.time_window[0]:.6f}, {self.time_window[1]:.6f}))') return s + def _get_onoff_intervals(self): + """A view on the uptime intervals where each time is a lower bin edge. + Hence, odd array elements (bins) are on-time intervals, and even array + elements are off-time intervals. + + Returns + ------- + onoff_intervals : instance of numpy ndarray + The (n_uptime_intervals*2,)-shaped numpy ndarray holding the time + edges of the uptime intervals. + """ + onoff_intervals = np.reshape( + self._uptime_mjd_intervals_arr, + (self._uptime_mjd_intervals_arr.size,)) + + return onoff_intervals + def _get_onoff_interval_indices(self, mjds): """Retrieves the indices of the on-time and off-time intervals, which correspond to the given MJD values. @@ -149,22 +219,14 @@ def _get_onoff_interval_indices(self, mjds): # function will return either 0, or len(bins). Since, there is always # an even amount of intervals edges, and 0 is also an 'even' number, # those MJDs will correspond to off-time automatically. - idxs = np.digitize(mjds, self._onoff_intervals) + idxs = np.digitize(mjds, self._get_onoff_intervals()) return idxs - def load_from_ontime_mjd_intervals(self, intervals): - """Loads the internal MJD uptime intervals from the given interval - array. - - Parameters - ---------- - intervals : Nx2 ndarray holding the MJD edges of the on-time intervals. 
- - """ - self._uptime_mjd_intervals = intervals - - def get_ontime_intervals_between(self, t_start, t_end): + def get_uptime_intervals_between( + self, + t_start, + t_end): """Creates a (N,2)-shaped ndarray holding the on-time detector intervals between the given time range from t_start to t_end. @@ -182,16 +244,17 @@ def get_ontime_intervals_between(self, t_start, t_end): ontime_intervals : (N,2)-shaped ndarray The (N,2)-shaped ndarray holding the on-time detector intervals. """ - onoff_intervals = self._onoff_intervals + onoff_intervals = self._get_onoff_intervals() - (t_start_idx, t_end_idx) = self._get_onoff_interval_indices((t_start, t_end)) - if(t_start_idx % 2 == 0): + (t_start_idx, t_end_idx) = self._get_onoff_interval_indices( + (t_start, t_end)) + if t_start_idx % 2 == 0: # t_start is during off-time. Use the next on-time lower edge as # first on-time edge. t_start = onoff_intervals[t_start_idx] else: t_start_idx -= 1 - if(t_end_idx % 2 == 0): + if t_end_idx % 2 == 0: # t_end is during off-time. Use the previous on-time upper edge as # the last on-time edge. t_end = onoff_intervals[t_end_idx-1] @@ -206,27 +269,29 @@ def get_ontime_intervals_between(self, t_start, t_end): # Set the first and last on-time interval edges. ontime_intervals_flat[0] = t_start ontime_intervals_flat[-1] = t_end - if(N_ontime_intervals > 1): + if N_ontime_intervals > 1: # Fill also the interval edges of the intermediate on-time bins. ontime_intervals_flat[1:-1] = onoff_intervals[t_start_idx+1:t_end_idx-1] - ontime_intervals = np.reshape(ontime_intervals_flat, (N_ontime_intervals,2)) + ontime_intervals = np.reshape( + ontime_intervals_flat, + (N_ontime_intervals, 2)) return ontime_intervals - def get_ontime_upto(self, mjd): - """Calculates the cumulative detector on-time up to the given time. + def get_livetime_upto(self, mjd): + """Calculates the cumulative detector livetime up to the given time. 
Parameters ---------- mjd : float | array of floats - The time in MJD up to which the detector on-time should be + The time in MJD up to which the detector livetime should be calculated. Returns ------- - ontimes : float | ndarray of floats - The ndarray holding the cumulative detector on-time corresponding + livetimes : float | ndarray of floats + The ndarray holding the cumulative detector livetime corresponding to the the given MJD times. """ mjds = np.atleast_1d(mjd) @@ -248,17 +313,24 @@ def get_ontime_upto(self, mjd): # Create a cumulative on-time array with a leading 0 element for MJDs # prior to the first on-time interval. - ontime_bins = np.diff(self._uptime_mjd_intervals_arr).reshape((self.n_uptime_mjd_intervals,)) + ontime_bins = np.diff(self._uptime_mjd_intervals_arr).reshape( + (self.n_uptime_mjd_intervals,)) cum_ontime_bins = np.array([0], dtype=np.float64) cum_ontime_bins = np.append(cum_ontime_bins, np.cumsum(ontime_bins)) # For odd (on-time) mjds, use the cumulative value of the previous bin # and add the part of the interval bin up to the mjd value. - ontimes = np.where(odd_idxs_mask, cum_ontime_bins[idxs-1] + mjds - self._onoff_intervals[onoff_idxs-1], cum_ontime_bins[idxs]) + livetimes = np.where( + odd_idxs_mask, + cum_ontime_bins[idxs-1] + + mjds + - self._get_onoff_intervals()[onoff_idxs-1], + cum_ontime_bins[idxs]) - if(not issequence(mjd)): - return np.asscalar(ontimes) - return ontimes + if not issequence(mjd): + return np.asscalar(livetimes) + + return livetimes def is_on(self, mjd): """Checks if the detector is on at the given MJD time. MJD times @@ -286,7 +358,12 @@ def is_on(self, mjd): return is_on - def draw_ontimes(self, rss, size): + def draw_ontimes( + self, + rss, + size, + t_min=None, + t_max=None): """Draws random MJD times based on the detector on-time intervals. Parameters @@ -296,18 +373,39 @@ def draw_ontimes(self, rss, size): numbers from. size : int The number of random MJD times to generate. 
+ t_min : float + The optional minimal time to consider. If set to ``None``, the + start time of this Livetime instance will be used. + t_max : float + The optional maximal time to consider. If set to ``None``, the + end time of this Livetime instance will be used. Returns ------- ontimes : ndarray The 1d array holding the generated MJD times. """ + uptime_intervals_arr = self._uptime_mjd_intervals_arr + + if t_min is not None or t_max is not None: + if t_min is None: + t_min = self.time_start + if t_max is None: + t_max = self.time_stop + + uptime_intervals_arr = self.get_uptime_intervals_between( + t_min, t_max) + + onoff_intervals = np.reshape( + uptime_intervals_arr, + (uptime_intervals_arr.size,)) + # Create bin array with only on-time bins. We have to mask out the # off-time bins. - ontime_bins = np.diff(self._onoff_intervals) + ontime_bins = np.diff(onoff_intervals) mask = np.invert( np.array( - np.linspace(0, ontime_bins.size-1, ontime_bins.size)%2, + np.linspace(0, ontime_bins.size-1, ontime_bins.size) % 2, dtype=np.bool_)) ontime_bins = ontime_bins[mask] @@ -327,8 +425,8 @@ def draw_ontimes(self, rss, size): L = cum_ontime_bins[-1] w = x*L idxs = np.digitize(w, cum_ontime_bins) - l = self._uptime_mjd_intervals_arr[:,0] + lower = uptime_intervals_arr[:, 0] y = w - cum_ontime_bins[idxs-1] - ontimes = l[idxs-1] + y + ontimes = lower[idxs-1] + y return ontimes diff --git a/skyllh/core/llhratio.py b/skyllh/core/llhratio.py index 6d9890a2ac..cb5a805e57 100644 --- a/skyllh/core/llhratio.py +++ b/skyllh/core/llhratio.py @@ -5,123 +5,171 @@ implement the pure math of the log-likelihood ratio function. 
""" -from __future__ import division - import abc import numpy as np -from skyllh.core.config import CFG -from skyllh.core.debugging import get_logger +from skyllh.core.config import ( + CFG, +) +from skyllh.core.debugging import ( + get_logger, +) from skyllh.core.py import ( classname, - int_cast, - issequence, issequenceof, - float_cast + float_cast, +) +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroupManager, +) +from skyllh.core.trialdata import ( + TrialDataManager, ) -from skyllh.core.source_hypothesis import SourceHypoGroupManager -from skyllh.core.trialdata import TrialDataManager -from skyllh.core.detsigyield import DetSigYield from skyllh.core.minimizer import ( Minimizer, NR1dNsMinimizerImpl, - NRNsScan2dMinimizerImpl + NRNsScan2dMinimizerImpl, ) from skyllh.core.parameters import ( - SourceFitParameterMapper, - SingleSourceFitParameterMapper, - MultiSourceFitParameterMapper + ParameterModelMapper, ) from skyllh.core.pdfratio import ( PDFRatio, - SingleSourcePDFRatioArrayArithmetic ) -from skyllh.core.timing import TaskTimer -from skyllh.physics.source import SourceModel +from skyllh.core.services import ( + DatasetSignalWeightFactorsService, + SrcDetSigYieldWeightsService, +) +from skyllh.core.timing import ( + TaskTimer, +) logger = get_logger(__name__) -class LLHRatio(object, metaclass=abc.ABCMeta): +class LLHRatio( + object, + metaclass=abc.ABCMeta): """Abstract base class for a log-likelihood (LLH) ratio function. """ - def __init__(self, minimizer): + def __init__( + self, + pmm, + minimizer, + **kwargs): """Creates a new LLH ratio function instance. Parameters ---------- + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper providing the mapping of + global parameters to local parameters of individual models. minimizer : instance of Minimizer The Minimizer instance that should be used to minimize the negative of this log-likelihood ratio function. 
""" - super(LLHRatio, self).__init__() + super().__init__(**kwargs) + self.pmm = pmm self.minimizer = minimizer + @property + def pmm(self): + """The ParameterModelMapper instance providing the mapping of + global floating parameters to individual models. + """ + return self._pmm + + @pmm.setter + def pmm(self, mapper): + if not isinstance(mapper, ParameterModelMapper): + raise TypeError( + 'The pmm property must be an instance of ' + 'ParameterModelMapper! ' + f'Its current type is {classname(mapper)}.') + self._pmm = mapper + @property def minimizer(self): """The Minimizer instance used to minimize the negative of the log-likelihood ratio function. """ return self._minimizer + @minimizer.setter def minimizer(self, minimizer): - if(not isinstance(minimizer, Minimizer)): - raise TypeError('The minimizer property must be an instance ' - 'of Minimizer!') + if not isinstance(minimizer, Minimizer): + raise TypeError( + 'The minimizer property must be an instance of Minimizer! ' + f'Its current type is {classname(minimizer)}.') self._minimizer = minimizer - def initialize_for_new_trial(self, tl=None): + @abc.abstractmethod + def initialize_for_new_trial( + self, + tl=None, + **kwargs): """This method will be called by the Analysis class after new trial data has been initialized to the trial data manager. Derived classes can make use of this call hook to perform LLHRatio specific trial initialization. Parameters ---------- - tl : TimeLord | None - The optional TimeLord instance to use for timing measurements. + tl : instance of TimeLord | None + The optional instance of TimeLord to use for timing measurements. """ pass @abc.abstractmethod - def evaluate(self, fitparam_values, tl=None): + def evaluate( + self, + fitparam_values, + src_params_recarray=None, + tl=None): """This method evaluates the LLH ratio function for the given set of fit parameter values. 
Parameters ---------- - fitparam_values : numpy 1D ndarray - The ndarray holding the current values of the (global) fit - parameters. - tl : TimeLord | None - The optional TimeLord instance to use for measuring timeing. + fitparam_values : instance of numpy ndarray + The (N_fitparams,)-shaped numpy 1D ndarray holding the current + values of the global fit parameters. + src_params_recarray : instance of numpy record ndarray | None + The numpy record ndarray of length N_sources holding the parameter + names and values of all sources. If set to ``None`` it will be + created automatically from the ``fitparam_values`` array. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information about this array. + tl : instance of TimeLord | None + The optional instance of TimeLord to use for measuring timing. Returns ------- log_lambda : float The calculated log-lambda value. - grads : (N_fitparams,)-shaped 1D ndarray - The ndarray holding the gradient value for each (global) fit - parameter. + grads : instance of numpy ndarray + The (N_fitparams,)-shaped 1D numpy ndarray holding the gradient + value for each global fit parameter. """ pass - def maximize(self, rss, fitparamset, tl=None): + def maximize( + self, + rss, + tl=None): """Maximize the log-likelihood ratio function, by using the ``evaluate`` method. Parameters ---------- - rss : RandomStateService instance - The RandomStateService instance to draw random numbers from. + rss : instance of RandomStateService + The instance of RandomStateService to draw random numbers from. This is needed to generate random parameter initial values. - fitparamset : FitParameterSet instance - The instance of FitParameterSet holding the global fit parameter - definitions used in the maximization process. 
- tl : TimeLord instance | None - The optional TimeLord instance that should be used to time the + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to time the maximization process. Returns @@ -129,8 +177,9 @@ def maximize(self, rss, fitparamset, tl=None): log_lambda_max : float The (maximum) value of the log-likelihood ratio (log_lambda) function for the best fit parameter values. - fitparam_values : (N_fitparam)-shaped 1D ndarray - The ndarray holding the global fit parameter values. + fitparam_values : instance of numpy ndarray + The (N_fitparam,)-shaped 1D numpy ndarray holding the values of the + global fit parameters. status : dict The dictionary with status information about the maximization process, i.e. from the minimizer. @@ -139,46 +188,76 @@ def maximize(self, rss, fitparamset, tl=None): # Define the negative llhratio function, that will get minimized. self_evaluate = self.evaluate + def negative_llhratio_func(fitparam_values, func_stats, tl=None): + src_params_recarray = self._pmm.create_src_params_recarray( + fitparam_values) + func_stats['n_calls'] += 1 with TaskTimer(tl, 'Evaluate llh-ratio function.'): - (f, grads) = self_evaluate(fitparam_values, tl=tl) - if(tracing): logger.debug( - 'LLH-ratio func value f={:g}, grads={}'.format( - f, str(grads))) + (f, grads) = self_evaluate( + fitparam_values=fitparam_values, + src_params_recarray=src_params_recarray, + tl=tl) + if tracing: + logger.debug( + f'LLH-ratio func value f={f:g}, grads={str(grads)}') return (-f, -grads) - minimize_kwargs = {'func_provides_grads': True} + minimize_kwargs = { + 'func_provides_grads': True + } - func_stats = {'n_calls': 0} + func_stats = { + 'n_calls': 0 + } with TaskTimer(tl, 'Minimize -llhratio function.'): (fitparam_values, fmin, status) = self._minimizer.minimize( - rss, fitparamset, negative_llhratio_func, args=(func_stats,tl), + rss=rss, + paramset=self._pmm.global_paramset, + func=negative_llhratio_func, + args=(func_stats, 
tl), kwargs=minimize_kwargs) log_lambda_max = -fmin status['n_llhratio_func_calls'] = func_stats['n_calls'] logger.debug( - 'Maximized LLH ratio function with {:d} calls'.format( - status['n_llhratio_func_calls'])) + f'Maximized LLH ratio function with ' + f'{status["n_llhratio_func_calls"]:d} calls') return (log_lambda_max, fitparam_values, status) -class TCLLHRatio(LLHRatio, metaclass=abc.ABCMeta): +class TCLLHRatio( + LLHRatio, + metaclass=abc.ABCMeta): """Abstract base class for a log-likelihood (LLH) ratio function with two components, i.e. signal and background. """ - def __init__(self, minimizer, mean_n_sig_0): + def __init__( + self, + pmm, + minimizer, + mean_n_sig_0, + **kwargs): """Creates a new two-component LLH ratio function instance. Parameters ---------- + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper providing the mapping of + global floating parameters to individual models. + minimizer : instance of Minimizer + The Minimizer instance that should be used to minimize the negative + of this log-likelihood ratio function. mean_n_sig_0 : float The fixed mean number of signal events for the null-hypothesis. """ - super(TCLLHRatio, self).__init__(minimizer) + super().__init__( + pmm=pmm, + minimizer=minimizer, + **kwargs) self.mean_n_sig_0 = mean_n_sig_0 @@ -188,24 +267,42 @@ def mean_n_sig_0(self): null-hypothesis. """ return self._mean_n_sig_0 + @mean_n_sig_0.setter def mean_n_sig_0(self, v): - v = float_cast(v, 'The mean_n_sig_0 property must be castable to a ' - 'float value!') + v = float_cast( + v, + 'The mean_n_sig_0 property must be castable to a float value!') self._mean_n_sig_0 = v @abc.abstractmethod - def calculate_ns_grad2(self, fitparam_values): + def calculate_ns_grad2( + self, + ns, + ns_pidx, + src_params_recarray, + tl=None, + **kwargs): """This method is supposed to calculate the second derivative of the log-likelihood ratio function w.r.t. 
the fit parameter ns, the number of signal events in the data set. Parameters ---------- - fitparam_values : numpy (N_fitparams,)-shaped 1D ndarray - The ndarray holding the current values of the fit parameters. - By definition, the first element is the fit parameter for the number - of signal events, ns. + fitparam_values : instance of numpy ndarray + The (N_fitparams,)-shaped 1D numpy ndarray holding the current + values of the global fit parameters. + ns_pidx : int + The index of the global ns fit parameter. + src_params_recarray : instance of numpy record ndarray + The numpy record ndarray of length N_sources holding the parameter + names and values of all sources. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information about this array. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used for timing + measurements. Returns ------- @@ -215,7 +312,78 @@ def calculate_ns_grad2(self, fitparam_values): """ pass - def maximize(self, rss, fitparamset, tl=None): + def maximize_with_1d_newton_rapson_minimizer( + self, + rss, + tl=None): + """Maximizes this log-likelihood ratio function, by minimizing its + negative using a 1D Newton-Rapson minimizer. + + Parameters + ---------- + rss : instance of RandomStateService + The instance of RandomStateService that should be used to draw + random numbers from. It is used by the minimizer to generate random + fit parameter initial values. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to time the + maximization of the LLH-ratio function. + + Returns + ------- + log_lambda_max : float + The (maximum) value of the log-likelihood ratio (log_lambda) + function for the best fit parameter values. + fitparam_values : instance of numpy ndarray + The (N_fitparam,)-shaped 1D numpy ndarray holding the global + fit parameter values. 
+ status : dict + The dictionary with status information about the maximization + process, i.e. from the minimizer. + """ + # Define the negative llhratio function, that will get minimized + # when using the Newton-Rapson 1D minimizer for llhratio functions + # depending solely on ns. + self__evaluate = self.evaluate + self__calculate_ns_grad2 = self.calculate_ns_grad2 + + ns_pidx = self._pmm.get_gflp_idx(name='ns') + + def negative_llhratio_func_nr1d_ns(fitparam_values, tl): + ns = fitparam_values[ns_pidx] + src_params_recarray = self._pmm.create_src_params_recarray( + fitparam_values) + with TaskTimer( + tl, + 'Evaluate llh-ratio function.'): + (f, grads) = self__evaluate( + fitparam_values=fitparam_values, + src_params_recarray=src_params_recarray, + tl=tl) + with TaskTimer( + tl, + 'Calculate 2nd derivative of llh-ratio function w.r.t. ns'): + grad2_ns = self__calculate_ns_grad2( + ns=ns, + ns_pidx=ns_pidx, + src_params_recarray=src_params_recarray, + tl=tl) + + return (-f, -grads[ns_pidx], -grad2_ns) + + (fitparam_values, fmin, status) = self._minimizer.minimize( + rss=rss, + paramset=self._pmm.global_paramset, + func=negative_llhratio_func_nr1d_ns, + args=(tl,)) + log_lambda_max = -fmin + + return (log_lambda_max, fitparam_values, status) + + def maximize( + self, + rss, + tl=None): """Maximizes this log-likelihood ratio function, by minimizing its negative. This method has a special implementation when a 1D Newton-Rapson @@ -224,15 +392,12 @@ def maximize(self, rss, fitparamset, tl=None): Parameters ---------- - rss : RandomStateService instance - The RandomStateService instance that should be used to draw random - numbers from. It is used by the minimizer to generate random + rss : instance of RandomStateService + The instance of RandomStateService that should be used to draw + random numbers from. It is used by the minimizer to generate random fit parameter initial values. 
- fitparamset : FitParameterSet instance - The instance of FitParameterSet holding the global fit parameter - definitions used in the maximization process. - tl : TimeLord instance | None - The optional TimeLord instance that should be used to time the + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to time the maximization of the LLH-ratio function. Returns @@ -240,136 +405,124 @@ def maximize(self, rss, fitparamset, tl=None): log_lambda_max : float The (maximum) value of the log-likelihood ratio (log_lambda) function for the best fit parameter values. - fitparam_values : (N_fitparam)-shaped 1D ndarray - The ndarray holding the global fit parameter values. + fitparam_values : instance of numpy ndarray + The (N_fitparam,)-shaped 1D numpy ndarray holding the global + fit parameter values. status : dict The dictionary with status information about the maximization process, i.e. from the minimizer. """ - if(isinstance(self._minimizer.minimizer_impl, NR1dNsMinimizerImpl) or - isinstance(self._minimizer.minimizer_impl, NRNsScan2dMinimizerImpl)): - # Define the negative llhratio function, that will get minimized - # when using the Newton-Rapson 1D minimizer for llhratio functions - # depending solely on ns. - self__evaluate = self.evaluate - self__calculate_ns_grad2 = self.calculate_ns_grad2 - def negative_llhratio_func_nr1d_ns(fitparam_values, tl): - with TaskTimer(tl, 'Evaluate llh-ratio function.'): - (f, grads) = self__evaluate(fitparam_values, tl=tl) - with TaskTimer(tl, 'Calculate 2nd derivative of llh-ratio ' - 'function w.r.t. 
ns'): - grad2_ns = self__calculate_ns_grad2(fitparam_values) - - return (-f, -grads[0], -grad2_ns) - - (fitparam_values, fmin, status) = self._minimizer.minimize( - rss, fitparamset, negative_llhratio_func_nr1d_ns, args=(tl,)) - log_lambda_max = -fmin + if isinstance(self._minimizer.minimizer_impl, NR1dNsMinimizerImpl) or\ + isinstance(self._minimizer.minimizer_impl, NRNsScan2dMinimizerImpl): + return self.maximize_with_1d_newton_rapson_minimizer( + rss=rss, + tl=tl) - return (log_lambda_max, fitparam_values, status) + return super().maximize( + rss=rss, + tl=tl) - return super(TCLLHRatio, self).maximize(rss, fitparamset, tl=tl) - -class SingleDatasetTCLLHRatio(TCLLHRatio, metaclass=abc.ABCMeta): +class SingleDatasetTCLLHRatio( + TCLLHRatio, + metaclass=abc.ABCMeta): """Abstract base class for a log-likelihood (LLH) ratio function with two components, i.e. signal and background, for a single data set. """ def __init__( - self, minimizer, src_hypo_group_manager, src_fitparam_mapper, tdm, - mean_n_sig_0): + self, + pmm, + minimizer, + shg_mgr, + tdm, + mean_n_sig_0, + **kwargs): """Creates a new two-component LLH ratio function instance for a single data set. Parameters ---------- + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper providing the mapping of + global floating parameters to individual models. minimizer : instance of Minimizer The Minimizer instance that should be used to minimize the negative of this log-likelihood ratio function. - src_hypo_group_manager : SourceHypoGroupManager instance + shg_mgr : SourceHypoGroupManager instance The SourceHypoGroupManager instance that defines the source - hypotheses. - src_fitparam_mapper : SourceFitParameterMapper - The instance of SourceFitParameterMapper defining the global fit - parameters and their mapping to the source fit parameters. - The order of the fit parameters defines the order of the fit values - during the maximization process of the log-likelihood-ratio - function. 
The names of the source fit parameters must coincide with - the signal fit parameter names of the PDF instances. + hypothesis groups. tdm : instance of TrialDataManager The instance of TrialDataManager that holds the trial event data and additional data fields for this LLH ratio function. mean_n_sig_0 : float The fixed mean number of signal events for the null-hypothesis. """ - super(SingleDatasetTCLLHRatio, self).__init__( - minimizer, mean_n_sig_0) + super().__init__( + pmm=pmm, + minimizer=minimizer, + mean_n_sig_0=mean_n_sig_0, + **kwargs) - self.src_hypo_group_manager = src_hypo_group_manager - self.src_fitparam_mapper = src_fitparam_mapper + self.shg_mgr = shg_mgr self.tdm = tdm # Calculate the data fields that solely depend on source parameters. - self._tdm.calculate_source_data_fields(src_hypo_group_manager) + self._tdm.calculate_source_data_fields( + shg_mgr=self._shg_mgr, + pmm=self._pmm) @property - def src_hypo_group_manager(self): + def shg_mgr(self): """The SourceHypoGroupManager instance that defines the source - hypotheses. + hypothesis groups. """ - return self._src_hypo_group_manager - @src_hypo_group_manager.setter - def src_hypo_group_manager(self, manager): - if(not isinstance(manager, SourceHypoGroupManager)): - raise TypeError('The src_hypo_group_manager property must be an ' - 'instance of SourceHypoGroupManager!') - self._src_hypo_group_manager = manager + return self._shg_mgr - @property - def src_fitparam_mapper(self): - """The SourceFitParameterMapper instance defining the global fit - parameters and their mapping to the source fit parameters. 
- """ - return self._src_fitparam_mapper - @src_fitparam_mapper.setter - def src_fitparam_mapper(self, mapper): - if(not isinstance(mapper, SourceFitParameterMapper)): - raise TypeError('The src_fitparam_mapper property must be an ' - 'instance of SourceFitParameterMapper!') - self._src_fitparam_mapper = mapper + @shg_mgr.setter + def shg_mgr(self, mgr): + if not isinstance(mgr, SourceHypoGroupManager): + raise TypeError( + 'The shg_mgr property must be an instance of ' + 'SourceHypoGroupManager! ' + f'Its current type is {classname(mgr)}.') + self._shg_mgr = mgr @property def tdm(self): - """The TrialDataManager instance that holds the trial event data and + """The instance of TrialDataManager that holds the trial event data and additional data fields for this LLH ratio function. """ return self._tdm - @tdm.setter - def tdm(self, manager): - if(not isinstance(manager, TrialDataManager)): - raise TypeError('The tdm property must be an instance of ' - 'TrialDataManager!') - self._tdm = manager - def change_source_hypo_group_manager(self, src_hypo_group_manager): + @tdm.setter + def tdm(self, mgr): + if not isinstance(mgr, TrialDataManager): + raise TypeError( + 'The tdm property must be an instance of TrialDataManager! ' + f'Its current type is {classname(mgr)}.') + self._tdm = mgr + + def change_shg_mgr(self, shg_mgr): """Changes the source hypothesis group manager of this two-component LLH ratio function. Parameters ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The new SourceHypoGroupManager instance. + shg_mgr : instance of SourceHypoGroupManager + The new instance of SourceHypoGroupManager. 
""" - self.src_hypo_group_manager = src_hypo_group_manager - self._tdm.change_source_hypo_group_manager(src_hypo_group_manager) + self.shg_mgr = shg_mgr + + self._tdm.change_shg_mgr( + shg_mgr=shg_mgr, + pmm=self._pmm) -class ZeroSigH0SingleDatasetTCLLHRatio(SingleDatasetTCLLHRatio): +class ZeroSigH0SingleDatasetTCLLHRatio( + SingleDatasetTCLLHRatio): """This class implements a two-component (TC) log-likelihood (LLH) ratio - function for a single data assuming zero signal for the null-hypothesis. - The log-likelihood-ratio function uses a list of independent PDF ratio - instances. + function for a single dataset assuming zero signal for the null-hypothesis. """ # The (1 + alpha)-threshold float value for which the log-likelihood ratio # function of a single event should get approximated by a Taylor expansion. @@ -380,73 +533,105 @@ class ZeroSigH0SingleDatasetTCLLHRatio(SingleDatasetTCLLHRatio): _one_plus_alpha = 1e-3 def __init__( - self, minimizer, src_hypo_group_manager, src_fitparam_mapper, tdm, - pdfratios): + self, + pmm, + minimizer, + shg_mgr, + tdm, + pdfratio, + **kwargs): """Constructor of the two-component log-likelihood ratio function. Parameters ---------- + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper providing the mapping of + global floating parameters to individual models. minimizer : instance of Minimizer The Minimizer instance that should be used to minimize the negative of this log-likelihood ratio function. - src_hypo_group_manager : SourceHypoGroupManager instance + shg_mgr : instance of SourceHypoGroupManager The SourceHypoGroupManager instance that defines the source - hypotheses. - src_fitparam_mapper : SourceFitParameterMapper - The instance of SourceFitParameterMapper defining the global fit - parameters and their mapping to the source fit parameters. - The order of the fit parameters defines the order of the fit values - during the maximization process of the log-likelihood-ratio - function. 
The names of the source fit parameters must coincide with - the signal fit parameter names of the PDF ratio instances. + hypothesis groups. tdm : instance of TrialDataManager The instance of TrialDataManager that holds the trial event data and additional data fields for this LLH ratio function. - pdfratios : sequence of PDFRatio - The sequence of PDFRatio instances. A PDFRatio instance might depend + pdfratio : instance of PDFRatio + The instance of PDFRatio. A PDFRatio instance might depend on none, one, or several fit parameters. """ - super(ZeroSigH0SingleDatasetTCLLHRatio, self).__init__( - minimizer, src_hypo_group_manager, src_fitparam_mapper, tdm, - mean_n_sig_0=0) - - self.pdfratio_list = pdfratios - - # Define cache variables for evaluate method to store values needed for - # a possible calculation of the second derivative w.r.t. ns of the + super().__init__( + pmm=pmm, + minimizer=minimizer, + shg_mgr=shg_mgr, + tdm=tdm, + mean_n_sig_0=0, + **kwargs) + + self.pdfratio = pdfratio + + # Define cache variable for the evaluate method to store values needed + # for a possible calculation of the second derivative w.r.t. ns of the # log-likelihood ratio function. - self._cache_fitparam_values = None self._cache_nsgrad_i = None @SingleDatasetTCLLHRatio.mean_n_sig_0.setter def mean_n_sig_0(self, v): SingleDatasetTCLLHRatio.mean_n_sig_0.fset(self, v) - if(self._mean_n_sig_0 != 0): - raise ValueError('The %s class is only valid for ' - 'mean_n_sig_0 = 0!'%(classname(self))) + if self._mean_n_sig_0 != 0: + raise ValueError( + f'The {classname(self)} class is only valid for ' + f'mean_n_sig_0 = 0!') @property - def pdfratio_list(self): - """The list of PDFRatio instances. + def pdfratio(self): + """The instance of PDFRatio. + """ + return self._pdfratio + + @pdfratio.setter + def pdfratio(self, r): + if not isinstance(r, PDFRatio): + raise TypeError( + 'The pdfratio property must be an instance of PDFRatio! 
' + f'Its current type is {classname(r)}.') + self._pdfratio = r + + def initialize_for_new_trial( + self, + tl=None, + **kwargs): + """Initializes the log-likelihood ratio function for a new trial. + It calls the + :meth:`~skyllh.core.pdfratio.PDFRatio.initialize_for_new_trial` method + of the :class:`~skyllh.core.pdfratio.PDFRatio` class. + + Parameters + ---------- + tl : instance of TimeLord + The optional instance of TimeLord to measure timing information. """ - return self._pdfratio_list - @pdfratio_list.setter - def pdfratio_list(self, seq): - if(not issequenceof(seq, PDFRatio)): - raise TypeError('The pdfratio_list property must be a sequence of ' - 'PDFRatio instances!') - self._pdfratio_list = list(seq) - - def calculate_log_lambda_and_grads(self, fitparam_values, N, ns, Xi, dXi_ps): + self._pdfratio.initialize_for_new_trial( + tdm=self._tdm, + tl=tl, + **kwargs) + + def calculate_log_lambda_and_grads( + self, + N, + ns, + ns_pidx, + p_mask, + Xi, + dXi_dp): """Calculates the log(Lambda) value and its gradient for each global fit parameter. This calculation is source and detector independent. Parameters ---------- - fitparam_values : numpy (N_fitparams+1)-shaped 1D ndarray - The ndarray holding the current values of the fit parameters. - By definition, the first element is the fit parameter for the number - of signal events, ns. + fitparam_values : instance of numpy ndarray + The (N_fitparams,)-shaped ndarray holding the current values of the + global fit parameters. These numbers are used as cache key to validate the ``nsgrad_i`` values for the given fit parameter values for a possible later calculation of the second derivative w.r.t. ns of the log-likelihood @@ -454,30 +639,36 @@ def calculate_log_lambda_and_grads(self, fitparam_values, N, ns, Xi, dXi_ps): N : int The total number of events. ns : float - The current fit parameter value for ns. - Xi : numpy (n_selected_events,)-shaped 1D ndarray - The X value of each selected event. 
- dXi_ps : numpy (N_fitparams,n_selected_events)-shaped 2D ndarray - The derivative value for each fit parameter ps of each event's X - value. + The value of the global fit paramater ns. + ns_pidx : int + The index of the global fit parameter ns. + p_mask : instance of numpy ndarray + The (N_fitparam,)-shaped numpy ndarray of bool selecting all global + fit parameters, except ns. + Xi : instance of numpy ndarray + The (n_selected_events,)-shaped 1D numpy ndarray holding the X value + of each selected event. + dXi_dp : instance of numpy ndarray + The (n_selected_events, N_fitparams-1,)-shaped 2D ndarray holding + the derivative value for each fit parameter p (i.e. except ns) of + each event's X value. Returns ------- log_lambda : float The value of the log-likelihood ratio function. - grads : 1D numpy (N_fitparams+1,)-shaped ndarray - The gradient value of log_lambda for each fit parameter. - The first element is the gradient for ns. + grads : instance of numpy ndarray + The (N_fitparams,)-shaped numpy ndarray holding the gradient value + of log_lambda for each fit parameter. """ tracing = CFG['debugging']['enable_tracing'] # Get the number of selected events. Nprime = len(Xi) - if(tracing): + if tracing: logger.debug( - 'N={:d}, Nprime={:d}'.format( - N, Nprime)) + f'N={N:d}, Nprime={Nprime:d}') one_plus_alpha = ZeroSigH0SingleDatasetTCLLHRatio._one_plus_alpha @@ -486,56 +677,71 @@ def calculate_log_lambda_and_grads(self, fitparam_values, N, ns, Xi, dXi_ps): # Create a mask for events which have a stable non-diverging # log-function argument, and an inverted mask thereof. - stablemask = alpha_i > alpha - unstablemask = ~stablemask - if(tracing): + m_stable = alpha_i > alpha + m_unstable = ~m_stable + + if tracing: logger.debug( '# of events doing Taylor expansion for (unstable events): ' - '{:d}'.format( - np.count_nonzero(unstablemask))) + f'{np.count_nonzero(m_unstable):d}') # Allocate memory for the log_lambda_i values. 
log_lambda_i = np.empty_like(alpha_i, dtype=np.float64) # Calculate the log_lambda_i value for the numerical stable events. - log_lambda_i[stablemask] = np.log1p(alpha_i[stablemask]) + np.log1p(alpha_i, where=m_stable, out=log_lambda_i) + # Calculate the log_lambda_i value for the numerical unstable events. - tildealpha_i = (alpha_i[unstablemask] - alpha) / one_plus_alpha - log_lambda_i[unstablemask] = np.log1p(alpha) + tildealpha_i - 0.5*tildealpha_i**2 + tildealpha_i = (alpha_i[m_unstable] - alpha) / one_plus_alpha + log_lambda_i[m_unstable] =\ + np.log1p(alpha) + tildealpha_i - 0.5 * tildealpha_i**2 # Calculate the log_lambda value and account for pure background events. log_lambda = np.sum(log_lambda_i) + (N - Nprime)*np.log1p(-ns/N) # Calculate the gradient for each fit parameter. - grads = np.empty((dXi_ps.shape[0]+1,), dtype=np.float64) + grads = np.empty((dXi_dp.shape[1]+1,), dtype=np.float64) # Pre-calculate value that is used twice for the gradients of the # numerical stable events. - one_over_one_plus_alpha_i_stablemask = 1 / (1 + alpha_i[stablemask]) + one_over_one_plus_alpha_i_stablemask = 1 / (1 + alpha_i[m_stable]) # For ns. nsgrad_i = np.empty_like(alpha_i, dtype=np.float64) - nsgrad_i[stablemask] = Xi[stablemask] * one_over_one_plus_alpha_i_stablemask - nsgrad_i[unstablemask] = (1 - tildealpha_i)*Xi[unstablemask] / one_plus_alpha + nsgrad_i[m_stable] =\ + Xi[m_stable] * one_over_one_plus_alpha_i_stablemask + nsgrad_i[m_unstable] =\ + (1 - tildealpha_i) * Xi[m_unstable] / one_plus_alpha + # Cache the nsgrad_i values for a possible later calculation of the # second derivative w.r.t. ns of the log-likelihood ratio function. - # Note: We create a copy of the fitparam_values array here to make sure - # that the values don't get changed outside this method before the - # calculate_ns_grad2 method is called. - self._cache_fitparam_values = fitparam_values.copy() self._cache_nsgrad_i = nsgrad_i + # Calculate the first derivative w.r.t. ns. 
- grads[0] = np.sum(nsgrad_i) - (N - Nprime)/(N - ns) + grads[ns_pidx] = np.sum(nsgrad_i) - (N - Nprime) / (N - ns) + + # Now for each other fit parameter. - # For each other fit parameter. # For all numerical stable events. - grads[1:] = np.sum(ns * one_over_one_plus_alpha_i_stablemask * dXi_ps[:,stablemask], axis=1) + grads[p_mask] = np.sum( + ns * one_over_one_plus_alpha_i_stablemask[:, np.newaxis] * + dXi_dp[m_stable], + axis=0) + # For all numerical unstable events. - grads[1:] += np.sum(ns*(1 - tildealpha_i)*dXi_ps[:,unstablemask] / one_plus_alpha, axis=1) + grads[p_mask] += np.sum( + ns * (1 - tildealpha_i[:, np.newaxis]) * dXi_dp[m_unstable] / + one_plus_alpha, + axis=0) return (log_lambda, grads) - def calculate_ns_grad2(self, fitparam_values): + def calculate_ns_grad2( + self, + ns, + ns_pidx=None, + src_params_recarray=None, + tl=None): """Calculates the second derivative w.r.t. ns of the log-likelihood ratio function. This method tries to use cached values for the first derivative @@ -547,9 +753,23 @@ def calculate_ns_grad2(self, fitparam_values): Parameters ---------- fitparam_values : numpy (N_fitparams+1)-shaped 1D ndarray - The ndarray holding the current values of the fit parameters. - By definition, the first element is the fit parameter for the number - of signal events, ns. + The ndarray holding the current values of the global fit + parameters. + ns : float + The value of the global fit paramater ns. + ns_pidx : int + The parameter index of the global fit parameter ns. + For this particular class this is an ignored interface parameter. + src_params_recarray : instance of numpy record ndarray + The numpy record ndarray of length N_sources holding the parameter + names and values of all sources. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information about this array. + For this particular class this is an ignored interface parameter. 
+ tl : instance of TimeLord | None + The optional instance of TimeLord that should be used for timing + measurements. Returns ------- @@ -557,14 +777,11 @@ def calculate_ns_grad2(self, fitparam_values): The second derivative w.r.t. ns of the log-likelihood ratio function for the given fit parameter values. """ - # Check if the cached nsgrad_i values match the given fitparam_values. - if((self._cache_fitparam_values is None) or - (not np.all(self._cache_fitparam_values == fitparam_values))): - # Calculate the cache values by evaluating the log-likelihood ratio - # function. - self.evaluate(fitparam_values) - - ns = fitparam_values[0] + if self._cache_nsgrad_i is None: + raise RuntimeError( + 'The evaluate method needs to be called before the ' + 'calculate_ns_grad2 method can be called!') + Nprime = self._tdm.n_selected_events N = Nprime + self._tdm.n_pure_bkg_events @@ -572,841 +789,120 @@ def calculate_ns_grad2(self, fitparam_values): return nsgrad2 - -class SingleSourceZeroSigH0SingleDatasetTCLLHRatio( - ZeroSigH0SingleDatasetTCLLHRatio): - """This class implements a 2-component, i.e. signal and background, - log-likelihood ratio function for a single data set. The - log-likelihood-ratio function assumes a zero signal null-hypothesis and uses - a list of independent PDFRatio instances assuming a single source. - """ - def __init__( - self, minimizer, src_hypo_group_manager, src_fitparam_mapper, tdm, - pdfratios): - """Constructor for creating a 2-component, i.e. signal and background, - log-likelihood ratio function assuming a single source. - - Parameters - ---------- - minimizer : instance of Minimizer - The Minimizer instance that should be used to minimize the negative - of this log-likelihood ratio function. - src_hypo_group_manager : SourceHypoGroupManager instance - The SourceHypoGroupManager instance that defines the source - hypotheses. 
- src_fitparam_mapper : SingleSourceFitParameterMapper - The instance of SingleSourceFitParameterMapper defining the global - fit parameters and their mapping to the source fit parameters. - The order of the fit parameters defines the order of the fit values - during the maximization process. - The names of the source fit parameters must coincide with the signal - fit parameter names of the PDF ratio objects. - tdm : instance of TrialDataManager - The instance of TrialDataManager that holds the trial event data and - additional data fields for this LLH ratio function. - pdfratios : list of PDFRatio - The list of PDFRatio instances. A PDFRatio instance might depend on - none, one, or several fit parameters. - """ - if(not isinstance(src_fitparam_mapper, SingleSourceFitParameterMapper)): - raise TypeError('The src_fitparam_mapper argument must be an ' - 'instance of SingleSourceFitParameterMapper!') - - super(SingleSourceZeroSigH0SingleDatasetTCLLHRatio, self).__init__( - minimizer, src_hypo_group_manager, src_fitparam_mapper, tdm, - pdfratios) - - # Construct a PDFRatio array arithmetic object specialized for a single - # source. This will pre-calculate the PDF ratio values for all PDF ratio - # instances, which do not depend on any fit parameters. - self._pdfratioarray = SingleSourcePDFRatioArrayArithmetic( - self._pdfratio_list, - self._src_fitparam_mapper.fitparamset.fitparam_list) - - def initialize_for_new_trial(self, tl=None): - """Initializes the log-likelihood ratio function for a new trial. - - Parameters - ---------- - tl : TimeLord | None - The optional TimeLord instance that should be used for timing - measurements. - """ - self._pdfratioarray.initialize_for_new_trial(self._tdm) - - def evaluate(self, fitparam_values, tl=None): + def evaluate( + self, + fitparam_values, + src_params_recarray=None, + tl=None): """Evaluates the log-likelihood ratio function for the given set of data events. 
Parameters ---------- - fitparam_values : numpy (N_fitparams+1)-shaped 1D ndarray - The ndarray holding the current values of the fit parameters. - By definition, the first element is the fit parameter for the number - of signal events, ns. - tl : TimeLord instance | None - The optional TimeLord instance to measure the timing of evaluating - the LLH ratio function. + fitparam_values : instance of numpy ndarray + The (N_fitparams,)-shaped 1D ndarray holding the current values of + the global fit parameters. + src_params_recarray : instance of numpy structured ndarray | None + The numpy record ndarray of length N_sources holding the local + parameter names and values of all sources. + If it is ``None``, it will be generated automatically from the + ``fitparam_values`` argument using the + :class:`~skyllh.core.parameters.ParameterModelMapper` instance. + tl : instance of TimeLord | None + The optional instance of TimeLord to measure the timing of + evaluating the LLH ratio function. Returns ------- log_lambda : float The calculated log-lambda value. - grads : (N_fitparams+1,)-shaped 1D ndarray - The ndarray holding the gradient value of log_lambda for each fit - parameter and ns. - The first element is the gradient for ns. + grads : instance of numpy ndarray + The (N_fitparams,)-shaped 1D numpy ndarray holding the gradient + value for each global fit parameter. """ tracing = CFG['debugging']['enable_tracing'] - # Define local variables to avoid (.)-lookup procedure. - tdm = self._tdm - pdfratioarray = self._pdfratioarray - - ns = fitparam_values[0] - - N = tdm.n_events + if src_params_recarray is None: + src_params_recarray = self._pmm.create_src_params_recarray( + gflp_values=fitparam_values + ) - # Create the fitparams dictionary with the fit parameter names and - # values. 
- with TaskTimer(tl, 'Create fitparams dictionary.'): - fitparams = self._src_fitparam_mapper.get_src_fitparams( - fitparam_values[1:]) + tdm = self._tdm - # Calculate the data fields that depend on fit parameter values. - with TaskTimer(tl, 'Calc fit param dep data fields.'): - tdm.calculate_fitparam_data_fields( - self._src_hypo_group_manager, fitparams) + ns_pidx = self._pmm.get_gflp_idx('ns') - # Calculate the PDF ratio values of all PDF ratio objects, which depend - # on any fit parameter. - with TaskTimer(tl, 'Calc pdfratio values.'): - pdfratioarray.calculate_pdfratio_values(tdm, fitparams, tl=tl) + ns = fitparam_values[ns_pidx] - # Calculate the product of all the PDF ratio values for each (selected) - # event. - with TaskTimer(tl, 'Calc pdfratio value product Ri'): - Ri = pdfratioarray.get_ratio_product() + N = tdm.n_events - # Calculate Xi for each (selected) event. + # Calculate the data fields that depend on global fit parameters. + if tdm.has_global_fitparam_data_fields: + with TaskTimer( + tl, + 'Calculate global fit parameter dependent data fields.'): + # Create the global_fitparams dictionary with the global fit + # parameter names and values. + global_fitparams = self._pmm.get_global_floating_params_dict( + gflp_values=fitparam_values) + tdm.calculate_global_fitparam_data_fields( + shg_mgr=self._shg_mgr, + pmm=self._pmm, + global_fitparams=global_fitparams) + + # Calculate the PDF ratio values for each selected event. + with TaskTimer(tl, 'Calc pdfratio value Ri'): + Ri = self._pdfratio.get_ratio( + tdm=tdm, + src_params_recarray=src_params_recarray, + tl=tl) + + # Calculate Xi for each selected event. Xi = (Ri - 1.) / N - if(tracing): - logger.debug('dtype(Xi)={:s}'.format(str(Xi.dtype))) - # Calculate the gradients of Xi for each fit parameter (without ns). 
- dXi_ps = np.empty((len(fitparam_values)-1,len(Xi)), dtype=np.float64) - for (idx, fitparam_value) in enumerate(fitparam_values[1:]): - fitparam_name = self._src_fitparam_mapper.get_src_fitparam_name(idx) - - dRi = np.zeros((len(Xi),), dtype=np.float64) - for (num_k) in np.arange(len(pdfratioarray._pdfratio_list)): - # Get the PDFRatio instance from which we need the derivative from. - pdfratio = pdfratioarray.get_pdfratio(num_k) - # Calculate the derivative of Ri. - dRi += pdfratio.get_gradient(tdm, fitparams, fitparam_name) * pdfratioarray.get_ratio_product(excluded_idx=num_k) - - # Calculate the derivative of Xi w.r.t. the fit parameter. - dXi_ps[idx] = dRi / N + n_fitparams = len(fitparam_values) - if(tracing): + # Calculate the gradients of Xi for each fit parameter (without ns). + dXi_dp = np.empty( + (Xi.shape[0], n_fitparams-1), + dtype=np.float64) + + # Create a mask that selects all fit parameters except ns. + p_mask = np.ones((n_fitparams,), dtype=np.bool_) + p_mask[ns_pidx] = False + + # Loop over the global fit parameters and calculate the derivative of + # Xi w.r.t. each fit paramater. + fitparam_ids = np.arange(n_fitparams) + for (idx, fitparam_id) in enumerate(fitparam_ids[p_mask]): + dRi = self._pdfratio.get_gradient( + tdm=tdm, + src_params_recarray=src_params_recarray, + fitparam_id=fitparam_id, + tl=tl) + + # Calculate the derivative of Xi w.r.t. the global fit parameter + # with ID fitparam_id. 
+ dXi_dp[:, idx] = dRi / N + + if tracing: logger.debug( - '{:s}.evaluate: N={:d}, Nprime={:d}, ns={:.3f}, '.format( - classname(self), N, len(Xi), ns)) + f'{classname(self)}.evaluate: N={N}, Nprime={len(Xi)}, ' + f'ns={ns:.3f}') with TaskTimer(tl, 'Calc logLamds and grads'): (log_lambda, grads) = self.calculate_log_lambda_and_grads( - fitparam_values, N, ns, Xi, dXi_ps) + N=N, + ns=ns, + ns_pidx=ns_pidx, + p_mask=p_mask, + Xi=Xi, + dXi_dp=dXi_dp) return (log_lambda, grads) -class MultiSourceZeroSigH0SingleDatasetTCLLHRatio( - SingleSourceZeroSigH0SingleDatasetTCLLHRatio): - """This class implements a 2-component, i.e. signal and background, - log-likelihood ratio function for a single data set assuming zero signal for - the null-hypothesis. It uses a list of independent PDFRatio instances - assuming multiple sources (stacking). - """ - def __init__( - self, minimizer, src_hypo_group_manager, src_fitparam_mapper, tdm, - pdfratios, detsigyields): - """Constructor for creating a 2-component, i.e. signal and background, - log-likelihood ratio function assuming a single source. - - Parameters - ---------- - minimizer : instance of Minimizer - The Minimizer instance that should be used to minimize the negative - of this log-likelihood ratio function. - src_hypo_group_manager : SourceHypoGroupManager instance - The SourceHypoGroupManager instance that defines the source - hypotheses. - src_fitparam_mapper : SingleSourceFitParameterMapper - The instance of SingleSourceFitParameterMapper defining the global - fit parameters and their mapping to the source fit parameters. - The order of the fit parameters defines the order of the fit values - during the maximization process. - The names of the source fit parameters must coincide with the signal - fit parameter names of the PDF ratio objects. - tdm : instance of TrialDataManager - The instance of TrialDataManager that holds the trial event data and - additional data fields for this LLH ratio function. 
- pdfratios : list of PDFRatio - The list of PDFRatio instances. A PDFRatio instance might depend on - none, one, or several fit parameters. - detsigyields : (N_source_hypo_groups,)-shaped 1D ndarray of DetSigYield - instances - The collection of DetSigYield instances for each source hypothesis - group. - """ - if(not isinstance(src_fitparam_mapper, SingleSourceFitParameterMapper)): - raise TypeError('The src_fitparam_mapper argument must be an ' - 'instance of SingleSourceFitParameterMapper!') - - super(MultiSourceZeroSigH0SingleDatasetTCLLHRatio, self).__init__( - minimizer, src_hypo_group_manager, src_fitparam_mapper, tdm, - pdfratios) - - # Construct a PDFRatio array arithmetic object specialized for a single - # source. This will pre-calculate the PDF ratio values for all PDF ratio - # instances, which do not depend on any fit parameters. - self._pdfratioarray = SingleSourcePDFRatioArrayArithmetic( - self._pdfratio_list, - self._src_fitparam_mapper.fitparamset.fitparam_list) - - self._calc_source_weights = MultiPointSourcesRelSourceWeights( - src_hypo_group_manager, src_fitparam_mapper, detsigyields) - - def evaluate(self, fitparam_values, tl=None): - """Evaluates the log-likelihood ratio function for the given set of - data events. - - Parameters - ---------- - fitparam_values : numpy (N_fitparams+1)-shaped 1D ndarray - The ndarray holding the current values of the fit parameters. - By definition, the first element is the fit parameter for the number - of signal events, ns. - tl : TimeLord instance | None - The optional TimeLord instance to measure the timing of evaluating - the LLH ratio function. - - Returns - ------- - log_lambda : float - The calculated log-lambda value. - grads : (N_fitparams+1,)-shaped 1D ndarray - The ndarray holding the gradient value of log_lambda for each fit - parameter and ns. - The first element is the gradient for ns. 
- """ - _src_w, _src_w_grads = self._calc_source_weights( - fitparam_values) - self._tdm.get_data('src_array')['src_w'] = _src_w - if _src_w_grads is not None: - self._tdm.get_data('src_array')['src_w_grad'] = _src_w_grads.flatten() - else: - self._tdm.get_data('src_array')['src_w_grad'] = np.zeros_like(_src_w) - - (log_lambda, grads) = super( - MultiSourceZeroSigH0SingleDatasetTCLLHRatio, self).evaluate( - fitparam_values, tl) - - return (log_lambda, grads) - - -class DatasetSignalWeights(object, metaclass=abc.ABCMeta): - """Abstract base class for a dataset signal weight calculator class. - """ - def __init__( - self, src_hypo_group_manager, src_fitparam_mapper, detsigyields): - """Base class constructor. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of the SourceHypoGroupManager managing the source - hypothesis groups. - src_fitparam_mapper : SourceFitParameterMapper - The SourceFitParameterMapper instance that defines the global fit - parameters and their mapping to the source fit parameters. - detsigyields : 2D (N_source_hypo_groups,N_datasets)-shaped ndarray of - DetSigYield instances - The collection of DetSigYield instances for each - dataset and source group combination. The detector signal yield - instances are used to calculate the dataset signal weight factors. - The order must follow the definition order of the log-likelihood - ratio functions, i.e. datasets, and the definition order of the - source hypothesis groups. 
- """ - self.src_hypo_group_manager = src_hypo_group_manager - self.src_fitparam_mapper = src_fitparam_mapper - self.detsigyield_arr = detsigyields - - if(self._detsigyield_arr.shape[0] != self._src_hypo_group_manager.n_src_hypo_groups): - raise ValueError('The detsigyields array must have the same number ' - 'of source hypothesis groups as the source hypothesis group ' - 'manager defines!') - - # Pre-convert the source list of each source hypothesis group into a - # source array needed for the detector signal yield evaluation. - # Since all the detector signal yield instances must be of the same - # kind for each dataset, we can just use the one of the first dataset of - # each source hypothesis group. - self._src_arr_list = self._create_src_arr_list( - self._src_hypo_group_manager, self._detsigyield_arr) - - def _create_src_arr_list(self, src_hypo_group_manager, detsigyield_arr): - """Pre-convert the source list of each source hypothesis group into a - source array needed for the detector signal yield evaluation. - Since all the detector signal yield instances must be of the same - kind for each dataset, we can just use the one of the first dataset of - each source hypothesis group. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The SourceHypoGroupManager instance defining the sources. - - detsigyield_arr : 2D (N_source_hypo_groups,N_datasets)-shaped ndarray of - DetSigYield instances - The collection of DetSigYield instances for each dataset and source - group combination. - Returns - ------- - src_arr_list : list of numpy record ndarrays - The list of the source numpy record ndarrays, one for each source - hypothesis group, which is needed by the detector signal yield - instance. 
- """ - src_arr_list = [] - for (gidx, src_hypo_group) in enumerate(src_hypo_group_manager.src_hypo_group_list): - src_arr_list.append( - detsigyield_arr[gidx,0].source_to_array(src_hypo_group.source_list) - ) - - return src_arr_list - - @property - def src_hypo_group_manager(self): - """The instance of SourceHypoGroupManager, which defines the source - hypothesis groups. - """ - return self._src_hypo_group_manager - @src_hypo_group_manager.setter - def src_hypo_group_manager(self, manager): - if(not isinstance(manager, SourceHypoGroupManager)): - raise TypeError('The src_hypo_group_manager property must be an ' - 'instance of SourceHypoGroupManager!') - self._src_hypo_group_manager = manager - - @property - def src_fitparam_mapper(self): - """The SourceFitParameterMapper instance defining the global fit - parameters and their mapping to the source fit parameters. - """ - return self._src_fitparam_mapper - @src_fitparam_mapper.setter - def src_fitparam_mapper(self, mapper): - if(not isinstance(mapper, SourceFitParameterMapper)): - raise TypeError('The src_fitparam_mapper property must be an ' - 'instance of SourceFitParameterMapper!') - self._src_fitparam_mapper = mapper - - @property - def detsigyield_arr(self): - """The 2D (N_source_hypo_groups,N_datasets)-shaped ndarray of - DetSigYield instances. 
- """ - return self._detsigyield_arr - @detsigyield_arr.setter - def detsigyield_arr(self, detsigyields): - if(not isinstance(detsigyields, np.ndarray)): - raise TypeError('The detsigyield_arr property must be an instance ' - 'of numpy.ndarray!') - if(detsigyields.ndim != 2): - raise ValueError('The detsigyield_arr property must be a ' - 'numpy.ndarray with 2 dimensions!') - if(not issequenceof(detsigyields.flat, DetSigYield)): - raise TypeError('The detsigyield_arr property must contain ' - 'DetSigYield instances, one for each source hypothesis group ' - 'and dataset combination!') - self._detsigyield_arr = detsigyields - - @property - def n_datasets(self): - """(read-only) The number of datasets this DatasetSignalWeights instance - is for. - """ - return self._detsigyield_arr.shape[1] - - def change_source_hypo_group_manager(self, src_hypo_group_manager): - """Changes the SourceHypoGroupManager instance of this - DatasetSignalWeights instance. This will also recreate the internal - source numpy record arrays needed for the detector signal efficiency - calculation. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The new SourceHypoGroupManager instance, that should be used for - this dataset signal weights instance. - """ - self.src_hypo_group_manager = src_hypo_group_manager - self._src_arr_list = self._create_src_arr_list( - self._src_hypo_group_manager, self._detsigyield_arr) - - @abc.abstractmethod - def __call__(self, fitparam_values): - """This method is supposed to calculate the dataset signal weights and - their gradients. - - Parameters - ---------- - fitparam_values : (N_fitparams+1,)-shaped 1D numpy ndarray - The ndarray holding the current values of the fit parameters. - The first element of that array is, by definition, the number of - signal events, ns. - - Returns - ------- - f : (N_datasets,)-shaped 1D ndarray - The dataset signal weight factor for each dataset. 
- f_grads : (N_datasets,N_fitparams)-shaped 2D ndarray - The gradients of the dataset signal weight factors, one for each - fit parameter. - """ - pass - - -class SingleSourceDatasetSignalWeights(DatasetSignalWeights): - """This class calculates the dataset signal weight factors for each dataset - assuming a single source. - """ - def __init__( - self, src_hypo_group_manager, src_fitparam_mapper, detsigyields): - """Constructs a new DatasetSignalWeights instance assuming a single - source. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of the SourceHypoGroupManager managing the source - hypothesis groups. - src_fitparam_mapper : SingleSourceFitParameterMapper - The instance of SingleSourceFitParameterMapper defining the global - fit parameters and their mapping to the source fit parameters. - detsigyields : 2D (N_source_hypo_groups,N_datasets)-shaped ndarray of - DetSigYield instances - The collection of DetSigYield instances for each - dataset and source group combination. The detector signal yield - instances are used to calculate the dataset signal weight factors. - The order must follow the definition order of the log-likelihood - ratio functions, i.e. datasets, and the definition order of the - source hypothesis groups. - """ - - if(not isinstance(src_fitparam_mapper, SingleSourceFitParameterMapper)): - raise TypeError('The src_fitparam_mapper argument must be an ' - 'instance of SingleSourceFitParameterMapper!') - - # Convert sequence into a 2D numpy array. - detsigyields = np.atleast_2d(detsigyields) - - super(SingleSourceDatasetSignalWeights, self).__init__( - src_hypo_group_manager, src_fitparam_mapper, detsigyields) - - def __call__(self, fitparam_values): - """Calculates the dataset signal weight and its fit parameter gradients - for each dataset. - - Parameters - ---------- - fitparam_values : (N_fitparams+1,)-shaped 1D numpy ndarray - The ndarray holding the current values of the fit parameters. 
- The first element of that array is, by definition, the number of - signal events, ns. - - Returns - ------- - f : (N_datasets,)-shaped 1D ndarray - The dataset signal weight factor for each dataset. - f_grads : (N_datasets,N_fitparams)-shaped 2D ndarray | None - The gradients of the dataset signal weight factors, one for each - fit parameter. None is returned if there are no fit parameters - beside ns. - """ - fitparams_arr = self._src_fitparam_mapper.get_fitparams_array(fitparam_values[1:]) - - N_datasets = self.n_datasets - N_fitparams = self._src_fitparam_mapper.n_global_fitparams - - Y = np.empty((N_datasets,), dtype=np.float64) - if(N_fitparams > 0): - Y_grads = np.empty((N_datasets, N_fitparams), dtype=np.float64) - - # Loop over the detector signal efficiency instances for the first and - # only source hypothesis group. - for (j, detsigyield) in enumerate(self._detsigyield_arr[0]): - (Yj, Yj_grads) = detsigyield(self._src_arr_list[0], fitparams_arr) - # Store the detector signal yield and its fit parameter - # gradients for the first and only source (element 0). - Y[j] = Yj[0] - if(N_fitparams > 0): - if Yj_grads is None: - Y_grads[j] = np.zeros_like(Yj[0]) - else: - Y_grads[j] = Yj_grads[0] - - # sumj_Y is a scalar. - sumj_Y = np.sum(Y, axis=0) - - # f is a (N_datasets,)-shaped 1D ndarray. - f = Y/sumj_Y - - # f_grads is a (N_datasets, N_fitparams)-shaped 2D ndarray. - if(N_fitparams > 0): - # sumj_Y_grads is a (N_fitparams,)-shaped 1D array. - sumj_Y_grads = np.sum(Y_grads, axis=0) - f_grads = (Y_grads*sumj_Y - Y[...,np.newaxis]*sumj_Y_grads) / sumj_Y**2 - else: - f_grads = None - - return (f, f_grads) - - -class MultiSourceDatasetSignalWeights(SingleSourceDatasetSignalWeights): - """This class calculates the dataset signal weight factors for each dataset - assuming multiple sources. - """ - def __init__( - self, src_hypo_group_manager, src_fitparam_mapper, detsigyields): - """Constructs a new DatasetSignalWeights instance assuming multiple - sources. 
- - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of the SourceHypoGroupManager managing the source - hypothesis groups. - src_fitparam_mapper : SingleSourceFitParameterMapper - The instance of SingleSourceFitParameterMapper defining the global - fit parameters and their mapping to the source fit parameters. - detsigyields : 2D (N_source_hypo_groups,N_datasets)-shaped ndarray of - DetSigYield instances - The collection of DetSigYield instances for each - dataset and source group combination. The detector signal yield - instances are used to calculate the dataset signal weight factors. - The order must follow the definition order of the log-likelihood - ratio functions, i.e. datasets, and the definition order of the - source hypothesis groups. - """ - - if(not isinstance(src_fitparam_mapper, SingleSourceFitParameterMapper)): - raise TypeError('The src_fitparam_mapper argument must be an ' - 'instance of SingleSourceFitParameterMapper!') - - super(MultiSourceDatasetSignalWeights, self).__init__( - src_hypo_group_manager, src_fitparam_mapper, detsigyields) - - def __call__(self, fitparam_values): - """Calculates the dataset signal weight and its fit parameter gradients - for each dataset. - - Parameters - ---------- - fitparam_values : (N_fitparams+1,)-shaped 1D numpy ndarray - The ndarray holding the current values of the fit parameters. - The first element of that array is, by definition, the number of - signal events, ns. - - Returns - ------- - f : (N_datasets,)-shaped 1D ndarray - The dataset signal weight factor for each dataset. - f_grads : (N_datasets,N_fitparams)-shaped 2D ndarray | None - The gradients of the dataset signal weight factors, one for each - fit parameter. None is returned if there are no fit parameters - beside ns. 
- """ - fitparams_arr = self._src_fitparam_mapper.get_fitparams_array(fitparam_values[1:]) - - N_datasets = self.n_datasets - N_fitparams = self._src_fitparam_mapper.n_global_fitparams - - Y = np.empty((N_datasets, len(self._src_arr_list[0])), dtype=np.float64) - if(N_fitparams > 0): - Y_grads = np.empty((N_datasets, len(self._src_arr_list[0]), N_fitparams), dtype=np.float64) - - # Loop over the detector signal efficiency instances for the first and - # only source hypothesis group. - for (k, detsigyield_k) in enumerate(self._detsigyield_arr): - for (j, detsigyield) in enumerate(detsigyield_k): - (Yj, Yj_grads) = detsigyield(self._src_arr_list[k], fitparams_arr) - # Store the detector signal yield and its fit parameter - # gradients for the first and only source (element 0). - Y[j] = Yj - if(N_fitparams > 0): - Y_grads[j] = Yj_grads.T - - sum_Y = np.sum(Y) - - # f is a (N_datasets,)-shaped 1D ndarray. - f = np.sum(Y, axis=1) / sum_Y - - # f_grads is a (N_datasets, N_fitparams)-shaped 2D ndarray. - if(N_fitparams > 0): - # sum_Y_grads is a (N_datasets, N_fitparams,)-shaped 2D array. - sum_Y_grads = np.sum(Y_grads, axis=1) - f_grads = (sum_Y_grads*sum_Y - (f*sum_Y)[...,np.newaxis]*np.sum(sum_Y_grads, axis=0)) / sum_Y**2 - else: - f_grads = None - - return (f, f_grads) - - -class SourceWeights(object, metaclass=abc.ABCMeta): - """Abstract base class for a source weight calculator class. - """ - def __init__( - self, src_hypo_group_manager, src_fitparam_mapper, detsigyields): - """Constructs a new SourceWeights instance. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of the SourceHypoGroupManager managing the source - hypothesis groups. - src_fitparam_mapper : SourceFitParameterMapper - The SourceFitParameterMapper instance that defines the global fit - parameters and their mapping to the source fit parameters. 
- detsigyields : (N_source_hypo_groups,)-shaped 1D ndarray of DetSigYield - instances - The collection of DetSigYield instances for each source hypothesis - group. - """ - self.src_hypo_group_manager = src_hypo_group_manager - self.src_fitparam_mapper = src_fitparam_mapper - self.detsigyield_arr = np.atleast_1d(detsigyields) - - if(self._detsigyield_arr.shape[0] != self._src_hypo_group_manager.n_src_hypo_groups): - raise ValueError('The detsigyields array must have the same number ' - 'of source hypothesis groups as the source hypothesis group ' - 'manager defines!') - - # Pre-convert the source list of each source hypothesis group into a - # source array needed for the detector signal yield evaluation. - # Since all the detector signal yield instances must be of the same - # kind for each dataset, we can just use the one of the first dataset of - # each source hypothesis group. - self._src_arr_list = self._create_src_arr_list( - self._src_hypo_group_manager, self._detsigyield_arr) - - def _create_src_arr_list(self, src_hypo_group_manager, detsigyield_arr): - """Pre-convert the source list of each source hypothesis group into a - source array needed for the detector signal yield evaluation. - Since all the detector signal yield instances must be of the same - kind for each dataset, we can just use the one of the first dataset of - each source hypothesis group. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The SourceHypoGroupManager instance defining the sources. - - detsigyield_arr : (N_source_hypo_groups,)-shaped 1D ndarray of - DetSigYield instances - The collection of DetSigYield instances for each source hypothesis - group. - Returns - ------- - src_arr_list : list of numpy record ndarrays - The list of the source numpy record ndarrays, one for each source - hypothesis group, which is needed by the detector signal yield - instance. 
- """ - src_arr_list = [] - for (gidx, src_hypo_group) in enumerate(src_hypo_group_manager.src_hypo_group_list): - src_arr_list.append( - detsigyield_arr[gidx].source_to_array(src_hypo_group.source_list) - ) - - return src_arr_list - - @property - def src_hypo_group_manager(self): - """The instance of SourceHypoGroupManager, which defines the source - hypothesis groups. - """ - return self._src_hypo_group_manager - @src_hypo_group_manager.setter - def src_hypo_group_manager(self, manager): - if(not isinstance(manager, SourceHypoGroupManager)): - raise TypeError('The src_hypo_group_manager property must be an ' - 'instance of SourceHypoGroupManager!') - self._src_hypo_group_manager = manager - - @property - def src_fitparam_mapper(self): - """The SourceFitParameterMapper instance defining the global fit - parameters and their mapping to the source fit parameters. - """ - return self._src_fitparam_mapper - @src_fitparam_mapper.setter - def src_fitparam_mapper(self, mapper): - if(not isinstance(mapper, SourceFitParameterMapper)): - raise TypeError('The src_fitparam_mapper property must be an ' - 'instance of SourceFitParameterMapper!') - self._src_fitparam_mapper = mapper - - @property - def detsigyield_arr(self): - """The (N_source_hypo_groups,)-shaped 1D ndarray of DetSigYield - instances. 
- """ - return self._detsigyield_arr - @detsigyield_arr.setter - def detsigyield_arr(self, detsigyields): - if(not isinstance(detsigyields, np.ndarray)): - raise TypeError('The detsigyield_arr property must be an instance ' - 'of numpy.ndarray!') - if(detsigyields.ndim != 1): - raise ValueError('The detsigyield_arr property must be a ' - 'numpy.ndarray with 1 dimensions!') - if(not issequenceof(detsigyields.flat, DetSigYield)): - raise TypeError('The detsigyield_arr property must contain ' - 'DetSigYield instances, one for each source hypothesis group!') - self._detsigyield_arr = detsigyields - - def change_source_hypo_group_manager(self, src_hypo_group_manager): - """Changes the SourceHypoGroupManager instance of this - DatasetSignalWeights instance. This will also recreate the internal - source numpy record arrays needed for the detector signal efficiency - calculation. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The new SourceHypoGroupManager instance, that should be used for - this dataset signal weights instance. - """ - self.src_hypo_group_manager = src_hypo_group_manager - self._src_arr_list = self._create_src_arr_list( - self._src_hypo_group_manager, self._detsigyield_arr) - - @abc.abstractmethod - def __call__(self, fitparam_values): - """This method is supposed to calculate source weights and - their gradients. - - Parameters - ---------- - fitparam_values : (N_fitparams+1,)-shaped 1D numpy ndarray - The ndarray holding the current values of the fit parameters. - The first element of that array is, by definition, the number of - signal events, ns. - - Returns - ------- - f : (N_sources,)-shaped 1D ndarray - The source weight factor for each source. - f_grads : (N_sources,)-shaped 1D ndarray | None - The gradients of the source weight factors. None is returned if - there are no fit parameters beside ns. 
- """ - pass - - -class MultiPointSourcesRelSourceWeights(SourceWeights): - """This class calculates the relative source weights for a group of point - sources. - """ - def __init__( - self, src_hypo_group_manager, src_fitparam_mapper, detsigyields): - """Constructs a new MultiPointSourcesRelSourceWeights instance assuming - multiple sources. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of the SourceHypoGroupManager managing the source - hypothesis groups. - src_fitparam_mapper : SingleSourceFitParameterMapper - The instance of SingleSourceFitParameterMapper defining the global - fit parameters and their mapping to the source fit parameters. - detsigyields : (N_source_hypo_groups,)-shaped 1D ndarray of - DetSigYield instances - The collection of DetSigYield instances for each source hypothesis - group. - """ - - if(not isinstance(src_fitparam_mapper, SingleSourceFitParameterMapper)): - raise TypeError('The src_fitparam_mapper argument must be an ' - 'instance of SingleSourceFitParameterMapper!') - - super(MultiPointSourcesRelSourceWeights, self).__init__( - src_hypo_group_manager, src_fitparam_mapper, detsigyields) - - def __call__(self, fitparam_values): - """Calculates the source weights and its fit parameter gradients - for each source. - - Parameters - ---------- - fitparam_values : (N_fitparams+1,)-shaped 1D numpy ndarray - The ndarray holding the current values of the fit parameters. - The first element of that array is, by definition, the number of - signal events, ns. - - Returns - ------- - f : (N_sources,)-shaped 1D ndarray - The source weight factor for each source. - f_grads : (N_sources,)-shaped 1D ndarray | None - The gradients of the source weight factors. None is returned if - there are no fit parameters beside ns. 
- """ - fitparams_arr = self._src_fitparam_mapper.get_fitparams_array(fitparam_values[1:]) - - N_fitparams = self._src_fitparam_mapper.n_global_fitparams - - Y = [] - Y_grads = [] - - # Loop over detector signal efficiency instances for each source - # hypothesis group in source hypothesis group manager. - for (g, detsigyield) in enumerate(self._detsigyield_arr): - (Yg, Yg_grads) = detsigyield(self._src_arr_list[g], fitparams_arr) - - # Store the detector signal yield and its fit parameter - # gradients for all sources. - Y.append(Yg) - if(N_fitparams > 0): - Y_grads.append(Yg_grads.T) - - Y = np.array(Y) - sum_Y = np.sum(Y) - - # f is a (N_sources,)-shaped 1D ndarray. - f = Y / sum_Y - - # Flatten the array so that each relative weight corresponds to specific - # source. - f = f.flatten() - - if(N_fitparams > 0): - Y_grads = np.concatenate(Y_grads) - - # Sum over fit parameter gradients axis. - # f_grads is a (N_sources,)-shaped 1D ndarray. - f_grads = np.sum(Y_grads, axis=1) / sum_Y - else: - f_grads = None - - return (f, f_grads) - - -class MultiDatasetTCLLHRatio(TCLLHRatio): +class MultiDatasetTCLLHRatio( + TCLLHRatio): """This class describes a two-component log-likelihood ratio function for multiple datasets. The final log-likelihood ratio value is the sum of the individual log-likelihood ratio values. @@ -1419,52 +915,90 @@ class MultiDatasetTCLLHRatio(TCLLHRatio): By mathematical definition this class is suitable for single and multi source hypotheses. """ - def __init__(self, minimizer, dataset_signal_weights, llhratios): + def __init__( + self, + pmm, + minimizer, + src_detsigyield_weights_service, + ds_sig_weight_factors_service, + llhratio_list, + **kwargs): """Creates a new composite two-component log-likelihood ratio function. Parameters ---------- + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper providing the mapping of + global floating parameters to individual models. 
minimizer : instance of Minimizer The Minimizer instance that should be used to minimize the negative of this log-likelihood ratio function. - dataset_signal_weights : DatasetSignalWeights - An instance of DatasetSignalWeights, which calculates the relative - dataset weight factors. - llhratios : sequence of SingleDatasetTCLLHRatio instances - The sequence of the two-component log-likelihood ratio functions, + src_detsigyield_weights_service : instance of SrcDetSigYieldWeightsService + An instance of SrcDetSigYieldWeightsService, which provides the + product of the source weights with the detector signal yield. + ds_sig_weight_factors_service : instance of DatasetSignalWeightFactorsService + An instance of DatasetSignalWeightFactorsService, which provides + the relative dataset signal weight factors. + llhratio_list : list of instance of SingleDatasetTCLLHRatio + The list of the two-component log-likelihood ratio functions, one for each dataset. """ - self.dataset_signal_weights = dataset_signal_weights - self.llhratio_list = llhratios - - super(MultiDatasetTCLLHRatio, self).__init__( - minimizer, self._llhratio_list[0].mean_n_sig_0) - - # Check if the number of datasets the DatasetSignalWeights instance is - # made for equals the number of log-likelihood ratio functions. - if(self.dataset_signal_weights.n_datasets != len(self._llhratio_list)): - raise ValueError('The number of datasets the DatasetSignalWeights ' + if not issequenceof(llhratio_list, SingleDatasetTCLLHRatio): + raise TypeError( + 'The llhratio_list argument must be a sequence of ' + 'SingleDatasetTCLLHRatio instances! 
' + f'Its current type is {classname(llhratio_list)}.') + self._llhratio_list = list(llhratio_list) + + super().__init__( + pmm=pmm, + minimizer=minimizer, + mean_n_sig_0=self._llhratio_list[0].mean_n_sig_0, + **kwargs) + + self.src_detsigyield_weights_service = src_detsigyield_weights_service + self.ds_sig_weight_factors_service = ds_sig_weight_factors_service + + if ( + self.ds_sig_weight_factors_service.n_datasets + != len(self._llhratio_list) + ): + raise ValueError( + 'The number of datasets the DatasetSignalWeightFactorsService ' 'instance is made for must be equal to the number of ' 'log-likelihood ratio functions!') - # Define cache variable for the dataset signal weight factors, which - # will be needed when calculating the second derivative w.r.t. ns of the - # log-likelihood ratio function. - self._cache_fitparam_values_ns = None - self._cache_f = None + @property + def src_detsigyield_weights_service(self): + """The instance of SrcDetSigYieldWeightsService, which provides the + product of the source weights with the detector signal yield. + """ + return self._src_detsigyield_weights_service + + @src_detsigyield_weights_service.setter + def src_detsigyield_weights_service(self, service): + if not isinstance(service, SrcDetSigYieldWeightsService): + raise TypeError( + 'The src_detsigyield_weights_service property must be an ' + 'instance of SrcDetSigYieldWeightsService! ' + f'Its current type is {classname(service)}.') + self._src_detsigyield_weights_service = service @property - def dataset_signal_weights(self): - """The DatasetSignalWeights instance that provides the relative dataset - weight factors. + def ds_sig_weight_factors_service(self): + """The instance of DatasetSignalWeightFactorsService that provides the + relative dataset signal weight factors. 
""" - return self._dataset_signal_weights - @dataset_signal_weights.setter - def dataset_signal_weights(self, obj): - if(not isinstance(obj, DatasetSignalWeights)): - raise TypeError('The dataset_signal_weights property must be an ' - 'instance of DatasetSignalWeights!') - self._dataset_signal_weights = obj + return self._ds_sig_weight_factors_service + + @ds_sig_weight_factors_service.setter + def ds_sig_weight_factors_service(self, service): + if not isinstance(service, DatasetSignalWeightFactorsService): + raise TypeError( + 'The ds_sig_weight_factors_service property must be an ' + 'instance of DatasetSignalWeightFactorsService! ' + f'Its current type is {classname(service)}.') + self._ds_sig_weight_factors_service = service @property def llhratio_list(self): @@ -1472,11 +1006,14 @@ def llhratio_list(self): composite log-likelihood-ratio function. """ return self._llhratio_list + @llhratio_list.setter def llhratio_list(self, llhratios): - if(not issequenceof(llhratios, SingleDatasetTCLLHRatio)): - raise TypeError('The llhratio_list property must be a sequence of ' - 'SingleDatasetTCLLHRatio instances!') + if not issequenceof(llhratios, SingleDatasetTCLLHRatio): + raise TypeError( + 'The llhratio_list property must be a sequence of ' + 'SingleDatasetTCLLHRatio instances! ' + f'Its current type is {classname(llhratios)}.') self._llhratio_list = list(llhratios) @property @@ -1495,63 +1032,115 @@ def mean_n_sig_0(self, v): for llhratio in self._llhratio_list: llhratio.mean_n_sig_0 = self._mean_n_sig_0 - def change_source_hypo_group_manager(self, src_hypo_group_manager): + def change_shg_mgr(self, shg_mgr): """Changes the source hypo group manager of all objects of this LLH - ratio function, hence, calling the `change_source_hypo_group_manager` - method of all TCLLHRatio objects of this LLHRatio instance. + ratio function, hence, calling the ``change_shg_mgr`` + method of all TCLLHRatio instances of this LLHRatio instance. 
+ + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager that defines the groups of + source hypotheses. """ - # Change the source hypo group manager of the DatasetSignalWeights - # instance. - self._dataset_signal_weights.change_source_hypo_group_manager( - src_hypo_group_manager) + self._src_detsigyield_weights_service.change_shg_mgr( + shg_mgr=shg_mgr) + + self._ds_sig_weight_factors_service.change_shg_mgr( + shg_mgr=shg_mgr) for llhratio in self._llhratio_list: - llhratio.change_source_hypo_group_manager(src_hypo_group_manager) + llhratio.change_shg_mgr( + shg_mgr=shg_mgr) - def initialize_for_new_trial(self, tl=None): + def initialize_for_new_trial( + self, + tl=None, + **kwargs): """Initializes the log-likelihood-ratio function for a new trial. + It calls the + :meth:`~skyllh.core.llhratio.LLHRatio.initialize_for_new_trial` method + of the :class:`~skyllh.core.llhratio.LLHRatio` class of each individual + log-likelihood ratio function. + + Parameters + ---------- + tl : instance of TimeLord + The optional instance of TimeLord to measure timing information. """ for llhratio in self._llhratio_list: - llhratio.initialize_for_new_trial(tl=tl) - - def evaluate(self, fitparam_values, tl=None): + llhratio.initialize_for_new_trial( + tl=tl, + **kwargs) + + def evaluate( + self, + fitparam_values, + src_params_recarray=None, + tl=None): """Evaluates the composite log-likelihood-ratio function and returns its value and global fit parameter gradients. Parameters ---------- - fitparam_values : (N_fitparams)-shaped numpy 1D ndarray - The ndarray holding the current values of the global fit parameters. - The first element of that array is, by definition, the number of - signal events, ns. + fitparam_values : instance of numpy ndarray + The (N_fitparams,)-shaped numpy 1D ndarray holding the current + values of the global fit parameters. 
+ src_params_recarray : instance of numpy record ndarray | None + The numpy record ndarray of length N_sources holding the parameter + names and values of all sources. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information about this array. + In case it is ``None``, it will be created automatically from the + ``fitparam_values`` argument using the + :class:`~skyllh.core.parameters.ParameterModelMapper` instance. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used for timing + measurements. Returns ------- log_lambda : float The calculated log-lambda value of the composite log-likelihood-ratio function. - grads : (N_fitparams,)-shaped 1D ndarray - The ndarray holding the gradient value of the composite - log-likelihood-ratio function for ns and each global fit parameter. - By definition the first element is the gradient for ns. + grads : instance of numpy ndarray + The (N_fitparams,)-shaped 1D ndarray holding the gradient value of + the composite log-likelihood-ratio function for each global fit + parameter. """ tracing = CFG['debugging']['enable_tracing'] - ns = fitparam_values[0] - if(tracing): + if src_params_recarray is None: + src_params_recarray = self._pmm.create_src_params_recarray( + gflp_values=fitparam_values + ) + + n_fitparams = len(fitparam_values) + + ns_pidx = self._pmm.get_gflp_idx('ns') + + ns = fitparam_values[ns_pidx] + if tracing: logger.debug( - '{:s}.evaluate: ns={:.3f}'.format( - classname(self), ns)) + f'{classname(self)}.evaluate: ns={ns:.3f}') + + # We need to calculate the source detsigyield weights and the dataset + # signal weight factors. + self._src_detsigyield_weights_service.calculate( + src_params_recarray=src_params_recarray) + self._ds_sig_weight_factors_service.calculate() # Get the dataset signal weights and their gradients. # f is a (N_datasets,)-shaped 1D ndarray. 
- # f_grads is a (N_datasets,N_fitparams)-shaped 2D ndarray. - (f, f_grads) = self._dataset_signal_weights(fitparam_values) + # f_grads is a dictionary holding (N_datasets,)-shaped 1D ndarrays for + # each global fit parameter. + (f, f_grads_dict) = self._ds_sig_weight_factors_service.get_weights() - # Cache f for possible later calculation of the second derivative w.r.t. - # ns of the log-likelihood ratio function. - self._cache_fitparam_values_ns = ns - self._cache_f = f + # Convert the f_grads dictionary into a (N_datasets,N_fitparams) + f_grads = np.zeros((len(f), n_fitparams), dtype=np.float64) + for pidx in f_grads_dict.keys(): + f_grads[:, pidx] = f_grads_dict[pidx] nsf = ns * f @@ -1563,47 +1152,74 @@ def evaluate(self, fitparam_values, tl=None): # Allocate an array for the gradients of the composite log-likelihood # function. It is always at least one element long, i.e. the gradient # for ns. - grads = np.zeros((len(fitparam_values),), dtype=np.float64) + grads = np.zeros((n_fitparams,), dtype=np.float64) # Create an array holding the fit parameter values for a particular # llh ratio function. Since we need to adjust ns with nsj it's more # efficient to create this array once and use it within the for loop # over the llh ratio functions. - llhratio_fitparam_values = np.empty( - (len(fitparam_values),), dtype=np.float64) + llhratio_fitparam_values = fitparam_values.copy() + + pmask = np.ones((n_fitparams,), dtype=np.bool_) + pmask[ns_pidx] = False + # Loop over the llh ratio functions. 
for (j, llhratio) in enumerate(self._llhratio_list): - if(tracing): + if tracing: logger.debug( - 'nsf[j={:d}] = {:.3f}'.format( - j, nsf[j])) - llhratio_fitparam_values[0] = nsf[j] - llhratio_fitparam_values[1:] = fitparam_values[1:] + f'nsf[j={j}] = {nsf[j]:.3f}') + + llhratio_fitparam_values[ns_pidx] = nsf[j] + (log_lambda_j, grads_j) = llhratio.evaluate( - llhratio_fitparam_values, tl=tl) + fitparam_values=llhratio_fitparam_values, + src_params_recarray=src_params_recarray, + tl=tl) log_lambda += log_lambda_j + # Gradient for ns. - grads[0] += grads_j[0] * f[j] + grads[ns_pidx] += grads_j[ns_pidx] * f[j] + # Gradient for each global fit parameter, if there are any. - if(len(grads) > 1): - grads[1:] += grads_j[0] * ns * f_grads[j] + grads_j[1:] + if len(grads) > 1: + ns_summand = grads_j[ns_pidx] * ns * f_grads[j][pmask] + grads[pmask] += ns_summand + grads_j[pmask] return (log_lambda, grads) - def calculate_ns_grad2(self, fitparam_values): + def calculate_ns_grad2( + self, + ns, + ns_pidx, + src_params_recarray, + tl=None): """Calculates the second derivative w.r.t. ns of the log-likelihood ratio function. - This method tries to use cached values for the dataset signal weight - factors. If cached values don't exist or do not match the given fit - parameter values, they will get calculated automatically by calling the - evaluate method with the given fit parameter values. + + Note:: + + This method takes the dataset signal weight factors from the dataset + signal weight factors service. Hence, the service needs to be + updated before calling this method. Parameters ---------- - fitparam_values : numpy (N_fitparams+1)-shaped 1D ndarray - The ndarray holding the current values of the fit parameters. - By definition, the first element is the fit parameter for the number - of signal events, ns. + fitparam_values : instance of numpy ndarray + The (N_fitparams,)-shaped 1D ndarray holding the current values of + the global fit parameters. 
+ ns : float + The value of the global fit parameter ns. + ns_pidx : int + The index of the global parameter ns. + src_params_recarray : instance of numpy record ndarray + The numpy record ndarray of length N_sources holding the parameter + names and values of all sources. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information about this array. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used for timing + measurements. Returns ------- @@ -1611,52 +1227,55 @@ The second derivative w.r.t. ns of the log-likelihood ratio function for the given fit parameter values. """ - ns = fitparam_values[0] - - # Check if the cached fit parameters match the given ones. The ns value - # is special to the multi-dataset LLH ratio function, but all the other - # fit parameters are shared by all the LLH ratio functions of the - # different datasets. So those we just query from the first LLH ratio - # function. - if((self._cache_fitparam_values_ns is None) or - (self._cache_fitparam_values_ns != ns) or - (not np.all(self._llhratio_list[0]._cache_fitparam_values[1:] == fitparam_values[1:]))): - self.evaluate(fitparam_values) + (f, f_grads_dict) = self._ds_sig_weight_factors_service.get_weights() - nsf = ns * self._cache_f + nsf = ns * f nsgrad2j = np.empty((len(self._llhratio_list),), dtype=np.float64) - # Loop over the llh ratio functions and their second derivative. - llhratio_fitparam_values = np.empty( - (len(fitparam_values),), dtype=np.float64) + + # Loop over the llh ratio functions and calculate their second + # derivative. 
for (j, llhratio) in enumerate(self._llhratio_list): - llhratio_fitparam_values[0] = nsf[j] - llhratio_fitparam_values[1:] = fitparam_values[1:] - nsgrad2j[j] = llhratio.calculate_ns_grad2(llhratio_fitparam_values) + nsgrad2j[j] = llhratio.calculate_ns_grad2( + ns=nsf[j], + ns_pidx=ns_pidx, + src_params_recarray=src_params_recarray, + tl=tl) - nsgrad2 = np.sum(nsgrad2j * self._cache_f**2) + nsgrad2 = np.sum(nsgrad2j * f**2) return nsgrad2 -class NsProfileMultiDatasetTCLLHRatio(TCLLHRatio): +class NsProfileMultiDatasetTCLLHRatio( + TCLLHRatio): r"""This class implements a profile log-likelihood ratio function that has only ns as fit parameter. It uses a MultiDatasetTCLLHRatio instance as log-likelihood function. Hence, mathematically it is .. math:: - \Lambda(n_s) = \frac{L(n_s)}{L(n_s=n_{s,0})}, + \Lambda(n_{\mathrm{s}}) = \frac{L(n_{\mathrm{s}})}{L(n_{\mathrm{s}}=n_{\mathrm{s},0})}, - where :math:`n_{s,0}` is the fixed mean number of signal events for the - null-hypothesis. + where :math:`n_{\mathrm{s},0}` is the fixed mean number of signal events for + the null-hypothesis. """ - def __init__(self, minimizer, mean_n_sig_0, llhratio): - """Creates a new ns-profile log-likelihood-ratio function with a - null-hypothesis where ns is fixed to `mean_n_sig_0`. + def __init__( + self, + pmm, + minimizer, + mean_n_sig_0, + llhratio, + **kwargs): + r"""Creates a new ns-profile log-likelihood-ratio function with a + null-hypothesis where :math:`n_{\mathrm{s}}` is fixed to + ``mean_n_sig_0``. Parameters ---------- + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper providing the mapping of + global parameters to local parameters of individual models. minimizer : instance of Minimizer The Minimizer instance that should be used to minimize the negative of this log-likelihood ratio function. 
@@ -1667,20 +1286,21 @@ def __init__(self, minimizer, mean_n_sig_0, llhratio): The instance of MultiDatasetTCLLHRatio, which should be used as log-likelihood function. """ - super(NsProfileMultiDatasetTCLLHRatio, self).__init__( - minimizer, mean_n_sig_0) + super().__init__( + pmm=pmm, + minimizer=minimizer, + mean_n_sig_0=mean_n_sig_0, + **kwargs) self.llhratio = llhratio - # Check that the given log-likelihood-ratio function has no fit - # parameters, i.e. only ns in the end. - for sub_llhratio in llhratio.llhratio_list: - n_global_fitparams = sub_llhratio.src_fitparam_mapper.n_global_fitparams - if(n_global_fitparams != 0): - raise ValueError('The log-likelihood-ratio functions of the ' - 'MultiDatasetTCLLHRatio instance must have no global fit ' - 'parameters, i.e. only ns in the end! Currently it has %d ' - 'global fit parameters'%(n_global_fitparams)) + if self._pmm.n_global_floating_params != 1: + raise ValueError( + 'The log-likelihood-ratio function implemented by ' + f'{classname(self)} provides functionality only for LLH ' + 'function with a single global fit parameter! Currently there ' + f'are {pmm.n_global_floating_params} global fit parameters ' + 'defined!') # Define a member to hold the constant null-hypothesis log-likelihood # function value for ns=mean_n_sig_0. @@ -1692,72 +1312,122 @@ def llhratio(self): log-likelihood function. """ return self._llhratio + @llhratio.setter def llhratio(self, obj): - if(not isinstance(obj, MultiDatasetTCLLHRatio)): - raise TypeError('The llhratio property must be an instance of ' - 'MultiDatasetTCLLHRatio!') + if not isinstance(obj, MultiDatasetTCLLHRatio): + raise TypeError( + 'The llhratio property must be an instance of ' + 'MultiDatasetTCLLHRatio! 
' + f'Its current type is {classname(obj)}.') self._llhratio = obj - def change_source_hypo_group_manager(self, src_hypo_group_manager): + def change_shg_mgr( + self, + shg_mgr): """Changes the source hypo group manager of all objects of this LLH - ratio function, hence, calling the `change_source_hypo_group_manager` + ratio function, hence, calling the ``change_shg_mgr`` method of the underlaying MultiDatasetTCLLHRatio instance of this LLHRatio instance. + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The new instance of SourceHypoGroupManager. """ - self._llhratio.change_source_hypo_group_manager(src_hypo_group_manager) + self._llhratio.change_shg_mgr(shg_mgr=shg_mgr) - def initialize_for_new_trial(self, tl=None): + def initialize_for_new_trial( + self, + tl=None, + **kwargs): """Initializes the log-likelihood-ratio function for a new trial. Parameters ---------- - tl : TimeLord | None - The optional TimeLord instance that should be used for timing + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used for timing measurements. """ - self._llhratio.initialize_for_new_trial(tl=tl) + self._llhratio.initialize_for_new_trial( + tl=tl, + **kwargs) # Compute the constant log-likelihood function value for the # null-hypothesis. fitparam_values_0 = np.array([self._mean_n_sig_0], dtype=np.float64) - (self._logL_0, grads_0) = self._llhratio.evaluate(fitparam_values_0) - - def evaluate(self, fitparam_values): + (self._logL_0, grads_0) = self._llhratio.evaluate( + fitparam_values=fitparam_values_0, + tl=tl) + + def evaluate( + self, + fitparam_values, + src_params_recarray=None, + tl=None): """Evaluates the log-likelihood-ratio function and returns its value and global fit parameter gradients. Parameters ---------- - fitparam_values : (N_fitparams)-shaped numpy 1D ndarray - The ndarray holding the current values of the global fit parameters. 
- The first element of that array is, by definition, the number of - signal events, ns. + fitparam_values : instance of numpy ndarray + The (1,)-shaped numpy 1D ndarray holding the current + values of the global fit parameters. + By definition of this LLH ratio function, it must contain the single + fit paramater value for ns. + src_params_recarray : instance of numpy record ndarray + The numpy record ndarray of length N_sources holding the parameter + names and values of all sources. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information about this array. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used for timing + measurements. Returns ------- log_lambda : float The calculated log-lambda value of this log-likelihood-ratio function. - grads : (N_fitparams,)-shaped 1D ndarray + grads : (1,)-shaped 1D ndarray The ndarray holding the gradient value of this log-likelihood-ratio for ns. - By definition the first element is the gradient for ns. """ - (logL, grads) = self._llhratio.evaluate(fitparam_values) + (logL, grads) = self._llhratio.evaluate( + fitparam_values=fitparam_values, + src_params_recarray=src_params_recarray, + tl=tl) - return (logL - self._logL_0, grads) + log_lambda = logL - self._logL_0 - def calculate_ns_grad2(self, fitparam_values): + return (log_lambda, grads) + + def calculate_ns_grad2( + self, + ns, + ns_pidx, + src_params_recarray, + tl=None): """Calculates the second derivative w.r.t. ns of the log-likelihood ratio function. Parameters ---------- - fitparam_values : numpy (N_fitparams+1)-shaped 1D ndarray - The ndarray holding the current values of the fit parameters. - By definition, the first element is the fit parameter for the number - of signal events, ns. + ns : float + The value of the global fit paramater ns. + ns_pidx : int + The index of the global fit paramater ns. By definition this must + be ``0``. 
+ src_params_recarray : instance of numpy record ndarray + The numpy record ndarray of length N_sources holding the parameter + names and values of all sources. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information about this array. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used for timing + measurements. Returns ------- @@ -1765,29 +1435,15 @@ def calculate_ns_grad2(self, fitparam_values): The second derivative w.r.t. ns of the log-likelihood ratio function for the given fit parameter values. """ - return self._llhratio.calculate_ns_grad2(fitparam_values) - -#class NestedProfileLLHRatio(LLHRatio, metaclass=abc.ABCMeta): - #r"""This class provides the abstract base class for a nested profile - #log-likelihood ratio function, which is, by definition, of the form - - #.. math:: + if ns_pidx != 0: + raise ValueError( + 'The value of the ns_pidx argument must be 0! ' + f'Its current value is {ns_pidx}.') - #\Lambda = \frac{\sup_{\Theta_0} L(\theta|D)}{\sup_{\Theta} L(\theta|D)} - - #where :math:`\theta` are the possible fit parameters, and :math:`\Theta` - #and :math:`\Theta_0` are the total and nested fit parameter spaces, - #respectively. - #""" - - #def __init__(self, ): - #super(NestedProfileLLHRatio, self).__init__() - - -#class MultiDatasetNestedProfileLLHRatio(NestedProfileLLHRatio): - #"""This class provides a nested profile log-likelihood ratio function for - #multiple data sets. 
- #""" - #def __init__(self): - #super(MultiDatasetNestedProfileLLHRatio, self).__init__() + nsgrad2 = self._llhratio.calculate_ns_grad2( + ns=ns, + ns_pidx=ns_pidx, + src_params_recarray=src_params_recarray, + tl=tl) + return nsgrad2 diff --git a/skyllh/core/math.py b/skyllh/core/math.py index d4ed441bfc..a39c1ac27e 100644 --- a/skyllh/core/math.py +++ b/skyllh/core/math.py @@ -1,22 +1,25 @@ # -*- coding: utf-8 -*- -"""The `math` module contains classes for pure mathematical objects. +"""The ``math`` module contains classes for pure mathematical objects. """ -from __future__ import division - import abc -from copy import deepcopy +from copy import ( + deepcopy, +) +import numpy as np from skyllh.core.py import ( classname, isproperty, issequence, - issequenceof + issequenceof, ) -class MathFunction(object, metaclass=abc.ABCMeta): +class MathFunction( + object, + metaclass=abc.ABCMeta): """This abstract base class provides an implementation for a mathematical function. Such a function has defined parameters, which are implemented as class properties. The tuple of parameter names is defined through the @@ -33,23 +36,26 @@ def param_names(self): """The tuple holding the names of the math function's parameters. """ return self._param_names + @param_names.setter def param_names(self, names): - if(not issequence(names)): + if not issequence(names): names = (names,) - if(not issequenceof(names, str)): - raise TypeError('The param_names property must be a sequence of ' - 'str!') + if not issequenceof(names, str): + raise TypeError( + 'The param_names property must be a sequence of str!') names = tuple(names) # Check if all the given names are actual properties of this # MathFunction class. 
for name in names: - if(not hasattr(self, name)): - raise KeyError('The "%s" class does not have an attribute ' - 'named "%s"!'%(classname(self), name)) - if(not isproperty(self, name)): - raise TypeError('The attribute "%s" of "%s" is not a ' - 'property!'%(classname(self), name)) + if not hasattr(self, name): + raise KeyError( + f'The "{classname(self)}" class does not have an attribute ' + f'named "{name}"!') + if not isproperty(self, name): + raise TypeError( + f'The attribute "{classname(self)}" of "{name}" is not a ' + 'property!') self._param_names = names @property @@ -64,7 +70,9 @@ def __str__(self): """ return self.math_function_str - def copy(self, newparams=None): + def copy( + self, + newparams=None): """Copies this MathFunction object by calling the copy.deepcopy function, and sets new parameters if requested. @@ -78,12 +86,37 @@ def copy(self, newparams=None): f = deepcopy(self) # Set the new parameter values. - if(newparams is not None): + if newparams is not None: f.set_params(newparams) return f - def set_params(self, pdict): + def get_param( + self, + name): + """Retrieves the value of the given parameter. It returns ``np.nan`` if + the parameter does not exist. + + Parameters + ---------- + name : str + The name of the parameter. + + Returns + ------- + value : float | np.nan + The value of the parameter. + """ + if name not in self._param_names: + return np.nan + + value = getattr(self, name) + + return value + + def set_params( + self, + pdict): """Sets the parameters of the math function to the given parameter values. @@ -98,15 +131,16 @@ def set_params(self, pdict): updated : bool Flag if parameter values were actually updated. 
""" - if(not isinstance(pdict, dict)): - raise TypeError('The pdict argument must be of type dict!') + if not isinstance(pdict, dict): + raise TypeError( + 'The pdict argument must be of type dict!') updated = False for pname in self._param_names: current_value = getattr(self, pname) pvalue = pdict.get(pname, current_value) - if(pvalue != current_value): + if pvalue != current_value: setattr(self, pname, pvalue) updated = True diff --git a/skyllh/core/minimizer.py b/skyllh/core/minimizer.py index cf28c3d68e..5c68ab2136 100644 --- a/skyllh/core/minimizer.py +++ b/skyllh/core/minimizer.py @@ -6,16 +6,18 @@ import logging import numpy as np import scipy.optimize -from typing import Optional, Dict, Any, List -from skyllh.core.parameters import FitParameterSet +from skyllh.core.parameters import ParameterSet from skyllh.core.py import classname logger = logging.getLogger(__name__) -class MinimizerImpl(object, metaclass=abc.ABCMeta): +class MinimizerImpl( + object, + metaclass=abc.ABCMeta +): """Abstract base class for a minimizer implementation. It defines the interface between the implementation and the Minimizer class. """ @@ -120,7 +122,9 @@ def is_repeatable(self, status): pass -class ScipyMinimizerImpl(MinimizerImpl): +class ScipyMinimizerImpl( + MinimizerImpl +): """Wrapper for `scipy.optimize.minimize`""" def __init__(self, method: str) -> None: @@ -154,6 +158,7 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): Additional Keyword Arguments ---------------------------- + Additional keyword arguments include options for this minimizer implementation. Possible options are: @@ -161,6 +166,7 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): Flag if the function ``func`` also returns its gradients. Default is ``True``. + Any additional keyword arguments are passed on to the underlaying :func:`scipy.optimize.minimize` minimization function. 
@@ -177,13 +183,13 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): method_supports_bounds = False - constraints: Optional[List[Dict[str, Any]]] + # constraints: List[Dict[str, Any]] constraints = None # Check if method allows for bounds - if(self._method in ["L-BFGS-B", "TNC", "SLSQP"]): + if self._method in ["L-BFGS-B", "TNC", "SLSQP"]: method_supports_bounds = True - elif(self._method == "COBYLA"): + elif self._method == "COBYLA": # COBYLA doesn't allow for bounds, but we can convert bounds # to a linear constraint @@ -198,16 +204,15 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): constraints.append(uc) bounds = None - if((bounds is not None) and (not method_supports_bounds)): + if (bounds is not None) and (not method_supports_bounds): logger.warn( - "Selected minimization method ({}) does not " - "support bounds. Continue at your own risk.".format( - self._method)) + f'Selected minimization method ({self._method}) does not ' + 'support bounds. Continue at your own risk.') bounds = None - if(func_args is None): + if func_args is None: func_args = tuple() - if(kwargs is None): + if kwargs is None: kwargs = {} func_provides_grads = kwargs.pop('func_provides_grads', True) @@ -218,7 +223,7 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): bounds=bounds, constraints=constraints, args=func_args, - jac = func_provides_grads, + jac=func_provides_grads, **kwargs) return (res.x, res.fun, res) @@ -279,12 +284,20 @@ def is_repeatable(self, status): return False -class LBFGSMinimizerImpl(MinimizerImpl): +class LBFGSMinimizerImpl( + MinimizerImpl +): """The LBFGSMinimizerImpl class provides the minimizer implementation for L-BFG-S minimizer used from the :mod:`scipy.optimize` module. 
""" - def __init__(self, ftol=1e-6, pgtol=1e-5, maxls=100): + def __init__( + self, + ftol=1e-6, + pgtol=1e-5, + maxls=100, + **kwargs, + ): """Creates a new L-BGF-S minimizer instance to minimize the given likelihood function with its given partial derivatives. @@ -297,7 +310,7 @@ def __init__(self, ftol=1e-6, pgtol=1e-5, maxls=100): maxls : int The maximum number of line search steps for an interation. """ - super(LBFGSMinimizerImpl, self).__init__() + super().__init__(**kwargs) self._ftol = ftol self._pgtol = pgtol @@ -305,7 +318,14 @@ def __init__(self, ftol=1e-6, pgtol=1e-5, maxls=100): self._fmin_l_bfgs_b = scipy.optimize.fmin_l_bfgs_b - def minimize(self, initials, bounds, func, func_args=None, **kwargs): + def minimize( + self, + initials, + bounds, + func, + func_args=None, + **kwargs, + ): """Minimizes the given function ``func`` with the given initial function argument values ``initials``. @@ -339,6 +359,7 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): Flag if the function ``func`` also returns its gradients. Default is ``True``. + Any additional keyword arguments are passed on to the underlaying :func:`scipy.optimize.fmin_l_bfgs_b` minimization function. @@ -361,16 +382,16 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): 0: The minimization converged. 
""" - if(func_args is None): + if func_args is None: func_args = tuple() - if(kwargs is None): + if kwargs is None: kwargs = {} - if('factr' not in kwargs): + if 'factr' not in kwargs: kwargs['factr'] = self._ftol / np.finfo(float).eps - if('pgtol' not in kwargs): + if 'pgtol' not in kwargs: kwargs['pgtol'] = self._pgtol - if('maxls' not in kwargs): + if 'maxls' not in kwargs: kwargs['maxls'] = self._maxls func_provides_grads = kwargs.pop('func_provides_grads', True) @@ -385,7 +406,10 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): return (xmin, fmin, status) - def get_niter(self, status): + def get_niter( + self, + status, + ): """Returns the number of iterations needed to find the minimum. Parameters @@ -401,7 +425,10 @@ def get_niter(self, status): """ return status['nit'] - def has_converged(self, status): + def has_converged( + self, + status, + ): """Analyzes the status information dictionary if the minimization process has converged. By definition the minimization process has converged if ``status['warnflag']`` equals 0. @@ -417,11 +444,14 @@ def has_converged(self, status): converged : bool The flag if the minimization has converged (True), or not (False). """ - if(status['warnflag'] == 0): + if status['warnflag'] == 0: return True return False - def is_repeatable(self, status): + def is_repeatable( + self, + status, + ): """Checks if the minimization process can be repeated to get a better result. It's repeatable if @@ -439,25 +469,32 @@ def is_repeatable(self, status): The flag if the minimization process can be repeated to obtain a better minimum. """ - if(status['warnflag'] == 2): + if status['warnflag'] == 2: task = str(status['task']) - if('FACTR' in task): + if 'FACTR' in task: return True - if('ABNORMAL_TERMINATION_IN_LNSRCH' in task): + if 'ABNORMAL_TERMINATION_IN_LNSRCH' in task: # This is causes most probably by starting the minimization at # a parameter boundary. 
return True return False -class NR1dNsMinimizerImpl(MinimizerImpl): +class NR1dNsMinimizerImpl( + MinimizerImpl +): """The NR1dNsMinimizerImpl class provides a minimizer implementation for the Newton-Raphson method for finding the minimum of a one-dimensional R1->R1 function, i.e. a function that depends solely on one parameter, the number of signal events ns. """ - def __init__(self, ns_tol=1e-3, max_steps=100): + def __init__( + self, + ns_tol=1e-3, + max_steps=100, + **kwargs, + ): """Creates a new NRNs minimizer instance to minimize the given likelihood function with its given partial derivatives. @@ -469,12 +506,19 @@ def __init__(self, ns_tol=1e-3, max_steps=100): The maximum number of NR steps. If max_step is reached, the fit is considered NOT converged. """ - super(NR1dNsMinimizerImpl, self).__init__() + super().__init__(**kwargs) self.ns_tol = ns_tol self.max_steps = max_steps - def minimize(self, initials, bounds, func, func_args=None, **kwargs): + def minimize( # noqa: C901 + self, + initials, + bounds, + func, + func_args=None, + **kwargs, + ): """Minimizes the given function ``func`` with the given initial function argument values ``initials``. This minimizer implementation will only vary the first parameter. All other parameters will be set to their @@ -537,14 +581,14 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): The description for the set warn flag. 
""" - if(func_args is None): + if func_args is None: func_args = tuple() (ns_min, ns_max) = bounds[0] - if(ns_min > initials[0]): - raise ValueError('The initial value for ns (%g) must be equal or ' - 'greater than the minimum bound value for ns (%g)' % ( - initials[0], ns_min)) + if ns_min > initials[0]: + raise ValueError( + f'The initial value for ns ({initials[0]:g}) must be equal or ' + f'greater than the minimum bound value for ns ({ns_min:g})') ns_tol = self.ns_tol @@ -559,7 +603,10 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): fprime = 1000 # NR does not guarantee convergence, thus limit iterations. max_steps = self.max_steps - status = {'warnflag': 0, 'warnreason': ''} + status = { + 'warnflag': 0, + 'warnreason': '', + } f = None at_boundary = False @@ -568,35 +615,36 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): # minimum is in a deep well. # In case the optimum is found outside the bounds on ns the best fit # will be set to the boundary value and the fit considered converged. - while( ((ns_tol < np.fabs(step)) or (np.fabs(fprime) > 1.e-1)) and (niter < max_steps) ): + while ((ns_tol < np.fabs(step)) or (np.fabs(fprime) > 1.e-1)) and\ + (niter < max_steps): x[0] = ns (f, fprime, fprimeprime) = func(x, *func_args) step = -fprime / fprimeprime # Exit optimization if ns is at boundary but next step would be outside. - if((ns == ns_min and step < 0.0) or (ns == ns_max and step > 0.0)): + if (ns == ns_min and step < 0.0) or (ns == ns_max and step > 0.0): at_boundary = True - if(ns == ns_min): + if ns == ns_min: status['warnflag'] = -2 - status['warnreason'] = ('Function minimum is below the ' - 'minimum bound of the parameter ' - 'value. Convergence forced at boundary.') - elif(ns == ns_max): + status['warnreason'] = ( + 'Function minimum is below the minimum bound of the ' + 'parameter value. 
Convergence forced at boundary.') + elif ns == ns_max: status['warnflag'] = -1 - status['warnreason'] = ('Function minimum is above the ' - 'maximum bound of the parameter ' - 'value. Convergence forced at boundary.') + status['warnreason'] = ( + 'Function minimum is above the maximum bound of the ' + 'parameter value. Convergence forced at boundary.') break # Always perform step in ns as it improves the solution. ns += step # Do not allow ns outside boundaries. - if(ns < ns_min): + if ns < ns_min: ns = ns_min - elif(ns > ns_max): + elif ns > ns_max: ns = ns_max # Increase counter since a step was taken. @@ -606,19 +654,22 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): # Once converged evaluate function at minimum value unless # Convergence was forced at boundary # in which case function value is already known. - if(not at_boundary): + if not at_boundary: (f, fprime, fprimeprime) = func(x, *func_args) - if(niter == max_steps): + if niter == max_steps: status['warnflag'] = 1 - status['warnreason'] = ('NR optimization did not converge within {} ' - 'NR steps.'.format(niter)) + status['warnreason'] = ( + f'NR optimization did not converge within {niter} NR steps.') status['niter'] = niter status['last_nr_step'] = step return (x, f, status) - def get_niter(self, status): + def get_niter( + self, + status, + ): """Returns the number of iterations needed to find the minimum. Parameters @@ -634,7 +685,10 @@ def get_niter(self, status): """ return status['niter'] - def has_converged(self, status): + def has_converged( + self, + status, + ): """Analyzes the status information dictionary if the minimization process has converged. By definition the minimization process has converged if ``status['warnflag']`` is smaller or equal to 0. @@ -650,7 +704,7 @@ def has_converged(self, status): converged : bool The flag if the minimization has converged (True), or not (False). 
""" - if(status['warnflag'] <= 0): + if status['warnflag'] <= 0: return True return False @@ -663,13 +717,20 @@ def is_repeatable(self, status): return False -class NRNsScan2dMinimizerImpl(NR1dNsMinimizerImpl): +class NRNsScan2dMinimizerImpl( + NR1dNsMinimizerImpl +): """The NRNsScan2dMinimizerImpl class provides a minimizer implementation for the R2->R1 function where the first dimension is minimized using the Newton-Raphson minimization method and the second dimension is scanned. """ - def __init__(self, p2_scan_step, ns_tol=1e-3): + def __init__( + self, + p2_scan_step, + ns_tol=1e-3, + **kwargs, + ): """Creates a new minimizer implementation instance. Parameters @@ -680,10 +741,20 @@ def __init__(self, p2_scan_step, ns_tol=1e-3): ns_tol : float The tolerance / precision for the ns parameter value. """ - super().__init__(ns_tol=ns_tol) + super().__init__( + ns_tol=ns_tol, + **kwargs) + self.p2_scan_step = p2_scan_step - def minimize(self, initials, bounds, func, func_args=None, **kwargs): + def minimize( + self, + initials, + bounds, + func, + func_args=None, + **kwargs, + ): """Minimizes the given function ``func`` with the given initial function argument values ``initials``. This minimizer implementation will only vary the first two parameters. 
The first parameter is the number of @@ -753,9 +824,10 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): p2_scan_values = np.linspace( p2_low, p2_high, int((p2_high-p2_low)/self.p2_scan_step)+1) - logger.debug('Minimize func by scanning 2nd parameter in {:d} steps ' - 'with a step size of {:g}'.format( - len(p2_scan_values), np.mean(np.diff(p2_scan_values)))) + logger.debug( + 'Minimize func by scanning 2nd parameter in ' + f'{len(p2_scan_values):d} steps with a step size of ' + f'{np.mean(np.diff(p2_scan_values)):g}') niter_total = 0 best_xmin = None @@ -766,7 +838,7 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): (xmin, fmin, status) = super().minimize( initials, bounds, func, func_args, **kwargs) niter_total += status['niter'] - if((best_fmin is None) or (fmin < best_fmin)): + if (best_fmin is None) or (fmin < best_fmin): best_xmin = xmin best_fmin = fmin best_status = status @@ -777,13 +849,20 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): return (best_xmin, best_fmin, best_status) -class Minimizer(object): +class Minimizer( + object +): """The Minimizer class provides the general interface for minimizing a function. The class takes an instance of MinimizerImpl for a specific minimizer implementation. """ - def __init__(self, minimizer_impl, max_repetitions=100): + def __init__( + self, + minimizer_impl, + max_repetitions=100, + **kwargs, + ): """Creates a new Minimizer instance. Parameters @@ -795,6 +874,8 @@ def __init__(self, minimizer_impl, max_repetitions=100): this option specifies the maximum number of repetitions with different initials. 
""" + super().__init__(**kwargs) + self.minimizer_impl = minimizer_impl self.max_repetitions = max_repetitions @@ -807,9 +888,10 @@ def minimizer_impl(self): @minimizer_impl.setter def minimizer_impl(self, impl): - if(not isinstance(impl, MinimizerImpl)): - raise TypeError('The minimizer_impl property must be an instance ' - 'of MinimizerImpl!') + if not isinstance(impl, MinimizerImpl): + raise TypeError( + 'The minimizer_impl property must be an instance of ' + 'MinimizerImpl!') self._minimizer_impl = impl @property @@ -822,12 +904,19 @@ def max_repetitions(self): @max_repetitions.setter def max_repetitions(self, n): - if(not isinstance(n, int)): - raise TypeError('The maximal repetitions property must be of type ' - 'int!') + if not isinstance(n, int): + raise TypeError( + 'The maximal repetitions property must be of type int!') self._max_repetitions = n - def minimize(self, rss, fitparamset, func, args=None, kwargs=None): + def minimize( + self, + rss, + paramset, + func, + args=None, + kwargs=None, + ): """Minimizes the the given function ``func`` by calling the ``minimize`` method of the minimizer implementation. @@ -841,8 +930,8 @@ def minimize(self, rss, fitparamset, func, args=None, kwargs=None): ---------- rss : RandomStateService instance The RandomStateService instance to draw random numbers from. - fitparamset : instance of FitParameterSet - The set of FitParameter instances defining fit parameters of the + paramset : instance of ParameterSet + The ParameterSet instances holding the floating parameters of the function ``func``. func : callable ``f(x, *args)`` The function to be minimized. It must have the call signature @@ -867,32 +956,32 @@ def minimize(self, rss, fitparamset, func, args=None, kwargs=None): The status dictionary with information about the minimization process. 
""" - if(not isinstance(fitparamset, FitParameterSet)): - raise TypeError('The fitparamset argument must be an instance of ' - 'FitParameterSet!') + if not isinstance(paramset, ParameterSet): + raise TypeError( + 'The paramset argument must be an instance of ParameterSet!') - if(kwargs is None): + if kwargs is None: kwargs = dict() - bounds = fitparamset.bounds - initials = fitparamset.initials - logger.debug('Do function minimization: initials: {}'.format(initials)) + bounds = paramset.floating_param_bounds + initials = paramset.floating_param_initials + logger.debug(f'Doing function minimization: initials: {initials}.') (xmin, fmin, status) = self._minimizer_impl.minimize( initials, bounds, func, args, **kwargs) reps = 0 - while((not self._minimizer_impl.has_converged(status)) and - self._minimizer_impl.is_repeatable(status) and - reps < self._max_repetitions - ): + while (not self._minimizer_impl.has_converged(status)) and\ + (self._minimizer_impl.is_repeatable(status)) and\ + (reps < self._max_repetitions): # The minimizer did not converge at the first time, but it is # possible to repeat the minimization process with different # initials to obtain a better result. # Create a new set of random parameter initials based on the # parameter bounds. - initials = fitparamset.generate_random_initials(rss) + initials = paramset.generate_random_floating_param_initials( + rss=rss) logger.debug( 'Previous rep ({}) status={}, new initials={}'.format( @@ -907,22 +996,22 @@ def minimize(self, rss, fitparamset, func, args=None, kwargs=None): # Store the number of repetitions in the status dictionary. status['skyllh_minimizer_n_reps'] = reps - if(not self._minimizer_impl.has_converged(status)): + if not self._minimizer_impl.has_converged(status): raise ValueError( - 'The minimizer did not converge after %d ' - 'repetitions! The maximum number of repetitions is %d. ' - 'The status dictionary is "%s".' 
% ( - reps, self._max_repetitions, str(status))) + f'The minimizer did not converge after {reps:d} repetitions! ' + 'The maximum number of repetitions is ' + f'{self._max_repetitions:d}. The status dictionary is ' + f'"{str(status)}".') # Check if any fit value is outside its bounds due to rounding errors by # the minimizer. If so, set those fit values to their respective bound # value and re-evaluate the function with the corrected fit values. condmin = xmin < bounds[:, 0] condmax = xmin > bounds[:, 1] - if(np.any(condmin) or np.any(condmax)): + if np.any(condmin) or np.any(condmax): xmin = np.where(condmin, bounds[:, 0], xmin) xmin = np.where(condmax, bounds[:, 1], xmin) - if(args is None): + if args is None: args = tuple() (fmin, grads) = func(xmin, *args) diff --git a/skyllh/core/minimizers/iminuit.py b/skyllh/core/minimizers/iminuit.py index 60491f929a..51613d7585 100644 --- a/skyllh/core/minimizers/iminuit.py +++ b/skyllh/core/minimizers/iminuit.py @@ -149,6 +149,7 @@ def minimize(self, initials, bounds, func, func_args=None, **kwargs): Flag if the function ``func`` also returns its gradients. Default is ``True``. + Any additional keyword arguments are passed on to the underlaying :func:`iminuit.minimize` minimization function. diff --git a/skyllh/core/model.py b/skyllh/core/model.py index 756668c3aa..ee786ee44c 100644 --- a/skyllh/core/model.py +++ b/skyllh/core/model.py @@ -1,20 +1,26 @@ # -*- coding: utf-8 -*- -# Author: Martin Wolf +# Author: Dr. Martin Wolf """This module defines the base class for any model class used in SkyLLH. """ from skyllh.core.py import ( NamedObjectCollection, - issequence, - str_cast + issequenceof, + str_cast, + typename, ) -class Model(object): + +class Model( + object): """This class provides a base class for all model classes used in SkyLLH. Models could be for instance source models or background models. """ - def __init__(self, name=None): + def __init__( + self, + name=None, + **kwargs): """Creates a new Model instance. 
Parameters @@ -23,9 +29,10 @@ def __init__(self, name=None): The name of the model. If set to `None`, the id of the object is taken as name. """ - super(Model, self).__init__() + super().__init__( + **kwargs) - if(name is None): + if name is None: name = self.id self.name = name @@ -35,9 +42,12 @@ def name(self): """The name of the model. """ return self._name + @name.setter def name(self, name): - name = str_cast(name, 'The name property must be castable to type str!') + name = str_cast( + name, + 'The name property must be castable to type str!') self._name = name @property @@ -49,12 +59,16 @@ def id(self): return id(self) -class ModelCollection(NamedObjectCollection): +class ModelCollection( + NamedObjectCollection): """This class describes a collection of Model instances. It can be used to group several models into a single object. """ @staticmethod - def cast(obj, errmsg=None): + def cast( + obj, + errmsg=None, + **kwargs): """Casts the given object to a ModelCollection object. If the cast fails, a TypeError with the given error message is raised. @@ -68,37 +82,47 @@ def cast(obj, errmsg=None): The error message if the cast fails. If set to None, a generic error message will be used. + Additional keyword arguments + ---------------------------- + Additional keyword arguments are passed to the constructor of the + ModelCollection class. + Raises ------ TypeError - If the cast fails. + If the cast failed. Returns ------- - modelcollection : instance of ModelCollection + model_collection : instance of ModelCollection The created ModelCollection instance. If `obj` is already a ModelCollection instance, it will be returned. 
""" - if(obj is None): - obj = ModelCollection(models=None, model_type=Model) - return obj + if obj is None: + return ModelCollection( + models=None, model_type=Model, **kwargs) - if(isinstance(obj, Model)): - obj = ModelCollection(models=[obj], model_type=Model) - return obj + if isinstance(obj, Model): + return ModelCollection( + models=[obj], model_type=Model, **kwargs) - if(isinstance(obj, ModelCollection)): + if isinstance(obj, ModelCollection): return obj - if(issequence(obj)): - obj = ModelCollection(models=obj, model_type=Model) - return obj + if issequenceof(obj, Model): + return ModelCollection( + models=obj, model_type=Model, **kwargs) - if(errmsg is None): - errmsg = 'Cast of object "%s" to ModelCollection failed!'%(str(obj)) + if errmsg is None: + errmsg = (f'Cast of object "{str(obj)}" of type ' + f'"{typename(type(obj))}" to ModelCollection failed!') raise TypeError(errmsg) - def __init__(self, models=None, model_type=None): + def __init__( + self, + models=None, + model_type=None, + **kwargs): """Creates a new Model collection. The type of the model instances this collection holds can be restricted, by setting the model_type argument. @@ -110,16 +134,16 @@ def __init__(self, models=None, model_type=None): The type of the model. It must be a subclass of class ``Model``. If set to None (default), Model will be used. """ - if(model_type is None): + if model_type is None: model_type = Model + if not issubclass(model_type, Model): + raise TypeError( + 'The model_type argument must be a subclass of Model!') - if(not issubclass(model_type, Model)): - raise TypeError('The model_type argument must be a subclass of ' - 'class Model!') - - super(ModelCollection, self).__init__( + super().__init__( objs=models, - obj_type=model_type) + obj_type=model_type, + **kwargs) @property def model_type(self): @@ -136,9 +160,9 @@ def models(self): class DetectorModel(Model): """This class provides a base class for a detector model. 
It can be used - in combination with the ModelParameterMapper class. + in combination with the ParameterModelMapper class. """ - def __init__(self, name): + def __init__(self, name, **kwargs): """Creates a new DetectorModel instance. Parameters @@ -146,4 +170,6 @@ def __init__(self, name): name : str The name of the detector model. """ - super().__init__(name=name) + super().__init__( + name=name, + **kwargs) diff --git a/skyllh/core/multiproc.py b/skyllh/core/multiproc.py index a79cf95432..579db79d92 100644 --- a/skyllh/core/multiproc.py +++ b/skyllh/core/multiproc.py @@ -1,23 +1,12 @@ # -*- coding: utf-8 -*- import logging -import numpy as np +from logging.handlers import ( + QueueHandler, +) import multiprocessing as mp - -try: - # For Python 3. - from logging.handlers import QueueHandler -except ImportError: - # For Python 2. - from skyllh.core.debugging import QueueHandler - -try: - # For Python 3. - import queue -except ImportError: - # For Python 2. - import Queue as queue - +import numpy as np +import queue import time from skyllh.core.config import CFG @@ -26,7 +15,9 @@ from skyllh.core.timing import TimeLord -def get_ncpu(local_ncpu): +def get_ncpu( + local_ncpu, +): """Determines the number of CPUs to use for functions that support multi-processing. @@ -44,17 +35,30 @@ def get_ncpu(local_ncpu): returned. 
""" ncpu = local_ncpu - if(ncpu is None): + if ncpu is None: ncpu = CFG['multiproc']['ncpu'] - if(ncpu is None): + if ncpu is None: ncpu = 1 - if(not isinstance(ncpu, int)): - raise TypeError('The ncpu setting must be of type int!') - if(ncpu < 1): - raise ValueError('The ncpu setting must be >= 1!') + + if not isinstance(ncpu, int): + raise TypeError( + 'The ncpu setting must be of type int!') + + if ncpu < 1: + raise ValueError( + 'The ncpu setting must be >= 1!') + return ncpu -def parallelize(func, args_list, ncpu, rss=None, tl=None, ppbar=None): + +def parallelize( # noqa: C901 + func, + args_list, + ncpu, + rss=None, + tl=None, + ppbar=None, +): """Parallelizes the execution of the given function for different arguments. Parameters @@ -86,8 +90,16 @@ def parallelize(func, args_list, ncpu, rss=None, tl=None, ppbar=None): """ # Define a wrapper function for the multiprocessing module that evaluates # ``func`` for a subset of `args_list` on a worker process. - def worker_wrapper(func, sub_args_list, pid, rqueue, lqueue, - squeue=None, rss=None, tl=None): + def worker_wrapper( + func, + sub_args_list, + pid, + rqueue, + lqueue, + squeue=None, + rss=None, + tl=None, + ): """Wrapper function for the multiprocessing module that evaluates ``func`` for the subset ``sub_args_list`` of ``args_list`` on a worker process. 
@@ -129,14 +141,14 @@ def worker_wrapper(func, sub_args_list, pid, rqueue, lqueue, queue_handler.queue = lqueue result_list = [] - for (task_idx, (args,kwargs)) in enumerate(sub_args_list): - if(rss is not None): + for (task_idx, (args, kwargs)) in enumerate(sub_args_list): + if rss is not None: kwargs['rss'] = rss - if(tl is not None): + if tl is not None: kwargs['tl'] = tl result_list.append(func(*args, **kwargs)) - if(squeue is not None): + if squeue is not None: squeue.put((pid, task_idx)) rqueue.put((pid, result_list, tl)) @@ -147,7 +159,14 @@ def worker_wrapper(func, sub_args_list, pid, rqueue, lqueue, # Define a wrapper function that evaluates ``func`` for a subset of # `args_list` on the master process. def master_wrapper( - pbar, sarr, func, sub_args_list, squeue=None, rss=None, tl=None): + pbar, + sarr, + func, + sub_args_list, + squeue=None, + rss=None, + tl=None, + ): """This is the wrapper function for the master process. Parameters @@ -184,22 +203,22 @@ def master_wrapper( tasks. """ result_list = [] - for (master_task_idx, (args,kwargs)) in enumerate(sub_args_list): - if(rss is not None): + for (master_task_idx, (args, kwargs)) in enumerate(sub_args_list): + if rss is not None: kwargs['rss'] = rss - if(tl is not None): + if tl is not None: kwargs['tl'] = tl result_list.append(func(*args, **kwargs)) # Skip the rest, if we are not in an interactive session, hence # there is not progress bar. - if(not pbar.gets_shown): + if not pbar.is_shown: continue sarr[0]['n_finished_tasks'] = master_task_idx + 1 # Get possible status information from the worker processes. - if(squeue is not None): + if squeue is not None: while not squeue.empty(): (pid, worker_task_idx) = squeue.get() sarr[pid]['n_finished_tasks'] = worker_task_idx + 1 @@ -214,7 +233,7 @@ def master_wrapper( pbar = ProgressBar(maxval=len(args_list), parent=ppbar).start() # Return result list if only one CPU is used. 
- if(ncpu == 1): + if ncpu == 1: sarr = np.zeros((1,), dtype=[('n_finished_tasks', np.int64)]) result_list = master_wrapper( pbar, sarr, func, args_list, squeue=None, rss=rss, tl=tl) @@ -227,9 +246,10 @@ def master_wrapper( # We will use our own process (pid = 0) as a worker too. rqueue = mp.Queue() squeue = None - if(pbar.gets_shown): + if pbar.is_shown: squeue = mp.Queue() - sub_args_list_list = np.array_split(args_list, ncpu) + + sub_args_list_list = np.array_split(np.array(args_list, dtype=object), ncpu) # Create a multiprocessing queue for each worker process. # Prepend it with None to be able to use `pid` as the list index. @@ -238,26 +258,30 @@ def master_wrapper( # Create a list of RandomStateService for each process if rss argument is # set. rss_list = [rss] - if(rss is None): + if rss is None: rss_list += [None]*(ncpu-1) else: - if(not isinstance(rss, RandomStateService)): - raise TypeError('The rss argument must be an instance of ' - 'RandomStateService!') - rss_list.extend([RandomStateService(seed=rss.random.randint(0, 2**32)) - for i in range(1, ncpu)]) + if not isinstance(rss, RandomStateService): + raise TypeError( + 'The rss argument must be an instance of RandomStateService!') + rss_list.extend([ + RandomStateService(seed=rss.random.randint(0, 2**32)) + for i in range(1, ncpu) + ]) # Create a list of TimeLord instances, one for each process if tl argument # is set. tl_list = [tl] - if(tl is None): + if tl is None: tl_list += [None]*(ncpu-1) else: - if(not isinstance(tl, TimeLord)): - raise TypeError('The tl argument must be an instance of ' - 'TimeLord!') - tl_list.extend([TimeLord() - for i in range(1, ncpu)]) + if not isinstance(tl, TimeLord): + raise TypeError( + 'The tl argument must be an instance of TimeLord!') + tl_list.extend([ + TimeLord() + for i in range(1, ncpu) + ]) # Replace all existing main process handlers with the `QueueHandler`. 
# This allows storing all the log record generated by worker processes at @@ -309,24 +333,26 @@ def master_wrapper( except queue.Empty: # If this exception is raised, either the child process isn't # finished yet, or it dies due to an exception. - if(proc.exitcode is None): + if proc.exitcode is None: # Child process hasn't finish yet. # We'll wait a short moment. time.sleep(0.01) - elif(proc.exitcode != 0): + elif proc.exitcode != 0: proc_died = True - if(proc_died): - raise RuntimeError('Child process %d did not return with 0! ' - 'Exit code was %d.'%(proc.pid, proc.exitcode)) + if proc_died: + raise RuntimeError( + f'Child process {proc.pid} did not return with 0! ' + f'Exit code was {proc.exitcode}.') pid_result_list_map[pid] = result_list - if(tl is not None): + if tl is not None: tl.join(proc_tl) - logger.debug('Beginning of worker process (pid=%d) log records.', pid) + logger.debug( + f'Beginning of worker process (pid={pid}) log records.') lqueue_end = False while not lqueue_end: record = lqueue_list[pid].get() - if record == None: + if record is None: lqueue_end = True else: lqueue_logger = logging.getLogger(record.name) @@ -347,13 +373,21 @@ def master_wrapper( return result_list -class IsParallelizable(object): +class IsParallelizable( + object, +): """Classifier class defining the ncpu property. Classes that derive from this class indicate, that they can make use of multi-processing on several CPUs at the same time. """ - def __init__(self, ncpu=None, *args, **kwargs): - super(IsParallelizable, self).__init__(*args, **kwargs) + def __init__( + self, + *args, + ncpu=None, + **kwargs, + ): + super().__init__(*args, **kwargs) + self.ncpu = ncpu @property @@ -363,11 +397,14 @@ def ncpu(self): is set to None, the global NCPU setting will take precedence. 
""" return get_ncpu(self._ncpu) + @ncpu.setter def ncpu(self, n): - if(n is not None): - if(not isinstance(n, int)): - raise TypeError('The ncpu property must be of type int!') - if(n < 1): - raise ValueError('The ncpu property must be >= 1!') + if n is not None: + if not isinstance(n, int): + raise TypeError( + 'The ncpu property must be of type int!') + if n < 1: + raise ValueError( + 'The ncpu property must be >= 1!') self._ncpu = n diff --git a/skyllh/core/optimize.py b/skyllh/core/optimize.py deleted file mode 100644 index eac91fbcfc..0000000000 --- a/skyllh/core/optimize.py +++ /dev/null @@ -1,1072 +0,0 @@ -# -*- coding: utf-8 -*- - -import abc -import inspect -import numpy as np -import scipy.sparse - -from skyllh.core.py import ( - classname, - float_cast, - issequenceof -) -from skyllh.core.source_hypothesis import SourceHypoGroupManager -from skyllh.core.timing import TaskTimer -from skyllh.physics.source import SourceModel - - -class EventSelectionMethod(object, metaclass=abc.ABCMeta): - """This is the abstract base class for all event selection method classes. - The idea is to pre-select only events that contribute to the likelihood - function, i.e. are more signal than background like. The different methods - are implemented through derived classes of this base class. - """ - - def __init__(self, src_hypo_group_manager): - """Creates a new event selection method instance. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of SourceHypoGroupManager that defines the list of - sources, i.e. the list of SourceModel instances. - """ - super(EventSelectionMethod, self).__init__() - - self.src_hypo_group_manager = src_hypo_group_manager - - # The _src_arr variable holds a numpy record array with the necessary - # source information needed for the event selection method. 
- self._src_arr = self.source_to_array( - self._src_hypo_group_manager.source_list) - - @property - def src_hypo_group_manager(self): - """The SourceHypoGroupManager instance, which defines the list of - sources. - """ - return self._src_hypo_group_manager - @src_hypo_group_manager.setter - def src_hypo_group_manager(self, manager): - if(not isinstance(manager, SourceHypoGroupManager)): - raise TypeError( - 'The src_hypo_group_manager property must be an instance of ' - 'SourceHypoGroupManager!') - self._src_hypo_group_manager = manager - - def change_source_hypo_group_manager(self, src_hypo_group_manager): - """Changes the SourceHypoGroupManager instance of the event selection - method. This will also recreate the internal source numpy record array. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The new SourceHypoGroupManager instance, that should be used for - this event selection method. - """ - self.src_hypo_group_manager = src_hypo_group_manager - self._src_arr = self.source_to_array(self._src_hypo_group_manager.source_list) - - @abc.abstractmethod - def source_to_array(self, sources): - """This method is supposed to convert a sequence of SourceModel - instances into a structured numpy ndarray with the source information - in a format that is best understood by the actual event selection - method. - - Parameters - ---------- - sources : sequence of SourceModel - The sequence of source models containing the necessary information - of the source. - - Returns - ------- - arr : numpy record ndarray - The generated numpy record ndarray holding the necessary information - for each source. - """ - pass - - @abc.abstractmethod - def select_events(self, events, ret_src_ev_idxs=False, tl=None): - """This method selects the events, which will contribute to the - log-likelihood ratio function. - - Parameters - ---------- - events : instance of DataFieldRecordArray - The instance of DataFieldRecordArray holding the events. 
- ret_src_ev_idxs : bool - Flag if also the indices of the selected events should get - returned as a (src_idxs, ev_idxs) tuple of 1d ndarrays. - Default is False. - tl : instance of TimeLord | None - The optional instance of TimeLord that should be used to collect - timing information about this method. - - Returns - ------- - selected_events : DataFieldRecordArray - The instance of DataFieldRecordArray holding the selected events, - i.e. a subset of the `events` argument. - (src_idxs, ev_idxs) : 1d ndarrays of ints | None - The indices of sources and selected events, in case - `ret_src_ev_idxs` is set to True. Returns None, in case - `ret_src_ev_idxs` is set to False. - """ - pass - - -class AllEventSelectionMethod(EventSelectionMethod): - """This event selection method selects all events. - """ - def __init__(self, src_hypo_group_manager): - """Creates a new event selection method instance. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of SourceHypoGroupManager that defines the list of - sources, i.e. the list of SourceModel instances. For this particular - event selection method it has no meaning, but it is an interface - parameter. - """ - super(AllEventSelectionMethod, self).__init__( - src_hypo_group_manager) - - def source_to_array(self, sources): - return None - - def select_events(self, events, ret_src_ev_idxs=False, tl=None): - """Selects all of the given events. Hence, the returned event array is - the same as the given array. - - Parameters - ---------- - events : instance of DataFieldRecordArray - The instance of DataFieldRecordArray holding the events, for which - the selection method should get applied. - ret_src_ev_idxs : bool - Flag if also the indices of the selected events should get - returned as a (src_idxs, ev_idxs) tuple of 1d ndarrays. - Default is False. 
- tl : instance of TimeLord | None - The optional instance of TimeLord that should be used to collect - timing information about this method. - - Returns - ------- - selected_events : DataFieldRecordArray - The instance of DataFieldRecordArray holding the selected events, - i.e. a subset of the `events` argument. - (src_idxs, ev_idxs) : 1d ndarrays of ints | None - The indices of sources and selected events, in case - `ret_src_ev_idxs` is set to True. Returns None, in case - `ret_src_ev_idxs` is set to False. - """ - if(ret_src_ev_idxs): - # Calculate events indices. - with TaskTimer(tl, 'ESM: Calculate indices of selected events.'): - n_sources = self.src_hypo_group_manager.n_sources - src_idxs = np.repeat(np.arange(n_sources), len(events.indices)) - ev_idxs = np.tile(events.indices, n_sources) - - return (events, (src_idxs, ev_idxs)) - - return (events, None) - - -class SpatialEventSelectionMethod(EventSelectionMethod, metaclass=abc.ABCMeta): - """This class defines the base class for all spatial event selection - methods. - """ - - def __init__(self, src_hypo_group_manager): - """Creates a new event selection method instance. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of SourceHypoGroupManager that defines the list of - sources, i.e. the list of SourceModel instances. - """ - super(SpatialEventSelectionMethod, self).__init__( - src_hypo_group_manager) - - def source_to_array(self, sources): - """Converts the given sequence of SourceModel instances into a - structured numpy ndarray holding the necessary source information needed - for this event selection method. - - Parameters - ---------- - sources : sequence of SourceModel - The sequence of source models containing the necessary information - of the source. - - Returns - ------- - arr : numpy record ndarray - The generated numpy record ndarray holding the necessary information - for each source. It contains the following data fields: 'ra', 'dec'. 
- """ - if(not issequenceof(sources, SourceModel)): - raise TypeError('The sources argument must be a sequence of ' - 'SourceModel instances!') - - arr = np.empty( - (len(sources),), - dtype=[('ra', np.float64), ('dec', np.float64)], - order='F') - - for (i, src) in enumerate(sources): - arr['ra'][i] = src.loc.ra - arr['dec'][i] = src.loc.dec - - return arr - - -class DecBandEventSectionMethod(SpatialEventSelectionMethod): - """This event selection method selects events within a declination band - around a list of point-like source positions. - """ - def __init__(self, src_hypo_group_manager, delta_angle): - """Creates and configures a spatial declination band event selection - method object. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of SourceHypoGroupManager that defines the list of - sources, i.e. the list of SourceModel instances. - delta_angle : float - The half-opening angle around the source in declination for which - events should get selected. - """ - super(DecBandEventSectionMethod, self).__init__( - src_hypo_group_manager) - - self.delta_angle = delta_angle - - @property - def delta_angle(self): - """The half-opening angle around the source in declination and - right-ascention for which events should get selected. - """ - return self._delta_angle - @delta_angle.setter - def delta_angle(self, angle): - angle = float_cast(angle, 'The delta_angle property must be castable ' - 'to type float!') - self._delta_angle = angle - - def select_events( - self, events, ret_src_ev_idxs=False, - ret_mask_idxs=False, tl=None): - """Selects the events within the declination band. - - Parameters - ---------- - events : instance of DataFieldRecordArray - The instance of DataFieldRecordArray that holds the event data. - The following data fields must exist: - - - 'dec' : float - The declination of the event. 
- ret_src_ev_idxs : bool - Flag if also the indices of the selected events should get - returned as a (src_idxs, ev_idxs) tuple of 1d ndarrays. - Default is False. - ret_mask_idxs : bool - Flag if also the indices of the selected events mask should get - returned as a mask_idxs 1d ndarray. - Default is False. - tl : instance of TimeLord | None - The optional instance of TimeLord that should be used to collect - timing information about this method. - - Returns - ------- - selected_events : instance of DataFieldRecordArray - The instance of DataFieldRecordArray holding only the selected - events. - idxs: where idxs is one of the following: - - (src_idxs, ev_idxs) : 1d ndarrays of ints - The indices of sources and selected events, in case - `ret_src_ev_idxs` is set to True. - - mask_idxs : 1d ndarrays of ints - The indices of selected events mask, in case - `ret_mask_idxs` is set to True. - - None - In case both `ret_src_ev_idxs` and `ret_mask_idxs` are set to - False. - """ - delta_angle = self._delta_angle - src_arr = self._src_arr - - # Calculates the minus and plus declination around each source and - # bound it to -90deg and +90deg, respectively. - src_dec_minus = np.maximum(-np.pi/2, src_arr['dec'] - delta_angle) - src_dec_plus = np.minimum(src_arr['dec'] + delta_angle, np.pi/2) - - # Determine the mask for the events which fall inside the declination - # window. - # mask_dec is a (N_sources,N_events)-shaped ndarray. - with TaskTimer(tl, 'ESM-DecBand: Calculate mask_dec.'): - mask_dec = ((events['dec'] > src_dec_minus[:,np.newaxis]) & - (events['dec'] < src_dec_plus[:,np.newaxis])) - - # Determine the mask for the events that fall inside at least one - # source declination band. - # mask is a (N_events,)-shaped ndarray. - with TaskTimer(tl, 'ESM-DecBand: Calculate mask.'): - mask = np.any(mask_dec, axis=0) - - # Reduce the events according to the mask. 
- with TaskTimer(tl, 'ESM-DecBand: Create selected_events.'): - # Using an integer indices array for data selection is several - # factors faster than using a boolean array. - mask_idxs = events.indices[mask] - selected_events = events[mask_idxs] - - if(ret_src_ev_idxs and ret_mask_idxs): - raise ValueError( - 'Only one of `ret_src_ev_idxs` and `ret_mask_idxs` can be set ' - 'to True.') - elif(ret_src_ev_idxs): - # Get selected events indices. - idxs = np.argwhere(mask_dec[:, mask]) - src_idxs = idxs[:, 0] - ev_idxs = idxs[:, 1] - return (selected_events, (src_idxs, ev_idxs)) - elif(ret_mask_idxs): - return (selected_events, mask_idxs) - - return (selected_events, None) - - -class RABandEventSectionMethod(SpatialEventSelectionMethod): - """This event selection method selects events within a right-ascension band - around a list of point-like source positions. - """ - def __init__(self, src_hypo_group_manager, delta_angle): - """Creates and configures a right-ascension band event selection - method object. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of SourceHypoGroupManager that defines the list of - sources, i.e. the list of SourceModel instances. - delta_angle : float - The half-opening angle around the source in right-ascension for - which events should get selected. - """ - super(RABandEventSectionMethod, self).__init__( - src_hypo_group_manager) - - self.delta_angle = delta_angle - - @property - def delta_angle(self): - """The half-opening angle around the source in declination and - right-ascention for which events should get selected. - """ - return self._delta_angle - @delta_angle.setter - def delta_angle(self, angle): - angle = float_cast(angle, - 'The delta_angle property must be castable to type float!') - self._delta_angle = angle - - def select_events(self, events, ret_src_ev_idxs=False, tl=None): - """Selects the events within the right-ascention band. 
- - The solid angle dOmega = dRA * dSinDec = dRA * dDec * cos(dec) is a - function of declination, i.e. for a constant dOmega, the right-ascension - value has to change with declination. - - Parameters - ---------- - events : instance of DataFieldRecordArray - The instance of DataFieldRecordArray that holds the event data. - The following data fields must exist: - - - 'ra' : float - The right-ascention of the event. - - 'dec' : float - The declination of the event. - ret_src_ev_idxs : bool - Flag if also the indices of the selected events should get - returned as a (src_idxs, ev_idxs) tuple of 1d ndarrays. - Default is False. - tl : instance of TimeLord | None - The optional instance of TimeLord that should be used to collect - timing information about this method. - - Returns - ------- - selected_events : instance of DataFieldRecordArray - The instance of DataFieldRecordArray holding only the selected - events. - (src_idxs, ev_idxs) : 1d ndarrays of ints | None - The indices of sources and selected events, in case - `ret_src_ev_idxs` is set to True. Returns None, in case - `ret_src_ev_idxs` is set to False. - """ - delta_angle = self._delta_angle - src_arr = self._src_arr - - # Get the minus and plus declination around the sources. - src_dec_minus = np.maximum(-np.pi/2, src_arr['dec'] - delta_angle) - src_dec_plus = np.minimum(src_arr['dec'] + delta_angle, np.pi/2) - - # Calculate the cosine factor for the largest declination distance from - # the source. We use np.amin here because smaller cosine values are - # larger angles. - # cosfact is a (N_sources,)-shaped ndarray. - cosfact = np.amin(np.cos([src_dec_minus, src_dec_plus]), axis=0) - - # Calculate delta RA, which is a function of declination. - # dRA is a (N_sources,)-shaped ndarray. - dRA_half = np.amin( - [np.repeat(2*np.pi, len(src_arr['ra'])), - np.fabs(delta_angle / cosfact)], axis=0) - - # Calculate the right-ascension distance of the events w.r.t. the - # source. 
We make sure to use the smaller distance on the circle, thus - # the maximal distance is 180deg, i.e. pi. - # ra_dist is a (N_sources,N_events)-shaped 2D ndarray. - with TaskTimer(tl, 'ESM-RaBand: Calculate ra_dist.'): - ra_dist = np.fabs( - np.mod(events['ra'] - src_arr['ra'][:,np.newaxis] + np.pi, 2*np.pi) - np.pi) - - # Determine the mask for the events which fall inside the - # right-ascention window. - # mask_ra is a (N_sources,N_events)-shaped ndarray. - with TaskTimer(tl, 'ESM-RaBand: Calculate mask_ra.'): - mask_ra = ra_dist < dRA_half[:,np.newaxis] - - # Determine the mask for the events that fall inside at least one - # source sky window. - # mask is a (N_events,)-shaped ndarray. - with TaskTimer(tl, 'ESM-RaBand: Calculate mask.'): - mask = np.any(mask_ra, axis=0) - - # Reduce the events according to the mask. - with TaskTimer(tl, 'ESM-RaBand: Create selected_events.'): - # Using an integer indices array for data selection is several - # factors faster than using a boolean array. - selected_events = events[events.indices[mask]] - - if(ret_src_ev_idxs): - # Get selected events indices. - idxs = np.argwhere(mask_ra[:, mask]) - src_idxs = idxs[:, 0] - ev_idxs = idxs[:, 1] - - return (selected_events, (src_idxs, ev_idxs)) - - return (selected_events, None) - - -class SpatialBoxEventSelectionMethod(SpatialEventSelectionMethod): - """This event selection method selects events within a spatial box in - right-ascention and declination around a list of point-like source - positions. - """ - def __init__(self, src_hypo_group_manager, delta_angle): - """Creates and configures a spatial box event selection method object. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of SourceHypoGroupManager that defines the list of - sources, i.e. the list of SourceModel instances. - delta_angle : float - The half-opening angle around the source for which events should - get selected. 
- """ - super(SpatialBoxEventSelectionMethod, self).__init__( - src_hypo_group_manager) - - self.delta_angle = delta_angle - - @property - def delta_angle(self): - """The half-opening angle around the source in declination and - right-ascention for which events should get selected. - """ - return self._delta_angle - @delta_angle.setter - def delta_angle(self, angle): - angle = float_cast(angle, - 'The delta_angle property must be castable to type float!') - self._delta_angle = angle - - def select_events(self, events, ret_src_ev_idxs=False, tl=None): - """Selects the events within the spatial box in right-ascention and - declination. - - The solid angle dOmega = dRA * dSinDec = dRA * dDec * cos(dec) is a - function of declination, i.e. for a constant dOmega, the right-ascension - value has to change with declination. - - Parameters - ---------- - events : instance of DataFieldRecordArray - The instance of DataFieldRecordArray that holds the event data. - The following data fields must exist: - - - 'ra' : float - The right-ascention of the event. - - 'dec' : float - The declination of the event. - ret_src_ev_idxs : bool - Flag if also the indices of the selected events should get - returned as a (src_idxs, ev_idxs) tuple of 1d ndarrays. - Default is False. - tl : instance of TimeLord | None - The optional instance of TimeLord that should be used to collect - timing information about this method. - - Returns - ------- - selected_events : instance of DataFieldRecordArray - The instance of DataFieldRecordArray holding only the selected - events. - (src_idxs, ev_idxs) : 1d ndarrays of ints | None - The indices of sources and selected events, in case - `ret_src_ev_idxs` is set to True. Returns None, in case - `ret_src_ev_idxs` is set to False. - """ - delta_angle = self._delta_angle - src_arr = self._src_arr - - # Get the minus and plus declination around the sources. 
- src_dec_minus = np.maximum(-np.pi/2, src_arr['dec'] - delta_angle) - src_dec_plus = np.minimum(src_arr['dec'] + delta_angle, np.pi/2) - - # Calculate the cosine factor for the largest declination distance from - # the source. We use np.amin here because smaller cosine values are - # larger angles. - # cosfact is a (N_sources,)-shaped ndarray. - cosfact = np.amin(np.cos([src_dec_minus, src_dec_plus]), axis=0) - - # Calculate delta RA, which is a function of declination. - # dRA is a (N_sources,)-shaped ndarray. - dRA_half = np.amin( - [np.repeat(2*np.pi, len(src_arr['ra'])), - np.fabs(delta_angle / cosfact)], axis=0) - - # Determine the mask for the events which fall inside the - # right-ascention window. - # mask_ra is a (N_sources,N_events)-shaped ndarray. - with TaskTimer(tl, 'ESM: Calculate mask_ra.'): - nsrc = len(src_arr['ra']) - # Fill in batch sizes of 200 maximum to save memory. - batch_size=200 - if nsrc > batch_size: - mask_ra = np.zeros((nsrc, len(events['ra'])), dtype=bool) - n_batches = int(np.ceil(nsrc / float(batch_size))) - for bi in range(n_batches): - if not (bi == n_batches-1): - mask_ra[bi*batch_size : (bi+1)*batch_size,...] = (np.fabs( - np.mod(events['ra'] - src_arr['ra'][bi*batch_size : (bi+1)*batch_size][:,np.newaxis] + np.pi, 2*np.pi) - - np.pi) < dRA_half[ bi*batch_size : (bi+1)*batch_size ][:,np.newaxis]) - else: - mask_ra[bi*batch_size : ,...] = (np.fabs( - np.mod(events['ra'] - src_arr['ra'][bi*batch_size:][:,np.newaxis] + np.pi, 2*np.pi) - - np.pi) < dRA_half[bi*batch_size:][:,np.newaxis]) - - else: - mask_ra = np.fabs( - np.mod(events['ra'] - src_arr['ra'][:,np.newaxis] + np.pi, 2*np.pi) - np.pi) < dRA_half[:,np.newaxis] - - # Determine the mask for the events which fall inside the declination - # window. - # mask_dec is a (N_sources,N_events)-shaped ndarray. 
- with TaskTimer(tl, 'ESM: Calculate mask_dec.'): - mask_dec = ((events['dec'] > src_dec_minus[:,np.newaxis]) & - (events['dec'] < src_dec_plus[:,np.newaxis])) - - # Determine the mask for the events which fall inside the - # right-ascension and declination window. - # mask_sky is a (N_sources,N_events)-shaped ndarray. - with TaskTimer(tl, 'ESM: Calculate mask_sky.'): - mask_sky = mask_ra & mask_dec - del mask_ra - del mask_dec - - # Determine the mask for the events that fall inside at least one - # source sky window. - # mask is a (N_events,)-shaped ndarray. - with TaskTimer(tl, 'ESM: Calculate mask.'): - mask = np.any(mask_sky, axis=0) - - # Reduce the events according to the mask. - with TaskTimer(tl, 'ESM: Create selected_events.'): - # Using an integer indices array for data selection is several - # factors faster than using a boolean array. - selected_events = events[events.indices[mask]] - - if(ret_src_ev_idxs): - # Get selected events indices. - idxs = np.argwhere(mask_sky[:, mask]) - src_idxs = idxs[:, 0] - ev_idxs = idxs[:, 1] - - return (selected_events, (src_idxs, ev_idxs)) - - return (selected_events, None) - - -class PsiFuncEventSelectionMethod(EventSelectionMethod): - """This event selection method selects events whose psi value, i.e. the - great circle distance of the event to the source, is smaller than the value - of the provided function. - """ - def __init__(self, src_hypo_group_manager, psi_name, func, axis_name_list): - """Creates a new PsiFuncEventSelectionMethod instance. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of SourceHypoGroupManager that defines the list of - sources, i.e. the list of SourceModel instances. - psi_name : str - The name of the data field that provides the psi value of the event. - func : callable - The function that should get evaluated for each event. 
The call - signature must be ``func(*axis_data)``, where ``*axis_data`` is the - event data of each required axis. The number of axes must match the - provided axis names through the ``axis_name_list``. - axis_name_list : list of str - The list of data field names for each axis of the function ``func``. - All field names must be valid field names of the trial data's - DataFieldRecordArray instance. - """ - super(PsiFuncEventSelectionMethod, self).__init__( - src_hypo_group_manager) - - self.psi_name = psi_name - self.func = func - self.axis_name_list = axis_name_list - - if(not (len(inspect.signature(self._func).parameters) >= - len(self._axis_name_list))): - raise TypeError( - 'The func argument must be a callable instance with at least ' - '%d arguments!'%( - len(self._axis_name_list))) - - n_sources = self.src_hypo_group_manager.n_sources - if(n_sources != 1): - raise ValueError( - 'The `PsiFuncEventSelectionMethod.select_events` currently ' - f'supports only one source. It was called with {n_sources} ' - 'sources.' - ) - - @property - def psi_name(self): - """The name of the data field that provides the psi value of the event. - """ - return self._psi_name - @psi_name.setter - def psi_name(self, name): - if(not isinstance(name, str)): - raise TypeError( - 'The psi_name property must be an instance of type str!') - self._psi_name = name - - @property - def func(self): - """The function that should get evaluated for each event. The call - signature must be ``func(*axis_data)``, where ``*axis_data`` is the - event data of each required axis. The number of axes must match the - provided axis names through the ``axis_name_list`` property. - """ - return self._func - @func.setter - def func(self, f): - if(not callable(f)): - raise TypeError( - 'The func property must be a callable instance!') - self._func = f - - @property - def axis_name_list(self): - """The list of data field names for each axis of the function defined - through the ``func`` property. 
- """ - return self._axis_name_list - @axis_name_list.setter - def axis_name_list(self, names): - if(not issequenceof(names, str)): - raise TypeError( - 'The axis_name_list property must be a sequence of str ' - 'instances!') - self._axis_name_list = list(names) - - def source_to_array(self, sources): - """Converts the given sequence of SourceModel instances into a - structured numpy ndarray holding the necessary source information needed - for this event selection method. - - Parameters - ---------- - sources : sequence of SourceModel - The sequence of source models containing the necessary information - of the source. - - Returns - ------- - arr : None - Because this event selection method does not depend directly on the - source (only indirectly through the psi values), no source array - is required. - """ - return None - - def select_events(self, events, ret_src_ev_idxs=False, tl=None): - """Selects the events whose psi value is smaller than the value of the - predefined function. - - Parameters - ---------- - events : instance of DataFieldRecordArray - The instance of DataFieldRecordArray that holds the event data. - The following data fields must exist: - - - : float - The great circle distance of the event with the source. - - <*axis_name_list> : float - The name of the axis required for the function ``func`` to be - evaluated. - - ret_src_ev_idxs : bool - Flag if also the indices of the selected events should get - returned as a (src_idxs, ev_idxs) tuple of 1d ndarrays. - Default is False. - tl : instance of TimeLord | None - The optional instance of TimeLord that should be used to collect - timing information about this method. - - Returns - ------- - selected_events : instance of DataFieldRecordArray - The instance of DataFieldRecordArray holding only the selected - events. - (src_idxs, ev_idxs) : 1d ndarrays of ints | None - The indices of sources and selected events, in case - `ret_src_ev_idxs` is set to True. 
Returns None, in case - `ret_src_ev_idxs` is set to False. - """ - cls_name = classname(self) - - with TaskTimer(tl, '%s: Get psi values.'%(cls_name)): - psi = events[self._psi_name] - - with TaskTimer(tl, '%s: Get axis data values.'%(cls_name)): - func_args = [ events[axis] for axis in self._axis_name_list ] - - with TaskTimer(tl, '%s: Creating mask.'%(cls_name)): - mask = psi < self._func(*func_args) - - with TaskTimer(tl, '%s: Create selected_events.'%(cls_name)): - # Using an integer indices array for data selection is several - # factors faster than using a boolean array. - selected_events = events[events.indices[mask]] - - if(ret_src_ev_idxs): - # Get selected events indices. - idxs = np.argwhere(np.atleast_2d(mask)) - src_idxs = idxs[:, 0] - ev_idxs = idxs[:, 1] - return (selected_events, (src_idxs, ev_idxs)) - - return (selected_events, None) - - -class SpatialBoxAndPsiFuncEventSelectionMethod(SpatialBoxEventSelectionMethod): - """This event selection method selects events within a spatial box in - right-ascention and declination around a list of point-like source - positions and performs an additional selection of events whose ang_err value - is larger than the value of the provided function at a given psi value. - """ - def __init__(self, src_hypo_group_manager, delta_angle, psi_name, func, - axis_name_list, psi_floor=None): - """Creates and configures a spatial box and psi func event selection - method object. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of SourceHypoGroupManager that defines the list of - sources, i.e. the list of SourceModel instances. - delta_angle : float - The half-opening angle around the source for which events should - get selected. - psi_name : str - The name of the data field that provides the psi value of the event. - func : callable - The function that should get evaluated for each event. 
The call - signature must be ``func(*axis_data)``, where ``*axis_data`` is the - event data of each required axis. The number of axes must match the - provided axis names through the ``axis_name_list``. - axis_name_list : list of str - The list of data field names for each axis of the function ``func``. - All field names must be valid field names of the trial data's - DataFieldRecordArray instance. - psi_floor : float | None - The psi func event selection is excluded for events having psi value - below the `psi_floor`. If None, set it to default 5 degrees. - """ - super(SpatialBoxAndPsiFuncEventSelectionMethod, self).__init__( - src_hypo_group_manager, delta_angle) - - self.psi_name = psi_name - self.func = func - self.axis_name_list = axis_name_list - - if(psi_floor is None): - psi_floor = np.deg2rad(5) - self.psi_floor = psi_floor - - if(not (len(inspect.signature(self._func).parameters) >= - len(self._axis_name_list))): - raise TypeError( - 'The func argument must be a callable instance with at least ' - '%d arguments!'%( - len(self._axis_name_list))) - - @property - def psi_name(self): - """The name of the data field that provides the psi value of the event. - """ - return self._psi_name - @psi_name.setter - def psi_name(self, name): - if(not isinstance(name, str)): - raise TypeError( - 'The psi_name property must be an instance of type str!') - self._psi_name = name - - @property - def func(self): - """The function that should get evaluated for each event. The call - signature must be ``func(*axis_data)``, where ``*axis_data`` is the - event data of each required axis. The number of axes must match the - provided axis names through the ``axis_name_list`` property. 
- """ - return self._func - @func.setter - def func(self, f): - if(not callable(f)): - raise TypeError( - 'The func property must be a callable instance!') - self._func = f - - @property - def axis_name_list(self): - """The list of data field names for each axis of the function defined - through the ``func`` property. - """ - return self._axis_name_list - @axis_name_list.setter - def axis_name_list(self, names): - if(not issequenceof(names, str)): - raise TypeError( - 'The axis_name_list property must be a sequence of str ' - 'instances!') - self._axis_name_list = list(names) - - @property - def psi_floor(self): - """The psi func event selection is excluded for events having psi value - below the `psi_floor`. - """ - return self._psi_floor - @psi_floor.setter - def psi_floor(self, psi): - psi = float_cast(psi, 'The psi_floor property must be castable ' - 'to type float!') - self._psi_floor = psi - - def _get_psi(self, events, idxs): - """Function to calculate the the opening angle between the source - position and the event's reconstructed position. - """ - ra = events['ra'] - dec = events['dec'] - - src_idxs, ev_idxs = idxs - src_ra = self._src_arr['ra'][src_idxs] - src_dec = self._src_arr['dec'][src_idxs] - - delta_dec = np.abs(np.take(dec, ev_idxs) - src_dec) - delta_ra = np.abs(np.take(ra, ev_idxs) - src_ra) - x = (np.sin(delta_dec / 2.))**2. + np.cos(np.take(dec, ev_idxs)) *\ - np.cos(src_dec) * (np.sin(delta_ra / 2.))**2. - - # Handle possible floating precision errors. - x[x < 0.] = 0. - x[x > 1.] = 1. - - psi = (2.0*np.arcsin(np.sqrt(x))) - # Floor psi values below the first bin location in spatial KDE PDF. - # Flooring at the boundary (1e-6) requires a regeneration of the - # spatial KDE splines. 
- floor = 10**(-5.95442953) - psi = np.where(psi < floor, floor, psi) - - return psi - - def select_events(self, events, ret_src_ev_idxs=False, tl=None): - """Selects the events within the spatial box in right-ascention and - declination and performs an additional selection of events whose ang_err - value is larger than the value of the provided function at a given psi - value. - - The solid angle dOmega = dRA * dSinDec = dRA * dDec * cos(dec) is a - function of declination, i.e. for a constant dOmega, the right-ascension - value has to change with declination. - - Parameters - ---------- - events : instance of DataFieldRecordArray - The instance of DataFieldRecordArray that holds the event data. - The following data fields must exist: - - - 'ra' : float - The right-ascention of the event. - - 'dec' : float - The declination of the event. - ret_src_ev_idxs : bool - Flag if also the indices of the selected events should get - returned as a (src_idxs, ev_idxs) tuple of 1d ndarrays. - Default is False. - tl : instance of TimeLord | None - The optional instance of TimeLord that should be used to collect - timing information about this method. - - Returns - ------- - selected_events : instance of DataFieldRecordArray - The instance of DataFieldRecordArray holding only the selected - events. - (src_idxs, ev_idxs) : 1d ndarrays of ints | None - The indices of sources and selected events, in case - `ret_src_ev_idxs` is set to True. Returns None, in case - `ret_src_ev_idxs` is set to False. - """ - delta_angle = self._delta_angle - src_arr = self._src_arr - - # Get the minus and plus declination around the sources. - src_dec_minus = np.maximum(-np.pi/2, src_arr['dec'] - delta_angle) - src_dec_plus = np.minimum(src_arr['dec'] + delta_angle, np.pi/2) - - # Calculate the cosine factor for the largest declination distance from - # the source. We use np.amin here because smaller cosine values are - # larger angles. - # cosfact is a (N_sources,)-shaped ndarray. 
- cosfact = np.amin(np.cos([src_dec_minus, src_dec_plus]), axis=0) - - # Calculate delta RA, which is a function of declination. - # dRA is a (N_sources,)-shaped ndarray. - dRA_half = np.amin( - [np.repeat(2*np.pi, len(src_arr['ra'])), - np.fabs(delta_angle / cosfact)], axis=0) - - # Determine the mask for the events which fall inside the - # right-ascention window. - # mask_ra is a (N_sources,N_events)-shaped ndarray. - with TaskTimer(tl, 'ESM: Calculate mask_ra.'): - nsrc = len(src_arr['ra']) - - # Fill in batch sizes of 200 maximum to save memory. - batch_size=200 - if nsrc > batch_size: - mask_ra = np.zeros((nsrc, len(events['ra'])), dtype=bool) - n_batches = int(np.ceil(nsrc / float(batch_size))) - for bi in range(n_batches): - if not (bi == n_batches-1): - mask_ra[bi*batch_size : (bi+1)*batch_size,...] = (np.fabs( - np.mod(events['ra'] - src_arr['ra'][bi*batch_size : (bi+1)*batch_size][:,np.newaxis] + np.pi, 2*np.pi) - - np.pi) < dRA_half[ bi*batch_size : (bi+1)*batch_size ][:,np.newaxis]) - else: - mask_ra[bi*batch_size : ,...] = (np.fabs( - np.mod(events['ra'] - src_arr['ra'][bi*batch_size:][:,np.newaxis] + np.pi, 2*np.pi) - - np.pi) < dRA_half[bi*batch_size:][:,np.newaxis]) - - else: - mask_ra = np.fabs( - np.mod(events['ra'] - src_arr['ra'][:,np.newaxis] + np.pi, 2*np.pi) - np.pi) < dRA_half[:,np.newaxis] - - # Determine the mask for the events which fall inside the declination - # window. - # mask_dec is a (N_sources,N_events)-shaped ndarray. - with TaskTimer(tl, 'ESM: Calculate mask_dec.'): - mask_dec = ((events['dec'] > src_dec_minus[:,np.newaxis]) & - (events['dec'] < src_dec_plus[:,np.newaxis])) - - # Determine the mask for the events which fall inside the - # right-ascension and declination window. - # mask_sky is a (N_sources,N_events)-shaped ndarray. 
- with TaskTimer(tl, 'ESM: Calculate mask_sky.'): - mask_sky = mask_ra & mask_dec - del mask_ra - del mask_dec - - # Determine the mask for the events that fall inside at least one - # source sky window. - # mask is a (N_events,)-shaped ndarray. - with TaskTimer(tl, 'ESM: Calculate mask.'): - mask = np.any(mask_sky, axis=0) - - # Reduce the events according to the mask. - with TaskTimer(tl, 'ESM: Create selected_events.'): - # Get selected events indices. - idxs = np.argwhere(mask_sky[:, mask]) - src_idxs = idxs[:, 0] - ev_idxs = idxs[:, 1] - - # Using an integer indices array for data selection is several - # factors faster than using a boolean array. - selected_events = events[events.indices[mask]] - - # Perform an additional selection based on psi values. - with TaskTimer(tl, 'ESM: Get psi values.'): - psi = self._get_psi(selected_events, (src_idxs, ev_idxs)) - - with TaskTimer(tl, 'ESM: Create mask_psi.'): - mask_psi = ( - (self._func(psi) <= selected_events['ang_err'][ev_idxs]) - | (psi < self.psi_floor) - ) - - with TaskTimer(tl, 'ESM: Create final_selected_events.'): - # Have to define the shape argument in order to not truncate - # the mask in case last events are not selected. - final_mask_sky = scipy.sparse.csr_matrix( - (mask_psi, (src_idxs, ev_idxs)), - shape=(len(src_arr['ra']), len(selected_events)) - ).toarray() - final_mask = np.any(final_mask_sky, axis=0) - - # Using an integer indices array for data selection is several - # factors faster than using a boolean array. - final_selected_events = selected_events[selected_events.indices[final_mask]] - - if(ret_src_ev_idxs): - # Get final selected events indices. 
- idxs = np.argwhere(final_mask_sky[:, final_mask]) - src_idxs = idxs[:, 0] - ev_idxs = idxs[:, 1] - - return (final_selected_events, (src_idxs, ev_idxs)) - - return (final_selected_events, None) diff --git a/skyllh/core/parameters.py b/skyllh/core/parameters.py index 1ee2e077f1..dbe5cdc1bb 100644 --- a/skyllh/core/parameters.py +++ b/skyllh/core/parameters.py @@ -1,31 +1,36 @@ # -*- coding: utf-8 -*- -import abc import itertools import numpy as np from copy import deepcopy -from skyllh.physics.source import ( - SourceCollection, - SourceModel +from skyllh.core import ( + display, +) +from skyllh.core.model import ( + Model, + ModelCollection, ) -from skyllh.core import display -from skyllh.core.model import ModelCollection from skyllh.core.py import ( NamedObjectCollection, bool_cast, classname, - const, float_cast, get_number_of_float_decimals, + int_cast, issequence, issequenceof, - str_cast ) -from skyllh.core.random import RandomStateService +from skyllh.core.source_model import ( + SourceModel, +) -def make_linear_parameter_grid_1d(name, low, high, delta): +def make_linear_parameter_grid_1d( + name, + low, + high, + delta): """Utility function to create a ParameterGrid object for a 1-dimensional linear parameter grid. @@ -49,33 +54,6 @@ def make_linear_parameter_grid_1d(name, low, high, delta): grid = np.arange(low, high+delta, delta) return ParameterGrid(name, grid, delta) -def make_params_hash(params): - """Utility function to create a hash value for a given parameter dictionary. - - Parameters - ---------- - params : dict | None - The dictionary holding the parameter (name: value) pairs. - If set to None, an empty dictionary is used. - - Returns - ------- - hash : int - The hash of the parameter dictionary. 
- """ - if(params is None): - params = {} - - if(not isinstance(params, dict)): - raise TypeError('The params argument must be of type dict!') - - # A note on the ordering of Python dictionary items: The items are ordered - # internally according to the hash value of their keys. Hence, if we don't - # insert more dictionary items, the order of the items won't change. Thus, - # we can just take the items list and make a tuple to create a hash of it. - # The hash will be the same for two dictionaries having the same items. - return hash(tuple(params.items())) - class Parameter(object): """This class describes a parameter of a mathematical function, like a PDF, @@ -106,8 +84,8 @@ def __init__(self, name, initial, valmin=None, valmax=None, isfixed=None): were specified. Otherwise, the parameter is fixed. The default is None. """ - if(isfixed is None): - if((valmin is not None) and (valmax is not None)): + if isfixed is None: + if (valmin is not None) and (valmax is not None): isfixed = False else: isfixed = True @@ -124,10 +102,13 @@ def name(self): """The name of the parameter. """ return self._name + @name.setter def name(self, name): - if(not isinstance(name, str)): - raise TypeError('The name property must be of type str!') + if not isinstance(name, str): + raise TypeError( + 'The "name" property must be of type str! ' + f'Its current type is {classname(name)}.') self._name = name @property @@ -135,10 +116,12 @@ def initial(self): """The initial value of the parameter. """ return self._initial + @initial.setter def initial(self, v): - v = float_cast(v, 'The initial property must be castable to type ' - 'float!') + v = float_cast( + v, + 'The "initial" property must be castable to type float!') self._initial = v @property @@ -146,9 +129,12 @@ def isfixed(self): """The flag if the parameter is mutable (False) or not (True). 
""" return self._isfixed + @isfixed.setter def isfixed(self, b): - b = bool_cast(b, 'The isfixed property must be castable to type bool!') + b = bool_cast( + b, + 'The "isfixed" property must be castable to type bool!') self._isfixed = b @property @@ -156,9 +142,12 @@ def valmin(self): """The minimum bound value of the parameter. """ return self._valmin + @valmin.setter def valmin(self, v): - v = float_cast(v, 'The valmin property must be castable to type float!', + v = float_cast( + v, + 'The "valmin" property must be castable to type float!', allow_None=True) self._valmin = v @@ -167,9 +156,12 @@ def valmax(self): """The maximum bound value of the parameter. """ return self._valmax + @valmax.setter def valmax(self, v): - v = float_cast(v, 'The valmax property must be castable to type float!', + v = float_cast( + v, + 'The "valmax" property must be castable to type float!', allow_None=True) self._valmax = v @@ -178,19 +170,23 @@ def value(self): """The current value of the parameter. """ return self._value + @value.setter def value(self, v): - v = float_cast(v, 'The value property must be castable to type float!') - if(self._isfixed): - if(v != self._initial): - raise ValueError('The value (%f) of the fixed parameter "%s" ' - 'must to equal to the parameter\'s initial value (%f)!'%( - v, self.name, self._initial)) + v = float_cast( + v, + 'The "value" property must be castable to type float!') + if self._isfixed: + if v != self._initial: + raise ValueError( + f'The value ({v}) of the fixed parameter "{self._name}" ' + 'must be equal to the parameter\'s initial value ' + f'({self._initial})!') else: - if((v < self._valmin) or (v > self._valmax)): - raise ValueError('The value (%f) of parameter "%s" must be ' - 'within the range (%f, %f)!'%( - v, self._name, self._valmin, self._valmax)) + if (v < self._valmin) or (v > self._valmax): + raise ValueError( + f'The value ({v}) of parameter "{self._name}" must be ' + f'within the range [{self._valmin:g}, 
{self._valmax:g}]!') self._value = v def __eq__(self, other): @@ -210,17 +206,17 @@ def __eq__(self, other): True, if this Parameter instance and the other Parameter instance have the same property values. """ - if((self.name != other.name) or - (self.value != other.value) or - (self.isfixed != other.isfixed)): + if (self.name != other.name) or\ + (self.value != other.value) or\ + (self.isfixed != other.isfixed): return False # If both parameters are floating parameters, also their initial, min, # and max values must match. - if(not self.isfixed): - if((self.initial != other.initial) or - (self.valmin != other.valmin) or - (self.valmax != other.valmax)): + if not self.isfixed: + if (self.initial != other.initial) or\ + (self.valmin != other.valmin) or\ + (self.valmax != other.valmax): return False return True @@ -231,14 +227,14 @@ def __str__(self): """ indstr = ' ' * display.INDENTATION_WIDTH - s = 'Parameter: %s = %.3f '%(self._name, self._value) + s = f'Parameter: {self._name} = {self._value:g} ' - if(self.isfixed): + if self.isfixed: s += '[fixed]' else: s += '[floating] {\n' - s += indstr + 'initial: %.3f\n'%(self._initial) - s += indstr + 'range: (%.3f, %.3f)\n'%(self._valmin, self._valmax) + s += indstr + f'initial: {self._initial:g}\n' + s += indstr + f'range: ({self._valmin:g}, {self._valmax:g})\n' s += '}' return s @@ -263,14 +259,21 @@ def as_linear_grid(self, delta): ValueError If this Parameter instance represents a fixed parameter. """ - if(self.isfixed): - raise ValueError('Cannot create a linear grid from the fixed ' - 'parameter "%s". The parameter must be floating!'%(self.name)) + if self.isfixed: + raise ValueError( + 'Cannot create a linear grid from the fixed ' + f'parameter "{self._name}". 
The parameter must be floating!') + + delta = float_cast( + delta, + 'The delta argument must be castable to type float!') - delta = float_cast(delta, 'The delta argument must be castable to type ' - 'float!') grid = make_linear_parameter_grid_1d( - self._name, self._valmin, self._valmax, delta) + name=self._name, + low=self._valmin, + high=self._valmax, + delta=delta) + return grid def change_fixed_value(self, value): @@ -291,9 +294,9 @@ def change_fixed_value(self, value): ValueError If this parameter is not a fixed parameter. """ - if(not self._isfixed): - raise ValueError('The parameter "%s" is not a fixed parameter!'%( - self.name)) + if not self._isfixed: + raise ValueError( + f'The parameter "{self._name}" is not a fixed parameter!') self.initial = value self.value = value @@ -315,7 +318,7 @@ def make_fixed(self, initial=None): self._isfixed = True # If no new initial value is given, use the current value. - if(initial is None): + if initial is None: self._initial = self._value return self._value @@ -324,8 +327,8 @@ def make_fixed(self, initial=None): # Undefine the valmin and valmax values if the parameter's new value is # outside the valmin and valmax range. - if((self._valmin is not None) and (self._valmax is not None) and - ((self._value < self._valmin) or (self._value > self._valmax))): + if (self._valmin is not None) and (self._valmax is not None) and\ + ((self._value < self._valmin) or (self._value > self._valmax)): self._valmin = None self._valmax = None @@ -360,19 +363,21 @@ def make_floating(self, initial=None, valmin=None, valmax=None): If valmin is set to None and this parameter has no valmin defined. If valmax is set to None and this parameter has no valmax defined. 
""" - if(initial is None): + if initial is None: initial = self._value - if(valmin is None): - if(self._valmin is None): - raise ValueError('The current minimal value of parameter "%s" ' + if valmin is None: + if self._valmin is None: + raise ValueError( + f'The current minimal value of parameter "{self._name}" ' 'is not set. So it must be defined through the valmin ' - 'argument!'%(self._name)) + 'argument!') valmin = self._valmin - if(valmax is None): - if(self._valmax is None): - raise ValueError('The current maximal value of parameter "%s" ' + if valmax is None: + if self._valmax is None: + raise ValueError( + f'The current maximal value of parameter "{self._name}" ' 'is not set. So it must be defined through the valmax ' - 'argument!'%(self._name)) + 'argument!') valmax = self._valmax self._isfixed = False @@ -384,7 +389,8 @@ def make_floating(self, initial=None, valmin=None, valmax=None): return self._value -class ParameterSet(object): +class ParameterSet( + object): """This class holds a set of Parameter instances. """ @staticmethod @@ -403,17 +409,19 @@ def union(*paramsets): The newly created ParameterSet instance that holds the union of the parameters provided by all the ParameterSet instances. 
""" - if(not issequenceof(paramsets, ParameterSet)): - raise TypeError('The arguments of the union static function must ' - 'be instances of ParameterSet!') - if(not len(paramsets) >= 1): - raise ValueError('At least 1 ParameterSet instance must be ' - 'provided to the union static function!') + if not issequenceof(paramsets, ParameterSet): + raise TypeError( + 'The arguments of the union static function must be instances ' + 'of ParameterSet!') + if len(paramsets) == 0: + raise ValueError( + 'At least 1 ParameterSet instance must be provided to the ' + 'union static function!') paramset = ParameterSet(params=paramsets[0]) for paramset_i in paramsets[1:]: for param in paramset_i._params: - if(not paramset.has_param(param)): + if not paramset.has_param(param): paramset.add_param(param) return paramset @@ -450,12 +458,13 @@ def __init__(self, params=None): self._fixed_param_values = np.empty((0,), dtype=np.float64) # Add the initial Parameter instances. - if(params is not None): - if(isinstance(params, Parameter)): + if params is not None: + if isinstance(params, Parameter): params = [params] - if(not issequenceof(params, Parameter)): - raise TypeError('The params argument must be None, an instance ' - 'of Parameter, or a sequence of Parameter instances!') + if not issequenceof(params, Parameter): + raise TypeError( + 'The params argument must be None, an instance of ' + 'Parameter, or a sequence of Parameter instances!') for param in params: self.add_param(param) @@ -465,6 +474,12 @@ def params(self): """ return self._params + @property + def params_name_list(self): + """(read-only) The list of str holding the names of all the parameters. 
+ """ + return self._fixed_param_name_list + self._floating_param_name_list + @property def fixed_params(self): """(read-only) The 1D ndarray holding the Parameter instances, whose @@ -472,6 +487,12 @@ def fixed_params(self): """ return self._params[self._params_fixed_mask] + @property + def fixed_params_name_list(self): + """(read-only) The list of the fixed parameter names. + """ + return self._fixed_param_name_list + @property def fixed_params_mask(self): """(read-only) The 1D ndarray holding the mask for the fixed parameters @@ -479,6 +500,13 @@ def fixed_params_mask(self): """ return self._params_fixed_mask + @property + def fixed_params_idxs(self): + """The numpy ndarray holding the indices of the fixed parameters. + """ + idxs = np.argwhere(self._params_fixed_mask).flatten() + return idxs + @property def floating_params(self): """(read-only) The 1D ndarray holding the Parameter instances, @@ -486,6 +514,12 @@ def floating_params(self): """ return self._params[np.invert(self._params_fixed_mask)] + @property + def floating_params_name_list(self): + """(read-only) The list of the floating parameter names. + """ + return self._floating_param_name_list + @property def floating_params_mask(self): """(read-only) The 1D ndarray holding the mask for the floating @@ -493,6 +527,13 @@ def floating_params_mask(self): """ return np.invert(self._params_fixed_mask) + @property + def floating_params_idxs(self): + """The numpy ndarray holding the indices of the floating parameters. + """ + idxs = np.argwhere(self.floating_params_mask).flatten() + return idxs + @property def n_params(self): """(read-only) The number of parameters this ParameterSet has. @@ -513,18 +554,6 @@ def n_floating_params(self): """ return len(self._floating_param_name_list) - @property - def fixed_param_name_list(self): - """(read-only) The list of the fixed parameter names. 
- """ - return self._fixed_param_name_list - - @property - def floating_param_name_list(self): - """(read-only) The list of the floating parameter names. - """ - return self._floating_param_name_list - @property def fixed_param_values(self): """(read-only) The (n_fixed_params,)-shaped ndarray holding values of @@ -538,11 +567,15 @@ def floating_param_initials(self): initial values of all the global floating parameters. """ floating_params = self.floating_params - if(len(floating_params) == 0): + + if len(floating_params) == 0: return np.empty((0,), dtype=np.float64) - return np.array( - [ param.initial - for param in floating_params ], dtype=np.float64) + + initials = np.array( + [param.initial for param in floating_params], + dtype=np.float64) + + return initials @property def floating_param_bounds(self): @@ -550,11 +583,32 @@ def floating_param_bounds(self): boundaries for all the floating parameters. """ floating_params = self.floating_params - if(len(floating_params) == 0): - return np.empty((0,2), dtype=np.float64) - return np.array( - [ (param.valmin, param.valmax) - for param in floating_params ], dtype=np.float64) + + if len(floating_params) == 0: + return np.empty((0, 2), dtype=np.float64) + + bounds = np.array( + [(param.valmin, param.valmax) for param in floating_params], + dtype=np.float64) + + return bounds + + def __contains__(self, param_name): + """Implements the ``param_name in self`` expression. It calls the + :meth:`has_param` method of this class. + + Parameters + ---------- + param_name : str + The name of the parameter. + + Returns + ------- + check : bool + Returns ``True`` if the given parameter is part of this ParameterSet + instance, ``False`` otherwise. + """ + return self.has_param(param_name) def __iter__(self): """Returns an iterator over the Parameter instances of this ParameterSet @@ -571,9 +625,9 @@ def __str__(self): """Creates and returns a pretty string representation of this ParameterSet instance. 
""" - s = '%s: %d parameters (%d floating, %d fixed) {'%( - classname(self), self.n_params, self.n_floating_params, - self.n_fixed_params) + s = (f'{classname(self)}: {self.n_params} parameters ' + f'({self.n_floating_params} floating, ' + f'{self.n_fixed_params} fixed) ''{') for param in self._params: s += '\n' s += display.add_leading_text_line_padding( @@ -622,6 +676,33 @@ def get_floating_pidx(self, param_name): """ return self._floating_param_name_to_idx[param_name] + def generate_random_floating_param_initials(self, rss): + """Generates a set of random initials for all floating parameters. + A new random initial is defined as + + lower_bound + RAND * (upper_bound - lower_bound), + + where RAND is a uniform random variable between 0 and 1. + + Parameters + ---------- + rss : RandomStateService instance + The RandomStateService instance that should be used for drawing + random numbers from. + + Returns + ------- + ri : (N_floating_params,)-shaped numpy ndarray + The numpy 1D ndarray holding the generated random initial values. + """ + vb = self.floating_param_bounds + + # Do random_initial = lower_bound + RAND * (upper_bound - lower_bound). + ri = (vb[:, 0] + + rss.random.uniform(size=vb.shape[0])*(vb[:, 1] - vb[:, 0])) + + return ri + def has_fixed_param(self, param_name): """Checks if this ParameterSet instance has a fixed parameter named ``param_name``. @@ -678,28 +759,29 @@ def make_params_fixed(self, fix_params): self._fixed_param_values = np.empty((0,), dtype=np.float64) for (pidx, param) in enumerate(self._params): pname = param.name - if(pname in fix_params_keys): + if pname in fix_params_keys: # The parameter of name `pname` should get fixed. 
- if(param.isfixed is True): - raise ValueError('The parameter "%s" is already a fixed ' - 'parameter!'%(pname)) + if param.isfixed is True: + raise ValueError( + f'The parameter "{pname}" is already a fixed ' + 'parameter!') initial = fix_params[pname] param.make_fixed(initial) self._params_fixed_mask[pidx] = True - self._fixed_param_name_list += [ pname ] + self._fixed_param_name_list += [pname] self._fixed_param_values = np.concatenate( (self._fixed_param_values, [param.value])) self._fixed_param_name_to_idx[pname] = len( self._fixed_param_name_list) - 1 else: - if(param.isfixed): - self._fixed_param_name_list += [ pname ] + if param.isfixed: + self._fixed_param_name_list += [pname] self._fixed_param_values = np.concatenate( (self._fixed_param_values, [param.value])) self._fixed_param_name_to_idx[pname] = len( self._fixed_param_name_list) - 1 else: - self._floating_param_name_list += [ pname ] + self._floating_param_name_list += [pname] self._floating_param_name_to_idx[pname] = len( self._floating_param_name_list) - 1 @@ -714,14 +796,14 @@ def make_params_floating(self, float_params): floating. The format of a dictionary's entry can be one of the following formats: - - None + ``None`` The parameter's initial, minimal and maximal value should be taken from the parameter's current settings. - - initial : float + initial : float The parameter's initial value should be set to the given value. The minimal and maximal values of the parameter will be taken from the parameter's current settings. - - (initial, valmin, valmax) + (initial, valmin, valmax) The parameter's initial value, minimal and maximal value should be set to the given values. If `initial` is set to `None`, the parameter's current value will be used as @@ -736,9 +818,9 @@ def _parse_float_param_dict_entry(e): """Parses the given float_param dictionary entry into initial, valmin, and valmax values. 
""" - if(e is None): + if e is None: return (None, None, None) - if(issequence(e)): + if issequence(e): return (e[0], e[1], e[2]) return (e, None, None) @@ -750,27 +832,28 @@ def _parse_float_param_dict_entry(e): self._fixed_param_values = np.empty((0,), dtype=np.float64) for (pidx, param) in enumerate(self._params): pname = param.name - if(pname in float_params_keys): + if pname in float_params_keys: # The parameter of name `pname` should get set floating. - if(param.isfixed is False): - raise ValueError('The parameter "%s" is already a floating ' - 'parameter!'%(pname)) + if param.isfixed is False: + raise ValueError( + f'The parameter "{pname}" is already a floating ' + 'parameter!') (initial, valmin, valmax) = _parse_float_param_dict_entry( float_params[pname]) param.make_floating(initial, valmin, valmax) self._params_fixed_mask[pidx] = False - self._floating_param_name_list += [ pname ] + self._floating_param_name_list += [pname] self._floating_param_name_to_idx[pname] = len( self._floating_param_name_list) - 1 else: - if(param.isfixed): - self._fixed_param_name_list += [ pname ] + if param.isfixed: + self._fixed_param_name_list += [pname] self._fixed_param_values = np.concatenate( (self._fixed_param_values, [param.value])) self._fixed_param_name_to_idx[pname] = len( self._fixed_param_name_list) - 1 else: - self._floating_param_name_list += [ pname ] + self._floating_param_name_list += [pname] self._floating_param_name_to_idx[pname] = len( self._floating_param_name_list) - 1 @@ -818,37 +901,41 @@ def add_param(self, param, atfront=False): If given parameter is already present in the set. The check is performed based on the parameter name. """ - if(not isinstance(param, Parameter)): - raise TypeError('The param argument must be an instance of ' - 'Parameter!') + if not isinstance(param, Parameter): + raise TypeError( + 'The param argument must be an instance of Parameter! 
' + f'Its current type is {classname(param)}.') - if(self.has_param(param)): - raise KeyError('The parameter named "%s" was already added to the ' - 'parameter set!'%(param.name)) + if self.has_param(param): + raise KeyError( + f'The parameter named "{param.name}" was already added to the ' + 'parameter set!') param_fixed_mask = True if param.isfixed else False - if(atfront): + if atfront: # Add parameter at front of parameter list. self._params = np.concatenate( ([param], self._params)) self._params_fixed_mask = np.concatenate( ([param_fixed_mask], self._params_fixed_mask)) - if(param.isfixed): + if param.isfixed: self._fixed_param_name_list = ( [param.name] + self._fixed_param_name_list) self._fixed_param_values = np.concatenate( ([param.value], self._fixed_param_values)) # Shift the index of all fixed parameters. - self._fixed_param_name_to_idx = dict([ (k,v+1) - for (k,v) in self._fixed_param_name_to_idx.items() ]) + self._fixed_param_name_to_idx = dict( + [(k, v+1) + for (k, v) in self._fixed_param_name_to_idx.items()]) self._fixed_param_name_to_idx[param.name] = 0 else: self._floating_param_name_list = ( [param.name] + self._floating_param_name_list) # Shift the index of all floating parameters. - self._floating_param_name_to_idx = dict([ (k,v+1) - for (k,v) in self._floating_param_name_to_idx.items() ]) + self._floating_param_name_to_idx = dict( + [(k, v+1) + for (k, v) in self._floating_param_name_to_idx.items()]) self._floating_param_name_to_idx[param.name] = 0 else: # Add parameter at back of parameter list. 
@@ -856,7 +943,7 @@ def add_param(self, param, atfront=False): (self._params, [param])) self._params_fixed_mask = np.concatenate( (self._params_fixed_mask, [param_fixed_mask])) - if(param.isfixed): + if param.isfixed: self._fixed_param_name_list = ( self._fixed_param_name_list + [param.name]) self._fixed_param_values = np.concatenate( @@ -886,13 +973,13 @@ def has_param(self, param): ``True`` if the given parameter is present in this parameter set, ``False`` otherwise. """ - if((param.name in self._floating_param_name_list) or - (param.name in self._fixed_param_name_list)): + if (param.name in self._floating_param_name_list) or\ + (param.name in self._fixed_param_name_list): return True return False - def floating_param_values_to_dict(self, floating_param_values): + def get_params_dict(self, floating_param_values): """Converts the given floating parameter values into a dictionary with the floating parameter names and values and also adds the fixed parameter names and their values to this dictionary. @@ -905,203 +992,38 @@ def floating_param_values_to_dict(self, floating_param_values): Returns ------- - param_dict : dict + params_dict : dict The dictionary with the floating and fixed parameter names and values. """ - param_dict = dict( + params_dict = dict( list(zip(self._floating_param_name_list, floating_param_values)) + - list(zip(self._fixed_param_name_list, self._fixed_param_values))) - - return param_dict - - -class ParameterSetArray(object): - """This class provides a data holder for an array of ParameterSet instances. - Given an array of global floating parameter values, it can split that array - into floating parameter value sub arrays, one for each ParameterSet instance - of this ParameterSetArray instance. This functionality is required in - order to be able to map the global floating parameter values from the - minimizer to their parameter names. 
- """ - def __init__(self, paramsets): - """Creates a new ParameterSetArray instance, which will hold a list of - constant ParameterSet instances. - - Parameters - ---------- - paramsets : const instance of ParameterSet | sequence of const instances - of ParameterSet - The sequence of constant ParameterSet instances holding the global - parameters. - - Raises - ------ - TypeError - If the given paramsets argument ist not a sequence of constant - instances of ParameterSet. - """ - super(ParameterSetArray, self).__init__() - - if(isinstance(paramsets, ParameterSet)): - paramsets = [paramsets] - if(not issequenceof(paramsets, ParameterSet, const)): - raise TypeError('The paramsets argument must be a constant ' - 'instance of ParameterSet or a sequence of constant ' - 'ParameterSet instances!') - self._paramset_list = list(paramsets) - - # Calculate the total number of parameters hold by this - # ParameterSetArray instance. - self._n_params = np.sum([paramset.n_params - for paramset in self._paramset_list]) - - # Calculate the total number of fixed parameters hold by this - # ParameterSetArray instance. - self._n_fixed_params = np.sum([paramset.n_fixed_params - for paramset in self._paramset_list]) - - # Calculate the total number of floating parameters hold by this - # ParameterSetArray instance. - self._n_floating_params = np.sum([paramset.n_floating_params - for paramset in self._paramset_list]) - - # Determine the array of initial values of all floating parameters. - self._floating_param_initials = np.concatenate([ - paramset.floating_param_initials - for paramset in self._paramset_list]) - - # Determine the array of bounds of all floating parameters. - self._floating_param_bounds = np.concatenate([ - paramset.floating_param_bounds - for paramset in self._paramset_list]) - - @property - def paramset_list(self): - """(read-only) The list of ParameterSet instances holding the global - parameters. 
- """ - return self._paramset_list - - @property - def n_params(self): - """(read-only) The total number of parameters hold by this - ParameterSetArray instance. - """ - return self._n_params - - @property - def n_fixed_params(self): - """(read-only) The total number of fixed parameters hold by this - ParameterSetArray instance. - """ - return self._n_fixed_params - - @property - def n_floating_params(self): - """(read-only) The total number of floating parameters hold by this - ParameterSetArray instance. - """ - return self._n_floating_params - - @property - def floating_param_initials(self): - """(read-only) The 1D (n_floating_params,)-shaped ndarray holding the - initial values of all the floating parameters. - """ - return self._floating_param_initials - - @property - def floating_param_bounds(self): - """(read-only) The 2D (n_floating_params,2)-shaped ndarray holding the - boundaries for all the floating parameters. - """ - return self._floating_param_bounds - - def __str__(self): - """Creates and returns a pretty string representation of this - ParameterSetArray instance. - """ - s = '%s: %d parameters (%d floating, %d fixed) {\n'%( - classname(self), self.n_params, self.n_floating_params, - self.n_fixed_params) - - for (idx,paramset) in enumerate(self._paramset_list): - if(idx > 0): - s += '\n' - s += display.add_leading_text_line_padding( - display.INDENTATION_WIDTH, - str(paramset)) - - s += '\n}' - - return s - - def generate_random_initials(self, rss): - """Generates a set of random initials for all global floating - parameters. - A new random initial is defined as - - lower_bound + RAND * (upper_bound - lower_bound), - - where RAND is a uniform random variable between 0 and 1. - - Parameters - ---------- - rss : RandomStateService instance - The RandomStateService instance that should be used for drawing - random numbers from. 
- """ - vb = self.floating_param_bounds - # Do random_initial = lower_bound + RAND * (upper_bound - lower_bound) - ri = vb[:,0] + rss.random.uniform(size=vb.shape[0])*(vb[:,1] - vb[:,0]) + list(zip(self._fixed_param_name_list, self._fixed_param_values)) + ) - return ri + return params_dict - def split_floating_param_values(self, floating_param_values): - """Splits the given floating parameter values into their specific - ParameterSet part. + def get_floating_params_dict(self, floating_param_values): + """Converts the given floating parameter values into a dictionary with + the floating parameter names and values. Parameters ---------- - floating_param_values : (n_floating_params,)-shaped 1D ndarray - The ndarray holding the values of all the floating parameters for - all ParameterSet instances. The order must match the order of - ParameterSet instances and their order of floating parameters. + floating_param_values : 1D ndarray + The ndarray holding the values of the floating parameters in the + order that the floating parameters are defined. Returns ------- - floating_param_values_list : list of (n_floating_params,)-shaped 1D - ndarray - The list of ndarray objects, where each ndarray holds only the - floating values of the particular ParameterSet instance. The order - matches the order of ParameterSet instances defined for this - ParameterSetArray. 
- """ - if(len(floating_param_values) != self.n_floating_params): - raise ValueError('The number of given floating parameter values ' - '(%d) does not match the total number of defined floating ' - 'parameters (%d)!'%(len(floating_param_values), - self.n_floating_params)) - - floating_param_values_list = [] - - offset = 0 - for paramset in self._paramset_list: - n_floating_params = paramset.n_floating_params - floating_param_values_list.append(floating_param_values[ - offset:offset+n_floating_params]) - offset += n_floating_params - - return floating_param_values_list - - def update_fixed_param_value_cache(self): - """Updates the internal cache of the fixed parameter values. This method - has to be called whenever the values of the fixed Parameter instances - change. + params_dict : dict + The dictionary with the floating and fixed parameter names and + values. """ - for paramset in self._paramset_list: - paramset.update_fixed_param_value_cache() + params_dict = dict( + list(zip(self._floating_param_name_list, floating_param_values)) + ) + + return params_dict class ParameterGrid(object): @@ -1131,7 +1053,7 @@ def from_BinningDefinition(binning, delta=None, decimals=None): Returns ------- - param_grid : ParameterGrid instance + param_grid : instance of ParameterGrid The created ParameterGrid instance. """ return ParameterGrid( @@ -1140,6 +1062,55 @@ def from_BinningDefinition(binning, delta=None, decimals=None): delta=delta, decimals=decimals) + @staticmethod + def from_range(name, start, stop, delta, decimals=None): + """Creates a ParameterGrid instance from a range definition. The stop + value will be the last grid point. + + Parameters + ---------- + name : str + The name of the parameter grid. + start : float + The start value of the range. + stop : float + The end value of the range. + delta : float + The width between the grid values. + decimals : int | None + The number of decimals the grid values should get rounded to. 
+ The maximal number of decimals is 16. + If set to None, the number of decimals will be the maximum of the + number of decimals of the first grid value and the number of + decimals of the delta value. + + Returns + ------- + param_grid : instance of ParameterGrid + The created ParameterGrid instance. + """ + start = float_cast( + start, + 'The start argument must be castable to type float!') + stop = float_cast( + stop, + 'The stop argument must be castable to type float!') + delta = float_cast( + delta, + 'The delta argument must be castable to type float!') + decimals = int_cast( + decimals, + 'The decimals argument must be castable to type int!', + allow_None=True) + + grid = np.arange(start, stop+delta, delta) + + return ParameterGrid( + name=name, + grid=grid, + delta=delta, + decimals=decimals) + def __init__(self, name, grid, delta=None, decimals=None): """Creates a new parameter grid. @@ -1161,26 +1132,28 @@ def __init__(self, name, grid, delta=None, decimals=None): number of decimals of the first grid value and the number of decimals of the delta value. """ - if(delta is None): + if delta is None: # We need to take the mean of all the "equal" differences in order # to smooth out unlucky rounding issues of a particular difference. delta = np.mean(np.diff(grid)) - delta = float_cast(delta, 'The delta argument must be castable to ' - 'type float!') + delta = float_cast( + delta, + 'The delta argument must be castable to type float!') self._delta = np.float64(delta) # Determine the number of decimals of delta. - if(decimals is None): + if decimals is None: decimals_value = get_number_of_float_decimals(grid[0]) decimals_delta = get_number_of_float_decimals(delta) decimals = int(np.max((decimals_value, decimals_delta))) - if(not isinstance(decimals, int)): - raise TypeError('The decimals argument must be an instance of ' - 'type int!') - if(decimals > 16): - raise ValueError('The maximal number of decimals is 16! 
Maybe you ' - 'should consider log-space!?') + if not isinstance(decimals, int): + raise TypeError( + 'The decimals argument must be an instance of type int!') + if decimals > 16: + raise ValueError( + 'The maximal number of decimals is 16! Maybe you should ' + 'consider log-space!?') self.name = name self._decimals = decimals @@ -1192,15 +1165,23 @@ def __init__(self, name, grid, delta=None, decimals=None): # setting the delta and offser properties. self.grid = grid + def __str__(self): + """Pretty string representation. + """ + return '{:s} = {:s}, decimals = {:d}'.format( + self._name, str(self._grid), self._decimals) + @property def name(self): """The name of the parameter. """ return self._name + @name.setter def name(self, name): - if(not isinstance(name, str)): - raise TypeError('The name property must be of type str!') + if not isinstance(name, str): + raise TypeError( + 'The name property must be of type str!') self._name = name @property @@ -1214,14 +1195,17 @@ def grid(self): """The numpy.ndarray with the grid values of the parameter. """ return self._grid + @grid.setter def grid(self, arr): - if(not issequence(arr)): - raise TypeError('The grid property must be a sequence!') - if(not isinstance(arr, np.ndarray)): + if not issequence(arr): + raise TypeError( + 'The grid property must be a sequence!') + if not isinstance(arr, np.ndarray): arr = np.array(arr, dtype=np.float64) - if(arr.ndim != 1): - raise ValueError('The grid property must be a 1D numpy.ndarray!') + if arr.ndim != 1: + raise ValueError( + 'The grid property must be a 1D numpy.ndarray!') self._grid = self.round_to_nearest_grid_point(arr) @property @@ -1235,10 +1219,12 @@ def lower_bound(self): """The lower bound of the parameter grid. 
""" return self._lower_bound + @lower_bound.setter def lower_bound(self, v): - v = float_cast(v, 'The lower_bound property must be castable to type ' - 'float!') + v = float_cast( + v, + 'The lower_bound property must be castable to type float!') self._lower_bound = np.around(np.float64(v), self._decimals) @property @@ -1251,23 +1237,10 @@ def _calc_floatD_and_intD(self, value): """Calculates the number of delta intervals of the given values counted from the lower bound of the grid. It returns its float and integer representation. - - Raises - ------ - ValueError - If one of the values are below or above the grid range. """ value = np.atleast_1d(value).astype(np.float64) - if(hasattr(self, '_grid')): - m = (value >= self._lower_bound) & (value <= self._grid[-1]) - if(not np.all(m)): - raise ValueError('The following values are outside the range ' - 'of the parameter grid "%s": %s'%( - self.name, - ','.join(str(v) for v in value[np.invert(m)]))) - - floatD = value/self._delta - self._lower_bound/self._delta + floatD = (value - self._lower_bound)/self._delta floatD = np.around(floatD, 9) intD = floatD.astype(np.int64) @@ -1311,8 +1284,9 @@ def round_to_nearest_grid_point(self, value): gp = self._lower_bound + (np.around(floatD % 1, 0) + intD)*self._delta gp = np.around(gp, self._decimals) - if(scalar_input): + if scalar_input: return gp.item() + return gp def round_to_lower_grid_point(self, value): @@ -1338,8 +1312,9 @@ def round_to_lower_grid_point(self, value): gp = self._lower_bound + intD*self._delta gp = np.around(gp, self._decimals) - if(scalar_input): + if scalar_input: return gp.item() + return gp def round_to_upper_grid_point(self, value): @@ -1365,26 +1340,32 @@ def round_to_upper_grid_point(self, value): gp = self._lower_bound + (intD + 1)*self._delta gp = np.around(gp, self._decimals) - if(scalar_input): + if scalar_input: return gp.item() + return gp -class ParameterGridSet(NamedObjectCollection): +class ParameterGridSet( + NamedObjectCollection): 
"""Describes a set of parameter grids. """ - def __init__(self, param_grids=None): + def __init__( + self, + param_grids=None, + **kwargs): """Constructs a new ParameterGridSet object. Parameters ---------- - param_grids : sequence of ParameterGrid instances | - ParameterGrid instance | None - The ParameterGrid instances this ParameterGridSet instance should + param_grids : sequence of instance of ParameterGrid | instance of ParameterGrid | None + The ParameterGrid instances this instance of ParameterGridSet should get initialized with. """ - super(ParameterGridSet, self).__init__( - objs=param_grids, obj_type=ParameterGrid) + super().__init__( + objs=param_grids, + obj_type=ParameterGrid, + **kwargs) @property def ndim(self): @@ -1394,25 +1375,26 @@ def ndim(self): return len(self) @property - def parameter_names(self): + def params_name_list(self): """(read-only) The list of the parameter names. """ - return [ paramgrid.name for paramgrid in self.objects ] + return self.name_list @property def parameter_permutation_dict_list(self): """(read-only) The list of parameter dictionaries constructed from all permutations of all the parameter values. """ - # Get the list of parameter names. - param_names = [ paramgrid.name for paramgrid in self.objects ] - # Get the list of parameter grids, in same order than the parameter - # names. - param_grids = [ paramgrid.grid for paramgrid in self.objects ] + param_grids = [paramgrid.grid for paramgrid in self.objects] + + dict_list = [ + dict([ + (p_i, t_i) + for (p_i, t_i) in zip(self.name_list, tup) + ]) + for tup in itertools.product(*param_grids) + ] - dict_list = [ dict([ (p_i, t_i) - for (p_i, t_i) in zip(param_names, tup) ]) - for tup in itertools.product(*param_grids) ] return dict_list def add_extra_lower_and_upper_bin(self): @@ -1430,71 +1412,116 @@ def copy(self): return copy -class ModelParameterMapper(object, metaclass=abc.ABCMeta): - """This abstract base class defines the interface of a model parameter - mapper. 
A model parameter mapper provides the functionality to map a global +class ParameterModelMapper( + object): + """This class provides the parameter to model mapper. + The parameter to model mapper provides the functionality to map a global parameter, usually a fit parameter, to a local parameter of a model, e.g. to a source, or a background model parameter. """ - def __init__(self, name, models): + @staticmethod + def is_global_fitparam_a_local_param( + fitparam_id, + params_recarray, + local_param_names): + """Determines if the given global fit parameter is a local parameter of + the given list of local parameter names. + + Parameters + ---------- + fitparam_id : int + The ID of the global fit parameter. + params_recarray : instance of numpy record ndarray + The (N_models,)-shaped numpy record ndarray holding the local + parameter names and values of the models. See the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for the format of this record array. + local_param_names : list of str + The list of local parameters. + + Returns + ------- + check : bool + ``True`` if the global fit parameter translates to a local parameter + contained in the ``local_param_names`` list, ``False`` otherwise. + """ + for pname in local_param_names: + if pname not in params_recarray.dtype.fields: + continue + if np.any(params_recarray[f'{pname}:gpidx'] == fitparam_id + 1): + return True + + return False + + @staticmethod + def is_local_param_a_fitparam( + local_param_name, + params_recarray): + """Checks if the given local parameter is a (partly) a fit parameter. + + Parameters + ---------- + local_param_name : str + The name of the local parameter. + params_recarray : instance of numpy record ndarray + The (N_models,)-shaped numpy record ndarray holding the local + parameter names and values of the models. See the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for the format of this record array. 
+ + Returns + ------- + check : bool + ``True`` if the given local parameter is (partly) a fit parameter. + """ + if np.any(params_recarray[f'{local_param_name}:gpidx'] > 0): + return True + + return False + + def __init__(self, models, **kwargs): """Constructor of the parameter mapper. Parameters ---------- - name : str - The name of the model parameter mapper. In practice this is a - representative name for the set of global parameters this model - parameter mapper holds. For a two-component signal-background - likelihood model, "signal", or "background" could be useful names. - models : sequence of Model instances. + models : sequence of instance of Model. The sequence of Model instances the parameter mapper can map global parameters to. """ - super(ModelParameterMapper, self).__init__() + super().__init__(**kwargs) - self.name = name - self.models = models + models = ModelCollection.cast( + models, + 'The models property must be castable to an instance of ' + 'ModelCollection!') + self._models = models # Create the parameter set for the global parameters. self._global_paramset = ParameterSet() - # Define a (n_global_params,)-shaped numpy ndarray of str objects that - # will hold the local parameter names of the global parameters as - # defined by the models. - # The local model parameter names are the names used by the internal - # math objects, like PDFs. Thus, the global parameter names can be - # aliases of such local model parameter names. - self._model_param_names = np.empty((0,), dtype=np.object_) - - # (N_params, N_models) shaped boolean ndarray defining what global - # parameter maps to which model. - self._global_param_2_model_mask = np.zeros( - (0, len(self._models)), dtype=np.bool_) + # Define the attribute holding the boolean mask of the models that are + # source models. 
+ self._source_model_mask = np.array( + [isinstance(model, SourceModel) for model in self._models], + dtype=bool) - @property - def name(self): - """The name of this ModelParameterMapper instance. In practice this is - a representative name for the set of global parameters this mapper - holds. - """ - return self._name - @name.setter - def name(self, name): - name = str_cast(name, 'The name property must be castable to type str!') - self._name = name + # Define a (n_models, n_global_params)-shaped numpy ndarray of str + # objects that will hold the local model parameter names of the global + # parameters. + # The local model parameter names are the names used by the internal + # math objects, like PDFs. Thus, they can be aliases for the global + # parameter names. Entries set to None, will indicate masked-out + # global parameters. + self._model_param_names = np.empty( + (len(self._models), 0), dtype=np.object_) @property def models(self): - """The ModelCollection instance defining the models the mapper can - map global parameters to. + """(read-only) The ModelCollection instance defining the models the + mapper can map global parameters to. """ return self._models - @models.setter - def models(self, obj): - obj = ModelCollection.cast(obj, 'The models property must ' - 'be castable to an instance of ModelCollection!') - self._models = obj @property def global_paramset(self): @@ -1527,46 +1554,71 @@ def n_global_floating_params(self): """ return self._global_paramset.n_floating_params + @property + def n_sources(self): + """(read-only) The number of source models the mapper knows about. + """ + return np.count_nonzero(self._source_model_mask) + + @property + def unique_model_param_names(self): + """(read-only) The unique parameters names of all the models. + """ + m = self._model_param_names != np.array(None) + return np.unique(self._model_param_names[m]) + + @property + def unique_source_param_names(self): + """(read-only) The unique parameter names of the sources. 
+ """ + src_param_names = self._model_param_names[self._source_model_mask, ...] + m = src_param_names != np.array(None) + return np.unique(src_param_names[m]) + def __str__(self): - """Generates and returns a pretty string representation of this model - parameter mapper. + """Generates and returns a pretty string representation of this + parameter model mapper. """ n_global_params = self.n_global_params # Determine the number of models that have global parameters assigned. - # Remember self._global_param_2_model_mask is a - # (n_global_params, n_models)-shaped 2D ndarray. - n_models = np.sum(np.sum(self._global_param_2_model_mask, axis=0) > 0) + # Remember self._model_param_names is a (n_models, n_global_params)- + # shaped 2D ndarray. + n_models = self.n_models + n_sources = self.n_sources - s = classname(self) + ' "%s": '%(self._name) - s += '%d global parameter'%(n_global_params) + s = f'{classname(self)}: ' + s += f'{n_global_params} global parameter' s += '' if n_global_params == 1 else 's' s += ', ' - s += '%d model'%(n_models) + s += f'{n_models} model' s += '' if n_models == 1 else 's' + s += f' ({n_sources} source' + s += '' if n_sources == 1 else 's' + s += ')' - if(n_global_params == 0): + if n_global_params == 0: return s s1 = 'Parameters:' s += '\n' + display.add_leading_text_line_padding( display.INDENTATION_WIDTH, s1) - for (pidx,param) in enumerate(self._global_paramset.params): - model_names = [ self._models[model_idx].name - for model_idx in np.nonzero( - self._global_param_2_model_mask[pidx])[0] - ] - if(param.isfixed): - pstate = 'fixed (%.3f)'%( - param.initial) + for (pidx, p) in enumerate(self._global_paramset.params): + if p.isfixed: + pstate = ( + f'fixed ({p.initial:g})' + ) else: - pstate = 'floating (%.3f <= %.3f <= %.3f)'%( - param.valmin, param.initial, param.valmax) - ps = '\n%s [%s] --> %s\n'%( - param.name, pstate, self._model_param_names[pidx]) + pstate = ( + f'floating ({p.valmin:g} <= {p.initial:g} <= {p.valmax:g})' + ) + ps = 
f'\n{p.name} [{pstate}]\n' + ps1 = 'in models:\n' - ps1 += '- ' - ps1 += '\n- '.join(model_names) + for (midx, mpname) in enumerate(self._model_param_names[:, pidx]): + if mpname is not None: + ps1 += '- ' + self._models[midx].name + ': ' + mpname + "\n" + ps += display.add_leading_text_line_padding( display.INDENTATION_WIDTH, ps1) s += display.add_leading_text_line_padding( @@ -1574,222 +1626,135 @@ def __str__(self): return s - def finalize(self): - """Finalizes this ModelParameterMapper instance by declaring its - ParameterSet instance as constant. No new global parameters can be added - after calling this method. - """ - self._global_paramset = const(self._global_paramset) - - @abc.abstractmethod - def def_param(self, param, model_param_name=None, models=None): - """This method is supposed to add the given Parameter instance to the - parameter mapper and maps the global parameter to the given sequence of - models the parameter mapper knows about. - - Parameters - ---------- - param : instance of Parameter - The global parameter which should get mapped to one or more models. - model_param_name : str | None - The name of the parameter of the model. Hence, the global - parameter name can be different to the parameter name of the model. - If `None`, the name of the global parameter will be used as model - parameter name. - models : sequence of Model instances - The sequence of Model instances the parameter should get - mapped to. The instances in the sequence must match Model instances - specified at construction of this mapper. - """ - pass - - @abc.abstractmethod - def get_model_param_dict( - self, global_floating_param_values, model_idx=None): - """This method is supposed to create a dictionary with the fixed and - floating parameter names and their values for the given model. + def get_model_param_name(self, model_idx, gp_idx): + """Retrieves the local parameter name of a given model and global + parameter index. 
Parameters ---------- - global_floating_param_values : 1D ndarray instance - The ndarray instance holding the current values of the global - floating parameters. - model_idx : int | None - The index of the model as it was defined at construction - time of this ModelParameterMapper instance. + model_idx : int + The index of the model. + gp_idx : int + The index of the global parameter. Returns ------- - model_param_dict : dict - The dictionary holding the fixed and floating parameter names and - values of the specified model. + param_name : str | None + The name of the local model parameter. It is ``None``, if the given + global parameter is not mapped to the given model. """ - pass + param_name = self._model_param_names[model_idx, gp_idx] + return param_name -class SingleModelParameterMapper(ModelParameterMapper): - """This class provides a model parameter mapper for a single model, like a - single source, or a single background model. - """ - def __init__(self, name, model): - """Constructs a new model parameter mapper for a single model. + def get_gflp_idx(self, name): + """Gets the index of the global floating parameter of the given name. Parameters ---------- name : str - The name of the model parameter mapper. In practice this is a - representative name for the set of global parameters this model - parameter mapper holds. For a two-component signal-background - likelihood model, "signal", or "background" could be useful names. - model : instance of Model - The instance of Model the parameter mapper can map global - parameters to. + The global floating parameter's name. + + Returns + ------- + idx : int + The index of the global floating parameter. """ - super(SingleModelParameterMapper, self).__init__( - name=name, models=model) + return self._global_paramset.get_floating_pidx( + param_name=name) - def def_param(self, param, model_param_name=None): - """Adds the given Parameter instance to the parameter mapper. 
+ def get_model_idx_by_name(self, name): + """Determines the index within this ParameterModelMapper instance of + the model with the given name. Parameters ---------- - param : instance of Parameter - The global parameter which should get mapped to the single model. - model_param_name : str | None - The parameter name of the model. Hence, the global parameter name - can be different to the parameter name of the model. - If set to `None`, the name of the global parameter will be used as - model parameter name. + name : str + The model's name. Returns ------- - self : SingleModelParameterMapper - The instance of this SingleModelParameterMapper, so that several - `def_param` calls can be concatenated. + model_idx : int + The model's index within this ParameterModelMapper instance. Raises ------ KeyError - If there is already a model parameter with the given name defined. + If there is no model of the given name. """ - if(model_param_name is None): - model_param_name = param.name - if(not isinstance(model_param_name, str)): - raise TypeError('The model_param_name argument must be None or of ' - 'type str!') - - if(model_param_name in self._model_param_names): - raise KeyError('There is already a global parameter defined for ' - 'the model parameter name "%s"!'%(model_param_name)) - - self._global_paramset.add_param(param) - self._model_param_names = np.concatenate( - (self._model_param_names,[model_param_name])) + for (model_idx, model) in enumerate(self._models): + if model.name == name: + return model_idx - mask = np.ones((1,), dtype=np.bool_) - self._global_param_2_model_mask = np.vstack( - (self._global_param_2_model_mask, mask)) + raise KeyError( + f'The model with name "{name}" does not exist within the ' + 'ParameterModelMapper instance!') - return self - - def get_model_param_dict( - self, global_floating_param_values, model_idx=None): - """Creates a dictionary with the fixed and floating parameter names and - their values for the single model. 
+ def get_src_model_idxs(self, sources=None): + """Creates a numpy ndarray holding the indices of the requested source + models. Parameters ---------- - global_floating_param_values : 1D ndarray instance - The ndarray instance holding the current values of the global - floating parameters. The values must be in the same order as the - floating parameters were defined. - model_idx : None - The index of the model as it was defined at construction - time of this ModelParameterMapper instance. Since this is a - ModelParameterMapper for a single model, this argument is - ignored. + sources : instance of SourceModel | sequence of SourceModel | None + The requested sequence of source models. + If set to ``None``, all source models will be requested. Returns ------- - model_param_dict : dict - The dictionary holding the fixed and floating parameter names and - values of the single model. + src_model_idxs : numpy ndarray + The (N_sources,)-shaped 1D ndarray holding the indices of the + requested source models. """ - # Create the list of parameter names such that floating parameters are - # before the fixed parameters. - model_param_names = np.concatenate( - (self._model_param_names[self._global_paramset.floating_params_mask], - self._model_param_names[self._global_paramset.fixed_params_mask])) + # Get the model indices of all the source models. + src_model_idxs = np.arange(self.n_models)[self._source_model_mask] - # Create a 1D (n_global_params,)-shaped ndarray holding the values of - # the floating and fixed parameters. Since we only have a single model, - # these values coincide with the parameter values of the single model. 
- model_param_values = np.concatenate(( - global_floating_param_values, - self._global_paramset.fixed_param_values - )) - if(len(model_param_values) != len(self._model_param_names)): - raise ValueError('The number of parameter values (%d) does not ' - 'equal the number of parameter names (%d) for model "%s"!'% - (len(model_param_values), len(self._model_param_names), - self._models[0].name)) + if sources is None: + return src_model_idxs - model_param_dict = dict( - zip(model_param_names, model_param_values)) + # Select only the source models of interest. + if isinstance(sources, SourceModel): + sources = [sources] + if not issequenceof(sources, SourceModel): + raise TypeError( + 'The sources argument must be None, an instance of ' + 'SourceModel, or a sequence of SourceModel! ' + f'Its type is {classname(sources)}') - return model_param_dict + src_selection_mask = np.zeros((len(src_model_idxs),), dtype=bool) + for smidx in src_model_idxs: + src = self._models[smidx] + if src in sources: + src_selection_mask[smidx] = True + src_model_idxs = src_model_idxs[src_selection_mask] -class MultiModelParameterMapper(ModelParameterMapper): - """This class provides a model parameter mapper for multiple models, like - multiple sources, or multiple background models. - """ - def __init__(self, name, models): - """Constructs a new multi model parameter mapper for mapping global - parameters to the given models. + return src_model_idxs - Parameters - ---------- - name : str - The name of the model parameter mapper. In practice this is a - representative name for the set of global parameters this model - parameter mapper holds. For a two-component signal-background - likelihood model, "signal", or "background" could be useful names. - models : sequence of Model instances. - The sequence of Model instances the parameter mapper can - map global parameters to. 
- """ - super(MultiModelParameterMapper, self).__init__( - name=name, models=models) - - def def_param(self, param, model_param_name=None, models=None): - """Adds the given Parameter instance to this parameter mapper and maps - the parameter to the given sequence of models this model parameter - mapper knows about. + def def_param(self, param, models=None, model_param_names=None): + """Adds the given Parameter instance to this parameter model mapper and + maps the parameter to the given sequence of models this parameter model + mapper knows about. Aliases for the given parameters can be specified + for each individual model. Parameters ---------- param : instance of Parameter - The global parameter which should get mapped to one or multiple - models. - model_param_name : str | None - The parameter name of the models. The parameter name of the models - must be the same for all the models this global parameter should get - mapped to. The global parameter name can be different to the - parameter name of the models. - If set to `None`, the name of the global parameter will be used as - model parameter name. - models : sequence of Model instances | None - The sequence of Model instances the parameter should get mapped to. - The instances in the sequence must match Model instances specified - at construction of this mapper. - If set to `None` the global parameter will be mapped to all known - models. + The global parameter which should get mapped to one or more models. + models : sequence of Model instances + The sequence of Model instances the parameter should get + mapped to. The instances in the sequence must match Model instances + specified at construction of this mapper. + model_param_names : str | sequence of str | None + The name of the parameter of the model. Hence, the global + parameter name can be different to the parameter name of the model. + If `None`, the name of the global parameter will be used as model + parameter name for all models. 
Returns ------- - self : MultiModelParameterMapper - The instance of this MultiModelParameterMapper, so that several + self : ParameterModelMapper + The instance of this ParameterModelMapper, so that several `def_param` calls can be concatenated. Raises @@ -1798,61 +1763,64 @@ def def_param(self, param, model_param_name=None, models=None): If there is already a model parameter of the same name defined for any of the given to-be-applied models. """ - if(model_param_name is None): - model_param_name = param.name - if(not isinstance(model_param_name, str)): - raise TypeError('The model_param_name argument must be None or of ' - 'type str!') + if model_param_names is None: + model_param_names = np.array([param.name]*len(self._models)) + if isinstance(model_param_names, str): + model_param_names = np.array([model_param_names]*len(self._models)) + if not issequenceof(model_param_names, str): + raise TypeError( + 'The model_param_names argument must be None, an instance of ' + 'str, or a sequence of instances of str!') - if(models is None): + if models is None: models = self._models - models = ModelCollection.cast(models, + models = ModelCollection.cast( + models, 'The models argument must be castable to an instance of ' 'ModelCollection!') # Make sure that the user did not provide an empty sequence. - if(len(models) == 0): - raise ValueError('The sequence of models, to which the parameter ' - 'maps, cannot be empty!') + if len(models) == 0: + raise ValueError( + 'The sequence of models, to which the parameter maps, cannot ' + 'be empty!') # Get the list of model indices to which the parameter maps. 
mask = np.zeros((self.n_models,), dtype=np.bool_) - for ((midx,model), applied_model) in itertools.product( + for ((midx, model), applied_model) in itertools.product( enumerate(self._models), models): - if(applied_model.id == model.id): + if applied_model.id == model.id: mask[midx] = True # Check that the model parameter name is not already defined for any of # the given to-be-mapped models. - model_indices = np.arange(self.n_models)[mask] - for midx in model_indices: - param_mask = self._global_param_2_model_mask[:,midx] - if(model_param_name in self._model_param_names[param_mask]): - raise KeyError('The model parameter "%s" is already defined ' - 'for model "%s"!'%(model_param_name, - self._models[midx].name)) + for midx in np.arange(self.n_models)[mask]: + mpnames = self._model_param_names[midx][ + self._model_param_names[midx] != np.array(None)] + if model_param_names[midx] in mpnames: + raise KeyError( + f'The model parameter "{model_param_names[midx]}" is ' + f'already defined for model "{self._models[midx].name}"!') self._global_paramset.add_param(param) - self._model_param_names = np.concatenate( - (self._model_param_names, [model_param_name])) - self._global_param_2_model_mask = np.vstack( - (self._global_param_2_model_mask, mask)) + entry = np.where(mask, model_param_names, None) + self._model_param_names = np.hstack( + (self._model_param_names, entry[np.newaxis, :].T)) return self - def get_model_param_dict( - self, global_floating_param_values, model_idx): + def create_model_params_dict(self, gflp_values, model): """Creates a dictionary with the fixed and floating parameter names and their values for the given model. Parameters ---------- - global_floating_param_values : 1D ndarray instance + gflp_values : 1D ndarray of float The ndarray instance holding the current values of the global floating parameters. 
- model_idx : int + model : instance of Model | str | int The index of the model as it was defined at construction - time of this ModelParameterMapper instance. + time of this ParameterModelMapper instance. Returns ------- @@ -1860,26 +1828,48 @@ def get_model_param_dict( The dictionary holding the fixed and floating parameter names and values of the specified model. """ + gflp_values = np.atleast_1d(gflp_values) + + if isinstance(model, str): + midx = self.get_model_idx_by_name(name=model) + elif isinstance(model, Model): + midx = self.get_model_idx_by_name(name=model.name) + else: + midx = int_cast( + model, + 'The model argument must be an instance of Model, str, or ' + 'castable to int!') + if midx < 0 or midx >= len(self._models): + raise IndexError( + f'The model index {midx} is out of range ' + f'[0,{len(self._models)-1}]!') + # Get the model parameter mask that masks the global parameters for # the requested model. - model_mask = self._global_param_2_model_mask[:,model_idx] + m_gp_mask = self._model_param_names[midx] != np.array(None) - # Create the array of parameter names that belong to the requested - # model, where floating parameters are before the fixed parameters. - model_param_names = np.concatenate( - (self._model_param_names[ - self._global_paramset.floating_params_mask & model_mask], - self._model_param_names[ - self._global_paramset.fixed_params_mask & model_mask] - )) + _model_param_names = self._model_param_names + _global_paramset = self._global_paramset + gflp_mask = _global_paramset.floating_params_mask + gfxp_mask = _global_paramset.fixed_params_mask + + # Create the array of local parameter names that belong to the + # requested model, where the floating parameters are before the fixed + # parameters. 
+ model_param_names = np.concatenate(( + _model_param_names[ + midx, + gflp_mask & m_gp_mask], + _model_param_names[ + midx, + gfxp_mask & m_gp_mask] + )) # Create the array of parameter values that belong to the requested # model, where floating parameters are before the fixed parameters. model_param_values = np.concatenate(( - global_floating_param_values[ - model_mask[self._global_paramset.floating_params_mask]], - self._global_paramset.fixed_param_values[ - model_mask[self._global_paramset.fixed_params_mask]] + gflp_values[m_gp_mask[gflp_mask]], + _global_paramset.fixed_param_values[m_gp_mask[gfxp_mask]] )) model_param_dict = dict( @@ -1887,682 +1877,202 @@ def get_model_param_dict( return model_param_dict - -class HypoParameterDefinition(NamedObjectCollection): - """This class provides a data holder for a list of model parameter mappers, - where each parameter mapper defines a set of global parameters for the - likelihood function, and their mapping to local model parameters. - In addition this class provides a method to create a copy of itself, where - floating parameters can get fixed to a certain values. - """ - def __init__(self, model_param_mappers): - """Creates a new instance of HypoParameterDefinition with the given list - of ModelParameterMapper instances. + def create_src_params_recarray( + self, + gflp_values=None, + sources=None): + """Creates a numpy record ndarray with a field for each local source + parameter name and parameter's value. In addition each parameter field + ```` has a field named ``<:gpidx>`` which holds the index + plus one of the corresponding global parameter for each source value. + For values mapping to fixed parameters, the index is negative. Local + parameter values that do not apply to a particular source are set to + NaN. The parameter index in such cases is undefined. + In addition to the parameter fields, the field ``:model_idx`` holds the + index of the model for which the local parameter values apply. 
Parameters ---------- - model_param_mappers : instance of ModelParameterMapper | sequence of - ModelParameterMapper instances - The list of ModelParameterMapper instances defining the global - parameters and their mapping to local parameters of individual - models. - """ - super(HypoParameterDefinition, self).__init__( - model_param_mappers, obj_type=ModelParameterMapper) - - # Finalize all ModelParameterMapper instances, hence no parameters can - # be added anymore. - for mapper in self._objects: - mapper.finalize() - - @property - def model_param_mapper_list(self): - """(read-only) The list of ModelParameterMapper instances defining the - global parameters and their mapping to the individual local model - parameters. - """ - return self._objects - - def __str__(self): - """Creates a pretty string representation of this - HypoParameterDefinition instance. - """ - s = '%s:\n'%(classname(self)) - - for (idx, mapper) in enumerate(self._objects): - if(idx > 0): - s += '\n' - s1 = str(mapper) - s += display.add_leading_text_line_padding( - display.INDENTATION_WIDTH, s1) - - return s - - def copy(self, fix_params=None): - """Creates a deep copy of this HypoParameterDefinition instance and - fixes the given global parameters to the given values. - - Parameters - ---------- - fix_params : dict | None - The dictionary defining the global parameters that should get fixed - in the copy. - - Returns - ------- - copy : instance of HypoParameterDefinition - The copy of this HypoParameterDefinition instance with the given - global parameters fixed to the given values. 
- """ - copy = deepcopy(self) - - if(fix_params is not None): - if(not isinstance(fix_params, dict)): - raise TypeError('The fix_params argument must be of type dict!') - - for mp_mapper in copy.model_param_mapper_list: - mp_mapper.global_paramset.make_params_fixed(fix_params) - - return copy - - def create_ParameterSetArray(self): - """Creates a ParameterSetArray instance for all the ModelParameterMapper - instances of this HypoParameterDefinition instance. - - Returns - ------- - paramsetarray : ParameterSetArray - The instance of ParameterSetArray holding references to the - ParameterSet instances of all the ModelParameterMapper instances of - this HypoParameterDefinition instance. - """ - paramsetarray = ParameterSetArray( - [mpmapper.global_paramset - for mpmapper in self._objects]) - return paramsetarray - - -class FitParameter(object): - """This class is DEPRECATED! Use class Parameter instead! - - This class describes a single fit parameter. A fit parameter has a name, - a value range, an initial value, and a current value. The current value will - be updated in the fitting process. - """ - def __init__(self, name, valmin, valmax, initial): - """Creates a new fit parameter object. - - Parameters - ---------- - name : str - The name of the fit parameter. - valmin : float - The minimal bound value of the fit parameter. - valmax : float - The maximal bound value of the fit parameter. - initial : float - The (initial) value (guess) of the parameter, which will be used as - start point for the fitting procedure. - """ - self.name = name - self.valmin = valmin - self.valmax = valmax - self.initial = initial - - self.value = self.initial - - @property - def name(self): - """The name of the fit parameter. - """ - return self._name - @name.setter - def name(self, name): - if(not isinstance(name, str)): - raise TypeError('The name property must be of type str!') - self._name = name - - @property - def valmin(self): - """The minimal bound value of the fit parameter. 
- """ - return self._valmin - @valmin.setter - def valmin(self, v): - v = float_cast(v, 'The valmin property must castable to type float!') - self._valmin = v - - @property - def valmax(self): - """The maximal bound value of the fit parameter. - """ - return self._valmax - @valmax.setter - def valmax(self, v): - v = float_cast(v, 'The valmax property must be castable to type float!') - self._valmax = v - - @property - def initial(self): - """The initial value of the fit parameter. - """ - return self._initial - @initial.setter - def initial(self, v): - v = float_cast(v, 'The initial property must be castable to type float!') - self._initial = v - - def as_linear_grid(self, delta): - """Creates a ParameterGrid instance with a linear grid with constant - grid value distances delta. - - Parameters - ---------- - delta : float - The constant distance between the grid values. By definition this - defines also the precision of the parameter values. - - Returns - ------- - grid : ParameterGrid instance - The ParameterGrid instance holding the grid values. - """ - delta = float_cast( - delta, 'The delta argument must be castable to type float!') - grid = make_linear_parameter_grid_1d( - self._name, self._valmin, self._valmax, delta) - return grid - - -class FitParameterSet(object): - """This class is DEPRECATED, use ParameterSet instead! - - This class describes a set of FitParameter instances. - """ - def __init__(self): - """Constructs a fit parameter set instance. - """ - # Define the list of fit parameters. - # Define the (N_fitparams,)-shaped numpy array of FitParameter objects. - self._fitparams = np.empty((0,), dtype=np.object_) - # Define a list for the fit parameter names. This is for optimization - # purpose only. - self._fitparam_name_list = [] - - @property - def fitparams(self): - """The 1D ndarray holding the FitParameter instances. 
- """ - return self._fitparams - - @property - def fitparam_list(self): - """(read-only) The list of the global FitParameter instances. - """ - return list(self._fitparams) - - @property - def fitparam_name_list(self): - """(read-only) The list of the fit parameter names. - """ - return self._fitparam_name_list - - @property - def initials(self): - """(read-only) The 1D ndarray holding the initial values of all the - global fit parameters. - """ - return np.array([ fitparam.initial - for fitparam in self._fitparams ], dtype=np.float64) - - @property - def bounds(self): - """(read-only) The 2D (N_fitparams,2)-shaped ndarray holding the - boundaries for all the global fit parameters. - """ - return np.array([ (fitparam.valmin, fitparam.valmax) - for fitparam in self._fitparams ], dtype=np.float64) - - def copy(self): - """Creates a deep copy of this FitParameterSet instance. + gflp_values : numpy ndarray | None + The (N_global_floating_param,)-shaped 1D ndarray holding the global + floating parameter values. The order must match the order of + parameter definition in this ParameterModelMapper instance. + If set to ``None``, the value ``numpy.nan`` will be used as + parameter value for floating parameters. + sources : SourceModel | sequence of SourceModel | ndarray of int32 | None + The sources which should be considered. + If a ndarray of type int is provides, it must contain the global + source indices. + If set to ``None``, all sources are considered. Returns ------- - copy : FitParameterSet instance - The copied instance of this FitParameterSet instance. - """ - copy = deepcopy(self) - return copy - - def add_fitparam(self, fitparam, atfront=False): - """Adds the given FitParameter instance to the list of fit parameters. - - Parameters - ---------- - fitparam : instance of FitParameter - The fit parameter, which should get added. - atfront : bool - Flag if the fit parameter should be added at the front of the - parameter list. 
If set to False (default), it will be added at the - back. - """ - if(not isinstance(fitparam, FitParameter)): - raise TypeError('The fitparam argument must be an instance of FitParameter!') - - if(atfront): - # Add fit parameter at front of list. - self._fitparams = np.concatenate(([fitparam], self._fitparams)) - self._fitparam_name_list = [fitparam.name] + self._fitparam_name_list + recarray : numpy structured ndarray + The (N_sources,)-shaped numpy structured ndarray holding the local + parameter names and their values for each requested source. + It contains the following fields: + + :model_idx + The field holding the index of the model to which the set + of local parameters apply. + + The field holding the value for the local parameter . + Not all local parameters apply to all sources. + Example: "gamma". + :gpidx + The field holding the global parameter index plus one for + the local parameter . Example: "gamma:gpidx". Indices + for values mapping to fixed parameters are negative. + """ + if gflp_values is None: + gflp_values = np.full((self.n_global_floating_params,), np.nan) + + gflp_values = np.atleast_1d(gflp_values) + + # Check input. + n_global_floating_params = self.n_global_floating_params + if len(gflp_values) != n_global_floating_params: + raise ValueError( + f'The gflp_values argument is of length ' + f'{len(gflp_values)}, but must be of length ' + f'{n_global_floating_params}!') + + if isinstance(sources, np.ndarray) and sources.dtype == np.int32: + # The sources are already specified in terms of their source + # indices. + smidxs = sources else: - # Add fit parameter at back of list. - self._fitparams = np.concatenate((self._fitparams, [fitparam])) - self._fitparam_name_list = self._fitparam_name_list + [fitparam.name] - - def fitparam_values_to_dict(self, fitparam_values): - """Converts the given fit parameter values into a dictionary with the - fit parameter names and values. 
- - Parameters - ---------- - fitparam_values : 1D ndarray - The ndarray holding the fit parameter values in the order that the - fit parameters are defined. - - Returns - ------- - fitparam_dict : dict - The dictionary with the fit parameter names and values. - """ - fitparam_dict = dict(zip(self._fitparam_name_list, fitparam_values)) - return fitparam_dict - - def fitparam_dict_to_values(self, fitparam_dict): - """Converts the given fit parameter dictionary into a 1D ndarray holding - the fit parameter values in the order the fit parameters are defined. - - Parameters - ---------- - fitparam_dict : dict - The dictionary with the fit parameter names and values. - - Returns - ------- - fitparam_values : 1D ndarray - The ndarray holding the fit parameter values in the order that the - fit parameters are defined. - """ - fitparam_values = np.empty_like(self._fitparams, dtype=np.float64) - for (i, fitparam) in enumerate(self._fitparams): - fitparam_values[i] = fitparam_dict[fitparam.name] - return fitparam_values - - def generate_random_initials(self, rss): - """Generates a set of random initials for all global fit parameters. - A new random initial is defined as - - lower_bound + RAND * (upper_bound - lower_bound), - - where RAND is a uniform random variable between 0 and 1. - - Parameters - ---------- - rss : RandomStateService instance - The RandomStateService instance that should be used for drawing - random numbers from. - """ - vb = self.bounds - # Do random_initial = lower_bound + RAND * (upper_bound - lower_bound) - ri = vb[:,0] + rss.random.uniform(size=vb.shape[0])*(vb[:,1] - vb[:,0]) - - return ri - - -class SourceFitParameterMapper(object, metaclass=abc.ABCMeta): - """This abstract base class defines the interface of the source fit - parameter mapper. This mapper provides the functionality to map a global fit - parameter to a source fit parameter. - """ - - def __init__(self): - """Constructor of the source fit parameter mapper. 
- """ - self._fitparamset = FitParameterSet() - - # Define the list of source parameter names, which map to the fit - # parameters. - # Define the (N_fitparams,)-shaped numpy array of str objects. - self._src_param_names = np.empty((0,), dtype=np.object_) - - @property - def fitparamset(self): - """(read-only) The FitParameterSet instance holding the list of global - fit parameters. - """ - return self._fitparamset - - @property - def n_global_fitparams(self): - """(read-only) The number of defined global fit parameters. - """ - return len(self._fitparamset.fitparams) - - def get_src_fitparam_name(self, fitparam_idx): - """Returns the name of the source fit parameter for the given global fit - parameter index. - - Parameters - ---------- - fitparam_idx : int - The index of the global fit parameter. - - Returns - ------- - src_fitparam_name : str - The name of the source fit parameter. - """ - return self._src_param_names[fitparam_idx] - - @abc.abstractmethod - def def_fit_parameter(self, fit_param, src_param_name=None, sources=None): - """This method is supposed to define a new fit parameter that maps to a - given source fit parameter for a list of sources. If no list of sources - is given, it maps to all sources. - - Parameters - ---------- - fit_param : FitParameter - The FitParameter instance defining the fit parameter. - src_param_name : str | None - The name of the source parameter. It must match the name of a source - model property. If set to None (default) the name of the fit - parameter will be used. - sources : sequence of SourceModel | None - The sequence of SourceModel instances for which the fit parameter - applies. If None (the default) is specified, the fit parameter will - apply to all sources. - """ - pass - - @abc.abstractmethod - def get_src_fitparams(self, fitparam_values, src_idx=0): - """This method is supposed to create a dictionary of source fit - parameter names and values for the requested source based on the given - fit parameter values. 
- - Parameters - ---------- - fitparam_values : 1D ndarray - The array holding the current global fit parameter values. - src_idx : int - The index of the source for which the parameters should get - retrieved. - - Returns - ------- - src_fitparams : dict - The dictionary holding the translated source parameters that are - beeing fitted. - """ - pass - - @abc.abstractmethod - def get_fitparams_array(self, fitparam_values): - """This method is supposed to create a numpy record ndarray holding the - unique source fit parameter names as key and their value for each - source. The returned array must be (N_sources,)-shaped. - - Parameters - ---------- - fitparam_values : 1D ndarray - The array holding the current global fit parameter values. - - Returns - ------- - fitparams_arr : (N_sources,)-shaped numpy record ndarray | None - The numpy record ndarray holding the fit parameter names as keys - and their value for each source in each row. - None must be returned if no global fit parameters were defined. - """ - pass - - -class SingleSourceFitParameterMapper(SourceFitParameterMapper): - """This class provides the functionality to map the global fit parameters to - the source fit parameters of the single source. This class assumes a single - source, hence the mapping can be performed faster than in the multi-source - case. - """ - def __init__(self): - """Constructs a new source fit parameter mapper for a single source. - """ - super(SingleSourceFitParameterMapper, self).__init__() + # Get the source indices of the requested sources. + smidxs = self.get_src_model_idxs(sources=sources) + + # Create the output record array with nan as default value. 
+ dtype = [(':model_idx', np.int32)] + for name in self.unique_source_param_names: + dtype += [(name, np.float64), (f'{name}:gpidx', np.int32)] + + recarray = np.zeros( + (len(smidxs),), + dtype=dtype) + for name in self.unique_source_param_names: + recarray[name] = np.nan + + recarray[':model_idx'] = smidxs + + # Loop over the requested sources. + _model_param_names = self._model_param_names + _global_paramset = self._global_paramset + gflp_mask = _global_paramset.floating_params_mask + gfxp_mask = _global_paramset.fixed_params_mask + for (i, smidx) in enumerate(smidxs): + # Get the mask that selects the global parameters for the requested + # source. + src_gp_mask = _model_param_names[smidx] != np.array(None) + + # Create the array of local parameter names that belong to the + # requested model, where the floating parameters are before the + # fixed parameters. + model_param_names = np.concatenate(( + _model_param_names[smidx, gflp_mask & src_gp_mask], + _model_param_names[smidx, gfxp_mask & src_gp_mask] + )) - def def_fit_parameter(self, fitparam, src_param_name=None): - """Define a new fit parameter that maps to a given source fit parameter. + # Create the array of local parameter values that belong to the + # requested model, where the floating parameters are before the + # fixed parameters. + model_param_values = np.concatenate(( + gflp_values[ + src_gp_mask[gflp_mask]], + _global_paramset.fixed_param_values[ + src_gp_mask[gfxp_mask]] + )) - Parameters - ---------- - fitparam : FitParameter - The FitParameter instance defining the fit parameter. - src_param_name : str | None - The name of the source parameter. It must match the name of a source - model property. If set to None (default) the name of the fit - parameter will be used. - """ - self._fitparamset.add_fitparam(fitparam) + # Create the array of the global parameter indices. 
+ gpidxs = np.arange(len(_global_paramset)) + model_gp_idxs = np.concatenate(( + gpidxs[gflp_mask & src_gp_mask] + 1, + -gpidxs[gfxp_mask & src_gp_mask] - 1, + )) - if(src_param_name is None): - src_param_name = fitparam.name - if(not isinstance(src_param_name, str)): - raise TypeError('The src_param_name argument must be of type str!') + # Loop over the local parameters of the source and fill the + # params record array. + for (name, value, gpidx) in zip( + model_param_names, model_param_values, model_gp_idxs): + recarray[name][i] = value + recarray[f'{name}:gpidx'][i] = gpidx - # Append the source parameter name to the internal array. - self._src_param_names = np.concatenate((self._src_param_names, [src_param_name])) + return recarray - def get_src_fitparams(self, fitparam_values): - """Create a dictionary of source fit parameter names and values based on - the given fit parameter values. + def create_global_params_dict(self, gflp_values): + """Converts the given global floating parameter values into a dictionary + holding the names and values of all floating and fixed parameters. Parameters ---------- - fitparam_values : 1D ndarray - The array holding the current global fit parameter values. + gflp_values : numpy ndarray + The (n_global_floating_params,)-shaped 1D numpy ndarray holding the + values of the global floating parameters. Returns ------- - src_fitparams : dict - The dictionary holding the translated source parameters that are - beeing fitted. - An empty dictionary is returned if no fit parameters were defined. + params_dict : dict + The dictionary holding the parameter name and values of all + floating and fixed parameters. 
""" - src_fitparams = dict(zip(self._src_param_names, fitparam_values)) + params_dict = self._global_paramset.get_params_dict( + floating_param_values=gflp_values) - return src_fitparams + return params_dict - def get_fitparams_array(self, fitparam_values): - """Creates a numpy record ndarray holding the fit parameters names as - key and their value for each source. The returned array is (1,)-shaped - since there is only one source defined for this mapper class. + def create_global_floating_params_dict(self, gflp_values): + """Converts the given global floating parameter values into a dictionary + holding the names and values of all floating parameters. Parameters ---------- - fitparam_values : 1D ndarray - The array holding the current global fit parameter values. + gflp_values : numpy ndarray + The (n_global_floating_params,)-shaped 1D numpy ndarray holding the + values of the global floating parameters. Returns ------- - fitparams_arr : (1,)-shaped numpy record ndarray | None - The numpy record ndarray holding the fit parameter names as keys - and their value for the one single source. - None is returned if no fit parameters were defined. - """ - if(self.n_global_fitparams == 0): - return None - - fitparams_arr = np.array([tuple(fitparam_values)], - dtype=[ (name, np.float64) - for name in self._src_param_names ]) - return fitparams_arr - - -class MultiSourceFitParameterMapper(SourceFitParameterMapper): - """This class provides the functionality to map the global fit parameters to - the source fit parameters of the sources. - Sometimes it's necessary to define a global fit parameter, which relates to - a source model fit parameter for a set of sources, while another global fit - parameter relates to the same source model fit parameter, but for another - set of sources. - - At construction time this manager takes the collection of sources. Each - source gets an index, which is defined as the position of the source within - the collection. 
- """ - def __init__(self, sources): - """Constructs a new source fit parameter mapper for multiple sources. - - Parameters - ---------- - sources : sequence of SourceModel - The sequence of SourceModel instances defining the list of sources. - """ - super(MultiSourceFitParameterMapper, self).__init__() - - self.sources = sources - - # (N_fitparams, N_sources) shaped boolean ndarray defining what fit - # parameter applies to which source. - self._fit_param_2_src_mask = np.zeros( - (0, len(self.sources)), dtype=np.bool_) - - # Define an array, which will hold the unique source parameter names. - self._unique_src_param_names = np.empty((0,), dtype=np.object_) - - @property - def sources(self): - """The SourceCollection defining the sources. - """ - return self._sources - @sources.setter - def sources(self, obj): - obj = SourceCollection.cast(obj, 'The sources property must be castable to an instance of SourceCollection!') - self._sources = obj - - @property - def N_sources(self): - """(read-only) The number of sources. - """ - return len(self._sources) - - def def_fit_parameter(self, fitparam, src_param_name=None, sources=None): - """Defines a new fit parameter that maps to a given source parameter - for a list of sources. If no list of sources is given, it maps to all - sources. - - Parameters - ---------- - fitparam : FitParameter - The FitParameter instance defining the fit parameter. - src_param_name : str | None - The name of the source parameter. It must match the name of a source - model property. If set to None (default) the name of the fit - parameter will be used. - sources : SourceCollection | None - The instance of SourceCollection with the sources for which the fit - parameter applies. If None (the default) is specified, the fit - parameter will apply to all sources. 
- """ - self._fitparamset.add_fitparam(fitparam) - - if(src_param_name is None): - src_param_name = fitparam.name - if(not isinstance(src_param_name, str)): - raise TypeError('The src_param_name argument must be of type str!') - - if(sources is None): - sources = self.sources - sources = SourceCollection.cast(sources, - 'The sources argument must be castable to an instance of SourceCollection!') - - # Append the source parameter name to the internal array and keep track - # of the unique names. - self._src_param_names = np.concatenate((self._src_param_names, [src_param_name])) - self._unique_src_param_names = np.unique(self._src_param_names) - - # Get the list of source indices for which the fit parameter applies. - mask = np.zeros((len(self.sources),), dtype=np.bool_) - for ((idx,src), applied_src) in itertools.product(enumerate(self.sources), sources): - if(applied_src.id == src.id): - mask[idx] = True - self._fit_param_2_src_mask = np.vstack((self._fit_param_2_src_mask, mask)) - - def get_src_fitparams(self, fitparam_values, src_idx): - """Constructs a dictionary with the source parameters that are beeing - fitted. As values the given global fit parameter values will be used. - Hence, this method translates the global fit parameter values into the - source parameters. - - Parameters - ---------- - fitparam_values : 1D ndarray - The array holding the current global fit parameter values. - src_idx : int - The index of the source for which the parameters should get - retieved. - - Returns - ------- - src_fitparams : dict - The dictionary holding the translated source parameters that are - beeing fitted. + params_dict : dict + The dictionary holding the parameter name and values of all + floating parameters. """ - # Get the mask of global fit parameters that apply to the requested - # source. - fp_mask = self._fit_param_2_src_mask[:,src_idx] - - # Get the source parameter names and values. 
- src_param_names = self._src_param_names[fp_mask] - src_param_values = fitparam_values[fp_mask] - - src_fitparams = dict(zip(src_param_names, src_param_values)) + params_dict = self._global_paramset.get_floating_params_dict( + floating_param_values=gflp_values) - return src_fitparams + return params_dict - def get_fitparams_array(self, fitparam_values): - """Creates a numpy record ndarray holding the fit parameters names as - key and their value for each source. The returned array is - (N_sources,)-shaped. + def get_local_param_is_global_floating_param_mask( + self, + local_param_names, + ): + """Checks which local parameter name is mapped to a global floating + parameter. Parameters ---------- - fitparam_values : 1D ndarray - The array holding the current global fit parameter values. + local_param_names : sequence of str + The sequence of the local parameter names to test. Returns ------- - fitparams_arr : (N_sources,)-shaped numpy record ndarray | None - The numpy record ndarray holding the unique source fit parameter - names as keys and their value for each source per row. - None is returned if no fit parameters were defined. + mask : instance of ndarray + The (N_local_param_names,)-shaped numpy ndarray holding the mask + for each local parameter name if it is mapped to a global floating + parameter. """ - if(self.n_global_fitparams == 0): - return None - - fitparams_arr = np.empty((self.N_sources,), - dtype=[ (name, np.float64) - for name in self._unique_src_param_names ]) - - for src_idx in range(self.N_sources): - # Get the mask of global fit parameters that apply to the requested - # source. - fp_mask = self._fit_param_2_src_mask[:,src_idx] - - # Get the source parameter names and values. - src_param_names = self._src_param_names[fp_mask] - src_param_values = fitparam_values[fp_mask] + mask = np.zeros(len(local_param_names), dtype=np.bool_) - # Fill the fit params array. 
- for (name, value) in zip(src_param_names, src_param_values): - fitparams_arr[name][src_idx] = value + global_floating_params_idxs = self._global_paramset.floating_params_idxs - return fitparams_arr + # Get the global parameter indices for each local parameter name. + for (local_param_idx, local_param_name) in enumerate(local_param_names): + gpidxs = np.unique( + np.nonzero(self._model_param_names == local_param_name)[1] + ) + if np.any(np.isin(gpidxs, global_floating_params_idxs)): + mask[local_param_idx] = True + return mask diff --git a/skyllh/core/pdf.py b/skyllh/core/pdf.py index 2c6b5a13ed..0b80b1d407 100644 --- a/skyllh/core/pdf.py +++ b/skyllh/core/pdf.py @@ -1,41 +1,46 @@ # -*- coding: utf-8 -*- -from skyllh.core.binning import BinningDefinition -from skyllh.core.interpolate import ( - GridManifoldInterpolationMethod, - Linear1DGridManifoldInterpolationMethod +import abc +import numpy as np +from scipy.interpolate import RegularGridInterpolator + +from skyllh.core import ( + tool, +) +from skyllh.core.binning import ( + BinningDefinition, +) +from skyllh.core.display import ( + INDENTATION_WIDTH, +) +from skyllh.core.livetime import ( + Livetime, ) from skyllh.core.py import ( - ObjectCollection, + NamedObjectCollection, + bool_cast, classname, + float_cast, func_has_n_args, issequenceof, - typename + make_dict_hash, +) +from skyllh.core.debugging import ( + get_logger, + is_tracing_enabled, +) +from skyllh.core.flux_model import ( + TimeFluxProfile, ) -from skyllh.core.config import CFG -from skyllh.core.debugging import get_logger from skyllh.core.parameters import ( ParameterGrid, ParameterGridSet, + ParameterModelMapper, ParameterSet, - make_params_hash ) -from skyllh.core.timing import TaskTimer -from skyllh.core.trialdata import TrialDataManager - - -import abc -import numpy as np -import scipy as scp - -from scipy.interpolate import RegularGridInterpolator - -# Try to load the photospline tool. 
-PHOTOSPLINE_LOADED = True -try: - import photospline -except ImportError: - PHOTOSPLINE_LOADED = False +from skyllh.core.timing import ( + TaskTimer, +) logger = get_logger(__name__) @@ -47,7 +52,7 @@ class PDFAxis(object): plot a PDF or a PDF ratio. """ - def __init__(self, name, vmin, vmax): + def __init__(self, name, vmin, vmax, *args, **kwargs): """Creates a new axis for a PDF. Parameters @@ -59,7 +64,7 @@ def __init__(self, name, vmin, vmax): vmax : float The maximal value of the axis. """ - super(PDFAxis, self).__init__() + super().__init__(*args, **kwargs) self.name = name self.vmin = vmin @@ -73,8 +78,9 @@ def name(self): @name.setter def name(self, name): - if(not isinstance(name, str)): - raise TypeError('The name property must be of type str!') + if not isinstance(name, str): + raise TypeError( + 'The name property must be of type str!') self._name = name @property @@ -85,7 +91,9 @@ def vmin(self): @vmin.setter def vmin(self, v): - self._vmin = float(v) + self._vmin = float_cast( + v, + 'The value for the vmin property must be castable to type float!') @property def vmax(self): @@ -95,7 +103,9 @@ def vmax(self): @vmax.setter def vmax(self, v): - self._vmax = float(v) + self._vmax = float_cast( + v, + 'The value for the vmax property must be castable to type float!') @property def range(self): @@ -114,25 +124,25 @@ def __eq__(self, other): """Checks if this PDFAxis object has the same properties than the given other PDFAxis object. """ - if((self.name == other.name) and - (self.vmin == other.vmin) and - (self.vmax == other.vmax) - ): + if (self.name == other.name) and\ + (self.vmin == other.vmin) and\ + (self.vmax == other.vmax): return True return False def __str__(self): """Pretty string implementation for the PDFAxis instance. 
""" - s = '{}: {}: vmin={:g} vmax={:g}'.format( - classname(self), self._name, self._vmin, self._vmax) + s = f'{classname(self)}: {self._name}: ' +\ + f'vmin={self._vmin:g} vmax={self._vmax:g}' return s -class PDFAxes(ObjectCollection): +class PDFAxes(NamedObjectCollection): """This class describes the set of PDFAxis objects defining the dimensionality of a PDF. """ + @staticmethod def union(*axeses): """Creates a PDFAxes instance that is the union of the given PDFAxes @@ -149,108 +159,63 @@ def union(*axeses): The newly created PDFAxes instance that holds the union of the PDFAxis instances provided by all the PDFAxes instances. """ - if(not issequenceof(axeses, PDFAxes)): - raise TypeError('The arguments of the union static function must ' - 'be instances of PDFAxes!') - if(not len(axeses) >= 1): - raise ValueError('At least 1 PDFAxes instance must be provided to ' - 'the union static function!') + if not issequenceof(axeses, PDFAxes): + raise TypeError( + 'The arguments of the union static function must ' + 'be instances of PDFAxes!') + if not len(axeses) >= 1: + raise ValueError( + 'At least 1 PDFAxes instance must be provided to ' + 'the union static function!') axes = PDFAxes(axes=axeses[0]) for axes_i in axeses[1:]: for axis in axes_i: - if(not axes.has_axis(axis.name)): + if axis.name not in axes: axes += axis return axes - def __init__(self, axes=None): - super(PDFAxes, self).__init__(objs=axes, obj_type=PDFAxis) - - def __str__(self): - """Pretty string implementation for the PDFAxes instance. - """ - s = '' - for i in range(len(self)): - if(i > 0): - s += '\n' - s += str(self[i]) - return s - - @property - def axis_name_list(self): - """(read-only) The list of the names of all the axes of this PDFAxes - instance. - """ - return [ axis.name for axis in self ] - - def get_axis(self, name): - """Retrieves the PDFAxis object with the given name. + def __init__(self, axes=None, **kwargs): + """Creates a new PDFAxes instance. 
Parameters ---------- - name : str | int - The name of the axis to retrieve. If an integer is given, it - specifies the index of the axis. - - Returns - ------- - axis : PDFAxis - The PDFAxis object. - - Raises - ------ - KeyError - If the axis could not be found. + axes : sequence of instance of PDFAxis | None + The sequence of instance of PDFAxis for this PDFAxes instance. + If set to ``None``, the PDFAxes instance will be empty. """ - if(isinstance(name, int)): - return self[name] - - for axis in self: - if(axis.name == name): - return axis - - raise KeyError( - 'The PDFAxis with name "%s" could not be found!' % (name)) - - def has_axis(self, name): - """Checks if an axis of the given name is present in this PDFAxes - instance. - - Parameters - ---------- - name : str - The name of this axis. + super().__init__( + objs=axes, + obj_type=PDFAxis, + **kwargs) - Returns - ------- - check : bool - True, if this Axis object has an axis of the given name, - False otherwise. + def __str__(self): + """Pretty string implementation for the PDFAxes instance. """ - if(not isinstance(name, str)): - raise TypeError( - 'The name argument must be an instance of str!') - - for axis in self: - if(axis.name == name): - return True - return False + return '\n'.join((str(axis) for axis in self)) def is_same_as(self, axes): """Checks if this PDFAxes object has the same axes and range then the given PDFAxes object. + Parameters + ---------- + axes : instance of PDFAxes | sequence of PDFAxis + The instance of PDFAxes or the sequence of instance of PDFAxis that + should be compared to the axes of this PDFAxes instance. + Returns ------- check : bool True, if this PDFAxes and the given PDFAxes have the same axes and ranges. False otherwise. 
""" - if(len(self) != len(axes)): + if len(self) != len(axes): return False - for i in range(len(self)): - if(not self[i] == axes[i]): + + for (self_axis, axes_axis) in zip(self, axes): + if self_axis != axes_axis: return False return True @@ -265,7 +230,7 @@ def __init__(self, *args, **kwargs): """Constructor method. Gets called when the an instance of a class is created which derives from this IsBackgroundPDF class. """ - super(IsBackgroundPDF, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) def __mul__(self, other): """Creates a BackgroundPDFProduct instance for the multiplication of @@ -276,9 +241,9 @@ def __mul__(self, other): other : instance of IsBackgroundPDF The instance of IsBackgroundPDF, which is the other background PDF. """ - if(not isinstance(other, IsBackgroundPDF)): - raise TypeError('The other PDF must be an instance of ' - 'IsBackgroundPDF!') + if not isinstance(other, IsBackgroundPDF): + raise TypeError( + 'The other PDF must be an instance of IsBackgroundPDF!') return BackgroundPDFProduct(self, other) @@ -292,7 +257,7 @@ def __init__(self, *args, **kwargs): """Constructor method. Gets called when the an instance of a class is created which derives from this IsSignalPDF class. """ - super(IsSignalPDF, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) def __mul__(self, other): """Creates a SignalPDFProduct instance for the multiplication of this @@ -303,29 +268,38 @@ def __mul__(self, other): other : instance of IsSignalPDF The instance of IsSignalPDF, which is the other signal PDF. 
""" - if(not isinstance(other, IsSignalPDF)): - raise TypeError('The other PDF must be an instance of ' - 'IsSignalPDF!') + if not isinstance(other, IsSignalPDF): + raise TypeError( + 'The other PDF must be an instance of IsSignalPDF!') return SignalPDFProduct(self, other) -class PDF(object, metaclass=abc.ABCMeta): - """This is the abstract base class for all probability distribution +class PDF( + object, + metaclass=abc.ABCMeta): + r"""This is the abstract base class for all probability distribution function (PDF) models. All PDF model classes must be derived from this class. Mathematically, it - represents :math::`f(\vec{x}|\vec{p})`, where :math::`\vec{x}` is the - event data and :math::`\vec{p}` is the set of parameters the PDF is given + represents :math:`f(\vec{x}|\vec{p})`, where :math:`\vec{x}` is the + event data and :math:`\vec{p}` is the set of parameters the PDF is given for. """ - def __init__(self, param_set=None, **kwargs): + def __init__( + self, + pmm=None, + param_set=None, + **kwargs): """Creates a new PDF instance. Parameters ---------- - param_set : Parameter instance | sequence of Parameter instances | - ParameterSet instance | None + pmm : instance of ParameterModelMapper | None + The instance of ParameterModelMapper defining the global parameters + and their mapping to local model/source parameters. + It can be ``None``, if the PDF does not depend on any parameters. + param_set : instance of Parameter | sequence of instance of Parameter | instance of ParameterSet | None If this PDF depends on parameters, this set of parameters defines them. If a single parameter instance is given a ParameterSet instance will be created holding this single parameter. @@ -333,9 +307,12 @@ def __init__(self, param_set=None, **kwargs): """ # Make sure that multiple inheritance can be used. This super call will # invoke the __init__ method of a possible second inheritance. 
- super(PDF, self).__init__(**kwargs) + super().__init__( + **kwargs) + self.pmm = pmm self.param_set = param_set + self._axes = PDFAxes() @property @@ -353,6 +330,23 @@ def ndim(self): """ return len(self._axes) + @property + def pmm(self): + """The instance of ParameterModelMapper that defines the global + parameters and their mapping to local model/source parameters. + It can be ``None`` if the PDF does not depend on any parameters. + """ + return self._pmm + + @pmm.setter + def pmm(self, mapper): + if mapper is not None: + if not isinstance(mapper, ParameterModelMapper): + raise TypeError( + 'The pmm property must be an instance of ' + f'ParameterModelMapper! Its type is "{classname(mapper)}"!') + self._pmm = mapper + @property def param_set(self): """The ParameterSet instance defining the set of parameters this PDF @@ -363,9 +357,9 @@ def param_set(self): @param_set.setter def param_set(self, param_set): - if(param_set is None): + if param_set is None: param_set = ParameterSet() - elif(not isinstance(param_set, ParameterSet)): + elif not isinstance(param_set, ParameterSet): param_set = ParameterSet(param_set) self._param_set = param_set @@ -387,84 +381,143 @@ def is_background_pdf(self): def add_axis(self, axis): """Adds the given PDFAxis object to this PDF. """ - if(not isinstance(axis, PDFAxis)): + if not isinstance(axis, PDFAxis): raise TypeError( 'The axis argument must be an instance of PDFAxis!') self._axes += axis - def assert_is_valid_for_trial_data(self, tdm): + @abc.abstractmethod + def assert_is_valid_for_trial_data( + self, + tdm, + tl=None, + **kwargs): """This method is supposed to check if this PDF is valid for all the given trial data. This means, it needs to check if there is a PDF value for each trial data event that will be used in the likelihood evaluation. This is just a seatbelt. - The method must raise a ValueError if the PDF is not valid for the + The method must raise a ``ValueError`` if the PDF is not valid for the given trial data. 
+ + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial data events. + tl : instance of TimeLord | None + The optional instance of TimeLord for measuring timing information. + + Raises + ------ + ValueError + If some of the trial data is outside the PDF's value space. + """ + pass + + def initialize_for_new_trial( + self, + tdm, + tl=None, + **kwargs): + """This method is called when a new trial is initialized. Derived + classes can use this call hook to pre-compute time-expensive data, which + do not depend on any fit parameters. + + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the new trial data. + tl : instance of TimeLord | None + The optional instance of TimeLord to measure timing information. """ - raise NotImplementedError( - 'The derived PDF class "%s" did not implement the ' - '"assert_is_valid_for_trial_data" method!' % ( - classname(self))) + pass @abc.abstractmethod - def get_prob(self, tdm, params=None, tl=None): + def get_pd( + self, + tdm, + params_recarray=None, + tl=None): """This abstract method is supposed to calculate the probability density for the specified events given the specified parameter values. Parameters ---------- - tdm : TrialDataManager instance - The TrialDataManager instance holding the data events for which the - probability should be calculated for. What data fields are required - is defined by the derived PDF class and depends on the application. - params : dict | None - The dictionary containing the parameter names and values for which - the probability should get calculated. + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the data events for which + the probability density should be calculated. + What data fields are required is defined by the derived PDF class + and depends on the application. 
+        params_recarray : numpy record ndarray | None
+            The (N_models,)-shaped numpy structured ndarray holding the local
+            parameter names and values of the models.
+            The models are defined by the ParameterModelMapper instance.
+            The parameter values can be different for the different models.
+            In case of the signal PDF, the models are the sources.
+            The record array must contain two fields for each source parameter:
+            one named after the source's local parameter name,
+            holding the source's local parameter value, and one named
+            ``<name>:gpidx``, holding the global parameter index plus one for each
+            source value. For values mapping to non-fit parameters, the index
+            should be negative. This can be ``None`` for PDFs that do not depend on any parameters.
-        tl : TimeLord instance | None
-            The optional TimeLord instance that should be used to measure
+        tl : instance of TimeLord | None
+            The optional instance of TimeLord that should be used to measure
             timing information.
 
         Returns
         -------
-        prob : (N_events,)-shaped numpy ndarray
-            The 1D numpy ndarray with the probability density for each event.
-        grads : (N_fitparams,N_events)-shaped ndarray | None
-            The 2D numpy ndarray holding the gradients of the PDF w.r.t.
-            each fit parameter for each event. The order of the gradients
-            is the same as the order of floating parameters specified through
-            the ``param_set`` property.
-            It is ``None``, if this PDF does not depend on any parameters.
+        pd : instance of numpy ndarray
+            The (N_values,)-shaped numpy ndarray holding the probability density
+            for each event. The length of this 1D array depends on the number
+            of sources and the events belonging to those sources. In the worst
+            case the length is N_values = N_sources * N_trial_events.
+            The assignment of values to sources is given by the
+            :py:attr:`~skyllh.core.trialdata.TrialDataManager.src_evt_idxs`
+            property.
+        grads : dict
+            The dictionary holding the gradients of the probability density
+            w.r.t. each global fit parameter.
The key of the dictionary is the + id of the global fit parameter. The value is a (N_values,)-shaped + numpy ndarray. """ pass -class PDFProduct(PDF, metaclass=abc.ABCMeta): - """The PDFProduct class represents the product of two PDF instances. It - is derived from the PDF class and hence is a PDF itself. +class PDFProduct( + PDF): + """The PDFProduct class represents the product of two PDF instances, i.e. + ``pdf1 * pdf2``. It is derived from the PDF class and hence is a PDF itself. """ - def __init__(self, pdf1, pdf2): + def __init__(self, pdf1, pdf2, **kwargs): """Creates a new PDFProduct instance, which implements the operation - `pdf1 * pdf2`. + ``pdf1 * pdf2``. The axes of the two PDF instances will be merged. Parameters ---------- pdf1 : instance of PDF - The left-hand-side PDF in the operation `pdf1 op pdf2`. + The left-hand-side PDF in the operation ``pdf1 * pdf2``. pdf2 : instance of PDF - The right-hand-side PDF in the operation `pdf1 op pdf2`. + The right-hand-side PDF in the operation ``pdf1 * pdf2``. """ self.pdf1 = pdf1 self.pdf2 = pdf2 + if pdf1.pmm is not pdf2.pmm: + raise ValueError( + 'The ParameterModelMapper instance of pdf1 is not the same as ' + 'for pdf2!') + # Create the ParameterSet instance that is the union of the ParameterSet # instances of the two PDFs. param_set = ParameterSet.union( self._pdf1.param_set, self._pdf2.param_set) - super(PDFProduct, self).__init__( - param_set=param_set) + super().__init__( + pmm=pdf1.pmm, + param_set=param_set, + **kwargs) # The resulting PDFAxes object of this PDF instance is the union of the # two PDFAxes instances of the two PDF instances. @@ -472,51 +525,70 @@ def __init__(self, pdf1, pdf2): @property def pdf1(self): - """The left-hand-side PDF in the operation `pdf1 op pdf2`. It must be an - instance of PDF. + """The left-hand-side PDF in the operation ``pdf1 * pdf2``. + It must be an instance of PDF. 
""" return self._pdf1 @pdf1.setter def pdf1(self, pdf): - if(not isinstance(pdf, PDF)): - raise TypeError('The pdf1 property must be an instance of PDF!') + if not isinstance(pdf, PDF): + raise TypeError( + 'The pdf1 property must be an instance of PDF!') self._pdf1 = pdf @property def pdf2(self): - """The right-hand-side PDF in the operation `pdf1 op pdf2`. It must be - an instance of PDF. + """The right-hand-side PDF in the operation ``pdf1 * pdf2``. + It must be an instance of PDF. """ return self._pdf2 @pdf2.setter def pdf2(self, pdf): - if(not isinstance(pdf, PDF)): - raise TypeError('The pdf2 property must be an instance of PDF!') + if not isinstance(pdf, PDF): + raise TypeError( + 'The pdf2 property must be an instance of PDF!') self._pdf2 = pdf - def assert_is_valid_for_trial_data(self, tdm): - """Calls the ``assert_is_valid_for_trial_data`` method of ``pdf1`` and - ``pdf2``. + def assert_is_valid_for_trial_data( + self, + tdm, + tl=None, + **kwargs): + """Calls the :meth:`assert_is_valid_for_trial_data` method of ``pdf1`` + and ``pdf2``. Parameters ---------- - tdm : TrialDataManager instance - The TrialDataManager instance that should be used to get the trial - data from. + tdm : instance of TrialDataManager + The instance of TrialDataManager that should be used to get the + trial data from. + tl : instance of TimeLord | None + The optional instance of TimeLord for measuring timing information. Raises ------ ValueError If this PDF does not cover the trial data. 
        """
-        self._pdf1.assert_is_valid_for_trial_data(tdm)
-        self._pdf2.assert_is_valid_for_trial_data(tdm)
+        self._pdf1.assert_is_valid_for_trial_data(
+            tdm=tdm,
+            tl=tl,
+            **kwargs)
 
-    def get_prob(self, tdm, params=None, tl=None):
+        self._pdf2.assert_is_valid_for_trial_data(
+            tdm=tdm,
+            tl=tl,
+            **kwargs)
+
+    def get_pd(
+            self,
+            tdm,
+            params_recarray=None,
+            tl=None):
         """Calculates the probability density for the trial events given the
-        specified parameters by calling the `get_prob` method of `pdf1`
+        specified parameters by calling the `get_pd` method of `pdf1`
         and `pdf2` and combining the two property densities by multiplication.
         The gradients will be calculated using the product rule of
         differentiation.
@@ -526,143 +598,118 @@ def get_prob(self, tdm, params=None, tl=None):
         tdm : instance of TrialDataManager
             The TrialDataManager instance holding the trial event data for
             which the PDF values should get calculated.
-        params : dict | None
-            The dictionary containing the parameter names and values for
-            which the probability should get calculated.
+        params_recarray : numpy record ndarray | None
+            The (N_models,)-shaped numpy record ndarray holding the parameter
+            values of the models. See the documentation of the
+            :meth:`~skyllh.core.pdf.PDF.get_pd` method of the
+            :class:`~skyllh.core.pdf.PDF` class for further information.
         tl : TimeLord instance | None
             The optional TimeLord instance to use for measuring timing
             information.
 
         Returns
         -------
-        prob : (N_events,)-shaped numpy ndarray
-            The 1D numpy ndarray with the probability for each trial event.
-        grads : (N_fitparams,N_events)-shaped numpy ndarray
-            The gradients of the PDF product w.r.t. the fit parameter of this
-            PDFProduct instance.
-
+        pd : instance of numpy ndarray
+            The (N_events,)-shaped numpy ndarray holding the probability density
+            for each event. In case of a signal PDF product the shape will be
+            (N_sources,N_events).
+ grads : dict + The dictionary holding the gradients of the probability density + w.r.t. each fit parameter. The key of the dictionary is the id + of the global fit parameter. The value is the (N_events,)-shaped + numpy ndarray. In case of a signal PDF product, the value is a + (N_sources,N_events)-shaped ndarray. """ pdf1 = self._pdf1 pdf2 = self._pdf2 - with TaskTimer(tl, 'Get prob from individual PDFs.'): - p1 = pdf1.get_prob(tdm, params, tl=tl) - if isinstance(p1, tuple): - (prob1, grads1) = p1 - else: - prob1 = p1 - p2 = pdf2.get_prob(tdm, params, tl=tl) - if isinstance(p2, tuple): - (prob2, grads2) = p2 - else: - prob2 = p2 - - prob = prob1 * prob2 - - pdf1_param_set = pdf1.param_set - pdf2_param_set = pdf2.param_set - - N_events = prob.shape[0] - fitparam_names = self.param_set.floating_param_name_list - grads = np.zeros((len(fitparam_names), N_events), dtype=np.float64) - for (pidx, fitparam_name) in enumerate(fitparam_names): - # Calculate the gradient w.r.t. fitparam. + with TaskTimer( + tl, + f'Get probability densities from {classname(pdf1)} (pdf1) and ' + f'{classname(pdf2)} (pdf2).'): + (pd1, grads1) = pdf1.get_pd( + tdm=tdm, + params_recarray=params_recarray, + tl=tl) + (pd2, grads2) = pdf2.get_pd( + tdm=tdm, + params_recarray=params_recarray, + tl=tl) + + pd = pd1 * pd2 + + # Loop over the set of global fit parameter gradients. + grads = dict() + for gpid in set(list(grads1.keys()) + list(grads2.keys())): + # Calculate the gradient w.r.t. the fit parameter of id ``pgid``. # There are four possible cases to calculate the gradient for - # the parameter fitparam: + # the parameter gpid: # 1. Both PDFs depend on this fit parameter, the gradient is # calculated through the product rule of differentiation. # 2. Only PDF1 depends on this fit parameter. # 3. Only PDF2 depends on this fit parameter. # 4. Both PDFs are independ of this fit parameter, the gradient # is 0. 
- pdf1_has_fitparam = pdf1_param_set.has_floating_param( - fitparam_name) - pdf2_has_fitparam = pdf2_param_set.has_floating_param( - fitparam_name) - if(pdf1_has_fitparam and pdf2_has_fitparam): + pdf1_has_fitparam = gpid in grads1 + pdf2_has_fitparam = gpid in grads2 + if pdf1_has_fitparam and pdf2_has_fitparam: # Case 1 - grad1 = grads1[pdf1.param_set.get_floating_pidx(fitparam_name)] - grad2 = grads2[pdf2.param_set.get_floating_pidx(fitparam_name)] - grads[pidx] = prob2*grad1 + prob1*grad2 - elif(pdf1_has_fitparam): + grad1 = grads1[gpid] + grad2 = grads2[gpid] + grads[gpid] = pd1*grad2 + pd2*grad1 + elif pdf1_has_fitparam: # Case 2 - grad1 = grads1[pdf1.param_set.get_floating_pidx(fitparam_name)] - grads[pidx] = prob2*grad1 - elif(pdf2_has_fitparam): + grad1 = grads1[gpid] + grads[gpid] = pd2*grad1 + elif pdf2_has_fitparam: # Case 3 - grad2 = grads2[pdf2.param_set.get_floating_pidx(fitparam_name)] - grads[pidx] = prob1*grad2 - - n_src = len(tdm.get_data('src_array')['ra']) - if(n_src == 1): - # Only one source in the signal hypothesis. - return (prob, grads) - else: - # Signal hypothesis contains multiple sources, and the overall weight is obtained by - # multiplying the detector weight src_w, and the hypothesis weight src_w_W. - - src_w = tdm.get_data('src_array')['src_w'] * tdm.get_data('src_array')['src_w_W'] - src_w_grad = tdm.get_data('src_array')['src_w_grad'] * tdm.get_data('src_array')['src_w_W'] + grad2 = grads2[gpid] + grads[gpid] = pd1*grad2 - # Normalize source weights and grads. 
- norm_src_w_temp = src_w.sum() - src_w /= norm_src_w_temp - src_w_grad /= norm_src_w_temp + return (pd, grads) - src_ev_idxs = tdm.src_ev_idxs - if src_ev_idxs is not None: - (src_idxs, ev_idxs) = src_ev_idxs - prob = scp.sparse.csr_matrix((prob, (ev_idxs, src_idxs))) - else: - prob = prob.reshape((n_src, int(prob.shape[0]/n_src))).T - prob_res = prob.dot(src_w) - - n_ev = tdm.n_selected_events - norm_w = src_w.sum() - - grads_tot = np.zeros((len(fitparam_names), n_ev), dtype=np.float64) - for (pidx, fitparam_name) in enumerate(fitparam_names): - if src_ev_idxs is not None: - grad_i = scp.sparse.csr_matrix((grads[pidx], (ev_idxs, src_idxs))) - else: - grad_i = prob.reshape((n_src, int(grads[pidx].shape[0]/n_src))).T - - if fitparam_name == 'gamma': - d_wf = prob.dot(src_w_grad) + grad_i.dot(src_w) - grads_tot[pidx] = (d_wf * norm_w - src_w_grad.sum() * prob_res) / norm_w**2 - else: - d_wf = grad_i.dot(src_w) - grads_tot[pidx] = (d_wf * norm_w) / norm_w**2 - - return (prob_res, grads_tot) - - -class SignalPDFProduct(PDFProduct, IsSignalPDF): +class SignalPDFProduct( + PDFProduct, + IsSignalPDF): """This class provides a signal PDF that is the product of two signal PDF instances. """ - def __init__(self, pdf1, pdf2): - super(SignalPDFProduct, self).__init__(pdf1, pdf2) + def __init__(self, pdf1, pdf2, **kwargs): + """Creates a new PDF product of two signal PDFs. + """ + super().__init__( + pdf1=pdf1, + pdf2=pdf2, + **kwargs) -class BackgroundPDFProduct(PDFProduct, IsBackgroundPDF): +class BackgroundPDFProduct( + PDFProduct, + IsBackgroundPDF): """This class provides a background PDF that is the product of two background PDF instances. """ - def __init__(self, pdf1, pdf2): - super(BackgroundPDFProduct, self).__init__(pdf1, pdf2) + def __init__(self, pdf1, pdf2, **kwargs): + """Creates a new PDF product of two background PDFs. 
+ """ + super().__init__( + pdf1=pdf1, + pdf2=pdf2, + **kwargs) -class SpatialPDF(PDF, metaclass=abc.ABCMeta): +class SpatialPDF( + PDF, + metaclass=abc.ABCMeta): """This is the abstract base class for a spatial PDF model. A spatial PDF has two axes, right-ascention (ra) and declination (dec). """ - def __init__(self, ra_range, dec_range, *args, **kwargs): + def __init__(self, ra_range, dec_range, **kwargs): """Constructor of a spatial PDF. It adds the PDF axes "ra" and "dec" with the specified ranges of coverage. @@ -673,14 +720,24 @@ def __init__(self, ra_range, dec_range, *args, **kwargs): dec_range : 2-element tuple The tuple specifying the declination range this PDF covers. """ - super(SpatialPDF, self).__init__(*args, **kwargs) + super().__init__(**kwargs) - self.add_axis(PDFAxis(name='ra', - vmin=ra_range[0], vmax=ra_range[1])) - self.add_axis(PDFAxis(name='dec', - vmin=dec_range[0], vmax=dec_range[1])) + self.add_axis( + PDFAxis( + name='ra', + vmin=ra_range[0], + vmax=ra_range[1])) + self.add_axis( + PDFAxis( + name='dec', + vmin=dec_range[0], + vmax=dec_range[1])) - def assert_is_valid_for_exp_data(self, data_exp): + def assert_is_valid_for_trial_data( + self, + tdm, + tl=None, + **kwargs): """Checks if this spatial PDF is valid for all the given experimental data. It checks if all the data is within the right-ascention and declination @@ -688,14 +745,16 @@ def assert_is_valid_for_exp_data(self, data_exp): Parameters ---------- - data_exp : numpy record ndarray - The array holding the experimental data. The following data fields - must exist: + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial data. + The following data fields must exist: - 'ra' : float The right-ascention of the data event. - 'dec' : float The declination of the data event. + tl : instance of TimeLord | None + The optional instance of TimeLord for measuring timing information. 
Raises ------ @@ -703,113 +762,296 @@ def assert_is_valid_for_exp_data(self, data_exp): If some of the data is outside the right-ascention or declination range. """ - ra_axis = self.get_axis('ra') - dec_axis = self.get_axis('dec') + ra_axis = self.axes['ra'] + dec_axis = self.axes['dec'] - sinDec_binning = self.get_binning('sin_dec') - exp_sinDec = np.sin(data_exp['dec']) + ra = tdm.get_data('ra') + dec = tdm.get_data('dec') # Check if all the data is within the right-ascention range. - if(np.any((data_exp['ra'] < ra_axis.vmin) | - (data_exp['ra'] > ra_axis.vmax))): + if np.any((ra < ra_axis.vmin) | (ra > ra_axis.vmax)): raise ValueError( - 'Some data is outside the right-ascention range (%.3f, %.3f)!' % (ra_axis.vmin, ra_axis.vmax)) + 'Some data is outside the right-ascention range ' + f'({ra_axis.vmin:.3f}, {ra_axis.vmax:.3f})!') # Check if all the data is within the declination range. - if(np.any((data_exp['dec'] < dec_axis.vmin) | - (data_exp['dec'] > dec_axis.vmax))): - raise ValueError('Some data is outside the declination range (%.3f, %.3f)!' % ( - dec_axis.vmin, dec_axis.vmax)) + if np.any((dec < dec_axis.vmin) | (dec > dec_axis.vmax)): + raise ValueError( + 'Some data is outside the declination range ' + f'({dec_axis.vmin:.3f}, {dec_axis.vmax:.3f})!') -class EnergyPDF(PDF, metaclass=abc.ABCMeta): - """This is the abstract base class for an energy PDF model. +class EnergyPDF( + PDF, + metaclass=abc.ABCMeta): + """This is the abstract base class for an energy PDF. """ def __init__(self, *args, **kwargs): - super(EnergyPDF, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) -class TimePDF(PDF, metaclass=abc.ABCMeta): - """This is the abstract base class for a time PDF model. +class TimePDF( + PDF, + metaclass=abc.ABCMeta): + """This is the abstract base class for a time PDF. It consists of + a :class:`~skyllh.core.livetime.Livetime` instance and a + :class:`~skyllh.core.flux_model.TimeFluxProfile` instance. 
Together they
+ construct the actual time PDF, which takes the detector down-time
+ into account.
"""

- def __init__(self, *args, **kwargs):
- super(TimePDF, self).__init__(*args, **kwargs)
+ def __init__(
+ self,
+ livetime,
+ time_flux_profile,
+ **kwargs,
+ ):
+ """Creates a new time PDF instance for a given time flux profile and
+ detector live time.
+
+ Parameters
+ ----------
+ livetime : instance of Livetime
+ An instance of Livetime, which provides the detector live-time
+ information.
+ time_flux_profile : instance of TimeFluxProfile
+ The signal's time flux profile.
+ **kwargs
+ Additional keyword arguments are passed to the constructor of the
+ base class, :class:`~skyllh.core.pdf.PDF`.
+ """
+ super().__init__(
+ **kwargs)
+
+ self.livetime = livetime
+ self.time_flux_profile = time_flux_profile
+
+ # Define the time axis with the time boundaries of the live-time.
+ self.add_axis(
+ PDFAxis(
+ name='time',
+ vmin=self._livetime.time_window[0],
+ vmax=self._livetime.time_window[1]))
+
+ # Get sum, S, of the integrals for each detector on-time interval during
+ # the time flux profile, in order to be able to rescale the time flux
+ # profile to unity with overlapping detector off-times removed.
+ self._S = self._calculate_sum_of_ontime_time_flux_profile_integrals()
+
+ @property
+ def livetime(self):
+ """The instance of Livetime, which provides the detector live-time
+ information.
+ """
+ return self._livetime
+
+ @livetime.setter
+ def livetime(self, lt):
+ if not isinstance(lt, Livetime):
+ raise TypeError(
+ 'The livetime property must be an instance of Livetime!')
+ self._livetime = lt
+
+ @property
+ def time_flux_profile(self):
+ """The instance of TimeFluxProfile providing the physical time flux
+ profile. 
+ """ + return self._time_flux_profile + @time_flux_profile.setter + def time_flux_profile(self, profile): + if not isinstance(profile, TimeFluxProfile): + raise TypeError( + 'The time_flux_profile property must be an instance of ' + 'TimeFluxProfile! ' + f'Its current type is {classname(profile)}!') + self._time_flux_profile = profile + + def __str__(self): + """Pretty string representation of the time PDF. + """ + s = ( + f'{classname(self)}(\n' + ' '*INDENTATION_WIDTH + + f'livetime = {str(self._livetime)},\n' + ' '*INDENTATION_WIDTH + + f'time_flux_profile = {str(self._time_flux_profile)}\n' + ')' + ) + + return s -class MultiDimGridPDF(PDF): - """This class provides a multi-dimensional PDF created from pre-calculated - PDF data on a grid. The grid data is interpolated using a - :class:`scipy.interpolate.RegularGridInterpolator` instance. + def _calculate_sum_of_ontime_time_flux_profile_integrals(self): + """Calculates the sum, S, of the time flux profile integrals during the + detector on-time intervals. + + Returns + ------- + S : float + The sum of the time flux profile integrals during the detector + on-time intervals. + """ + uptime_intervals = self._livetime.get_uptime_intervals_between( + self._time_flux_profile.t_start, + self._time_flux_profile.t_stop) + + S = np.sum( + self._time_flux_profile.get_integral( + uptime_intervals[:, 0], + uptime_intervals[:, 1])) + + return S + + def assert_is_valid_for_trial_data( + self, + tdm, + tl=None, + **kwargs): + """Checks if the time PDF is valid for all the given trial data. + It checks if the time of all events is within the defined time axis of + the PDF. + + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager that holds the trial data. + The following data fields must exist: + + ``'time'`` : float + The time of the data event. + + tl : instance of TimeLord | None + The optional instance of TimeLord for measuring timing information. 
+ + Raises + ------ + ValueError + If some of the data is outside the time range of the PDF. + """ + time_axis = self.axes['time'] + + time = tdm.get_data('time') + + if np.any((time < time_axis.vmin) | + (time > time_axis.vmax)): + raise ValueError( + 'Some trial data is outside the time range ' + f'[{time_axis.vmin:.3f}, {time_axis.vmax:.3f}]!') + + +class MultiDimGridPDF( + PDF): + """This class provides a multi-dimensional PDF. The PDF is created from + pre-calculated PDF data on a grid. The grid data is either interpolated + using a :class:`scipy.interpolate.RegularGridInterpolator` instance, or is + provided as a photospline fit through a photospline table file. """ def __init__( - self, axis_binnings, path_to_pdf_splinetable=None, - pdf_grid_data=None, norm_factor_func=None): + self, + pmm, + axis_binnings, + path_to_pdf_splinetable=None, + pdf_grid_data=None, + norm_factor_func=None, + cache_pd_values=False, + **kwargs): """Creates a new PDF instance for a multi-dimensional PDF given - as PDF values on a grid. The grid data is interpolated with a + as PDF values on a grid or as PDF values stored in a photospline table. + + In case of PDF values on a grid, the grid data is interpolated with a :class:`scipy.interpolate.RegularGridInterpolator` instance. As grid points the bin edges of the axis binning definitions are used. + In case of PDF values stored in a photospline table, this table is + loaded via the ``photospline.SplineTable`` class. + + Note:: + + By definition this PDF must not depend on any fit parameters. + Parameters ---------- - axis_binnings : BinningDefinition | sequence of BinningDefinition + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper that defines the mapping of + the global parameters to local model parameters. + axis_binnings : instance of BinningDefinition | sequence of instance of BinningDefinition The sequence of BinningDefinition instances defining the binning of - the PDF axes. 
The name of each BinningDefinition instance defines + the PDF axes. The name of each instance of BinningDefinition defines the event field name that should be used for querying the PDF. path_to_pdf_splinetable : str | None - The path to the file containing the spline table. - The spline table contains a pre-computed fit to pdf_grid_data. - pdf_grid_data : n-dimensional numpy ndarray | None + The path to the file containing the spline table, which contains + a pre-computed fit to the grid data. + If specified, ``pdf_grid_data`` must be ``None``. + pdf_grid_data : instance of numpy ndarray | None The n-dimensional numpy ndarray holding the PDF values at given grid points. The grid points must match the bin edges of the given - BinningDefinition instances of the `axis_binnings` argument. + BinningDefinition instances of the ``axis_binnings`` argument. + If specified, ``path_to_pdf_splinetable`` must be ``None``. norm_factor_func : callable | None The function that calculates a possible required normalization factor for the PDF value based on the event properties. The call signature of this function must be - `__call__(pdf, tdm, params)`, where `pdf` is this PDF - instance, `tdm` is an instance of TrialDataManager holding the - event data for which to calculate the PDF values, and `params` is a - dictionary with the current parameter names and values. - """ - super(MultiDimGridPDF, self).__init__() + + ``__call__(pdf, tdm, params_recarray, eventdata, evt_mask=None)``, + + where ``pdf`` is this PDF instance, ``tdm`` is an instance of + TrialDataManager holding the event data for which to calculate the + PDF values, ``params_recarray`` is a numpy structured ndarray + holding the local parameter names and values, ``eventdata`` is + is a (N_values,V)-shaped numpy ndarray holding the event data + necessary for this PDF, and ``evt_mask`` is an optional + (N_values,)-shaped numpy ndarray holding the mask for the events, + i.e. 
rows in ``eventdata``, which should be considered. If ``None``, + all events should be considered. + cache_pd_values : bool + Flag if the probability density values should be cached. + The evaluation of the photospline fit might be slow and caching the + probability density values might increase performance. + """ + super().__init__( + pmm=pmm, + **kwargs) # Need either splinetable or grid of pdf values. - if((path_to_pdf_splinetable is None) and (pdf_grid_data is None)): + if path_to_pdf_splinetable is None and\ + pdf_grid_data is None: raise ValueError( 'At least one of the following arguments are required: ' 'path_to_pdf_splinetable (str) or ' 'pdf_grid_data (numpy.ndarray)!') - elif((path_to_pdf_splinetable is not None) and - (pdf_grid_data is not None)): + elif (path_to_pdf_splinetable is not None and + pdf_grid_data is not None): raise ValueError( 'Only one of the two arguments path_to_pdf_splinetable and ' 'pdf_grid_data can be specified!') # If a path to the photospline tables is given, we raise an error if # the photospline package is not loaded. - if(path_to_pdf_splinetable is not None): - if(not isinstance(path_to_pdf_splinetable, str)): + if path_to_pdf_splinetable is not None: + if not isinstance(path_to_pdf_splinetable, str): raise TypeError( - 'The path_to_pdf_splinetable argument must be None or of ' - 'type str!') + 'The path_to_pdf_splinetable argument must be None or an ' + 'instance of str!' + 'Its current type is ' + f'{classname(path_to_pdf_splinetable)}.') - if(not PHOTOSPLINE_LOADED): + if not tool.is_available('photospline'): raise ImportError( 'The path_to_pdf_splinetable argument is specified, but ' 'the "photospline" package is not available!') - if(pdf_grid_data is not None): - if(not isinstance(pdf_grid_data, np.ndarray)): + if pdf_grid_data is not None: + if not isinstance(pdf_grid_data, np.ndarray): raise TypeError( 'The pdf_grid_data argument must be an instance of numpy ' - 'ndarray. 
The current type is {}!'.format( - type(pdf_grid_data))) + f'ndarray. Its current type is {classname(pdf_grid_data)}!') self.axis_binning_list = axis_binnings self.norm_factor_func = norm_factor_func + self.cache_pd_values = cache_pd_values # Define the PDF axes. for axis_binning in self._axis_binnning_list: @@ -820,26 +1062,26 @@ def __init__( )) # Create the internal PDF object. - if(path_to_pdf_splinetable is not None): - self._pdf = photospline.SplineTable(path_to_pdf_splinetable) - else: + if path_to_pdf_splinetable is None: self._pdf = RegularGridInterpolator( tuple([binning.binedges for binning in self._axis_binnning_list]), pdf_grid_data, method='linear', bounds_error=False, - fill_value=0 - ) + fill_value=0) + else: + self._pdf = tool.get('photospline').SplineTable( + path_to_pdf_splinetable) # Because this PDF does not depend on any fit parameters, the PDF values # can be cached as long as the trial data state ID of the trial data # manager has not changed. self._cache_tdm_trial_data_state_id = None - self._cache_prob = None + self._cache_pd = None logger.debug( - 'Created %s instance with axis name list %s' % ( - classname(self), str(self._axes.axis_name_list))) + f'Created {classname(self)} instance with axis name list ' + f'{str(self._axes.name_list)}') @property def axis_binning_list(self): @@ -851,603 +1093,529 @@ def axis_binning_list(self): @axis_binning_list.setter def axis_binning_list(self, binnings): - if(isinstance(binnings, BinningDefinition)): + if isinstance(binnings, BinningDefinition): binnings = [binnings] - if(not issequenceof(binnings, BinningDefinition)): + if not issequenceof(binnings, BinningDefinition): raise TypeError( 'The axis_binning_list property must be an instance of ' 'BinningDefinition or a sequence of BinningDefinition ' - 'instances!') + 'instances! 
'
+ f'Its current type is {classname(binnings)}.')

self._axis_binnning_list = list(binnings)

@property
def norm_factor_func(self):
"""The function that calculates the possible required normalization
factor. The call signature of this function must be
- `__call__(pdf, tdm, fitparams)`, where `pdf` is this PDF
- instance, `tdm` is an instance of TrialDataManager holding the events
- for which to calculate the PDF values, and `fitparams` is a dictionary
- with the current fit parameter names and values. This property can be
- set to `None`. In that case a unity returning function is used.
+
+ ``__call__(pdf, tdm, params_recarray, eventdata, evt_mask=None)``,
+
+ where ``pdf`` is this PDF instance, ``tdm`` is an instance of
+ TrialDataManager holding the events for which to calculate the PDF
+ values, ``params_recarray`` is a numpy structured ndarray holding the
+ local parameter names and values, ``eventdata`` is a (N_values,V)-shaped
+ numpy ndarray holding the event data necessary for this PDF, and
+ ``evt_mask`` is an optional (N_values,)-shaped numpy ndarray holding the
+ mask for the events, i.e. rows in ``eventdata``, which should be
+ considered. If ``None``, all events should be considered.
+ This property can be set to ``None``. In that case a unity returning
+ function is used.
"""
return self._norm_factor_func

@norm_factor_func.setter
def norm_factor_func(self, func):
- if(func is None):
- # Define a normalization function that just returns 1 for each
- # event. 
- def func(pdf, tdm, fitparams, eventdata): - n_src = len(tdm.get_data('src_array')['ra']) - if(n_src == 1): - n_dim = tdm.n_selected_events + def func(pdf, tdm, params_recarray, eventdata, evt_mask=None): + if evt_mask is None: + n_values = eventdata.shape[0] else: - n_dim = eventdata.shape[0] - return np.ones((n_dim,), dtype=np.float64) + n_values = np.count_nonzero(evt_mask) + return np.ones((n_values,), dtype=np.float64) - if(not callable(func)): + if not callable(func): raise TypeError( 'The norm_factor_func property must be a callable object!') - if(not func_has_n_args(func, 4)): + if not func_has_n_args(func, 5): raise TypeError( - 'The norm_factor_func property must be a function with 4 ' + 'The norm_factor_func property must be a function with 5 ' 'arguments!') self._norm_factor_func = func - def assert_is_valid_for_trial_data(self, tdm): + @property + def cache_pd_values(self): + """Flag if the probability density values should be cached. + """ + return self._cache_pd_values + + @cache_pd_values.setter + def cache_pd_values(self, b): + self._cache_pd_values = bool_cast( + b, + 'The cache_pd_values property must be castable to type bool!') + + def assert_is_valid_for_trial_data( + self, + tdm, + tl=None, + **kwargs): """Checks if the PDF is valid for all values of the given evaluation data. The evaluation data values must be within the ranges of the PDF axes. Parameters ---------- - tdm : TrialDataManager instance - The instance of TrialDataManager that holds the data which is going - to be evaluated. + tdm : instance of TrialDataManager + The instance of TrialDataManager that holds the trial data for which + the PDF should be valid. + tl : instance of TimeLord | None + The optional instance of TimeLord for measuring timing information. Raises ------ ValueError - If any of the evaluation data is out of its axis range. + If any of the evaluation trial data is out of its axis range. 
"""
for axis in self._axes:
data = tdm.get_data(axis.name)
- if(np.any(data < axis.vmin) or
- np.any(data > axis.vmax)
- ):
+ m = (data < axis.vmin) | (data > axis.vmax)
+ if np.any(m):
raise ValueError(
- 'Some of the trial data for PDF axis '
- '"%s" is out of range (%g,%g)!' % (
- axis.name, axis.vmin, axis.vmax))
-
- def get_prob_with_eventdata(self, tdm, params, eventdata, tl=None):
- """Calculates the probability for the trial events given the specified
- parameters. This method has the additional argument ``eventdata`` which
- must be a 2d ndarray containing the trial event data in the correct
- order for the evaluation of the RegularGridInterpolator or photospline
- table instance.
- This method is usefull when PDF values for the same trial data need to
- get evaluated.
+ f'Some of the trial data for PDF axis "{axis.name}" is out '
+ f'of range ({axis.vmin:g},{axis.vmax:g})! '
+ f'Data values out of range: {data[m]}')
+
+ def _initialize_cache(
+ self,
+ tdm):
+ """Initializes the cache variables.

Parameters
----------
- tdm : TrialDataManager instance
- The TrialDataManager instance holding the trial event data for which
- the PDF values should get calculated.
- params : dict | None
- The dictionary containing the parameters the probability should get
- calculated for. By definition, this PDF does not depend on any
- parameters.
- eventdata : 2D (N_events,V)-shaped ndarray
- The 2D numpy ndarray holding the V data attributes for each event
- needed for the evaluation of the PDF.
- tl : TimeLord instance | None
- The optional TimeLord instance that should be used to measure
- timing information.
+ tdm : instance of TrialDataManager
+ The instance of TrialDataManager that holds the trial data events.
+ """
+ self._cache_tdm_trial_data_state_id = None
+ self._cache_pd = np.repeat(
+ np.array([np.nan], dtype=np.float64),
+ tdm.get_n_values())

- Returns
- -------
- prob : (N_events,)-shaped numpy ndarray
- The 1D numpy ndarray with the probability for each event. 
+ def _store_pd_values_to_cache( + self, + tdm, + pd, + evt_mask=None): + """Stores the given pd values into the pd array cache. + + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager that hold the trial data events. + pd : instance of numpy ndarray + The (N,)-shaped numpy ndarray holding the pd values to be stored. + evt_mask : instance of numpy ndarray | None + The (N_values,)-shaped numpy ndarray defining the elements of the + (N_values,)-shaped pd cache array where the given pd values should + get stored. If set to ``None``, the the ``pd`` array must be of + length N_values. """ - do_caching = CFG['caching']['pdf']['MultiDimGridPDF'] - if(do_caching): - tdm_trial_data_state_id = tdm.trial_data_state_id - cache_tdm_trial_data_state_id = self._cache_tdm_trial_data_state_id + self._cache_tdm_trial_data_state_id = tdm.trial_data_state_id + + if evt_mask is None: + self._cache_pd[:] = pd + return + + self._cache_pd[evt_mask] = pd + + def _get_cached_pd_values( + self, + tdm, + evt_mask=None): + """Retrieves cached pd values for the given events. - if(cache_tdm_trial_data_state_id == tdm_trial_data_state_id): - return self._cache_prob + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager that hold the trial data events. + evt_mask : instance of numpy ndarray | None + The (N_values,)-shaped numpy ndarray defining the elements of the + (N_values,)-shaped pd cache array for which pd values should get + returned. + If set to ``None`` all N_values values will get retrieved. - if(isinstance(self._pdf, RegularGridInterpolator)): - with TaskTimer(tl, 'Get prob from RegularGridInterpolator.'): - prob = self._pdf(eventdata) + Returns + ------- + pd : instance of numpy ndarray | None + Returns ``None``, when no cached values are available. + Otherwise the (N,)-shaped numpy ndarray holding the pd values where + evt_mask evaluates to True. 
+ """
+ if self._cache_tdm_trial_data_state_id is None:
+ self._initialize_cache(tdm=tdm)
+ return None
+
+ if self._cache_tdm_trial_data_state_id != tdm.trial_data_state_id:
+ return None
+
+ if evt_mask is None:
+ if np.any(np.isnan(self._cache_pd)):
+ return None
+ pd = self._cache_pd
else:
- with TaskTimer(tl, 'Get prob from photospline fit.'):
- V = eventdata.shape[1]
- prob = self._pdf.evaluate_simple(
- [eventdata[:, i] for i in range(0, V)])
+ if np.any(np.isnan(self._cache_pd[evt_mask])):
+ return None
+ pd = self._cache_pd[evt_mask]

- with TaskTimer(tl, 'Normalize MultiDimGridPDF with norm factor.'):
- norm = self._norm_factor_func(self, tdm, params, eventdata)
- prob *= norm
- if(do_caching):
- self._cache_tdm_trial_data_state_id = tdm_trial_data_state_id
- self._cache_prob = prob
+ return pd

- return prob
+ def get_pd_with_eventdata(
+ self,
+ tdm,
+ params_recarray,
+ eventdata,
+ evt_mask=None,
+ tl=None):
+ """Calculates the probability density value for the given ``eventdata``.

- def get_prob(self, tdm, params=None, tl=None):
- """Calculates the probability for the trial events given the specified
- parameters.
+ This method is useful when PDF values for the same trial data need to
+ be evaluated.
+ eventdata : instance of numpy ndarray + The (N_values,V)-shaped numpy ndarray holding the V data attributes + for each of the N_values events needed for the evaluation of the + PDF. + evt_mask : instance of numpy ndarray | None + The (N_values,)-shaped numpy ndarray defining the elements of the + N_values pd array for which pd values should get calculated. + This is needed to determine if the requested pd values are already + cached. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to measure timing information. Returns ------- - prob : (N_events,)-shaped numpy ndarray - The 1D numpy ndarray with the probability for each event. - grads : None - Because this PDF does not depend on any parameters, no gradients - w.r.t. the parameters are returned. - """ - do_caching = CFG['caching']['pdf']['MultiDimGridPDF'] - - if(do_caching): - tdm_trial_data_state_id = tdm.trial_data_state_id - cache_tdm_trial_data_state_id = self._cache_tdm_trial_data_state_id - - if((cache_tdm_trial_data_state_id is not None) and - (cache_tdm_trial_data_state_id == tdm_trial_data_state_id)): - return (self._cache_prob, None) - - with TaskTimer(tl, 'Get PDF event data.'): - if(self.is_signal_pdf): - # Evaluate the relevant quantities for - # all events and sources (relevant for stacking analyses). - if tdm.src_ev_idxs is not None: - (src_idxs, ev_idxs) = tdm.src_ev_idxs - eventdata = np.array( - [ - # Check `psi` axis name. - tdm.get_data(axis.name) - if ('psi' in axis.name) - - # Check `src` axis name. - else tdm.get_data(axis.name)[src_idxs] - if ('src' in axis.name) - - # Default case. - else tdm.get_data(axis.name)[ev_idxs] - for axis in self._axes - ] - ).T + pd : (N,)-shaped numpy ndarray + The (N,)-shaped numpy ndarray holding the probability density + value for each model and event. The length of this array depends on + the ``evt_mask`` argument. Only values are returned where + ``evt_mask`` evaluates to ``True``. 
+ If ``evt_mask`` is set to ``None``, the length is N_values.
+ """
+ if self._cache_pd_values:
+ pd = self._get_cached_pd_values(
+ tdm=tdm,
+ evt_mask=evt_mask)
+ if pd is not None:
+ return pd
+
+ # Cached pd values are not available at this point.
+
+ if isinstance(self._pdf, RegularGridInterpolator):
+ with TaskTimer(tl, 'Get pd from RegularGridInterpolator.'):
+ if evt_mask is None:
+ pd = self._pdf(eventdata)
else:
- n_src = len(tdm.get_data('src_array')['ra'])
- l_ev = len(tdm.get_data('ra'))
- eventdata = np.array(
- [
- # Check `psi` axis name.
- tdm.get_data(axis.name)
- if ('psi' in axis.name)
-
- # Check `src` axis name.
- else tdm.get_data(axis.name)
- if (('src' in axis.name) and (n_src == 1))
- else np.repeat(tdm.get_data(axis.name), l_ev)
- if (('src' in axis.name) and (n_src != 1))
-
- # Default case.
- else np.tile(tdm.get_data(axis.name), n_src)
- for axis in self._axes
- ]
- ).T
- elif (self.is_background_pdf):
- eventdata = np.array(
- [tdm.get_data(axis.name) for axis in self._axes]).T
- else:
- raise TypeError('Pdf type is unknown!')
+ pd = self._pdf(eventdata[evt_mask])
+ else:
+ with TaskTimer(tl, 'Get pd from photospline fit.'):
+ V = eventdata.shape[1]
+ if evt_mask is None:
+ pd = self._pdf.evaluate_simple(
+ [eventdata[:, i] for i in range(0, V)])
+ else:
+ pd = self._pdf.evaluate_simple(
+ [eventdata[:, i][evt_mask] for i in range(0, V)])

- with TaskTimer(tl, 'Get prob for all selected events.'):
- prob = self.get_prob_with_eventdata(tdm, params, eventdata, tl=tl)
- if(do_caching):
- self._cache_tdm_trial_data_state_id = tdm_trial_data_state_id
- self._cache_prob = prob
+ with TaskTimer(tl, 'Normalize MultiDimGridPDF with norm factor.'):
+ norm = self._norm_factor_func(
+ pdf=self,
+ tdm=tdm,
+ params_recarray=params_recarray,
+ eventdata=eventdata,
+ evt_mask=evt_mask)

- return (prob, None)
+ pd *= norm

+ if self._cache_pd_values:
+ self._store_pd_values_to_cache(
+ tdm=tdm,
+ pd=pd,
+ evt_mask=evt_mask)

-class NDPhotosplinePDF(PDF): 
- """This class provides a multi-dimensional PDF created from a n-dimensional - photospline fit. The photospline package is used to evaluate the PDF fit. - """ + return pd - def __init__( - self, - axis_binnings, - param_set, - path_to_pdf_splinefit, - norm_factor_func=None): - """Creates a new PDF instance for a n-dimensional photospline PDF fit. + @staticmethod + def create_eventdata_for_sigpdf( + tdm, + axes): + """Creates the (N_values,V)-shaped eventdata ndarray necessary for + evaluating the signal PDF. Parameters ---------- - axis_binnings : BinningDefinition | sequence of BinningDefinition - The sequence of BinningDefinition instances defining the binning of - the PDF axes. The name of each BinningDefinition instance defines - the event field name that should be used for querying the PDF. - param_set : Parameter | ParameterSet - The Parameter instance or ParameterSet instance defining the - parameters of this PDF. The ParameterSet holds the information - which parameters are fixed and which are floating (i.e. fitted). - path_to_pdf_splinefit : str - The path to the file containing the photospline fit. - norm_factor_func : callable | None - The function that calculates a possible required normalization - factor for the PDF value based on the event properties. - The call signature of this function must be - `__call__(pdf, tdm, params)`, where `pdf` is this PDF - instance, `tdm` is an instance of TrialDataManager holding the - event data for which to calculate the PDF values, and `params` is a - dictionary with the current parameter names and values. 
- """ - if(not PHOTOSPLINE_LOADED): - raise ImportError( - 'The photospline package could not be loaded!') - - super(NDPhotosplinePDF, self).__init__( - param_set=param_set) - - self._n_fitparams = self._param_set.n_floating_params - - if(isinstance(axis_binnings, BinningDefinition)): - axis_binnings = [axis_binnings] - if(not issequenceof(axis_binnings, BinningDefinition)): - raise TypeError( - 'The axis_binnings argument must be an instance of ' - 'BinningDefinition or a sequence of BinningDefinition ' - 'instances!') - - if(not isinstance(path_to_pdf_splinefit, str)): - raise TypeError( - 'The path_to_pdf_splinefit argument must be an instance of ' - 'str!') - - self.norm_factor_func = norm_factor_func - - # Define the PDF axes and create a mapping of fit parameter names to - # axis indices. - self._fitparam_name_to_axis_idx_map = dict() - for (axis_idx, axis_binning) in enumerate(axis_binnings): - axis_name = axis_binning.name - - self.add_axis(PDFAxis( - name=axis_name, - vmin=axis_binning.lower_edge, - vmax=axis_binning.upper_edge - )) + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial event data. + axes : instance of PDFAxes + The instance of PDFAxes defining the data field names for the PDF. 
+ """ + eventdata_fields = [] + + (src_idxs, evt_idxs) = tdm.src_evt_idxs + for axis in axes: + name = axis.name + data = tdm.get_data(name) + if tdm.is_event_data_field(name): + eventdata_fields.append(np.take(data, evt_idxs)) + elif tdm.is_source_data_field(name): + eventdata_fields.append(np.take(data, src_idxs)) + elif tdm.is_srcevt_data_field(name): + eventdata_fields.append(data) + else: + TypeError( + f'Unable to determine the type of the data field {name}!') - if(self._param_set.has_floating_param(axis_name)): - self._fitparam_name_to_axis_idx_map[axis_name] = axis_idx + eventdata = np.array(eventdata_fields).T - self._pdf = photospline.SplineTable(path_to_pdf_splinefit) + return eventdata - logger.debug( - 'Created %s instance with axis name list %s' % ( - classname(self), str(self._axes.axis_name_list))) + @staticmethod + def create_eventdata_for_bkgpdf( + tdm, + axes): + """Creates the (N_values,V)-shaped eventdata ndarray necessary for + evaluating the background PDF. - @property - def norm_factor_func(self): - """The function that calculates the possible required normalization - factor. The call signature of this function must be - `__call__(pdf, tdm, fitparams)`, where `pdf` is this PDF - instance, `tdm` is an instance of TrialDataManager holding the events - for which to calculate the PDF values, and `fitparams` is a dictionary - with the current fit parameter names and values. This property can be - set to `None`. In that case a unity returning function is used. + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial event data. + axes : instance of PDFAxes + The instance of PDFAxes defining the data field names for the PDF. """ - return self._norm_factor_func - @norm_factor_func.setter - def norm_factor_func(self, func): - if(func is None): - # Define a normalization function that just returns 1 for each - # event. 
- def func(pdf, tdm, fitparams): - n_src = len(tdm.get_data('src_array')['ra']) - if(n_src == 1): - n_dim = tdm.n_selected_events - else: - if tdm.src_ev_idxs is None: - n_dim = tdm.n_selected_events * n_src - else: - n_dim = len(tdm.src_ev_idxs[0]) - return np.ones((n_dim,), dtype=np.float64) + eventdata_fields = [] - if(not callable(func)): - raise TypeError( - 'The norm_factor_func property must be a callable object!') - if(not func_has_n_args(func, 3)): - raise TypeError( - 'The norm_factor_func property must be a function with 3 ' - 'arguments!') - self._norm_factor_func = func + for axis in axes: + eventdata_fields.append(tdm.get_data(axis.name)) - def assert_is_valid_for_trial_data(self, tdm): - """Checks if the PDF is valid for all values of the given trial data. - The trial data values must be within the ranges of the PDF axes. + eventdata = np.array(eventdata_fields).T - Parameters - ---------- - tdm : TrialDataManager instance - The instance of TrialDataManager that holds the trial data which is - going to be evaluated. + return eventdata - Raises - ------ - ValueError - If any of the trial data is out of its axis range. - """ - for axis in self._axes: - data = tdm.get_data(axis.name) - if(np.any(data < axis.vmin) or - np.any(data > axis.vmax) - ): - raise ValueError( - 'Some of the trial data for PDF axis ' - '"%s" is out of range (%g,%g)!' % ( - axis.name, axis.vmin, axis.vmax)) - - def get_prob(self, tdm, params=None, tl=None): - """Calculates the probability for the trial events given the specified - parameters. + def get_pd( + self, + tdm, + params_recarray=None, + tl=None): + """Calculates the probability density for the given trial events given + the specified local parameters. Parameters ---------- - tdm : TrialDataManager instance - The TrialDataManager instance holding the trial event data for which - the PDF values should get calculated. 
- params : dict | None - The dictionary containing the parameter names and values the - probability should get calculated for. - tl : TimeLord instance | None - The optional TimeLord instance that should be used to measure + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial event data for + which the PDF values should get calculated. + params_recarray : instance of numpy structured ndarray | None + The (N_models,)-shaped numpy structured ndarray holding the local + parameter names and values of the models. + By definition, this PDF does not depend on any parameters. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to measure timing information. Returns ------- - prob : (N_events,)-shaped numpy ndarray - The 1D numpy ndarray with the probability for each event. - grads : (N_fitparams,N_events)-shaped ndarray | None - The 2D numpy ndarray holding the gradients of the PDF w.r.t. - each fit parameter for each event. The order of the gradients - is the same as the order of floating parameters specified through - the ``param_set`` property. - It is ``None``, if this PDF does not depend on any parameters. - """ - with TaskTimer(tl, 'Get PDF event data.'): + pd : (N_values,)-shaped numpy ndarray + The (N_values,)-shaped numpy ndarray holding the probability density + value for each source and event. + grads : dict + The dictionary holding the gradients of the probability density + w.r.t. each global fit parameter. Since this PDF does not depend on + any fit parameter, this is an empty dictionary. 
+ """ + if self._cache_pd_values: + pd = self._get_cached_pd_values( + tdm=tdm) + if pd is not None: + return (pd, dict()) + + with TaskTimer(tl, 'Get PDF eventdata.'): if self.is_signal_pdf: - if tdm.src_ev_idxs is not None: - (src_idxs, ev_idxs) = tdm.src_ev_idxs - eventdata = np.empty( - (len(ev_idxs), len(self._axes)), dtype=np.float64) - for (axis_idx, axis) in enumerate(self._axes): - axis_name = axis.name - if(axis_name in tdm): - if 'src' in axis_name: - axis_data = tdm.get_data(axis_name)[src_idxs] - elif 'psi' in axis_name: - axis_data = tdm.get_data(axis_name) - else: - axis_data = tdm.get_data(axis_name)[ev_idxs] - else: - # The requested data field (for the axis) is not - # part of the trial data, so it must be a parameter. - if(axis_name not in params): - raise KeyError( - 'The PDF axis "{}" is not part of the ' - 'trial data and is not a parameter!'.format( - axis_name)) - - axis_data = np.full( - (len(ev_idxs),), params[axis_name], - dtype=np.float64) - eventdata[:, axis_idx] = axis_data - + eventdata = self.create_eventdata_for_sigpdf( + tdm=tdm, + axes=self._axes) elif self.is_background_pdf: - eventdata = np.empty( - (tdm.n_selected_events, len(self._axes)), dtype=np.float64) - - for (axis_idx, axis) in enumerate(self._axes): - axis_name = axis.name - if(axis_name in tdm): - axis_data = tdm.get_data(axis_name) - else: - # The requested data field (for the axis) is not part - # of the trial data, so it must be a parameter. 
- if(axis_name not in params): - raise KeyError( - 'The PDF axis "{}" is not part of the trial data ' - 'and is not a parameter!'.format( - axis_name)) - - axis_data = np.full( - (tdm.n_selected_events,), params[axis_name], - dtype=np.float64) - eventdata[:, axis_idx] = axis_data - self__pdf_evaluate_simple = self._pdf.evaluate_simple - - with TaskTimer(tl, 'Get prob from photospline fit.'): - V = eventdata.shape[1] - evaluate_simple_data = [eventdata[:, i] for i in range(0, V)] - prob = self__pdf_evaluate_simple(evaluate_simple_data) - - with TaskTimer(tl, 'Normalize NDPhotosplinePDF with norm factor.'): - norm = self._norm_factor_func(self, tdm, params) - prob *= norm - - if(self._n_fitparams == 0): - # This PDF does not depend on any fit parameters. - return (prob, None) - - with TaskTimer(tl, 'Get grads from photospline fit.'): - self__param_set = self._param_set - grads = np.empty((self._n_fitparams, len(prob)), dtype=np.float64) - # Loop through the fit parameters of this PDF and calculate their - # derivative. - for (fitparam_idx, fitparam_name) in enumerate( - self__param_set.floating_param_name_list): - # Determine the axis index of this fit parameter. - axis_idx = self._fitparam_name_to_axis_idx_map[fitparam_name] - mode = 2**axis_idx - grad = self__pdf_evaluate_simple( - evaluate_simple_data, mode) - grad *= norm - grads[fitparam_idx, :] = grad - - return (prob, grads) - - -class PDFSet(object): + eventdata = self.create_eventdata_for_bkgpdf( + tdm=tdm, + axes=self._axes) + else: + raise TypeError( + 'The PDF is neither a signal nor a background PDF!') + + with TaskTimer(tl, 'Get pd for all selected events.'): + # The call to get_pd_with_eventdata will cache the pd values. 
+ pd = self.get_pd_with_eventdata( + tdm=tdm, + params_recarray=params_recarray, + eventdata=eventdata, + tl=tl) + + return (pd, dict()) + + +class PDFSet( + object): """This class describes a set of PDF objects which are related to each other - via different values of a set of fit parameters. A signal PDF usually - consists of multiple same-kind PDFs for different signal fit parameters. - In general background PDFs could have fit parameters, too. + via different values of a set of parameters. A signal PDF usually + consists of multiple same-kind PDFs for different signal parameters. + In general background PDFs could have parameters, too. - This class has the ``fitparams_grid_set`` property holding the set of fit + This class has the ``params_grid_set`` property holding the set of parameter grids. Also it holds a dictionary with the PDFs for the different - sets of fit parameter values. The type of the PDF objects is defined through - the ``pdf_type`` property. PDF objects of type ``pdf_type`` can be added - via the ``add_pdf`` method and retrieved via the ``get_pdf`` method. + sets of parameter values. PDF instances can be added via the :meth:`add_pdf` + method and can be retrieved via the :meth:`get_pdf` method. """ - def __init__(self, pdf_type, fitparams_grid_set, *args, **kwargs): - """Constructor method. Gets called when the an instance of a class is - created which derives from this PDFSet class. + def __init__( + self, + param_grid_set, + **kwargs): + """Constructs a new PDFSet instance. Parameters ---------- - pdf_type : type - The PDF class that can be added to the set. 
- fitparams_grid_set : ParameterGridSet | ParameterGrid - The ParameterGridSet with the fit parameter grids defining the - descrete fit parameter values for which the PDFs of this PDF set + param_grid_set : instance of ParameterGrid | + instance of ParameterGridSet + The instance of ParameterGridSet with the parameter grids defining + the descrete parameter values for which the PDFs of this PDF set are made for. """ # Call super to support multiple class inheritance. - super(PDFSet, self).__init__(*args, **kwargs) + super().__init__( + **kwargs) - if(not issubclass(pdf_type, PDF)): - raise TypeError('The pdf_type argument must be a subclass of PDF!') - self._pdf_type = pdf_type - self.fitparams_grid_set = fitparams_grid_set - self._gridfitparams_hash_pdf_dict = dict() + self.param_grid_set = param_grid_set - @property - def pdf_type(self): - """(read-only) The PDF type which can be added to the PDF set. - """ - return self._pdf_type + self._gridparams_hash_pdf_dict = dict() @property - def fitparams_grid_set(self): - """ DEPRECATED (Use param_grid_set instead!) - The ParameterGridSet object defining the value grids of - the different fit parameters. + def param_grid_set(self): + """The ParameterGridSet instance defining the grid values of + the different parameters. """ - return self._fitparams_grid_set + return self._param_grid_set - @fitparams_grid_set.setter - def fitparams_grid_set(self, obj): - if(isinstance(obj, ParameterGrid)): + @param_grid_set.setter + def param_grid_set(self, obj): + if isinstance(obj, ParameterGrid): obj = ParameterGridSet([obj]) - # Allow None for the MappedMultiDimGridPDFSet construction. - # Could create an unexpected behavior in other analyses! 
- if(not isinstance(obj, ParameterGridSet) and obj is not None): - raise TypeError('The fitparams_grid_set property must be an object ' - 'of type ParameterGridSet!') - self._fitparams_grid_set = obj - - @property - def param_grid_set(self): - return self._fitparams_grid_set + if obj is not None: + if not isinstance(obj, ParameterGridSet): + raise TypeError( + 'The params_grid_set property must be an instance of type ' + 'ParameterGridSet!') + self._param_grid_set = obj @property - def gridfitparams_list(self): - """(read-only) The list of dictionaries of all the fit parameter + def gridparams_list(self): + """(read-only) The list of dictionaries of all the parameter permutations on the grid. """ - return self._fitparams_grid_set.parameter_permutation_dict_list + return self._param_grid_set.parameter_permutation_dict_list @property def pdf_keys(self): """(read-only) The list of stored PDF object keys. """ - return list(self._gridfitparams_hash_pdf_dict.keys()) + return list(self._gridparams_hash_pdf_dict.keys()) @property def axes(self): """(read-only) The PDFAxes object of one of the PDFs of this PDF set. All PDFs of this set are supposed to have the same axes. """ - key = next(iter(self._gridfitparams_hash_pdf_dict.keys())) - return self._gridfitparams_hash_pdf_dict[key].axes + key = next(iter(self._gridparams_hash_pdf_dict.keys())) + return self._gridparams_hash_pdf_dict[key].axes + + def __contains__(self, key): + """Checks if the given key exists in this PDFSet instance. + + Parameters + ---------- + key : dict | int + If a dictionary is provided, it must be the gridparams dictionary + containing the grid parameter names and vales. + If an integer is provided, it must be the hash of the gridparams + dictionary. + """ + if isinstance(key, dict): + key = make_dict_hash(key) + + if not isinstance(key, int): + raise TypeError( + 'The key argument must be of type dict or int! 
' + f'currently its type is {classname(key)}.') + + return key in self._gridparams_hash_pdf_dict + + def __getitem__(self, key): + """Implements the access operator ``self[gridparams_hash]``. + """ + return self.get_pdf(key) - def __getitem__(self, k): - """(read-only) Returns the PDF for the given PDF key. + def __iter__(self): + """Returns an iterator of the PDF dictionary of this PDFSet. """ - return self._gridfitparams_hash_pdf_dict[k] + return iter(self._gridparams_hash_pdf_dict) def items(self): - """Returns the list of 2-element tuples for the PDF stored in this - PDFSet object. + """Returns an iterator over the (gridparams_hash, PDF) pairs of this + PDFSet instance. + """ + return self._gridparams_hash_pdf_dict.items() + + def values(self): + """Returns an iterator over the PDF instances of the PDFSet instance. """ - return self._gridfitparams_hash_pdf_dict.items() + return self._gridparams_hash_pdf_dict.values() - def make_pdf_key(self, gridfitparams): - """Creates the PDF key for the given grid fit parameter values. + def make_key(self, gridparams): + """Creates the key for the given grid parameter dictionary. Parameters ---------- - gridfitparams : dict | int - The dictionary with the grid fit parameters for which the PDF key - should get made. If an integer is given, it is assumed to be - the PDF key. + gridparams : dict + The dictionary holding the grid parameter names and values. Returns ------- - pdf_key : int - The hash that represents the key for the PDF with the given grid - fit parameter values. + key : int + The key for the given grid parameter dictionary. 
""" - if(isinstance(gridfitparams, int)): - return gridfitparams - if(isinstance(gridfitparams, dict)): - return make_params_hash(gridfitparams) + return make_dict_hash(gridparams) - raise TypeError( - 'The gridfitparams argument must be of type dict or int!') - - def add_pdf(self, pdf, gridfitparams): + def add_pdf(self, pdf, gridparams): """Adds the given PDF object for the given parameters to the internal registry. If this PDF set is not empty, the to-be-added PDF must have the same axes than the already added PDFs. Parameters ---------- - pdf : pdf_type - The object derived from ``pdf_type`` that should be added. - gridfitparams : dict - The dictionary with the grid fit parameter values, which identify + pdf : instance of PDF + The PDF instance, that should be added + gridparams : dict + The dictionary with the grid parameter values, which identify the PDF object. Raises @@ -1460,419 +1628,167 @@ def add_pdf(self, pdf, gridfitparams): If the axes of the given PDFs are not the same as the axes of the already added PDFs. """ - if(not isinstance(pdf, self.pdf_type)): - raise TypeError('The pdf argument must be an instance of %s!' % ( - typename(self.pdf_type))) + logger = get_logger(f'{__name__}.{classname(self)}.add_pdf') - gridfitparams_hash = self.make_pdf_key(gridfitparams) + if not isinstance(pdf, PDF): + raise TypeError( + 'The pdf argument must be an instance of PDF!' + f'But its type is "{classname(pdf)}!') + if not isinstance(gridparams, dict): + raise TypeError( + 'The gridparams argument must be of type dict!' + f'But its type is "{classname(gridparams)}"!') - if(gridfitparams_hash in self._gridfitparams_hash_pdf_dict): - raise KeyError('The PDF with grid fit parameters %s was already ' - 'added!' 
% (str(gridfitparams))) + gridparams_hash = make_dict_hash(gridparams) + if gridparams_hash in self._gridparams_hash_pdf_dict: + raise KeyError( + f'The PDF with grid parameters {str(gridparams)} was ' + 'already added!') # Check that the new PDF has the same axes than the already added PDFs. - if(len(self._gridfitparams_hash_pdf_dict) > 0): - some_pdf = self._gridfitparams_hash_pdf_dict[ - next(iter(self._gridfitparams_hash_pdf_dict.keys()))] - if(not pdf.axes.is_same_as(some_pdf.axes)): + if len(self._gridparams_hash_pdf_dict) > 0: + some_pdf = self._gridparams_hash_pdf_dict[ + next(iter(self._gridparams_hash_pdf_dict.keys()))] + if not pdf.axes.is_same_as(some_pdf.axes): raise ValueError( 'The given PDF does not have the same axes than the ' 'already added PDFs!\n' - 'New axes:\n{}\n' - 'Old axes:\n{}'.format( - str(pdf.axes), str(some_pdf.axes)) - ) + f'New axes:\n{str(pdf.axes)}\n' + f'Old axes:\n{str(some_pdf.axes)}') + + if is_tracing_enabled(): + logger.debug(f'Adding PDF for gridparams {gridparams}.') - self._gridfitparams_hash_pdf_dict[gridfitparams_hash] = pdf + self._gridparams_hash_pdf_dict[gridparams_hash] = pdf - def get_pdf(self, gridfitparams): + def get_pdf(self, gridparams): """Retrieves the PDF object for the given set of fit parameters. Parameters ---------- - gridfitparams : dict | int - The dictionary with the grid fit parameters for which the PDF object - should get retrieved. If an integer is given, it is assumed to be - the PDF key. + gridparams : dict | int + The dictionary with the grid parameter names and values for which + the PDF object should get retrieved. If an integer is given, it is + assumed to be the PDF key. Returns ------- - pdf : pdf_type - The pdf_type object for the given parameters. + pdf : instance if PDF + The PDF instance for the given parameters. Raises ------ KeyError - If no PDF object was created for the given set of parameters. + If no PDF instance was created for the given set of parameters. 
""" - gridfitparams_hash = self.make_pdf_key(gridfitparams) + if isinstance(gridparams, int): + gridparams_hash = gridparams + elif isinstance(gridparams, dict): + gridparams_hash = make_dict_hash(gridparams) + else: + raise TypeError( + 'The gridparams argument must be of type dict or int!') - if(gridfitparams_hash not in self._gridfitparams_hash_pdf_dict): + if gridparams_hash not in self._gridparams_hash_pdf_dict: raise KeyError( - 'No PDF was created for the parameter set "%s"!' % - (str(gridfitparams))) + 'No PDF was created for the parameter set ' + f'"{str(gridparams)}"!') - pdf = self._gridfitparams_hash_pdf_dict[gridfitparams_hash] - return pdf + pdf = self._gridparams_hash_pdf_dict[gridparams_hash] + return pdf -class MultiDimGridPDFSet(PDFSet, PDF): - def __init__( - self, param_set, param_grid_set, gridparams_pdfs, - interpolmethod=None, pdf_type=MultiDimGridPDF, + def initialize_for_new_trial( + self, + tdm, + tl=None, **kwargs): - """Creates a new MultiDimGridPDFSet instance, which holds a set of - MultiDimGridPDF instances, one for each point of a parameter grid set. - - Parameters - ---------- - param_set : Parameter instance | sequence of Parameter instances | - ParameterSet instance - The set of parameters defining the model parameters of this PDF. - param_grid_set : ParameterGrid instance | ParameterGridSet instance - The set of ParameterGrid instances, which define the grid values of - the model parameters, the given MultiDimGridPDF instances belong to. - gridparams_pdfs : sequence of (dict, MultiDimGridPDF) tuples - The sequence of 2-element tuples which define the mapping of grid - values to PDF instances. - interpolmethod : subclass of GridManifoldInterpolationMethod - The class specifying the interpolation method. This must be a - subclass of ``GridManifoldInterpolationMethod``. - If set to None, the default grid manifold interpolation method - ``Linear1DGridManifoldInterpolationMethod`` will be used. 
- pdf_type : type - The PDF class that can be added to the set. - """ - super(MultiDimGridPDFSet, self).__init__( - param_set=param_set, - pdf_type=pdf_type, - fitparams_grid_set=param_grid_set, - **kwargs) - - if(interpolmethod is None): - interpolmethod = Linear1DGridManifoldInterpolationMethod - self.interpolmethod = interpolmethod - - # Add the given MultiDimGridPDF instances to the PDF set. - for (gridparams, pdf) in gridparams_pdfs: - self.add_pdf(pdf, gridparams) - - # Create the interpolation method instance. - self._interpolmethod_instance = self._interpolmethod( - self._get_prob_for_gridparams_with_eventdata_func(), param_grid_set) - - @property - def interpolmethod(self): - """The class derived from GridManifoldInterpolationMethod - implementing the interpolation of the PDF grid manifold. - """ - return self._interpolmethod - - @interpolmethod.setter - def interpolmethod(self, cls): - if(not issubclass(cls, GridManifoldInterpolationMethod)): - raise TypeError('The interpolmethod property must be a sub-class ' - 'of GridManifoldInterpolationMethod!') - self._interpolmethod = cls - - def _get_prob_for_gridparams_with_eventdata_func(self): - """Returns a function with call signature __call__(gridparams, eventdata) - that will return the probability for each event given by ``eventdata`` - from the PDFs that is registered for the given gridparams parameter - values. - """ - def _get_prob_for_gridparams_with_eventdata(tdm, gridparams, eventdata): - """Gets the probability for each event given by ``eventdata`` from - the PDFs that is registered for the given gridparams parameter - values. - - Parameters - ---------- - tdm : TrialDataManager - The TrialDataManager instance holding the trial data. - gridparams : dict - The dictionary with the grid parameter names and values, that - reference the registered PDF of interest. - eventdata : (N_events,V)-shaped numpy ndarray - The ndarray holding the data for the PDF evaluation. 
- - Returns - ------- - prob : (N_events,)-shaped ndarray - The ndarray holding the probability values for each event. - """ - pdf = self.get_pdf(gridparams) - prob = pdf.get_prob_with_eventdata(tdm, gridparams, eventdata) - return prob - - return _get_prob_for_gridparams_with_eventdata - - def assert_is_valid_for_trial_data(self, tdm): - """Checks if this PDF set is valid for all the given trial data. Since - the PDFs have the same axes, we just need to check the first PDFs. - """ - # Get one of the PDFs. - pdf = next(iter(self.items()))[1] - pdf.assert_is_valid_for_trial_data(tdm) - - def get_prob(self, tdm, params, tl=None): - """Calculates the probability density for each event, given the given - parameter values. + """This method is called whenever a new trial data is available. It + calls the :meth:`~skyllh.core.pdf.PDF.initialize_for_new_trial` method + of each PDF. Parameters ---------- - tdm : TrialDataManager instance - The TrialDataManager instance that will be used to get the data - from the trial events. - params : dict - The dictionary holding the parameter names and values for which the - probability should get calculated. Because this PDF is a PDFSet, - there should be at least one parameter. - tl : TimeLord instance | None - The optional TimeLord instance to use for measuring timing - information. + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the new trial data events. + tl : instance of TimeLord | None + The optional instance of TimeLord for measuring timing information. + """ + for pdf in self._gridparams_hash_pdf_dict.values(): + pdf.initialize_for_new_trial( + tdm=tdm, + tl=tl, + **kwargs) - Returns - ------- - prob : (N_events,)-shaped 1D ndarray - The probability values for each event. - grads : (N_fitparams,N_events)-shaped 2D ndarray - The PDF gradients w.r.t. the PDF fit parameters for each event. 
- """ - # Create the ndarray for the event data that is needed for the - # ``MultiDimGridPDF.get_prob_with_eventdata`` method. - if(isinstance(self, IsSignalPDF)): - # Evaluate the relevant quantities for - # all events and sources (relevant for stacking analyses). - if tdm.src_ev_idxs is not None: - (src_idxs, ev_idxs) = tdm.src_ev_idxs - eventdata = np.array( - [ - # Check `psi` axis name. - tdm.get_data(axis.name) - if ('psi' in axis.name) - - # Check `src` axis name. - else tdm.get_data(axis.name)[src_idxs] - if ('src' in axis.name) - - # Default case. - else tdm.get_data(axis.name)[ev_idxs] - for axis in self.axes - ] - ).T - else: - n_src = len(tdm.get_data('src_array')['ra']) - l_ev = len(tdm.get_data('ra')) - eventdata = np.array( - [ - # Check `psi` axis name. - tdm.get_data(axis.name) - if ('psi' in axis.name) - - # Check `src` axis name. - else tdm.get_data(axis.name) - if (('src' in axis.name) and (n_src == 1)) - else np.repeat(tdm.get_data(axis.name), l_ev) - if (('src' in axis.name) and (n_src != 1)) - - # Default case. - else np.tile(tdm.get_data(axis.name), n_src) - for axis in self.axes - ] - ).T - - elif (isinstance(self, IsBackgroundPDF)): - eventdata = np.array([tdm.get_data(axis.name) - for axis in self.axes]).T - - # Get the interpolated PDF values for the arbitrary parameter values. - # The (D,N_events)-shaped grads_ ndarray contains the gradient of the - # probability density w.r.t. each of the D parameters, which are defined - # by the param_grid_set. The order of the D gradients is the same as - # the parameter grids. - with TaskTimer(tl, 'Get signal PDFs for all events.'): - (prob, grads_) = self._interpolmethod_instance.get_value_and_gradients( - tdm, eventdata, params) - - # Handle the special (common) case were there is only one fit parameter - # and it coincides with the only grid parameter of this PDFSet. 
- fitparams = self.param_set.floating_params - params_grid_set_pnames = self.param_grid_set.parameter_names - - if((len(fitparams) == 1) and (len(params_grid_set_pnames) == 1) and - (params_grid_set_pnames[0] == fitparams[0].name)): - return (prob, grads_) - - # Create an array for the gradients, which will only contain the - # gradients for the fit (floating) parameters. - grads = np.zeros((len(fitparams), prob.shape[0]), dtype=np.float64) - - # Create a dictionary to map the name of the grid parameter to its - # index. - paramgridset_pname_to_pidx = dict( - [(pname, pidx) for (pidx, pname) in - enumerate(params_grid_set_pnames)]) - - for (pidx, fitparam) in enumerate(fitparams): - pname = fitparam.name - # Check if the fit parameter is part of the PDFSet's grid - # parameters. If so, the gradient is provided by the interpolation - # method. If not, the gradient is zero for this fit parameter. - if(pname in paramgridset_pname_to_pidx): - grads[pidx] = grads_[paramgridset_pname_to_pidx[pname]] - - return (prob, grads) - - -class MappedMultiDimGridPDFSet(PDFSet, PDF): - def __init__( - self, param_grid_set, gridparams_pdfs, src_hypo_group_manager, - pdf_type=MultiDimGridPDF, **kwargs): - """Creates a new MappedMultiDimGridPDFSet instance, which holds a set of - MultiDimGridPDF instances, one for each point of a parameter grid set. + def assert_is_valid_for_trial_data( + self, + tdm, + tl=None, + **kwargs): + """Checks if the PDFs of this PDFSet instance are valid for all the + given trial data events. + Since all PDFs should have the same axes, only the first PDF will be + checked. It calls the + :meth:`~skyllh.core.pdf.PDF.assert_is_valid_for_trial_data` method of + the first :class:`~skyllh.core.pdf.PDF` instance. Parameters ---------- - param_grid_set : ParameterGrid instance | ParameterGridSet instance - The set of ParameterGrid instances, which define the grid values of - the model parameters, the given MultiDimGridPDF instances belong to. 
- gridparams_pdfs : sequence of (dict, MultiDimGridPDF) tuples - The sequence of 2-element tuples which define the mapping of grid - values to PDF instances. - src_hypo_group_manager : SourceHypoGroupManager instance - The instance of SourceHypoGroupManager that defines the list of - sources, i.e. the list of SourceModel instances and flux models. - pdf_type : type - The PDF class that can be added to the set. - """ - super(MappedMultiDimGridPDFSet, self).__init__( - param_set=None, - pdf_type=pdf_type, - fitparams_grid_set=param_grid_set, - **kwargs) - - self.fluxmodel_to_source_mapping = src_hypo_group_manager.get_fluxmodel_to_source_mapping() - - # Add the given MultiDimGridPDF instances to the PDF set. - for (gridparams, pdf) in gridparams_pdfs: - self.add_pdf(pdf, gridparams) - - @property - def fluxmodel_to_source_mapping(self): - """The fluxmodel to source indices mapping list used for - MappedMultiDimGridPDFSet evaluation to get the corresponding KDE PDF. - """ - return self._fluxmodel_to_source_mapping - @fluxmodel_to_source_mapping.setter - def fluxmodel_to_source_mapping(self, mapping_list): - if(not issequenceof(mapping_list, tuple)): - raise TypeError( - 'The `fluxmodel_to_source_mapping` property must be a sequence of ' - 'tuples.') - self._fluxmodel_to_source_mapping = mapping_list + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial data events. + tl : instance of TimeLord | None + The optional instance of TimeLord for measuring timing information. - def assert_is_valid_for_trial_data(self, tdm): - """Checks if this PDF set is valid for all the given trial data. Since - the PDFs have the same axes, we just need to check the first PDFs. + Raises + ------ + ValueError + If some of the data is outside the axes range of the PDF. """ - # Get one of the PDFs. 
- pdf = next(iter(self.items()))[1] - pdf.assert_is_valid_for_trial_data(tdm) + key = next(iter(self._gridparams_hash_pdf_dict.keys())) + pdf = self._gridparams_hash_pdf_dict[key] + pdf.assert_is_valid_for_trial_data( + tdm=tdm, + tl=tl, + **kwargs) - def get_prob(self, tdm, params, tl=None): - """Calculates the probability density for each event, given the given - parameter values. + def get_pd( + self, + gridparams, + tdm, + params_recarray=None, + tl=None): + """Calls the ``get_pd`` method of the PDF instance that belongs to the + given grid parameter values ``gridparams``. Parameters ---------- - tdm : TrialDataManager instance - The TrialDataManager instance that will be used to get the data - from the trial events. - params : dict - The dictionary holding the parameter names and values for which the - probability should get calculated. Because this PDF is a PDFSet, - there should be at least one parameter. - tl : TimeLord instance | None - The optional TimeLord instance to use for measuring timing - information. + gridparams : dict + The dictionary holding the parameter values, which define PDF + instance within this PDFSet instance. + Note, that the parameter values must match a set of parameter grid + values for which a PDF instance has been created and added to this + PDFSet instance. + tdm : instance of TrialDataManager + The TrialDataManager instance holding the data events for which the + probability density of the events should be calculated. + params_recarray : instance of ndarray | None + The numpy record ndarray holding the parameter name and values for + each source model. Returns ------- - prob : (N_events,)-shaped 1D ndarray - The probability values for each event. - grads : (N_fitparams,N_events)-shaped 2D ndarray - The PDF gradients w.r.t. the PDF fit parameters for each event. - """ - # Create the ndarray for the event data that is needed for the - # ``MultiDimGridPDF.get_prob_with_eventdata`` method. 
- if(isinstance(self, IsSignalPDF)): - # Evaluate the relevant quantities for - # all events and sources (relevant for stacking analyses). - if tdm.src_ev_idxs is not None: - (src_idxs, ev_idxs) = tdm.src_ev_idxs - eventdata = np.array( - [ - # Check `psi` axis name. - tdm.get_data(axis.name) - if ('psi' in axis.name) - - # Check `src` axis name. - else tdm.get_data(axis.name)[src_idxs] - if ('src' in axis.name) - - # Default case. - else tdm.get_data(axis.name)[ev_idxs] - for axis in self.axes - ] - ).T - else: - n_src = len(tdm.get_data('src_array')['ra']) - l_ev = len(tdm.get_data('ra')) - eventdata = np.array( - [ - # Check `psi` axis name. - tdm.get_data(axis.name) - if ('psi' in axis.name) - - # Check `src` axis name. - else tdm.get_data(axis.name) - if (('src' in axis.name) and (n_src == 1)) - else np.repeat(tdm.get_data(axis.name), l_ev) - if (('src' in axis.name) and (n_src != 1)) - - # Default case. - else np.tile(tdm.get_data(axis.name), n_src) - for axis in self.axes - ] - ).T - - # Construct `src_idxs` for masking with `fluxmodel_mask`. - src_idxs = np.repeat(np.arange(n_src), l_ev) - - elif (isinstance(self, IsBackgroundPDF)): - eventdata = np.array([tdm.get_data(axis.name) - for axis in self.axes]).T - - # Get the interpolated PDF values for the arbitrary parameter values. - # The (D,N_events)-shaped grads ndarray contains the gradient of the - # probability density w.r.t. each of the D parameters, which are defined - # by the param_grid_set. The order of the D gradients is the same as - # the parameter grids. - - # Iterate over fluxmodels in `fluxmodel_to_source_mapping` list. - prob = np.zeros(eventdata.shape[0]) - grads = np.zeros(eventdata.shape[0]) - for (fluxmodel_hash, src_list) in self.fluxmodel_to_source_mapping: - # Mask for selecting events corresponding to specific flux. - fluxmodel_mask = np.isin(src_idxs, src_list) - - # Pass params in case normalization function depends on it. - # KDE normalization function does not depend on params. 
- with TaskTimer(tl, 'Get signal PDFs for specific flux events.'): - prob_i = self.get_pdf(fluxmodel_hash).get_prob_with_eventdata( - tdm, params, eventdata[fluxmodel_mask]) - - prob[fluxmodel_mask] = prob_i - - return (prob, grads) + pd : numpy ndarray + The 1D numpy ndarray holding the probability density values for each + event and source. + See :meth:`skyllh.core.pdf.PDF.get_pd` for further information. + grads : dict + The dictionary holding the gradient values for each global fit + parameter. + See :meth:`skyllh.core.pdf.PDF.get_pd` for further information. + """ + pdf = self.get_pdf(gridparams) + + return pdf.get_pd( + tdm=tdm, + params_recarray=params_recarray, + tl=tl) diff --git a/skyllh/core/pdfratio.py b/skyllh/core/pdfratio.py index cfe4de4cca..b51bd548ca 100644 --- a/skyllh/core/pdfratio.py +++ b/skyllh/core/pdfratio.py @@ -1,541 +1,688 @@ # -*- coding: utf-8 -*- import abc -import itertools import numpy as np from skyllh.core.py import ( classname, float_cast, + int_cast, + issequence, issequenceof, - typename ) from skyllh.core.parameters import ( - FitParameter, - make_params_hash + ParameterModelMapper, ) from skyllh.core.interpolate import ( GridManifoldInterpolationMethod, - Parabola1DGridManifoldInterpolationMethod + Parabola1DGridManifoldInterpolationMethod, ) from skyllh.core.pdf import ( - PDF, PDFSet, IsBackgroundPDF, IsSignalPDF, - SpatialPDF, - TimePDF ) -from skyllh.core.timing import TaskTimer +from skyllh.core.services import ( + SrcDetSigYieldWeightsService, +) +from skyllh.core.timing import ( + TaskTimer, +) -class PDFRatio(object, metaclass=abc.ABCMeta): - """Abstract base class for a PDF ratio class. It defines the interface - of a PDF ratio class. +class PDFRatio( + object, + metaclass=abc.ABCMeta): + """Abstract base class for a signal over background PDF ratio class. + It defines the interface of a signal over background PDF ratio class. 
""" - def __init__(self, pdf_type, *args, **kwargs): + def __init__( + self, + sig_param_names=None, + bkg_param_names=None, + **kwargs): """Constructor for a PDF ratio class. Parameters ---------- - pdf_type : type - The Python type of the PDF object the PDF ratio is made for. + sig_param_names : sequence of str | str | None + The sequence of signal parameter names this PDFRatio instance is a + function of. + bkg_param_names : sequence of str | str | None + The sequence of background parameter names this PDFRatio instance + is a function of. """ - super(PDFRatio, self).__init__(*args, **kwargs) + super().__init__(**kwargs) + + self.sig_param_names = sig_param_names + self.bkg_param_names = bkg_param_names - self._pdf_type = pdf_type + @property + def n_params(self): + """(read-only) The number of parameters the PDF ratio depends on. + This is the sum of signal and background parameters. + """ + return self.n_sig_params + self.n_bkg_params @property - def n_fitparams(self): - """(read-only) The number of fit parameters the PDF ratio depends on. - This is the sum of signal and background fit parameters. At the moment - only signal fit parameters are supported, so this property is equivalent - to the n_signal_fitparams property. But this might change in the future. + def param_names(self): + """(read-only) The list of parameter names this PDF ratio is a + function of. This is the superset of signal and background parameter + names. """ - return self.n_signal_fitparams + return list( + set(list(self._sig_param_names) + list(self._bkg_param_names))) @property - def fitparam_names(self): - """(read-only) The list of fit parameter names this PDF ratio is a - function of. - This is the superset of signal and background fit parameter names. - At the moment only signal fit parameters are supported, so this property - is equivalent to the signal_fitparam_names property. But this might - change in the future. 
+ def n_sig_params(self): + """(read-only) The number of signal parameters the PDF ratio depends + on. """ - return self.signal_fitparam_names + return len(self._sig_param_names) @property - def n_signal_fitparams(self): - """(read-only) The number of signal fit parameters the PDF ratio depends + def n_bkg_params(self): + """(read-only) The number of background parameters the PDF ratio depends on. """ - return len(self._get_signal_fitparam_names()) + return len(self._bkg_param_names) @property - def signal_fitparam_names(self): - """(read-only) The list of signal fit parameter names this PDF ratio is - a function of. + def sig_param_names(self): + """The list of signal parameter names this PDF ratio is a function of. """ - return self._get_signal_fitparam_names() + return self._sig_param_names + + @sig_param_names.setter + def sig_param_names(self, names): + if names is None: + names = [] + if not issequence(names): + names = [names] + if not issequenceof(names, str): + raise TypeError( + 'The sig_param_names property must be a sequence of str ' + 'instances!') + self._sig_param_names = names @property - def pdf_type(self): - """(read-only) The Python type of the PDF object for which the PDF - ratio is made for. + def bkg_param_names(self): + """The list of background parameter names this PDF ratio is a function + of. """ - return self._pdf_type + return self._bkg_param_names + + @bkg_param_names.setter + def bkg_param_names(self, names): + if names is None: + names = [] + if not issequence(names): + names = [names] + if not issequenceof(names, str): + raise TypeError( + 'The bkg_param_names property must be a sequence of str ' + 'instances!') + self._bkg_param_names = names - def _get_signal_fitparam_names(self): - """This method must be re-implemented by the derived class and needs to - return the list of signal fit parameter names, this PDF ratio is a - function of. If it returns an empty list, the PDF ratio is independent - of any signal fit parameters. 
+ @abc.abstractmethod + def initialize_for_new_trial( + self, + tdm, + tl=None, + **kwargs): + """Initializes the PDFRatio instance for a new trial. This method can + be utilized to pre-calculate PDFRatio values that do not depend on any + fit parameters. - Returns - ------- - list of str - The list of the signal fit parameter names, this PDF ratio is a - function of. By default this method returns an empty list indicating - that the PDF ratio depends on no signal parameter. + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager that holds the trial data. + tl : instance of TimeLord + The optional instance of TimeLord to measure timing information. """ - return [] + pass @abc.abstractmethod - def get_ratio(self, tdm, params=None, tl=None): - """Retrieves the PDF ratio value for each given trial data event, given - the given set of fit parameters. This method is called during the - likelihood maximization process. + def get_ratio( + self, + tdm, + src_params_recarray, + tl=None): + """Retrieves the PDF ratio value for each given trial data events (and + sources), given the given set of parameters. Parameters ---------- tdm : instance of TrialDataManager The TrialDataManager instance holding the trial data events for which the PDF ratio values should get calculated. - params : dict | None - The dictionary with the parameter name-value pairs. - It can be ``None``, if the PDF ratio does not depend on any - parameters. - tl : TimeLord instance | None + src_params_recarray : instance of numpy record ndarray | None + The (N_sources,)-shaped numpy record ndarray holding the parameter + names and values of the sources. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information. + tl : instance of TimeLord | None The optional TimeLord instance that should be used to measure timing information. 
Returns ------- - ratios : (N_events,)-shaped 1d numpy ndarray of float - The PDF ratio value for each trial event. + ratios : instance of ndarray + The (N_values,)-shaped 1d numpy ndarray of float holding the PDF + ratio value for each trial event and source. """ pass @abc.abstractmethod - def get_gradient(self, tdm, params, fitparam_name): - """Retrieves the PDF ratio gradient for the parameter ``fitparam_name`` - for each given trial event, given the given set of fit parameters. - This method is called during the likelihood maximization process. + def get_gradient( + self, + tdm, + src_params_recarray, + fitparam_id, + tl=None): + """Retrieves the PDF ratio gradient for the global fit parameter + ``fitparam_id`` for each trial data event and source, given the given + set of parameters ``src_params_recarray`` for each source. Parameters ---------- tdm : instance of TrialDataManager The TrialDataManager instance holding the trial data events for - which the PDF ratio values should get calculated. - params : dict - The dictionary with the parameter names and values. - fitparam_name : str - The name of the fit parameter for which the gradient should + which the PDF ratio gradient values should get calculated. + src_params_recarray : instance of numpy structured ndarray + The (N_sources,)-shaped numpy structured ndarray holding the + parameter names and values of the sources. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information. + fitparam_id : int + The ID of the global fit parameter for which the gradient should get calculated. + tl : instance of TimeLord | None + The optional TimeLord instance that should be used to measure + timing information. Returns ------- - gradient : (N_events,)-shaped 1d numpy ndarray of float - The PDF ratio gradient value for each trial event. 
+ gradient : instance of ndarray | 0 + The (N_values,)-shaped 1d numpy ndarray of float holding the PDF + ratio gradient value for each source and trial data event. + If the PDF ratio does not depend on the given global fit parameter, + 0 should be returned. """ pass + def __mul__(self, other): + """Implements the mathematical operation ``new = self * other``, where + ``other`` is an instance of PDFRatio. It creates an instance of + PDFRatioProduct holding the two PDFRatio instances. + """ + return PDFRatioProduct(self, other) -class SingleSourcePDFRatioArrayArithmetic(object): - """This class provides arithmetic methods for arrays of PDFRatio instances. - It has methods to calculate the product of the ratio values for a given set - of PDFRatio objects. This class assumes a single source. - The rational is that in the calculation of the derivates of the - log-likelihood-ratio function for a given fit parameter, the product of the - PDF ratio values of the PDF ratio objects which do not depend on that fit - parameter is needed. +class PDFRatioProduct( + PDFRatio): + """This is the mathematical product of two PDFRatio instances, which is a + PDFRatio instance again. """ - def __init__(self, pdfratios, fitparams): - """Constructs a PDFRatio array arithmetic object assuming a single - source. + def __init__( + self, + pdfratio1, + pdfratio2, + **kwargs): + """Creates a new PDFRatioProduct instance representing the product of + two PDFRatio instances. + """ + self.pdfratio1 = pdfratio1 + self.pdfratio2 = pdfratio2 - Parameters - ---------- - pdfratios : list of PDFRatio - The list of PDFRatio instances. - fitparams : list of FitParameter - The list of fit parameters. The order must match the fit parameter - order of the minimizer. 
- """ - super(SingleSourcePDFRatioArrayArithmetic, self).__init__() - - self.pdfratio_list = pdfratios - self.fitparam_list = fitparams - - # The ``_ratio_values`` member variable will hold a - # (N_pdfratios,N_events)-shaped array holding the PDF ratio values of - # each PDF ratio object for each event. It will be created by the - # ``initialize_for_new_trial`` method. - self._ratio_values = None - - # Create a mapping of fit parameter index to pdfratio index. We - # initialize the mapping with -1 first in order to be able to check in - # the end if all fit parameters found a PDF ratio object. - self._fitparam_idx_2_pdfratio_idx = np.repeat( - np.array([-1], dtype=np.int64), len(self._fitparam_list)) - for ((fpidx, fitparam), (pridx, pdfratio)) in itertools.product( - enumerate(self._fitparam_list), enumerate(self.pdfratio_list)): - if(fitparam.name in pdfratio.fitparam_names): - self._fitparam_idx_2_pdfratio_idx[fpidx] = pridx - check_mask = (self._fitparam_idx_2_pdfratio_idx == -1) - if(np.any(check_mask)): - raise KeyError('%d fit parameters are not defined in any of the ' - 'PDF ratio instances!'%(np.sum(check_mask))) - - # Create the list of indices of the PDFRatio instances, which depend on - # at least one fit parameter. - self._var_pdfratio_indices = np.unique(self._fitparam_idx_2_pdfratio_idx) - - def _precompute_static_pdfratio_values(self, tdm): - """Pre-compute the PDF ratio values for the PDF ratios that do not - depend on any fit parameters. + sig_param_names = set( + list(pdfratio1.sig_param_names) + list(pdfratio2.sig_param_names)) + bkg_param_names = set( + list(pdfratio1.bkg_param_names) + list(pdfratio2.bkg_param_names)) - Parameters - ---------- - tdm : instance of TrialDataManager - The instance of TrialDataManager that holds the trial event data for - which the PDF ratio values should get calculated. 
- """ - for (i, pdfratio) in enumerate(self._pdfratio_list): - if(pdfratio.n_fitparams == 0): - # The PDFRatio does not depend on any fit parameters. So we - # pre-calculate the PDF ratio values for all the events. Since - # the get_ratio method of the PDFRatio class might return a 2D - # (N_sources, N_events)-shaped array, and we assume a single - # source, we need to reshape the array, which does not involve - # any data copying. - self._ratio_values[i] = np.reshape( - pdfratio.get_ratio(tdm), (tdm.n_selected_events,)) + super().__init__( + sig_param_names=sig_param_names, + bkg_param_names=bkg_param_names, + **kwargs) @property - def pdfratio_list(self): - """The list of PDFRatio objects. + def pdfratio1(self): + """The first PDFRatio instance in the muliplication + ``pdfratio1 * pdfratio2``. """ - return self._pdfratio_list - @pdfratio_list.setter - def pdfratio_list(self, seq): - if(not issequenceof(seq, PDFRatio)): - raise TypeError('The pdfratio_list property must be a sequence of ' - 'PDFRatio instances!') - self._pdfratio_list = list(seq) + return self._pdfratio1 - @property - def fitparam_list(self): - """The list of FitParameter instances. - """ - return self._fitparam_list - @fitparam_list.setter - def fitparam_list(self, seq): - if(not issequenceof(seq, FitParameter)): - raise TypeError('The fitparam_list property must be a sequence of ' - 'FitParameter instances!') - self._fitparam_list = list(seq) - - def initialize_for_new_trial(self, tdm): - """Initializes the PDFRatio array arithmetic for a new trial. For a new - trial the data events change, hence we need to recompute the PDF ratio - values of the fit parameter independent PDFRatio instances. 
+ @pdfratio1.setter + def pdfratio1(self, pdfratio): + if not isinstance(pdfratio, PDFRatio): + raise TypeError( + 'The pdfratio1 property must be an instance of PDFRatio!') + self._pdfratio1 = pdfratio - Parameters - ---------- - tdm : instance of TrialDataManager - The instance of TrialDataManager that holds the trial event data for - that this PDFRatioArrayArithmetic instance should get initialized. + @property + def pdfratio2(self): + """The second PDFRatio instance in the multiplication + ``pdfratio1 * pdfratio2``. """ - n_events_old = 0 - if(self._ratio_values is not None): - n_events_old = self._ratio_values.shape[1] - - # If the amount of events have changed, we need a new array holding the - # ratio values. - if(n_events_old != tdm.n_selected_events): - # Create a (N_pdfratios,N_events)-shaped array to hold the PDF ratio - # values of each PDF ratio object for each event. - self._ratio_values = np.empty( - (len(self._pdfratio_list), tdm.n_selected_events), - dtype=np.float64) - - self._precompute_static_pdfratio_values(tdm) + return self._pdfratio2 + + @pdfratio2.setter + def pdfratio2(self, pdfratio): + if not isinstance(pdfratio, PDFRatio): + raise TypeError( + 'The pdfratio2 property must be an instance of PDFRatio!') + self._pdfratio2 = pdfratio + + def initialize_for_new_trial( + self, + **kwargs): + """Initializes the PDFRatioProduct instance for a new trial. + It calls the + :meth:`~skyllh.core.pdfratio.PDFRatio.initialize_for_new_trial` method + of each of the two :class:`~skyllh.core.pdfratio.PDFRatio` instances. + """ + self._pdfratio1.initialize_for_new_trial(**kwargs) + self._pdfratio2.initialize_for_new_trial(**kwargs) - def get_pdfratio(self, idx): - """Returns the PDFRatio instance that corresponds to the given fit - parameter index. + def get_ratio( + self, + tdm, + src_params_recarray, + tl=None): + """Retrieves the PDF ratio product value for each trial data + event and source, given the given set of parameters for all sources.
Parameters ---------- - fitparam_idx : int - The index of the fit parameter. + tdm : instance of TrialDataManager + The TrialDataManager instance holding the trial data events for + which the PDF ratio values should get calculated. + src_params_recarray : instance of numpy record ndarray + The (N_sources,)-shaped numpy record ndarray holding the parameter + names and values of the sources. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information. + tl : TimeLord instance | None + The optional TimeLord instance that should be used to measure + timing information. Returns ------- - pdfratio : PDFRatio - The PDFRatio instance which corresponds to the given fit parameter - index. + ratios : instance of ndarray + The (N_values,)-shaped 1d numpy ndarray of float holding the product + of the PDF ratio values for each trial event and source. + The PDF ratio product value for each trial event. """ - return self._pdfratio_list[idx] - - def calculate_pdfratio_values(self, tdm, fitparams, tl=None): - """Calculates the PDF ratio values for the PDF ratio objects which - depend on fit parameters. + r1 = self._pdfratio1.get_ratio( + tdm=tdm, + src_params_recarray=src_params_recarray, + tl=tl) + + r2 = self._pdfratio2.get_ratio( + tdm=tdm, + src_params_recarray=src_params_recarray, + tl=tl) + + return r1 * r2 + + def get_gradient( + self, + tdm, + src_params_recarray, + fitparam_id, + tl=None): + """Retrieves the PDF ratio product gradient for the global fit parameter + with parameter ID ``fitparam_id`` for each trial data event and source, + given the set of parameters ``src_params_recarray`` for all sources. Parameters ---------- tdm : instance of TrialDataManager - The instance of TrialDataManager that holds the trial event data for + The TrialDataManager instance holding the trial data events for which the PDF ratio values should get calculated. 
- fitparams : dict - The dictionary with the fit parameter name-value pairs. - tl : TimeLord instance | None + src_params_recarray : instance of numpy record ndarray | None + The (N_sources,)-shaped numpy record ndarray holding the parameter + names and values of the sources. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information. + fitparam_id : int + The ID of the global fit parameter for which the gradient should + get calculated. + tl : instance of TimeLord | None The optional TimeLord instance that should be used to measure timing information. + + Returns + ------- + gradient : instance of ndarray | 0 + The (N_values,)-shaped 1d numpy ndarray of float holding the PDF + ratio gradient value for each trial event and source. If none of the + two PDFRatio instances depend on the given global fit parameter, the + scalar value ``0`` is returned. """ - for (i, _pdfratio_i) in enumerate(self._pdfratio_list): - # Since the get_ratio method of the PDFRatio class might return a 2D - # (N_sources, N_events)-shaped array, and we assume a single source, - # we need to reshape the array, which does not involve any data - # copying. - self._ratio_values[i] = np.reshape( - _pdfratio_i.get_ratio(tdm, fitparams, tl=tl), - (tdm.n_selected_events,)) - - def get_ratio_product(self, excluded_idx=None): - """Calculates the product of the of the PDF ratio values of each event, - but excludes the PDF ratio values that correspond to the given excluded - fit parameter index. This is useful for calculating the derivates of - the log-likelihood ratio function. 
+ r1_depends_on_fitparam =\ + ParameterModelMapper.is_global_fitparam_a_local_param( + fitparam_id=fitparam_id, + params_recarray=src_params_recarray, + local_param_names=self._pdfratio1.param_names) + + r2_depends_on_fitparam =\ + ParameterModelMapper.is_global_fitparam_a_local_param( + fitparam_id=fitparam_id, + params_recarray=src_params_recarray, + local_param_names=self._pdfratio2.param_names) + + if r1_depends_on_fitparam: + r2 = self._pdfratio2.get_ratio( + tdm=tdm, + src_params_recarray=src_params_recarray, + tl=tl) + + r1_grad = self._pdfratio1.get_gradient( + tdm=tdm, + src_params_recarray=src_params_recarray, + fitparam_id=fitparam_id, + tl=tl) + + if r2_depends_on_fitparam: + r1 = self._pdfratio1.get_ratio( + tdm=tdm, + src_params_recarray=src_params_recarray, + tl=tl) + + r2_grad = self._pdfratio2.get_gradient( + tdm=tdm, + src_params_recarray=src_params_recarray, + fitparam_id=fitparam_id, + tl=tl) + + if r1_depends_on_fitparam and r2_depends_on_fitparam: + gradient = r1 * r2_grad + gradient += r1_grad * r2 + elif r1_depends_on_fitparam: + gradient = r1_grad * r2 + elif r2_depends_on_fitparam: + gradient = r1 * r2_grad + else: + gradient = 0 + + return gradient + + +class SourceWeightedPDFRatio( + PDFRatio): + r"""This class provides the calculation of a source weighted PDF ratio for + multiple sources: + + .. math:: + + \mathcal{R}_i(\vec{p}_{\mathrm{s}}) = \frac{1}{A(\vec{p}_{\mathrm{s}})} + \sum_{k=1}^{K} a_k(\vec{p}_{\mathrm{s}_k}) \mathcal{R}_{i,k} + (\vec{p}_{\mathrm{s}_k}) + + """ + def __init__( + self, + dataset_idx, + src_detsigyield_weights_service, + pdfratio, + **kwargs): + """Creates a new SourceWeightedPDFRatio instance. Parameters ---------- - excluded_fitparam_idx : int | None - The index of the fit parameter whose PDF ratio values should get - excluded from the product. If None, the product over all PDF ratio - values will be computed. 
- - Returns - ------- - product : 1D (N_events,)-shaped ndarray - The product of the PDF ratio values for each event. + dataset_idx : int + The index of the dataset. It is used to access the source detector + signal yield weight. + src_detsigyield_weights_service : instance of SrcDetSigYieldWeightsService + The instance of SrcDetSigYieldWeightsService providing the source + detector signal yield weights, i.e. the product of the theoretical + source weight with the detector signal yield. + pdfratio : instance of PDFRatio + The instance of PDFRatio providing the PDF ratio values and + derivatives. """ - if(excluded_idx is None): - return np.prod(self._ratio_values, axis=0) + if not isinstance(pdfratio, PDFRatio): + raise TypeError( + 'The pdfratio argument must be an instance of PDFRatio! ' + f'Its current type is {classname(pdfratio)}.') + self._pdfratio = pdfratio + + super().__init__( + sig_param_names=self._pdfratio.sig_param_names, + bkg_param_names=self._pdfratio.bkg_param_names, + **kwargs) + + self._dataset_idx = int_cast( + dataset_idx, + 'The dataset_idx argument must be castable to type int!') + + if not isinstance( + src_detsigyield_weights_service, + SrcDetSigYieldWeightsService): + raise TypeError( + 'The src_detsigyield_weights_service argument must be an ' + 'instance of type SrcDetSigYieldWeightsService! ' + 'Its current type is ' + f'{classname(src_detsigyield_weights_service)}.') + self._src_detsigyield_weights_service = src_detsigyield_weights_service + + self._cache_R_ik = None + self._cache_R_i = None - # Get the index of the PDF ratio object that corresponds to the excluded - # fit parameter. 
- #excluded_pdfratio_idx = self._fitparam_idx_2_pdfratio_idx[excluded_fitparam_idx] - pdfratio_indices = list(range(self._ratio_values.shape[0])) - pdfratio_indices.pop(excluded_idx) - return np.prod(self._ratio_values[pdfratio_indices], axis=0) + @property + def dataset_idx(self): + """(read-only) The index of the dataset for which this + SourceWeightedPDFRatio instance is made. + """ + return self._dataset_idx + @property + def src_detsigyield_weights_service(self): + """(read-only) The instance of SrcDetSigYieldWeightsService providing + the source detector signal yield weights, i.e. the product of the + theoretical source weight with the detector signal yield. + """ + return self._src_detsigyield_weights_service -class PDFRatioFillMethod(object, metaclass=abc.ABCMeta): - """Abstract base class to implement a PDF ratio fill method. It can happen, - that there are empty background bins but where signal could possibly be. - A PDFRatioFillMethod implements what happens in such cases. - """ + @property + def pdfratio(self): + """(read-only) The PDFRatio instance that is used to calculate the + source weighted PDF ratio. + """ + return self._pdfratio - def __init__(self, *args, **kwargs): - super(PDFRatioFillMethod, self).__init__(*args, **kwargs) + def initialize_for_new_trial( + self, + tdm, + tl=None, + **kwargs): + """Initializes the PDFRatio instance for a new trial. It calls the + :meth:`~skyllh.core.pdfratio.PDFRatio.initialize_for_new_trial` method + of the :class:`~skyllh.core.pdfratio.PDFRatio` instance. - @abc.abstractmethod - def fill_ratios(self, ratios, sig_prob_h, bkg_prob_h, - sig_mask_mc_covered, sig_mask_mc_covered_zero_physics, - bkg_mask_mc_covered, bkg_mask_mc_covered_zero_physics): - """The fill_ratios method is supposed to fill the ratio bins (array) - with the signal / background division values. For bins (array elements), - where the division is undefined, e.g. due to zero background, the fill - method decides how to fill those bins. 
- - Note: Bins which have neither signal monte-carlo nor background - monte-carlo coverage, are undefined about their signal-ness or - background-ness by construction. + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager that holds the trial data. + tl : instance of TimeLord + The optional instance of TimeLord to measure timing information. + """ + self._pdfratio.initialize_for_new_trial( + tdm=tdm, + tl=tl, + **kwargs) + + def get_ratio( + self, + tdm, + src_params_recarray, + tl=None): + """Retrieves the PDF ratio value for each given trial data events (and + sources), given the given set of parameters. + + Note: + + This method uses the source detector signal yield weights service. + Hence, the + :meth:`skyllh.core.weights.SrcDetSigYieldWeightsService.calculate` + method needs to be called prior to calling this method. Parameters ---------- - ratios : ndarray of float - The multi-dimensional array for the final ratio bins. The shape is - the same as the sig_h and bkg_h ndarrays. - sig_prob_h : ndarray of float - The multi-dimensional array (histogram) holding the signal - probabilities. - bkg_prob_h : ndarray of float - The multi-dimensional array (histogram) holding the background - probabilities. - sig_mask_mc_covered : ndarray of bool - The mask array indicating which array elements of sig_prob_h have - monte-carlo coverage. - sig_mask_mc_covered_zero_physics : ndarray of bool - The mask array indicating which array elements of sig_prob_h have - monte-carlo coverage but don't have physics contribution. - bkg_mask_mc_covered : ndarray of bool - The mask array indicating which array elements of bkg_prob_h have - monte-carlo coverage. - In case of experimental data as background, this mask indicate where - (experimental data) background is available. 
- bkg_mask_mc_covered_zero_physics : ndarray of bool - The mask array ndicating which array elements of bkg_prob_h have - monte-carlo coverage but don't have physics contribution. - In case of experimental data as background, this mask contains only - False entries. + tdm : instance of TrialDataManager + The TrialDataManager instance holding the trial data events for + which the PDF ratio values should get calculated. + src_params_recarray : instance of numpy record ndarray + The (N_sources,)-shaped numpy record ndarray holding the parameter + names and values of the sources. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information. + tl : instance of TimeLord | None + The optional TimeLord instance that should be used to measure + timing information. Returns ------- - ratios : ndarray - The array holding the final ratio values. + ratios : instance of ndarray + The (N_selected_events,)-shaped 1d numpy ndarray of float holding + the PDF ratio value for each selected trial data event. """ - return ratios + (a_jk, a_jk_grads) = self._src_detsigyield_weights_service.get_weights() + a_k = a_jk[self._dataset_idx] -class Skylab2SkylabPDFRatioFillMethod(PDFRatioFillMethod): - """This PDF ratio fill method implements the exact same fill method as in - the skylab2 software named "skylab". It exists just for comparsion and - backward compatibility reasons. In general, it should not be used, because - it does not distinguish between bins with MC converage and physics model - contribution, and those with MC coverage and no physics model contribution! - """ - def __init__(self): - super(Skylab2SkylabPDFRatioFillMethod, self).__init__() - self.signallike_percentile = 99. 
+ n_sources = len(a_k) + n_sel_events = tdm.n_selected_events - def fill_ratios(self, ratio, sig_prob_h, bkg_prob_h, - sig_mask_mc_covered, sig_mask_mc_covered_zero_physics, - bkg_mask_mc_covered, bkg_mask_mc_covered_zero_physics): - """Fills the ratio array. - """ - # Check if we have predicted background for the entire background MC - # range. - if(np.any(bkg_mask_mc_covered_zero_physics)): - raise ValueError('Some of the background bins have MC coverage but no physics background prediction. I don\'t know what to do in this case!') + A = np.sum(a_k) - sig_domain = sig_prob_h > 0 - bkg_domain = bkg_prob_h > 0 + R_ik = self._pdfratio.get_ratio( + tdm=tdm, + src_params_recarray=src_params_recarray, + tl=tl) + # The R_ik ndarray is (N_values,)-shaped. - ratio[sig_domain & bkg_domain] = sig_prob_h[sig_domain & bkg_domain] / bkg_prob_h[sig_domain & bkg_domain] + R_i = np.zeros((n_sel_events,), dtype=np.double) - ratio_value = np.percentile(ratio[ratio > 1.], self.signallike_percentile) - np.copyto(ratio, ratio_value, where=sig_domain & ~bkg_domain) + (src_idxs, evt_idxs) = tdm.src_evt_idxs + for k in range(n_sources): + src_mask = src_idxs == k + R_i[evt_idxs[src_mask]] += R_ik[src_mask] * a_k[k] + R_i /= A - return ratio + self._cache_R_ik = R_ik + self._cache_R_i = R_i -class MostSignalLikePDFRatioFillMethod(PDFRatioFillMethod): - """PDF ratio fill method to set the PDF ratio to the most signal like PDF - ratio for bins, where there is signal MC coverage but no background (MC) - coverage. - """ - def __init__(self, signallike_percentile=99.): - """Creates the PDF ratio fill method object for filling PDF ratio bins, - where there is signal MC coverage but no background (MC) coverage - with the most signal-like ratio value. 
+ return R_i + + def get_gradient( + self, + tdm, + src_params_recarray, + fitparam_id, + tl=None): + """Retrieves the PDF ratio gradient for the parameter ``fitparam_id`` + for each trial data event, given the given set of parameters + ``src_params_recarray`` for each source. + + Note: + + This method requires that the get_ratio method has been called prior + to calling this method. Parameters ---------- - signallike_percentile : float in range [0., 100.], default 99. - The percentile of signal-like ratios, which should be taken as the - ratio value for ratios with no background probability. + tdm : instance of TrialDataManager + The TrialDataManager instance holding the trial data events for + which the PDF ratio gradient values should get calculated. + src_params_recarray : instance of numpy record ndarray + The (N_sources,)-shaped numpy record ndarray holding the parameter + names and values of the sources. + See the documentation of the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information. + fitparam_id : int + The ID of the global fit parameter for which the gradient should + get calculated. + tl : instance of TimeLord | None + The optional TimeLord instance that should be used to measure + timing information. + + Returns + ------- + gradient : instance of ndarray | 0 + The (N_selected_events,)-shaped 1d numpy ndarray of float holding + the PDF ratio gradient value for each trial data event. If the PDF + ratio does not depend on the given global fit parameter, 0 will + be returned. """ - super(MostSignalLikePDFRatioFillMethod, self).__init__() + (a_jk, a_jk_grads) = self._src_detsigyield_weights_service.get_weights() - self.signallike_percentile = signallike_percentile + a_k = a_jk[self._dataset_idx] + A = np.sum(a_k) - @property - def signallike_percentile(self): - """The percentile of signal-like ratios, which should be taken as the - ratio value for ratios with no background probability. 
This percentile - must be given as a float value in the range [0, 100] inclusively. - """ - return self._signallike_percentile - @signallike_percentile.setter - def signallike_percentile(self, value): - if(not isinstance(value, float)): - raise TypeError('The signallike_percentile property must be of type float!') - if(value < 0. or value > 100.): - raise ValueError('The value of the signallike_percentile property must be in the range [0, 100]!') - self._signallike_percentile = value - - def fill_ratios(self, ratio, sig_prob_h, bkg_prob_h, - sig_mask_mc_covered, sig_mask_mc_covered_zero_physics, - bkg_mask_mc_covered, bkg_mask_mc_covered_zero_physics): - """Fills the ratio array. - """ - # Check if we have predicted background for the entire background MC - # range. - if(np.any(bkg_mask_mc_covered_zero_physics)): - raise ValueError('Some of the background bins have MC coverage but no physics background prediction. I don\'t know what to do in this case!') - - # Fill the bins where we have signal and background MC coverage. - mask_sig_and_bkg_mc_covered = sig_mask_mc_covered & bkg_mask_mc_covered - ratio[mask_sig_and_bkg_mc_covered] = sig_prob_h[mask_sig_and_bkg_mc_covered] / bkg_prob_h[mask_sig_and_bkg_mc_covered] - - # Calculate the ratio value, which should be used for ratio bins, where - # we have signal MC coverage but no background MC coverage. - ratio_value = np.percentile(ratio[ratio > 1.], self.signallike_percentile) - mask_sig_but_notbkg_mc_covered = sig_mask_mc_covered & ~bkg_mask_mc_covered - np.copyto(ratio, ratio_value, where=mask_sig_but_notbkg_mc_covered) - - return ratio - - -class MinBackgroundLikePDFRatioFillMethod(PDFRatioFillMethod): - """PDF ratio fill method to set the PDF ratio to the minimal background like - value for bins, where there is signal MC coverage but no background (MC) - coverage. 
- """ - def __init__(self): - """Creates the PDF ratio fill method object for filling PDF ratio bins, - where there is signal MC coverage but no background (MC) coverage - with the minimal background-like ratio value. - """ - super(MinBackgroundLikePDFRatioFillMethod, self).__init__() + n_sources = a_jk.shape[1] + n_sel_events = tdm.n_selected_events - def fill_ratios(self, ratio, sig_prob_h, bkg_prob_h, - sig_mask_mc_covered, sig_mask_mc_covered_zero_physics, - bkg_mask_mc_covered, bkg_mask_mc_covered_zero_physics): - """Fills the ratio array. - """ - # Check if we have predicted background for the entire background MC - # range. - if(np.any(bkg_mask_mc_covered_zero_physics)): - raise ValueError('Some of the background bins have MC coverage but no physics background prediction. I don\'t know what to do in this case!') + if fitparam_id not in a_jk_grads: + a_k_grad = 0 + dAdp = 0 + else: + a_k_grad = a_jk_grads[fitparam_id][self._dataset_idx] + dAdp = np.sum(a_k_grad) + + R_ik_grad = self._pdfratio.get_gradient( + tdm=tdm, + src_params_recarray=src_params_recarray, + fitparam_id=fitparam_id) + # R_ik_grad is a (N_values,)-shaped ndarray or 0. + + if (type(a_k_grad) == int) and (a_k_grad == 0) and\ + (type(R_ik_grad) == int) and (R_ik_grad == 0): + return 0 - # Fill the bins where we have signal and background MC coverage. - mask_sig_and_bkg_mc_covered = sig_mask_mc_covered & bkg_mask_mc_covered - ratio[mask_sig_and_bkg_mc_covered] = sig_prob_h[mask_sig_and_bkg_mc_covered] / bkg_prob_h[mask_sig_and_bkg_mc_covered] + R_i_grad = -self._cache_R_i * dAdp - # Calculate the minimal background-like value. - min_bkg_prob = np.min(bkg_prob_h[bkg_mask_mc_covered]) + src_sum_i = np.zeros((n_sel_events,), dtype=np.double) - # Set the ratio using the minimal background probability where we - # have signal MC coverage but no background (MC) coverage. 
- mask_sig_but_notbkg_mc_covered = sig_mask_mc_covered & ~bkg_mask_mc_covered - ratio[mask_sig_but_notbkg_mc_covered] = sig_prob_h[mask_sig_but_notbkg_mc_covered] / min_bkg_prob + (src_idxs, evt_idxs) = tdm.src_evt_idxs + for k in range(n_sources): + src_mask = src_idxs == k + src_evt_idxs = evt_idxs[src_mask] + if isinstance(a_k_grad, np.ndarray): + src_sum_i[src_evt_idxs] +=\ + a_k_grad[k] * self._cache_R_ik[src_mask] + if isinstance(R_ik_grad, np.ndarray): + src_sum_i[src_evt_idxs] +=\ + a_k[k] * R_ik_grad[src_mask] - return ratio + R_i_grad += src_sum_i + R_i_grad /= A + return R_i_grad -class SigOverBkgPDFRatio(PDFRatio): + +class SigOverBkgPDFRatio( + PDFRatio): """This class implements a generic signal-over-background PDF ratio for a signal and a background PDF instance. It takes a signal PDF of type *pdf_type* and a background PDF of type *pdf_type* and calculates the PDF ratio. """ - def __init__(self, sig_pdf, bkg_pdf, pdf_type=None, same_axes=True, - zero_bkg_ratio_value=1., *args, **kwargs): + def __init__( + self, + sig_pdf, + bkg_pdf, + same_axes=True, + zero_bkg_ratio_value=1., + **kwargs): """Creates a new signal-over-background PDF ratio instance. Parameters @@ -544,9 +691,6 @@ def __init__(self, sig_pdf, bkg_pdf, pdf_type=None, same_axes=True, The instance of the signal PDF. bkg_pdf : class instance derived from `pdf_type`, IsBackgroundPDF The instance of the background PDF. - pdf_type : type | None - The python type of the PDF object for which the PDF ratio is for. - If set to None, the default class ``PDF`` will be used. same_axes : bool Flag if the signal and background PDFs are supposed to have the same axes. Default is True. @@ -554,44 +698,40 @@ def __init__(self, sig_pdf, bkg_pdf, pdf_type=None, same_axes=True, The value of the PDF ratio to take when the background PDF value is zero. This is to avoid division by zero. Default is 1. 
""" - if(pdf_type is None): - pdf_type = PDF - - super(SigOverBkgPDFRatio, self).__init__( - pdf_type=pdf_type, *args, **kwargs) + super().__init__( + sig_param_names=sig_pdf.param_set.params_name_list, + bkg_param_names=bkg_pdf.param_set.params_name_list, + **kwargs) self.sig_pdf = sig_pdf self.bkg_pdf = bkg_pdf # Check that the PDF axes ranges are the same for the signal and # background PDFs. - if(same_axes and (not sig_pdf.axes.is_same_as(bkg_pdf.axes))): - raise ValueError('The signal and background PDFs do not have the ' - 'same axes.') + if same_axes and not sig_pdf.axes.is_same_as(bkg_pdf.axes): + raise ValueError( + 'The signal and background PDFs do not have the same axes!') self.zero_bkg_ratio_value = zero_bkg_ratio_value # Define cache member variables to calculate gradients efficiently. - self._cache_trial_data_state_id = None - self._cache_params_hash = None - self._cache_sigprob = None - self._cache_bkgprob = None - self._cache_siggrads = None - self._cache_bkggrads = None + self._cache_sig_pd = None + self._cache_bkg_pd = None + self._cache_sig_grads = None + self._cache_bkg_grads = None @property def sig_pdf(self): """The signal PDF object used to create the PDF ratio. """ return self._sig_pdf + @sig_pdf.setter def sig_pdf(self, pdf): - if(not isinstance(pdf, self.pdf_type)): - raise TypeError('The sig_pdf property must be an instance of ' - '%s!'%(typename(self.pdf_type))) - if(not isinstance(pdf, IsSignalPDF)): - raise TypeError('The sig_pdf property must be an instance of ' - 'IsSignalPDF!') + if not isinstance(pdf, IsSignalPDF): + raise TypeError( + 'The sig_pdf property must be an instance of IsSignalPDF! ' + f'Its type is "{classname(pdf)}".') self._sig_pdf = pdf @property @@ -599,14 +739,13 @@ def bkg_pdf(self): """The background PDF object used to create the PDF ratio. 
""" return self._bkg_pdf + @bkg_pdf.setter def bkg_pdf(self, pdf): - if(not isinstance(pdf, self.pdf_type)): - raise TypeError('The bkg_pdf property must be an instance of ' - '%s!'%(typename(self.pdf_type))) - if(not isinstance(pdf, IsBackgroundPDF)): - raise TypeError('The bkg_pdf property must be an instance of ' - 'IsBackgroundPDF!') + if not isinstance(pdf, IsBackgroundPDF): + raise TypeError( + 'The bkg_pdf property must be an instance of IsBackgroundPDF! ' + f'Its type is "{classname(pdf)}".') self._bkg_pdf = pdf @property @@ -615,18 +754,46 @@ def zero_bkg_ratio_value(self): is zero. This is to avoid division by zero. """ return self._zero_bkg_ratio_value + @zero_bkg_ratio_value.setter def zero_bkg_ratio_value(self, v): - v = float_cast(v, 'The zero_bkg_ratio_value must be castable into a ' - 'float!') + v = float_cast( + v, + 'The zero_bkg_ratio_value must be castable to type float!') self._zero_bkg_ratio_value = v - def _get_signal_fitparam_names(self): - """Returns the list of fit parameter names the signal PDF depends on. + def initialize_for_new_trial( + self, + tdm, + tl=None, + **kwargs): + """Initializes the PDFRatio instance for a new trial. It calls the + :meth:`~skyllh.core.pdf.PDF.assert_is_valid_for_trial_data` of the + signal and background :class:`~skyllh.core.pdf.PDF` instances. """ - return self._sig_pdf.param_set.floating_param_name_list - - def get_ratio(self, tdm, params=None, tl=None): + self._sig_pdf.initialize_for_new_trial( + tdm=tdm, + tl=tl, + **kwargs) + self._sig_pdf.assert_is_valid_for_trial_data( + tdm=tdm, + tl=tl, + **kwargs) + + self._bkg_pdf.initialize_for_new_trial( + tdm=tdm, + tl=tl, + **kwargs) + self._bkg_pdf.assert_is_valid_for_trial_data( + tdm=tdm, + tl=tl, + **kwargs) + + def get_ratio( + self, + tdm, + src_params_recarray, + tl=None): """Calculates the PDF ratio for the given trial events. 
Parameters @@ -634,264 +801,262 @@ def get_ratio(self, tdm, params=None, tl=None): tdm : instance of TrialDataManager The TrialDataManager instance holding the trial data events for which the PDF ratio values should be calculated. - params : dict | None - The dictionary holding the parameter names and values for which the - probability ratio should get calculated. - This can be ``None``, if the signal and background PDFs do not - depend on any parameters. - tl : TimeLord instance | None + src_params_recarray : instance of numpy record ndarray + The (N_sources,)-shaped numpy record ndarray holding the local + parameter names and values of the sources. + See the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information. + tl : instance of TimeLord | None The optional TimeLord instance that should be used to measure timing information. Returns ------- - ratios : (N_events)-shaped numpy ndarray - The ndarray holding the probability ratio for each event (and each - source). The dimensionality of the returned ndarray depends on the - dimensionality of the probability ndarray returned by the - ``get_prob`` method of the signal PDF object. - """ - with TaskTimer(tl, 'Get sig prob.'): - (sigprob, self._cache_siggrads) = self._sig_pdf.get_prob( - tdm, params, tl=tl) - with TaskTimer(tl, 'Get bkg prob.'): - (bkgprob, self._cache_bkggrads) = self._bkg_pdf.get_prob( - tdm, params, tl=tl) - - with TaskTimer(tl, 'Calc PDF ratios.'): - # Select only the events, where background pdf is greater than zero. - m = (bkgprob > 0) - ratios = np.full_like(sigprob, self._zero_bkg_ratio_value) - ratios[m] = sigprob[m] / bkgprob[m] - - # Store the current state of parameter values and trial data, so that - # the get_gradient method can verify the consistency of the signal and - # background probabilities and gradients. 
- self._cache_trial_data_state_id = tdm.trial_data_state_id - self._cache_params_hash = make_params_hash(params) - self._cache_sigprob = sigprob - self._cache_bkgprob = bkgprob + ratios : instance of ndarray + The (N_values,)-shaped numpy ndarray holding the probability density + ratio for each event and source. + """ + with TaskTimer(tl, 'Get sig probability densities and grads.'): + (self._cache_sig_pd, self._cache_sig_grads) = self._sig_pdf.get_pd( + tdm=tdm, + params_recarray=src_params_recarray, + tl=tl) + with TaskTimer(tl, 'Get bkg probability densities and grads.'): + (self._cache_bkg_pd, self._cache_bkg_grads) = self._bkg_pdf.get_pd( + tdm=tdm, + params_recarray=None, + tl=tl) + + with TaskTimer(tl, 'Calculate PDF ratios.'): + # Select only the events, where the background pdf is greater than + # zero. + ratios = np.full_like(self._cache_sig_pd, self._zero_bkg_ratio_value) + m = (self._cache_bkg_pd > 0) + (m, bkg_pd) = tdm.broadcast_selected_events_arrays_to_values_arrays( + (m, self._cache_bkg_pd)) + np.divide( + self._cache_sig_pd, + bkg_pd, + where=m, + out=ratios) return ratios - def get_gradient(self, tdm, params, fitparam_name): - """Retrieves the gradient of the PDF ratio w.r.t. the given fit - parameter. This method must be called after the ``get_ratio`` method. + def get_gradient( + self, + tdm, + src_params_recarray, + fitparam_id, + tl=None): + """Retrieves the gradient of the PDF ratio w.r.t. the given parameter. + + Note: + + This method uses cached values from the + :meth:`~skyllh.core.pdfratio.SigOverBkgPDFRatio.get_ratio` method. + Hence, that method needs to be called prior to this method. Parameters ---------- - tdm : TrialDataManager instance - The instance of TrialDataManager that should be used to get the - trial data from. - params : dict - The dictionary with the parameter names and values. 
- fitparam_name : str - The name of the fit parameter for which the gradient should + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial data. + src_params_recarray : instance of numpy record ndarray | None + The (N_models,)-shaped numpy record ndarray holding the parameter + names and values of the models. + See :meth:`skyllh.core.pdf.PDF.get_pd` for more information. + This can be ``None``, if the signal and background PDFs do not + depend on any parameters. + fitparam_id : int + The ID of the global fit parameter for which the gradient should get calculated. + tl : instance of TimeLord | None + The optional TimeLord instance that should be used to measure + timing information. Returns ------- - gradient : (N_events,)-shaped 1d numpy ndarray of float - The PDF ratio gradient value for each trial event. + grad : instance of ndarray + The (N_values,)-shaped 1d numpy ndarray of float holding the PDF + ratio gradient value for each source and trial event. """ - if((tdm.trial_data_state_id != self._cache_trial_data_state_id) or - (make_params_hash(params) != self._cache_params_hash)): - raise RuntimeError('The get_ratio method must be called prior to ' - 'the get_gradient method!') - # Create the 1D return array for the gradient. - grad = np.zeros((tdm.n_selected_events,), dtype=np.float64) + grad = np.zeros_like(self._cache_sig_pd, dtype=np.float64) - # Calculate the gradient for the given fit parameter. + # Calculate the gradient for the given parameter. # There are four cases: - # 1) Neither the signal nor the background PDF depend on the fit + # 1) Neither the signal nor the background PDF depend on the # parameter. - # 2) Only the signal PDF depends on the fit parameter. - # 3) Only the background PDF depends on the fit parameter. - # 4) Both, the signal and the background PDF depend on the fit + # 2) Only the signal PDF depends on the parameter. + # 3) Only the background PDF depends on the parameter. 
+ # 4) Both, the signal and the background PDF depend on the # parameter. - sig_pdf_param_set = self._sig_pdf.param_set - bkg_pdf_param_set = self._bkg_pdf.param_set + sig_dep = fitparam_id in self._cache_sig_grads + bkg_dep = fitparam_id in self._cache_bkg_grads - sig_dep = sig_pdf_param_set.has_floating_param(fitparam_name) - bkg_dep = bkg_pdf_param_set.has_floating_param(fitparam_name) + if (not sig_dep) and (not bkg_dep): + # Case 1. Return zeros. + return grad + + m = self._cache_bkg_pd > 0 + b = self._cache_bkg_pd + + (m, b) = tdm.broadcast_selected_events_arrays_to_values_arrays( + (m, b)) - if(sig_dep and (not bkg_dep)): + if sig_dep and not bkg_dep: # Case 2, which should be the most common case. - # Get the signal grad idx for that fit parameter. - sig_pidx = sig_pdf_param_set.get_floating_pidx(fitparam_name) - bkgprob = self._cache_bkgprob - m = bkgprob > 0 - grad[m] = self._cache_siggrads[sig_pidx][m] / bkgprob[m] - return grad - if((not sig_dep) and (not bkg_dep)): - # Case 1. Returns zeros. + grad[m] = self._cache_sig_grads[fitparam_id][m] / b[m] return grad - if(sig_dep and bkg_dep): + bgrad = self._cache_bkg_grads[fitparam_id] + (bgrad,) = tdm.broadcast_selected_events_arrays_to_values_arrays( + (bgrad,)) + + if sig_dep and bkg_dep: # Case 4. - sig_pidx = sig_pdf_param_set.get_floating_pidx(fitparam_name) - bkg_pidx = bkg_pdf_param_set.get_floating_pidx(fitparam_name) - m = self._cache_bkgprob > 0 - s = self._cache_sigprob[m] - b = self._cache_bkgprob[m] - sgrad = self._cache_siggrads[sig_pidx][m] - bgrad = self._cache_bkggrads[bkg_pidx][m] + s = self._cache_sig_pd + sgrad = self._cache_sig_grads[fitparam_id] + # Make use of quotient rule of differentiation. - grad[m] = (sgrad * b - bgrad * s) / b**2 + grad[m] = (sgrad[m] * b[m] - bgrad[m] * s[m]) / b[m]**2 return grad # Case 3. 
- bkg_pidx = bkg_pdf_param_set.get_floating_pidx(fitparam_name) - bkgprob = self._cache_bkgprob - m = bkgprob > 0 - grad[m] = (-self._cache_sigprob[m] / bkgprob[m]**2 * - self._cache_bkggrads[bkg_pidx][m]) - return grad - - -class SpatialSigOverBkgPDFRatio(SigOverBkgPDFRatio): - """This class implements a signal-over-background PDF ratio for spatial - PDFs. It takes a signal PDF of type SpatialPDF and a background PDF of type - SpatialPDF and calculates the PDF ratio. - """ - def __init__(self, sig_pdf, bkg_pdf, *args, **kwargs): - """Creates a new signal-over-background PDF ratio instance for spatial - PDFs. - - Parameters - ---------- - sig_pdf : class instance derived from SpatialPDF, IsSignalPDF - The instance of the spatial signal PDF. - bkg_pdf : class instance derived from SpatialPDF, IsBackgroundPDF - The instance of the spatial background PDF. - """ - super(SpatialSigOverBkgPDFRatio, self).__init__(pdf_type=SpatialPDF, - sig_pdf=sig_pdf, bkg_pdf=bkg_pdf, *args, **kwargs) + grad[m] = -self._cache_sig_pd[m] / b[m]**2 * bgrad[m] - # Make sure that the PDFs have two dimensions, i.e. RA and Dec. - if(not sig_pdf.ndim == 2): - raise ValueError('The spatial signal PDF must have two dimensions! ' - 'Currently it has %d!'%(sig_pdf.ndim)) + return grad -class SigSetOverBkgPDFRatio(PDFRatio): - """Class for a PDF ratio class that takes a PDFSet of PDF type - *pdf_type* as signal PDF and a PDF of type *pdf_type* as background PDF. - The signal PDF depends on signal fit parameters and a interpolation method - defines how the PDF ratio gets interpolated between the fit parameter +class SigSetOverBkgPDFRatio( + PDFRatio): + """Class for a PDF ratio class that takes a PDFSet as signal PDF and a PDF + as background PDF. + The signal PDF depends on signal parameters and an interpolation method + defines how the PDF ratio gets interpolated between the parameter grid values. 
""" - def __init__(self, pdf_type, signalpdfset, backgroundpdf, - interpolmethod=None, *args, **kwargs): + def __init__( + self, + sig_pdf_set, + bkg_pdf, + interpolmethod_cls=None, + **kwargs): """Constructor called by creating an instance of a class which is derived from this PDFRatio class. Parameters ---------- - pdf_type : type - The Python type of the PDF object for which the PDF ratio is for. - signalpdfset : class instance derived from PDFSet (for PDF type - ``pdf_type``), and IsSignalPDF + sig_pdf_set : instance of PDFSet and instance of IsSignalPDF The PDF set, which provides signal PDFs for a set of - discrete signal fit parameters. - backgroundpdf : class instance derived from ``pdf_type``, and - IsBackgroundPDF + discrete signal parameter values. + bkg_pdf : instance of PDF and instance of IsBackgroundPDF The background PDF instance. - interpolmethod : class of GridManifoldInterpolationMethod | None - The class implementing the fit parameter interpolation method for - the PDF ratio manifold grid. - If set to None (default), the - Parabola1DGridManifoldInterpolationMethod will be used for - 1-dimensional parameter manifolds. - """ - # Call super to allow for multiple class inheritance. - super(SigSetOverBkgPDFRatio, self).__init__(pdf_type, *args, **kwargs) - - self.signalpdfset = signalpdfset - self.backgroundpdf = backgroundpdf - - # Define the default fit parameter interpolation method. The default - # depends on the dimensionality of the fit parameter manifold. - if(interpolmethod is None): - ndim = signalpdfset.fitparams_grid_set.ndim - if(ndim == 1): - interpolmethod = Parabola1DGridManifoldInterpolationMethod - else: - raise ValueError('There is no default fit parameter manifold grid interpolation method available for %d dimensions!'%(ndim)) - self.interpolmethod = interpolmethod + interpolmethod_cls : class of GridManifoldInterpolationMethod | None + The class implementing the parameter interpolation method for + the PDF ratio manifold grid. 
If set to ``None`` (default), the + :class:`skyllh.core.interpolate.Parabola1DGridManifoldInterpolationMethod` + will be used for 1-dimensional parameter manifolds. + """ + super().__init__( + sig_param_names=sig_pdf_set.param_grid_set.params_name_list, + bkg_param_names=bkg_pdf.param_set.params_name_list, + **kwargs) + + self.sig_pdf_set = sig_pdf_set + self.bkg_pdf = bkg_pdf - # Generate the list of signal fit parameter names once here. - self._cache_signal_fitparam_name_list = self.signal_fitparam_names + # Define the default parameter interpolation method. The default + # depends on the dimensionality of the parameter manifold. + if interpolmethod_cls is None: + ndim = self._sig_pdf_set.param_grid_set.ndim + if ndim == 1: + interpolmethod_cls = Parabola1DGridManifoldInterpolationMethod + else: + raise ValueError( + 'There is no default parameter manifold grid ' + f'interpolation method class available for {ndim} ' + 'dimensions!') + self.interpolmethod_cls = interpolmethod_cls @property - def backgroundpdf(self): - """The background PDF object, derived from ``pdf_type`` and - IsBackgroundPDF. + def bkg_pdf(self): + """The background PDF instance, derived from IsBackgroundPDF. """ - return self._bkgpdf - @backgroundpdf.setter - def backgroundpdf(self, pdf): - if(not (isinstance(pdf, self.pdf_type) and isinstance(pdf, IsBackgroundPDF))): - raise TypeError('The backgroundpdf property must be an object which is derived from %s and IsBackgroundPDF!'%(typename(self.pdf_type))) - self._bkgpdf = pdf + return self._bkg_pdf + + @bkg_pdf.setter + def bkg_pdf(self, pdf): + if not isinstance(pdf, IsBackgroundPDF): + raise TypeError( + 'The bkg_pdf property must be an instance derived from ' + 'IsBackgroundPDF! ' + f'Its current type is {classname(pdf)}.') + self._bkg_pdf = pdf @property - def signalpdfset(self): - """The signal PDFSet object for ``pdf_type`` PDF objects. + def sig_pdf_set(self): + """The signal PDFSet instance, derived from IsSignalPDF. 
""" - return self._sigpdfset - @signalpdfset.setter - def signalpdfset(self, pdfset): - if(not (isinstance(pdfset, PDFSet) and isinstance(pdfset, IsSignalPDF) and issubclass(pdfset.pdf_type, self.pdf_type))): - raise TypeError('The signalpdfset property must be an object which is derived from PDFSet and IsSignalPDF and whose pdf_type property is a subclass of %s!'%(typename(self.pdf_type))) - self._sigpdfset = pdfset + return self._sig_pdf_set + + @sig_pdf_set.setter + def sig_pdf_set(self, pdfset): + if not (isinstance(pdfset, PDFSet) and + isinstance(pdfset, IsSignalPDF)): + raise TypeError( + 'The sig_pdf_set property must be a class instance which is ' + 'derived from PDFSet and IsSignalPDF! ' + f'Its current type is {classname(pdfset)}.') + self._sig_pdf_set = pdfset @property - def interpolmethod(self): + def interpolmethod_cls(self): """The class derived from GridManifoldInterpolationMethod - implementing the interpolation of the fit parameter manifold. - """ - return self._interpolmethod - @interpolmethod.setter - def interpolmethod(self, cls): - if(not issubclass(cls, GridManifoldInterpolationMethod)): - raise TypeError('The interpolmethod property must be a sub-class ' - 'of GridManifoldInterpolationMethod!') - self._interpolmethod = cls - - def _get_signal_fitparam_names(self): - """Returns the list of signal fit parameter names this PDF ratio is a - function of. The list is taken from the fit parameter grid set of the - signal PDFSet object. By construction this parameter grid set defines - the signal fit parameters. + implementing the interpolation of the parameter manifold. """ - return self._sigpdfset.fitparams_grid_set.parameter_names - - def convert_signal_fitparam_name_into_index(self, signal_fitparam_name): - """Converts the given signal fit parameter name into the parameter - index, i.e. the position of parameter in the signal parameter grid set. 
+ return self._interpolmethod_cls + + @interpolmethod_cls.setter + def interpolmethod_cls(self, cls): + if not issubclass(cls, GridManifoldInterpolationMethod): + raise TypeError( + 'The interpolmethod_cls property must be a sub-class ' + 'of GridManifoldInterpolationMethod! ' + f'Its current type is {classname(cls)}.') + self._interpolmethod_cls = cls + + def initialize_for_new_trial( + self, + tdm, + tl=None, + **kwargs): + """Initializes the PDFRatio instance for a new trial. It calls the + :meth:`~skyllh.core.pdf.PDF.assert_is_valid_for_trial_data` of the + signal :class:`~skyllh.core.pdf.PDFSet` instance and the background + :class:`~skyllh.core.pdf.PDF` instance. Parameters ---------- - signal_fitparam_name : str - The name of the signal fit parameter. - - Returns - ------- - index : int - The index of the signal fit parameter. + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial data. + tl : instance of TimeLord | None + The optional instance of TimeLord for measuring timing information. """ - # If there is only one signal fit parameter, we just return index 0. - if(len(self._cache_signal_fitparam_name_list) == 1): - return 0 - - # At this point we have to loop through the list and do name - # comparisons. - for (index, name) in enumerate(self._cache_signal_fitparam_name_list): - if(name == signal_fitparam_name): - return index - - # At this point there is no parameter defined. 
- raise KeyError('The PDF ratio "%s" has no signal fit parameter named ' - '"%s"!'%(classname(self), signal_fitparam_name)) + self._sig_pdf_set.initialize_for_new_trial( + tdm=tdm, + tl=tl, + **kwargs) + self._sig_pdf_set.assert_is_valid_for_trial_data( + tdm=tdm, + tl=tl, + **kwargs) + + self._bkg_pdf.initialize_for_new_trial( + tdm=tdm, + tl=tl, + **kwargs) + self._bkg_pdf.assert_is_valid_for_trial_data( + tdm=tdm, + tl=tl, + **kwargs) diff --git a/skyllh/core/pdfratio_fill.py b/skyllh/core/pdfratio_fill.py new file mode 100644 index 0000000000..f41cdecf2f --- /dev/null +++ b/skyllh/core/pdfratio_fill.py @@ -0,0 +1,277 @@ +# -*- coding: utf-8 -*- + +"""This module defines the interface and provides particular implementations +of PDF ratio fill methods. For a binned PDF ratio it could happen, that some +bins don't have background information but signal information is available. +Hence, a ratio cannot be computed for those bins. The PDF ratio fill method +specifies how those bins should get filled. +""" + +import abc +import numpy as np + +from skyllh.core.py import ( + float_cast, +) + + +class PDFRatioFillMethod(object, metaclass=abc.ABCMeta): + """Abstract base class to implement a PDF ratio fill method. It can happen, + that there are empty background bins but where signal could possibly be. + A PDFRatioFillMethod implements what happens in such cases. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @abc.abstractmethod + def __call__( + self, + ratios, + sig_pd_h, + bkg_pd_h, + sig_mask_mc_covered, + sig_mask_mc_covered_zero_physics, + bkg_mask_mc_covered, + bkg_mask_mc_covered_zero_physics): + """The __call__ method is supposed to fill the ratio bins (array) + with the signal / background ratio values. For bins (array elements), + where the division is undefined, e.g. due to zero background, the fill + method decides how to fill those bins. 
+
+        Note: Bins which have neither signal monte-carlo nor background
+            monte-carlo coverage, are undefined about their signal-ness or
+            background-ness by construction.
+
+        Parameters
+        ----------
+        ratios : ndarray of float
+            The multi-dimensional array for the final ratio bins. The shape is
+            the same as the sig_pd_h and bkg_pd_h ndarrays.
+        sig_pd_h : ndarray of float
+            The multi-dimensional array (histogram) holding the signal
+            probability densities.
+        bkg_pd_h : ndarray of float
+            The multi-dimensional array (histogram) holding the background
+            probability densities.
+        sig_mask_mc_covered : ndarray of bool
+            The mask array indicating which array elements of sig_pd_h have
+            monte-carlo coverage.
+        sig_mask_mc_covered_zero_physics : ndarray of bool
+            The mask array indicating which array elements of sig_pd_h have
+            monte-carlo coverage but don't have physics contribution.
+        bkg_mask_mc_covered : ndarray of bool
+            The mask array indicating which array elements of bkg_pd_h have
+            monte-carlo coverage.
+            In case of experimental data as background, this mask indicates
+            where (experimental data) background is available.
+        bkg_mask_mc_covered_zero_physics : ndarray of bool
+            The mask array indicating which array elements of bkg_pd_h have
+            monte-carlo coverage but don't have physics contribution.
+            In case of experimental data as background, this mask contains only
+            False entries.
+
+        Returns
+        -------
+        ratios : ndarray
+            The array holding the final ratio values.
+        """
+        return ratios
+
+
+class Skylab2SkylabPDFRatioFillMethod(PDFRatioFillMethod):
+    """This PDF ratio fill method implements the exact same fill method as in
+    the skylab2 software named "skylab". It exists just for comparison and
+    backward compatibility reasons. In general, it should not be used, because
+    it does not distinguish between bins with MC coverage and physics model
+    contribution, and those with MC coverage and no physics model contribution!
+ """ + def __init__(self, **kwargs): + super().__init__(**kwargs) + + self.signallike_percentile = 99. + + @property + def signallike_percentile(self): + """The percentile of signal-like ratios, which should be taken as the + ratio value for ratios with no background probability. This percentile + must be given as a float value in the range [0, 100] inclusively. + """ + return self._signallike_percentile + + @signallike_percentile.setter + def signallike_percentile(self, value): + value = float_cast( + value, + 'The value for the signallike_percentile property must be castable ' + 'to type float!') + if (value < 0) or (value > 100): + raise ValueError( + f'The value "{value}" of the signallike_percentile property ' + 'must be in the range [0, 100]!') + self._signallike_percentile = value + + def __call__( + self, + ratio, + sig_pd_h, + bkg_pd_h, + sig_mask_mc_covered, + sig_mask_mc_covered_zero_physics, + bkg_mask_mc_covered, + bkg_mask_mc_covered_zero_physics): + """Fills the ratio array ``ratio``. + For more information see the documentation of + :meth:`skyllh.core.pdfratio_fill.PDFRatioFillMethod.__call__`. + """ + # Check if we have predicted background for the entire background MC + # range. + if np.any(bkg_mask_mc_covered_zero_physics): + raise ValueError( + 'Some of the background bins have MC coverage but no physics ' + 'background prediction. I don\'t know what to do in this case!') + + sig_domain = sig_pd_h > 0 + bkg_domain = bkg_pd_h > 0 + + sig_bkg_domain = sig_domain & bkg_domain + + ratio[sig_bkg_domain] = ( + sig_pd_h[sig_bkg_domain] / bkg_pd_h[sig_bkg_domain] + ) + + ratio_value = np.percentile( + ratio[ratio > 1.], self._signallike_percentile) + np.copyto(ratio, ratio_value, where=sig_domain & ~bkg_domain) + + return ratio + + +class MostSignalLikePDFRatioFillMethod(PDFRatioFillMethod): + """PDF ratio fill method to set the PDF ratio to the most signal like PDF + ratio for bins, where there is signal but no background coverage. 
+ """ + def __init__(self, signallike_percentile=99., **kwargs): + """Creates the PDF ratio fill method object for filling PDF ratio bins, + where there is signal MC coverage but no background (MC) coverage + with the most signal-like ratio value. + + Parameters + ---------- + signallike_percentile : float in range [0., 100.], default 99. + The percentile of signal-like ratios, which should be taken as the + ratio value for ratios with no background probability. + """ + super().__init__(**kwargs) + + self.signallike_percentile = signallike_percentile + + @property + def signallike_percentile(self): + """The percentile of signal-like ratios, which should be taken as the + ratio value for ratios with no background probability. This percentile + must be given as a float value in the range [0, 100] inclusively. + """ + return self._signallike_percentile + + @signallike_percentile.setter + def signallike_percentile(self, value): + value = float_cast( + value, + 'The value for the signallike_percentile property must be castable ' + 'to type float!') + if (value < 0) or (value > 100): + raise ValueError( + f'The value "{value}" of the signallike_percentile property ' + 'must be in the range [0, 100]!') + self._signallike_percentile = value + + def __call__( + self, + ratio, + sig_pd_h, + bkg_pd_h, + sig_mask_mc_covered, + sig_mask_mc_covered_zero_physics, + bkg_mask_mc_covered, + bkg_mask_mc_covered_zero_physics): + """Fills the ratio array ``ratio``. + For more information see the documentation of + :meth:`skyllh.core.pdfratio_fill.PDFRatioFillMethod.__call__`. + """ + # Check if we have predicted background for the entire background MC + # range. + if np.any(bkg_mask_mc_covered_zero_physics): + raise ValueError( + 'Some of the background bins have MC coverage but no physics ' + 'background prediction. I don\'t know what to do in this case!') + + # Fill the bins where we have signal and background MC coverage. 
+ mask_sig_and_bkg_mc_covered = sig_mask_mc_covered & bkg_mask_mc_covered + ratio[mask_sig_and_bkg_mc_covered] = ( + sig_pd_h[mask_sig_and_bkg_mc_covered] / + bkg_pd_h[mask_sig_and_bkg_mc_covered] + ) + + # Calculate the ratio value, which should be used for ratio bins, where + # we have signal MC coverage but no background MC coverage. + ratio_value = np.percentile( + ratio[ratio > 1.], self._signallike_percentile) + mask_sig_but_notbkg_mc_covered = ( + sig_mask_mc_covered & ~bkg_mask_mc_covered + ) + np.copyto(ratio, ratio_value, where=mask_sig_but_notbkg_mc_covered) + + return ratio + + +class MinBackgroundLikePDFRatioFillMethod(PDFRatioFillMethod): + """PDF ratio fill method to set the PDF ratio to the minimal background like + value for bins, where there is signal but no background coverage. + """ + def __init__(self, **kwargs): + """Creates the PDF ratio fill method object for filling PDF ratio bins, + where there is signal MC coverage but no background (MC) coverage + with the minimal background-like ratio value. + """ + super().__init__(**kwargs) + + def __call__( + self, + ratio, + sig_pd_h, + bkg_pd_h, + sig_mask_mc_covered, + sig_mask_mc_covered_zero_physics, + bkg_mask_mc_covered, + bkg_mask_mc_covered_zero_physics): + """Fills the ratio array ``ratio``. + For more information see the documentation of + :meth:`skyllh.core.pdfratio_fill.PDFRatioFillMethod.__call__`. + """ + # Check if we have predicted background for the entire background MC + # range. + if np.any(bkg_mask_mc_covered_zero_physics): + raise ValueError( + 'Some of the background bins have MC coverage but no physics ' + 'background prediction. I don\'t know what to do in this case!') + + # Fill the bins where we have signal and background MC coverage. 
+ mask_sig_and_bkg_mc_covered = sig_mask_mc_covered & bkg_mask_mc_covered + ratio[mask_sig_and_bkg_mc_covered] = ( + sig_pd_h[mask_sig_and_bkg_mc_covered] / + bkg_pd_h[mask_sig_and_bkg_mc_covered] + ) + + # Calculate the minimal background-like value. + min_bkg_prob = np.min(bkg_pd_h[bkg_mask_mc_covered]) + + # Set the ratio using the minimal background probability where we + # have signal MC coverage but no background (MC) coverage. + mask_sig_but_notbkg_mc_covered = ( + sig_mask_mc_covered & ~bkg_mask_mc_covered + ) + ratio[mask_sig_but_notbkg_mc_covered] =\ + sig_pd_h[mask_sig_but_notbkg_mc_covered] / min_bkg_prob + + return ratio diff --git a/skyllh/core/progressbar.py b/skyllh/core/progressbar.py index 0f4164db81..ae57650529 100644 --- a/skyllh/core/progressbar.py +++ b/skyllh/core/progressbar.py @@ -1,21 +1,29 @@ # -*- coding: utf-8 -*- -from __future__ import division - -import numpy as np -import sys -import time - -from skyllh.core import display -from skyllh.core import session -from skyllh.core.py import int_cast - - -class ProgressBar(object): - """This class implements a hierarchical progress bar that can serve as a - parent for child progress bars to display different levels of processing. +from tqdm import tqdm + +from skyllh.core import ( + session, +) +from skyllh.core.py import ( + int_cast, +) + + +class ProgressBar( + object): + """This class provides an hierarchical progress bar for SkyLLH. + For rendering it uses the tqdm Python package. + In case of multiple layers of progress bars, it creates only a single + progress bar, which gets updated whenever the deeper level progress bars + are updated. """ - def __init__(self, maxval, startval=0, parent=None): + def __init__( + self, + maxval, + startval=0, + parent=None, + **kwargs): """Creates a new ProgressBar instance. Parameters @@ -24,30 +32,54 @@ def __init__(self, maxval, startval=0, parent=None): The maximal value the progress can reach. startval : int The progress value to start with. 
Must be smaller than `maxval`. - parent : instance of ProgressBar + parent : instance of ProgressBar | False | None The parent instance of ProgressBar if this progress bar is a sub progress bar. + If set to ``False``, this progress bar is deactivated and the + property ``is_shown`` will return ``False``. + If set to ``None``, this progress bar will be a primary progress + bar. + + Additional keyword arguments + ---------------------------- + Additional keyword arguments are passed to the constructor of the tqdm + class. """ - super(ProgressBar, self).__init__() + super().__init__( + **kwargs) + + self._is_deactivated = False + if parent is False: + self._is_deactivated = True + parent = None self.maxval = maxval self.startval = startval self.parent = parent - self._start_time = None self._val = 0 - self._last_rendered_pbar_str = None self._sub_pbar_list = [] + self._tqdm = None + if (self._parent is None) and self.is_shown: + self._tqdm = tqdm( + total=maxval, + initial=startval, + leave=True, + position=0, + **kwargs) + @property def maxval(self): """The maximal integer value the progress can reach. """ return self._maxval + @maxval.setter def maxval(self, v): - v = int_cast(v, 'The maxval property must be castable to an integer ' - 'value!') + v = int_cast( + v, + 'The maxval property must be castable to an integer value!') self._maxval = v @property @@ -56,230 +88,163 @@ def startval(self): `maxval`. 
""" return self._startval + @startval.setter def startval(self, v): - v = int_cast(v, 'The startval property must be castable to an integer ' - ' value!') - if(v >= self._maxval): - raise ValueError('The startval value (%d) must be smaller than the ' - 'value of the `maxval` property (%d)!', - v, self._maxval) + v = int_cast( + v, + 'The startval property must be castable to an integer value!') + + if v >= self._maxval: + raise ValueError( + f'The startval value ({v}) must be smaller than the value of ' + f'the `maxval` property ({self._maxval})!') + self._startval = v @property - def parent(self): - """The parent ProgressBar instance of this progress bar, or `None` if - no parent exist. + def val(self): + """(read-only) The current value of the progess. """ - return self._parent - @parent.setter - def parent(self, pbar): - if(pbar is not None): - if(not isinstance(pbar, ProgressBar)): - raise TypeError('The parent property must be None, or an ' - 'instance of ProgressBar!') - self._parent = pbar + return self._val @property - def progress(self): - """(read-only) The progress of this progress bar as a number between 0 - and 1. + def is_shown(self): + """(read-only) Flag if the progress bar is shown. This is ``True`` + if the program is run in an interactive session, ``False`` otherwise. """ - return (self._val - self._startval) / (self._maxval - self._startval) + if self._is_deactivated is True: + return False + return session.is_interactive_session() @property - def gets_shown(self): - """(read-only) Flag if the progress bar gets shown. This is ``True`` - if the program is run in an interactive session, ``False`` otherwise. + def parent(self): + """The parent ProgressBar instance of this progress bar, or ``None`` if + no parent exist. 
""" - return session.is_interactive_session() + return self._parent + + @parent.setter + def parent(self, pbar): + if pbar is not None: + if not isinstance(pbar, ProgressBar): + raise TypeError( + 'The parent property must be None, or an instance of ' + 'ProgressBar!') + self._parent = pbar def add_sub_progress_bar(self, pbar): """Adds the given progress bar to the list of running sub progress bars of this progress bar. """ - if(not isinstance(pbar, ProgressBar)): - raise TypeError('The pbar argument must be an instance of ' - 'ProgressBar!') + if not isinstance(pbar, ProgressBar): + raise TypeError( + 'The pbar argument must be an instance of ProgressBar!') self._sub_pbar_list.append(pbar) - def remove_sub_progress_bar(self, pbar): - """Removes the given progress bar instance from the list of running sub - progress bars of this progress bar. - """ - self._sub_pbar_list.remove(pbar) - - def _sec_to_hms(self, t): - """Calculate hours, minutes, and seconds from `t` given in seconds. - - Parameters - ---------- - t : float - The time in seconds. - - Returns - ------- - t_h : float - The integer hours of `t`. - t_m : float - The integer minutes of `t`. - t_s : float - The integer seconds of `t`. + def remove_sub_progress_bars(self): + """Removes all progress bar instances from the list of sub progress bars + of this progress bar. It calles the ``remove_sub_progress_bars`` method + of each sub progress bar. """ - t_h = int(t / 3600) - t -= t_h*3600 - t_m = int(t / 60) - t -= t_m*60 - t_s = int(np.round(t, 0)) + for pbar in self._sub_pbar_list: + pbar.remove_sub_progress_bars() - return (t_h, t_m, t_s) + self._sub_pbar_list = [] - def _render_pbar_str(self): - """Renders the progress bar string. This method is called only when - this progress bar has no parent. + def get_progressbar_list(self): + """Retrieves the list of ProgressBar instances. Returns ------- - pbar_str : str - The rendered progress bar string. 
+ pbar_list : list of instance of ProgressBar + The list of ProgressBar instances, which are part of this + ProgressBar instance. """ - # Calculate the elapsed time (ELT) for the first 10 seconds or if we - # are at the end of the progress. Otherwise calculate the estimated - # remaining time (ERT). - curr_time = time.time() - t_elapsed = curr_time - self._start_time - - t_label = 'ELT' - t = t_elapsed - - progress = self.progress - if(progress > 0 and (t_elapsed >= 10) and (self._val < self._maxval)): - t_label = 'ERT' - t_total = t_elapsed / progress - t = t_total - t_elapsed - - (t_h, t_m, t_s) = self._sec_to_hms(t) - - # Get the current progress values from the sub progress bars and this - # progress bar. The value of this progress bar is the last value. - p_list = self.get_progress_list() - - sub_pbar_str = '' - for p in p_list[:-1]: - if(p != 1): - sub_pbar_str += '%d '%(int(p*10)) - else: - sub_pbar_str += '# ' - - barwidth = max(display.PAGE_WIDTH - 22 - len(sub_pbar_str), 10) - nchar = int(np.round(p_list[-1] * barwidth, 0)) - - fmt = "%s[%-"+str(barwidth)+"s] %3d%% %s %dh:%02dm:%02ds" - pbar_str = fmt%( - sub_pbar_str, - '='*nchar, - int(np.round(p_list[-1]*100, 0)), - t_label, t_h, t_m, t_s - ) - - return pbar_str - - def get_progress_list(self): - """Retrieves the list of progress values of all the sub progress bars - and this progress. The value of this progress bar is the last value. - """ - p_list = [] + pbar_list = [] for pbar in self._sub_pbar_list: - p_list.extend(pbar.get_progress_list()) - p_list.append(self.progress) + pbar_list.extend(pbar.get_progressbar_list()) + + pbar_list.append(self) - return p_list + return pbar_list def rerender(self): - """Rerenders this progress bar on the display, but only if the rendered - progress bar string changed. + """Rerenders this progress bar on the display. It calls the ``update`` + method of the tqdm progess bar. 
""" - pbar_str = self._render_pbar_str() - if(pbar_str == self._last_rendered_pbar_str): + if not self.is_shown: return - sys.stdout.write('\r'+pbar_str) - sys.stdout.flush() - self._last_rendered_pbar_str = pbar_str + pbar_list = self.get_progressbar_list() + + maxval = 0 + val = 0 + for pbar in pbar_list: + maxval += pbar.maxval + val += pbar.val + + dval = val - self._tqdm.n + self._tqdm.total = maxval + self._tqdm.update(dval) def trigger_rerendering(self): - """Triggers a rerendering of the most top progress bar. + """Triggers a rerendering / update of the most top progress bar. """ - parent = self._parent - if(parent is not None): - parent.trigger_rerendering() - return - - if(not session.is_interactive_session()): + if self._parent is not None: + self._parent.trigger_rerendering() return # We are the most top parent progress bar. So we need to get rerendered. self.rerender() def start(self): - """Starts the progress bar by taking the start time and setting the - progress value to the start value. - If this progress bar has a parent, it adds itself to the parent's list - of running sub progress bars. Otherwise it will render and print this - progress bar for the first time. - - Returns - ------- - self : instance of ProgressBar - The instance of this ProgressBar. + """Sets the current progess value to ``startval`` and updates the + progess bar to fulfill the start conditions. """ - self._start_time = time.time() self._val = self._startval - parent = self._parent - if(parent is not None): - # Add this progress bar to the parent progress bar. 
- parent.add_sub_progress_bar(self) + if self._parent is not None: + self._parent.add_sub_progress_bar(self) + elif not self.is_shown: return self + else: + self._tqdm.initial = self._val + self._tqdm.n = self._val + self._tqdm.reset() - if(not session.is_interactive_session()): - return self - - self._last_rendered_pbar_str = self._render_pbar_str() - - sys.stdout.write(self._last_rendered_pbar_str) - sys.stdout.flush() + self.trigger_rerendering() return self def finish(self): """Finishes this progress bar by setting the current progress value to its max value. - If this progress bar has a parent, it triggers a rerendering of the - parent and then removes itself from the parent's list of running sub - progress bars. Otherwise it will render and print this progress bar for - the last time. + If this progress bar is the top most progessbar, it will also close + the tqdm instance, what will trigger a flush of the output buffer. """ self._val = self._maxval - parent = self._parent - if(parent is not None): - parent.trigger_rerendering() - # Remove this progress bar from the parent progress bar. - parent.remove_sub_progress_bar(self) - return + self.trigger_rerendering() - if(not session.is_interactive_session()): - return + if (self._parent is None) and self.is_shown: + self._tqdm.close() - self._last_rendered_pbar_str = self._render_pbar_str() + self.remove_sub_progress_bars() - sys.stdout.write('\r'+self._last_rendered_pbar_str+"\n") - sys.stdout.flush() + def increment(self, dval=1): + """Updates the progress bar by incrementing the progress by the given + integral amount. + + Parameters + ---------- + dval : int + The amount of progress to increment the progress bar with. + """ + self.update(self._val + dval) def update(self, val): - """Updates the progress value. If this progress bar has no parent - progress bar, a rerendering of this progress bar is made. + """Updates the progress value to the given value. 
Parameters ---------- @@ -289,14 +254,3 @@ def update(self, val): self._val = val self.trigger_rerendering() - - def increment(self, dval=1): - """Updates the progress bar by incrementing the progress by the given - integral amount. - - Parameters - ---------- - dval : int - The amount of progress to increment the progress bar with. - """ - self.update(self._val + dval) diff --git a/skyllh/core/py.py b/skyllh/core/py.py index 64f179b62c..677f13a788 100644 --- a/skyllh/core/py.py +++ b/skyllh/core/py.py @@ -1,23 +1,28 @@ # -*- coding: utf-8 -*- -from __future__ import division - import abc import copy +import functools import inspect import numpy as np import sys +from collections import OrderedDict + +from skyllh.core.display import INDENTATION_WIDTH -class PyQualifier(object, metaclass=abc.ABCMeta): + +class PyQualifier( + object, + metaclass=abc.ABCMeta): """This is the abstract base class for any Python qualifier class. An object can get qualified by calling a PyQualifier instance with that object. The PyQualifier class will be added to the ``__pyqualifiers__`` attribute of the object. """ - def __init__(self): - super(PyQualifier, self).__init__() + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) def __call__(self, obj): """Declares the given Python object to be qualified with the @@ -35,7 +40,7 @@ def __call__(self, obj): The given object, but modified to be declared for this Python qualifier. """ - if(not hasattr(obj, '__pyqualifiers__')): + if not hasattr(obj, '__pyqualifiers__'): setattr(obj, '__pyqualifiers__', ()) obj.__pyqualifiers__ += (self.__class__,) @@ -57,37 +62,97 @@ class of the `self` object. The check result. `True` if the object is declared for this Python qualifier, and `False` otherwise. 
""" - if(not hasattr(obj, '__pyqualifiers__')): + if not hasattr(obj, '__pyqualifiers__'): return False - if(self.__class__ in obj.__pyqualifiers__): + if self.__class__ in obj.__pyqualifiers__: return True return False -class ConstPyQualifier(PyQualifier): + +class ConstPyQualifier( + PyQualifier): """This class defines a PyQualifier for constant Python objects. """ - def __init__(self): - super(ConstPyQualifier, self).__init__() + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + const = ConstPyQualifier() +def get_class_of_func(f): + """Determines the class object that defined the given method or function + ``f``. + + Parameters + ---------- + f : function | method + The function or method whose parent class should be determined. + + Returns + ------- + cls : class | None + The class object which defines the given function or method. ``None`` + is returned when no class could be determined. + """ + if isinstance(f, functools.partial): + return get_class_of_func(f.func) + + if inspect.ismethod(f) or\ + ((inspect.isbuiltin(f)) and + (getattr(f, '__self__', None) is not None) and + (getattr(f.__self__, '__class__', None))): + for cls in inspect.getmro(f.__self__.__class__): + if hasattr(cls, '__dict__') and (f.__name__ in cls.__dict__): + return cls + # Fall back to normal function evaluation. + f = getattr(f, '__func__', f) + + if inspect.isfunction(f): + cls = getattr( + inspect.getmodule(f), + f.__qualname__.split('.', 1)[0].rsplit('.', 1)[0], + None) + if isinstance(cls, type): + return cls + + # Handle special descriptor objects. + cls = getattr(f, '__objclass__', None) + return cls + + def typename(t): """Returns the name of the given type ``t``. """ return t.__name__ + def classname(obj): """Returns the name of the class of the class instance ``obj``. """ return typename(type(obj)) + def module_classname(obj): - """Returns the module and class name of the class instance ``obj``. 
+ """Returns the module and class name of the given instance ``obj``. """ - return '{}.{}'.format(obj.__module__, classname(obj)) + return f'{obj.__module__}.{classname(obj)}' + + +def module_class_method_name(obj, meth_name): + """Returns the module, class, and method name of the given instance ``obj``. + + Parameters + ---------- + obj : instance of object + The object instance. + meth_name : str + The name of the method. + """ + return f'{module_classname(obj)}.{meth_name}' + def get_byte_size_prefix(size): """Determines the biggest size prefix for the given size in bytes such that @@ -110,7 +175,7 @@ def get_byte_size_prefix(size): prefix_idx = 0 for (prefix, factor) in prefix_factor_list[1:]: - if(size / factor < 1): + if size / factor < 1: break prefix_idx += 1 @@ -119,6 +184,7 @@ def get_byte_size_prefix(size): return (newsize, prefix) + def getsizeof(objects): """Determines the size in bytes the given objects have in memory. If an object is a sequence, the size of the elements of the sequence will @@ -134,18 +200,19 @@ def getsizeof(objects): memsize : int The memory size in bytes of the given objects. """ - if(not issequence(objects)): + if not issequence(objects): objects = [objects] memsize = 0 for obj in objects: - if(issequence(obj)): + if issequence(obj): memsize += getsizeof(obj) else: memsize += sys.getsizeof(obj) return memsize + def issequence(obj): """Checks if the given object ``obj`` is a sequence or not. The definition of a sequence in this case is, that the function ``len`` is defined for the @@ -153,13 +220,16 @@ def issequence(obj): .. note:: - A str object is NOT considered as a sequence! + A str object is NOT considered a sequence! - :return True: If the given object is a sequence. - :return False: If the given object is a str object or not a sequence. + Returns + ------- + check : bool + ``True`` if the given object is a sequence. ``False`` if the given + object is an instance of str or not a sequence. 
""" - if(isinstance(obj, str)): + if isinstance(obj, str): return False try: @@ -169,6 +239,7 @@ def issequence(obj): return True + def issequenceof(obj, T, pyqualifiers=None): """Checks if the given object ``obj`` is a sequence with items being instances of type ``T``, possibly qualified with the given Python @@ -176,7 +247,7 @@ def issequenceof(obj, T, pyqualifiers=None): Parameters ---------- - obj : object + obj : instance of object The Python object to check. T : type | tuple of types The type each item of the sequence should be. If a tuple of types is @@ -193,33 +264,52 @@ def issequenceof(obj, T, pyqualifiers=None): check : bool The result of the check. """ - if(pyqualifiers is None): + if pyqualifiers is None: pyqualifiers = tuple() - elif(not issequence(pyqualifiers)): + elif not issequence(pyqualifiers): pyqualifiers = (pyqualifiers,) - if(not issequence(obj)): + if not issequence(obj): return False + for item in obj: - if(not isinstance(item, T)): + if not isinstance(item, T): return False for pyqualifier in pyqualifiers: - if(not pyqualifier.check(item)): + if not pyqualifier.check(item): return False return True + def issequenceofsubclass(obj, T): """Checks if the given object ``obj`` is a sequence with items being sub-classes of class T. + + Parameters + ---------- + obj : instance of object + The object to check. + T : class + The base class of the items of the given object. + + Returns + ------- + check : bool + ``True`` if the given object is a sequence of instances which are + sub-classes of class ``T``. ``False`` if ``obj`` is not a sequence or + any item is not a sub-class of class ``T``. """ - if(not issequence(obj)): + if not issequence(obj): return False + for item in obj: - if(not issubclass(item, T)): + if not issubclass(item, T): return False + return True + def isproperty(obj, name): """Checks if the given attribute is of type property. The attribute must exist in ``obj``. 
@@ -244,6 +334,7 @@ def isproperty(obj, name): attr = type(obj).__class__.__getattribute__(type(obj), name) return isinstance(attr, property) + def func_has_n_args(func, n): """Checks if the given function `func` has `n` arguments. @@ -262,31 +353,35 @@ def func_has_n_args(func, n): check = (len(inspect.signature(func).parameters) == n) return check + def bool_cast(v, errmsg): """Casts the given value to a boolean value. If the cast is impossible, a TypeError is raised with the given error message. """ try: v = bool(v) - except: + except Exception: raise TypeError(errmsg) + return v + def int_cast(v, errmsg, allow_None=False): """Casts the given value to an integer value. If the cast is impossible, a TypeError is raised with the given error message. If `allow_None` is set to `True` the value `v` can also be `None`. """ - if(allow_None and v is None): + if allow_None and v is None: return v try: v = int(v) - except: + except Exception: raise TypeError(errmsg) return v + def float_cast(v, errmsg, allow_None=False): """Casts the given value to a float. If the cast is impossible, a TypeError is raised with the given error message. If `allow_None` is set to `True` @@ -311,17 +406,17 @@ def float_cast(v, errmsg, allow_None=False): """ # Define cast function for a single object. def _obj_float_cast(v, errmsg, allow_None): - if(allow_None and v is None): + if allow_None and v is None: return v try: v = float(v) - except: + except Exception: raise TypeError(errmsg) return v - if(issequence(v)): + if issequence(v): float_list = [] for el_v in v: float_list.append(_obj_float_cast(el_v, errmsg, allow_None)) @@ -329,29 +424,39 @@ def _obj_float_cast(v, errmsg, allow_None): return _obj_float_cast(v, errmsg, allow_None) -def str_cast(v, errmsg): + +def str_cast(v, errmsg, allow_None=False): """Casts the given value to a str object. If the cast is impossible, a TypeError is raised with the given error message. 
""" + if allow_None and v is None: + return v + try: v = str(v) - except: + except Exception: raise TypeError(errmsg) + return v + def list_of_cast(t, v, errmsg): """Casts the given value `v` to a list of items of type `t`. If the cast is impossible, a TypeError is raised with the given error message. """ - if(isinstance(v, t)): + if isinstance(v, t): v = [v] - if(not issequenceof(v, t)): + + if not issequenceof(v, t): raise TypeError(errmsg) + v = list(v) + return v + def get_smallest_numpy_int_type(values): """Returns the smallest numpy integer type that can represent the given integer values. @@ -372,17 +477,19 @@ def get_smallest_numpy_int_type(values): vmin = np.min(values) vmax = np.max(values) - if(vmin < 0): + if vmin < 0: types = [np.int8, np.int16, np.int32, np.int64] else: types = [np.uint8, np.uint16, np.uint32, np.uint64] for inttype in types: ii = np.iinfo(inttype) - if(vmin >= ii.min and vmax <= ii.max): + if vmin >= ii.min and vmax <= ii.max: return inttype - raise ValueError("No integer type spans [%d, %d]!"%(vmin, vmax)) + raise ValueError( + f'No integer type spans [{vmin}, {vmax}]!') + def get_number_of_float_decimals(value): """Determines the number of significant decimals the given float number has. @@ -398,16 +505,56 @@ def get_number_of_float_decimals(value): ------- decimals : int The number of decimals of value which are non-zero. + + Raises + ------ + ValueError + If a nan value was provided. """ + if np.isnan(value): + raise ValueError( + 'The provided value is nan!') + val_str = '{:.16f}'.format(value) (val_num_str, val_decs_str) = val_str.split('.', 1) for idx in range(len(val_decs_str)-1, -1, -1): - if(int(val_decs_str[idx]) != 0): + if int(val_decs_str[idx]) != 0: return idx+1 + return 0 -class ObjectCollection(object): +def make_dict_hash(d): + """Creates a hash value for the given dictionary. + + Parameters + ---------- + d : dict | None + The dictionary holding (name: value) pairs. + If set to None, an empty dictionary is used. 
+ + Returns + ------- + hash : int + The hash of the dictionary. + """ + if d is None: + d = {} + + if not isinstance(d, dict): + raise TypeError( + 'The d argument must be of type dict!') + + # A note on the ordering of Python dictionary items: The items are ordered + # internally according to the hash value of their keys. Hence, if we don't + # insert more dictionary items, the order of the items won't change. Thus, + # we can just take the items list and make a tuple to create a hash of it. + # The hash will be the same for two dictionaries having the same items. + return hash(tuple(d.items())) + + +class ObjectCollection( + object): """This class provides a collection of objects of a specific type. Objects can be added to the collection via the ``add`` method or can be removed from the collection via the ``pop`` method. The objects of another object @@ -423,12 +570,20 @@ def __init__(self, objs=None, obj_type=None): objs : instance of obj_type | sequence of obj_type instances | None The sequence of objects of type ``obj_type`` with which this collection should get initialized with. - obj_type : type + obj_type : type | None The type of the objects, which can be added to the collection. + If set to None, the type will be determined from the given objects. + If no objects are given, the object type will be `object`. """ - if(obj_type is None): + if obj_type is None: obj_type = object - if(not issubclass(obj_type, object)): + if objs is not None: + if issequence(objs) and len(objs) > 0: + obj_type = type(objs[0]) + else: + obj_type = type(objs) + + if not issubclass(obj_type, object): raise TypeError( 'The obj_type argument must be a subclass of object!') @@ -436,9 +591,9 @@ def __init__(self, objs=None, obj_type=None): self._objects = [] # Add given list of objects. 
- if(objs is not None): - if(not issequence(objs)): - objs = [ objs ] + if objs is not None: + if not issequence(objs): + objs = [objs] for obj in objs: self.add(obj) @@ -489,7 +644,10 @@ def __add__(self, other): def __str__(self): """Pretty string representation of this object collection. """ - return classname(self)+ ': ' + str(self._objects) + obj_str = ",\n".join([ + ' '*INDENTATION_WIDTH + str(obj) for obj in self._objects + ]) + return classname(self) + ": {\n" + obj_str + "\n}" def copy(self): """Creates a copy of this ObjectCollection. The objects of the @@ -500,11 +658,13 @@ def copy(self): return oc def add(self, obj): - """Adds the given object to this object collection. + """Adds the given object, sequence of objects, or object collection to + this object collection. Parameters ---------- - obj : obj_type instance | ObjectCollection of obj_type + obj : obj_type instance | sequence of obj_type | + ObjectCollection of obj_type An instance of ``obj_type`` that should be added to the collection. If given an ObjectCollection for objects of type obj_type, it will add all objects of the given collection to this collection. @@ -515,19 +675,23 @@ def add(self, obj): The instance of this ObjectCollection, in order to be able to chain several ``add`` calls. 
""" - if(isinstance(obj, ObjectCollection)): - if(typename(obj.obj_type) != typename(self._obj_type)): - raise TypeError('Cannot add objects from ObjectCollection for ' - 'objects of type "%s" to this ObjectCollection for objects ' - 'of type "%s"!'%( - typename(obj.obj_type), typename(self._obj_type))) + if issequence(obj) and not isinstance(obj, ObjectCollection): + obj = ObjectCollection(obj) + + if isinstance(obj, ObjectCollection): + if not issubclass(obj.obj_type, self.obj_type): + raise TypeError( + 'Cannot add objects from ObjectCollection for objects of ' + f'type "{typename(obj.obj_type)}" to this ObjectCollection ' + f'for objects of type "{typename(self._obj_type)}"!') self._objects.extend(obj.objects) return self - if(not isinstance(obj, self._obj_type)): - raise TypeError('The object of type "%s" cannot be added to the ' - 'object collection for objects of type "%s"!'%( - classname(obj), typename(self._obj_type))) + if not isinstance(obj, self._obj_type): + raise TypeError( + f'The object of type "{classname(obj)}" cannot be added to the ' + 'object collection for objects of type ' + f'"{typename(self._obj_type)}"!') self._objects.append(obj) return self @@ -564,8 +728,8 @@ def pop(self, index=None): obj : obj_type The removed object. """ - if(index is None): - index = len(self._objects)-1 + if index is None: + index = len(self._objects) - 1 obj = self._objects.pop(index) return obj @@ -575,24 +739,82 @@ class NamedObjectCollection(ObjectCollection): via the object name is efficient because the index of each object is tracked w.r.t. its name. """ - def __init__(self, objs=None, obj_type=None): + def __init__(self, objs=None, obj_type=None, **kwargs): """Creates a new NamedObjectCollection instance. Must be called by the derived class. Parameters ---------- objs : instance of obj_type | sequence of instances of obj_type | None - The sequence of objects of type ``obj_type`` with which this collection - should get initialized with. 
+ The sequence of objects of type ``obj_type`` with which this + collection should get initialized with. obj_type : type The type of the objects, which can be added to the collection. This type must have an attribute named ``name``. """ - self._obj_name_to_idx = dict() + self._obj_name_to_idx = OrderedDict() - super(NamedObjectCollection, self).__init__( + # The ObjectCollection class will call the add method to add individual + # objects. This will update the _obj_name_to_idx attribute. + super().__init__( objs=objs, - obj_type=obj_type) + obj_type=obj_type, + **kwargs) + + if not hasattr(self.obj_type, 'name'): + raise TypeError( + f'The object type "{typename(self.obj_type)}" has no ' + 'attribute named "name"!') + + @property + def name_list(self): + """(read-only) The list of the names of all the objects of this + NamedObjectCollection instance. + The order of this list of names is preserved to the order objects were + added to this collection. + """ + return list(self._obj_name_to_idx.keys()) + + def _create_obj_name_to_idx_dict(self, start=None, end=None): + """Creates the dictionary {obj.name: index} for object in the interval + [`start`, `end`). + + Parameters + ---------- + start : int | None + The object start index position, which is inclusive. + end : int | None + The object end index position, which is exclusive. + + Returns + ------- + obj_name_to_idx : dict + The dictionary {obj.name: index}. + """ + if start is None: + start = 0 + + return OrderedDict([ + (o.name, start+idx) + for (idx, o) in enumerate(self._objects[start:end]) + ]) + + def __contains__(self, name): + """Returns ``True`` if an object of the given name exists in this + NamedObjectCollection instance, ``False`` otherwise. + + Parameters + ---------- + name : str + The name of the object. + + Returns + ------- + check : bool + ``True`` if an object of name ``name`` exists in this + NamedObjectCollection instance, ``False`` otherwise. 
+ """ + return name in self._obj_name_to_idx def __getitem__(self, key): """Returns an object based on its name or index. @@ -613,9 +835,9 @@ def __getitem__(self, key): KeyError If the given object is not found within this object collection. """ - if(isinstance(key, str)): - key = self.index_by_name(key) - return super(NamedObjectCollection, self).__getitem__(key) + if isinstance(key, str): + key = self.get_index_by_name(key) + return super().__getitem__(key) def add(self, obj): """Adds the given object to this named object collection. @@ -635,20 +857,17 @@ def add(self, obj): The instance of this NamedObjectCollection, in order to be able to chain several ``add`` calls. """ - super(NamedObjectCollection, self).add(obj) + n_objs = len(self) - if(isinstance(obj, NamedObjectCollection)): - # Several objects have been added, so we recreate the name to index - # dictionary. - self._obj_name_to_idx = dict([ - (o.name,idx) for (idx,o) in enumerate(self._objects) ]) - else: - # Only a single object was added at the end. - self._obj_name_to_idx[obj.name] = len(self) - 1 + super().add(obj) + + self._obj_name_to_idx.update( + self._create_obj_name_to_idx_dict(n_objs)) return self + __iadd__ = add - def index_by_name(self, name): + def get_index_by_name(self, name): """Gets the index of the object with the given name within this named object collection. @@ -680,14 +899,13 @@ def pop(self, index=None): obj : obj_type instance The removed object. """ - if(isinstance(index, str)): + if isinstance(index, str): # Get the index of the object given its name. - index = self.index_by_name(index) + index = self.get_index_by_name(index) - obj = super(NamedObjectCollection, self).pop(index) + obj = super().pop(index) # Recreate the object name to index dictionary. 
- self._obj_name_to_idx = dict([ - (o.name,idx) for (idx,o) in enumerate(self._objects) ]) + self._obj_name_to_idx = self._create_obj_name_to_idx_dict() return obj diff --git a/skyllh/core/random.py b/skyllh/core/random.py index dcf8f560bb..edf55c4cc7 100644 --- a/skyllh/core/random.py +++ b/skyllh/core/random.py @@ -4,13 +4,19 @@ from skyllh.core.py import int_cast -class RandomStateService(object): + +class RandomStateService( + object): """The RandomStateService class provides a container for a numpy.random.RandomState object, initialized with a given seed. This service can then be passed to any function or method within skyllh that requires a random number generator. """ - def __init__(self, seed=None): + def __init__( + self, + seed=None, + **kwargs, + ): """Creates a new random state service. The ``random`` property can then be used to draw random numbers. @@ -21,8 +27,12 @@ def __init__(self, seed=None): randomly. See the numpy documentation for numpy.random.RandomState what that means. """ - self._seed = int_cast(seed, 'The seed argument must be None, or ' - 'castable to type int!', allow_None=True) + super().__init__(**kwargs) + + self._seed = int_cast( + seed, + 'The seed argument must be None, or castable to type int!', + allow_None=True) self.random = np.random.RandomState(self._seed) @property @@ -37,10 +47,12 @@ def random(self): """The numpy.random.RandomState object. """ return self._random + @random.setter def random(self, random): - if(not isinstance(random, np.random.RandomState)): - raise TypeError('The random property must be of type numpy.random.RandomState!') + if not isinstance(random, np.random.RandomState): + raise TypeError( + 'The random property must be of type numpy.random.RandomState!') self._random = random def reseed(self, seed): @@ -53,6 +65,8 @@ def reseed(self, seed): randomly. See the numpy documentation for numpy.random.RandomState what that means. 
""" - self._seed = int_cast(seed, 'The seed argument must be None or ' - 'castable to type int!', allow_None=True) + self._seed = int_cast( + seed, + 'The seed argument must be None or castable to type int!', + allow_None=True) self.random.seed(self._seed) diff --git a/skyllh/core/scrambling.py b/skyllh/core/scrambling.py index ab5c3454e0..3a8b02a756 100644 --- a/skyllh/core/scrambling.py +++ b/skyllh/core/scrambling.py @@ -1,28 +1,35 @@ # -*- coding: utf-8 -*- import abc - import numpy as np -from skyllh.core.times import TimeGenerator +from skyllh.core.times import ( + TimeGenerator, +) -class DataScramblingMethod(object, metaclass=abc.ABCMeta): - """Base class (type) for implementing a data scrambling method. +class DataScramblingMethod( + object, + metaclass=abc.ABCMeta): + """Base class for implementing a data scrambling method. """ - def __init__(self): - super(DataScramblingMethod, self).__init__() + def __init__(self, **kwargs): + super().__init__( + **kwargs) @abc.abstractmethod - def scramble(self, rss, data): + def scramble( + self, + rss, + data): """The scramble method implements the actual scrambling of the given data, which is method dependent. The scrambling must be performed in-place, i.e. it alters the data inside the given data array. Parameters ---------- - rss : RandomStateService + rss : instance of RandomStateService The random state service providing the random number generator (RNG). data : instance of DataFieldRecordArray @@ -30,19 +37,23 @@ def scramble(self, rss, data): Returns ------- - data : DataFieldRecordArray + data : instance of DataFieldRecordArray The given DataFieldRecordArray holding the scrambled data. """ pass -class UniformRAScramblingMethod(DataScramblingMethod): +class UniformRAScramblingMethod( + DataScramblingMethod): r"""The UniformRAScramblingMethod method performs right-ascention scrambling uniformly within a given RA range. By default it's (0, 2\pi). Note: This alters only the ``ra`` values of the data! 
""" - def __init__(self, ra_range=None): + def __init__( + self, + ra_range=None, + **kwargs): r"""Initializes a new RAScramblingMethod instance. Parameters @@ -52,7 +63,8 @@ def __init__(self, ra_range=None): values should get drawn from. If set to None, the default (0, 2\pi) will be used. """ - super(UniformRAScramblingMethod, self).__init__() + super().__init__( + **kwargs) self.ra_range = ra_range @@ -62,22 +74,28 @@ def ra_range(self): should get drawn from. """ return self._ra_range + @ra_range.setter def ra_range(self, ra_range): - if(ra_range is None): + if ra_range is None: ra_range = (0, 2*np.pi) - if(not isinstance(ra_range, tuple)): - raise TypeError('The ra_range property must be a tuple!') - if(len(ra_range) != 2): - raise ValueError('The ra_range tuple must contain 2 elements!') + if not isinstance(ra_range, tuple): + raise TypeError( + 'The ra_range property must be a tuple!') + if len(ra_range) != 2: + raise ValueError( + 'The ra_range tuple must contain 2 elements!') self._ra_range = ra_range - def scramble(self, rss, data): + def scramble( + self, + rss, + data): """Scrambles the given data uniformly in right-ascention. Parameters ---------- - rss : RandomStateService + rss : instance of RandomStateService The random state service providing the random number generator (RNG). data : instance of DataFieldRecordArray @@ -86,28 +104,35 @@ def scramble(self, rss, data): Returns ------- - data : DataFieldRecordArray + data : instance of DataFieldRecordArray The given DataFieldRecordArray holding the scrambled data. """ dt = data['ra'].dtype + data['ra'] = rss.random.uniform( - *self.ra_range, size=len(data)).astype(dt) + *self.ra_range, size=len(data)).astype(dt, copy=False) + return data -class TimeScramblingMethod(DataScramblingMethod): +class TimeScramblingMethod( + DataScramblingMethod): """The TimeScramblingMethod class provides a data scrambling method to perform data coordinate scrambling based on a generated time. 
It draws a random time from a time generator and transforms the horizontal (local) coordinates into equatorial coordinates using a specified transformation function. """ - def __init__(self, timegen, hor_to_equ_transform): + def __init__( + self, + timegen, + hor_to_equ_transform, + **kwargs): """Initializes a new time scramling method instance. Parameters ---------- - timegen : TimeGenerator + timegen : instance of TimeGenerator The time generator that should be used to generate random MJD times. hor_to_equ_transform : callable The transformation function to transform coordinates from the @@ -120,7 +145,8 @@ def __init__(self, timegen, hor_to_equ_transform): The return signature must be: (ra, dec) """ - super(TimeScramblingMethod, self).__init__() + super().__init__( + **kwargs) self.timegen = timegen self.hor_to_equ_transform = hor_to_equ_transform @@ -131,10 +157,12 @@ def timegen(self): times. """ return self._timegen + @timegen.setter def timegen(self, timegen): - if(not isinstance(timegen, TimeGenerator)): - raise TypeError('The timegen property must be an instance of TimeGenerator!') + if not isinstance(timegen, TimeGenerator): + raise TypeError( + 'The timegen property must be an instance of TimeGenerator!') self._timegen = timegen @property @@ -143,13 +171,18 @@ def hor_to_equ_transform(self): horizontal system into the equatorial system. """ return self._hor_to_equ_transform + @hor_to_equ_transform.setter def hor_to_equ_transform(self, transform): - if(not callable(transform)): - raise TypeError('The hor_to_equ_transform property must be a callable object!') + if not callable(transform): + raise TypeError( + 'The hor_to_equ_transform property must be a callable object!') self._hor_to_equ_transform = transform - def scramble(self, rss, data): + def scramble( + self, + rss, + data): """Scrambles the given data based on random MJD times, which are generated from a TimeGenerator instance. 
The event's right-ascention and declination coordinates are calculated via a horizontal-to-equatorial @@ -157,7 +190,7 @@ def scramble(self, rss, data): Parameters ---------- - rss : RandomStateService + rss : instance of RandomStateService The random state service providing the random number generator (RNG). data : instance of DataFieldRecordArray @@ -166,24 +199,30 @@ def scramble(self, rss, data): Returns ------- - data : DataFieldRecordArray + data : instance of DataFieldRecordArray The given DataFieldRecordArray holding the scrambled data. """ mjds = self.timegen.generate_times(rss, len(data)) + data['time'] = mjds + (data['ra'], data['dec']) = self.hor_to_equ_transform( data['azi'], data['zen'], mjds) + return data -class DataScrambler(object): - def __init__(self, method): +class DataScrambler( + object): + def __init__( + self, + method): """Creates a data scrambler instance with a given defined scrambling method. Parameters ---------- - method : DataScramblingMethod + method : instance of DataScramblingMethod The instance of DataScramblingMethod that defines the method of the data scrambling. """ @@ -195,14 +234,20 @@ def method(self): the data. This must be an instance of the DataScramblingMethod class. """ return self._method + @method.setter def method(self, method): - if(not isinstance(method, DataScramblingMethod)): - raise TypeError('The data scrambling method must be an instance ' - 'of DataScramblingMethod!') + if not isinstance(method, DataScramblingMethod): + raise TypeError( + 'The data scrambling method must be an instance of ' + 'DataScramblingMethod!') self._method = method - def scramble_data(self, rss, data, copy=False): + def scramble_data( + self, + rss, + data, + copy=False): """Scrambles the given data by calling the scramble method of the scrambling method class, that was configured for the data scrambler. 
If the ``inplace_scrambling`` property is set to False, a copy of the @@ -210,7 +255,7 @@ def scramble_data(self, rss, data, copy=False): Parameters ---------- - rss : RandomStateService + rss : instance of RandomStateService The random state service providing the random number generator (RNG). data : instance of DataFieldRecordArray @@ -222,13 +267,13 @@ def scramble_data(self, rss, data, copy=False): Returns ------- - data : DataFieldRecordArray + data : instance of DataFieldRecordArray The given DataFieldRecordArray instance with the scrambled data. If the ``inplace_scrambling`` property is set to True, this output array is the same array as the input array, otherwise it's a new array. """ - if(copy): + if copy: data = data.copy() data = self._method.scramble(rss, data) diff --git a/skyllh/core/services.py b/skyllh/core/services.py new file mode 100644 index 0000000000..001464f3bd --- /dev/null +++ b/skyllh/core/services.py @@ -0,0 +1,634 @@ +# -*- coding: utf-8 -*- + +from collections import ( + defaultdict, +) +import numpy as np + +from skyllh.core.dataset import ( + Dataset, + DatasetData, +) +from skyllh.core.progressbar import ( + ProgressBar, +) +from skyllh.core.py import ( + classname, + issequenceof, +) +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroupManager, +) + + +class DetSigYieldService( + object): + """This class provides a service to build and hold detector signal yield + instances for multiple datasets and source hypothesis groups. + """ + + def __init__( + self, + shg_mgr, + dataset_list, + data_list, + ppbar=None, + **kwargs): + """Creates a new DetSigYieldService instance. + """ + super().__init__( + **kwargs) + + self._set_shg_mgr(shg_mgr) + + self.dataset_list = dataset_list + self.data_list = data_list + + self._arr = self.construct_detsigyield_array( + ppbar=ppbar) + + @property + def shg_mgr(self): + """(read-only) The instance of SourceHypoGroupManager providing the list + of source hypothesis groups. 
+ """ + return self._shg_mgr + + @property + def dataset_list(self): + """The list of instance of Dataset for which the detector signal yields + should be built. + """ + return self._dataset_list + + @dataset_list.setter + def dataset_list(self, datasets): + if not issequenceof(datasets, Dataset): + raise TypeError( + 'The dataset_list property must be a sequence of Dataset ' + 'instances! ' + f'Its current type is {classname(datasets)}!') + self._dataset_list = list(datasets) + + @property + def data_list(self): + """The list of instance of DatasetData for which the detector signal + yields should be built. + """ + return self._data_list + + @data_list.setter + def data_list(self, datas): + if not issequenceof(datas, DatasetData): + raise TypeError( + 'The data_list property must be a sequence of DatasetData ' + 'instances! ' + f'Its current type is {classname(datas)}!') + self._data_list = list(datas) + + @property + def arr(self): + """(read-only) The (N_datasets, N_source_hypo_groups)-shaped numpy + ndarray of object holding the constructed DetSigYield instances. + """ + return self._arr + + @property + def n_datasets(self): + """(read-only) The number of datasets this service was created for. + """ + return self._arr.shape[0] + + @property + def n_shgs(self): + """(read-only) The number of source hypothesis groups this service was + created for. + """ + return self._arr.shape[1] + + def _set_shg_mgr(self, mgr): + """Sets the internal member variable to the given instance of + SourceHypoGroupManager. + """ + if not isinstance(mgr, SourceHypoGroupManager): + raise TypeError( + 'The shg_mgr argument must be an instance of ' + 'SourceHypoGroupManager! ' + f'Its current type is {classname(mgr)}!') + + self._shg_mgr = mgr + + def change_shg_mgr( + self, + shg_mgr, + ppbar=None, + ): + """Changes the instance of SourceHypoGroupManager of this service. This + will also rebuild the detector signal yields. 
+ """ + self._set_shg_mgr(shg_mgr) + + self._arr = self.construct_detsigyield_array( + ppbar=ppbar) + + def get_builder_to_shgidxs_dict( + self, + ds_idx, + ): + """Creates a dictionary with the builder instance as key and the list of + source hypo group indices to which the builder applies as value. + Hence, SHGs using the same builder instance can be grouped for + DetSigYield construction. + + Parameters + ---------- + ds_idx : int + The index of the dataset for which the same builders apply. + + Returns + ------- + builder_shgidxs_dict : dict + The dictionary with the builder instance as key and the list of + source hypo group indices to which the builder applies as value. + """ + n_datasets = len(self._dataset_list) + + if ds_idx < 0 or ds_idx >= n_datasets: + raise ValueError( + f'The dataset index {ds_idx} must be within the range ' + f'[0,{n_datasets-1}]!') + + builder_shgidxs_dict = defaultdict(list) + for (g, shg) in enumerate(self._shg_mgr.shg_list): + + builder_list = shg.detsigyield_builder_list + if (len(builder_list) != 1) and (len(builder_list) != n_datasets): + raise ValueError( + 'The number of detector signal yield builders ' + f'({len(builder_list)}) is not 1 and does not ' + f'match the number of datasets ({n_datasets}) for the ' + f'{g}th source hypothesis group!') + + builder = ( + builder_list[0] if len(builder_list) == 1 else + builder_list[ds_idx] + ) + + builder_shgidxs_dict[builder].append(g) + + return builder_shgidxs_dict + + def construct_detsigyield_array( + self, + ppbar=None, + ): + """Creates a (N_datasets, N_source_hypo_groups)-shaped numpy ndarray of + object holding the constructed DetSigYield instances. + + If the same DetSigYieldBuilder class is used for all source hypotheses + of a particular dataset, the + :meth:`~skyllh.core.detsigyield.DetSigYieldBuilder.construct_detsigyields` + method is called with different flux models to optimize the construction + of the detector signal yield functions. 
+
+        Parameters
+        ----------
+        ppbar : instance of ProgressBar | None
+            The instance of ProgressBar of the optional parent progress bar.
+
+        Returns
+        -------
+        detsigyield_arr : instance of numpy.ndarray
+            The (N_datasets, N_source_hypo_groups)-shaped numpy ndarray of
+            object holding the constructed DetSigYield instances.
+        """
+        n_datasets = len(self._dataset_list)
+
+        detsigyield_arr = np.empty(
+            (n_datasets,
+             self._shg_mgr.n_src_hypo_groups),
+            dtype=object
+        )
+
+        pbar = ProgressBar(
+            self._shg_mgr.n_src_hypo_groups * n_datasets,
+            parent=ppbar).start()
+
+        shg_list = self.shg_mgr.shg_list
+
+        for (j, (dataset, data)) in enumerate(zip(self._dataset_list,
+                                                  self._data_list)):
+
+            builder_to_shgidxs_dict = self.get_builder_to_shgidxs_dict(ds_idx=j)
+
+            for (builder, shgidxs) in builder_to_shgidxs_dict.items():
+                factory = builder.get_detsigyield_construction_factory()
+                if factory is None:
+                    # The builder does not provide a factory for DetSigYield
+                    # instance construction. So we have to construct the
+                    # detector signal yields one by one for each SHG.
+                    for g in shgidxs:
+                        shg = shg_list[g]
+
+                        detsigyield = builder.construct_detsigyield(
+                            dataset=dataset,
+                            data=data,
+                            shg=shg,
+                            ppbar=pbar)
+
+                        detsigyield_arr[j, g] = detsigyield
+
+                        pbar.increment()
+                else:
+                    # The builder provides a factory for the construction of
+                    # several DetSigYield instances simultaneously, one for each
+                    # flux model.
+ shgs = [ + shg_list[g] + for g in shgidxs + ] + + detsigyields = factory( + dataset=dataset, + data=data, + shgs=shgs, + ppbar=pbar) + + for (i, g) in enumerate(shgidxs): + detsigyield_arr[j, g] = detsigyields[i] + + pbar.increment(len(detsigyields)) + + pbar.finish() + + return detsigyield_arr + + +class SrcDetSigYieldWeightsService( + object): + r"""This class provides a service for the source detector signal yield + weights, which are the product of the source weights with the detector + signal yield, denoted with :math:`a_{j,k}(\vec{p}_{\mathrm{s}_k})` in the + math formalism documentation. + + .. math:: + + a_{j,k}(\vec{p}_{\mathrm{s}_k}) = W_k + \mathcal{Y}_{\mathrm{s}_{j,k}}(\vec{p}_{\mathrm{s}_k}) + + The service has a method to calculate the weights and a method to retrieve + the weights. The weights are stored internally. + """ + + @staticmethod + def create_src_recarray_list_list( + detsigyield_service, + ): + """Creates a list of numpy record ndarrays, one for each source + hypothesis group suited for evaluating the detector signal yield + instance of that source hypothesis group. + + Parameters + ---------- + detsigyield_service : instance of DetSigYieldService + The instance of DetSigYieldService providing the + (N_datasets, N_source_hypo_groups)-shaped 2D ndarray of + DetSigYield instances, one for each dataset and source hypothesis + group combination. + + Returns + ------- + src_recarray_list_list : list of list of numpy record ndarrays + The (N_datasets,N_source_hypo_groups)-shaped list of list of the + source numpy record ndarrays, one for each dataset and source + hypothesis group combination, which is needed for + evaluating a particular detector signal yield instance. 
+ """ + n_datasets = detsigyield_service.n_datasets + n_shgs = detsigyield_service.n_shgs + shg_list = detsigyield_service.shg_mgr.shg_list + + src_recarray_list_list = [] + for ds_idx in range(n_datasets): + src_recarray_list = [] + for shg_idx in range(n_shgs): + shg = shg_list[shg_idx] + src_recarray_list.append( + detsigyield_service.arr[ds_idx][shg_idx].sources_to_recarray( + shg.source_list)) + + src_recarray_list_list.append(src_recarray_list) + + return src_recarray_list_list + + @staticmethod + def create_src_weight_array_list( + shg_mgr, + ): + """Creates a list of numpy 1D ndarrays holding the source weights, one + for each source hypothesis group. + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager defining the source + hypothesis groups with their sources. + + Returns + ------- + src_weight_array_list : list of numpy 1D ndarrays + The list of 1D numpy ndarrays holding the source weights, one for + each source hypothesis group. + """ + src_weight_array_list = [ + np.array([src.weight for src in shg.source_list]) + for shg in shg_mgr.shg_list + ] + return src_weight_array_list + + def __init__( + self, + detsigyield_service, + **kwargs, + ): + """Creates a new SrcDetSigYieldWeightsService instance. + + Parameters + ---------- + detsigyield_service : instance of DetSigYieldService + The instance of DetSigYieldService providing the + (N_datasets, N_source_hypo_groups)-shaped array of DetSigYield + instances, one instance for each combination of dataset and source + hypothesis group. + """ + super().__init__( + **kwargs) + + self.detsigyield_service = detsigyield_service + + # Create the list of list of source record arrays for each combination + # of dataset and source hypothesis group. 
+ self._src_recarray_list_list = type(self).create_src_recarray_list_list( + detsigyield_service=self._detsigyield_service) + + # Create the list of 1D ndarrays holding the source weights for each + # source hypothesis group. + self._src_weight_array_list = type(self).create_src_weight_array_list( + shg_mgr=self._detsigyield_service.shg_mgr) + + self._a_jk = None + self._a_jk_grads = None + + @property + def shg_mgr(self): + """(read-only) The instance of SourceHypoGroupManager defining the + source hypothesis groups. + """ + return self._detsigyield_service.shg_mgr + + @property + def detsigyield_service(self): + """The instance of DetSigYieldService providing the + (N_datasets, N_source_hypo_groups)-shaped array of DetSigYield + instances. + """ + return self._detsigyield_service + + @detsigyield_service.setter + def detsigyield_service(self, service): + if not isinstance(service, DetSigYieldService): + raise TypeError( + 'The detsigyield_service property must be an instance of ' + 'DetSigYieldService! ' + f'Its current type is {classname(service)}!') + self._detsigyield_service = service + + @property + def detsigyield_arr(self): + """(read-only) The (N_datasets, N_source_hypo_groups)-shaped 2D numpy + ndarray holding the DetSigYield instances for each source hypothesis + group. + """ + return self._detsigyield_service.arr + + @property + def n_datasets(self): + """(read-only) The number of datasets this service was created for. + """ + return self._detsigyield_service.n_datasets + + @property + def n_shgs(self): + """(read-only) The number of source hypothesis groups this service was + created for. + """ + return self._detsigyield_service.n_shgs + + @property + def src_recarray_list_list(self): + """(read-only) The (N_datasets,N_source_hypo_groups)-shaped list of list + of the source numpy record ndarrays, one for each dataset and source + hypothesis group combination, which is needed for evaluating a + particular detector signal yield instance. 
+ """ + return self._src_recarray_list_list + + def change_shg_mgr( + self, + shg_mgr, + ): + """Re-creates the internal source numpy record arrays needed for the + detector signal yield calculation. + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The new SourceHypoGroupManager instance. + """ + if id(shg_mgr) != id(self._detsigyield_service.shg_mgr): + raise ValueError( + 'The provides instance of SourceHypoGroupManager does not ' + 'match the instance of the detector signal yield service!') + + self._src_recarray_list_list = type(self).create_src_recarray_list_list( + detsigyield_service=self._detsigyield_service) + + self._src_weight_array_list = type(self).create_src_weight_array_list( + shg_mgr=self._detsigyield_service.shg_mgr) + + def calculate( + self, + src_params_recarray): + """Calculates the source detector signal yield weights for each source + and their derivative w.r.t. each global floating parameter. The result + is stored internally as: + + a_jk : instance of ndarray + The (N_datasets,N_sources)-shaped numpy ndarray holding the + source detector signal yield weight for each combination of + dataset and source. + a_jk_grads : dict + The dictionary holding the (N_datasets,N_sources)-shaped numpy + ndarray with the derivatives w.r.t. the global fit parameter + the SrcDetSigYieldWeightsService depend on. The dictionary's key + is the index of the global fit parameter. + + Parameters + ---------- + src_params_recarray : instance of numpy record ndarray + The numpy record ndarray of length N_sources holding the local + source parameters. See the documentation of + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + for more information about this record array. 
+ """ + n_datasets = self.n_datasets + + shg_mgr = self._detsigyield_service.shg_mgr + + self._a_jk = np.empty( + (n_datasets, shg_mgr.n_sources,), + dtype=np.double) + + self._a_jk_grads = defaultdict( + lambda: np.zeros( + (n_datasets, shg_mgr.n_sources), + dtype=np.double)) + + sidx = 0 + for (shg_idx, (shg, src_weights)) in enumerate(zip( + shg_mgr.shg_list, self._src_weight_array_list)): + + shg_n_src = shg.n_sources + + shg_src_slice = slice(sidx, sidx+shg_n_src) + + shg_src_params_recarray = src_params_recarray[shg_src_slice] + + for ds_idx in range(n_datasets): + detsigyield = self._detsigyield_service.arr[ds_idx, shg_idx] + src_recarray = self._src_recarray_list_list[ds_idx][shg_idx] + + (Yg, Yg_grads) = detsigyield( + src_recarray=src_recarray, + src_params_recarray=shg_src_params_recarray) + + self._a_jk[ds_idx][shg_src_slice] = src_weights * Yg + + for gpidx in Yg_grads.keys(): + self._a_jk_grads[gpidx][ds_idx, shg_src_slice] =\ + src_weights * Yg_grads[gpidx] + + sidx += shg_n_src + + def get_weights(self): + """Returns the source detector signal yield weights and their + derivatives w.r.t. the global fit parameters. + + Returns + ------- + a_jk : instance of ndarray + The (N_datasets, N_sources)-shaped numpy ndarray holding the + source detector signal yield weight for each combination of + dataset and source. + a_jk_grads : dict + The dictionary holding the (N_datasets, N_sources)-shaped numpy + ndarray with the derivatives w.r.t. the global fit parameter + the SrcDetSigYieldWeightsService depend on. The dictionary's key + is the index of the global fit parameter. + """ + return (self._a_jk, self._a_jk_grads) + + +class DatasetSignalWeightFactorsService( + object): + r"""This class provides a service to calculates the dataset signal weight + factors, :math:`f_j(\vec{p}_\mathrm{s})`, for each dataset. 
+ It utilizes the source detector signal yield weights + :math:`a_{j,k}(\vec{p}_{\mathrm{s}_k})`, provided by the + :class:`~SrcDetSigYieldWeightsService` class. + """ + + def __init__( + self, + src_detsigyield_weights_service): + r"""Creates a new DatasetSignalWeightFactors instance. + + Parameters + ---------- + src_detsigyield_weights_service : instance of SrcDetSigYieldWeightsService + The instance of SrcDetSigYieldWeightsService providing the source + detector signal yield weights + :math:`a_{j,k}(\vec{p}_{\mathrm{s}_k})`. + """ + self.src_detsigyield_weights_service = src_detsigyield_weights_service + + @property + def src_detsigyield_weights_service(self): + r"""The instance of SrcDetSigYieldWeightsService providing the source + detector signal yield weights :math:`a_{j,k}(\vec{p}_{\mathrm{s}_k})`. + """ + return self._src_detsigyield_weights_service + + @src_detsigyield_weights_service.setter + def src_detsigyield_weights_service(self, service): + if not isinstance(service, SrcDetSigYieldWeightsService): + raise TypeError( + 'The src_detsigyield_weights_service property must be an ' + 'instance of SrcDetSigYieldWeightsService!') + self._src_detsigyield_weights_service = service + + @property + def n_datasets(self): + """(read-only) The number of datasets. + """ + return self._src_detsigyield_weights_service.n_datasets + + def calculate(self): + r"""Calculates the dataset signal weight factors, + :math:`f_j(\vec{p}_\mathrm{s})`. The result is stored internally as: + + f_j : instance of ndarray + The (N_datasets,)-shaped 1D numpy ndarray holding the dataset + signal weight factor for each dataset. + f_j_grads : dict + The dictionary holding the (N_datasets,)-shaped numpy + ndarray with the derivatives w.r.t. the global fit parameter + the DatasetSignalWeightFactorsService depend on. + The dictionary's key is the index of the global fit parameter. 
+ """ + (a_jk, a_jk_grads) = self._src_detsigyield_weights_service.get_weights() + + a_j = np.sum(a_jk, axis=1) + a = np.sum(a_jk) + + self._f_j = a_j / a + + # Calculate the derivative of f_j w.r.t. all floating parameters present + # in the a_jk_grads using the quotient rule of differentation. + self._f_j_grads = dict() + for gpidx in a_jk_grads.keys(): + # a is a scalar. + # a_j is a (N_datasets)-shaped ndarray. + # a_jk_grads is a dict of length N_gfl_params with values of + # (N_datasets,N_sources)-shaped ndarray. + # a_j_grads is a (N_datasets,)-shaped ndarray. + # a_grads is a scalar. + a_j_grads = np.sum(a_jk_grads[gpidx], axis=1) + a_grads = np.sum(a_jk_grads[gpidx]) + self._f_j_grads[gpidx] = (a_j_grads * a - a_j * a_grads) / a**2 + + def get_weights(self): + """Returns the + + Returns + ------- + f_j : instance of ndarray + The (N_datasets,)-shaped 1D numpy ndarray holding the dataset + signal weight factor for each dataset. + f_j_grads : dict + The dictionary holding the (N_datasets,)-shaped numpy + ndarray with the derivatives w.r.t. the global fit parameter + the DatasetSignalWeightFactorsService depend on. + The dictionary's key is the index of the global fit parameter. + """ + return (self._f_j, self._f_j_grads) diff --git a/skyllh/core/session.py b/skyllh/core/session.py index e107b9f4c9..c580a9f498 100644 --- a/skyllh/core/session.py +++ b/skyllh/core/session.py @@ -9,6 +9,7 @@ # mode. Hence, progress bars will not be displayed to not screw up the output. IS_INTERACTIVE_SESSION = False + def enable_interactive_session(): """Enables interactive session mode. """ @@ -16,6 +17,7 @@ def enable_interactive_session(): IS_INTERACTIVE_SESSION = True + def disable_interactive_session(): """Disables interactive session mode. """ @@ -23,6 +25,7 @@ def disable_interactive_session(): IS_INTERACTIVE_SESSION = False + def is_interactive_session(): """Checks whether the current session is interactive (True) or not (False). 
@@ -33,6 +36,7 @@ def is_interactive_session(): """ return IS_INTERACTIVE_SESSION + def is_python_interpreter_in_interactive_mode(): """Checks if the Python interpreter is in interactive mode. diff --git a/skyllh/core/signal_generation.py b/skyllh/core/signal_generation.py index 91d4caff73..47926d123a 100644 --- a/skyllh/core/signal_generation.py +++ b/skyllh/core/signal_generation.py @@ -4,16 +4,24 @@ from skyllh.core.py import ( issequence, - float_cast + float_cast, ) -class SignalGenerationMethod(object, metaclass=abc.ABCMeta): + +class SignalGenerationMethod( + object, + metaclass=abc.ABCMeta +): """This is a base class for a source and detector specific signal generation method, that calculates the source flux for a given monte-carlo event, which is needed to calculate the MC event weights for the signal generator. """ - def __init__(self, energy_range): + def __init__( + self, + energy_range, + **kwargs, + ): """Constructs a new signal generation method instance. Parameters @@ -23,7 +31,7 @@ def __init__(self, energy_range): signal event generation. If set to None, the entire energy range [0, +inf] is used. """ - super(SignalGenerationMethod, self).__init__() + super().__init__(**kwargs) self.energy_range = energy_range @@ -33,24 +41,35 @@ def energy_range(self): take MC events into account for signal event generation. 
""" return self._energy_range + @energy_range.setter def energy_range(self, r): - if(r is not None): - if(not issequence(r)): - raise TypeError('The energy_range property must be a sequence!') - if(len(r) != 2): - raise ValueError('The energy_range property must be a sequence ' - 'of 2 elements!') + if r is not None: + if not issequence(r): + raise TypeError( + 'The energy_range property must be a sequence!') + if len(r) != 2: + raise ValueError( + 'The energy_range property must be a sequence of 2 ' + 'elements!') r = tuple( - (float_cast(r[0], 'The first element of the energy_range ' - 'sequence must be castable to type float!'), - float_cast(r[1], 'The second element of the energy_range ' - 'sequence must be castable to type float!')) + float_cast( + r[0], + 'The first element of the energy_range ' + 'sequence must be castable to type float!'), + float_cast( + r[1], + 'The second element of the energy_range ' + 'sequence must be castable to type float!') ) self._energy_range = r @abc.abstractmethod - def calc_source_signal_mc_event_flux(self, data_mc, src_hypo_group): + def calc_source_signal_mc_event_flux( + self, + data_mc, + shg, + ): """This method is supposed to calculate the signal flux of each given MC event for each source hypothesis of the given source hypothesis group. @@ -59,23 +78,30 @@ def calc_source_signal_mc_event_flux(self, data_mc, src_hypo_group): ---------- data_mc : numpy record ndarray The numpy record array holding all the MC events. - src_hypo_group : SourceHypoGroup instance - The source hypothesis group, which defines the list of sources, and - their flux model. + shg : instance of SourceHypoGroup + The source hypothesis group instance, which defines the list of + sources, and their flux model. Returns ------- - flux_list : list of 2-element tuples - The list of 2-element tuples with one tuple for each source. 
Each - tuple must be made of two 1D ndarrays of size - N_selected_signal_events, where the first array contains the global - MC data event indices and the second array the flux of each selected - signal event. + ev_idx_arr : ndarray + The (N_selected_signal_events,)-shaped 1D ndarray holding the index + of the MC event. + shg_src_idx_arr : ndarray + The (N_selected_signal_events,)-shaped 1D ndarray holding the index + of the source within the given source hypothesis group for each + signal candidate event. + flux_arr : ndarray + The (N_selected_signal_events,)-shaped 1D ndarray holding the flux + value of each signal candidate event. """ pass def signal_event_post_sampling_processing( - self, shg, shg_sig_events_meta, shg_sig_events + self, + shg, + shg_sig_events_meta, + shg_sig_events, ): """This method should be reimplemented by the derived class if there is some processing needed after the MC signal events have been sampled @@ -92,8 +118,9 @@ def signal_event_post_sampling_processing( The length of this array must be the same as shg_sig_events. It needs to contain the following data fields: - - 'shg_src_idx' : int - The source index within the source hypothesis group. + shg_src_idx : int + The source index within the source hypothesis group. 
+ shg_sig_events : numpy record ndarray The numpy record ndarray holding the generated signal events for the given source hypothesis group and in the format of the original diff --git a/skyllh/core/signal_generator.py b/skyllh/core/signal_generator.py index f65d4d39b2..03e3ae99ea 100644 --- a/skyllh/core/signal_generator.py +++ b/skyllh/core/signal_generator.py @@ -1,62 +1,232 @@ # -*- coding: utf-8 -*- import abc +from astropy import units import itertools import numpy as np + +from skyllh.core.config import ( + to_internal_time_unit, +) +from skyllh.core.dataset import ( + Dataset, + DatasetData, +) +from skyllh.core.livetime import ( + Livetime, +) from skyllh.core.py import ( issequenceof, float_cast, int_cast, - get_smallest_numpy_int_type + get_smallest_numpy_int_type, +) +from skyllh.core.services import ( + DatasetSignalWeightFactorsService, +) +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroupManager, ) -from skyllh.core.dataset import Dataset, DatasetData -from skyllh.core.source_hypothesis import SourceHypoGroupManager -from skyllh.core.storage import DataFieldRecordArray -from skyllh.physics.flux import ( - get_conversion_factor_to_internal_flux_unit +from skyllh.core.storage import ( + DataFieldRecordArray, ) -class SignalGeneratorBase(object, metaclass=abc.ABCMeta): +class SignalGenerator( + object, + metaclass=abc.ABCMeta): """This is the abstract base class for all signal generator classes in - SkyLLH. It defines the interface for signal generators. + SkyLLH. It defines the interface for a signal generator. """ - def __init__(self, src_hypo_group_manager, dataset_list, data_list, - *args, **kwargs): + def __init__( + self, + shg_mgr, + **kwargs): """Constructs a new signal generator instance. Parameters ---------- - src_hypo_group_manager : SourceHypoGroupManager instance + shg_mgr : instance of SourceHypoGroupManager The SourceHypoGroupManager instance defining the source hypothesis groups. 
- dataset_list : list of Dataset instances - The list of Dataset instances for which signal events should get - generated for. - data_list : list of DatasetData instances - The list of DatasetData instances holding the actual data of each - dataset. The order must match the order of ``dataset_list``. """ - super().__init__(*args, **kwargs) + super().__init__( + **kwargs) - self.src_hypo_group_manager = src_hypo_group_manager - self.dataset_list = dataset_list - self.data_list = data_list + self.shg_mgr = shg_mgr @property - def src_hypo_group_manager(self): + def shg_mgr(self): """The SourceHypoGroupManager instance defining the source hypothesis groups. """ - return self._src_hypo_group_manager - @src_hypo_group_manager.setter - def src_hypo_group_manager(self, manager): - if(not isinstance(manager, SourceHypoGroupManager)): + return self._shg_mgr + + @shg_mgr.setter + def shg_mgr(self, manager): + if not isinstance(manager, SourceHypoGroupManager): raise TypeError( - 'The src_hypo_group_manager property must be an instance of ' + 'The shg_mgr property must be an instance of ' 'SourceHypoGroupManager!') - self._src_hypo_group_manager = manager + self._shg_mgr = manager + + def create_src_params_recarray( + self, + src_detsigyield_weights_service): + """Creates the src_params_recarray structured ndarray of length + N_sources holding the local source parameter names and values needed for + the calculation of the detector signal yields. + + Parameters + ---------- + src_detsigyield_weights_service : instance of SrcDetSigYieldWeightsService + The instance of SrcDetSigYieldWeightsService providing the product + of the source weights with the detector signal yield. + + Returns + ------- + src_params_recarray : instance of numpy structured ndarray + The structured numpy ndarray of length N_sources, holding the local + parameter names and values of each source needed to calculate the + detector signal yield. 
+ """ + # Get the parameter names needed for the detector signal yield + # calculation. + param_names = [] + for detsigyield in src_detsigyield_weights_service.detsigyield_arr.flat: + param_names.extend(detsigyield.param_names) + param_names = set(param_names) + + # Create an empty structured ndarray of length N_sources. + dt = [] + for pname in param_names: + dt.extend([ + (pname, np.float64), + (f'{pname}:gpidx', np.int32) + ]) + src_params_recarray = np.empty((self._shg_mgr.n_sources,), dtype=dt) + + sidx = 0 + for (shg_idx, shg) in enumerate(self._shg_mgr.shg_list): + + shg_n_src = shg.n_sources + + shg_src_slice = slice(sidx, sidx+shg_n_src) + + pvalues = [] + for pname in param_names: + pvalues.extend([ + shg.fluxmodel.get_param(pname), + 0 + ]) + + src_params_recarray[shg_src_slice] = tuple(pvalues) + + sidx += shg_n_src + + return src_params_recarray + + def change_shg_mgr( + self, + shg_mgr): + """Changes the source hypothesis group manager. Derived classes can + reimplement this method but this method of the base class must still be + called by the derived class. + """ + self.shg_mgr = shg_mgr + + @abc.abstractmethod + def generate_signal_events( + self, + rss, + mean, + poisson=True, + src_detsigyield_weights_service=None): + """This abstract method must be implemented by the derived class to + generate a given number of signal events. + + Parameters + ---------- + rss : instance of RandomStateService + The instance of RandomStateService providing the random number + generator state. + mean : int | float + The mean number of signal events. If the ``poisson`` argument is set + to True, the actual number of generated signal events will be drawn + from a Poisson distribution with this given mean value of signal + events. + poisson : bool + If set to True, the actual number of generated signal events will + be drawn from a Poisson distribution with the given mean value of + signal events. 
+            If set to False, the argument ``mean`` specifies the actual number
+            of generated signal events.
+        src_detsigyield_weights_service : instance of SrcDetSigYieldWeightsService | None
+            The instance of SrcDetSigYieldWeightsService providing the weighting
+            of the sources within the detector. This can be ``None`` if this
+            signal generator does not need this information.
+
+        Returns
+        -------
+        n_signal : int
+            The number of generated signal events.
+        signal_events_dict : dict of DataFieldRecordArray
+            The dictionary holding the DataFieldRecordArray instances with the
+            generated signal events. Each key of this dictionary represents the
+            dataset index for which the signal events have been generated.
+        """
+        pass
+
+
+class MultiDatasetSignalGenerator(
+        SignalGenerator):
+    """This is a signal generator class handling multiple datasets by using the
+    individual signal generator instances for each dataset. This is the most
+    general way to support multiple datasets of different formats and signal
+    generation.
+    """
+    def __init__(
+            self,
+            shg_mgr,
+            dataset_list,
+            data_list,
+            sig_generator_list=None,
+            ds_sig_weight_factors_service=None,
+            **kwargs):
+        """Constructs a new signal generator handling multiple datasets.
+
+        Parameters
+        ----------
+        shg_mgr : instance of SourceHypoGroupManager
+            The instance of SourceHypoGroupManager that defines the list of
+            source hypothesis groups, i.e. the list of sources.
+        dataset_list : list of instance of Dataset
+            The list of instance of Dataset for which signal events should get
+            generated.
+        data_list : list of instance of DatasetData
+            The list of instance of DatasetData holding the actual data of each
+            dataset. The order must match the order of ``dataset_list``.
+        sig_generator_list : list of instance of SignalGenerator | None
+            The optional list of instance of SignalGenerator holding
+            signal generator instances for each individual dataset.
This can be + ``None`` if this signal generator does not require individual signal + generators for each dataset. + ds_sig_weight_factors_service : instance of DatasetSignalWeightFactorsService + The instance of DatasetSignalWeightFactorsService providing the + dataset signal weight factor service for calculating the dataset + signal weights. + """ + super().__init__( + shg_mgr=shg_mgr, + **kwargs) + + self.dataset_list = dataset_list + self.data_list = data_list + self.sig_generator_list = sig_generator_list + self.ds_sig_weight_factors_service = ds_sig_weight_factors_service + + self._src_params_recarray = None @property def dataset_list(self): @@ -64,9 +234,10 @@ def dataset_list(self): generated for. """ return self._dataset_list + @dataset_list.setter def dataset_list(self, datasets): - if(not issequenceof(datasets, Dataset)): + if not issequenceof(datasets, Dataset): raise TypeError( 'The dataset_list property must be a sequence of Dataset ' 'instances!') @@ -79,32 +250,89 @@ def data_list(self): property. """ return self._data_list + @data_list.setter def data_list(self, datas): - if(not issequenceof(datas, DatasetData)): + if not issequenceof(datas, DatasetData): raise TypeError( 'The data_list property must be a sequence of DatasetData ' 'instances!') - self._data_list = datas + self._data_list = list(datas) - def change_source_hypo_group_manager(self, src_hypo_group_manager): - """Changes the source hypothesis group manager. Derived classes can - reimplement this method but this method of the base class must still be - called by the derived class. + @property + def sig_generator_list(self): + """The list of instance of SignalGenerator holding signal generator + instances for each individual dataset. 
""" - self.src_hypo_group_manager = src_hypo_group_manager + return self._sig_generator_list + + @sig_generator_list.setter + def sig_generator_list(self, generators): + if generators is not None: + if not issequenceof(generators, (SignalGenerator, type(None))): + raise TypeError( + 'The sig_generator_list property must be a sequence of ' + 'SignalGenerator instances!') + generators = list(generators) + self._sig_generator_list = generators - @abc.abstractmethod - def generate_signal_events(self, rss, mean, poisson=True): - """This abstract method must be implemented by the derived class to - generate a given number of signal events. + @property + def ds_sig_weight_factors_service(self): + """The instance of DatasetSignalWeightFactorsService providing the + dataset signal weight factor service for calculating the dataset + signal weights. + """ + return self._ds_sig_weight_factors_service + + @ds_sig_weight_factors_service.setter + def ds_sig_weight_factors_service(self, service): + if not isinstance(service, DatasetSignalWeightFactorsService): + raise TypeError( + 'The ds_sig_weight_factors_service property must be an ' + 'instance of DatasetSignalWeightFactorsService!') + self._ds_sig_weight_factors_service = service + + @property + def n_datasets(self): + """(read-only) The number of datasets. + """ + return len(self._dataset_list) + + def change_shg_mgr( + self, + shg_mgr): + """Changes the source hypothesis group manager. This will recreate the + src_params_recarray needed for calculating the detector signal yields. + Also it calls the ``change_shg_mgr`` methods of the signal generators of + the individual datasets. 
+ """ + super().change_shg_mgr( + shg_mgr=shg_mgr) + + src_detsigyield_weights_service =\ + self.ds_sig_weight_factors_service.src_detsigyield_weights_service + self._src_params_recarray = self.create_src_params_recarray( + src_detsigyield_weights_service=src_detsigyield_weights_service) + + for sig_generator in self.sig_generator_list: + sig_generator.change_shg_mgr( + shg_mgr=shg_mgr) + + def generate_signal_events( + self, + rss, + mean, + poisson=True, + **kwargs): + """Generates a given number of signal events distributed across the + individual datasets. Parameters ---------- rss : instance of RandomStateService The instance of RandomStateService providing the random number generator state. - mean : float + mean : float | int The mean number of signal events. If the ``poisson`` argument is set to True, the actual number of generated signal events will be drawn from a Poisson distribution with this given mean value of signal @@ -113,38 +341,94 @@ def generate_signal_events(self, rss, mean, poisson=True): If set to True, the actual number of generated signal events will be drawn from a Poisson distribution with the given mean value of signal events. - If set to False, the argument ``mean`` specifies the actual number - of generated signal events. + If set to False, the argument ``mean`` must be an integer and + specifies the actual number of generated signal events. Returns ------- n_signal : int - The number of generated signal events. + The number of actual generated signal events. signal_events_dict : dict of DataFieldRecordArray - The dictionary holding the DataFieldRecordArray instancs with the + The dictionary holding the DataFieldRecordArray instances with the generated signal events. Each key of this dictionary represents the dataset index for which the signal events have been generated. 
""" - pass + if poisson: + mean = rss.random.poisson( + float_cast( + mean, + 'The mean argument must be castable to type of float!')) + + n_signal = int_cast( + mean, + 'The mean argument must be castable to type of int!') + + src_detsigyield_weights_service =\ + self.ds_sig_weight_factors_service.src_detsigyield_weights_service + + # Calculate the dataset weights to distribute the signal events over the + # datasets. + if self._src_params_recarray is None: + self._src_params_recarray = self.create_src_params_recarray( + src_detsigyield_weights_service=src_detsigyield_weights_service) + + src_detsigyield_weights_service.calculate( + src_params_recarray=self._src_params_recarray) + + self._ds_sig_weight_factors_service.calculate() + (ds_weights, _) = self._ds_sig_weight_factors_service.get_weights() + + n_signal = 0 + signal_events_dict = {} + + for (ds_weight, ds_sig_generator) in zip( + ds_weights, + self._sig_generator_list): + + n_events = int(np.round(mean * ds_weight, 0)) + + (ds_n_signal, ds_sig_events_dict) =\ + ds_sig_generator.generate_signal_events( + rss=rss, + mean=n_events, + poisson=False, + src_detsigyield_weights_service=src_detsigyield_weights_service, + ) + + n_signal += ds_n_signal + + for (k, v) in ds_sig_events_dict.items(): + if k not in signal_events_dict: + signal_events_dict[k] = v + else: + signal_events_dict[k].append(v) + + return (n_signal, signal_events_dict) -class SignalGenerator(SignalGeneratorBase): - """This is the general signal generator class. It does not depend on the - detector or source hypothesis, because these dependencies are factored out - into the signal generation method. In fact the construction within this - class depends on the construction of the signal generation method. In case - of multiple sources the handling here is very suboptimal. Therefore the - MultiSourceSignalGenerator should be used instead! 
+class MCMultiDatasetSignalGenerator( + MultiDatasetSignalGenerator): + """This is a signal generator class, which handles multiple datasets with + monte-carlo (MC). It uses the MC events of all datasets to determine the + possible signal events for a source. + It does not depend on the detector or source hypothesis, because these + dependencies are factored out into the signal generation method. + In fact the construction within this class depends on the construction of + the signal generation method. """ - def __init__(self, src_hypo_group_manager, dataset_list, data_list, - *args, **kwargs): + def __init__( + self, + shg_mgr, + dataset_list, + data_list, + **kwargs): """Constructs a new signal generator instance. Parameters ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The SourceHypoGroupManager instance defining the source groups with - their spectra. + shg_mgr : instance of SourceHypoGroupManager + The SourceHypoGroupManager instance defining the source hypothesis + groups. dataset_list : list of Dataset instances The list of Dataset instances for which signal events should get generated for. @@ -155,8 +439,7 @@ def __init__(self, src_hypo_group_manager, dataset_list, data_list, A typical keyword argument is the instance of MultiDatasetTCLLHRatio. """ super().__init__( - *args, - src_hypo_group_manager=src_hypo_group_manager, + shg_mgr=shg_mgr, dataset_list=dataset_list, data_list=data_list, **kwargs) @@ -168,8 +451,8 @@ def _construct_signal_candidates(self): events pointing into the real MC dataset(s). 
""" n_datasets = len(self._dataset_list) - n_sources = self._src_hypo_group_manager.n_sources - shg_list = self._src_hypo_group_manager.src_hypo_group_list + n_sources = self._shg_mgr.n_sources + shg_list = self._shg_mgr.shg_list sig_candidates_dtype = [ ('ds_idx', get_smallest_numpy_int_type((0, n_datasets))), ('ev_idx', get_smallest_numpy_int_type( @@ -182,51 +465,72 @@ def _construct_signal_candidates(self): self._sig_candidates = np.empty( (0,), dtype=sig_candidates_dtype, order='F') + to_internal_time_unit_factor = to_internal_time_unit( + time_unit=units.day + ) + # Go through the source hypothesis groups to get the signal event # candidates. - for ((shg_idx,shg), (j,(ds,data))) in itertools.product( - enumerate(shg_list), enumerate(zip(self._dataset_list, self._data_list))): + for ((shg_idx, shg), (j, data)) in itertools.product( + enumerate(shg_list), + enumerate(self._data_list)): sig_gen_method = shg.sig_gen_method - if(sig_gen_method is None): - raise ValueError('No signal generation method has been ' - 'specified for the %dth source hypothesis group!'%(shg_idx)) + if sig_gen_method is None: + raise ValueError( + 'No signal generation method has been specified for the ' + f'source hypothesis group with index {shg_idx}!') data_mc = data.mc - (ev_indices_list, flux_list) = sig_gen_method.calc_source_signal_mc_event_flux( - data_mc, shg + + (ev_idx_arr, src_idx_arr, flux_arr) =\ + sig_gen_method.calc_source_signal_mc_event_flux( + data_mc=data_mc, + shg=shg) + + livetime_days = Livetime.get_integrated_livetime(data.livetime) + + weight = ( + data_mc[ev_idx_arr]['mcweight'] * + flux_arr * + livetime_days*to_internal_time_unit_factor ) - for (k, (ev_indices, flux)) in enumerate(zip(ev_indices_list, flux_list)): - ev = data_mc[ev_indices] - # The weight of the event specifies the number of signal events - # this one event corresponds to for the given reference flux. 
- # [weight] = GeV cm^2 sr * s * 1/(GeV cm^2 s sr) - weight = ev['mcweight'] * data.livetime * 86400 * flux - - sig_candidates = np.empty( - (len(ev_indices),), dtype=sig_candidates_dtype, order='F' - ) - sig_candidates['ds_idx'] = j - sig_candidates['ev_idx'] = ev_indices - sig_candidates['shg_idx'] = shg_idx - sig_candidates['shg_src_idx'] = k - sig_candidates['weight'] = weight - self._sig_candidates = np.append(self._sig_candidates, sig_candidates) + sig_candidates = np.empty( + (len(ev_idx_arr),), + dtype=sig_candidates_dtype, + order='F' + ) + sig_candidates['ds_idx'] = j + sig_candidates['ev_idx'] = ev_idx_arr + sig_candidates['shg_idx'] = shg_idx + sig_candidates['shg_src_idx'] = src_idx_arr + sig_candidates['weight'] = weight + + self._sig_candidates = np.append( + self._sig_candidates, sig_candidates) + del sig_candidates # Normalize the signal candidate weights. self._sig_candidates_weight_sum = np.sum(self._sig_candidates['weight']) self._sig_candidates['weight'] /= self._sig_candidates_weight_sum - def change_source_hypo_group_manager(self, src_hypo_group_manager): + def change_shg_mgr( + self, + shg_mgr): """Recreates the signal candidates with the changed source hypothesis group manager. """ - super().change_source_hypo_group_manager(src_hypo_group_manager) + super().change_shg_mgr( + shg_mgr=shg_mgr) self._construct_signal_candidates() - def mu2flux(self, mu, per_source=False): + def mu2flux( + self, + mu, + per_source=False): """Translate the mean number of signal events `mu` into the - corresponding flux. The unit of the returned flux is 1/(GeV cm^2 s). + corresponding flux. The unit of the returned flux is the internally used + flux unit. Parameters ---------- @@ -256,31 +560,40 @@ def mu2flux(self, mu, per_source=False): # The mu_fluxes array is the flux of each source for mu mean detected # signal events. 
- n_sources = self._src_hypo_group_manager.n_sources + n_sources = self._shg_mgr.n_sources mu_fluxes = np.empty((n_sources,), dtype=np.float64) - shg_list = self._src_hypo_group_manager.src_hypo_group_list + shg_list = self._shg_mgr.shg_list mu_fluxes_idx_offset = 0 - for (shg_idx,shg) in enumerate(shg_list): + for (shg_idx, shg) in enumerate(shg_list): fluxmodel = shg.fluxmodel # Calculate conversion factor from the flux model unit into the - # internal flux unit GeV^-1 cm^-2 s^-1. - toGeVcm2s = get_conversion_factor_to_internal_flux_unit(fluxmodel) + # internal flux unit. + to_internal_flux_unit =\ + fluxmodel.get_conversion_factor_to_internal_flux_unit() for k in range(shg.n_sources): mask = ((self._sig_candidates['shg_idx'] == shg_idx) & (self._sig_candidates['shg_src_idx'] == k)) ref_N_k = np.sum(self._sig_candidates[mask]['weight']) * ref_N - mu_flux_k = mu / ref_N * (ref_N_k / ref_N) * fluxmodel.Phi0*toGeVcm2s + mu_flux_k = ( + (mu / ref_N) * + (ref_N_k / ref_N) * + fluxmodel.Phi0 * to_internal_flux_unit) mu_fluxes[mu_fluxes_idx_offset + k] = mu_flux_k mu_fluxes_idx_offset += shg.n_sources - if(per_source): + if per_source: return mu_fluxes mu_flux = np.sum(mu_fluxes) return mu_flux - def generate_signal_events(self, rss, mean, poisson=True): + def generate_signal_events( + self, + rss, + mean, + poisson=True, + **kwargs): """Generates a given number of signal events from the signal candidate monte-carlo events. @@ -289,7 +602,7 @@ def generate_signal_events(self, rss, mean, poisson=True): rss : instance of RandomStateService The instance of RandomStateService providing the random number generator state. - mean : float + mean : float | int The mean number of signal events. 
If the ``poisson`` argument is set to True, the actual number of generated signal events will be drawn from a Poisson distribution with this given mean value of signal @@ -298,24 +611,27 @@ def generate_signal_events(self, rss, mean, poisson=True): If set to True, the actual number of generated signal events will be drawn from a Poisson distribution with the given mean value of signal events. - If set to False, the argument ``mean`` specifies the actual number - of generated signal events. + If set to False, the argument ``mean`` must be an integer and + specifies the actual number of generated signal events. Returns ------- n_signal : int - The number of generated signal events. + The number of actual generated signal events. signal_events_dict : dict of DataFieldRecordArray - The dictionary holding the DataFieldRecordArray instancs with the + The dictionary holding the DataFieldRecordArray instances with the generated signal events. Each key of this dictionary represents the dataset index for which the signal events have been generated. """ - if(poisson): - mean = rss.random.poisson(float_cast( - mean, 'The mean argument must be castable to type of float!')) + if poisson: + mean = rss.random.poisson( + float_cast( + mean, + 'The mean argument must be castable to type of float!')) n_signal = int_cast( - mean, 'The mean argument must be castable to type of int!') + mean, + 'The mean argument must be castable to type of int!') # Draw n_signal signal candidates according to their weight. sig_events_meta = rss.random.choice( @@ -326,8 +642,8 @@ def generate_signal_events(self, rss, mean, poisson=True): # Get the list of unique dataset and source hypothesis group indices of # the drawn signal events. # Note: This code does not assume the same format for each of the - # individual MC dataset numpy record arrays, thus might be a bit - # slower. If one could assume the same MC dataset format, one + # individual MC datasets, thus might be a bit slower. 
+ # If one could assume the same MC dataset format, one # could gather all the MC events of all the datasets first and do # the signal event post processing for all datasets at once. signal_events_dict = dict() @@ -337,11 +653,15 @@ def generate_signal_events(self, rss, mean, poisson=True): ds_mask = sig_events_meta['ds_idx'] == ds_idx n_sig_events_ds = np.count_nonzero(ds_mask) - data = dict( - [(fname, np.empty( - (n_sig_events_ds,), - dtype=mc.get_field_dtype(fname)) - ) for fname in mc.field_name_list]) + data = dict([ + ( + fname, + np.empty( + (n_sig_events_ds,), + dtype=mc.get_field_dtype(fname)) + ) + for fname in mc.field_name_list + ]) sig_events = DataFieldRecordArray(data, copy=False) fill_start_idx = 0 @@ -349,7 +669,7 @@ def generate_signal_events(self, rss, mean, poisson=True): # current dataset. shg_idxs = np.unique(sig_events_meta[ds_mask]['shg_idx']) for shg_idx in shg_idxs: - shg = self._src_hypo_group_manager.src_hypo_group_list[shg_idx] + shg = self._shg_mgr.shg_list[shg_idx] shg_mask = sig_events_meta['shg_idx'] == shg_idx # Get the MC events for the drawn signal events. ds_shg_mask = ds_mask & shg_mask @@ -358,92 +678,18 @@ def generate_signal_events(self, rss, mean, poisson=True): ev_idx = shg_sig_events_meta['ev_idx'] # Get the signal MC events of the current dataset and source # hypothesis group. - shg_sig_events = mc.get_selection(ev_idx) + shg_sig_events = mc[ev_idx] # Do the signal event post sampling processing. 
- shg_sig_events = shg.sig_gen_method.signal_event_post_sampling_processing( - shg, shg_sig_events_meta, shg_sig_events) + shg_sig_events = shg.sig_gen_method.\ + signal_event_post_sampling_processing( + shg, shg_sig_events_meta, shg_sig_events) indices = np.indices((n_shg_sig_events,))[0] + fill_start_idx sig_events.set_selection(indices, shg_sig_events) - #sig_events[fill_start_idx:fill_start_idx+n_shg_sig_events] = shg_sig_events fill_start_idx += n_shg_sig_events signal_events_dict[ds_idx] = sig_events return (n_signal, signal_events_dict) - - -class MultiSourceSignalGenerator(SignalGenerator): - """More optimal signal generator for multiple sources. - """ - def __init__(self, src_hypo_group_manager, dataset_list, data_list, - **kwargs): - """Constructs a new signal generator instance. - - Parameters - ---------- - src_hypo_group_manager : SourceHypoGroupManager instance - The SourceHypoGroupManager instance defining the source groups with - their spectra. - dataset_list : list of Dataset instances - The list of Dataset instances for which signal events should get - generated for. - data_list : list of DatasetData instances - The list of DatasetData instances holding the actual data of each - dataset. The order must match the order of ``dataset_list``. - kwargs - A typical keyword argument is the instance of MultiDatasetTCLLHRatio. - """ - super(MultiSourceSignalGenerator, self).__init__( - src_hypo_group_manager, dataset_list, data_list, **kwargs) - - def _construct_signal_candidates(self): - """Constructs an array holding pointer information of signal candidate - events pointing into the real MC dataset(s). 
- """ - n_datasets = len(self._dataset_list) - n_sources = self._src_hypo_group_manager.n_sources - shg_list = self._src_hypo_group_manager.src_hypo_group_list - sig_candidates_dtype = [ - ('ds_idx', get_smallest_numpy_int_type((0, n_datasets))), - ('ev_idx', get_smallest_numpy_int_type( - [0]+[len(data.mc) for data in self._data_list])), - ('shg_idx', get_smallest_numpy_int_type((0, n_sources))), - ('shg_src_idx', get_smallest_numpy_int_type( - [0]+[shg.n_sources for shg in shg_list])), - ('weight', np.float64) - ] - self._sig_candidates = np.empty( - (0,), dtype=sig_candidates_dtype, order='F') - - # Go through the source hypothesis groups to get the signal event - # candidates. - for ((shg_idx, shg), (j, (ds, data))) in itertools.product( - enumerate(shg_list), - enumerate(zip(self._dataset_list, self._data_list))): - sig_gen_method = shg.sig_gen_method - if(sig_gen_method is None): - raise ValueError( - 'No signal generation method has been specified ' - 'for the %dth source hypothesis group!' % (shg_idx)) - data_mc = data.mc - (ev_indices, src_indices, flux) = sig_gen_method.calc_source_signal_mc_event_flux( - data_mc, shg) - - sig_candidates = np.empty( - (len(ev_indices),), dtype=sig_candidates_dtype, order='F' - ) - sig_candidates['ds_idx'] = j - sig_candidates['ev_idx'] = ev_indices - sig_candidates['shg_idx'] = shg_idx - sig_candidates['shg_src_idx'] = src_indices - sig_candidates['weight'] = data_mc[ev_indices]['mcweight'] * data.livetime * 86400 * flux - - self._sig_candidates = np.append(self._sig_candidates, sig_candidates) - del sig_candidates - - # Normalize the signal candidate weights. 
- self._sig_candidates_weight_sum = np.sum(self._sig_candidates['weight']) - self._sig_candidates['weight'] /= self._sig_candidates_weight_sum diff --git a/skyllh/core/signalpdf.py b/skyllh/core/signalpdf.py index 749cfd6937..9b49ba2baa 100644 --- a/skyllh/core/signalpdf.py +++ b/skyllh/core/signalpdf.py @@ -5,46 +5,64 @@ """ import numpy as np -import scipy as scp -from skyllh.core import display -from skyllh.core.py import ( - classname, - issequenceof +from skyllh.core.debugging import ( + get_logger, + is_tracing_enabled, +) +from skyllh.core.interpolate import ( + GridManifoldInterpolationMethod, + Linear1DGridManifoldInterpolationMethod, ) -from skyllh.core.livetime import Livetime from skyllh.core.pdf import ( - PDFAxis, + PDF, + PDFSet, IsSignalPDF, MultiDimGridPDF, - MultiDimGridPDFSet, - MappedMultiDimGridPDFSet, - NDPhotosplinePDF, SpatialPDF, - TimePDF + TimePDF, +) +from skyllh.core.py import ( + classname, + str_cast, +) +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroupManager, +) +from skyllh.core.timing import ( + TaskTimer, +) +from skyllh.core.utils.coords import ( + angular_separation, ) -from skyllh.core.source_hypothesis import SourceHypoGroupManager -from skyllh.physics.source import PointLikeSource -from skyllh.physics.time_profile import TimeProfileModel -class GaussianPSFPointLikeSourceSignalSpatialPDF(SpatialPDF, IsSignalPDF): - """This spatial signal PDF model describes the spatial PDF for a point +class GaussianPSFPointLikeSourceSignalSpatialPDF( + SpatialPDF, + IsSignalPDF): + r"""This spatial signal PDF model describes the spatial PDF for a point source smeared with a 2D gaussian point-spread-function (PSF). Mathematically, it's the convolution of a point in the sky, i.e. the source location, with the PSF. The result of this convolution has the gaussian form - 1/(2*\pi*\sigma^2) * exp(-1/2*(r / \sigma)**2), + .. 
math:: - where \sigma is the spatial uncertainty of the event and r the distance on - the sphere between the source and the data event. + \frac{1}{2\pi\sigma^2} \exp(-\frac{r^2}{2\sigma^2}), + + where :math:`\sigma` is the spatial uncertainty of the event and :math:`r` + the distance on the sphere between the source and the data event. This PDF requires the `src_array` data field, that is numpy record ndarray with the data fields `ra` and `dec` holding the right-ascention and declination of the point-like sources, respectively. """ - def __init__(self, ra_range=None, dec_range=None, **kwargs): + def __init__( + self, + ra_range=None, + dec_range=None, + pd_event_data_field_name=None, + **kwargs): """Creates a new spatial signal PDF for point-like sources with a gaussian point-spread-function (PSF). @@ -56,156 +74,188 @@ def __init__(self, ra_range=None, dec_range=None, **kwargs): dec_range : 2-element tuple | None The range in declination this spatial PDF is valid for. If set to None, the range (-pi/2, +pi/2) is used. + pd_event_data_field_name : str | None + The probability density values can be pre-calculated by the user. + This specifies the name of the event data field, where these values + are stored. """ - if(ra_range is None): + if ra_range is None: ra_range = (0, 2*np.pi) - if(dec_range is None): + if dec_range is None: dec_range = (-np.pi/2, np.pi/2) - super(GaussianPSFPointLikeSourceSignalSpatialPDF, self).__init__( + super().__init__( + pmm=None, ra_range=ra_range, dec_range=dec_range, **kwargs) - def get_prob(self, tdm, fitparams=None, tl=None): - """Calculates the spatial signal probability of each event for all given - sources. + self.pd_event_data_field_name = pd_event_data_field_name + + @property + def pd_event_data_field_name(self): + """The event data field name where pre-calculated probability density + values are stored. 
+ """ + return self._pd_event_data_field_name + + @pd_event_data_field_name.setter + def pd_event_data_field_name(self, name): + name = str_cast( + name, + 'The pd_event_data_field_name property must be castable to type ' + f'str! Its current type is {classname(name)}!') + self._pd_event_data_field_name = name + + def calculate_pd(self, tdm): + """Calculates the gaussian PSF probability density values for all events + and sources. Parameters ---------- tdm : instance of TrialDataManager - The TrialDataManager instance holding the trial event data for which - to calculate the PDF values. The following data fields need to be - present: + The instance of TrialDataManager holding the trial event data for + which to calculate the PDF values. The following data fields need to + be present: - 'src_array' : numpy record ndarray + src_array : numpy record ndarray The numpy record ndarray with the following data fields: - `ra`: float + ra : float The right-ascention of the point-like source. - `dec`: float + dec : float The declination of the point-like source. - 'ra' : float + ra : float The right-ascention in radian of the data event. - 'dec' : float + dec : float The declination in radian of the data event. - 'ang_err': float + ang_err: float The reconstruction uncertainty in radian of the data event. - fitparams : None - Unused interface argument. - tl : TimeLord instance | None - The optional TimeLord instance to use for measuring timing - information. Returns ------- - prob : (N_sources,N_events) shaped 2D ndarray - The ndarray holding the spatial signal probability on the sphere for - each source and event. + pd : instance of numpy ndarray + The (N_values,)-shaped numpy ndarray holding the probability density + for each event. The length of this 1D array depends on the number + of sources and the events belonging to those sources. In the worst + case the length is N_sources * N_trial_events. 
""" get_data = tdm.get_data - src_ev_idxs = tdm.src_ev_idxs + src_array = get_data('src_array') ra = get_data('ra') dec = get_data('dec') sigma = get_data('ang_err') - if len(ra) == 1: - self.param_set = None - - try: - # angular difference is pre calculated - prob = get_data('spatial_pdf_gauss') - - if src_ev_idxs is None: - prob = prob.reshape((len(get_data('src_array')), len(ra))) - else: - (src_idxs, ev_idxs) = src_ev_idxs - sigma = np.take(sigma, src_ev_idxs[1]) - - except: - # psi is calculated here - if src_ev_idxs is None: - # Make the source position angles two-dimensional so the PDF value can - # be calculated via numpy broadcasting automatically for several - # sources. This is useful for stacking analyses. - src_ra = get_data('src_array')['ra'][:, np.newaxis] - src_dec = get_data('src_array')['dec'][:, np.newaxis] - - delta_dec = np.abs(dec - src_dec) - delta_ra = np.abs(ra - src_ra) - x = (np.sin(delta_dec / 2.))**2. + np.cos(dec) *\ - np.cos(src_dec) * (np.sin(delta_ra / 2.))**2. - else: - # Calculate the angular difference only for events that are close - # to the respective source poisition. This is useful for stacking - # analyses. - (src_idxs, ev_idxs) = src_ev_idxs - src_ra = get_data('src_array')['ra'][src_idxs] - src_dec = get_data('src_array')['dec'][src_idxs] - - delta_dec = np.abs(np.take(dec, ev_idxs) - src_dec) - delta_ra = np.abs(np.take(ra, ev_idxs) - src_ra) - x = (np.sin(delta_dec / 2.))**2. + np.cos(np.take(dec, ev_idxs)) *\ - np.cos(src_dec) * (np.sin(delta_ra / 2.))**2. - - # also extend the sigma array to account for all relevant events - sigma = np.take(sigma, ev_idxs) - - # Handle possible floating precision errors. - x[x < 0.] = 0. - x[x > 1.] = 1. - - psi = (2.0*np.arcsin(np.sqrt(x))) - - prob = 0.5/(np.pi*sigma**2)*np.exp(-0.5*(psi/sigma)**2) - - # If the signal hypothesis contains single source - # return the output here. 
- if(len(get_data('src_array')['ra']) == 1): - grads = np.array([], dtype=np.float64) - # The new interface returns the pdf only for a single source. - return (prob[0], grads) - else: - # If the signal hypothesis contains multiple sources convolve - # the pdfs with the source weights. - src_w = get_data('src_array')['src_w'] * tdm.get_data('src_array')['src_w_W'] - src_w_grads = get_data('src_array')['src_w_grad'] * tdm.get_data('src_array')['src_w_W'] - - norm = src_w.sum() - src_w /= norm - src_w_grads /= norm - - if src_ev_idxs is not None: - prob = scp.sparse.csr_matrix((prob, (ev_idxs, src_idxs))) - else: - prob = prob.T - prob_res = prob.dot(src_w) - grads = (prob.dot(src_w_grads) - - prob_res*src_w_grads.sum()) - - return (prob_res, np.atleast_2d(grads)) - -class RayleighPSFPointSourceSignalSpatialPDF(SpatialPDF, IsSignalPDF): - """This spatial signal PDF model describes the spatial PDF for a point-like + (src_idxs, evt_idxs) = tdm.src_evt_idxs + src_ra = np.take(src_array['ra'], src_idxs) + src_dec = np.take(src_array['dec'], src_idxs) + + dec = np.take(dec, evt_idxs) + ra = np.take(ra, evt_idxs) + sigma_sq = np.take(sigma**2, evt_idxs) + + psi = angular_separation(src_ra, src_dec, ra, dec) + + pd = 0.5/(np.pi*sigma_sq) * np.exp(-0.5*(psi**2/sigma_sq)) + + return pd + + def get_pd( + self, + tdm, + params_recarray=None, + tl=None): + """Calculates the spatial signal probability density of each event for + all sources. + + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial event data for + which to calculate the PDF values. The following data fields need to + be present: + + src_array : numpy record ndarray + The numpy record ndarray with the following data fields: + + ra : float + The right-ascention of the point-like source. + dec : float + The declination of the point-like source. + + ra : float + The right-ascention in radian of the data event. 
+ dec : float + The declination in radian of the data event. + ang_err: float + The reconstruction uncertainty in radian of the data event. + + In case the probability density values were pre-calculated, + params_recarray : None + Unused interface argument. + tl : TimeLord instance | None + The optional TimeLord instance to use for measuring timing + information. + + Returns + ------- + pd : instance of numpy ndarray + The (N_values,)-shaped numpy ndarray holding the probability density + for each event. The length of this 1D array depends on the number + of sources and the events belonging to those sources. In the worst + case the length is N_sources * N_trial_events. + grads : dict + The dictionary holding the gradients of the probability density + w.r.t. each fit parameter. By definition this PDF does not depend + on any fit parameters and hence, this dictionary is empty. + """ + + logger = get_logger(f'{__name__}.{classname(self)}.get_pd') + + # Check if the probability density was pre-calculated. + if self._pd_event_data_field_name in tdm: + if is_tracing_enabled(): + logger.debug( + 'Retrieve precalculated probability density values from ' + f'data field "{self._pd_event_data_field_name}"') + pd = tdm[self._pd_event_data_field_name] + return (pd, dict()) + + pd = self.calculate_pd(tdm) + + return (pd, dict()) + + +class RayleighPSFPointSourceSignalSpatialPDF( + SpatialPDF, + IsSignalPDF): + r"""This spatial signal PDF model describes the spatial PDF for a point-like source following a Rayleigh distribution in the opening angle between the source and reconstructed muon direction. Mathematically, it's the convolution of a point in the sky, i.e. the source location, with the PSF. The result of this convolution has the following - form: + form - 1/(2*\pi \sin \Psi) * \Psi/\sigma^2 \exp(-\Psi^2/(2*\sigma^2)), + .. math:: - where \sigma is the spatial uncertainty of the event and \Psi the distance - on the sphere between the source and the data event. 
+ 1/(2\pi \sin\Psi) * \Psi/\sigma^2 \exp(-\Psi^2/(2\sigma^2)), - This PDF requires the `src_array` data field, that is numpy record ndarray - with the data fields `ra` and `dec` holding the right-ascention and - declination of the point-like sources, respectively. + where :math:`\sigma` is the spatial uncertainty of the event and + :math:`\Psi` the distance on the sphere between the source and the data + event. + + This PDF requires the ``src_array`` source data field, that is numpy + structured ndarray with the data fields ``ra`` and ``dec`` holding the + right-ascention and declination of the point-like sources, respectively. """ - def __init__(self, ra_range=None, dec_range=None, **kwargs): - """Creates a new spatial signal PDF for point-like sources with a + def __init__( + self, + ra_range=None, + dec_range=None, + **kwargs): + r"""Creates a new spatial signal PDF for point-like sources with a Rayleigh point-spread-function (PSF). Parameters @@ -217,190 +267,237 @@ def __init__(self, ra_range=None, dec_range=None, **kwargs): The range in declination this spatial PDF is valid for. If set to None, the range (-pi/2, +pi/2) is used. """ - if(ra_range is None): + if ra_range is None: ra_range = (0, 2*np.pi) - if(dec_range is None): + if dec_range is None: dec_range = (-np.pi/2, np.pi/2) super().__init__( + pmm=None, ra_range=ra_range, dec_range=dec_range, **kwargs ) - def get_prob(self, tdm, fitparams=None, tl=None): + self._pd = None + + def initialize_for_new_trial( + self, + tdm, + tl=None, + **kwargs): + """Pre-computes the probability density values once a new trial data is + available. 
+ """ + get_data = tdm.get_data + + (src_idxs, evt_idxs) = tdm.src_evt_idxs + + psi = get_data('psi') + sigma = get_data('ang_err') + sigma_sq = np.take(sigma**2, evt_idxs) + + self._pd = ( + 0.5/(np.pi*np.sin(psi)) * + (psi / sigma_sq) * + np.exp(-0.5*(psi**2/sigma_sq)) + ) + + def get_pd( + self, + tdm, + params_recarray=None, + tl=None): """Calculates the spatial signal probability density of each event for - the defined source. + all sources. Parameters ---------- tdm : instance of TrialDataManager - The TrialDataManager instance holding the trial event data for which - to calculate the PDF values. The following data fields need to be - present: + The instance of TrialDataManager holding the trial event data for + which to calculate the PDF values. The following data fields need to + be present: - 'psi' : float + psi : float The opening angle in radian between the source direction and the reconstructed muon direction. - 'ang_err': float + ang_err: float The reconstruction uncertainty in radian of the data event. - fitparams : None + params_recarray : None Unused interface argument. - tl : TimeLord instance | None - The optional TimeLord instance to use for measuring timing + tl : instance of TimeLord | None + The optional instance of TimeLord to use for measuring timing information. Returns ------- - pd : (N_events,)-shaped numpy ndarray - The 1D numpy ndarray with the probability density for each event in - unit 1/rad. - grads : (0,)-shaped 1D numpy ndarray - Since this PDF does not depend on fit parameters, an empty array - is returned. + pd : (N_values,)-shaped numpy ndarray + The (N_values,)-shaped 1D numpy ndarray holding the probability + density value for each event and source in unit 1/rad. + grads : dict + The dictionary holding the gradients of the probability density + w.r.t. each global fit parameter. By definition this PDF does not + depend on any global fit parameters and hence, this dictionary is + empty. 
""" - get_data = tdm.get_data - - psi = get_data('psi') - sigma = get_data('ang_err') + if self._pd is None: + raise ValueError( + 'The PDF has not been initialized with trial data!') - pd = ( - 0.5/(np.pi*np.sin(psi)) * - (psi / sigma**2) * - np.exp(-0.5*(psi/sigma)**2) - ) + grads = dict() - grads = np.array([], dtype=np.double) + return (self._pd, grads) - return (pd, grads) - - -class SignalTimePDF(TimePDF, IsSignalPDF): - """This class provides a time PDF class for a signal source. It consists of - a Livetime instance and a TimeProfileModel instance. Together they construct - the actual signal time PDF, which has detector down-time taking into - account. +class SignalTimePDF( + TimePDF, + IsSignalPDF, +): + """This class provides a signal time PDF class. It consists of + a :class:`~skyllh.core.livetime.Livetime` instance and a + :class:`~skyllh.core.flux_model.TimeFluxProfile` instance. Together they + construct the actual signal time PDF, which has detector down-time taking + into account. """ - def __init__(self, livetime, time_profile): - """Creates a new signal time PDF instance for a given time profile of - the source. + def __init__( + self, + livetime, + time_flux_profile, + **kwargs + ): + """Creates a new signal time PDF instance for a given time flux profile + and detector live time. Parameters ---------- - livetime : Livetime instance + livetime : instance of Livetime An instance of Livetime, which provides the detector live-time information. - time_profile : TimeProfileModel instance - The time profile of the source. - """ - super(SignalTimePDF, self).__init__() + time_flux_profile : instance of TimeFluxProfile + The signal's time flux profile. - self.livetime = livetime - self.time_profile = time_profile + .. note:: - # Define the time axis with the time boundaries of the live-time. 
- self.add_axis(PDFAxis( - name='time', - vmin=self._livetime.time_window[0], - vmax=self._livetime.time_window[1])) + This instance of TimeFluxProfile will be altered by this PDF + class when calculating the probability density values! - # Get the total integral, I, of the time profile and the sum, S, of the - # integrals for each detector on-time interval during the time profile, - # in order to be able to rescale the time profile to unity with - # overlapping detector off-times removed. - (self._I, self._S) = self._calculate_time_profile_I_and_S() - - @property - def livetime(self): - """The instance of Livetime, which provides the detector live-time - information. """ - return self._livetime + super().__init__( + livetime=livetime, + time_flux_profile=time_flux_profile, + **kwargs) - @livetime.setter - def livetime(self, lt): - if(not isinstance(lt, Livetime)): - raise TypeError( - 'The livetime property must be an instance of Livetime!') - self._livetime = lt + self._pd = None - @property - def time_profile(self): - """The instance of TimeProfileModel providing the (assumed) physical - time profile of the source. - """ - return self._time_profile + def _calculate_pd( + self, + tdm, + params_recarray, + tl=None, + ): + """Calculates the probability density values for the given trial data + and source parameters. - @time_profile.setter - def time_profile(self, tp): - if(not isinstance(tp, TimeProfileModel)): - raise TypeError( - 'The time_profile property must be an instance of ' - 'TimeProfileModel!') - self._time_profile = tp + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial data. + The following data fields must exist: - def __str__(self): - """Pretty string representation of the signal time PDF. 
- """ - s = '%s(\n' % (classname(self)) - s += ' '*display.INDENTATION_WIDTH + \ - 'livetime = %s,\n' % (str(self._livetime)) - s += ' '*display.INDENTATION_WIDTH + \ - 'time_profile = %s\n' % (str(self._time_profile)) - s += ')' - return s - - def _calculate_time_profile_I_and_S(self): - """Calculates the total integral, I, of the time profile and the sum, A, - of the time-profile integrals during the detector on-time intervals. + ``'time'`` : float + The time of the event. + + params_recarray : instance of structured ndarray + The structured numpy ndarray of length N_sources holding the local + parameter names and values of the sources. + tl : instance of TimeLord | None + The optional instance of TimeLord that should be used to measure + timing information. Returns ------- - I : float - The total integral of the source time-profile. - S : float - The sum of the source time-profile integrals during the detector - on-time intervals. + pd : instance of ndarray + The (N_values,)-shaped numpy ndarray holding the probability density + values for each trial data event and source. """ - ontime_intervals = self._livetime.get_ontime_intervals_between( - self._time_profile.t_start, self._time_profile.t_end) - I = self._time_profile.get_total_integral() - S = np.sum(self._time_profile.get_integral( - ontime_intervals[:, 0], ontime_intervals[:, 1])) - return (I, S) - - def assert_is_valid_for_exp_data(self, data_exp): - """Checks if the time PDF is valid for all the given experimental data. - It checks if the time of all events is within the defined time axis of - the PDF. + (src_idxs, evt_idxs) = tdm.src_evt_idxs + n_values = len(evt_idxs) - Parameters - ---------- - data_exp : numpy record ndarray - The array holding the experimental data. The following data fields - must exist: + pd = np.zeros((n_values,), dtype=np.float64) - - 'time' : float - The MJD time of the data event. 
+ events_time = tdm.get_data('time') + for (src_idx, src_params_row) in enumerate(params_recarray): + params = dict(zip( + params_recarray.dtype.fields.keys(), + src_params_row)) - Raises - ------ - ValueError - If some of the data is outside the time range of the PDF. - """ - time_axis = self.get_axis('time') + # Update the time flux profile if its parameter values have changed + # and recalculate self._I and self._S if an update was actually + # performed. + updated = self._time_flux_profile.set_params(params) + if updated: + self._S = self._calculate_sum_of_ontime_time_flux_profile_integrals() + + src_m = src_idxs == src_idx + idxs = evt_idxs[src_m] + + times = events_time[idxs] + + # Get a mask of the event times which fall inside a detector on-time + # interval. + on = self._livetime.is_on(times) - if(np.any((data_exp['time'] < time_axis.vmin) | - (data_exp['time'] > time_axis.vmax))): - raise ValueError('Some data is outside the time range (%.3f, %.3f)!' % ( - time_axis.vmin, time_axis.vmax)) + pd_src = pd[src_m] + pd_src[on] = ( + self._time_flux_profile(t=times[on]) / self._S + ) + pd[src_m] = pd_src - def get_prob(self, tdm, fitparams): + return pd + + def initialize_for_new_trial( + self, + tdm, + tl=None, + **kwargs, + ): + # Check if this time PDF is not constant and does depend on any global + # floating parameters. If that's not the case we can pre-calculate the + # PDF values. + is_constant = ( + (self.param_set is None) or + (len(self.param_set.params_name_list) == 0) or + ((self.pmm is not None) and + np.all(~self.pmm.get_local_param_is_global_floating_param_mask( + self.param_set.params_name_list))) + ) + if not is_constant: + self._pd = None + return + + # At this point it has been checked that the PDF is constant and we can + # pre-calculate the PDF values. 
+ + if self.pmm is None: + params_recarray = np.empty((tdm.n_sources,), dtype=[]) + else: + params_recarray = self.pmm.create_src_params_recarray() + + self._pd = self._calculate_pd( + tdm=tdm, + params_recarray=params_recarray, + tl=tl) + + def get_pd( + self, + tdm, + params_recarray, + tl=None, + ): """Calculates the signal time probability density of each event for the - given set of signal time fit parameter values. + given set of time parameter values for each source. Parameters ---------- @@ -409,396 +506,559 @@ def get_prob(self, tdm, fitparams): which to calculate the PDF value. The following data fields must exist: - - 'time' : float - The MJD time of the event. - fitparams : dict - The dictionary holding the signal time parameter values for which - the signal time probability density should be calculated. + ``'time'`` : float + The time of the event. + + params_recarray : instance of numpy structured ndarray + The numpy structured ndarray holding the local parameter values for + each source. + tl : instance of TimeLord | None + The optional TimeLord instance that should be used to measure + timing information. Returns ------- - pd : (N_events,)-shaped numpy ndarray - The 1D numpy ndarray with the probability density for each event. + pd : instance of numpy ndarray + The (N_values,)-shaped 1D numpy ndarray holding the probability + density value for each trial event and source. + grads : dict + The dictionary holding the gradients of the probability density + w.r.t. each global fit parameter. """ - # Update the time-profile if its fit-parameter values have changed and - # recalculate self._I and self._S if an updated was actually performed. - updated = self._time_profile.update(fitparams) - if(updated): - (self._I, self._S) = self._calculate_time_profile_I_and_S() + # Check if we have pre-calculated PDF values. 
+ if self._pd is not None: + return (self._pd, dict()) - time = tdm.get_data('time') + pd = self._calculate_pd( + tdm=tdm, + params_recarray=params_recarray, + tl=tl) - # Get a mask of the event times which fall inside a detector on-time - # interval. - on = self._livetime.is_on(time) + return (pd, dict()) - # The sum of the on-time integrals of the time profile, A, will be zero - # if the time profile is entirly during detector off-time. - pd = np.zeros((tdm.n_selected_events,), dtype=np.float64) - if(self._S > 0): - pd[on] = self._time_profile.get_value( - time[on]) / (self._I * self._S) - return pd - +class SignalMultiDimGridPDF( + MultiDimGridPDF, + IsSignalPDF): + """This class provides a multi-dimensional signal PDF. The PDF is created + from pre-calculated PDF data on a grid. The grid data is interpolated using + a :class:`scipy.interpolate.RegularGridInterpolator` instance. + """ + + def __init__( + self, + *args, + **kwargs): + """Creates a new PDF instance for a multi-dimensional PDF given + as PDF values on a grid or as PDF values stored in a photospline table. + + See the documentation of the + :meth:`skyllh.core.pdf.MultiDimGridPDF.__init__` method for the + documentation of possible arguments. + """ + super().__init__( + *args, + **kwargs) -class SignalGaussTimePDF(TimePDF, IsSignalPDF): - def __init__(self, grl, mu, sigma, **kwargs): - """Creates a new signal time PDF instance for a given time profile of - the source. +class SignalMultiDimGridPDFSet( + IsSignalPDF, + PDFSet, + PDF, +): + """This class provides a set of MultiDimGridPDF instances that implements + also the PDF interface. + """ + + def __init__( + self, + pmm, + param_set, + param_grid_set, + gridparams_pdfs, + interpol_method_cls=None, + **kwargs): + """Creates a new MultiDimGridPDFSet instance, which holds a set of + MultiDimGridPDF instances, one for each point of a parameter grid set. 
Parameters ---------- - grl : ndarray - Array of the detector good run list - mu : float - Mean of the gaussian flare. - sigma : float - Sigma of the gaussian flare. + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper that defines the mapping of + the global parameters to local model parameters. + param_set : instance of Parameter | sequence of instance of Parameter | instance of ParameterSet + The set of parameters defining the parameters of this PDF. + param_grid_set : ParameterGrid instance | ParameterGridSet instance + The set of ParameterGrid instances, which define the grid values of + the model parameters, the given MultiDimGridPDF instances belong to. + gridparams_pdfs : sequence of (dict, MultiDimGridPDF) tuples + The sequence of 2-element tuples which define the mapping of grid + values to PDF instances. + interpol_method_cls : subclass of GridManifoldInterpolationMethod + The class specifying the interpolation method. This must be a + subclass of ``GridManifoldInterpolationMethod``. + If set to None, the default grid manifold interpolation method + ``Linear1DGridManifoldInterpolationMethod`` will be used. """ - super(SignalGaussTimePDF, self).__init__(**kwargs) - self.mu = mu - self.sigma = sigma - self.grl = grl + super().__init__( + pmm=pmm, + param_set=param_set, + param_grid_set=param_grid_set, + **kwargs) + if interpol_method_cls is None: + interpol_method_cls = Linear1DGridManifoldInterpolationMethod + self.interpol_method_cls = interpol_method_cls - def norm_uptime(self): - """Compute the normalization with the dataset uptime. Distributions like - scipy.stats.norm are normalized (-inf, inf). - These must be re-normalized such that the function sums to 1 over the - finite good run list domain. + # Add the given MultiDimGridPDF instances to the PDF set. 
+ for (gridparams, pdf) in gridparams_pdfs: + self.add_pdf(pdf, gridparams) - Returns - ------- - norm : float - Normalization such that cdf sums to 1 over good run list domain - """ - cdf = scp.stats.norm(self.mu, self.sigma).cdf + # Create the interpolation method instance. + self._interpol_method = self._interpol_method_cls( + func=self._evaluate_pdfs, + param_grid_set=self.param_grid_set) - integral = (cdf(self.grl["stop"]) - cdf(self.grl["start"])).sum() + # Save the parameter names needed for the interpolation for later usage. + self._interpol_param_names =\ + self.param_grid_set.params_name_list - if np.isclose(integral, 0): - return 0 + self._cache_tdm_trial_data_state_id = None + self._cache_eventdata = None - return 1. / integral + @property + def interpol_method_cls(self): + """The class derived from GridManifoldInterpolationMethod + implementing the interpolation of the PDF grid manifold. + """ + return self._interpol_method_cls + @interpol_method_cls.setter + def interpol_method_cls(self, cls): + if not issubclass(cls, GridManifoldInterpolationMethod): + raise TypeError( + 'The interpol_method_cls property must be a sub-class of ' + 'GridManifoldInterpolationMethod!') + self._interpol_method_cls = cls - def get_prob(self, tdm, fitparams=None, tl=None): - """Calculates the signal time probability density of each event for the - given set of signal time fit parameter values. + def _get_eventdata(self, tdm, tl=None): + """Creates and caches the event data for this PDFSet. If the + TrialDataManager's trail data state id changed, the eventdata will be + recreated. Parameters ---------- tdm : instance of TrialDataManager - The instance of TrialDataManager holding the trial event data for - which to calculate the PDF value. The following data fields must - exist: - - - 'time' : float - The MJD time of the event. - - fitparams : None - Unused interface argument. 
- tl : TimeLord instance | None - The optional TimeLord instance to use for measuring timing - information. + The instance of TrialDataManager holding the trial data. + tl : instance of TimeLord | None + The optional instance of TimeLord to measure task timing. Returns ------- - pd : (N_events,)-shaped numpy ndarray - The 1D numpy ndarray with the probability density for each event. - grads : empty array of float - Empty, since it does not depend on any fit parameter + eventdata : instance of numpy ndarray + The (N_values,V)-shaped eventdata ndarray. """ - time = tdm.get_data('time') - - pd = scp.stats.norm.pdf(time, self.mu, self.sigma) * self.norm_uptime() - grads = np.array([], dtype=np.double) + if (self._cache_tdm_trial_data_state_id is None) or\ + (self._cache_tdm_trial_data_state_id != tdm.trial_data_state_id): - return (pd, grads) + with TaskTimer(tl, 'Create MultiDimGridPDFSet eventdata.'): + # All PDFs of this PDFSet should have the same axes, so we use + # the axes from the first PDF in this PDF set. + pdf = next(iter(self.items()))[1] + self._cache_tdm_trial_data_state_id = tdm.trial_data_state_id + self._cache_eventdata =\ + MultiDimGridPDF.create_eventdata_for_sigpdf( + tdm=tdm, + axes=pdf.axes) -class SignalBoxTimePDF(TimePDF, IsSignalPDF): + return self._cache_eventdata - def __init__(self, grl, start, end, **kwargs): - """Creates a new signal time PDF instance for a given time profile of - the source. + def _get_pdf_for_interpol_param_values( + self, + interpol_param_values): + """Retrieves the PDF for the given set of interpolation parameter + values. Parameters ---------- - grl : ndarray - Array of the detector good run list - start : float - Start time of box profile. - end : float - End time of box profile. + interpol_param_values : instance of numpy ndarray + The (N_interpol_params,)-shaped numpy ndarray holding the values of + the interpolation parameters. + + Returns + ------- + pdf : instance of MultiDimGridPDF + The requested PDF instance. 
""" - super(SignalBoxTimePDF, self).__init__(**kwargs) - self.start = start - self.end = end - self.grl = grl + gridparams = dict( + zip(self._interpol_param_names, interpol_param_values)) + + pdf = self.get_pdf(gridparams) + return pdf + + def _evaluate_pdfs( + self, + tdm, + eventdata, + gridparams_recarray, + n_values): + """Evaluates the PDFs for the given event data. The particular PDF is + selected based on the grid parameter values for each model. - def cdf(self, t): - """Compute the cumulative density function for the box pdf. This is needed for normalization. - Parameters ---------- - t : float, ndarray - MJD times + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial event data. + eventdata : instance of numpy ndarray + The (N_values,V)-shaped numpy ndarray holding the event data for + the PDF evaluation. + gridparams_recarray : instance of numpy structured ndarray + The numpy structured ndarray of length N_sources with the + parameter names and values needed for the interpolation on the grid + for all sources. If the length of this structured array is + 1, the set of parameters will be used for all sources. + n_values : int + The size of the output array. Returns ------- - cdf : float, ndarray - Values of cumulative density function evaluated at t + pd : instance of ndarray + The (N_values,)-shaped numpy ndarray holding the probability density + values for each event. """ - t_start = self.start - t_end = self.end - t = np.atleast_1d(t) + logger = get_logger(f'{__name__}.{classname(self)}._evaluate_pdfs') - cdf = np.zeros(t.size, float) - sample_start = self.grl["start"][0] - sample_end = self.grl["stop"][-1] + # Check for special case when a single set of parameters are provided. 
+ if len(gridparams_recarray) == 1: + if is_tracing_enabled(): + logger.debug( + 'Get PDF for ' + f'interpol_param_values={gridparams_recarray[0]}.') + pdf = self._get_pdf_for_interpol_param_values( + interpol_param_values=gridparams_recarray[0]) - if t_start < sample_start and t_end > sample_start: - t_start = sample_start - if t_end > sample_end and t_start < sample_end: - t_end = sample_end + pd = pdf.get_pd_with_eventdata( + tdm=tdm, + params_recarray=None, + eventdata=eventdata) - # values between start and stop times - mask = (t_start <= t) & (t <= t_end) - cdf[mask] = (t[mask] - t_start) / [t_end - t_start] + return pd - # take care of values beyond stop time in sample - if t_end > sample_start: - mask = (t_end < t) - cdf[mask] = 1. + pd = np.empty(n_values, dtype=np.float64) - return cdf - + (src_idxs, evt_idxs) = tdm.src_evt_idxs - def norm_uptime(self): - """Compute the normalization with the dataset uptime. Distributions like - scipy.stats.norm are normalized (-inf, inf). - These must be re-normalized such that the function sums to 1 over the - finite good run list domain. + v_start = 0 + for (sidx, interpol_param_values) in enumerate(gridparams_recarray): + pdf = self._get_pdf_for_interpol_param_values( + interpol_param_values=interpol_param_values) - Returns - ------- - norm : float - Normalization such that cdf sums to 1 over good run list domain - """ - integral = (self.cdf(self.grl["stop"]) - self.cdf(self.grl["start"])).sum() + # Determine the events that belong to the current source. + evt_mask = src_idxs == sidx - if np.isclose(integral, 0): - return 0 + n = np.count_nonzero(evt_mask) + sl = slice(v_start, v_start+n) + pd[sl] = pdf.get_pd_with_eventdata( + tdm=tdm, + params_recarray=None, + eventdata=eventdata, + evt_mask=evt_mask) - return 1. / integral + v_start += n - - def get_prob(self, tdm, fitparams=None, tl=None): - """Calculates the signal time probability of each event for the given - set of signal time fit parameter values. 
+ return pd + + def assert_is_valid_for_trial_data( + self, + tdm, + tl=None, + **kwargs): + """Checks if the PDFs of this PDFSet instance are valid for all the + given trial data events. + Since all PDFs should have the same axes, only the first PDF will be + checked. + + This method calls the + :meth:`~skyllh.core.pdf.PDFSet.assert_is_valid_for_trial_data` method of + the :class:`~skyllh.core.pdf.PDFSet` class. Parameters ---------- tdm : instance of TrialDataManager - The instance of TrialDataManager holding the trial event data for - which to calculate the PDF value. The following data fields must - exist: + The instance of TrialDataManager holding the trial data events. + tl : instance of TimeLord | None + The optional instance of TimeLord for measuring timing information. - - 'time' : float - The MJD time of the event. + Raises + ------ + ValueError + If some of the data is outside the axes range of the PDF. + """ + super().assert_is_valid_for_trial_data( + tdm=tdm, + tl=tl, + **kwargs) - fitparams : None - Unused interface argument. - tl : TimeLord instance | None - The optional TimeLord instance to use for measuring timing + def get_pd( + self, + tdm, + params_recarray, + tl=None): + """Calculates the probability density for each event, given the given + parameter values. + + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager that will be used to get the data + from the trial events. + params_recarray : instance of structured ndarray | None + The numpy record ndarray holding the parameter name and values for + each source model. + tl : instance of TimeLord | None + The optional instance of TimeLord to use for measuring timing information. Returns ------- - pd : (N_events,)-shaped numpy ndarray - The 1D numpy ndarray with the probability density for each event. 
- grads : empty array of float - Does not depend on fit parameter, so no gradient + pd : instance of numpy ndarray + The (N_values,)-shaped numpy ndarray holding the probability density + value for each source and event. + grads : dict + The dictionary holding the PDF gradient value for each event w.r.t. + each global fit parameter. + The key of the dictionary is the ID of the global fit parameter. + The value is the (N_values,)-shaped numpy ndarray holding the + gradient value for each event. """ - time = tdm.get_data('time') + logger = get_logger(f'{__name__}.{classname(self)}.get_pd') + + # Create the ndarray for the event data that is needed for the + # ``MultiDimGridPDF.get_pd_with_eventdata`` method. + eventdata = self._get_eventdata( + tdm=tdm, + tl=tl) + + # Get the interpolated PDF values for the arbitrary parameter values. + # The (D,N_events)-shaped grads_arr ndarray contains the gradient of the + # probability density w.r.t. each of the D parameters, which are defined + # by the param_grid_set. The order of the D gradients is the same as + # the parameter grids. + with TaskTimer( + tl, + 'Call interpolate method to get probability densities for all ' + 'events.'): + if is_tracing_enabled(): + logger.debug( + 'Call interpol_method with ' + f'params_recarray={params_recarray} of fields ' + f'{list(params_recarray.dtype.fields.keys())}.') + (pd, grads_arr) = self._interpol_method( + tdm=tdm, + eventdata=eventdata, + params_recarray=params_recarray) + + # Construct the gradients dictionary with all the fit parameters, that + # contribute to the local interpolation parameters. + grads = dict() + + tdm_n_sources = tdm.n_sources + for fitparam_id in range(self.pmm.n_global_floating_params): + grad = np.zeros((tdm.get_n_values(),), dtype=np.float64) + + # Loop through the local interpolation parameters and match them + # with the global fit parameter fitparam_id. 
+ fitparam_id_contributes = False + for (pidx, pname) in enumerate(self._interpol_param_names): + if pname not in params_recarray.dtype.fields: + continue + p_gpidxs = params_recarray[f'{pname}:gpidx'] + src_mask = p_gpidxs == (fitparam_id + 1) + n_sources = np.count_nonzero(src_mask) + if n_sources == 0: + continue + + fitparam_id_contributes = True + + if n_sources == tdm_n_sources: + # This parameter applies to all sources, hence to all + # values, and hence it's the only local parameter + # contributing to the global parameter fitparam_id. + grad = grads_arr[pidx] + break + + # The current parameter does not apply to all sources. + # Create a values mask that matches a given source mask. + values_mask = tdm.get_values_mask_for_source_mask(src_mask) + grad[values_mask] = grads_arr[pidx][values_mask] + + if fitparam_id_contributes: + grads[fitparam_id] = grad - # Get a mask of the event times which fall inside a detector on-time - # interval. - # Gives 0 for outside the flare and 1 for inside the flare. - box_mask = np.piecewise(time, [self.start <= time, time <= self.end], [1., 1.]) + return (pd, grads) - sample_start = self.grl["start"][0] - sample_end = self.grl["stop"][-1] - t_start = self.start - t_end = self.end - # check if the whole flare lies in this dataset for normalization. - # If one part lies outside, adjust to datasample start or end time. - # For the case where everything lies outside, the pdf will be 0 by definition. - if t_start < sample_start and t_end > sample_start: - t_start = sample_start - if t_end > sample_end and t_start < sample_end: - t_end = sample_end +class SignalSHGMappedMultiDimGridPDFSet( + IsSignalPDF, + PDFSet, + PDF, +): + """This class provides a set of MultiDimGridPDF instances, one for one or + more source hypothesis groups. 
+ """ - pd = box_mask / (t_end - t_start) * self.norm_uptime() - grads = np.array([], dtype=np.double) + def __init__( + self, + shg_mgr, + pmm, + shgidxs_pdf_list, + **kwargs, + ): + """Creates a new SignalSHGMappedMultiDimGridPDFSet instance, which holds + a set of MultiDimGridPDF instances, one for one or more source + hypothesis groups. - return (pd, grads) + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager that defines the source + hypothesis groups and their sources. + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper which defines the mapping of + global parameters to local source parameters. + shgidxs_pdf_list : sequence of (shg_idxs, MultiDimGridPDF) tuples + The sequence of 2-element tuples which define the mapping of the + source hypothesis groups to a PDF instance. + """ + super().__init__( + pmm=pmm, + param_set=None, + param_grid_set=None, + **kwargs) + if not isinstance(shg_mgr, SourceHypoGroupManager): + raise TypeError( + 'The shg_mgr argument must be an instance of ' + 'SourceHypoGroupManager! ' + f'Its current type is {classname(shg_mgr)}.') + self._shg_mgr = shg_mgr + self._shgidxs_list = [] + for (shg_idxs, pdf) in shgidxs_pdf_list: + self._shgidxs_list.append(shg_idxs) + self.add_pdf( + pdf=pdf, + gridparams={'shg_idxs': shg_idxs}) + self._cache_tdm_trial_data_state_id = None + self._cache_eventdata = None -class SignalMultiDimGridPDF(MultiDimGridPDF, IsSignalPDF): - """This class provides a multi-dimensional signal PDF. The PDF is created - from pre-calculated PDF data on a grid. The grid data is interpolated using - a :class:`scipy.interpolate.RegularGridInterpolator` instance. - """ + @property + def shg_mgr(self): + """(read-only) The instance of SourceHypoGroupManager that defines the + source hypothesis groups and their sources. 
+ """ + return self._shg_mgr - def __init__(self, axis_binnings, path_to_pdf_splinetable=None, - pdf_grid_data=None, norm_factor_func=None): - """Creates a new signal PDF instance for a multi-dimensional PDF given - as PDF values on a grid. The grid data is interpolated with a - :class:`scipy.interpolate.RegularGridInterpolator` instance. As grid - points the bin edges of the axis binning definitions are used. + def _get_eventdata(self, tdm, tl=None): + """Creates and caches the event data for this PDFSet. If the + TrialDataManager's trail data state id changed, the eventdata will be + recreated. Parameters ---------- - axis_binnings : sequence of BinningDefinition - The sequence of BinningDefinition instances defining the binning of - the PDF axes. The name of each BinningDefinition instance defines - the event field name that should be used for querying the PDF. - path_to_pdf_splinetable : str - The path to the file containing the spline table. - The spline table contains a pre-computed fit to pdf_grid_data. - pdf_grid_data : n-dimensional numpy ndarray - The n-dimensional numpy ndarray holding the PDF values at given grid - points. The grid points must match the bin edges of the given - BinningDefinition instances of the `axis_binnings` argument. - norm_factor_func : callable | None - The function that calculates a possible required normalization - factor for the PDF value based on the event properties. - The call signature of this function - must be `__call__(pdf, events, fitparams)`, where `pdf` is this PDF - instance, `events` is a numpy record ndarray holding the events for - which to calculate the PDF values, and `fitparams` is a dictionary - with the current fit parameter names and values. + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial data. + tl : instance of TimeLord | None + The optional instance of TimeLord to measure task timing. 
+ + Returns + ------- + eventdata : instance of numpy ndarray + The (N_values,V)-shaped eventdata ndarray. """ - super(SignalMultiDimGridPDF, self).__init__( - axis_binnings=axis_binnings, - path_to_pdf_splinetable=path_to_pdf_splinetable, - pdf_grid_data=pdf_grid_data, - norm_factor_func=norm_factor_func) + if (self._cache_tdm_trial_data_state_id is None) or\ + (self._cache_tdm_trial_data_state_id != tdm.trial_data_state_id): + with TaskTimer(tl, 'Create MultiDimGridPDFSet eventdata.'): + # All PDFs of this PDFSet should have the same axes, so we use + # the axes from the first PDF in this PDF set. + pdf = next(iter(self.items()))[1] -class SignalMultiDimGridPDFSet(MultiDimGridPDFSet, IsSignalPDF): - """This class extends the MultiDimGridPDFSet PDF class to be a signal PDF. - See the documentation of the :class:`skyllh.core.pdf.MultiDimGridPDFSet` - class for what this PDF provides. - """ + self._cache_tdm_trial_data_state_id = tdm.trial_data_state_id + self._cache_eventdata =\ + MultiDimGridPDF.create_eventdata_for_sigpdf( + tdm=tdm, + axes=pdf.axes) - def __init__(self, param_set, param_grid_set, gridparams_pdfs, - interpolmethod=None, **kwargs): - """Creates a new SignalMultiDimGridPDFSet instance, which holds a set of - MultiDimGridPDF instances, one for each point of a parameter grid set. + return self._cache_eventdata + + def get_pd( + self, + tdm, + params_recarray, + tl=None): + """Calculates the probability density for each event, given the given + parameter values. Parameters ---------- - param_set : Parameter instance | sequence of Parameter instances | - ParameterSet instance - The set of parameters defining the model parameters of this PDF. - param_grid_set : ParameterGrid instance | ParameterGridSet instance - The set of ParameterGrid instances, which define the grid values of - the model parameters, the given MultiDimGridPDF instances belong to. 
- gridparams_pdfs : sequence of (dict, MultiDimGridPDF) tuples - The sequence of 2-element tuples which define the mapping of grid - values to PDF instances. - interpolmethod : subclass of GridManifoldInterpolationMethod - The class specifying the interpolation method. This must be a - subclass of ``GridManifoldInterpolationMethod``. - If set to None, the default grid manifold interpolation method - ``Linear1DGridManifoldInterpolationMethod`` will be used. + tdm : instance of TrialDataManager + The instance of TrialDataManager that will be used to get the data + from the trial events. + params_recarray : instance of structured ndarray | None + The numpy record ndarray holding the parameter name and values for + each source model. + tl : instance of TimeLord | None + The optional instance of TimeLord to use for measuring timing + information. + + Returns + ------- + pd : instance of numpy ndarray + The (N_values,)-shaped numpy ndarray holding the probability density + value for each event. + grads : dict + The dictionary holding the PDF gradient value for each event w.r.t. + each global fit parameter. + The key of the dictionary is the ID of the global fit parameter. + The value is the (N_values,)-shaped numpy ndarray holding the + gradient value for each event. + By definition this PDF set does not depend on any fit parameters, + hence, this dictionary is empty. """ - super(SignalMultiDimGridPDFSet, self).__init__( - param_set=param_set, - param_grid_set=param_grid_set, - gridparams_pdfs=gridparams_pdfs, - interpolmethod=interpolmethod, - pdf_type=SignalMultiDimGridPDF, - **kwargs) + # Create the ndarray for the event data that is needed for the + # ``MultiDimGridPDF.get_pd_with_eventdata`` method. 
+ eventdata = self._get_eventdata( + tdm=tdm, + tl=tl) + pd = np.zeros((tdm.get_n_values(),), dtype=np.float64) -class SignalMappedMultiDimGridPDFSet(MappedMultiDimGridPDFSet, IsSignalPDF): - """This class extends the MappedMultiDimGridPDFSet PDF class to be a signal - PDF. See the documentation of the - :class:`skyllh.core.pdf.MappedMultiDimGridPDFSet` class for what this PDF - provides. - """ + src_idxs = tdm.src_evt_idxs[0] + src_idxs_arr = np.arange(self._shg_mgr.n_sources) - def __init__(self, param_grid_set, gridparams_pdfs, - interpolmethod=None, **kwargs): - """Creates a new SignalMappedMultiDimGridPDFSet instance, which holds a - set of MultiDimGridPDF instances, one for each point of a parameter grid - set. + # Loop over the individual PDFs (via their key). + for shg_idxs in self._shgidxs_list: - Parameters - ---------- - param_grid_set : ParameterGrid instance | ParameterGridSet instance - The set of ParameterGrid instances, which define the grid values of - the model parameters, the given MultiDimGridPDF instances belong to. - gridparams_pdfs : sequence of (dict, MultiDimGridPDF) tuples - The sequence of 2-element tuples which define the mapping of grid - values to PDF instances. - """ - super(SignalMappedMultiDimGridPDFSet, self).__init__( - param_grid_set=param_grid_set, - gridparams_pdfs=gridparams_pdfs, - pdf_type=SignalMultiDimGridPDF, - **kwargs) + src_mask = np.zeros((self._shg_mgr.n_sources,), dtype=np.bool_) + for shg_idx in shg_idxs: + src_mask |= self._shg_mgr.get_src_mask_of_shg(shg_idx) + pdf_src_idxs = src_idxs_arr[src_mask] + values_mask = np.isin(src_idxs, pdf_src_idxs) + pdf_key = self.make_key({'shg_idxs': shg_idxs}) + pdf = self.get_pdf(pdf_key) -class SignalNDPhotosplinePDF(NDPhotosplinePDF, IsSignalPDF): - """This class provides a multi-dimensional signal PDF created from a - n-dimensional photospline fit. The photospline package is used to evaluate - the PDF fit. 
- """ + with TaskTimer(tl, f'Get PD values for PDF of SHG {shg_idx}.'): + pd_pdf = pdf.get_pd_with_eventdata( + tdm=tdm, + params_recarray=params_recarray, + eventdata=eventdata, + evt_mask=values_mask) - def __init__( - self, - axis_binnings, - param_set, - path_to_pdf_splinefit, - norm_factor_func=None): - """Creates a new signal PDF instance for a n-dimensional photospline PDF - fit. + pd[values_mask] = pd_pdf - Parameters - ---------- - axis_binnings : BinningDefinition | sequence of BinningDefinition - The sequence of BinningDefinition instances defining the binning of - the PDF axes. The name of each BinningDefinition instance defines - the event field name that should be used for querying the PDF. - param_set : Parameter | ParameterSet - The Parameter instance or ParameterSet instance defining the - parameters of this PDF. The ParameterSet holds the information - which parameters are fixed and which are floating (i.e. fitted). - path_to_pdf_splinefit : str - The path to the file containing the photospline fit. - norm_factor_func : callable | None - The function that calculates a possible required normalization - factor for the PDF value based on the event properties. - The call signature of this function must be - `__call__(pdf, tdm, params)`, where `pdf` is this PDF - instance, `tdm` is an instance of TrialDataManager holding the - event data for which to calculate the PDF values, and `params` is a - dictionary with the current parameter names and values. - """ - super(SignalNDPhotosplinePDF, self).__init__( - axis_binnings=axis_binnings, - param_set=param_set, - path_to_pdf_splinefit=path_to_pdf_splinefit, - norm_factor_func=norm_factor_func - ) + return (pd, dict()) diff --git a/skyllh/core/smoothing.py b/skyllh/core/smoothing.py index a855e78e7f..fbf6046bc1 100644 --- a/skyllh/core/smoothing.py +++ b/skyllh/core/smoothing.py @@ -12,12 +12,16 @@ # unsmooth, i.e. no smoothing should be applied along that axis. 
UNSMOOTH_AXIS = np.ones(1) -class HistSmoothingMethod(object, metaclass=abc.ABCMeta): + +class HistSmoothingMethod( + object, + metaclass=abc.ABCMeta, +): """Abstract base class for implementing a histogram smoothing method. """ - def __init__(self): - super(HistSmoothingMethod, self).__init__() + def __init__(self, **kwargs): + super().__init__(**kwargs) @abc.abstractmethod def smooth(self, h): @@ -36,11 +40,13 @@ def smooth(self, h): pass -class NoHistSmoothingMethod(HistSmoothingMethod): +class NoHistSmoothingMethod( + HistSmoothingMethod, +): """This class implements a no-shoothing histogram method. """ - def __init__(self): - super(NoHistSmoothingMethod, self).__init__() + def __init__(self, **kwargs): + super().__init__(**kwargs) def smooth(self, h): """Does not perform any smoothing and just returns the input histogram. @@ -58,10 +64,17 @@ def smooth(self, h): return h -class NeighboringBinHistSmoothingMethod(HistSmoothingMethod): - """This class implements +class NeighboringBinHistSmoothingMethod( + HistSmoothingMethod, +): + """This class implements a smoothing algorithm that smoothes a histogram + based on the neighboring bins. """ - def __init__(self, axis_kernel_arrays): + def __init__( + self, + axis_kernel_arrays, + **kwargs, + ): """Constructs a new neighboring bin histogram smoothing method. Parameters @@ -71,17 +84,20 @@ def __init__(self, axis_kernel_arrays): axis should not get smoothed, the UNSMOOTH_AXIS constant should be used for that axis' smoothing kernel array. 
""" - super(NeighboringBinHistSmoothingMethod, self).__init__() + super().__init__(**kwargs) - if(not issequenceof(axis_kernel_arrays, np.ndarray)): - raise TypeError('The axis_kernel_arrays argument must be a sequence of numpy.ndarray instances!') + if not issequenceof(axis_kernel_arrays, np.ndarray): + raise TypeError( + 'The axis_kernel_arrays argument must be a sequence of ' + 'numpy.ndarray instances!') self._ndim = len(axis_kernel_arrays) # Construct the smoothing kernel k used by the smooth method. # k is a N-dimensional ndarray. It defines which neighboring bin values # of the histogram will contribute how much to the central bin value. - self._k = np.product(np.meshgrid(*axis_kernel_arrays, indexing='ij'), axis=0) + self._k = np.product( + np.meshgrid(*axis_kernel_arrays, indexing='ij'), axis=0) @property def ndim(self): @@ -91,7 +107,8 @@ def ndim(self): return self._ndim def smooth(self, h): - """Smoothes the given histogram array h with the internal kernel array k. Both arrays must have the same dimensionality. The shape + """Smoothes the given histogram array h with the internal kernel array + k. Both arrays must have the same dimensionality. The shape values of k must be smaller than or equal to the shape values of h. Parameters @@ -103,11 +120,18 @@ def smooth(self, h): ------- smoothed_h : N-dimensional ndarray. """ - if(h.ndim != self._ndim): - raise ValueError('The ndarrays of argument h and k must have the same dimensionality! Currently they are %d and %d, respectively.'%(h.ndim, self._ndim)) + if h.ndim != self._ndim: + raise ValueError( + 'The ndarrays of argument h and k must have the same ' + f'dimensionality! 
Currently they are {h.ndim:d} and ' + f'{self._ndim:d}, respectively.') for d in range(h.ndim): - if(self._k.shape[d] > h.shape[d]): - raise ValueError('The shape value (%d) of dimension %d of ndarray k must be smaller than or equal to the shape value (%d) of dimension %d of ndarray h!'%(self._k.shape[d], d, h.shape[d], d)) + if self._k.shape[d] > h.shape[d]: + raise ValueError( + f'The shape value ({self._k.shape[d]:d}) of dimension ' + f'{d:d} of ndarray k must be smaller than or equal to the ' + f'shape value ({h.shape[d]:d}) of dimension {d:d} of ' + 'ndarray h!') norm = scipy.signal.convolve(np.ones_like(h), self._k, mode="same") smoothed_h = scipy.signal.convolve(h, self._k, mode="same") / norm @@ -115,13 +139,17 @@ def smooth(self, h): return smoothed_h -class SmoothingFilter(object): +class SmoothingFilter( + object): """This class provides a base class for a histogram smoothing filter. It provides an axis kernel array that defines how many neighboring bins of a histogram bin should be used to smooth that histogram bin. """ - def __init__(self, axis_kernel_array): - super(SmoothingFilter, self).__init__() + def __init__( + self, + axis_kernel_array, + **kwargs): + super().__init__(**kwargs) self.axis_kernel_array = axis_kernel_array @@ -130,57 +158,78 @@ def axis_kernel_array(self): """The kernel array for a histogram axis. """ return self._axis_kernel_array + @axis_kernel_array.setter def axis_kernel_array(self, arr): - if(not isinstance(arr, np.ndarray)): - raise TypeError('The axis_kernel_array property must be an instance of numpy.ndarray!') + if not isinstance(arr, np.ndarray): + raise TypeError( + 'The axis_kernel_array property must be an instance of ' + 'numpy.ndarray!') self._axis_kernel_array = arr -class BlockSmoothingFilter(SmoothingFilter): +class BlockSmoothingFilter( + SmoothingFilter, +): """This class defines the histogram smoothing filter for smoothing a histogram via a block kernel function. 
The half-width of that block is specified via the nbins argument. """ - def __init__(self, nbins): - """ + def __init__( + self, + nbins, + **kwargs + ): + """Creates a new BlockSmoothingFilter instance. + Parameters ---------- nbins : int The number of neighboring bins into one direction of a histogram bin, which should be used to smooth that histogram bin. """ - if(not isinstance(nbins, int)): - raise TypeError('The nbins argument must be of type int!') - if(nbins <= 0): - raise ValueError('The nbins argument must be greater zero!') + if not isinstance(nbins, int): + raise TypeError( + 'The nbins argument must be of type int!') + if nbins <= 0: + raise ValueError( + 'The nbins argument must be greater zero!') arr = np.ones(2*nbins + 1, dtype=np.float64) - super(BlockSmoothingFilter, self).__init__(arr) + super().__init__(arr, **kwargs) -class GaussianSmoothingFilter(SmoothingFilter): +class GaussianSmoothingFilter( + SmoothingFilter, +): """This class defines the histogram smoothing filter for smoothing a histogram via a Gaussian kernel function. The width of that Gaussian is approximately one standard deviation, spread over nbins on each side of the central histogram bin. """ - def __init__(self, nbins): - """ + def __init__( + self, + nbins, + **kwargs, + ): + """Creates a new GaussianSmoothingFilter instance. + Parameters ---------- nbins : int The number of neighboring bins into one direction of a histogram bin, which should be used to smooth that histogram bin. 
""" - if(not isinstance(nbins, int)): - raise TypeError('The nbins argument must be of type int!') - if(nbins <= 0): - raise ValueError('The nbins argument must be greater zero!') + if not isinstance(nbins, int): + raise TypeError( + 'The nbins argument must be of type int!') + if nbins <= 0: + raise ValueError( + 'The nbins argument must be greater zero!') val = 1.6635 r = np.linspace(-val, val, 2*nbins + 1) arr = scipy.stats.norm.pdf(r) - super(GaussianSmoothingFilter, self).__init__(arr) + super().__init__(arr, **kwargs) diff --git a/skyllh/core/source_hypo_group.py b/skyllh/core/source_hypo_group.py deleted file mode 100644 index 966769f903..0000000000 --- a/skyllh/core/source_hypo_group.py +++ /dev/null @@ -1,137 +0,0 @@ -# -*- coding: utf-8 -*- -import numpy as np - -from skyllh.core.py import issequenceof -from skyllh.core.detsigyield import DetSigYieldImplMethod -from skyllh.core.signal_generation import SignalGenerationMethod -from skyllh.physics.source import SourceModel -from skyllh.physics.flux import FluxModel - - -class SourceHypoGroup(object): - """The source hypothesis group class provides a data container to describe - a group of sources that share the same flux model, detector signal yield, - and signal generation implementation methods. - """ - def __init__( - self, sources, fluxmodel, detsigyield_implmethods, - sig_gen_method=None, source_weights=None): - """Constructs a new source hypothesis group. - - Parameters - ---------- - sources : SourceModel | sequence of SourceModel - The source or sequence of sources that define the source group. - fluxmodel : instance of FluxModel - The FluxModel instance that applies to the list of sources of the - group. - detsigyield_implmethods : sequence of DetSigYieldImplMethod instances - The sequence of detector signal yield implementation method - instances, which should be used to create the detector signal - yield for the sources of this group. 
Each element is the - detector signal yield implementation method for the particular - dataset, if several datasets are used. If this list contains only - one implementation method, it should be used for all datasets. - sig_gen_method : SignalGenerationMethod instance | None - The instance of SignalGenerationMethod that implements the signal - generation for the specific detector and source hypothesis. It can - be set to None, which means, no signal can be generated. Useful for - data unblinding and data trial generation, where no signal is - required. - source_weights : float | sequence of floats | None - The sequence of relative source weights, normalized to 1. - """ - self.source_list = sources - self.fluxmodel = fluxmodel - self.detsigyield_implmethod_list = detsigyield_implmethods - self.sig_gen_method = sig_gen_method - self.source_weights = source_weights - - @property - def source_list(self): - """The list of SourceModel instances for which the group is defined. - """ - return self._source_list - @source_list.setter - def source_list(self, sources): - if(isinstance(sources, SourceModel)): - sources = [ sources ] - if(not issequenceof(sources, SourceModel)): - raise TypeError('The source_list property must be an instance of SourceModel or a sequence of SourceModel instances!') - self._source_list = list(sources) - - @property - def fluxmodel(self): - """The FluxModel instance that applies to the list of sources of this - source group. - """ - return self._fluxmodel - @fluxmodel.setter - def fluxmodel(self, fluxmodel): - if(not isinstance(fluxmodel, FluxModel)): - raise TypeError('The fluxmodel property must be an instance of ' - 'FluxModel!') - self._fluxmodel = fluxmodel - - @property - def detsigyield_implmethod_list(self): - """The list of DetSigYieldImplMethod instances, which should be used to - create the detector signal yield for this group of sources. 
Each - element is the detector signal yield implementation method for - the particular dataset, if several datasets are used. If this list - contains only one implementation method, it should be used for all - datasets. - """ - return self._detsigyield_implmethod_list - @detsigyield_implmethod_list.setter - def detsigyield_implmethod_list(self, methods): - if(isinstance(methods, DetSigYieldImplMethod)): - methods = [ methods ] - if(not issequenceof(methods, DetSigYieldImplMethod)): - raise TypeError('The detsigyield_implmethod_list property must be ' - 'a sequence of DetSigYieldImplMethod instances!') - self._detsigyield_implmethod_list = methods - - @property - def sig_gen_method(self): - """The instance of SignalGenerationMethod that implements the signal - generation for the specific detector and source hypothesis. It can - be None, which means, no signal can be generated. Useful for - data unblinding and data trial generation, where no signal is - required. - """ - return self._sig_gen_method - @sig_gen_method.setter - def sig_gen_method(self, method): - if(method is not None): - if(not isinstance(method, SignalGenerationMethod)): - raise TypeError('The sig_gen_method property must be an ' - 'instance of SignalGenerationMethod!') - self._sig_gen_method = method - - @property - def source_weights(self): - """The 1d array of relative source weights. 
- """ - return self._source_weights - @source_weights.setter - def source_weights(self, source_weights): - if(source_weights is None): - self._source_weights = source_weights - else: - if(isinstance(source_weights, (int, float))): - source_weights = [source_weights] - if(not issequenceof(source_weights, (int, float))): - raise TypeError( - 'The source_weights property must be a sequence of floats!') - if not(1.0 - 1e-3 <= np.sum(source_weights) <= 1.0 + 1e-3): - raise ValueError( - 'The sum of source_weights has to be equal to 1!') - self._source_weights = np.array(source_weights) - - @property - def n_sources(self): - """(read-only) The number of sources within this source hypothesis - group. - """ - return len(self._source_list) diff --git a/skyllh/core/source_hypo_grouping.py b/skyllh/core/source_hypo_grouping.py new file mode 100644 index 0000000000..bf30d75f33 --- /dev/null +++ b/skyllh/core/source_hypo_grouping.py @@ -0,0 +1,425 @@ +# -*- coding: utf-8 -*- + +"""This module contains classes for source hypothesis grouping functionalities. +Same kind sources can be grouped to allow more efficient calculations in the +analysis. +""" + +import numpy as np + +from skyllh.core.display import ( + add_leading_text_line_padding, + INDENTATION_WIDTH, +) +from skyllh.core.py import ( + classname, + issequenceof, +) +from skyllh.core.detsigyield import ( + DetSigYieldBuilder, +) +from skyllh.core.flux_model import ( + FluxModel, +) +from skyllh.core.signal_generation import ( + SignalGenerationMethod, +) +from skyllh.core.source_model import ( + SourceModel, +) +from skyllh.core.types import ( + SourceHypoGroup_t, +) + + +class SourceHypoGroup( + SourceHypoGroup_t): + """The source hypothesis group class provides a data container to describe + a group of sources that share the same flux model, detector signal yield, + and signal generation methods. 
+ """ + + def __init__( + self, + sources, + fluxmodel, + detsigyield_builders, + sig_gen_method=None, + **kwargs): + """Constructs a new source hypothesis group. + + Parameters + ---------- + sources : SourceModel | sequence of SourceModel + The source or sequence of sources that define the source group. + fluxmodel : instance of FluxModel + The FluxModel instance that applies to the list of sources of the + group. + detsigyield_builders : sequence of DetSigYieldBuilder instances + The sequence of detector signal yield builder instances, + which should be used to create the detector signal + yield for the sources of this group. Each element is the + detector signal yield builder for the particular dataset, if + several datasets are used. If this list contains only one builder, + it should be used for all datasets. + sig_gen_method : SignalGenerationMethod instance | None + The instance of SignalGenerationMethod that implements the signal + generation for the specific detector and source hypothesis. It can + be set to None, which means, no signal can be generated. Useful for + data unblinding and data trial generation, where no signal is + required. + """ + self.source_list = sources + self.fluxmodel = fluxmodel + self.detsigyield_builder_list = detsigyield_builders + self.sig_gen_method = sig_gen_method + + @property + def source_list(self): + """The list of SourceModel instances for which the group is defined. + """ + return self._source_list + + @source_list.setter + def source_list(self, sources): + if isinstance(sources, SourceModel): + sources = [sources] + if not issequenceof(sources, SourceModel): + raise TypeError( + 'The source_list property must be an instance of SourceModel ' + 'or a sequence of SourceModel instances! ' + f'Its current type is {classname(sources)}.') + self._source_list = list(sources) + + @property + def fluxmodel(self): + """The FluxModel instance that applies to the list of sources of this + source group. 
+ """ + return self._fluxmodel + + @fluxmodel.setter + def fluxmodel(self, fluxmodel): + if not isinstance(fluxmodel, FluxModel): + raise TypeError( + 'The fluxmodel property must be an instance of FluxModel! ' + f'Its current type is {classname(fluxmodel)}.') + self._fluxmodel = fluxmodel + + @property + def detsigyield_builder_list(self): + """The list of DetSigYieldBuilder instances, which should be used to + create the detector signal yield for this group of sources. Each + element is the detector signal yield builder for the particular dataset, + if several datasets are used. If this list contains only one builder, + it should be used for all datasets. + """ + return self._detsigyield_builder_list + + @detsigyield_builder_list.setter + def detsigyield_builder_list(self, builders): + if isinstance(builders, DetSigYieldBuilder): + builders = [builders] + if not issequenceof(builders, DetSigYieldBuilder): + raise TypeError( + 'The detsigyield_builder_list property must be a sequence of ' + 'DetSigYieldBuilder instances!') + self._detsigyield_builder_list = builders + + @property + def sig_gen_method(self): + """The instance of SignalGenerationMethod that implements the signal + generation for the specific detector and source hypothesis. It can + be None, which means, no signal can be generated. Useful for + data unblinding and data trial generation, where no signal is + required. + """ + return self._sig_gen_method + + @sig_gen_method.setter + def sig_gen_method(self, method): + if method is not None: + if not isinstance(method, SignalGenerationMethod): + raise TypeError( + 'The sig_gen_method property must be an instance of ' + 'SignalGenerationMethod! ' + f'Its current type is {classname(method)}.') + self._sig_gen_method = method + + @property + def n_sources(self): + """(read-only) The number of sources within this source hypothesis + group. 
+ """ + return len(self._source_list) + + def __str__(self): + """Pretty string representation of this SourceHypoGroup instance. + """ + s = f'{classname(self)}:\n' + + s1 = f'sources ({len(self._source_list)}):' + for (idx, source) in enumerate(self._source_list): + s1 += '\n' + s2 = f'{idx}: {source}' + s1 += add_leading_text_line_padding(INDENTATION_WIDTH, s2) + s1 += '\n' + s1 += 'fluxmodel:\n' + s2 = f'{self._fluxmodel}' + s1 += add_leading_text_line_padding(INDENTATION_WIDTH, s2) + s1 += '\n' + s1 += f'detector signal yield builders ({len(self._detsigyield_builder_list)}):\n' + s2 = '\n'.join((classname(builder) for builder in self._detsigyield_builder_list)) + s1 += add_leading_text_line_padding(INDENTATION_WIDTH, s2) + s1 += '\n' + s1 += 'signal generation method:\n' + s2 = f'{classname(self._sig_gen_method)}' + s1 += add_leading_text_line_padding(INDENTATION_WIDTH, s2) + + s += add_leading_text_line_padding(INDENTATION_WIDTH, s1) + + return s + + def get_source_weights(self): + """Gets the weight from each source of this source hypothesis group. + + Returns + ------- + weights : numpy ndarray | None + The (N_sources,)-shaped numpy ndarray holding the theoretical + weight of each source. + It is ``None`` if any of the individual source weights is None. + """ + weights = [] + for src in self._source_list: + if src.weight is None: + return None + weights.append(src.weight) + + return np.array(weights) + + +class SourceHypoGroupManager( + object): + """The source hypothesis group manager provides the functionality to group + sources of the same source hypothesis, i.e. spatial model and flux model, + with an assigned detector signal yield implementation method. + + This helps to evaluate the log-likelihood ratio function in an efficient + way. + """ + def __init__( + self, + src_hypo_groups=None, + **kwargs): + """Creates a new source hypothesis group manager instance. 
+
+        Parameters
+        ----------
+        src_hypo_groups : SourceHypoGroup instance |
+                          sequence of SourceHypoGroup instances | None
+            The SourceHypoGroup instances to initialize the manager with.
+        """
+        super().__init__(**kwargs)
+
+        self._shg_list = list()
+        # Define a 2D numpy array of shape (N_sources,2) that maps the source
+        # index (0 to N_sources-1) to the index of the group and the source
+        # index within the group for fast access.
+        self._sidx_to_gidx_gsidx_map_arr = np.empty((0, 2), dtype=np.int32)
+
+        # Add source hypo groups if specified.
+        if src_hypo_groups is not None:
+            if isinstance(src_hypo_groups, SourceHypoGroup):
+                src_hypo_groups = [src_hypo_groups]
+            if not issequenceof(src_hypo_groups, SourceHypoGroup):
+                raise TypeError(
+                    'The src_hypo_groups argument must be an instance of '
+                    'SourceHypoGroup, or a sequence of SourceHypoGroup '
+                    'instances!')
+            for shg in src_hypo_groups:
+                self._shg_list.append(shg)
+                self._extend_sidx_to_gidx_gsidx_map_arr(shg)
+
+    @property
+    def source_list(self):
+        """The list of defined SourceModel instances.
+        """
+        source_list = []
+        for shg in self._shg_list:
+            source_list += shg.source_list
+        return source_list
+
+    @property
+    def n_sources(self):
+        """(read-only) The total number of sources defined in all source groups.
+        """
+        return self._sidx_to_gidx_gsidx_map_arr.shape[0]
+
+    @property
+    def n_src_hypo_groups(self):
+        """DEPRECATED: Use n_shgs instead.
+        The number of defined source hypothesis groups.
+        """
+        return len(self._shg_list)
+
+    @property
+    def n_shgs(self):
+        """The number of defined source hypothesis groups.
+        """
+        return len(self._shg_list)
+
+    @property
+    def shg_list(self):
+        """(read-only) The list of source hypothesis groups, i.e.
+        SourceHypoGroup instances.
+        """
+        return self._shg_list
+
+    def __str__(self):
+        """Pretty string representation of this SourceHypoGroupManager.
+        """
+        s = f'{classname(self)}\n'
+
+        s1 = 'Source Hypothesis Groups:\n'
+        s1 += '========================='
+        for (idx, shg) in enumerate(self._shg_list):
+            s1 += '\n'
+            s1 += add_leading_text_line_padding(INDENTATION_WIDTH, f'{idx}: {shg}')
+
+        s += add_leading_text_line_padding(INDENTATION_WIDTH, s1)
+
+        return s
+
+    def _extend_sidx_to_gidx_gsidx_map_arr(self, shg):
+        """Extends the source index to (group index, group source index) map
+        array by one source hypo group.
+
+        Parameters
+        ----------
+        shg : SourceHypoGroup instance
+            The SourceHypoGroup instance for which the map array should get
+            extended.
+        """
+        arr = np.empty((shg.n_sources, 2), dtype=np.int32)
+        arr[:, 0] = self.n_src_hypo_groups-1  # Group index.
+        arr[:, 1] = np.arange(shg.n_sources)  # Group source index.
+        self._sidx_to_gidx_gsidx_map_arr = np.vstack(
+            (self._sidx_to_gidx_gsidx_map_arr, arr))
+
+    def create_source_hypo_group(
+            self,
+            sources,
+            fluxmodel,
+            detsigyield_builders,
+            sig_gen_method=None):
+        """Creates and adds a source hypothesis group to this source hypothesis
+        group manager. A source hypothesis group shares sources of the same
+        source model with the same flux model and hence the same detector signal
+        yield and signal generation methods.
+
+        Parameters
+        ----------
+        sources : SourceModel | sequence of SourceModel
+            The source or sequence of sources that define the source group.
+        fluxmodel : instance of FluxModel
+            The FluxModel instance that applies to the list of sources of the
+            group.
+        detsigyield_builders : sequence of DetSigYieldBuilder instances
+            The sequence of detector signal yield builder instances,
+            which should be used to create the detector signal
+            yield for the sources of this group. Each element is the
+            detector signal yield builder for the particular dataset, if
+            several datasets are used. If this list contains only one builder,
+            it should be used for all datasets.
+ sig_gen_method : instance of SignalGenerationMethod | None + The SignalGenerationMethod instance that implements the detector + and source hypothesis specific signal generation. + It can be set to None which means no signal can be generated. + """ + # Create the source group. + group = SourceHypoGroup( + sources=sources, + fluxmodel=fluxmodel, + detsigyield_builders=detsigyield_builders, + sig_gen_method=sig_gen_method) + + # Add the group. + self._shg_list.append(group) + + # Extend the source index to (group index, group source index) map + # array. + self._extend_sidx_to_gidx_gsidx_map_arr(group) + + def get_fluxmodel_by_src_idx(self, src_idx): + """Retrieves the FluxModel instance for the source specified by its + source index. + + Parameters + ---------- + src_idx : int + The index of the source, which must be in the range + [0, N_sources-1]. + + Returns + ------- + fluxmodel : instance of FluxModel + The FluxModel instance that applies to the specified source. + """ + gidx = self._sidx_to_gidx_gsidx_map_arr[src_idx, 0] + return self._shg_list[gidx]._fluxmodel + + def get_detsigyield_builder_list_by_src_idx(self, src_idx): + """Retrieves the list of DetSigYieldBuilder instances for the source + specified by its source index. + + Parameters + ---------- + src_idx : int + The index of the source, which must be in the range + [0, N_sources-1]. + + Returns + ------- + detsigyield_builder_list : list of DetSigYieldBuilder instances + The list of DetSigYieldBuilder instances that apply to the + specified source. + """ + gidx = self._sidx_to_gidx_gsidx_map_arr[src_idx, 0] + return self._shg_list[gidx]._detsigyield_builder_list + + def get_src_mask_of_shg(self, shg_idx): + """Creates a source mask for the sources of the ``shg_idx`` th source + hypothesis group. + + Parameters + ---------- + shg_idx : int + The index of the source hypothesis group. 
+ + Returns + ------- + src_mask : instance of numpy ndarray + The (N_sources,)-shaped numpy ndarray of bool holding the mask for + selecting the sources of the given source hypothesis group. + """ + return (self._sidx_to_gidx_gsidx_map_arr[:, 0] == shg_idx) + + def get_src_idxs_of_shg(self, shg_idx): + """Creates an array of indices of sources that belong to the given + source hypothesis group. + + Parameters + ---------- + shg_idx : int + The index of the source hypothesis group. + + Returns + ------- + src_idxs : instance of numpy ndarray + The numpy ndarray of int holding the indices of the sources that + belong to the given source hypothesis group. + """ + src_idxs = np.arange(self.n_sources)[self.get_src_mask_of_shg(shg_idx)] + + return src_idxs diff --git a/skyllh/core/source_hypothesis.py b/skyllh/core/source_hypothesis.py deleted file mode 100644 index fd8a7f03f7..0000000000 --- a/skyllh/core/source_hypothesis.py +++ /dev/null @@ -1,191 +0,0 @@ -# -*- coding: utf-8 -*- - -"""The source_hypothesis module provides classes to define groups of source -hypotheses. The SourceHypoGroupManager manages the groups of source hypotheses. -""" - -import numpy as np - -from skyllh.core.parameters import make_params_hash -from skyllh.core.py import issequenceof -from skyllh.core.source_hypo_group import SourceHypoGroup - - -class SourceHypoGroupManager(object): - """The source hypothesis group manager provides the functionality to group - sources of the same source hypothesis, i.e. spatial model and flux model, - with an assign detector signal efficiency implementation method. - - This helps to evaluate the log-likelihood ratio function in an efficient - way. - """ - def __init__(self, src_hypo_groups=None): - """Creates a new source hypothesis group manager instance. - - Parameters - ---------- - src_hypo_groups : SourceHypoGroup instance | - sequence of SourceHypoGroup instances | None - The SourceHypoGroup instances to initialize the manager with. 
- """ - super(SourceHypoGroupManager, self).__init__() - - self._src_hypo_group_list = list() - # Define a 2D numpy array of shape (N_sources,2) that maps the source - # index (0 to N_sources-1) to the index of the group and the source - # index within the group for fast access. - self._sidx_to_gidx_gsidx_map_arr = np.empty((0,2), dtype=np.int64) - - # Add source hypo groups if specified. - if(src_hypo_groups is not None): - if(isinstance(src_hypo_groups, SourceHypoGroup)): - src_hypo_groups = [ src_hypo_groups ] - if(not issequenceof(src_hypo_groups, SourceHypoGroup)): - raise TypeError('The src_hypo_groups argument must be an ' - 'instance of SourceHypoGroup, or a sequence of ' - 'SourceHypoGroup instances!') - for shg in src_hypo_groups: - self._src_hypo_group_list.append(shg) - self._extend_sidx_to_gidx_gsidx_map_arr(shg) - - @property - def source_list(self): - """The list of defined SourceModel instances. - """ - source_list = [] - for group in self._src_hypo_group_list: - source_list += group.source_list - return source_list - - @property - def n_sources(self): - """(read-only) The total number of sources defined in all source groups. - """ - return self._sidx_to_gidx_gsidx_map_arr.shape[0] - - @property - def n_src_hypo_groups(self): - """The number of defined source hypothesis groups. - """ - return len(self._src_hypo_group_list) - - @property - def src_hypo_group_list(self): - """(read-only) The list of source hypothesis groups, i.e. - SourceHypoGroup instances. - """ - return self._src_hypo_group_list - - def _extend_sidx_to_gidx_gsidx_map_arr(self, shg): - """Extends the source index to (group index, group source index) map - array by one source hypo group. - - Parameters - ---------- - shg : SourceHypoGroup instance - The SourceHypoGroup instance for which the map array should get - extented. - """ - arr = np.empty((shg.n_sources,2), dtype=np.int64) - arr[:,0] = self.n_src_hypo_groups-1 # Group index. 
- arr[:,1] = np.arange(shg.n_sources) # Group source index. - self._sidx_to_gidx_gsidx_map_arr = np.vstack( - (self._sidx_to_gidx_gsidx_map_arr, arr)) - - def add_source_hypo_group( - self, sources, fluxmodel, detsigyield_implmethods, sig_gen_method=None - ): - """Adds a source hypothesis group to the source hypothesis group - manager. A source hypothesis group share sources of the same source - model with the same flux model and hence the same detector signal - yield and signal generation implementation methods. - - Parameters - ---------- - sources : SourceModel | sequence of SourceModel - The source or sequence of sources that define the source group. - fluxmodel : instance of FluxModel - The FluxModel instance that applies to the list of sources of the - group. - detsigyield_implmethods : sequence of DetSigYieldImplMethod instances - The sequence of detector signal yield implementation method - instances, which should be used to create the detector signal - yield for the sources of the group. Each element is the - detector signal yield implementation method for the particular - dataset, if several datasets are used. If this list contains only - one implementation method, it should be used for all datasets. - sig_gen_method : instance of SignalGenerationMethod | None - The SignalGenerationMethod instance that implements the detector - and source hypothesis specific signal generation. - It can be set to None which means no signal can be generated. - """ - # Create the source group. - group = SourceHypoGroup(sources, fluxmodel, detsigyield_implmethods, sig_gen_method) - - # Add the group. - self._src_hypo_group_list.append(group) - - # Extend the source index to (group index, group source index) map - # array. - self._extend_sidx_to_gidx_gsidx_map_arr(group) - - def get_fluxmodel_by_src_idx(self, src_idx): - """Retrieves the FluxModel instance for the source specified by its - source index. 
- - Parameters - ---------- - src_idx : int - The index of the source, which must be in the range - [0, N_sources-1]. - - Returns - ------- - fluxmodel : instance of FluxModel - The FluxModel instance that applies to the specified source. - """ - gidx = self._sidx_to_gidx_gsidx_map_arr[src_idx,0] - return self._src_hypo_group_list[gidx]._fluxmodel - - def get_detsigyield_implmethod_list_by_src_idx(self, src_idx): - """Retrieves the list of DetSigYieldImplMethod instances for the source - specified by its source index. - - Parameters - ---------- - src_idx : int - The index of the source, which must be in the range - [0, N_sources-1]. - - Returns - ------- - detsigyield_implmethod_list : list of DetSigYieldImplMethod instances - The list of DetSigYieldImplMethod instances that apply to the - specified source. - """ - gidx = self._sidx_to_gidx_gsidx_map_arr[src_idx,0] - return self._src_hypo_group_list[gidx]._detsigyield_implmethod_list - - def get_fluxmodel_to_source_mapping(self): - """Returns the list of tuples mapping fluxmodel to the source indices. - - Returns - ------- - fluxmodel_to_source_mapping : list of (hash, src_index_array) tuples - The list that maps hash of the source hypothesis fluxmodel to - the corresponding source indices array in the source hypothesis - group. - """ - fluxmodel_to_source_mapping = [] - n_sources_offset = 0 - for shg in self._src_hypo_group_list: - # Mapping tuple. - fluxmodel_to_source_mapping.append( - ( - make_params_hash({'fluxmodel': shg.fluxmodel}), - n_sources_offset + np.arange(shg.n_sources) - ) - ) - n_sources_offset += shg.n_sources - - return fluxmodel_to_source_mapping diff --git a/skyllh/core/source_model.py b/skyllh/core/source_model.py new file mode 100644 index 0000000000..cba9b285c7 --- /dev/null +++ b/skyllh/core/source_model.py @@ -0,0 +1,326 @@ +# -*- coding: utf-8 -*- + +"""The :mod:`~skyllh.core.model` module contains the base class ``SourceModel`` +for modelling a source in the sky. 
What kind of properties this source has is +modeled by a derived class. The most common one is the PointLikeSource source +model for a point-like source at a given location in the sky. +""" + +import numpy as np + +from skyllh.core.model import ( + Model, + ModelCollection, +) +from skyllh.core.py import ( + classname, + float_cast, + issequenceof, + str_cast, + typename, +) + + +class SourceModel( + Model, +): + """The base class for all source models in SkyLLH. A source can have a + relative weight w.r.t. other sources. + """ + def __init__( + self, + name=None, + classification=None, + weight=None, + **kwargs): + """Creates a new source model instance. + + Parameters + ---------- + name : str | None + The name of the source model. + classification : str | None + The astronomical classification of the source. + weight : float | None + The relative weight of the source w.r.t. other sources. + If set to None, unity will be used. + """ + super().__init__( + name=name, + **kwargs) + + self.classification = classification + self.weight = weight + + @property + def classification(self): + """The astronomical classification of the source. + """ + return self._classification + + @classification.setter + def classification(self, c): + self._classification = str_cast( + c, + 'The classification property must be castable to type str!', + allow_None=True) + + @property + def weight(self): + """The weight of the source. + """ + return self._weight + + @weight.setter + def weight(self, w): + if w is None: + w = 1. + w = float_cast( + w, + 'The weight property must be castable to type float!') + self._weight = w + + +class SourceModelCollection( + ModelCollection, +): + """This class describes a collection of source models. It can be used to + group sources into a single object, for instance for a stacking analysis. + """ + @staticmethod + def cast( + obj, + errmsg=None, + **kwargs): + """Casts the given object to a SourceModelCollection object. 
If the cast + fails, a TypeError with the given error message is raised. + + Parameters + ---------- + obj : SourceModel | sequence of SourceModel | SourceModelCollection | + None + The object that should be casted to SourceModelCollection. + If set to None, an empty SourceModelCollection is created. + errmsg : str | None + The error message if the cast fails. + If set to None, a generic error message will be used. + + Additional keyword arguments + ---------------------------- + Additional keyword arguments are passed to the constructor of the + SourceModelCollection class. + + Raises + ------ + TypeError + If the cast failed. + """ + if obj is None: + return SourceModelCollection( + sources=None, source_type=SourceModel, **kwargs) + + if isinstance(obj, SourceModel): + return SourceModelCollection( + sources=[obj], source_type=SourceModel, **kwargs) + + if isinstance(obj, SourceModelCollection): + return obj + + if issequenceof(obj, SourceModel): + return SourceModelCollection( + sources=obj, source_type=SourceModel, **kwargs) + + if errmsg is None: + errmsg = (f'Cast of object "{str(obj)}" of type ' + f'"{typename(obj)}" to SourceModelCollection failed!') + raise TypeError(errmsg) + + def __init__( + self, + sources=None, + source_type=None, + **kwargs): + """Creates a new source collection. + + Parameters + ---------- + sources : sequence of source_type instances | None + The sequence of sources this collection should be initalized with. + If set to None, an empty SourceModelCollection instance is created. + source_type : type | None + The type of the source. + If set to None (default), SourceModel will be used. + """ + if source_type is None: + source_type = SourceModel + + super().__init__( + models=sources, + model_type=source_type, + **kwargs) + + @property + def source_type(self): + """(read-only) The type of the source model. + This property is an alias for the `obj_type` property. 
+        """
+        return self.model_type
+
+    @property
+    def sources(self):
+        """(read-only) The list of sources of type ``source_type``.
+        """
+        return self.models
+
+
+class IsPointlike(
+        object):
+    """This is a classifier class that can be used by other classes to indicate
+    that the specific class describes a point-like object.
+    """
+    def __init__(
+            self,
+            ra_func_instance=None,
+            get_ra_func=None,
+            set_ra_func=None,
+            dec_func_instance=None,
+            get_dec_func=None,
+            set_dec_func=None,
+            **kwargs):
+        """Constructor method. Gets called when an instance of a class is
+        created which derives from this IsPointlike class.
+
+        Parameters
+        ----------
+        ra_func_instance : object
+            The instance object the right-ascension property's getter and setter
+            functions are defined in.
+        get_ra_func : callable
+            The callable object of the getter function of the right-ascension
+            property. It must have the call signature
+            `__call__(ra_func_instance)`.
+        set_ra_func : callable
+            The callable object of the setter function of the right-ascension
+            property. It must have the call signature
+            `__call__(ra_func_instance, value)`.
+        dec_func_instance : object
+            The instance object the declination property's getter and setter
+            functions are defined in.
+        get_dec_func : object
+            The callable object of the getter function of the declination
+            property. It must have the call signature
+            `__call__(dec_func_instance)`.
+        set_dec_func : object
+            The callable object of the setter function of the declination
+            property. It must have the call signature
+            `__call__(dec_func_instance, value)`.
+        """
+        super().__init__(**kwargs)
+
+        self._ra_func_instance = ra_func_instance
+        self._get_ra_func = get_ra_func
+        self._set_ra_func = set_ra_func
+
+        self._dec_func_instance = dec_func_instance
+        self._get_dec_func = get_dec_func
+        self._set_dec_func = set_dec_func
+
+    @property
+    def ra(self):
+        """The right-ascension coordinate of the point-like source.
+ """ + return self._get_ra_func(self._ra_func_instance) + + @ra.setter + def ra(self, v): + v = float_cast( + v, + 'The ra property must be castable to type float!') + self._set_ra_func(self._ra_func_instance, v) + + @property + def dec(self): + """The declination coordinate of the point-like source. + """ + return self._get_dec_func(self._dec_func_instance) + + @dec.setter + def dec(self, v): + v = float_cast( + v, + 'The dec property must be castable to type float!') + self._set_dec_func(self._dec_func_instance, v) + + +class PointLikeSource( + SourceModel, + IsPointlike): + """The PointLikeSource class is a source model for a point-like source + object in the sky at a given location (right-ascention and declination). + """ + def __init__( + self, + ra, + dec, + name=None, + weight=None, + **kwargs): + """Creates a new PointLikeSource instance for defining a point-like + source. + + Parameters + ---------- + ra : float + The right-ascention coordinate of the source in radians. + dec : float + The declination coordinate of the source in radians. + name : str | None + The name of the source. + weight : float | None + The relative weight of the source w.r.t. other sources. + If set to None, unity will be used. + """ + super().__init__( + name=name, + weight=weight, + ra_func_instance=self, + get_ra_func=type(self)._get_ra, + set_ra_func=type(self)._set_ra, + dec_func_instance=self, + get_dec_func=type(self)._get_dec, + set_dec_func=type(self)._set_dec, + **kwargs, + ) + + self.ra = ra + self.dec = dec + + def _get_ra(self): + return self._ra + + def _set_ra(self, ra): + self._ra = ra + + def _get_dec(self): + return self._dec + + def _set_dec(self, dec): + self._dec = dec + + def __str__(self): + """Pretty string representation. 
+ """ + c = '' + if self.classification is not None: + c = f', classification={self.classification}' + + s = ( + f'{classname(self)}: "{self.name}": ' + '{ ' + f'ra={np.rad2deg(self.ra):.3f} deg, ' + f'dec={np.rad2deg(self.dec):.3f} deg' + f'{c}' + ' }' + ) + + return s diff --git a/skyllh/core/storage.py b/skyllh/core/storage.py index b05a85c164..599b3866d6 100644 --- a/skyllh/core/storage.py +++ b/skyllh/core/storage.py @@ -12,7 +12,10 @@ get_byte_size_prefix, getsizeof, issequence, - issequenceof + issequenceof, +) +from skyllh.core import ( + tool, ) from skyllh.core import display as dsp @@ -21,7 +24,10 @@ # file formats. _FILE_LOADER_REG = dict() -def register_FileLoader(formats, fileloader_cls): + +def register_FileLoader( + formats, + fileloader_cls): """Registers the given file formats (file extensions) to the given FileLoader class. @@ -30,23 +36,29 @@ def register_FileLoader(formats, fileloader_cls): formats : str | list of str The list of file name extensions that should be mapped to the FileLoader class. - fileloader_cls : FileLoader + fileloader_cls : instance of FileLoader The subclass of FileLoader that should be used for the given file formats. 
""" - if(isinstance(formats, str)): - formats = [ formats ] - if(not issequence(formats)): - raise TypeError('The "formats" argument must be a sequence!') - if(not issubclass(fileloader_cls, FileLoader)): - raise TypeError('The "fileloader_cls" argument must be a subclass of FileLoader!') + if isinstance(formats, str): + formats = [formats] + if not issequence(formats): + raise TypeError( + 'The "formats" argument must be a sequence!') + if not issubclass(fileloader_cls, FileLoader): + raise TypeError( + 'The "fileloader_cls" argument must be a subclass of FileLoader!') for fmt in formats: - if(fmt in _FILE_LOADER_REG.keys()): - raise KeyError('The format "%s" is already registered!'%(fmt)) + if fmt in _FILE_LOADER_REG.keys(): + raise KeyError( + f'The format "{fmt}" is already registered!') _FILE_LOADER_REG[fmt] = fileloader_cls -def create_FileLoader(pathfilenames, **kwargs): + +def create_FileLoader( + pathfilenames, + **kwargs): """Creates the appropriate FileLoader object for the given file names. It looks up the FileLoader class from the FileLoader registry for the file name extension of the first file name in the given list. @@ -67,36 +79,46 @@ def create_FileLoader(pathfilenames, **kwargs): fileloader : FileLoader The appropiate FileLoader instance for the given type of data files. """ - if(isinstance(pathfilenames, str)): + if isinstance(pathfilenames, str): pathfilenames = [pathfilenames] - if(not issequenceof(pathfilenames, str)): - raise TypeError('The pathfilenames argument must be a sequence of str!') + if not issequenceof(pathfilenames, str): + raise TypeError( + 'The pathfilenames argument must be a sequence of str!') # Sort the file names extensions with shorter extensions before longer ones # to support a format that is sub-string of another format. 
formats = sorted(_FILE_LOADER_REG.keys()) for fmt in formats: - l = len(fmt) - if(pathfilenames[0][-l:].lower() == fmt.lower()): + fmt_len = len(fmt) + if pathfilenames[0][-fmt_len:].lower() == fmt.lower(): cls = _FILE_LOADER_REG[fmt] return cls(pathfilenames, **kwargs) - raise RuntimeError('No FileLoader class is suitable to load the data file ' - '"%s"!'%(pathfilenames[0])) + raise RuntimeError( + 'No FileLoader class is suitable to load the data file ' + f'"{pathfilenames[0]}"!') + -def assert_file_exists(pathfilename): +def assert_file_exists( + pathfilename): """Checks if the given file exists and raises a RuntimeError if it does not exist. """ - if(not os.path.isfile(pathfilename)): - raise RuntimeError('The data file "%s" does not exist!'%(pathfilename)) + if not os.path.isfile(pathfilename): + raise RuntimeError( + f'The data file "{pathfilename}" does not exist!') -class FileLoader(object, metaclass=abc.ABCMeta): +class FileLoader( + object, + metaclass=abc.ABCMeta): """Abstract base class for a FileLoader class. """ - def __init__(self, pathfilenames, **kwargs): - """Initializes a new FileLoader instance. + def __init__( + self, + pathfilenames, + **kwargs): + """Creates a new FileLoader instance. Parameters ---------- @@ -104,7 +126,8 @@ def __init__(self, pathfilenames, **kwargs): The sequence of fully qualified file names of the data files that need to be loaded. """ - super(FileLoader, self).__init__(**kwargs) + super().__init__( + **kwargs) self.pathfilename_list = pathfilenames @@ -113,13 +136,15 @@ def pathfilename_list(self): """The list of fully qualified file names of the data files. 
""" return self._pathfilename_list + @pathfilename_list.setter def pathfilename_list(self, pathfilenames): - if(isinstance(pathfilenames, str)): - pathfilenames = [ pathfilenames ] - if(not issequence(pathfilenames)): - raise TypeError('The pathfilename_list property must be of type ' - 'str or a sequence of type str!') + if isinstance(pathfilenames, str): + pathfilenames = [pathfilenames] + if not issequence(pathfilenames): + raise TypeError( + 'The pathfilename_list property must be of type str or a ' + 'sequence of type str!') self._pathfilename_list = list(pathfilenames) @abc.abstractmethod @@ -129,17 +154,34 @@ def load_data(self, **kwargs): pass -class NPYFileLoader(FileLoader): +class NPYFileLoader( + FileLoader): """The NPYFileLoader class provides the data loading functionality for numpy data files containing numpy arrays. It uses the ``numpy.load`` function for loading the data and the numpy.append function to concatenate several data files. """ - def __init__(self, pathfilenames, **kwargs): - super(NPYFileLoader, self).__init__(pathfilenames) + def __init__( + self, + pathfilenames, + **kwargs): + """Creates a new NPYFileLoader instance. + + Parameters + ---------- + pathfilenames : str | sequence of str + The sequence of fully qualified file names of the data files that + need to be loaded. + """ + super().__init__( + pathfilenames=pathfilenames, + **kwargs) def _load_file_memory_efficiently( - self, pathfilename, keep_fields, dtype_convertions, + self, + pathfilename, + keep_fields, + dtype_convertions, dtype_convertion_except_fields): """Loads a single file in a memory efficient way. @@ -147,6 +189,8 @@ def _load_file_memory_efficiently( ---------- pathfilename : str The fully qualified file name of the to-be-loaded file. + keep_fields : list of str | None + The list of field names which should be kept. Returns ------- @@ -154,12 +198,15 @@ def _load_file_memory_efficiently( An instance of DataFieldRecordArray holding the data. 
""" assert_file_exists(pathfilename) + # Create a memory map into the data file. This loads the data only when # accessing the data. mmap_ndarray = np.load(pathfilename, mmap_mode='r') field_names = mmap_ndarray.dtype.names - fname_to_fidx = dict( - [ (fname,idx) for (idx,fname) in enumerate(field_names) ]) + fname_to_fidx = dict([ + (fname, idx) + for (idx, fname) in enumerate(field_names) + ]) dt_fields = mmap_ndarray.dtype.fields n_rows = mmap_ndarray.shape[0] @@ -168,14 +215,14 @@ def _load_file_memory_efficiently( # Create empty arrays for each column of length n_rows. for fname in field_names: # Ignore fields that should not get kept. - if((keep_fields is not None) and (fname not in keep_fields)): + if (keep_fields is not None) and (fname not in keep_fields): continue # Get the original data type of the field. dt = dt_fields[fname][0] # Convert the data type if requested. - if((fname not in dtype_convertion_except_fields) and - (dt in dtype_convertions)): + if (fname not in dtype_convertion_except_fields) and\ + (dt in dtype_convertions): dt = dtype_convertions[dt] data[fname] = np.empty((n_rows,), dtype=dt) @@ -189,7 +236,7 @@ def _load_file_memory_efficiently( data[fname][ridx] = row[fidx] # Reopen the data file after each given blocksize. - if(ridx % bs == 0): + if ridx % bs == 0: del mmap_ndarray mmap_ndarray = np.load(pathfilename, mmap_mode='r') @@ -202,12 +249,16 @@ def _load_file_memory_efficiently( return data def _load_file_time_efficiently( - self, pathfilename, keep_fields, dtype_convertions, + self, + pathfilename, + keep_fields, + dtype_convertions, dtype_convertion_except_fields): """Loads a single file in a time efficient way. This will load the data column-wise. """ assert_file_exists(pathfilename) + # Create a memory map into the data file. This loads the data only when # accessing the data. 
mmap_ndarray = np.load(pathfilename, mmap_mode='r') @@ -226,9 +277,12 @@ def _load_file_time_efficiently( return data - def load_data( - self, keep_fields=None, dtype_convertions=None, - dtype_convertion_except_fields=None, efficiency_mode=None): + def load_data( # noqa: C901 + self, + keep_fields=None, + dtype_convertions=None, + dtype_convertion_except_fields=None, + efficiency_mode=None): """Loads the data from the files specified through their fully qualified file names. @@ -248,11 +302,11 @@ def load_data( The efficiency mode the data should get loaded with. Possible values are: - - 'memory': + ``'memory'``: The data will be load in a memory efficient way. This will require more time, because all data records of a file will be loaded sequentially. - - 'time' + ``'time'`` The data will be loaded in a time efficient way. This will require more memory, because each data file gets loaded in memory at once. @@ -262,47 +316,50 @@ def load_data( Returns ------- - data : DataFieldRecordArray + data : instance of DataFieldRecordArray The DataFieldRecordArray holding the loaded data. Raises ------ RuntimeError if a file does not exist. 
""" - if(keep_fields is not None): - if(isinstance(keep_fields, str)): - keep_fields = [ keep_fields ] - elif(not issequenceof(keep_fields, str)): - raise TypeError('The keep_fields argument must be None, an ' - 'instance of type str, or a sequence of instances of ' - 'type str!') - - if(dtype_convertions is None): + if keep_fields is not None: + if isinstance(keep_fields, str): + keep_fields = [keep_fields] + elif not issequenceof(keep_fields, str): + raise TypeError( + 'The keep_fields argument must be None, an instance of ' + 'type str, or a sequence of instances of type str!') + + if dtype_convertions is None: dtype_convertions = dict() - elif(not isinstance(dtype_convertions, dict)): - raise TypeError('The dtype_convertions argument must be None, ' - 'or an instance of dict!') + elif not isinstance(dtype_convertions, dict): + raise TypeError( + 'The dtype_convertions argument must be None, or an instance ' + 'of dict!') - if(dtype_convertion_except_fields is None): + if dtype_convertion_except_fields is None: dtype_convertion_except_fields = [] - elif(isinstance(dtype_convertion_except_fields, str)): - dtype_convertion_except_fields = [ dtype_convertion_except_fields ] - elif(not issequenceof(dtype_convertion_except_fields, str)): - raise TypeError('The dtype_convertion_except_fields argument ' - 'must be a sequence of str instances.') + elif isinstance(dtype_convertion_except_fields, str): + dtype_convertion_except_fields = [dtype_convertion_except_fields] + elif not issequenceof(dtype_convertion_except_fields, str): + raise TypeError( + 'The dtype_convertion_except_fields argument must be a ' + 'sequence of str instances.') efficiency_mode2func = { 'memory': self._load_file_memory_efficiently, 'time': self._load_file_time_efficiently } - if(efficiency_mode is None): + if efficiency_mode is None: efficiency_mode = 'time' - if(not isinstance(efficiency_mode, str)): - raise TypeError('The efficiency_mode argument must be an instance ' - 'of type str!') - 
if(efficiency_mode not in efficiency_mode2func): - raise ValueError('The efficiency_mode argument value must be one ' - 'of %s!'%(', '.join(efficiency_mode2func.keys()))) + if not isinstance(efficiency_mode, str): + raise TypeError( + 'The efficiency_mode argument must be an instance of type str!') + if efficiency_mode not in efficiency_mode2func: + raise ValueError( + 'The efficiency_mode argument value must be one of ' + f'{", ".join(efficiency_mode2func.keys())}!') load_file_func = efficiency_mode2func[efficiency_mode] # Load the first data file. @@ -325,12 +382,95 @@ def load_data( return data -class PKLFileLoader(FileLoader): +class ParquetFileLoader( + FileLoader +): + """The ParquetFileLoader class provides the data loading functionality for + parquet files. It uses the ``pyarrow`` package. + """ + @tool.requires('pyarrow', 'pyarrow.parquet') + def __init__( + self, + pathfilenames, + **kwargs + ): + """Creates a new file loader instance for parquet data files. + + Parameters + ---------- + pathfilenames : str | sequence of str + The sequence of fully qualified file names of the data files that + need to be loaded. + """ + super().__init__( + pathfilenames=pathfilenames, + **kwargs) + + self.pa = tool.get('pyarrow') + self.pq = tool.get('pyarrow.parquet') + + def load_data( + self, + keep_fields=None, + dtype_convertions=None, + dtype_convertion_except_fields=None, + copy=False, + **kwargs, + ): + """Loads the data from the files specified through their fully qualified + file names. + + Parameters + ---------- + keep_fields : str | sequence of str | None + Load the data into memory only for these data fields. If set to + ``None``, all in-file-present data fields are loaded into memory. + dtype_convertions : dict | None + If not ``None``, this dictionary defines how data fields of specific + data types get converted into the specified data types. + This can be used to use less memory. 
+ dtype_convertion_except_fields : str | sequence of str | None + The sequence of field names whose data type should not get + converted. + copy : bool + If set to ``True``, the column data from the pyarrow.Table instance + will be copied into the DataFieldRecordArray. This should not be + necessary. + + Returns + ------- + data : instance of DataFieldRecordArray + The DataFieldRecordArray holding the loaded data. + """ + assert_file_exists(self.pathfilename_list[0]) + table = self.pq.read_table(self.pathfilename_list[0], columns=keep_fields) + for pathfilename in self.pathfilename_list[1:]: + assert_file_exists(pathfilename) + next_table = self.pq.read_table(pathfilename, columns=keep_fields) + table = self.pa.concat_tables([table, next_table]) + + data = DataFieldRecordArray( + data=table, + data_table_accessor=ParquetDataTableAccessor(), + keep_fields=keep_fields, + dtype_convertions=dtype_convertions, + dtype_convertion_except_fields=dtype_convertion_except_fields, + copy=copy) + + return data + + +class PKLFileLoader( + FileLoader): """The PKLFileLoader class provides the data loading functionality for pickled Python data files containing Python data structures. It uses the `pickle.load` function for loading the data from the file. """ - def __init__(self, pathfilenames, pkl_encoding=None, **kwargs): + def __init__( + self, + pathfilenames, + pkl_encoding=None, + **kwargs): """Creates a new file loader instance for a pickled data file. Parameters @@ -342,7 +482,9 @@ def __init__(self, pathfilenames, pkl_encoding=None, **kwargs): The encoding of the pickled data files. If None, the default encodings 'ASCII' and 'latin1' will be tried to load the data. """ - super(PKLFileLoader, self).__init__(pathfilenames) + super().__init__( + pathfilenames=pathfilenames, + **kwargs) self.pkl_encoding = pkl_encoding @@ -353,15 +495,18 @@ def pkl_encoding(self): load the data. 
""" return self._pkl_encoding + @pkl_encoding.setter def pkl_encoding(self, encoding): - if(encoding is not None): - if(not isinstance(encoding, str)): - raise TypeError('The pkl_encoding property must be None or of ' - 'type str!') + if encoding is not None: + if not isinstance(encoding, str): + raise TypeError( + 'The pkl_encoding property must be None or of type str!') self._pkl_encoding = encoding - def load_data(self, **kwargs): + def load_data( + self, + **kwargs): """Loads the data from the files specified through their fully qualified file names. @@ -378,7 +523,7 @@ def load_data(self, **kwargs): """ # Define the possible encodings of the pickled files. encodings = ['ASCII', 'latin1'] - if(self._pkl_encoding is not None): + if self._pkl_encoding is not None: encodings = [self._pkl_encoding] + encodings data = [] @@ -399,24 +544,30 @@ def load_data(self, **kwargs): ifile.seek(0) else: load_ok = True - if(obj is None): - raise RuntimeError('The file "%s" could not get unpickled! ' - 'No correct encoding available!'%(pathfilename)) + if obj is None: + raise RuntimeError( + f'The file "{pathfilename}" could not get unpickled! ' + 'No correct encoding available!') data.append(obj) - if(len(data) == 1): + if len(data) == 1: data = data[0] return data -class TextFileLoader(FileLoader): +class TextFileLoader( + FileLoader): """The TextFileLoader class provides the data loading functionality for data text files where values are stored in a comma, or whitespace, separated format. It uses the numpy.loadtxt function to load the data. It reads the first line of the text file for a table header. """ - def __init__(self, pathfilenames, header_comment='#', header_separator=None, + def __init__( + self, + pathfilenames, + header_comment='#', + header_separator=None, **kwargs): """Creates a new file loader instance for a text data file. @@ -431,7 +582,9 @@ def __init__(self, pathfilenames, header_comment='#', header_separator=None, The separator of the header field names. 
If None, it assumes whitespaces. """ - super().__init__(pathfilenames, **kwargs) + super().__init__( + pathfilenames=pathfilenames, + **kwargs) self.header_comment = header_comment self.header_separator = header_separator @@ -441,10 +594,12 @@ def header_comment(self): """The character that defines a comment line in the text file. """ return self._header_comment + @header_comment.setter def header_comment(self, s): - if(not isinstance(s, str)): - raise TypeError('The header_comment property must be of type str!') + if not isinstance(s, str): + raise TypeError( + 'The header_comment property must be of type str!') self._header_comment = s @property @@ -453,12 +608,14 @@ def header_separator(self): whitespaces. """ return self._header_separator + @header_separator.setter def header_separator(self, s): - if(s is not None): - if(not isinstance(s, str)): - raise TypeError('The header_separator property must be None or ' - 'of type str!') + if s is not None: + if not isinstance(s, str): + raise TypeError( + 'The header_separator property must be None or of type ' + 'str!') self._header_separator = s def _extract_column_names(self, line): @@ -479,7 +636,7 @@ def _extract_column_names(self, line): # Remove possible new-line character and leading white-spaces. line = line.strip() # Check if the line is a comment line. - if(line[0:len(self._header_comment)] != self._header_comment): + if line[0:len(self._header_comment)] != self._header_comment: return None # Remove the leading comment character(s). line = line.strip(self._header_comment) @@ -488,14 +645,18 @@ def _extract_column_names(self, line): # Split the line into the column names. names = line.split(self._header_separator) # Remove possible whitespaces of column names. 
- names = [ n.strip() for n in names ] + names = [n.strip() for n in names] - if(len(names) == 0): + if len(names) == 0: return None return names - def _load_file(self, pathfilename, keep_fields, dtype_convertions, + def _load_file( + self, + pathfilename, + keep_fields, + dtype_convertions, dtype_convertion_except_fields): """Loads the given file. @@ -525,24 +686,27 @@ def _load_file(self, pathfilename, keep_fields, dtype_convertions, with open(pathfilename, 'r') as ifile: line = ifile.readline() column_names = self._extract_column_names(line) - if(column_names is None): - raise ValueError('The data text file "{}" does not contain a ' - 'readable table header as first line!'.format(pathfilename)) + if column_names is None: + raise ValueError( + f'The data text file "{pathfilename}" does not contain a ' + 'readable table header as first line!') usecols = None - dtype = [(n,np.float64) for n in column_names] - if(keep_fields is not None): + dtype = [(n, np.float64) for n in column_names] + if keep_fields is not None: # Select only the given columns. 
usecols = [] dtype = [] - for (idx,name) in enumerate(column_names): - if(name in keep_fields): + for (idx, name) in enumerate(column_names): + if name in keep_fields: usecols.append(idx) - dtype.append((name,np.float64)) + dtype.append((name, np.float64)) usecols = tuple(usecols) - if(len(dtype) == 0): - raise ValueError('No data columns were selected to be loaded!') + if len(dtype) == 0: + raise ValueError( + 'No data columns were selected to be loaded!') - data_ndarray = np.loadtxt(ifile, + data_ndarray = np.loadtxt( + ifile, dtype=dtype, comments=self._header_comment, usecols=usecols) @@ -556,8 +720,12 @@ def _load_file(self, pathfilename, keep_fields, dtype_convertions, return data - def load_data(self, keep_fields=None, dtype_convertions=None, - dtype_convertion_except_fields=None, **kwargs): + def load_data( + self, + keep_fields=None, + dtype_convertions=None, + dtype_convertion_except_fields=None, + **kwargs): """Loads the data from the data files specified through their fully qualified file names. @@ -576,7 +744,7 @@ def load_data(self, keep_fields=None, dtype_convertions=None, Returns ------- - data : DataFieldRecordArray + data : instance of DataFieldRecordArray The DataFieldRecordArray holding the loaded data. Raises @@ -586,27 +754,29 @@ def load_data(self, keep_fields=None, dtype_convertions=None, ValueError If the table header cannot be read. 
""" - if(keep_fields is not None): - if(isinstance(keep_fields, str)): - keep_fields = [ keep_fields ] - elif(not issequenceof(keep_fields, str)): - raise TypeError('The keep_fields argument must be None, an ' - 'instance of type str, or a sequence of instances of ' - 'type str!') - - if(dtype_convertions is None): + if keep_fields is not None: + if isinstance(keep_fields, str): + keep_fields = [keep_fields] + elif not issequenceof(keep_fields, str): + raise TypeError( + 'The keep_fields argument must be None, an instance of ' + 'type str, or a sequence of instances of type str!') + + if dtype_convertions is None: dtype_convertions = dict() - elif(not isinstance(dtype_convertions, dict)): - raise TypeError('The dtype_convertions argument must be None, ' - 'or an instance of dict!') + elif not isinstance(dtype_convertions, dict): + raise TypeError( + 'The dtype_convertions argument must be None, or an instance ' + 'of dict!') - if(dtype_convertion_except_fields is None): + if dtype_convertion_except_fields is None: dtype_convertion_except_fields = [] - elif(isinstance(dtype_convertion_except_fields, str)): - dtype_convertion_except_fields = [ dtype_convertion_except_fields ] - elif(not issequenceof(dtype_convertion_except_fields, str)): - raise TypeError('The dtype_convertion_except_fields argument ' - 'must be a sequence of str instances.') + elif isinstance(dtype_convertion_except_fields, str): + dtype_convertion_except_fields = [dtype_convertion_except_fields] + elif not issequenceof(dtype_convertion_except_fields, str): + raise TypeError( + 'The dtype_convertion_except_fields argument must be a ' + 'sequence of str instances.') # Load the first data file. 
data = self._load_file( @@ -628,27 +798,255 @@ def load_data(self, keep_fields=None, dtype_convertions=None, return data -class DataFieldRecordArray(object): +class DataTableAccessor( + object, + metaclass=abc.ABCMeta, +): + """This class provides an interface wrapper to access the data table of a + particular format in a unified way. + """ + def __init__(self, **kwargs): + super().__init__(**kwargs) + + @abc.abstractmethod + def get_column(self, data, name): + """This method is supposed to return a numpy.ndarray holding the data of + the column with name ``name``. + + Parameters + ---------- + data : any + The data table. + name : str + The name of the column. + + Returns + ------- + arr : instance of numpy.ndarray + The column data as numpy ndarray. + """ + pass + + @abc.abstractmethod + def get_field_names(self, data): + """This method is supposed to return a list of field names. + """ + pass + + @abc.abstractmethod + def get_field_name_to_dtype_dict(self, data): + """This method is supposed to return a dictionary with field name and + numpy dtype instance for each field. + """ + pass + + @abc.abstractmethod + def get_length(self, data): + """This method is supposed to return the length of the data table. + """ + pass + + +class NDArrayDataTableAccessor( + DataTableAccessor, +): + """This class provides an interface wrapper to access the data table stored + as a structured numpy ndarray. + """ + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def get_column(self, data, name): + """Gets the column data from the structured ndarray. + + Parameters + ---------- + data : instance of numpy.ndarray + The structured numpy ndarray holding the table data. + name : str + The name of the column. + """ + return data[name] + + def get_field_names(self, data): + return data.dtype.names + + def get_field_name_to_dtype_dict(self, data): + """Returns the dictionary with field name and numpy dtype instance for + each field. 
+ """ + fname_to_dtype_dict = dict([ + (k, v[0]) for (k, v) in data.dtype.fields.items() + ]) + return fname_to_dtype_dict + + def get_length(self, data): + """Returns the length of the data table. + """ + length = data.shape[0] + return length + + +class DictDataTableAccessor( + DataTableAccessor, +): + """This class provides an interface wrapper to access the data table stored + as a Python dictionary. + """ + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def get_column(self, data, name): + """Gets the column data from the dictionary. + + Parameters + ---------- + data : dict + The dictionary holding the table data. + name : str + The name of the column. + """ + return data[name] + + def get_field_names(self, data): + return list(data.keys()) + + def get_field_name_to_dtype_dict(self, data): + """Returns the dictionary with field name and numpy dtype instance for + each field. + """ + fname_to_dtype_dict = dict([ + (fname, data[fname].dtype) for fname in data.keys() + ]) + return fname_to_dtype_dict + + def get_length(self, data): + """Returns the length of the data table. + """ + length = 0 + if len(data) > 0: + length = data[next(iter(data))].shape[0] + return length + + +class ParquetDataTableAccessor( + DataTableAccessor, +): + """This class provides an interface wrapper to access the data table stored + as a Parquet table. + """ + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def get_column(self, data, name): + """Gets the column data from the Parquet table. + + Parameters + ---------- + data : instance of pyarrow.Table + The instance of pyarrow.Table holding the table data. + name : str + The name of the column. + """ + return data[name].to_numpy() + + def get_field_names(self, data): + return data.column_names + + def get_field_name_to_dtype_dict(self, data): + """Returns the dictionary with field name and numpy dtype instance for + each field. 
+        """
+        fname_to_dtype_dict = dict([
+            (fname, data.field(fname).type.to_pandas_dtype())
+            for fname in data.column_names
+        ])
+        return fname_to_dtype_dict
+
+    def get_length(self, data):
+        """Returns the length of the data table.
+        """
+        return len(data)
+
+
+class DataFieldRecordArrayDataTableAccessor(
+        DataTableAccessor,
+):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def get_column(self, data, name):
+        """Gets the column data from the DataFieldRecordArray.
+
+        Parameters
+        ----------
+        data : instance of DataFieldRecordArray
+            The instance of DataFieldRecordArray holding the table data.
+        name : str
+            The name of the column.
+        """
+        return data[name]
+
+    def get_field_names(self, data):
+        return data.field_name_list
+
+    def get_field_name_to_dtype_dict(self, data):
+        """Returns the dictionary with field name and numpy dtype instance for
+        each field.
+        """
+        fname_to_dtype_dict = dict([
+            (fname, data[fname].dtype)
+            for fname in data.field_name_list
+        ])
+        return fname_to_dtype_dict
+
+    def get_length(self, data):
+        """Returns the length of the data table.
+        """
+        return len(data)
+
+
+class DataFieldRecordArray(
+        object):
     """The DataFieldRecordArray class provides a data container similar to a
     numpy record ndarray. But the data fields are stored as individual numpy
     ndarray objects. Hence, access of single data fields is much faster compared
     to access on the record ndarray.
     """
-    def __init__(self, data, keep_fields=None, dtype_convertions=None,
-            dtype_convertion_except_fields=None, copy=True):
-        """Creates a DataFieldRecordArray from the given numpy record ndarray.
+    def __init__(  # noqa: C901
+            self,
+            data,
+            data_table_accessor=None,
+            keep_fields=None,
+            dtype_convertions=None,
+            dtype_convertion_except_fields=None,
+            copy=True,
+    ):
+        """Creates a DataFieldRecordArray from the given data.
Parameters ---------- - data : numpy record ndarray | dict | DataFieldRecordArray | None - The numpy record ndarray that needs to get transformed into the - DataFieldRecordArray instance. Alternative a dictionary with field - names as keys and numpy ndarrays as values can be provided. If an - instance of DataFieldRecordArray is provided, the new - DataFieldRecordArray gets constructed from the copy of the data of - the provided DataFieldRecordArray instance. + data : any | None + The tabulated data in any format. The only requirement is that + there is a DataTableAccessor instance available for the given data + format. Supported data types are: + + numpy.ndarray + A structured numpy ndarray. + dict + A Python dictionary with field names as keys and + one-dimensional numpy ndarrays as values. + pyarrow.Table + An instance of pyarrow.Table. + DataFieldRecordArray + An instance of DataFieldRecordArray. + If set to `None`, the DataFieldRecordArray instance is initialized with no data and the length of the array is set to 0. + data_table_accessor : instance of DataTableAccessor | None + The instance of DataTableAccessor which provides column access to + ``data``. If set to ``None``, an appropriate ``DataTableAccessor`` + instance will be selected based on the type of ``data``. keep_fields : str | sequence of str | None If not None (default), this specifies the data fields that should get kept from the given data. Otherwise all data fields get kept. @@ -662,89 +1060,91 @@ def __init__(self, data, keep_fields=None, dtype_convertions=None, copy : bool Flag if the input data should get copied. Default is True. If a DataFieldRecordArray instance is provided, this option is set to - `True` automatically. + ``True`` automatically. 
""" self._data_fields = dict() self._len = None - if(data is None): + if data is None: data = dict() - if(keep_fields is not None): - if(isinstance(keep_fields, str)): - keep_fields = [ keep_fields ] - elif(not issequenceof(keep_fields, str)): - raise TypeError('The keep_fields argument must be None, an ' - 'instance of type str, or a sequence of instances of ' - 'type str!') + if keep_fields is not None: + if isinstance(keep_fields, str): + keep_fields = [keep_fields] + elif not issequenceof(keep_fields, str): + raise TypeError( + 'The keep_fields argument must be None, an instance of ' + 'type str, or a sequence of instances of type str!') - if(dtype_convertions is None): + if dtype_convertions is None: dtype_convertions = dict() - elif(not isinstance(dtype_convertions, dict)): - raise TypeError('The dtype_convertions argument must be None, ' - 'or an instance of dict!') + elif not isinstance(dtype_convertions, dict): + raise TypeError( + 'The dtype_convertions argument must be None, or an instance ' + 'of dict!') - if(dtype_convertion_except_fields is None): + if dtype_convertion_except_fields is None: dtype_convertion_except_fields = [] - elif(isinstance(dtype_convertion_except_fields, str)): - dtype_convertion_except_fields = [ dtype_convertion_except_fields ] - elif(not issequenceof(dtype_convertion_except_fields, str)): - raise TypeError('The dtype_convertion_except_fields argument ' - 'must be a sequence of str instances.') - - if(isinstance(data, np.ndarray)): - field_names = data.dtype.names - fname2dtype = dict( - [(k,v[0]) for (k,v) in data.dtype.fields.items() ]) - length = data.shape[0] - elif(isinstance(data, dict)): - field_names = list(data.keys()) - fname2dtype = dict( - [ (fname, data[fname].dtype) for fname in field_names ]) - length = 0 - if(len(field_names) > 0): - length = data[field_names[0]].shape[0] - elif(isinstance(data, DataFieldRecordArray)): - field_names = data.field_name_list - fname2dtype = dict( - [ (fname, data[fname].dtype) for 
fname in field_names ]) - length = len(data) - copy = True - else: - raise TypeError('The data argument must be an instance of ndarray, ' - 'dict, or DataFieldRecordArray!') + elif isinstance(dtype_convertion_except_fields, str): + dtype_convertion_except_fields = [dtype_convertion_except_fields] + elif not issequenceof(dtype_convertion_except_fields, str): + raise TypeError( + 'The dtype_convertion_except_fields argument must be a ' + 'sequence of str instances.') + + # Select an appropriate data table accessor for the type of data. + if data_table_accessor is None: + if isinstance(data, np.ndarray): + data_table_accessor = NDArrayDataTableAccessor() + elif isinstance(data, dict): + data_table_accessor = DictDataTableAccessor() + elif (tool.is_available('pyarrow') and + isinstance(data, tool.get('pyarrow').Table)): + data_table_accessor = ParquetDataTableAccessor() + elif isinstance(data, DataFieldRecordArray): + data_table_accessor = DataFieldRecordArrayDataTableAccessor() + else: + raise TypeError( + 'No TableDataAccessor instance has been specified for the ' + f'data of type {type(data)}!') + + field_names = data_table_accessor.get_field_names(data) + fname2dtype = data_table_accessor.get_field_name_to_dtype_dict(data) + length = data_table_accessor.get_length(data) for fname in field_names: # Ignore fields that should not get kept. - if((keep_fields is not None) and (fname not in keep_fields)): + if (keep_fields is not None) and (fname not in keep_fields): continue copy_field = copy dt = fname2dtype[fname] - if((fname not in dtype_convertion_except_fields) and - (dt in dtype_convertions)): + if (fname not in dtype_convertion_except_fields) and\ + (dt in dtype_convertions): dt = dtype_convertions[dt] # If a data type convertion is needed, the data of the field # needs to get copied. 
copy_field = True - if(copy_field is True): + if copy_field is True: # Create a ndarray with the final data type and then assign the # values from the data, which technically is a copy. field_arr = np.empty((length,), dtype=dt) - field_arr[:] = data[fname] + np.copyto(field_arr, data_table_accessor.get_column(data, fname)) else: - field_arr = data[fname] - if(self._len is None): + field_arr = data_table_accessor.get_column(data, fname) + + if self._len is None: self._len = len(field_arr) - elif(len(field_arr) != self._len): - raise ValueError('All field arrays must have the same length. ' - 'Field "%s" has length %d, but must be %d!'%( - fname, len(field_arr), self._len)) + elif len(field_arr) != self._len: + raise ValueError( + 'All field arrays must have the same length. ' + f'Field "{fname}" has length {len(field_arr)}, but must be ' + f'{self._len}!') self._data_fields[fname] = field_arr - if(self._len is None): + if self._len is None: # The DataFieldRecordArray is initialized with no fields, i.e. also # also no data. self._len = 0 @@ -752,7 +1152,9 @@ def __init__(self, data, keep_fields=None, dtype_convertions=None, self._field_name_list = list(self._data_fields.keys()) self._indices = None - def __contains__(self, name): + def __contains__( + self, + name): """Checks if the given field exists in this DataFieldRecordArray instance. @@ -769,7 +1171,9 @@ def __contains__(self, name): """ return (name in self._data_fields) - def __getitem__(self, name): + def __getitem__( + self, + name): """Implements data field value access. Parameters @@ -787,20 +1191,24 @@ def __getitem__(self, name): Returns ------- - data : numpy ndarray | DataFieldRecordArray + data : numpy ndarray | instance of DataFieldRecordArray The requested field data or a DataFieldRecordArray holding the requested selection of the entire data. 
""" - if(isinstance(name, np.ndarray)): + if isinstance(name, np.ndarray): return self.get_selection(name) - if(name not in self._data_fields): - raise KeyError('The data field "%s" is not present in the ' - 'DataFieldRecordArray instance.'%(name)) + if name not in self._data_fields: + raise KeyError( + f'The data field "{name}" is not present in the ' + 'DataFieldRecordArray instance.') return self._data_fields[name] - def __setitem__(self, name, arr): + def __setitem__( + self, + name, + arr): """Implements data field value assigment. If values are assigned to a data field that does not exist yet, it will be added via the ``append_field`` method. @@ -821,22 +1229,23 @@ def __setitem__(self, name, arr): If the given data array is not of the same length as this DataFieldRecordArray instance. """ - if(isinstance(name, np.ndarray)): + if isinstance(name, np.ndarray): self.set_selection(name, arr) return # Check if a new field is supposed to be added. - if(name not in self): + if name not in self: self.append_field(name, arr) return # We set a particular already existing data field. - if(len(arr) != self._len): - raise ValueError('The length of the to-be-set data (%d) must ' - 'match the length (%d) of the DataFieldRecordArray instance!'%( - len(arr), self._len)) + if len(arr) != self._len: + raise ValueError( + f'The length of the to-be-set data ({len(arr)}) must match ' + f'the length ({self._len}) of the DataFieldRecordArray ' + 'instance!') - if(not isinstance(arr, np.ndarray)): + if not isinstance(arr, np.ndarray): raise TypeError( 'When setting a field directly, the data must be provided as a ' 'numpy ndarray!') @@ -876,21 +1285,25 @@ def __str__(self): # Generates a pretty string representation of the given field name. 
         def _pretty_str_field(name):
             field = self._data_fields[name]
-            s = '%s: {dtype: %s, vmin: % .3e, vmax: % .3e}'%(
-                name.ljust(max_field_name_len), str(field.dtype), np.min(field),
-                np.max(field))
+            s = (f'{name.ljust(max_field_name_len)}: '
+                 '{'
+                 f'dtype: {str(field.dtype)}, '
+                 f'vmin: {np.min(field):.3e}, '
+                 f'vmax: {np.max(field):.3e}'
+                 '}')
             return s
 
         indent_str = ' '*dsp.INDENTATION_WIDTH
 
-        s = '%s: %d fields, %d entries, %.0f %sbytes '%(
-            classname(self), len(self._field_name_list), self.__len__(),
-            np.round(size, 0), prefix)
-        if(len(self._field_name_list) > 0):
-            s += '\n' + indent_str + 'fields = {'
-            s += '\n' + indent_str*2 + _pretty_str_field(self._field_name_list[0])
-            for fname in self._field_name_list[1:]:
-                s += '\n' + indent_str*2 + _pretty_str_field(fname)
-            s += '\n' + indent_str + '}'
+        s = (f'{classname(self)}: {len(self._field_name_list)} fields, '
+             f'{len(self)} entries, {np.round(size, 0):.0f} {prefix}bytes ')
+        if len(self._field_name_list) > 0:
+            s += f'\n{indent_str}fields = '
+            s += '{'
+            for fname in self._field_name_list:
+                s += f'\n{indent_str*2}{_pretty_str_field(fname)}'
+            s += f'\n{indent_str}'
+            s += '}'
+
         return s
 
     @property
@@ -904,8 +1317,8 @@ def indices(self):
         """(read-only) The numpy ndarray holding the indices of this
         DataFieldRecordArray.
         """
-        if(self._indices is None):
-            self._indices = np.indices((self._len,))[0]
+        if self._indices is None:
+            self._indices = np.arange(self._len)
         return self._indices
 
     def append(self, arr):
         """Appends the given DataFieldRecordArray to this DataFieldRecordArray
         instance.
 
         Parameters
         ----------
         arr : instance of DataFieldRecordArray
             The instance of DataFieldRecordArray that should get appended to
             this DataFieldRecordArray. It must contain the same data fields.
             Additional data fields are ignored.
""" - if(not isinstance(arr, DataFieldRecordArray)): - raise TypeError('The arr argument must be an instance of ' - 'DataFieldRecordArray!') + if not isinstance(arr, DataFieldRecordArray): + raise TypeError( + 'The arr argument must be an instance of DataFieldRecordArray!') for fname in self._field_name_list: self._data_fields[fname] = np.append( @@ -938,7 +1351,8 @@ def append_field(self, name, data): name : str The name of the new data field. data : numpy ndarray - The numpy ndarray holding the data. + The numpy ndarray holding the data. The length of the ndarray must + match the current length of this DataFieldRecordArray instance. Raises ------ @@ -951,20 +1365,20 @@ def append_field(self, name, data): TypeError If the arguments are of the wrong type. """ - if(not isinstance(name, str)): + if not isinstance(name, str): raise TypeError( 'The name argument must be an instance of str!') - if(not isinstance(data, np.ndarray)): + if not isinstance(data, np.ndarray): raise TypeError( 'The data argument must be an instance of ndarray!') - if(name in self._data_fields): + if name in self._data_fields: raise KeyError( - 'The data field "%s" already exists in this %s instance!'%( - name, classname(self))) - #if(len(data) != self._len): - # raise ValueError( - # 'The length of the given data is %d, but must be %d!'%( - # len(data), self._len)) + f'The data field "{name}" already exists in this ' + f'{classname(self)} instance!') + if len(data) != self._len: + raise ValueError( + f'The length of the given data is {len(data)}, but must be ' + f'{self._len}!') self._data_fields[name] = data self._field_name_list.append(name) @@ -975,13 +1389,14 @@ def as_numpy_record_array(self): Returns ------- - arr : numpy record ndarray + arr : instance of numpy record ndarray The numpy recarray ndarray holding the data of this DataFieldRecordArray instance. 
""" - dt = np.dtype( - [(name, self._data_fields[name].dtype) - for name in self.field_name_list]) + dt = np.dtype([ + (name, self._data_fields[name].dtype) + for name in self.field_name_list + ]) arr = np.empty((len(self),), dtype=dt) for name in self.field_name_list: @@ -989,7 +1404,9 @@ def as_numpy_record_array(self): return arr - def copy(self, keep_fields=None): + def copy( + self, + keep_fields=None): """Creates a new DataFieldRecordArray that is a copy of this DataFieldRecordArray instance. @@ -1028,17 +1445,20 @@ def set_field_dtype(self, name, dt): dt : numpy.dtype The dtype instance defining the new data type. """ - if(name not in self): + if name not in self: raise KeyError( f'The data field "{name}" does not exist in this ' - 'DataFieldRecordArray!') - if(not isinstance(dt, np.dtype)): + f'{classname(self)} instance!') + if not isinstance(dt, np.dtype): raise TypeError( 'The dt argument must be an instance of type numpy.dtype!') self._data_fields[name] = self._data_fields[name].astype(dt, copy=False) - def convert_dtypes(self, convertions, except_fields=None): + def convert_dtypes( + self, + convertions, + except_fields=None): """Converts the data type of the data fields of this DataFieldRecordArray. This method can be used to compress the data. @@ -1049,26 +1469,28 @@ def convert_dtypes(self, convertions, except_fields=None): except_fields : sequence of str | None The sequence of field names, which should not get converted. 
""" - if(not isinstance(convertions, dict)): - raise TypeError('The convertions argument must be an instance of ' - 'dict!') + if not isinstance(convertions, dict): + raise TypeError( + 'The convertions argument must be an instance of dict!') - if(except_fields is None): + if except_fields is None: except_fields = [] - if(not issequenceof(except_fields, str)): - raise TypeError('The except_fields argument must be a sequence ' - 'of str!') + if not issequenceof(except_fields, str): + raise TypeError( + 'The except_fields argument must be a sequence of str!') _data_fields = self._data_fields for fname in self._field_name_list: - if(fname in except_fields): + if fname in except_fields: continue old_dtype = _data_fields[fname].dtype - if(old_dtype in convertions): + if old_dtype in convertions: new_dtype = convertions[old_dtype] _data_fields[fname] = _data_fields[fname].astype(new_dtype) - def get_selection(self, indices): + def get_selection( + self, + indices): """Creates an DataFieldRecordArray that contains a selection of the data of this DataFieldRecordArray instance. @@ -1079,7 +1501,7 @@ def get_selection(self, indices): Returns ------- - data_field_array : DataFieldRecordArray + data_field_array : instance of DataFieldRecordArray The DataFieldRecordArray that contains the selection of the original DataFieldRecordArray. The selection data is a copy of the original data. @@ -1091,7 +1513,10 @@ def get_selection(self, indices): data[fname] = self._data_fields[fname][indices] return DataFieldRecordArray(data, copy=False) - def set_selection(self, indices, arr): + def set_selection( + self, + indices, + arr): """Sets a selection of the data of this DataFieldRecordArray instance to the data given in arr. @@ -1105,14 +1530,17 @@ def set_selection(self, indices, arr): It must have the same fields defined as this DataFieldRecordArray instance. 
""" - if(not isinstance(arr, DataFieldRecordArray)): - raise TypeError('The arr argument must be an instance of ' - 'DataFieldRecordArray!') + if not isinstance(arr, DataFieldRecordArray): + raise TypeError( + 'The arr argument must be an instance of DataFieldRecordArray!') for fname in self._field_name_list: self._data_fields[fname][indices] = arr[fname] - def rename_fields(self, convertions, must_exist=False): + def rename_fields( + self, + convertions, + must_exist=False): """Renames the given fields of this array. Parameters @@ -1130,15 +1558,17 @@ def rename_fields(self, convertions, must_exist=False): exist. """ for (old_fname, new_fname) in convertions.items(): - if(old_fname in self.field_name_list): + if old_fname in self.field_name_list: self._data_fields[new_fname] = self._data_fields.pop(old_fname) - elif(must_exist is True): - raise KeyError('The required field "%s" does not exist!'%( - old_fname)) + elif must_exist is True: + raise KeyError( + f'The required field "{old_fname}" does not exist!') self._field_name_list = list(self._data_fields.keys()) - def tidy_up(self, keep_fields): + def tidy_up( + self, + keep_fields): """Removes all fields that are not specified through the keep_fields argument. @@ -1153,20 +1583,22 @@ def tidy_up(self, keep_fields): If keep_fields is not an instance of str or a sequence of str instances. """ - if(isinstance(keep_fields, str)): - keep_fields = [ keep_fields ] - if(not issequenceof(keep_fields, str)): - raise TypeError('The keep_fields argument must be a sequence of ' - 'str!') + if isinstance(keep_fields, str): + keep_fields = [keep_fields] + if not issequenceof(keep_fields, str): + raise TypeError( + 'The keep_fields argument must be a sequence of str!') # We need to make a copy of the field_name_list because that list will # get changed by the `remove_field` method. 
field_name_list = copy.copy(self._field_name_list) for fname in field_name_list: - if(fname not in keep_fields): + if fname not in keep_fields: self.remove_field(fname) - def sort_by_field(self, name): + def sort_by_field( + self, + name): """Sorts the data along the given field name in ascending order. Parameters @@ -1184,9 +1616,10 @@ def sort_by_field(self, name): KeyError If the given data field does not exist. """ - if(name not in self._data_fields): - raise KeyError('The data field "{}" does not exist in this ' - 'DataFieldRecordArray instance!'.format(name)) + if name not in self._data_fields: + raise KeyError( + f'The data field "{name}" does not exist in this ' + f'{classname(self)} instance!') sorted_idxs = np.argsort(self._data_fields[name]) @@ -1195,6 +1628,8 @@ def sort_by_field(self, name): return sorted_idxs + register_FileLoader(['.npy'], NPYFileLoader) +register_FileLoader(['.parquet'], ParquetFileLoader) register_FileLoader(['.pkl'], PKLFileLoader) register_FileLoader(['.csv'], TextFileLoader) diff --git a/skyllh/core/test_statistic.py b/skyllh/core/test_statistic.py index 98c0ac64f3..43bcfd9067 100644 --- a/skyllh/core/test_statistic.py +++ b/skyllh/core/test_statistic.py @@ -8,30 +8,38 @@ import numpy as np -class TestStatistic(object, metaclass=abc.ABCMeta): +class TestStatistic( + object, + metaclass=abc.ABCMeta): """This is the abstract base class for a test statistic class. """ - def __init__(self): + def __init__(self, **kwargs): """Constructs the test-statistic function instance. """ - super(TestStatistic, self).__init__() + super().__init__(**kwargs) @abc.abstractmethod - def evaluate(self, llhratio, log_lambda, fitparam_values, *args, **kwargs): + def __call__( + self, + pmm, + log_lambda, + fitparam_values, + **kwargs): """This method is supposed to evaluate the test-statistic function. Parameters ---------- - llhratio : LLHRatio instance - The log-likelihood ratio function, which should be used for the - test-statistic function. 
+ pmm : instance of ParameterModelMapper + The ParameterModelMapper instance that defines the global + parameter set. log_lambda : float The value of the log-likelihood ratio function. Usually, this is its maximum. - fitparam_values : (N_fitparams+1)-shaped 1D numpy ndarray - The numpy ndarray holding the fit parameter values of the - log-likelihood ratio function for the given log_lambda value. + fitparam_values : instance of numpy ndarray + The (N_fitparam,)-shaped 1D numpy ndarray holding the + global fit parameter values of the log-likelihood ratio + function for the given log_lambda value. Returns ------- @@ -41,40 +49,68 @@ def evaluate(self, llhratio, log_lambda, fitparam_values, *args, **kwargs): pass -class TestStatisticWilks(TestStatistic): - """This class implements the standard Wilks theorem test-statistic function: +class WilksTestStatistic( + TestStatistic): + r"""This class implements the standard Wilks theorem test-statistic function: - TS = 2 * sign(ns_best) * log( L(fitparam_best) / L(ns = 0) ) + .. math:: - where the sign(ns_best) is negative for ns_best < 0, and positive otherwise. + TS = 2 \text{sign}(\hat{n}_{\text{s}}) \log \left( + \frac{\mathcal{L}(\hat{\vec{p}})}{\mathcal{L}(n_{\text{s}} = 0)} \right) + + where the :math:`\text{sign}(\hat{n}_{\text{s}})` is negative for + :math:`\hat{n}_{\text{s}} < 0`, and positive otherwise. """ - def __init__(self): + def __init__(self, ns_param_name='ns', **kwargs): """Constructs the test-statistic function instance. + + Parameters + ---------- + ns_param_name : str + The name of the global fit parameter for the number of signal + events in the detector, ns. + """ + super().__init__(**kwargs) + + self._ns_param_name = ns_param_name + + @property + def ns_param_name(self): + """(read-only) The name of the global fit parameter for the number of + signal events in the detector, ns. 
""" - super(TestStatisticWilks, self).__init__() + return self._ns_param_name - def evaluate(self, llhratio, log_lambda, fitparam_values): - """Evaluates this test-statistic function. + def __call__( + self, + pmm, + log_lambda, + fitparam_values, + **kwargs): + """Evaluates the test-statistic function. Parameters ---------- - llhratio : LLHRatio instance - The log-likelihood ratio function, which should be used for the - test-statistic function. + pmm : instance of ParameterModelMapper + The ParameterModelMapper instance that defines the global + parameter set. log_lambda : float The value of the log-likelihood ratio function. Usually, this is its maximum. - fitparam_values : (N_fitparams+1)-shaped 1D numpy ndarray - The numpy ndarray holding the fit parameter values of the - log-likelihood ratio function for the given log_lambda value. - By definition, the first element is the value of 'ns'. + fitparam_values : instance of numpy ndarray + The (N_fitparam,)-shaped 1D numpy ndarray holding the + global fit parameter values of the log-likelihood ratio + function for the given log_lambda value. Returns ------- TS : float The calculated test-statistic value. """ - ns = fitparam_values[0] + ns_pidx = pmm.get_gflp_idx( + name=self._ns_param_name) + + ns = fitparam_values[ns_pidx] # We need to distinguish between ns=0 and ns!=0, because the np.sign(ns) # function returns 0 for ns=0, but we want it to be 1 in such cases. @@ -85,68 +121,115 @@ def evaluate(self, llhratio, log_lambda, fitparam_values): return TS -class TestStatisticWilksZeroNsTaylor(TestStatistic): - """Similar to the TestStatisticWilks class, this class implements the +class LLHRatioZeroNsTaylorWilksTestStatistic( + TestStatistic): + r"""Similar to the TestStatisticWilks class, this class implements the standard Wilks theorem test-statistic function. But for zero ns values, the log-likelihood ratio function is taylored up to second order and the resulting apex is used as log_lambda value. 
Hence, the TS function is defined as: - TS = 2 * sign(ns_best) * log( L(fitparam_best) / L(ns = 0) ) + .. math:: + + TS = 2 \text{sign}(\hat{n}_{\text{s}}) \log \left( + \frac{\mathcal{L}(\hat{\vec{p}})}{\mathcal{L}(n_{\text{s}} = 0)} \right) + + for :math:`\hat{n}_{\text{s}} \neq 0`, and - for ns_best != 0, and + .. math:: - TS = 2 * a^2 / (4*b) + TS = -2 \frac{a^2}{4b} - for ns_best == 0, with + for :math:`\hat{n}_{\text{s}} = 0`, with - a = d/dns ( L(fitparam_best) / L(ns = 0) ) + .. math:: - being the derivative w.r.t. ns of the log-likelihood ratio function, and + a = \frac{\text{d}}{\text{d}n_{\text{s}}} \left( + \frac{\mathcal{L}(\hat{\vec{p}})}{\mathcal{L}(n_{\text{s}} = 0)} \right) - b = d/dns ( a ) + being the derivative w.r.t. :math:`n_{\text{s}}` of the log-likelihood ratio + function, and + + .. math:: + + b = \frac{\text{d}a}{\text{d}n_{\text{s}}} being its second derivative w.r.t. ns. """ - def __init__(self): + def __init__(self, ns_param_name='ns', **kwargs): """Constructs the test-statistic function instance. + + Parameters + ---------- + ns_param_name : str + The name of the global fit parameter for the number of signal + events in the detector, ns. """ - super(TestStatisticWilksZeroNsTaylor, self).__init__() + super().__init__(**kwargs) + + self._ns_param_name = ns_param_name - def evaluate(self, llhratio, log_lambda, fitparam_values, grads): - """Evaluates this test-statistic function. + @property + def ns_param_name(self): + """(read-only) The name of the global fit parameter for the number of + signal events in the detector, ns. + """ + return self._ns_param_name + + def __call__( + self, + pmm, + log_lambda, + fitparam_values, + llhratio, + grads, + tl=None, + **kwargs): + """Evaluates the test-statistic function. Parameters ---------- - llhratio : LLHRatio instance - The log-likelihood ratio function, which should be used for the - test-statistic function. 
+ pmm : instance of ParameterModelMapper + The ParameterModelMapper instance that defines the global + parameter set. log_lambda : float The value of the log-likelihood ratio function. Usually, this is its maximum. - fitparam_values : (N_fitparams+1)-shaped 1D numpy ndarray - The numpy ndarray holding the fit parameter values of the - log-likelihood ratio function for the given log_lambda value. - By definition, the first element is the value of 'ns'. - grads : (N_fitparams+1)-shaped 1D ndarray - The ndarray holding the values of the first derivative of the - log-likelihood ratio function w.r.t. each global fit parameter. - By definition the first element is the first derivative - w.r.t. the fit parameter ns. + fitparam_values : instance of numpy ndarray + The (N_fitparam,)-shaped 1D numpy ndarray holding the + global fit parameter values of the log-likelihood ratio + function for the given log_lambda value. + llhratio : instance of LLHRatio + The log-likelihood ratio function, which should be used for the + test-statistic function. + grads : instance of numpy ndarray + The (N_fitparam,)-shaped 1D numpy ndarray holding the + values of the first derivative of the log-likelihood ratio function + w.r.t. each global fit parameter. + tl : instance of TimeLord | None + The optional instance of TimeLord to measure timing information. Returns ------- TS : float The calculated test-statistic value. 
""" - ns = fitparam_values[0] + ns_pidx = pmm.get_gflp_idx( + name=self._ns_param_name) - if(ns == 0): - nsgrad = grads[0] - nsgrad2 = llhratio.calculate_ns_grad2(fitparam_values) + ns = fitparam_values[ns_pidx] + + if ns == 0: + nsgrad = grads[ns_pidx] + nsgrad2 = llhratio.calculate_ns_grad2( + fitparam_values=fitparam_values, + ns_pidx=ns_pidx, + tl=tl) TS = -2 * nsgrad**2 / (4*nsgrad2) - else: - TS = 2 * np.sign(ns) * log_lambda + + return TS + + TS = 2 * np.sign(ns) * log_lambda return TS diff --git a/skyllh/core/times.py b/skyllh/core/times.py index 63176da79d..409fe072cc 100644 --- a/skyllh/core/times.py +++ b/skyllh/core/times.py @@ -5,15 +5,22 @@ from skyllh.core.livetime import Livetime -class TimeGenerationMethod(object, metaclass=abc.ABCMeta): +class TimeGenerationMethod( + object, + metaclass=abc.ABCMeta, +): """Base class (type) for implementing a method to generate times. """ - def __init__(self): - pass + def __init__(self, **kwargs): + super().__init__(**kwargs) @abc.abstractmethod - def generate_times(self, rss, size): + def generate_times( + self, + rss, + size, + ): """The ``generate_times`` method implements the actual generation of times, which is method dependent. @@ -33,13 +40,15 @@ def generate_times(self, rss, size): pass -class LivetimeTimeGenerationMethod(TimeGenerationMethod): +class LivetimeTimeGenerationMethod( + TimeGenerationMethod, +): """The LivetimeTimeGenerationMethod provides the method to generate times from a Livetime object. It will uniformely generate times that will coincide with the on-time intervals of the detector, by calling the `draw_ontimes` method of the Livetime class. """ - def __init__(self, livetime): + def __init__(self, livetime, **kwargs): """Creates a new LivetimeTimeGeneration instance. Parameters @@ -47,6 +56,8 @@ def __init__(self, livetime): livetime : Livetime The Livetime instance that should be used to generate times from. 
""" + super().__init__(**kwargs) + self.livetime = livetime @property @@ -54,13 +65,20 @@ def livetime(self): """The Livetime instance used to draw times from. """ return self._livetime + @livetime.setter def livetime(self, livetime): - if(not isinstance(livetime, Livetime)): - raise TypeError('The livetime property must be an instance of Livetime!') + if not isinstance(livetime, Livetime): + raise TypeError( + 'The livetime property must be an instance of Livetime!') self._livetime = livetime - def generate_times(self, rss, size): + def generate_times( + self, + rss, + size, + **kwargs, + ): """Generates `size` MJD times according to the detector on-times provided by the Livetime instance. @@ -77,17 +95,23 @@ def generate_times(self, rss, size): times : ndarray The 1d (`size`,)-shaped numpy ndarray holding the generated times. """ - return self.livetime.draw_ontimes(rss, size) + times = self._livetime.draw_ontimes( + rss=rss, + size=size, + **kwargs) + return times -class TimeGenerator(object): + +class TimeGenerator( + object): def __init__(self, method): """Creates a time generator instance with a given defined time generation method. Parameters ---------- - method : TimeGenerationMethod + method : instance of TimeGenerationMethod The instance of TimeGenerationMethod that defines the method of generating times. """ @@ -99,27 +123,43 @@ def method(self): the times. """ return self._method + @method.setter def method(self, method): - if(not isinstance(method, TimeGenerationMethod)): - raise TypeError('The time generation method must be an instance of TimeGenerationMethod!') + if not isinstance(method, TimeGenerationMethod): + raise TypeError( + 'The time generation method must be an instance of ' + 'TimeGenerationMethod!') self._method = method - def generate_times(self, rss, size): + def generate_times( + self, + rss, + size, + **kwargs, + ): """Generates ``size`` amount of times by calling the ``generate_times`` method of the TimeGenerationMethod class. 
Parameters ---------- - rss : RandomStateService - The random state service providing the random number - generator (RNG). + rss : instance of RandomStateService + The random state service providing the random number generator + (RNG). size : int The number of time that should get generated. + **kwargs + Additional keyword arguments are passed to the ``generate_times`` + method of the TimeGenerationMethod class. Returns ------- times : ndarray - The 1d (`size`,)-shaped ndarray holding the generated times. + The 1d (``size``,)-shaped ndarray holding the generated times. """ - return self._method.generate_times(rss, size) + times = self._method.generate_times( + rss=rss, + size=size, + **kwargs) + + return times diff --git a/skyllh/core/timing.py b/skyllh/core/timing.py index c379cd1ab0..1218728323 100644 --- a/skyllh/core/timing.py +++ b/skyllh/core/timing.py @@ -1,53 +1,60 @@ # -*- coding: utf-8 -*- -import numpy as np -import time - -from skyllh.core import display -from skyllh.core.py import classname - """The timing module provides code execution timing functionalities. The TimeLord class keeps track of execution times of specific code segments, called "tasks". The TaskTimer class can be used within a `with` statement to time the execution of the code within the `with` block. """ -class TaskRecord(object): - def __init__(self, name, tstart, tend): +import numpy as np +import time + +from skyllh.core import ( + display, +) +from skyllh.core.py import ( + classname, +) + + +class TaskRecord( + object): + def __init__( + self, + name, + start_times, + end_times): """Creates a new TaskRecord instance. Parameters ---------- name : str The name of the task. - tstart : float | 1d ndarray of float - The start time(s) of the task in seconds. - tend : float | 1d ndarray of float - The end time(s) of the task in seconds. + start_times : list of float + The start times of the task in seconds. + end_times : list of float + The end times of the task in seconds. 
""" self.name = name - tstart = np.atleast_1d(tstart) - tend = np.atleast_1d(tend) - - if(len(tstart) != len(tend)): - raise ValueError('The number of start and end time stamps must ' - 'be equal!') + if len(start_times) != len(end_times): + raise ValueError( + 'The number of start and end time stamps must be equal!') - self._tstart_list = list(tstart) - self._tend_list = list(tend) + self._start_times = start_times + self._end_times = end_times @property def tstart(self): """(read-only) The time stamps the execution of this task started. """ - return self._tstart_list + return self._start_times @property def tend(self): """(read-only) The time stamps the execution of this task was stopped. """ - return self._tend_list + return self._end_times @property def duration(self): @@ -57,18 +64,20 @@ def duration(self): # Create a (2,Niter)-shaped 2D ndarray holding the start and end time # stamps of the task executions. This array gets then sorted by the # start time stamps. - arr = np.sort(np.vstack((self._tstart_list, self._tend_list)), axis=1) + arr = np.sort( + np.vstack((self._start_times, self._end_times)), + axis=1) - d = arr[1,0] - arr[0,0] - last_tend = arr[1,0] + d = arr[1, 0] - arr[0, 0] + last_tend = arr[1, 0] for idx in range(1, arr.shape[1]): - tstart = arr[0,idx] - tend = arr[1,idx] - if(tend <= last_tend): + tstart = arr[0, idx] + tend = arr[1, idx] + if tend <= last_tend: continue - if(tstart <= last_tend and tend > last_tend): + if tstart <= last_tend and tend > last_tend: d += tend - last_tend - elif(tstart >= last_tend): + elif tstart >= last_tend: d += tend - tstart last_tend = tend @@ -78,23 +87,25 @@ def duration(self): def niter(self): """(read-only) The number of times this task was executed. """ - return len(self._tstart_list) + return len(self._start_times) def join(self, tr): """Joins this TaskRecord with the given TaskRecord instance. 
Parameters ---------- - tr : TaskRecord + tr : instance of TaskRecord The instance of TaskRecord that should be joined with this TaskRecord instance. """ - self._tstart_list.extend(tr._tstart_list) - self._tend_list.extend(tr._tend_list) + self._start_times.extend(tr._start_times) + self._end_times.extend(tr._end_times) -class TimeLord(object): - def __init__(self): +class TimeLord( + object): + def __init__( + self): self._task_records = [] self._task_records_name_idx_map = {} @@ -104,12 +115,14 @@ def task_name_list(self): """ return list(self._task_records_name_idx_map.keys()) - def add_task_record(self, tr): + def add_task_record( + self, + tr): """Adds a given task record to the internal list of task records. """ tname = tr.name - if(self.has_task_record(tname)): + if self.has_task_record(tname): # The TaskRecord already exists. Update the task record. self_tr = self.get_task_record(tname) self_tr.join(tr) @@ -118,7 +131,9 @@ def add_task_record(self, tr): self._task_records.append(tr) self._task_records_name_idx_map[tr.name] = len(self._task_records)-1 - def get_task_record(self, name): + def get_task_record( + self, + name): """Retrieves a task record of the given name. Parameters @@ -128,12 +143,14 @@ def get_task_record(self, name): Returns ------- - task_record : TaskRecord + task_record : instance of TaskRecord The instance of TaskRecord with the requested name. """ return self._task_records[self._task_records_name_idx_map[name]] - def has_task_record(self, name): + def has_task_record( + self, + name): """Checks if this TimeLord instance has a task record of the given name. Parameters @@ -149,19 +166,21 @@ def has_task_record(self, name): """ return name in self._task_records_name_idx_map - def join(self, tl): + def join( + self, + tl): """Joins a given TimeLord instance with this TimeLord instance. Tasks of the same name will be updated and new tasks will be added. 
Parameters ---------- - tl : TimeLord instance + tl : instance of TimeLord The instance of TimeLord whos tasks should be joined with the tasks of this TimeLord instance. """ for tname in tl.task_name_list: other_tr = tl.get_task_record(tname) - if(self.has_task_record(tname)): + if self.has_task_record(tname): # Update the task record. tr = self.get_task_record(tname) tr.join(other_tr) @@ -181,11 +200,14 @@ def __str__(self): n_tasks = len(task_name_list) s = f'{classname(self)}: Executed tasks:' - if(n_tasks == 0): + if n_tasks == 0: s += ' None.' return s - task_name_len_list = [ len(task_name) for task_name in task_name_list ] + task_name_len_list = [ + len(task_name) + for task_name in task_name_list + ] max_task_name_len = np.minimum( np.max(task_name_len_list), display.PAGE_WIDTH-25) @@ -198,15 +220,19 @@ def __str__(self): s += line.format( task_name=task_name, t=t, - p=1 if t>1e3 or t<1e-3 else 3, + p=1 if t > 1e3 or t < 1e-3 else 3, c='e' if t > 1e3 or t < 1e-3 else 'f', niter=tr.niter) return s -class TaskTimer(object): - def __init__(self, time_lord, name): +class TaskTimer( + object): + def __init__( + self, + time_lord, + name): """ Parameters ---------- @@ -227,12 +253,14 @@ def time_lord(self): can be None, which means that the task should not get recorded. """ return self._time_lord + @time_lord.setter def time_lord(self, lord): - if(lord is not None): - if(not isinstance(lord, TimeLord)): - raise TypeError('The time_lord property must be None or an ' - 'instance of TimeLord!') + if lord is not None: + if not isinstance(lord, TimeLord): + raise TypeError( + 'The time_lord property must be None or an instance of ' + 'TimeLord!') self._time_lord = lord @property @@ -240,10 +268,12 @@ def name(self): """The name if the task. 
""" return self._name + @name.setter def name(self, name): - if(not isinstance(name, str)): - raise TypeError('The name property must be an instance of str!') + if not isinstance(name, str): + raise TypeError( + 'The name property must be an instance of str!') self._name = name @property @@ -263,8 +293,11 @@ def __exit__(self, exc_type, exc_value, traceback): """ self._end = time.process_time() - if(self._time_lord is None): + if self._time_lord is None: return - self._time_lord.add_task_record(TaskRecord( - self._name, self._start, self._end)) + self._time_lord.add_task_record( + TaskRecord( + name=self._name, + start_times=[self._start], + end_times=[self._end])) diff --git a/skyllh/core/tool.py b/skyllh/core/tool.py new file mode 100644 index 0000000000..9e89decead --- /dev/null +++ b/skyllh/core/tool.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- + +"""The tool module provides functionality to interface with an optional external +python package (tool). The tool can be imported dynamically at run-time when +needed. +""" + +import importlib +import importlib.util +import sys + +from skyllh.core.py import ( + get_class_of_func, +) + + +def is_available(name): + """Checks if the given Python package is available for import. + + Parameters + ---------- + name : str + The name of the Python package. + + Returns + ------- + check : bool + ``True`` if the given Python package is available, ``False`` otherwise. + + Raises + ------ + ModuleNotFoundError + If the package is not a Python package, i.e. lacks a __path__ attribute. + """ + # Check if module is already imported. + if name in sys.modules: + return True + + spec = importlib.util.find_spec(name) + if spec is not None: + return True + + return False + + +def get(name): + """Returns the module object of the given tool. This will import the Python + package if it was not yet imported. + + Parameters + ---------- + name : str + The name of the Python package. 
+ + Returns + ------- + module : Python module + The (imported) Python module object. + """ + if name in sys.modules: + return sys.modules[name] + + module = importlib.import_module(name) + return module + + +def requires(*tools): + """This is decorator function that can be used whenever a function requires + optional tools. + + Parameters + ---------- + *tools : sequence of str + The name of the required Python packages. + + Raises + ------ + ModuleNotFoundError + If any of the specified tools is not available. + """ + def decorator(f): + def wrapper(*args, **kwargs): + for tool in tools: + if not is_available(tool): + raise ModuleNotFoundError( + f'The Python module "{tool}" is not available, but is ' + f'required by "{get_class_of_func(f)}.{f.__name__}"!') + return f(*args, **kwargs) + return wrapper + return decorator diff --git a/skyllh/core/trialdata.py b/skyllh/core/trialdata.py index 5aa4a12019..63a6a37c1f 100644 --- a/skyllh/core/trialdata.py +++ b/skyllh/core/trialdata.py @@ -10,16 +10,19 @@ from collections import OrderedDict import numpy as np -from skyllh.core.debugging import get_logger +from skyllh.core.debugging import ( + get_logger, +) from skyllh.core import display as dsp from skyllh.core.py import ( classname, func_has_n_args, int_cast, issequenceof, - typename ) -from skyllh.core.storage import DataFieldRecordArray +from skyllh.core.storage import ( + DataFieldRecordArray, +) logger = get_logger(__name__) @@ -31,7 +34,14 @@ class DataField(object): function. """ def __init__( - self, name, func, fitparam_names=None, dt=None): + self, + name, + func, + global_fitparam_names=None, + dt=None, + is_src_field=False, + is_srcevt_data=False, + **kwargs): """Creates a new instance of DataField that might depend on fit parameters. @@ -43,39 +53,57 @@ def __init__( func : callable The function that calculates the values of this data field. 
The call signature must be - `__call__(tdm, src_hypo_group_manager, fitparams)`, - where `tdm` is the TrialDataManager instance holding the event data, - `src_hypo_group_manager` is the SourceHypoGroupManager instance, and - `fitparams` is the dictionary with the current fit parameter names - and values. If the data field depends solely on source parameters, - the call signature must be `__call__(tdm, src_hypo_group_manager)` - instead. - fitparam_names : sequence of str | None - The sequence of str instances specifying the names of the fit + + __call__(tdm, shg_mgr, pmm, global_fitparams_dict=None) + + where ``tdm`` is the instance of TrialDataManager holding the trial + event data, ``shg_mgr`` is the instance of SourceHypoGroupManager, + ``pmm`` is the instance of ParameterModelMapper, and + ``global_fitparams_dict`` is the dictionary with the current global + fit parameter names and values. + global_fitparam_names : str | sequence of str | None + The sequence of str instances specifying the names of the global fit parameters this data field depends on. If set to None, the data field does not depend on any fit parameters. dt : numpy dtype | str | None If specified it defines the data type this data field should have. If a str instance is given, it defines the name of the data field whose data type should be taken for this data field. + is_src_field : bool + Flag if this data field is a source data field (``True``) and values + should be stored within this DataField instance, instead of the + events DataFieldRecordArray instance of the TrialDataManager + (``False``). + is_srcevt_data : bool + Flag if the data field will hold source-event data, i.e. data of + length N_values. In that case the data cannot be stored within the + events attribute of the TrialDataManager, but must be stored in the + values attribute of this DataField instance. 
""" - super(DataField, self).__init__() + super().__init__(**kwargs) self.name = name self.func = func - if(fitparam_names is None): - fitparam_names = [] - if(not issequenceof(fitparam_names, str)): - raise TypeError('The fitparam_names argument must be None or a ' - 'sequence of str instances!') - self._fitparam_name_list = list(fitparam_names) + if global_fitparam_names is None: + global_fitparam_names = [] + if isinstance(global_fitparam_names, str): + global_fitparam_names = [global_fitparam_names] + if not issequenceof(global_fitparam_names, str): + raise TypeError( + 'The global_fitparam_names argument must be None or a sequence ' + 'of str instances! It is of type ' + f'{classname(global_fitparam_names)}!') + self._global_fitparam_name_list = list(global_fitparam_names) self.dt = dt # Define the list of fit parameter values for which the fit parameter # depend data field values have been calculated for. - self._fitparam_value_list = [None]*len(self._fitparam_name_list) + self._global_fitparam_value_list = [None] *\ + len(self._global_fitparam_name_list) + + self._is_srcevt_data = is_srcevt_data # Define the member variable that holds the numpy ndarray with the data # field values. @@ -83,22 +111,25 @@ def __init__( # Define the most efficient `calculate` method for this kind of data # field. - if(func_has_n_args(self._func, 2)): + if is_src_field: self.calculate = self._calc_source_values - elif(len(self._fitparam_name_list) == 0): + elif len(self._global_fitparam_name_list) == 0: self.calculate = self._calc_static_values else: - self.calculate = self._calc_fitparam_dependent_values + self.calculate = self._calc_global_fitparam_dependent_values @property def name(self): """The name of the data field. """ return self._name + @name.setter def name(self, name): - if(not isinstance(name, str)): - raise TypeError('The name property must be an instance of str!') + if not isinstance(name, str): + raise TypeError( + 'The name property must be an instance of str!' 
+ f'It is of type {classname(name)}!') self._name = name @property @@ -106,14 +137,16 @@ def func(self): """The function that calculates the data field values. """ return self._func + @func.setter def func(self, f): - if(not callable(f)): - raise TypeError('The func property must be a callable object!') - if((not func_has_n_args(f, 2)) and - (not func_has_n_args(f, 3))): - raise TypeError('The func property must be a function with 2 or 3 ' - 'arguments!') + if not callable(f): + raise TypeError( + 'The func property must be a callable object!') + if (not func_has_n_args(f, 3)) and\ + (not func_has_n_args(f, 4)): + raise TypeError( + 'The func property must be a function with 3 or 4 arguments!') self._func = f @property @@ -125,17 +158,25 @@ def dt(self): field. """ return self._dt + @dt.setter def dt(self, obj): - if(obj is not None): - if((not isinstance(obj, np.dtype)) and - (not isinstance(obj, str))): + if obj is not None: + if (not isinstance(obj, np.dtype)) and\ + (not isinstance(obj, str)): raise TypeError( 'The dt property must be None, an instance of numpy.dtype, ' - 'or an instance of str! Currently it is of type %s.'%( - str(type(obj)))) + 'or an instance of str! Currently it is of type ' + f'{classname(obj)}.') self._dt = obj + @property + def is_srcevt_data(self): + """(read-only) Flag if the data field contains source-event data, i.e. + is of length N_values. + """ + return self._is_srcevt_data + @property def values(self): """(read-only) The calculated data values of the data field. 
@@ -148,30 +189,27 @@ def __str__(self): dtype = 'None' vmin = np.nan vmax = np.nan - if(self._values is not None): + + if self._values is not None: dtype = str(self._values.dtype) - try: - vmin = np.min(self._values) - except: - pass - try: - vmax = np.max(self._values) - except: - pass - s = '{}: {}: '.format(classname(self), self.name) - s +='{dtype: ' - s += '{}, vmin: {: .3e}, vmax: {: .3e}'.format( - dtype, vmin, vmax) + vmin = np.min(self._values) + vmax = np.max(self._values) + + s = f'{classname(self)}: {self.name}: ' + s += '{dtype: ' + s += f'{dtype}, vmin: {vmin: .3e}, vmax: {vmax: .3e}' s += '}' return s def _get_desired_dtype(self, tdm): - """Retrieves the data type this field should have. It's None, if no + """Retrieves the data type this field should have. It's ``None``, if no data type was defined for this data field. """ - if(self._dt is not None): - if(isinstance(self._dt, str)): + if self._dt is not None: + if isinstance(self._dt, str): + # The _dt attribute defines the name of the data field whose + # data type should be used. self._dt = tdm.get_dtype(self._dt) return self._dt @@ -180,29 +218,50 @@ def _convert_to_desired_dtype(self, tdm, values): type. """ dt = self._get_desired_dtype(tdm) - if(dt is not None): + if dt is not None: values = values.astype(dt, copy=False) return values def _calc_source_values( - self, tdm, src_hypo_group_manager, fitparams): + self, + tdm, + shg_mgr, + pmm): """Calculates the data field values utilizing the defined external function. The data field values solely depend on fixed source parameters. + + Parameters + ---------- + tdm : instance of TrialDataManager + The TrialDataManager instance this data field is part of and is + holding the event data. + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager, which defines the source + hypothesis groups. 
+ pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper, which defines the global + parameters and their mapping to local source parameters. """ - self._values = self._func(tdm, src_hypo_group_manager) - if(not isinstance(self._values, np.ndarray)): + self._values = self._func( + tdm=tdm, + shg_mgr=shg_mgr, + pmm=pmm) + + if not isinstance(self._values, np.ndarray): raise TypeError( - 'The calculation function for the data field "%s" must ' - 'return an instance of numpy.ndarray! ' - 'Currently it is of type "%s".'%( - self._name, typename(type(self._values)))) + f'The calculation function for the data field "{self._name}" ' + 'must return an instance of numpy.ndarray! ' + f'Currently it is of type "{classname(self._values)}".') # Convert the data type. self._values = self._convert_to_desired_dtype(tdm, self._values) def _calc_static_values( - self, tdm, src_hypo_group_manager, fitparams): + self, + tdm, + shg_mgr, + pmm): """Calculates the data field values utilizing the defined external function, that are static and only depend on source parameters. @@ -211,30 +270,46 @@ def _calc_static_values( tdm : instance of TrialDataManager The TrialDataManager instance this data field is part of and is holding the event data. - src_hypo_group_manager : instance of SourceHypoGroupManager - The instance of SourceHypoGroupManager, which defines the groups of - source hypotheses. - fitparams : dict - The dictionary holding the current fit parameter names and values. - By definition this dictionary is empty. + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager, which defines the source + hypothesis groups. + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper, which defines the global + parameters and their mapping to local source parameters. 
""" - values = self._func(tdm, src_hypo_group_manager, fitparams) - if(not isinstance(values, np.ndarray)): + values = self._func( + tdm=tdm, + shg_mgr=shg_mgr, + pmm=pmm) + + if not isinstance(values, np.ndarray): raise TypeError( - 'The calculation function for the data field "%s" must ' - 'return an instance of numpy.ndarray! ' - 'Currently it is of type "%s".'%( - self._name, typename(type(values)))) + f'The calculation function for the data field "{self._name}" ' + 'must return an instance of numpy.ndarray! ' + f'Currently it is of type "{classname(values)}".') # Convert the data type. values = self._convert_to_desired_dtype(tdm, values) - # Set the data values. This will add the data field to the - # DataFieldRecordArray if it does not exist yet. - tdm.events[self._name] = values + if self._is_srcevt_data: + n_values = tdm.get_n_values() + if values.shape[0] != n_values: + raise ValueError( + 'The calculation function for the data field ' + f'"{self._name}" must return a numpy ndarray of shape ' + f'({n_values},), but the shape is {values.shape}!') + self._values = values + else: + # Set the data values. This will add the data field to the + # DataFieldRecordArray if it does not exist yet. + tdm.events[self._name] = values - def _calc_fitparam_dependent_values( - self, tdm, src_hypo_group_manager, fitparams): + def _calc_global_fitparam_dependent_values( + self, + tdm, + shg_mgr, + pmm, + global_fitparams_dict): """Calculate data field values utilizing the defined external function, that depend on fit parameter values. We check if the fit parameter values have changed. @@ -244,68 +319,77 @@ def _calc_fitparam_dependent_values( tdm : instance of TrialDataManager The TrialDataManager instance this data field is part of and is holding the event data. - src_hypo_group_manager : instance of SourceHypoGroupManager - The instance of SourceHypoGroupManager, which defines the groups of - source hypotheses. 
- fitparams : dict - The dictionary holding the current fit parameter names and values. - """ - if(self._name not in tdm.events): - # It's the first time this method is called, so we need to calculate - # the data field values for sure. - values = self._func(tdm, src_hypo_group_manager, fitparams) - if(not isinstance(values, np.ndarray)): - raise TypeError( - 'The calculation function for the data field "%s" must ' - 'return an instance of numpy.ndarray! ' - 'Currently it is of type "%s".'%( - self._name, typename(type(values)))) - - # Convert the data type. - values = self._convert_to_desired_dtype(tdm, values) - - # Set the data values. This will add the data field to the - # DataFieldRecordArray if it does not exist yet. - tdm.events[self._name] = values + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager, which defines the source + hypothesis groups. + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper defining the mapping of the + global parameters to local source parameters. + global_fitparams_dict : dict + The dictionary holding the current global fit parameter names and + values. + """ + # Determine if we need to calculate the values. + calc_values = False - # We store the fit parameter values for which the field values were - # calculated for. So they have to get recalculated only when the - # fit parameter values the field depends on change. - self._fitparam_value_list = [ - fitparams[name] for name in self._fitparam_name_list - ] + if self._name not in tdm.events: + calc_values = True + else: + for (idx, name) in enumerate(self._global_fitparam_name_list): + if global_fitparams_dict[name] !=\ + self._global_fitparam_value_list[idx]: + calc_values = True + break + if not calc_values: return - for (idx, fitparam_name) in enumerate(self._fitparam_name_list): - if(fitparams[fitparam_name] != self._fitparam_value_list[idx]): - # This current fit parameter value has changed. 
So we need to - # re-calculate the data field values. - values = self._func(tdm, src_hypo_group_manager, fitparams) + values = self._func( + tdm=tdm, + shg_mgr=shg_mgr, + pmm=pmm, + global_fitparams_dict=global_fitparams_dict) - # Convert the data type. - values = self._convert_to_desired_dtype(tdm, values) + if not isinstance(values, np.ndarray): + raise TypeError( + 'The calculation function for the data field ' + f'"{self._name}" must return an instance of numpy.ndarray! ' + f'Currently it is of type "{classname(values)}".') - # Set the data values. - tdm.events[self._name] = values + # Convert the data type. + values = self._convert_to_desired_dtype(tdm, values) - # Store the new fit parameter values. - self._fitparam_value_list = [ - fitparams[name] for name in self._fitparam_name_list - ] + if self._is_srcevt_data: + n_values = tdm.get_n_values() + if values.shape[0] != n_values: + raise ValueError( + 'The calculation function for the data field ' + f'"{self._name}" must return a numpy ndarray of shape ' + f'({n_values},), but the shape is {values.shape}!') + self._values = values + else: + # Set the data values. This will add the data field to the + # DataFieldRecordArray if it does not exist yet. + tdm.events[self._name] = values - break + # We store the global fit parameter values for which the field values + # were calculated. So they have to get recalculated only when the + # global fit parameter values, the field depends on, change. + self._global_fitparam_value_list = [ + global_fitparams_dict[name] + for name in self._global_fitparam_name_list + ] class TrialDataManager(object): """The TrialDataManager class manages the event data for an analysis trial. It provides possible additional data fields and their calculation. - New data fields can be defined via the `add_data_field` method. + New data fields can be defined via the :py:meth:`add_data_field` method. Whenever a new trial is being initialized the data fields get re-calculated. 
The data trial manager is provided to the PDF evaluation method. Hence, data fields are calculated only once. """ - def __init__(self, index_field_name=None): + def __init__(self, index_field_name=None, **kwargs): """Creates a new TrialDataManager instance. Parameters @@ -315,7 +399,7 @@ def __init__(self, index_field_name=None): If provided, the events will be sorted along this data field. This might be useful for run-time performance. """ - super(TrialDataManager, self).__init__() + super().__init__(**kwargs) self.index_field_name = index_field_name @@ -332,10 +416,17 @@ def __init__(self, index_field_name=None): # a new evaluation data is available. self._static_data_fields_dict = OrderedDict() - # Define the list of data fields that depend on fit parameters. These - # data fields have to be re-calculated whenever a fit parameter value - # changes. - self._fitparam_data_fields_dict = OrderedDict() + # Define the list of data fields that depend on global fit parameters. + # These data fields have to be re-calculated whenever a global fit + # parameter value changes. + self._global_fitparam_data_fields_dict = OrderedDict() + + # Define the member variable that will hold the number of sources. + self._n_sources = None + + # Define the member variable that will hold the total number of events + # of the dataset this TrialDataManager belongs to. + self._n_events = None # Define the member variable that will hold the raw events for which the # data fields get calculated. @@ -343,7 +434,7 @@ def __init__(self, index_field_name=None): # Define the member variable that holds the source to event index # mapping. - self._src_ev_idxs = None + self._src_evt_idxs = None # We store an integer number for the trial data state and increase it # whenever the state of the trial data changed. This way other code, @@ -357,13 +448,14 @@ def index_field_name(self): be sorted by this data field. 
""" return self._index_field_name + @index_field_name.setter def index_field_name(self, name): - if(name is not None): - if(not isinstance(name, str)): + if name is not None: + if not isinstance(name, str): raise TypeError( 'The index_field_name property must be an instance of ' - 'type str!') + f'type str! It is of type {classname(name)}!') self._index_field_name = name @property @@ -372,20 +464,36 @@ def events(self): should get evaluated. """ return self._events + @events.setter def events(self, arr): - if(not isinstance(arr, DataFieldRecordArray)): + if not isinstance(arr, DataFieldRecordArray): raise TypeError( 'The events property must be an instance of ' - 'DataFieldRecordArray!') + f'DataFieldRecordArray! It is of type {classname(arr)}!') self._events = arr + @property + def has_global_fitparam_data_fields(self): + """(read-only) ``True`` if the TrialDataManager has global fit parameter + data fields defined, ``False`` otherwise. + """ + return len(self._global_fitparam_data_fields_dict) > 0 + + @property + def n_sources(self): + """(read-only) The number of sources. This information is taken from + the source hypo group manager when a new trial is initialized. + """ + return self._n_sources + @property def n_events(self): """The total number of events of the dataset this trial data manager corresponds to. """ return self._n_events + @n_events.setter def n_events(self, n): self._n_events = int_cast( @@ -406,11 +514,12 @@ def n_pure_bkg_events(self): return self._n_events - len(self._events) @property - def src_ev_idxs(self): - """(read-only) The 2-tuple holding the source index and event index - 1d ndarray arrays. + def src_evt_idxs(self): + """(read-only) The 2-tuple holding the source indices and event indices + 1d ndarray arrays. This can be ``None``, indicating that all trial data + events should be considered for all sources. 
""" - return self._src_ev_idxs + return self._src_evt_idxs @property def trial_data_state_id(self): @@ -434,19 +543,25 @@ def __contains__(self, name): False otherwise. """ # Check if the data field is part of the original trial data. - if((self._events is not None) and - (name in self._events.field_name_list)): + if (self._events is not None) and\ + (name in self._events.field_name_list): return True # Check if the data field is a user defined data field. - if((name in self._source_data_fields_dict) or - (name in self._pre_evt_sel_static_data_fields_dict) or - (name in self._static_data_fields_dict) or - (name in self._fitparam_data_fields_dict)): + if (name in self._source_data_fields_dict) or\ + (name in self._pre_evt_sel_static_data_fields_dict) or\ + (name in self._static_data_fields_dict) or\ + (name in self._global_fitparam_data_fields_dict): return True return False + def __getitem__(self, name): + """Implements the evaluation of ``self[name]`` to access data fields. + This method calls the :meth:`get_data` method of this class. + """ + return self.get_data(name) + def __str__(self): """Implements pretty string representation of this TrialDataManager instance. 
@@ -459,64 +574,179 @@ def __str__(self): s += '\n' s1 = 'Source data fields:\n' - s2 = '' - for (idx, dfield) in enumerate(self._source_data_fields_dict): - if(idx > 0): - s2 += '\n' - s2 += str(dfield) - if(s2 == ''): + s2 = '\n'.join( + [ + str(df) + for (_, df) in self._source_data_fields_dict.items() + ] + ) + if s2 == '': s2 = 'None' s1 += dsp.add_leading_text_line_padding(dsp.INDENTATION_WIDTH, s2) s += dsp.add_leading_text_line_padding(dsp.INDENTATION_WIDTH, s1) s += '\n' s1 = 'Pre-event-selection static data fields:\n' - s2 = '' - for (idx, dfield) in enumerate(self._pre_evt_sel_static_data_fields_dict): - if(idx > 0): - s2 += '\n' - s2 += str(dfield) + s2 = '\n'.join( + [ + str(df) + for (_, df) in self._pre_evt_sel_static_data_fields_dict.items() + ] + ) + if s2 == '': + s2 = 'None' s1 += dsp.add_leading_text_line_padding(dsp.INDENTATION_WIDTH, s2) s += dsp.add_leading_text_line_padding(dsp.INDENTATION_WIDTH, s1) s += '\n' s1 = 'Static data fields:\n' - s2 = '' - for (idx, dfield) in enumerate(self._static_data_fields_dict): - if(idx > 0): - s2 += '\n' - s2 += str(dfield) + s2 = '\n'.join( + [ + str(df) + for (_, df) in self._static_data_fields_dict.items() + ] + ) + if s2 == '': + s2 = 'None' s1 += dsp.add_leading_text_line_padding(dsp.INDENTATION_WIDTH, s2) s += dsp.add_leading_text_line_padding(dsp.INDENTATION_WIDTH, s1) s += '\n' - s1 = 'Fitparam data fields:\n' - s2 = '' - for (idx, dfield) in enumerate(self._fitparam_data_fields_dict): - if(idx > 0): - s2 += '\n' - s2 += str(dfield) - if(s2 == ''): + s1 = 'Global fitparam data fields:\n' + s2 = '\n'.join( + [ + str(df) + for (_, df) in self._global_fitparam_data_fields_dict.items() + ] + ) + if s2 == '': s2 = 'None' s1 += dsp.add_leading_text_line_padding(dsp.INDENTATION_WIDTH, s2) s += dsp.add_leading_text_line_padding(dsp.INDENTATION_WIDTH, s1) return s - def change_source_hypo_group_manager(self, src_hypo_group_manager): - """Recalculate the source data fields. 
+ def broadcast_sources_array_to_values_array( + self, + arr): + """Broadcasts the given 1d numpy ndarray of length 1 or N_sources to a + numpy ndarray of length N_values. Parameters ---------- - src_hypo_group_manager : instance of SourceHypoGroupManager - The SourceHypoGroupManager manager that defines the groups of - source hypotheses. + arr : instance of ndarray + The (N_sources,)- or (1,)-shaped numpy ndarray holding values for + each source. + + Returns + ------- + out_arr : instance of ndarray + The (N_values,)-shaped numpy ndarray holding the source values + broadcasted to each event value. + """ + arr_dtype = arr.dtype + n_values = self.get_n_values() + + if len(arr) == 1: + return np.full((n_values,), arr[0], dtype=arr_dtype) + + if len(arr) != self.n_sources: + raise ValueError( + f'The length of arr ({len(arr)}) must be 1 or equal to the ' + f'number of sources ({self.n_sources})!') + + out_arr = np.empty( + (n_values,), + dtype=arr.dtype) + + src_idxs = self.src_evt_idxs[0] + v_start = 0 + for (src_idx, src_value) in enumerate(arr): + n = np.count_nonzero(src_idxs == src_idx) + # n = len(evt_idxs[src_idxs == src_idx]) + out_arr[v_start:v_start+n] = np.full( + (n,), src_value, dtype=arr_dtype) + v_start += n + + return out_arr + + def broadcast_sources_arrays_to_values_arrays( + self, + arrays): + """Broadcasts the 1d numpy ndarrays to the values array. + + Parameters + ---------- + arrays : sequence of numpy 1d ndarrays + The sequence of (N_sources,)-shaped numpy ndarrays holding the + parameter values. + + Returns + ------- + out_arrays : list of numpy 1d ndarrays + The list of (N_values,)-shaped numpy ndarrays holding the + broadcasted array values. 
""" - self.calculate_source_data_fields(src_hypo_group_manager) + out_arrays = [ + self.broadcast_sources_array_to_values_array(arr) + for arr in arrays + ] + + return out_arrays + + def broadcast_selected_events_arrays_to_values_arrays( + self, + arrays): + """Broadcasts the given arrays of length N_selected_events to arrays + of length N_values. + + Parameters + ---------- + arrays : sequence of instance of ndarray + The sequence of instance of ndarray with the arrays to be + broadcasted. + + Returns + ------- + out_arrays : list of instance of ndarray + The list of broadcasted numpy ndarray instances. + """ + evt_idxs = self._src_evt_idxs[1] + out_arrays = [ + np.take(arr, evt_idxs) + for arr in arrays + ] + + return out_arrays + + def change_shg_mgr(self, shg_mgr, pmm): + """This method is called when the source hypothesis group manager has + changed. Hence, the source data fields need to get recalculated. + + After calling this method, a new trial should be initialized via the + :meth:`initialize_trial` method! + + Parameters + ---------- + shg_mgr : instance of SourceHypoGroupManager + The instance of SourceHypoGroupManager that defines the source + hypothesis groups. + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper that defines the global + parameters and their mapping to local source parameter. + """ + self.calculate_source_data_fields( + shg_mgr=shg_mgr, + pmm=pmm) def initialize_trial( - self, src_hypo_group_manager, events, n_events=None, - evt_sel_method=None, store_src_ev_idxs=False, tl=None): + self, + shg_mgr, + pmm, + events, + n_events=None, + evt_sel_method=None, + tl=None): """Initializes the trial data manager for a new trial. 
It sets the raw events, calculates pre-event-selection data fields, performs a possible event selection and calculates the static data fields for the left-over @@ -524,9 +754,12 @@ def initialize_trial( Parameters ---------- - src_hypo_group_manager : SourceHypoGroupManager instance + shg_mgr : instance of SourceHypoGroupManager The instance of SourceHypoGroupManager that defines the source hypothesis groups. + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper, that defines the global + parameters and their mapping to local source parameters. events : DataFieldRecordArray instance The DataFieldRecordArray instance holding the entire raw events. n_events : int | None @@ -534,57 +767,116 @@ def initialize_trial( corresponds to. If None, the number of events is taken from the number of events present in the ``events`` array. - evt_sel_method : EventSelectionMethod | None + evt_sel_method : instance of EventSelectionMethod | None The optional event selection method that should be used to select potential signal events. - store_src_ev_idxs : bool - If the evt_sel_method is not None, it determines if source and - event indices of the selected events should get calculated and - stored. - tl : TimeLord | None + tl : instance of TimeLord | None The optional TimeLord instance that should be used for timing measurements. """ # Set the events property, so that the calculation functions of the data # fields can access them. self.events = events - self._src_ev_idxs = None + self._src_evt_idxs = None - if(n_events is None): + # Save the number of sources. + self._n_sources = shg_mgr.n_sources + + if n_events is None: n_events = len(self._events) self.n_events = n_events # Calculate pre-event-selection data fields that are required by the # event selection method. 
- self.calculate_pre_evt_sel_static_data_fields(src_hypo_group_manager) + self.calculate_pre_evt_sel_static_data_fields( + shg_mgr=shg_mgr, + pmm=pmm) - if(evt_sel_method is not None): + if evt_sel_method is not None: logger.debug( f'Performing event selection method ' f'"{classname(evt_sel_method)}".') - (selected_events, src_ev_idxs) = evt_sel_method.select_events( - self._events, tl=tl, ret_src_ev_idxs=store_src_ev_idxs) + (selected_events, src_evt_idxs) = evt_sel_method.select_events( + events=self._events, + tl=tl) logger.debug( f'Selected {len(selected_events)} out of {len(self._events)} ' 'events.') self.events = selected_events - self._src_ev_idxs = src_ev_idxs + self._src_evt_idxs = src_evt_idxs # Sort the events by the index field, if a field was provided. - if(self._index_field_name is not None): + if self._index_field_name is not None: logger.debug( f'Sorting events in index field "{self._index_field_name}"') sorted_idxs = self._events.sort_by_field(self._index_field_name) # If event indices are stored, we need to re-assign also those event # indices according to the new order. - if self._src_ev_idxs is not None: - self._src_ev_idxs[1] = sorted_idxs[self._src_ev_idxs[1]] + if self._src_evt_idxs is not None: + self._src_evt_idxs[1] = np.take( + sorted_idxs, self._src_evt_idxs[1]) + + # Create the src_evt_idxs property data in case it was not provided by + # the event selection. In that case all events are selected for all + # sources. This simplifies the implementations of the PDFs. + if self._src_evt_idxs is None: + self._src_evt_idxs = ( + np.repeat(np.arange(self.n_sources), self.n_selected_events), + np.tile(np.arange(self.n_selected_events), self.n_sources) + ) # Now calculate all the static data fields. This will increment the # trial data state ID. 
- self.calculate_static_data_fields(src_hypo_group_manager) + self.calculate_static_data_fields( + shg_mgr=shg_mgr, + pmm=pmm) + + def get_n_values(self): + """Returns the expected size of the values array after a PDF + evaluation, which will include PDF values for all trial data events and + all sources. - def add_source_data_field(self, name, func, dt=None): + Returns + ------- + n : int + The length of the expected values array after a PDF evaluation. + """ + return len(self._src_evt_idxs[0]) + + def get_values_mask_for_source_mask(self, src_mask): + """Creates a boolean mask for the values array where entries belonging + to the sources given by the source mask are selected. + + Parameters + ---------- + src_mask : instance of numpy ndarray + The (N_sources,)-shaped numpy ndarray holding the boolean selection + of the sources. + + Returns + ------- + values_mask : instance of numpy ndarray + The (N_values,)-shaped numpy ndarray holding the boolean selection + of the values. + """ + tdm_src_idxs = self.src_evt_idxs[0] + src_idxs = np.arange(self.n_sources)[src_mask] + + values_mask = np.zeros((self.get_n_values(),), dtype=np.bool_) + + def make_values_mask(src_idx): + global values_mask + values_mask |= tdm_src_idxs == src_idx + + np.vectorize(make_values_mask)(src_idxs) + + return values_mask + + def add_source_data_field( + self, + name, + func, + dt=None): """Adds a new data field to the manager. The data field must depend solely on source parameters. @@ -596,25 +888,37 @@ def add_source_data_field(self, name, func, dt=None): func : callable The function that calculates the data field values. The call signature must be - `__call__(tdm, src_hypo_group_manager, fitparams)`, where - `tdm` is the TrialDataManager instance holding the event data, - `src_hypo_group_manager` is the SourceHypoGroupManager instance, - and `fitparams` is an unused interface argument. 
+ + __call__(tdm, shg_mgr, pmm) + + where ``tdm`` is the TrialDataManager instance holding the event + data, ``shg_mgr`` is the instance of SourceHypoGroupManager, + and ``pmm`` is the instance of ParameterModelMapper. dt : numpy dtype | str | None If specified it defines the data type this data field should have. If a str instance is given, it defines the name of the data field whose data type should be taken for the data field. """ - if(name in self): + if name in self: raise KeyError( - 'The data field "%s" is already defined!'%(name)) + f'The data field "{name}" is already defined!') - data_field = DataField(name, func, dt=dt) + data_field = DataField( + name=name, + func=func, + dt=dt, + is_src_field=True) self._source_data_fields_dict[name] = data_field def add_data_field( - self, name, func, fitparam_names=None, dt=None, pre_evt_sel=False): + self, + name, + func, + global_fitparam_names=None, + dt=None, + pre_evt_sel=False, + is_srcevt_data=False): """Adds a new data field to the manager. Parameters @@ -625,15 +929,19 @@ def add_data_field( func : callable The function that calculates the data field values. The call signature must be - `__call__(tdm, src_hypo_group_manager, fitparams)`, where - `tdm` is the TrialDataManager instance holding the event data, - `src_hypo_group_manager` is the SourceHypoGroupManager instance, - and `fitparams` is the dictionary with the current fit parameter - names and values. - fitparam_names : sequence of str | None - The sequence of str instances specifying the names of the fit - parameters this data field depends on. If set to None, it means that - the data field does not depend on any fit parameters. 
+ + __call__(tdm, shg_mgr, pmm, global_fitparams_dict=None) + + where ``tdm`` is the TrialDataManager instance holding the trial + event data, ``shg_mgr`` is the instance of SourceHypoGroupManager, + ``pmm`` is the instance of ParameterModelMapper, and + ``global_fitparams_dict`` is the dictionary with the current global + fit parameter names and values. + The shape of the returned array must be (N_selected_events,). + global_fitparam_names : str | sequence of str | None + The sequence of str instances specifying the names of the global fit + parameters this data field depends on. If set to ``None``, it means + that the data field does not depend on any fit parameters. dt : numpy dtype | str | None If specified it defines the data type this data field should have. If a str instance is given, it defines the name of the data field @@ -642,108 +950,161 @@ def add_data_field( Flag if this data field should get calculated before potential signal events get selected (True), or afterwards (False). Default is False. + is_srcevt_data : bool + Flag if this data field contains source-event data, hence the length + of the data array will be N_values. + Default is False. """ - if(name in self): + if name in self: raise KeyError( - 'The data field "%s" is already defined!'%(name)) - - if(pre_evt_sel and (fitparam_names is not None)): - raise ValueError( - f'The pre-event-selection data field "{name}" must not depend ' - 'on fit parameters!') - - data_field = DataField(name, func, fitparam_names, dt=dt) - - if(pre_evt_sel): + 'The data field "{name}" is already defined!') + + if pre_evt_sel: + if global_fitparam_names is not None: + raise ValueError( + f'The pre-event-selection data field "{name}" must not ' + 'depend on global fit parameters!') + + if is_srcevt_data: + raise ValueError( + 'By definition the pre-event-selection data field ' + f'"{name}" cannot hold source-event data! 
The ' + 'is_srcevt_data argument must be set to False!') + + data_field = DataField( + name=name, + func=func, + global_fitparam_names=global_fitparam_names, + dt=dt, + is_src_field=False, + is_srcevt_data=is_srcevt_data) + + if pre_evt_sel: self._pre_evt_sel_static_data_fields_dict[name] = data_field - elif(fitparam_names is None): + elif global_fitparam_names is None: self._static_data_fields_dict[name] = data_field else: - self._fitparam_data_fields_dict[name] = data_field + self._global_fitparam_data_fields_dict[name] = data_field - def calculate_source_data_fields(self, src_hypo_group_manager): + def calculate_source_data_fields( + self, + shg_mgr, + pmm): """Calculates the data values of the data fields that solely depend on source parameters. Parameters ---------- - src_hypo_group_manager : instance of SourceHypoGroupManager + shg_mgr : instance of SourceHypoGroupManager The instance of SourceHypoGroupManager, which defines the groups of source hypotheses. + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper, that defines the global + parameters and their mapping to local source parameters. """ - if(len(self._source_data_fields_dict) == 0): + if len(self._source_data_fields_dict) == 0: return - fitparams = None for (name, dfield) in self._source_data_fields_dict.items(): - dfield.calculate(self, src_hypo_group_manager, fitparams) + dfield.calculate( + tdm=self, + shg_mgr=shg_mgr, + pmm=pmm) self._trial_data_state_id += 1 - def calculate_pre_evt_sel_static_data_fields(self, src_hypo_group_manager): + def calculate_pre_evt_sel_static_data_fields( + self, + shg_mgr, + pmm): """Calculates the data values of the data fields that should be available for the event selection method and do not depend on any fit parameters. Parameters ---------- - src_hypo_group_manager : instance of SourceHypoGroupManager + shg_mgr : instance of SourceHypoGroupManager The instance of SourceHypoGroupManager, which defines the groups of source hypotheses. 
+ pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper, that defines the global + parameters and their mapping to local source parameters. """ - if(len(self._pre_evt_sel_static_data_fields_dict) == 0): + if len(self._pre_evt_sel_static_data_fields_dict) == 0: return - fitparams = dict() for (name, dfield) in self._pre_evt_sel_static_data_fields_dict.items(): - dfield.calculate(self, src_hypo_group_manager, fitparams) + dfield.calculate( + tdm=self, + shg_mgr=shg_mgr, + pmm=pmm) self._trial_data_state_id += 1 - def calculate_static_data_fields(self, src_hypo_group_manager): + def calculate_static_data_fields( + self, + shg_mgr, + pmm): """Calculates the data values of the data fields that do not depend on any source or fit parameters. Parameters ---------- - src_hypo_group_manager : instance of SourceHypoGroupManager + shg_mgr : instance of SourceHypoGroupManager The instance of SourceHypoGroupManager, which defines the groups of source hypotheses. + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper, that defines the global + parameters and their mapping to local source parameters. """ - if(len(self._static_data_fields_dict) == 0): + if len(self._static_data_fields_dict) == 0: return - fitparams = dict() for (name, dfield) in self._static_data_fields_dict.items(): - dfield.calculate(self, src_hypo_group_manager, fitparams) + dfield.calculate( + tdm=self, + shg_mgr=shg_mgr, + pmm=pmm) self._trial_data_state_id += 1 - def calculate_fitparam_data_fields(self, src_hypo_group_manager, fitparams): - """Calculates the data values of the data fields that depend on fit - parameter values. + def calculate_global_fitparam_data_fields( + self, + shg_mgr, + pmm, + global_fitparams_dict): + """Calculates the data values of the data fields that depend on global + fit parameter values. 
Parameters ---------- - src_hypo_group_manager : instance of SourceHypoGroupManager + shg_mgr : instance of SourceHypoGroupManager The instance of SourceHypoGroupManager, which defines the groups of source hypotheses. - fitparams : dict - The dictionary holding the fit parameter names and values. + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper, that defines the global + parameters and their mapping to local source parameters. + global_fitparams_dict : dict + The dictionary holding the current global fit parameter names and + values. """ - if(len(self._fitparam_data_fields_dict) == 0): + if len(self._global_fitparam_data_fields_dict) == 0: return - for (name, dfield) in self._fitparam_data_fields_dict.items(): - dfield.calculate(self, src_hypo_group_manager, fitparams) + for (name, dfield) in self._global_fitparam_data_fields_dict.items(): + dfield.calculate( + tdm=self, + shg_mgr=shg_mgr, + pmm=pmm, + global_fitparams_dict=global_fitparams_dict) self._trial_data_state_id += 1 def get_data(self, name): """Gets the data for the given data field name. The data is stored - either in the raw events record ndarray or in one of the additional - defined data fields. Data from the raw events record ndarray is - prefered. + either in the raw events DataFieldRecordArray or in one of the + additional defined data fields. Data from the raw events + DataFieldRecordArray is prefered. Parameters ---------- @@ -752,33 +1113,35 @@ def get_data(self, name): Returns ------- - data : numpy ndarray - The data of the requested data field. + data : instance of numpy ndarray + The numpy ndarray holding the data of the requested data field. + The length of the array is either N_sources, N_selected_events, or + N_values. Raises ------ KeyError If the given data field is not defined. 
""" - if((self._events is not None) and - (name in self._events.field_name_list)): + # Data fields which are static or depend on global fit parameters are + # stored within the _events DataFieldRecordArray if they do not contain + # source-event data. For all other cases, the data is stored in the + # .values attribute of the DataField class instance. + if self._events is not None and\ + name in self._events.field_name_list: return self._events[name] - if(name in self._source_data_fields_dict): - data = self._source_data_fields_dict[name].values + if name in self._source_data_fields_dict: + return self._source_data_fields_dict[name].values - # Broadcast the value of an one-element 1D ndarray to the length - # of the number of events. Note: Make sure that we don't broadcast - # recarrays. - if(self._events is not None): - if((len(data) == 1) and (data.ndim == 1) and - (data.dtype.fields is None)): - data = np.repeat(data, len(self._events)) - else: - raise KeyError( - f'The data field "{name}" is not defined!') + if name in self._static_data_fields_dict: + return self._static_data_fields_dict[name].values + + if name in self._global_fitparam_data_fields_dict: + return self._global_fitparam_data_fields_dict[name].values - return data + raise KeyError( + f'The data field "{name}" is not defined!') def get_dtype(self, name): """Gets the data type of the given data field. @@ -801,3 +1164,67 @@ def get_dtype(self, name): dt = self.get_data(name).dtype return dt + + def is_event_data_field(self, name): + """Checks if the given data field is an events data field, i.e. its + length is N_selected_events. + + Parameters + ---------- + name : str + The name of the data field. + + Returns + ------- + check : bool + ``True`` if the given data field contains event data, ``False`` + otherwise. 
+ """ + if self._events is not None and\ + name in self._events.field_name_list: + return True + + return False + + def is_source_data_field(self, name): + """Checks if the given data field is a source data field, i.e. its + length is N_sources. + + Parameters + ---------- + name : str + The name of the data field. + + Returns + ------- + check : bool + ``True`` if the given data field contains source data, ``False`` + otherwise. + """ + if name in self._source_data_fields_dict: + return True + + return False + + def is_srcevt_data_field(self, name): + """Checks if the given data field is a source-event data field, i.e. its + length is N_values. + + Parameters + ---------- + name : str + The name of the data field. + + Returns + ------- + check : bool + ``True`` if the given data field contains source-event data, + ``False`` otherwise. + """ + if name in self._static_data_fields_dict: + return self._static_data_fields_dict[name].is_srcevt_data + + if name in self._global_fitparam_data_fields_dict: + return self._global_fitparam_data_fields_dict[name].is_srcevt_data + + return False diff --git a/skyllh/core/types.py b/skyllh/core/types.py new file mode 100644 index 0000000000..fd40fb70f3 --- /dev/null +++ b/skyllh/core/types.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- + +"""This modules defines base types for some of the SkyLLH classes to avoid +circular imports when actively checking for types. 
+""" + + +class SourceHypoGroup_t( + object, +): + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) diff --git a/skyllh/core/analysis_utils.py b/skyllh/core/utils/analysis.py similarity index 80% rename from skyllh/core/analysis_utils.py rename to skyllh/core/utils/analysis.py index 3d051939a0..611169b34f 100644 --- a/skyllh/core/analysis_utils.py +++ b/skyllh/core/utils/analysis.py @@ -1,34 +1,58 @@ # -*- coding: utf-8 -*- -from __future__ import division - +import itertools import logging import numpy as np -from numpy.lib import recfunctions as np_rfn -import itertools -from os import makedirs +from numpy.lib import ( + recfunctions as np_rfn, +) +from os import ( + makedirs, +) import os.path +from scipy.interpolate import ( + interp1d, +) +from scipy.stats import ( + gamma, +) + +try: + from iminuit import minimize +except Exception: + IMINUIT_LOADED = False +else: + IMINUIT_LOADED = True -from skyllh.core.progressbar import ProgressBar +from skyllh.core.progressbar import ( + ProgressBar, +) from skyllh.core.py import ( float_cast, int_cast, issequence, - issequenceof + issequenceof, +) +from skyllh.core.session import ( + is_interactive_session, +) +from skyllh.core.source_model import ( + PointLikeSource, +) +from skyllh.core.storage import ( + NPYFileLoader, +) +from skyllh.core.utils.spline import ( + make_spline_1d, ) -from skyllh.core.session import is_interactive_session -from skyllh.core.storage import NPYFileLoader -from skyllh.physics.source import PointLikeSource -from scipy.interpolate import interp1d -from scipy.stats import gamma -from iminuit import minimize """This module contains common utility functions useful for an analysis. """ + def pointlikesource_to_data_field_array( - tdm, src_hypo_group_manager): + tdm, shg_mgr, pmm): """Function to transform a list of PointLikeSource sources into a numpy record ndarray. 
The resulting numpy record ndarray contains the following fields: @@ -37,46 +61,45 @@ def pointlikesource_to_data_field_array( The right-ascention of the point-like source. `dec`: float The declination of the point-like source. - `src_w`: float - The nomalized detector weight of the point-like source. - `src_w_grad`: float - The normalized weight gradient of the point-like source. - `src_w_W`: float - The nomalized hypothesis weight of the point-like source. + `weight`: float + The weight of the point-like source. Parameters ---------- tdm : instance of TrialDataManager The TrialDataManager instance. - src_hypo_group_manager : instance of SourceHypoGroupManager + shg_mgr : instance of SourceHypoGroupManager The instance of SourceHypoGroupManager that defines the sources. + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper that defines the mapping of global + parameters to local model parameters. Returns ------- arr : (N_sources,)-shaped numpy record ndarray - The numpy record ndarray holding the source parameters `ra` and `dec`. + The numpy record ndarray holding the source parameters. 
""" - sources = src_hypo_group_manager.source_list + sources = shg_mgr.source_list - if(not issequenceof(sources, PointLikeSource)): - raise TypeError('The sources of the SourceHypoGroupManager must be ' + if not issequenceof(sources, PointLikeSource): + raise TypeError( + 'The sources of the SourceHypoGroupManager must be ' 'PointLikeSource instances!') arr = np.empty( (len(sources),), - dtype=[('ra', np.float64), - ('dec', np.float64), - ('src_w', np.float64), - ('src_w_grad', np.float64), - ('src_w_W', np.float64)] - , order='F') + dtype=[ + ('ra', np.float64), + ('dec', np.float64), + ('weight', np.float64), + ], + order='F') for (i, src) in enumerate(sources): - arr['ra'][i] = src.ra - arr['dec'][i] = src.dec - arr['src_w'][i] = src.weight.src_w - arr['src_w_grad'][i] = src.weight.src_w_grad - arr['src_w_W'][i] = src.weight.src_w_W + arr['ra'][i] = src.ra + arr['dec'][i] = src.dec + arr['weight'][i] = src.weight + return arr @@ -116,8 +139,11 @@ def calculate_pval_from_trials( return (p, p_sigma) -def calculate_pval_from_gammafit_to_trials(ts_vals, ts_threshold, - eta=3.0, n_max=500000): +def calculate_pval_from_gammafit_to_trials( + ts_vals, + ts_threshold, + eta=3.0, + n_max=500000): """Calculates the probability (p-value) of test-statistic exceeding the given test-statistic threshold. This calculation relies on fitting a gamma distribution to a list of ts values. @@ -139,7 +165,13 @@ def calculate_pval_from_gammafit_to_trials(ts_vals, ts_threshold, ------- p, p_sigma: tuple(float, float) """ - if(ts_threshold < eta): + if not IMINUIT_LOADED: + raise ImportError( + 'The iminuit module was not imported! ' + 'This module is a requirement for the function ' + '"calculate_pval_from_gammafit_to_trials"!') + + if ts_threshold < eta: raise ValueError( 'ts threshold value = %e, eta = %e. 
The calculation of the p-value' 'from the fit is correct only for ts threshold larger than ' @@ -154,9 +186,14 @@ def calculate_pval_from_gammafit_to_trials(ts_vals, ts_threshold, N_prime = len(ts_eta) alpha = N_prime/Ntot - obj = lambda x: truncated_gamma_logpdf(x[0], x[1], eta=eta, - ts_above_eta=ts_eta, - N_above_eta=N_prime) + def obj(x): + return truncated_gamma_logpdf( + x[0], + x[1], + eta=eta, + ts_above_eta=ts_eta, + N_above_eta=N_prime) + x0 = [0.75, 1.8] # Initial values of function parameters. bounds = [[0.1, 10], [0.1, 10]] # Ranges for the minimization fitter. r = minimize(obj, x0, bounds=bounds) @@ -173,8 +210,13 @@ def calculate_pval_from_gammafit_to_trials(ts_vals, ts_threshold, return (p, p_sigma) -def calculate_pval_from_trials_mixed(ts_vals, ts_threshold, switch_at_ts=3.0, - eta=None, n_max=500000, comp_operator='greater_equal'): +def calculate_pval_from_trials_mixed( + ts_vals, + ts_threshold, + switch_at_ts=3.0, + eta=None, + n_max=500000, + comp_operator='greater_equal'): """Calculates the probability (p-value) of test-statistic exceeding the given test-statistic threshold. 
This calculation relies on fitting a gamma distribution to a list of ts values if ts_threshold is larger than @@ -211,13 +253,24 @@ def calculate_pval_from_trials_mixed(ts_vals, ts_threshold, switch_at_ts=3.0, eta = switch_at_ts if ts_threshold < switch_at_ts: - return calculate_pval_from_trials(ts_vals, ts_threshold, comp_operator=comp_operator) + return calculate_pval_from_trials( + ts_vals, + ts_threshold, + comp_operator=comp_operator) else: - return calculate_pval_from_gammafit_to_trials(ts_vals, ts_threshold, eta=eta, n_max=n_max) + return calculate_pval_from_gammafit_to_trials( + ts_vals, + ts_threshold, + eta=eta, + n_max=n_max) def truncated_gamma_logpdf( - a, scale, eta, ts_above_eta, N_above_eta): + a, + scale, + eta, + ts_above_eta, + N_above_eta): """Calculates the -log(likelihood) of a sample of random numbers generated from a gamma pdf truncated from below at x=eta. @@ -243,12 +296,20 @@ def truncated_gamma_logpdf( """ c0 = 1. - gamma.cdf(eta, a=a, scale=scale) c0 = 1./c0 - logl = N_above_eta*np.log(c0) + np.sum(gamma.logpdf(ts_above_eta, - a=a, scale=scale)) + logl = N_above_eta*np.log(c0) + logl += np.sum( + gamma.logpdf( + ts_above_eta, + a=a, + scale=scale)) + return -logl + def calculate_critical_ts_from_gamma( - ts, h0_ts_quantile, eta=3.0): + ts, + h0_ts_quantile, + eta=3.0): """Calculates the critical test-statistic value corresponding to h0_ts_quantile by fitting the ts distribution with a truncated gamma function. @@ -267,14 +328,25 @@ def calculate_critical_ts_from_gamma( ------- critical_ts : float """ + if not IMINUIT_LOADED: + raise ImportError( + 'The iminuit module was not imported! 
' + 'This module is a requirement of the function ' + '"calculate_critical_ts_from_gamma"!') + Ntot = len(ts) ts_eta = ts[ts > eta] N_prime = len(ts_eta) alpha = N_prime/Ntot - obj = lambda x: truncated_gamma_logpdf(x[0], x[1], eta=eta, - ts_above_eta=ts_eta, - N_above_eta=N_prime) + def obj(x): + return truncated_gamma_logpdf( + x[0], + x[1], + eta=eta, + ts_above_eta=ts_eta, + N_above_eta=N_prime) + x0 = [0.75, 1.8] # Initial values of function parameters. bounds = [[0.1, 10], [0.1, 10]] # Ranges for the minimization fitter. r = minimize(obj, x0, bounds=bounds) @@ -283,7 +355,7 @@ def calculate_critical_ts_from_gamma( norm = alpha/gamma.sf(eta, a=pars[0], scale=pars[1]) critical_ts = gamma.ppf(1 - 1./norm*h0_ts_quantile, a=pars[0], scale=pars[1]) - if(critical_ts < eta): + if critical_ts < eta: raise ValueError( 'Critical ts value = %e, eta = %e. The calculation of the critical ' 'ts value from the fit is correct only for critical ts larger than ' @@ -292,7 +364,13 @@ def calculate_critical_ts_from_gamma( return critical_ts -def polynomial_fit(ns, p, p_weight, deg, p_thr): + +def polynomial_fit( + ns, + p, + p_weight, + deg, + p_thr): """Performs a polynomial fit on the p-values of test-statistic trials associated to each ns.. Using the fitted parameters it computes the number of signal events @@ -321,16 +399,16 @@ def polynomial_fit(ns, p, p_weight, deg, p_thr): # Check if the second order coefficient is positive and eventually # change to a polynomial fit of order 1 to avoid to overestimate # the mean number of signal events for the chosen ts quantile. 
- if(deg == 2 and params[0] > 0): + if deg == 2 and params[0] > 0: deg = 1 (params, cov) = np.polyfit(ns, p, deg, w=p_weight, cov=True) - if(deg == 1): + if deg == 1: (a, b) = (params[0], params[1]) ns = (p_thr - b)/a return ns - elif(deg == 2): + elif deg == 2: (a, b, c) = (params[0], params[1], params[2]) ns = (- b + np.sqrt((b**2)-4*a*(c-p_thr))) / (2*a) return ns @@ -341,10 +419,22 @@ def polynomial_fit(ns, p, p_weight, deg, p_thr): 'must be 1 or 2.', deg) -def estimate_mean_nsignal_for_ts_quantile( - ana, rss, p, eps_p, mu_range, critical_ts=None, h0_trials=None, - h0_ts_quantile=None, min_dmu=0.5, bkg_kwargs=None, sig_kwargs=None, - ppbar=None, tl=None, pathfilename=None): + +def estimate_mean_nsignal_for_ts_quantile( # noqa: C901 + ana, + rss, + p, + eps_p, + mu_range, + critical_ts=None, + h0_trials=None, + h0_ts_quantile=None, + min_dmu=0.5, + bkg_kwargs=None, + sig_kwargs=None, + ppbar=None, + tl=None, + pathfilename=None): """Calculates the mean number of signal events needed to be injected to reach a test statistic distribution with defined properties for the given analysis. @@ -409,14 +499,13 @@ def estimate_mean_nsignal_for_ts_quantile( n_total_generated_trials = 0 - if(critical_ts is None) and (h0_ts_quantile is None): + if (critical_ts is None) and (h0_ts_quantile is None): raise RuntimeError( "Both the critical test-statistic value and the null-hypothesis " "test-statistic quantile are set to None. One of the two is " "needed to have the critical test-statistic value that defines " - "the type of test to run." - ) - elif(critical_ts is None): + "the type of test to run.") + elif critical_ts is None: n_trials_max = int(5.e5) # Via binomial statistics, calcuate the minimum number of trials # needed to get the required precision on the critial TS value. @@ -427,21 +516,26 @@ def estimate_mean_nsignal_for_ts_quantile( # which one is smaller. If n_trials_max trials are computed, a # fit to the ts distribution is performed to get the critial TS. 
n_trials_total = min(n_trials_min, n_trials_max) - if(h0_trials is None): + if h0_trials is None: h0_ts_vals = ana.do_trials( - rss, n_trials_total, mean_n_sig=0, bkg_kwargs=bkg_kwargs, - sig_kwargs=sig_kwargs, ppbar=ppbar, tl=tl)['ts'] + rss=rss, + n=n_trials_total, + mean_n_sig=0, + bkg_kwargs=bkg_kwargs, + sig_kwargs=sig_kwargs, + ppbar=ppbar, + tl=tl)['ts'] logger.debug( 'Generate %d null-hypothesis trials', n_trials_total) n_total_generated_trials += n_trials_total - if(pathfilename is not None): + if pathfilename is not None: makedirs(os.path.dirname(pathfilename), exist_ok=True) np.save(pathfilename, h0_ts_vals) else: - if(h0_trials.size < n_trials_total): + if h0_trials.size < n_trials_total: if not ('seed' in h0_trials.dtype.names): logger.debug( 'Uploaded trials miss the rss_seed field. ' @@ -449,12 +543,21 @@ def estimate_mean_nsignal_for_ts_quantile( 'safely. Uploaded trials will *not* be used.') n_trials = n_trials_total h0_ts_vals = ana.do_trials( - rss, n_trials, mean_n_sig=0, bkg_kwargs=bkg_kwargs, - sig_kwargs=sig_kwargs, ppbar=ppbar, tl=tl)['ts'] + rss=rss, + n=n_trials, + mean_n_sig=0, + bkg_kwargs=bkg_kwargs, + sig_kwargs=sig_kwargs, + ppbar=ppbar, + tl=tl)['ts'] else: n_trials = n_trials_total - h0_trials.size - h0_ts_vals = extend_trial_data_file(ana, rss, - n_trials, trial_data=h0_trials, mean_n_sig=0, + h0_ts_vals = extend_trial_data_file( + ana, + rss, + n_trials, + trial_data=h0_trials, + mean_n_sig=0, pathfilename=pathfilename)['ts'] logger.debug( 'Generate %d null-hypothesis trials', @@ -475,14 +578,14 @@ def estimate_mean_nsignal_for_ts_quantile( # on the critical TS value is smaller then 500k, compute the critical ts # value directly from trials; otherwise calculate it from the gamma # function fitted to the ts distribution. 
- if(n_trials_min <= n_trials_max): + if n_trials_min <= n_trials_max: c = np.percentile(h0_ts_vals, (1 - h0_ts_quantile)*100) else: c = calculate_critical_ts_from_gamma(h0_ts_vals, h0_ts_quantile) logger.debug( 'Critical ts value for bkg ts quantile %g: %e', h0_ts_quantile, c) - elif(h0_ts_quantile is None): + elif h0_ts_quantile is None: # Make sure that the critical ts is a float. if not isinstance(critical_ts, float): raise TypeError( @@ -499,9 +602,7 @@ def estimate_mean_nsignal_for_ts_quantile( "quantile were given. If you want to use your critical_ts " "value, set h0_ts_quantile to None; if you want to compute the " "critical ts from the background distribution, set critical_ts " - "to None." - ) - + "to None.") # Make sure ns_range is mutable. ns_range_ = list(mu_range) @@ -516,9 +617,9 @@ def estimate_mean_nsignal_for_ts_quantile( # Define the range of p-values that will be possible to fit with a # polynomial function of order not larger than 2. min_fit_p, max_fit_p = p - 0.35, p + 0.35 - if(min_fit_p < 0.5): + if min_fit_p < 0.5: min_fit_p = 0.5 - if(max_fit_p > 0.985): + if max_fit_p > 0.985: max_fit_p = 0.985 (n_sig, p_vals, p_val_weights) = ([], [], []) @@ -538,13 +639,17 @@ def estimate_mean_nsignal_for_ts_quantile( # Initially generate trials for a 5-times larger uncertainty ``eps_p`` # to catch ns0 points far away from the desired propability quicker. 
dn_trials = max(100, int(n_trials/5**2 + 0.5)) - trial_vals0 = None (ts_vals0, p0_sigma, delta_p) = ([], 2*eps_p, 0) while (delta_p < p0_sigma*5) and (p0_sigma > eps_p): ts_vals0 = np.concatenate(( ts_vals0, ana.do_trials( - rss, dn_trials, mean_n_sig=ns0, bkg_kwargs=bkg_kwargs, - sig_kwargs=sig_kwargs, ppbar=ppbar, tl=tl)['ts'])) + rss=rss, + n=dn_trials, + mean_n_sig=ns0, + bkg_kwargs=bkg_kwargs, + sig_kwargs=sig_kwargs, + ppbar=ppbar, + tl=tl)['ts'])) (p0, p0_sigma) = calculate_pval_from_trials(ts_vals0, c) n_total_generated_trials += dn_trials @@ -563,11 +668,11 @@ def estimate_mean_nsignal_for_ts_quantile( # n_trials estimate was initially too low. dn_trials = max(100, n_trials - ts_vals0.size) - if((p0_sigma < eps_p) and (delta_p < eps_p)): + if (p0_sigma < eps_p) and (delta_p < eps_p): # We found the ns0 value that corresponds to the desired # probability within the desired uncertainty. - if(p0 < max_fit_p and p0 > min_fit_p): + if (p0 < max_fit_p) and (p0 > min_fit_p): n_sig.append(ns0) p_vals.append(p0) p_val_weights.append(1. 
/ p0_sigma) @@ -576,22 +681,27 @@ def estimate_mean_nsignal_for_ts_quantile( 'Found mu value %g with p value %g within uncertainty +-%g', ns0, p0, p0_sigma) - if(p0 > p): + if p0 > p: ns1 = ns_range_[0] - if(np.abs(ns0 - ns1) > 1.0): + if np.abs(ns0 - ns1) > 1.0: ns1 = ns0 - 1.0 - if(np.abs(ns0 - ns1) < min_dmu): + if np.abs(ns0 - ns1) < min_dmu: ns1 = ns0 - min_dmu else: ns1 = ns_range_[1] - if(np.abs(ns0 - ns1) > 1.0): + if np.abs(ns0 - ns1) > 1.0: ns1 = ns0 + 1.0 - if(np.abs(ns0 - ns1) < min_dmu): + if np.abs(ns0 - ns1) < min_dmu: ns1 = ns0 + min_dmu ts_vals1 = ana.do_trials( - rss, ts_vals0.size, mean_n_sig=ns1, bkg_kwargs=bkg_kwargs, - sig_kwargs=sig_kwargs, ppbar=ppbar, tl=tl)['ts'] + rss=rss, + n=ts_vals0.size, + mean_n_sig=ns1, + bkg_kwargs=bkg_kwargs, + sig_kwargs=sig_kwargs, + ppbar=ppbar, + tl=tl)['ts'] n_total_generated_trials += ts_vals0.size (p1, p1_sigma) = calculate_pval_from_trials(ts_vals1, c) @@ -600,15 +710,15 @@ def estimate_mean_nsignal_for_ts_quantile( 'corresponding to p=(%g +-%g, %g +-%g)', ns0, ns1, p0, p0_sigma, p1, p1_sigma) - if(p1 < max_fit_p and p1 > min_fit_p): + if (p1 < max_fit_p) and (p1 > min_fit_p): n_sig.append(ns1) p_vals.append(p1) p_val_weights.append(1. / p1_sigma) - if(len(n_sig)>2): + if len(n_sig) > 2: scanned_range = np.max(n_sig) - np.min(n_sig) - if(len(n_sig) < 5 or scanned_range < 1.5): + if (len(n_sig) < 5) or (scanned_range < 1.5): deg = 1 else: deg = 2 @@ -631,14 +741,15 @@ def estimate_mean_nsignal_for_ts_quantile( 'Doing a linear interpolation.', np.min(n_sig), np.max(n_sig), len(n_sig)) - # Check if p1 and p0 are equal, which would result in a divison - # by zero. - if(p0 == p1): + # Check if p1 and p0 are equal, which would result in a + # divison by zero. 
+ if p0 == p1: mu = 0.5*(ns0 + ns1) mu_err = 0.5*np.abs(ns1 - ns0) logger.debug( - 'Probability for mu=%g and mu=%g has the same value %g', + 'Probability for mu=%g and mu=%g has the same ' + 'value %g', ns0, ns1, p0) else: dns_dp = np.abs((ns1 - ns0) / (p1 - p0)) @@ -647,7 +758,7 @@ def estimate_mean_nsignal_for_ts_quantile( 'Estimated |dmu/dp| = %g within mu range (%g,%g) ' 'corresponding to p=(%g +-%g, %g +-%g)', dns_dp, ns0, ns1, p0, p0_sigma, p1, p1_sigma) - if(p0 > p): + if p0 > p: mu = ns0 - dns_dp * delta_p else: mu = ns0 + dns_dp * delta_p @@ -660,7 +771,7 @@ def estimate_mean_nsignal_for_ts_quantile( return (mu, mu_err) - if(delta_p < p0_sigma*5): + if delta_p < p0_sigma*5: # The desired probability is within the 5 sigma region of the # current probability. So we use a linear approximation to find the # next ns range. @@ -670,19 +781,19 @@ def estimate_mean_nsignal_for_ts_quantile( # Store ns0 for the new lower or upper bound depending on where the # p0 lies. - if(p0 < max_fit_p and p0 > min_fit_p): + if (p0 < max_fit_p) and (p0 > min_fit_p): n_sig.append(ns0) p_vals.append(p0) p_val_weights.append(1. 
/ p0_sigma) - if(p0+p0_sigma+eps_p <= p): + if p0+p0_sigma+eps_p <= p: ns_lower_bound = ns0 - elif(p0-p0_sigma-eps_p >= p): + elif p0-p0_sigma-eps_p >= p: ns_upper_bound = ns0 ns1 = ns0 * (1 - np.sign(p0 - p) * 0.05) - if(np.abs(ns0 - ns1) < min_dmu): - if((p0 - p) < 0): + if np.abs(ns0 - ns1) < min_dmu: + if (p0 - p) < 0: ns1 = ns0 + min_dmu else: ns1 = ns0 - min_dmu @@ -692,20 +803,25 @@ def estimate_mean_nsignal_for_ts_quantile( ns0, ns1) ts_vals1 = ana.do_trials( - rss, ts_vals0.size, mean_n_sig=ns1, bkg_kwargs=bkg_kwargs, - sig_kwargs=sig_kwargs, ppbar=ppbar, tl=tl)['ts'] + rss=rss, + n=ts_vals0.size, + mean_n_sig=ns1, + bkg_kwargs=bkg_kwargs, + sig_kwargs=sig_kwargs, + ppbar=ppbar, + tl=tl)['ts'] n_total_generated_trials += ts_vals0.size (p1, p1_sigma) = calculate_pval_from_trials(ts_vals1, c) - if(p1 < max_fit_p and p1 > min_fit_p): + if (p1 < max_fit_p) and (p1 > min_fit_p): n_sig.append(ns1) p_vals.append(p1) p_val_weights.append(1. / p1_sigma) # Check if p0 and p1 are equal, which would result into a division # by zero. - if(p0 == p1): + if p0 == p1: dp = 0.5*(p0_sigma + p1_sigma) logger.debug( 'p1 and p0 are equal to %g, causing division by zero. ' @@ -716,7 +832,7 @@ def estimate_mean_nsignal_for_ts_quantile( dns_dp = np.abs((ns1 - ns0) / (p1 - p0)) # p0 and p1 might be very similar, resulting into a numerically # infitite slope. - if(np.isinf(dns_dp)): + if np.isinf(dns_dp): dp = 0.5*(p0_sigma + p1_sigma) logger.debug( 'Infinite dns/dp dedected: ns0=%g, ns1=%g, p0=%g, ' @@ -726,7 +842,7 @@ def estimate_mean_nsignal_for_ts_quantile( dns_dp = np.abs((ns1 - ns0) / dp) logger.debug('dns/dp = %g', dns_dp) - if(p0 > p): + if p0 > p: ns_range_[0] = ns0 - dns_dp * (delta_p + p0_sigma) ns_range_[1] = ns0 + dns_dp * p0_sigma else: @@ -740,7 +856,7 @@ def estimate_mean_nsignal_for_ts_quantile( # In case the new calculated mu range is smaller than the minimum # delta mu, the mu range gets widened by half of the minimum delta # mu on both sides. 
- if(np.abs(ns_range_[1] - ns_range_[0]) < min_dmu): + if np.abs(ns_range_[1] - ns_range_[0]) < min_dmu: ns_range_[0] -= 0.5*min_dmu ns_range_[1] += 0.5*min_dmu else: @@ -748,30 +864,40 @@ def estimate_mean_nsignal_for_ts_quantile( # 5 sigma away from the desired probability p, hence # delta_p >= p0_sigma*5. - if(p0 < max_fit_p and p0 > min_fit_p): + if (p0 < max_fit_p) and (p0 > min_fit_p): n_sig.append(ns0) p_vals.append(p0) p_val_weights.append(1. / p0_sigma) - if(p0 < p): + if p0 < p: ns_range_[0] = ns0 else: ns_range_[1] = ns0 - if(np.abs(ns_range_[1] - ns_range_[0]) < min_dmu): + if np.abs(ns_range_[1] - ns_range_[0]) < min_dmu: # The mu range became smaller than the minimum delta mu and # still beeing far away from the desired probability. # So move the mu range towards the desired probability. - if(p0 < p): + if p0 < p: ns_range_[1] += 10*min_dmu else: ns_range_[0] -= 10*min_dmu def estimate_sensitivity( - ana, rss, h0_trials=None, h0_ts_quantile=0.5, p=0.9, eps_p=0.005, - mu_range=None, min_dmu=0.5, bkg_kwargs=None, sig_kwargs=None, - ppbar=None, tl=None, pathfilename=None): + ana, + rss, + h0_trials=None, + h0_ts_quantile=0.5, + p=0.9, + eps_p=0.005, + mu_range=None, + min_dmu=0.5, + bkg_kwargs=None, + sig_kwargs=None, + ppbar=None, + tl=None, + pathfilename=None): """Estimates the mean number of signal events that whould have to be injected into the data such that the test-statistic value of p*100% of all trials are larger than the critical test-statistic value c, which @@ -833,7 +959,7 @@ def estimate_sensitivity( mu_err : float The uncertainty of the estimated mean number of signal events. 
""" - if(mu_range is None): + if mu_range is None: mu_range = (0, 10) (mu, mu_err) = estimate_mean_nsignal_for_ts_quantile( @@ -855,9 +981,19 @@ def estimate_sensitivity( def estimate_discovery_potential( - ana, rss, h0_trials=None, h0_ts_quantile=2.8665e-7, p=0.5, eps_p=0.005, - mu_range=None, min_dmu=0.5, bkg_kwargs=None, sig_kwargs=None, - ppbar=None, tl=None, pathfilename=None): + ana, + rss, + h0_trials=None, + h0_ts_quantile=2.8665e-7, + p=0.5, + eps_p=0.005, + mu_range=None, + min_dmu=0.5, + bkg_kwargs=None, + sig_kwargs=None, + ppbar=None, + tl=None, + pathfilename=None): """Estimates the mean number of signal events that whould have to be injected into the data such that the test-statistic value of p*100% of all trials are larger than the critical test-statistic value c, which @@ -919,17 +1055,18 @@ def estimate_discovery_potential( mu_err : float Estimated error of `mu`. """ - if(mu_range is None): + if mu_range is None: mu_range = (0, 10) (mu, mu_err) = estimate_mean_nsignal_for_ts_quantile( ana=ana, rss=rss, - h0_trials=h0_trials, - h0_ts_quantile=h0_ts_quantile, p=p, eps_p=eps_p, mu_range=mu_range, + h0_trials=h0_trials, + h0_ts_quantile=h0_ts_quantile, + min_dmu=min_dmu, bkg_kwargs=bkg_kwargs, sig_kwargs=sig_kwargs, ppbar=ppbar, @@ -940,8 +1077,18 @@ def estimate_discovery_potential( def generate_mu_of_p_spline_interpolation( - ana, rss, h0_ts_vals, h0_ts_quantile, eps_p, mu_range, mu_step, - kind='cubic', bkg_kwargs=None, sig_kwargs=None, ppbar=None, tl=None): + ana, + rss, + h0_ts_vals, + h0_ts_quantile, + eps_p, + mu_range, + mu_step, + kind='cubic', + bkg_kwargs=None, + sig_kwargs=None, + ppbar=None, + tl=None): """Generates a spline interpolation for mu(p) function for a pre-defined range of mu, where mu is the mean number of injected signal events and p the probability for the ts value larger than the ts value corresponding to the @@ -997,12 +1144,17 @@ def generate_mu_of_p_spline_interpolation( n_total_generated_trials = 0 - if(h0_ts_vals is 
None): + if h0_ts_vals is None: n_bkg = int(100/(1 - h0_ts_quantile)) logger.debug('Generate %d null-hypothesis trials', n_bkg) h0_ts_vals = ana.do_trials( - rss, n_bkg, mean_n_sig=0, bkg_kwargs=bkg_kwargs, - sig_kwargs=sig_kwargs, ppbar=ppbar, tl=tl)['ts'] + rss=rss, + n=n_bkg, + mean_n_sig=0, + bkg_kwargs=bkg_kwargs, + sig_kwargs=sig_kwargs, + ppbar=ppbar, + tl=tl)['ts'] n_total_generated_trials += n_bkg n_h0_ts_vals = len(h0_ts_vals) @@ -1026,18 +1178,23 @@ def generate_mu_of_p_spline_interpolation( # Create the progress bar if we are in an interactive session. pbar = None - if(is_interactive_session()): + if is_interactive_session(): pbar = ProgressBar(len(mu_vals), parent=ppbar).start() - for (idx,mu) in enumerate(mu_vals): + for (idx, mu) in enumerate(mu_vals): p = None (ts_vals, p_sigma) = ([], 2*eps_p) while (p_sigma > eps_p): ts_vals = np.concatenate( (ts_vals, ana.do_trials( - rss, 100, mean_n_sig=mu, bkg_kwargs=bkg_kwargs, - sig_kwargs=sig_kwargs, ppbar=pbar, tl=tl)['ts'])) + rss=rss, + n=100, + mean_n_sig=mu, + bkg_kwargs=bkg_kwargs, + sig_kwargs=sig_kwargs, + ppbar=pbar, + tl=tl)['ts'])) (p, p_sigma) = calculate_pval_from_trials(ts_vals, c) n_total_generated_trials += 100 logger.debug( @@ -1045,32 +1202,36 @@ def generate_mu_of_p_spline_interpolation( mu, ts_vals.size, p, p_sigma) p_vals[idx] = p - if(pbar is not None): + if pbar is not None: pbar.increment() # Make a mu(p) spline via interp1d. - # The interp1d function requires unique x values. So we need to sort the - # p_vals in increasing order and mask out repeating p values. 
- p_mu_vals = np.array(sorted(zip(p_vals, mu_vals)), dtype=np.float64) - p_vals = p_mu_vals[:,0] - unique_pval_mask = np.concatenate(([True], np.invert( - p_vals[1:] <= p_vals[:-1]))) - p_vals = p_vals[unique_pval_mask] - mu_vals = p_mu_vals[:,1][unique_pval_mask] - - spline = interp1d(p_vals, mu_vals, kind=kind, copy=False, + spline = make_spline_1d( + p_vals, + mu_vals, + kind=kind, + copy=False, assume_sorted=True) - if(pbar is not None): + if pbar is not None: pbar.finish() return spline -def create_trial_data_file( - ana, rss, n_trials, mean_n_sig=0, mean_n_sig_null=0, - mean_n_bkg_list=None, bkg_kwargs=None, sig_kwargs=None, - pathfilename=None, ncpu=None, ppbar=None, tl=None): +def create_trial_data_file( # noqa: C901 + ana, + rss, + n_trials, + mean_n_sig=0, + mean_n_sig_null=0, + mean_n_bkg_list=None, + bkg_kwargs=None, + sig_kwargs=None, + pathfilename=None, + ncpu=None, + ppbar=None, + tl=None): """Creates and fills a trial data file with `n_trials` generated trials for each mean number of injected signal events from `ns_min` up to `ns_max` for a given analysis. @@ -1079,7 +1240,7 @@ def create_trial_data_file( ---------- ana : instance of Analysis The Analysis instance to use for the trial generation. - rss : RandomStateService + rss : instance of RandomStateService The RandomStateService instance to use for generating random numbers. n_trials : int @@ -1093,8 +1254,7 @@ def create_trial_data_file( MNOISEs with a step size of one. If a 3-element sequence of floats is given, it specifies the range plus the step size of the MNOISEs. - mean_n_sig_null : ndarray of float | float | 2- or 3-element sequence of - float + mean_n_sig_null : ndarray of float | float | 2- or 3-element sequence of float The array of the fixed mean number of signal events (FMNOSEs) for the null-hypothesis for which to generate trials. If this argument is not a ndarray, an array of FMNOSEs is generated based on this argument. 
@@ -1140,45 +1300,50 @@ def create_trial_data_file( trial_data : structured numpy ndarray The generated trial data. """ - n_trials = int_cast(n_trials, + n_trials = int_cast( + n_trials, 'The n_trials argument must be castable to type int!') - if(not isinstance(mean_n_sig, np.ndarray)): - if(not issequence(mean_n_sig)): - mean_n_sig = float_cast(mean_n_sig, + if not isinstance(mean_n_sig, np.ndarray): + if not issequence(mean_n_sig): + mean_n_sig = float_cast( + mean_n_sig, 'The mean_n_sig argument must be castable to type float!') mean_n_sig_min = mean_n_sig mean_n_sig_max = mean_n_sig mean_n_sig_step = 1 else: - mean_n_sig = float_cast(mean_n_sig, + mean_n_sig = float_cast( + mean_n_sig, 'The sequence elements of the mean_n_sig argument must be ' 'castable to float values!') - if(len(mean_n_sig) == 2): + if len(mean_n_sig) == 2: (mean_n_sig_min, mean_n_sig_max) = mean_n_sig mean_n_sig_step = 1 - elif(len(mean_n_sig) == 3): + elif len(mean_n_sig) == 3: (mean_n_sig_min, mean_n_sig_max, mean_n_sig_step) = mean_n_sig mean_n_sig = np.arange( mean_n_sig_min, mean_n_sig_max+1, mean_n_sig_step, dtype=np.float64) - if(not isinstance(mean_n_sig_null, np.ndarray)): - if(not issequence(mean_n_sig_null)): - mean_n_sig_null = float_cast(mean_n_sig_null, + if not isinstance(mean_n_sig_null, np.ndarray): + if not issequence(mean_n_sig_null): + mean_n_sig_null = float_cast( + mean_n_sig_null, 'The mean_n_sig_null argument must be castable to type float!') mean_n_sig_null_min = mean_n_sig_null mean_n_sig_null_max = mean_n_sig_null mean_n_sig_null_step = 1 else: - mean_n_sig_null = float_cast(mean_n_sig_null, + mean_n_sig_null = float_cast( + mean_n_sig_null, 'The sequence elements of the mean_n_sig_null argument must ' 'be castable to float values!') - if(len(mean_n_sig_null) == 2): + if len(mean_n_sig_null) == 2: (mean_n_sig_null_min, mean_n_sig_null_max) = mean_n_sig_null mean_n_sig_null_step = 1 - elif(len(mean_n_sig_null) == 3): + elif len(mean_n_sig_null) == 3: 
(mean_n_sig_null_min, mean_n_sig_null_max, mean_n_sig_null_step) = mean_n_sig_null @@ -1193,25 +1358,33 @@ def create_trial_data_file( mean_n_sig, mean_n_sig_null): trials = ana.do_trials( - rss, n=n_trials, mean_n_bkg_list=mean_n_bkg_list, - mean_n_sig=mean_n_sig_, mean_n_sig_0=mean_n_sig_null_, - bkg_kwargs=bkg_kwargs, sig_kwargs=sig_kwargs, ncpu=ncpu, tl=tl, + rss=rss, + n=n_trials, + mean_n_bkg_list=mean_n_bkg_list, + mean_n_sig=mean_n_sig_, + mean_n_sig_0=mean_n_sig_null_, + bkg_kwargs=bkg_kwargs, + sig_kwargs=sig_kwargs, + ncpu=ncpu, + tl=tl, ppbar=pbar) - if(trial_data is None): + if trial_data is None: trial_data = trials else: trial_data = np_rfn.stack_arrays( - [trial_data, trials], usemask=False, asrecarray=True) + [trial_data, trials], + usemask=False, + asrecarray=True) pbar.increment() pbar.finish() - if(trial_data is None): - raise RuntimeError('No trials have been generated! Check your ' - 'generation boundaries!') + if trial_data is None: + raise RuntimeError( + 'No trials have been generated! Check your generation boundaries!') - if(pathfilename is not None): + if pathfilename is not None: # Save the trial data to file. makedirs(os.path.dirname(pathfilename), exist_ok=True) np.save(pathfilename, trial_data) @@ -1220,17 +1393,25 @@ def create_trial_data_file( def extend_trial_data_file( - ana, rss, n_trials, trial_data, mean_n_sig=0, mean_n_sig_null=0, - mean_n_bkg_list=None, bkg_kwargs=None, sig_kwargs=None, - pathfilename=None, **kwargs): + ana, + rss, + n_trials, + trial_data, + mean_n_sig=0, + mean_n_sig_null=0, + mean_n_bkg_list=None, + bkg_kwargs=None, + sig_kwargs=None, + pathfilename=None, + **kwargs): """Appends to the trial data file `n_trials` generated trials for each mean number of injected signal events up to `ns_max` for a given analysis. Parameters ---------- - ana : Analysis + ana : instance of Analysis The Analysis instance to use for sensitivity estimation. 
- rss : RandomStateService + rss : instance of RandomStateService The RandomStateService instance to use for generating random numbers. n_trials : int @@ -1246,8 +1427,7 @@ def extend_trial_data_file( MNOISEs with a step size of one. If a 3-element sequence of floats is given, it specifies the range plus the step size of the MNOISEs. - mean_n_sig_null : ndarray of float | float | 2- or 3-element sequence of - float + mean_n_sig_null : ndarray of float | float | 2- or 3-element sequence of float The array of the fixed mean number of signal events (FMNOSEs) for the null-hypothesis for which to generate trials. If this argument is not a ndarray, an array of FMNOSEs is generated based on this argument. @@ -1280,9 +1460,11 @@ def extend_trial_data_file( """ # Use unique seed to generate non identical trials. if rss.seed in trial_data['seed']: - seed = next(i for i, e in - enumerate(sorted(np.unique(trial_data['seed'])) + - [None], 1) if i != e) + seed = next( + i + for (i, e) in enumerate( + sorted(np.unique(trial_data['seed'])) + [None], 1) + if i != e) rss.reseed(seed) (seed, mean_n_sig, mean_n_sig_null, trials) = create_trial_data_file( @@ -1290,6 +1472,10 @@ def extend_trial_data_file( rss=rss, n_trials=n_trials, mean_n_sig=mean_n_sig, + mean_n_sig_null=mean_n_sig_null, + mean_n_bkg_list=mean_n_bkg_list, + bkg_kwargs=bkg_kwargs, + sig_kwargs=sig_kwargs, **kwargs ) trial_data = np_rfn.stack_arrays( @@ -1297,15 +1483,20 @@ def extend_trial_data_file( usemask=False, asrecarray=True) - if(pathfilename is not None): + if pathfilename is not None: # Save the trial data to file. makedirs(os.path.dirname(pathfilename), exist_ok=True) np.save(pathfilename, trial_data) return trial_data + def calculate_upper_limit_distribution( - analysis, rss, pathfilename, N_bkg=5000, n_bins=100): + ana, + rss, + pathfilename, + n_bkg=5000, + n_bins=100): """Function to calculate upper limit distribution. 
It loads the trial data file containing test statistic distribution and calculates 10 percentile value for each mean number of injected signal event. Then it finds upper @@ -1314,14 +1505,14 @@ def calculate_upper_limit_distribution( Parameters ---------- - analysis : Analysis + ana : instance of Analysis The Analysis instance to use for sensitivity estimation. - rss : RandomStateService + rss : instance of RandomStateService The RandomStateService instance to use for generating random numbers. pathfilename : string Trial data file path including the filename. - N_bkg : int, optional + n_bkg : int, optional Number of times to perform background analysis trial. n_bins : int, optional Number of returned test statistic histograms bins. @@ -1331,19 +1522,19 @@ def calculate_upper_limit_distribution( result : dict Result dictionary which contains the following fields: - - ul : list of float + ul : list of float List of upper limit values. - - mean : float + mean : float Mean of upper limit values. - - median : float + median : float Median of upper limit values. - - var : float + var : float Variance of upper limit values. - - ts_hist : numpy ndarray + ts_hist : numpy ndarray 2D array of test statistic histograms calculated by axis 1. - - extent : list of float + extent : list of float Test statistic histogram boundaries. - - q_values : list of float + q_values : list of float `q` percentile values of test statistic for different injected events means. """ @@ -1352,7 +1543,7 @@ def calculate_upper_limit_distribution( ns_max = max(trial_data['sig_mean']) + 1 ts_bins_range = (min(trial_data['TS']), max(trial_data['TS'])) - q = 10 # Upper limit criterion. + q = 10 # Upper limit criterion. 
trial_data_q_values = np.empty((ns_max,)) trial_data_ts_hist = np.empty((ns_max, n_bins)) for ns in range(ns_max): @@ -1363,7 +1554,10 @@ def calculate_upper_limit_distribution( bins=n_bins, range=ts_bins_range) ts_inv_f = interp1d(trial_data_q_values, range(ns_max), kind='linear') - ts_bkg = analysis.do_trials(rss, N_bkg, sig_mean=0)['TS'] + ts_bkg = ana.do_trials( + rss=rss, + n=n_bkg, + mean_n_sig=0)['TS'] # Cut away lower background test statistic values than the minimal # `ts_inv_f` interpolation boundary. diff --git a/skyllh/core/coords.py b/skyllh/core/utils/coords.py similarity index 66% rename from skyllh/core/coords.py rename to skyllh/core/utils/coords.py index e4eef3ffac..751e8d75d0 100644 --- a/skyllh/core/coords.py +++ b/skyllh/core/utils/coords.py @@ -21,10 +21,11 @@ def rotate_spherical_vector(ra1, dec1, ra2, dec2, ra3, dec3): ra3 = np.atleast_1d(ra3) dec3 = np.atleast_1d(dec3) - assert (len(ra1) == len(dec1) == - len(ra2) == len(dec2) == - len(ra3) == len(dec3) - ), 'All input argument arrays must be of the same length!' + assert ( + len(ra1) == len(dec1) == + len(ra2) == len(dec2) == + len(ra3) == len(dec3) + ), 'All input argument arrays must be of the same length!' N_event = len(ra1) @@ -32,7 +33,7 @@ def rotate_spherical_vector(ra1, dec1, ra2, dec2, ra3, dec3): # correct for possible rounding erros. cos_alpha = (np.cos(ra2 - ra1) * np.cos(dec1) * np.cos(dec2) + np.sin(dec1) * np.sin(dec2)) - cos_alpha[cos_alpha > 1] = 1 + cos_alpha[cos_alpha > 1] = 1 cos_alpha[cos_alpha < -1] = -1 alpha = np.arccos(cos_alpha) @@ -59,7 +60,7 @@ def rotate_spherical_vector(ra1, dec1, ra2, dec2, ra3, dec3): # Calculate the rotation matrix R_i for each event i and perform the # rotation on vector 3 for each event. 
- vec = np.empty((N_event,3), dtype=np.float64) + vec = np.empty((N_event, 3), dtype=np.float64) sin_alpha = np.sin(alpha) twopi = 2*np.pi @@ -87,3 +88,45 @@ def rotate_spherical_vector(ra1, dec1, ra2, dec2, ra3, dec3): dec = np.arcsin(vec[:, 2]) return (ra, dec) + + +def angular_separation(ra1, dec1, ra2, dec2, psi_floor=None): + """Calculates the angular separation on the sphere between two vectors on + the sphere. + + Parameters + ---------- + ra1 : float | array of float + The right-ascension or longitude coordinate of the first vector in + radians. + dec1 : float | array of float + The declination or latitude coordinate of the first vector in radians. + ra2 : float | array of float + The right-ascension or longitude coordinate of the second vector in + radians. + dec2 : float | array of float + The declination coordinate of the second vector in radians. + psi_floor : float | None + If not ``None``, specifies the floor value of psi. + + Returns + ------- + psi : float | array of float + The calculated angular separation value(s). + """ + delta_ra = np.abs(ra1 - ra2) + delta_dec = np.abs(dec1 - dec2) + + x = np.sin(delta_dec / 2.)**2. +\ + np.cos(dec1) * np.cos(dec2) * np.sin(delta_ra / 2.)**2. + + # Handle possible floating precision errors. + x[x < 0.] = 0. + x[x > 1.] = 1. + + psi = 2. 
* np.arcsin(np.sqrt(x)) + + if psi_floor is not None: + psi = np.where(psi < psi_floor, psi_floor, psi) + + return psi diff --git a/skyllh/core/utils/flux_model.py b/skyllh/core/utils/flux_model.py new file mode 100644 index 0000000000..041ecde294 --- /dev/null +++ b/skyllh/core/utils/flux_model.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- + +from scipy.stats import ( + rv_continuous, +) + +from skyllh.core.flux_model import ( + TimeFluxProfile, +) +from skyllh.core.py import ( + classname, +) + + +def create_scipy_stats_rv_continuous_from_TimeFluxProfile( + profile, +): + """This function builds a scipy.stats.rv_continuous instance for a given + :class:`~skyllh.core.flux_model.TimeFluxProfile` instance. + + It can be used to generate random numbers according to the given time flux + profile function. + + Parameters + ---------- + profile : instance of TimeFluxProfile + The instance of TimeFluxProfile providing the function of the time flux + profile. + + Returns + ------- + rv : instance of rv_continuous_frozen + The instance of rv_continuous_frozen representing the time flux profile + as a continuous random variate instance. + """ + if not isinstance(profile, TimeFluxProfile): + raise TypeError( + 'The profile argument must be an instance of TimeFluxProfile! ' + f'Its current type is {classname(profile)}!') + + norm = 0 + tot_integral = profile.get_total_integral() + if tot_integral != 0: + norm = 1 / tot_integral + + class rv_continuous_from_TimeFluxProfile( + rv_continuous): + + def __init__(self, *args, **kwargs): + """Creates a new instance of the subclass of rv_continuous using + the time flux profile. + """ + self._profile = profile + self._norm = norm + + super().__init__(*args, **kwargs) + + def _pdf(self, t): + """Calculates the probability density of the time flux profile + function for given time values. 
+ """ + pd = self._profile(t=t) * self._norm + + return pd + + def _cdf(self, t): + """Calculates the cumulative distribution function values for rhe + given time values. If the time flux profile instance provides a + ``cdf`` method, it will be used. Otherwise the generic ``_cdf`` + method of the ``rv_continuous`` class will be used. + """ + if hasattr(self._profile, 'cdf') and callable(self._profile.cdf): + return self._profile.cdf(t=t) + + return super()._cdf(t) + + rv = rv_continuous_from_TimeFluxProfile( + a=profile.t_start, + b=profile.t_stop, + ).freeze(loc=0, scale=1) + + return rv diff --git a/skyllh/core/utils/multidimgridpdf.py b/skyllh/core/utils/multidimgridpdf.py index 0a024e82e0..8e23bc4a0e 100644 --- a/skyllh/core/utils/multidimgridpdf.py +++ b/skyllh/core/utils/multidimgridpdf.py @@ -5,92 +5,137 @@ """ import numpy as np -import os -from skyllh.core.binning import BinningDefinition -from skyllh.core.pdf import MultiDimGridPDF -from skyllh.core.signalpdf import SignalMultiDimGridPDF -from skyllh.core.backgroundpdf import BackgroundMultiDimGridPDF - - -def kde_pdf_sig_spatial_norm_factor_func(pdf, tdm, fitparams, eventdata): - """This is the standard normalization factor function for the spatial signal - MultiDimGridPDF, which is created from KDE PDF values. +from skyllh.core.binning import ( + BinningDefinition, +) +from skyllh.core.pdf import ( + MultiDimGridPDF, +) +from skyllh.core.py import ( + classname, +) + + +def get_kde_pdf_sig_spatial_norm_factor_func( + log10_psi_name='log10_psi'): + """Returns the standard normalization factor function for the spatial + signal MultiDimGridPDF, which is created from KDE PDF values. It can be used for the ``norm_factor_func`` argument of the + ``create_MultiDimGridPDF_from_photosplinetable`` and ``create_MultiDimGridPDF_from_kde_pdf`` function. + + Parameters + ---------- + log10_psi_name : str + The name of the event data field for the log10(psi) values. 
""" - log10_psi_idx = pdf._axes.axis_name_list.index('log10_psi') - # psi = tdm.get_data('psi') - # Convert to psi. - psi = 10**eventdata[:, log10_psi_idx] - norm = 1. / (2 * np.pi * np.log(10) * psi * np.sin(psi)) - return norm + def kde_pdf_sig_spatial_norm_factor_func( + pdf, + tdm, + params_recarray, + eventdata, + evt_mask=None): + + log10_psi_idx = pdf._axes.get_index_by_name(log10_psi_name) + + if evt_mask is None: + psi = 10**eventdata[:, log10_psi_idx] + else: + psi = 10**eventdata[:, log10_psi_idx][evt_mask] + + norm = 1. / (2 * np.pi * np.log(10) * psi * np.sin(psi)) + return norm -def kde_pdf_bkg_norm_factor_func(pdf, tdm, fitparams, eventdata): - """This is the standard normalization factor function for the background + return kde_pdf_sig_spatial_norm_factor_func + + +def get_kde_pdf_bkg_norm_factor_func(): + """Returns the standard normalization factor function for the background MultiDimGridPDF, which is created from KDE PDF values. It can be used for the ``norm_factor_func`` argument of the + ``create_MultiDimGridPDF_from_photosplinetable`` and ``create_MultiDimGridPDF_from_kde_pdf`` function. """ - return 1. / (2 * np.pi) + def kde_pdf_bkg_norm_factor_func( + pdf, + tdm, + params_recarray, + eventdata, + evt_mask=None): + + return 1. / (2 * np.pi) + + return kde_pdf_bkg_norm_factor_func def create_MultiDimGridPDF_from_photosplinetable( - ds, data, info_key, splinetable_key, norm_factor_func=None, - kind=None, tl=None): + multidimgridpdf_cls, + pmm, + ds, + data, + info_key, + splinetable_key, + kde_pdf_axis_name_map_key='KDE_PDF_axis_name_map', + norm_factor_func=None, + cache_pd_values=False, + tl=None): """ - Creates a MultiDimGridPDF instance with pdf values taken from photospline pdf, - a spline interpolation of KDE PDF values stored in a splinetable on disk. + Creates a MultiDimGridPDF instance with pdf values taken from a photospline + pdf, i.e. a spline interpolation of KDE PDF values stored in a splinetable + on disk. 
Parameters ---------- - ds : Dataset instance - The Dataset instance the PDF applies to. - data : DatasetData instance - The DatasetData instance that holds the auxiliary data of the data set. + multidimgridpdf_cls : subclass of MultiDimGridPDF + The MultiDimGridPDF class, which should be used. + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper, which defines the mapping of + global parameters to local model parameters. + ds : instance of Dataset + The instance of Dataset the PDF applies to. + data : instance of DatasetData + The instance of DatasetData that holds the experimental and monte-carlo + data of the dataset. info_key : str The auxiliary data name for the file containing PDF information. splinetable_key : str - The auxiliary data name for the name of the splinetablefile. + The auxiliary data name for the name of the file containing the + photospline spline table. + kde_pdf_axis_name_map_key : str + The auxiliary data name for the KDE PDF axis name map. norm_factor_func : callable | None - The normalization factor function. It must have the following call - signature: - __call__(pdf, tdm, fitparams) - kind : str | None - The kind of PDF to create. This is either ``'sig'`` for a - SignalMultiDimGridPDF or ``'bkg'`` for a BackgroundMultiDimGridPDF - instance. If set to None, a MultiDimGridPDF instance is created. - tl : TimeLord instance | None - The optional TimeLord instance to use for measuring timing information. + The function that calculates a possible required normalization + factor for the PDF value based on the event properties. + For more information about this argument see the documentation of the + :meth:`skyllh.core.pdf.MultiDimGridPDF.__init__` method. + cache_pd_values : bool + Flag if the probability density values should get cached by the + MultiDimGridPDF class. + tl : instance of TimeLord | None + The optional instance of TimeLord to use for measuring timing + information. 
Returns ------- - pdf : SignalMultiDimGridPDF instance | BackgroundMultiDimGridPDF instance | - MultiDimGridPDF instance - The created PDF instance. Depending on the ``kind`` argument, this is - a SignalMultiDimGridPDF, a BackgroundMultiDimGridPDF, or a - MultiDimGridPDF instance. + pdf : instance of ``multidimgridpdf_cls`` + The created PDF instance of MultiDimGridPDF. """ - - if(kind is None): - pdf_type = MultiDimGridPDF - elif(kind == 'sig'): - pdf_type = SignalMultiDimGridPDF - elif(kind == 'bkg'): - pdf_type = BackgroundMultiDimGridPDF - else: - raise ValueError('The kind argument must be None, "sig", or "bkg"! ' - 'Currently it is '+str(kind)+'!') + if not issubclass(multidimgridpdf_cls, MultiDimGridPDF): + raise TypeError( + 'The multidimgridpdf_cls argument must be a subclass of ' + 'MultiDimGridPDF! ' + f'Its current type is {classname(multidimgridpdf_cls)}.') # Load the PDF data from the auxilary files. num_dict = ds.load_aux_data(info_key, tl=tl) - kde_pdf_axis_name_map = ds.load_aux_data('KDE_PDF_axis_name_map', tl=tl) + kde_pdf_axis_name_map = ds.load_aux_data(kde_pdf_axis_name_map_key, tl=tl) kde_pdf_axis_name_map_inv = dict( [(v, k) for (k, v) in kde_pdf_axis_name_map.items()]) for var in num_dict['vars']: - if(var not in kde_pdf_axis_name_map_inv): + if var not in kde_pdf_axis_name_map_inv: kde_pdf_axis_name_map_inv[var] = var if 'bin_centers' in num_dict: @@ -99,86 +144,97 @@ def create_MultiDimGridPDF_from_photosplinetable( bin_centers_key = 'bins' else: raise KeyError( - "The PDF information file is missing 'bin_centers' or 'bins' key.") + 'The PDF information file is missing "bin_centers" or "bins" key!') axis_binnings = [ BinningDefinition( - kde_pdf_axis_name_map_inv[var], num_dict[bin_centers_key][idx]) + name=kde_pdf_axis_name_map_inv[var], + binedges=num_dict[bin_centers_key][idx]) for (idx, var) in enumerate(num_dict['vars']) ] # Getting the name of the splinetable file - splinetable_file_list = ds.get_aux_data_definition(splinetable_key) 
- # This is a list with only one element. - splinetable_file = os.path.join(ds.root_dir, splinetable_file_list[0]) + splinetable_file = ds.get_abs_pathfilename_list( + ds.get_aux_data_definition(splinetable_key))[0] - pdf = pdf_type( - axis_binnings, + pdf = multidimgridpdf_cls( + pmm=pmm, + axis_binnings=axis_binnings, path_to_pdf_splinetable=splinetable_file, - pdf_grid_data=None, - norm_factor_func=norm_factor_func) + norm_factor_func=norm_factor_func, + cache_pd_values=cache_pd_values) return pdf -def create_MultiDimGridPDF_from_kde_pdf( - ds, data, numerator_key, denumerator_key=None, norm_factor_func=None, - kind=None, tl=None): +def create_MultiDimGridPDF_from_kde_pdf( # noqa: C901 + multidimgridpdf_cls, + pmm, + ds, + data, + numerator_key, + denumerator_key=None, + kde_pdf_axis_name_map_key='KDE_PDF_axis_name_map', + norm_factor_func=None, + cache_pd_values=False, + tl=None): """Creates a MultiDimGridPDF instance with pdf values taken from KDE PDF values stored in the dataset's auxiliary data. Parameters ---------- - ds : Dataset instance - The Dataset instance the PDF applies to. - data : DatasetData instance - The DatasetData instance that holds the auxiliary data of the data set. + multidimgridpdf_cls : subclass of MultiDimGridPDF + The MultiDimGridPDF class, which should be used. + pmm : instance of ParameterModelMapper + The instance of ParameterModelMapper, which defines the mapping of + global parameters to local model parameters. + ds : instance of Dataset + The instance of Dataset the PDF applies to. + data : instance of DatasetData + The instance of DatasetData that holds the auxiliary data of the + dataset. numerator_key : str The auxiliary data name for the PDF numerator array. denumerator_key : str | None The auxiliary data name for the PDF denumerator array. - This can be None, if no denumerator array is required. + This can be ``None``, if no denumerator array is required. 
+ kde_pdf_axis_name_map_key : str + The auxiliary data name for the KDE PDF axis name map. norm_factor_func : callable | None - The normalization factor function. It must have the following call - signature: - __call__(pdf, tdm, fitparams) - kind : str | None - The kind of PDF to create. This is either ``'sig'`` for a - SignalMultiDimGridPDF or ``'bkg'`` for a BackgroundMultiDimGridPDF - instance. If set to None, a MultiDimGridPDF instance is created. - tl : TimeLord instance | None - The optional TimeLord instance to use for measuring timing information. + The function that calculates a possible required normalization + factor for the PDF value based on the event properties. + For more information about this argument see the documentation of the + :meth:`skyllh.core.pdf.MultiDimGridPDF.__init__` method. + cache_pd_values : bool + Flag if the probability density values should get cached by the + MultiDimGridPDF class. + tl : instance of TimeLord | None + The optional instance of TimeLord to use for measuring timing + information. Returns ------- - pdf : SignalMultiDimGridPDF instance | BackgroundMultiDimGridPDF instance | - MultiDimGridPDF instance - The created PDF instance. Depending on the ``kind`` argument, this is - a SignalMultiDimGridPDF, a BackgroundMultiDimGridPDF, or a - MultiDimGridPDF instance. + pdf : instance of ``multidimgridpdf_cls`` + The created PDF instance of MultiDimGridPDF. """ - if(kind is None): - pdf_type = MultiDimGridPDF - elif(kind == 'sig'): - pdf_type = SignalMultiDimGridPDF - elif(kind == 'bkg'): - pdf_type = BackgroundMultiDimGridPDF - else: - raise ValueError('The kind argument must be None, "sig", or "bkg"! ' - 'Currently it is '+str(kind)+'!') + if not issubclass(multidimgridpdf_cls, MultiDimGridPDF): + raise TypeError( + 'The multidimgridpdf_cls argument must be a subclass of ' + 'MultiDimGridPDF! ' + f'Its current type is {classname(multidimgridpdf_cls)}.') # Load the PDF data from the auxilary files. 
num_dict = ds.load_aux_data(numerator_key, tl=tl) denum_dict = None - if(denumerator_key is not None): + if denumerator_key is not None: denum_dict = ds.load_aux_data(denumerator_key, tl=tl) - kde_pdf_axis_name_map = ds.load_aux_data('KDE_PDF_axis_name_map', tl=tl) + kde_pdf_axis_name_map = ds.load_aux_data(kde_pdf_axis_name_map_key, tl=tl) kde_pdf_axis_name_map_inv = dict( [(v, k) for (k, v) in kde_pdf_axis_name_map.items()]) for var in num_dict['vars']: - if(var not in kde_pdf_axis_name_map_inv): + if var not in kde_pdf_axis_name_map_inv: kde_pdf_axis_name_map_inv[var] = var if 'bin_centers' in num_dict: @@ -187,7 +243,7 @@ def create_MultiDimGridPDF_from_kde_pdf( bin_centers_key = 'bins' else: raise KeyError( - "The PDF information file is missing 'bin_centers' or 'bins' key.") + 'The PDF information file is missing "bin_centers" or "bins" key!') axis_binnings = [ BinningDefinition( @@ -196,7 +252,7 @@ def create_MultiDimGridPDF_from_kde_pdf( ] vals = num_dict['pdf_vals'] - if(denum_dict is not None): + if denum_dict is not None: # A denumerator is required, so we need to divide the numerator pdf # values by the denumerator pdf values, by preserving the correct axis # order. @@ -204,7 +260,7 @@ def create_MultiDimGridPDF_from_kde_pdf( # match the axis order of the numerator pdf values array. selector = [] for var in num_dict['vars']: - if(var in denum_dict['vars']): + if var in denum_dict['vars']: # The variable is present in both pdf value arrays. So select # all values of that dimension. selector.append(slice(None, None)) @@ -224,10 +280,11 @@ def create_MultiDimGridPDF_from_kde_pdf( # Set NaN values to 0. 
vals[np.isnan(vals)] = 0 - pdf = pdf_type( - axis_binnings, - path_to_pdf_splinetable=None, + pdf = multidimgridpdf_cls( + pmm=pmm, + axis_binnings=axis_binnings, pdf_grid_data=vals, - norm_factor_func=norm_factor_func) + norm_factor_func=norm_factor_func, + cache_pd_values=cache_pd_values) return pdf diff --git a/skyllh/core/utils/ndphotosplinepdf.py b/skyllh/core/utils/ndphotosplinepdf.py deleted file mode 100644 index 32c10683e6..0000000000 --- a/skyllh/core/utils/ndphotosplinepdf.py +++ /dev/null @@ -1,108 +0,0 @@ -# -*- coding: utf-8 -*- -# Author: Dr. Martin Wolf - -"""This module contains utility functions for creating and managing -NDPhotosplinePDF instances. -""" - -from skyllh.core.binning import BinningDefinition -from skyllh.core.pdf import NDPhotosplinePDF -from skyllh.core.backgroundpdf import BackgroundNDPhotosplinePDF -from skyllh.core.signalpdf import SignalNDPhotosplinePDF - - -def create_NDPhotosplinePDF_from_photosplinefit( - ds, kind, info_key, splinefit_key, param_set=None, - norm_factor_func=None, - tl=None - ): - """Creates a new NDPhotosplinePDF instance from a photospline fits file that - is defined in the given data set. - - Parameters - ---------- - ds : Dataset instance - The Dataset instance the PDF applies to. - kind : str | None - The kind of PDF to create. This is either ``'sig'`` for a - SignalNDPhotosplinePDF, or ``'bkg'`` for a BackgroundNDPhotosplinePDF - instance. If set to None, a NDPhotosplinePDF instance is created. - info_key : str - The auxiliary data name for the file containing PDF meta data - information. - splinefit_key : str - The auxiliary data name defining the path to the file containing the - photospline fit. - param_set : Parameter | ParameterSet | None - The Parameter instance or ParameterSet instance defining the - parameters of the new PDF. The ParameterSet holds the information - which parameters are fixed and which are floating (i.e. fitted). 
- norm_factor_func : callable | None - The normalization factor function. It must have the following call - signature: - __call__(pdf, tdm, params) - tl : TimeLord instance | None - The optional TimeLord instance to use for measuring timing information. - - Returns - ------- - pdf : SignalNDPhotosplinePDF instance | - BackgroundNDPhotosplinePDF instance | NDPhotosplinePDF instance - The created PDF instance. Depending on the ``kind`` argument, this is - a SignalNDPhotosplinePDF, a BackgroundNDPhotosplinePDF, or a - NDPhotosplinePDF instance. - """ - - if(kind is None): - pdf_type = NDPhotosplinePDF - elif(kind == 'sig'): - pdf_type = SignalNDPhotosplinePDF - elif(kind == 'bkg'): - pdf_type = BackgroundNDPhotosplinePDF - else: - raise ValueError( - 'The kind argument must be None, "sig", or "bkg"! ' - 'Currently it is '+str(kind)+'!') - - # Load the PDF data from the auxilary files. - info_dict = ds.load_aux_data(info_key, tl=tl) - - kde_pdf_axis_name_map = ds.load_aux_data('KDE_PDF_axis_name_map', tl=tl) - kde_pdf_axis_name_map_inv = dict( - [(v, k) for (k, v) in kde_pdf_axis_name_map.items()]) - for var in info_dict['vars']: - if(var not in kde_pdf_axis_name_map_inv): - kde_pdf_axis_name_map_inv[var] = var - - # Select the bin center information from the meta data information file. - # The "bins" key is for backward compatibility. 
- if('bin_centers' in info_dict): - bin_centers_key = 'bin_centers' - elif('bins' in info_dict): - bin_centers_key = 'bins' - else: - raise KeyError( - 'The PDF information file is missing "bin_centers" or "bins" key!') - - axis_binnings = [ - BinningDefinition( - kde_pdf_axis_name_map_inv[var], info_dict[bin_centers_key][idx]) - for (idx, var) in enumerate(info_dict['vars']) - ] - - # Getting the name of the splinetable file - splinefit_file_list = ds.get_abs_pathfilename_list( - ds.get_aux_data_definition(splinefit_key)) - if(len(splinefit_file_list) != 1): - raise ValueError( - 'The spline fit file list must contain only a single file name! ' - 'Currently it contains {} file names!'.format( - len(splinefit_file_list))) - - pdf = pdf_type( - axis_binnings=axis_binnings, - param_set=param_set, - path_to_pdf_splinefit=splinefit_file_list[0], - norm_factor_func=norm_factor_func) - - return pdf diff --git a/skyllh/core/utils/spline.py b/skyllh/core/utils/spline.py new file mode 100644 index 0000000000..fc9cbadaac --- /dev/null +++ b/skyllh/core/utils/spline.py @@ -0,0 +1,274 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +from scipy.interpolate import interp1d + + +def make_spline_1d( + x, + y, + kind='linear', + **kwargs): + """Creates a 1D spline for the function y(x) using + :class:`scipy.interpolate.interp1d`. + + Parameters + ---------- + x : array_like + The x values. + y : array_like + The y values. + kind : str + The kind of the spline. See the :class:`scipy.interpolate.interp1d` + documentation for possible values. Default is ``'linear'``. + **kwargs + Additional keyword arguments are passed to the :class:`~scipy.interpolate.interp1d` function. + + Returns + ------- + spline : + The created 1D spline instance. + """ + x = np.atleast_1d(x) + y = np.atleast_1d(y) + + # The interp1d function requires unique x values. So we need to sort x in + # increasing order and mask out repeating x values. 
+ xy = np.array(sorted(zip(x, y)), dtype=y.dtype) + x = xy[:, 0] + unique_x_mask = np.concatenate(([True], np.invert( + x[1:] <= x[:-1]))) + x = x[unique_x_mask] + y = xy[:, 1][unique_x_mask] + + spline = interp1d( + x, + y, + kind=kind, + copy=False, + assume_sorted=True, + **kwargs) + + return spline + + +class CatmullRomRegular1DSpline( + object): + """This class provides a one-dimensional Catmull-Rom spline which is a C^1 + continous spline, where the control points coincide with the data points. + The x data points need to be equal distant. + + .. note:: + + The first and last data point are not part of the splined curve! + + """ + + def __init__( + self, + x, + y, + **kwargs, + ): + """Creates a new CatmullRom1DSpline instance. + + Parameters + ---------- + x : instance of ndarray + The x values of the data points. + y : instance of ndarray + The y values of the data points. + """ + super().__init__( + **kwargs) + + if len(x) != len(y): + raise ValueError( + f'The number of x ({len(x)}) and y ({len(y)}) data values ' + 'need to be equal!') + if len(x) < 4: + raise ValueError( + f'The number of data points ({len(x)}) must be at least 4!') + + unique_delta_x = np.unique(np.diff(x)) + if len(unique_delta_x) != 1: + raise ValueError( + 'The data points must be equal distant in x!') + + self._delta_x = unique_delta_x[0] + self._x_start = x[1] + self._x_stop = x[-2] + + # Calculates the number of segments given the number of data points. + # Since there are 4 points necessary per segment, we need to subtract 3. + self._num_segments = len(x) - 3 + + # Calculate the required data for each segment. + self._segment_data = [] + for seg_idx in range(self._num_segments): + sl = slice(seg_idx, seg_idx+4) + (t1, t2, t3) = self._calc_segment_coefficients(Px=x[sl], Py=y[sl]) + self._segment_data.append( + (t1, t2, t3, x[sl], y[sl])) + + def _eval_for_valid_x( + self, + x, + ): + """Evaluates the spline given valid x-values in data coordinates. 
+ + Parameters + ---------- + x : instance of ndarray + The instance of ndarray holding the valid values for which the + spline should get evaluated. + + Returns + ------- + y : instance of ndarray + The instance of ndarray with the spline values at the given x + values. + """ + y = np.zeros((len(x),), dtype=np.float64) + + # Determine on which spline segment the data value belongs to. + seg_idxs = np.empty((len(x),), dtype=np.int64) + np.floor( + (x - self._x_start) / self._delta_x, + out=seg_idxs, + casting='unsafe') + m = x == self._x_stop + seg_idxs[m] = self._num_segments - 1 + + # Loop over the unique segments. + for seg_idx in np.unique(seg_idxs): + # Create a mask of the values belonging to this segment. + mask = seg_idxs == seg_idx + n_points = np.count_nonzero(mask) + + (t1, t2, t3, Px, Py) = self._segment_data[seg_idx] + + # Note: This linear relation between x and t is the reason why the + # data point x values must be equal distant. + t = (x[mask] - Px[1]) / (Px[2] - Px[1]) * (t2 - t1) + t1 + t = t.reshape(n_points, 1) + + P0 = (Px[0], Py[0]) + P1 = (Px[1], Py[1]) + P2 = (Px[2], Py[2]) + P3 = (Px[3], Py[3]) + + t1_m_t0 = t1 + t2_m_t0 = t2 + t2_m_t1 = t2 - t1 + t3_m_t1 = t3 - t1 + t3_m_t2 = t3 - t2 + + t_m_t0 = t + t_m_t1 = t - t1 + t2_m_t = t2 - t + t3_m_t = t3 - t + + A1 = (t1 - t) / (t1_m_t0) * P0 + (t_m_t0) / (t1_m_t0) * P1 + A2 = (t2_m_t) / (t2_m_t1) * P1 + (t_m_t1) / (t2_m_t1) * P2 + A3 = (t3_m_t) / (t3_m_t2) * P2 + (t - t2) / (t3_m_t2) * P3 + B1 = (t2_m_t) / (t2_m_t0) * A1 + (t_m_t0) / (t2_m_t0) * A2 + B2 = (t3_m_t) / (t3_m_t1) * A2 + (t_m_t1) / (t3_m_t1) * A3 + seg_points = t2_m_t / t2_m_t1 * B1 + t_m_t1 / t2_m_t1 * B2 + + y[mask] = seg_points[:, 1] + + return y + + def __call__( + self, + x, + oor_value=np.nan, + ): + """Evaluates the spline given x-values in data coordinates. + + Parameters + ---------- + x : instance of ndarray + The instance of ndarray holding the values for which the spline + should get evaluated. 
+ + Returns + ------- + y : instance of ndarray + The instance of ndarray with the spline values at the given x + values. + """ + x = np.atleast_1d(x) + + m_valid_x = (x >= self._x_start) & (x <= self._x_stop) + + y = np.full((len(x),), oor_value, dtype=np.float64) + y[m_valid_x] = self._eval_for_valid_x(x=x[m_valid_x]) + + return y + + def _calc_tj( + self, + ti, + Pi_x, + Pi_y, + Pj_x, + Pj_y, + ): + """Calculates the next segment coefficient ``tj`` given the previous + segment coefficient ``ti`` and the previous and next data point + ``(Pi_x, Pi_y)`` and ``(Pj_x, Pj_y)``, respectively. + + Parameters + ---------- + ti : float + The previous segment coefficient. + Pi_x : float + The x-value of the previous data point. + Pi_y : float + The y-value of the previous data point. + Pj_x : float + The x-value of the next data point. + Pj_y : float + The y-value of the next data point. + + Returns + ------- + tj : float + The next segment coefficient. + """ + dx = Pj_x - Pi_x + dy = Pj_y - Pi_y + tj = ti + np.sqrt(np.sqrt(dx*dx + dy*dy)) + + return tj + + def _calc_segment_coefficients( + self, + Px, + Py, + ): + """Calculates the segment coefficients t1, t2, and t3 given the 4 + data (control) points of the segment. The coefficient t0 is 0 by + definition. + + Parameters + ---------- + Px : instance of ndarray + The (4,)-shaped numpy ndarray holding the 4 x-values of the + segment's data points. + Py : instance of ndarray + The (4,)-shaped numpy ndarray holding the 4 y-values of the + segment's data points. 
+ """ + t0 = 0 + t1 = self._calc_tj( + ti=t0, Pi_x=Px[0], Pi_y=Py[0], Pj_x=Px[1], Pj_y=Py[1]) + t2 = self._calc_tj( + ti=t1, Pi_x=Px[1], Pi_y=Py[1], Pj_x=Px[2], Pj_y=Py[2]) + t3 = self._calc_tj( + ti=t2, Pi_x=Px[2], Pi_y=Py[2], Pj_x=Px[3], Pj_y=Py[3]) + + return (t1, t2, t3) diff --git a/skyllh/core/utils/trials.py b/skyllh/core/utils/trials.py index 31e07cc4ea..d988cd041e 100644 --- a/skyllh/core/utils/trials.py +++ b/skyllh/core/utils/trials.py @@ -3,10 +3,11 @@ """This module contains utility functions related analysis trials. """ -import numpy as np import pickle -from skyllh.core.timing import TaskTimer +from skyllh.core.timing import ( + TaskTimer, +) def create_pseudo_data_file( @@ -18,8 +19,9 @@ def create_pseudo_data_file( bkg_kwargs=None, sig_kwargs=None, tl=None - ): - """Creates a pickle file that contains the pseudo data for a single trial. +): + """Creates a pickle file that contains the pseudo data for a single trial + by generating background and signal events. Parameters ---------- @@ -51,29 +53,29 @@ def create_pseudo_data_file( """ (n_bkg_events_list, bkg_events_list) = ana.generate_background_events( - rss = rss, - mean_n_bkg_list = mean_n_bkg_list, - bkg_kwargs = bkg_kwargs, - tl = tl + rss=rss, + mean_n_bkg_list=mean_n_bkg_list, + bkg_kwargs=bkg_kwargs, + tl=tl ) (n_sig, n_sig_events_list, sig_events_list) = ana.generate_signal_events( - rss = rss, - mean_n_sig = mean_n_sig, - sig_kwargs = sig_kwargs, - tl = tl + rss=rss, + mean_n_sig=mean_n_sig, + sig_kwargs=sig_kwargs, + tl=tl ) trial_data = dict( - mean_n_bkg_list = mean_n_bkg_list, - mean_n_sig = mean_n_sig, - bkg_kwargs = bkg_kwargs, - sig_kwargs = sig_kwargs, - n_sig = n_sig, - n_bkg_events_list = n_bkg_events_list, - n_sig_events_list = n_sig_events_list, - bkg_events_list = bkg_events_list, - sig_events_list = sig_events_list + mean_n_bkg_list=mean_n_bkg_list, + mean_n_sig=mean_n_sig, + bkg_kwargs=bkg_kwargs, + sig_kwargs=sig_kwargs, + n_sig=n_sig, + 
n_bkg_events_list=n_bkg_events_list, + n_sig_events_list=n_sig_events_list, + bkg_events_list=bkg_events_list, + sig_events_list=sig_events_list ) with TaskTimer(tl, 'Writing pseudo data to file.'): @@ -106,7 +108,7 @@ def load_pseudo_data(filename, tl=None): bkg_events_list : list of DataFieldRecordArray instances The list of DataFieldRecordArray instances containing the background pseudo data events for each data set. - sig_events_list : list of DataFieldRecordArray instances or None + sig_events_list : list of DataFieldRecordArray instances | None The list of DataFieldRecordArray instances containing the signal pseudo data events for each data set. If a particular dataset has no signal events, the entry for that dataset can be None. diff --git a/skyllh/i3/background_generation.py b/skyllh/i3/background_generation.py index 69d6a7d06c..4e4e14775f 100644 --- a/skyllh/i3/background_generation.py +++ b/skyllh/i3/background_generation.py @@ -30,11 +30,13 @@ def data_scrambler(self): """The DataScrambler instance that implements the data scrambling. 
""" return self._data_scrambler + @data_scrambler.setter def data_scrambler(self, scrambler): - if(not isinstance(scrambler, DataScrambler)): - raise TypeError('The data_scrambler property must be an instance ' - 'of DataScrambler!') + if not isinstance(scrambler, DataScrambler): + raise TypeError( + 'The data_scrambler property must be an instance of ' + 'DataScrambler!') self._data_scrambler = scrambler def generate_events(self, rss, dataset, data, **kwargs): diff --git a/skyllh/i3/backgroundpdf.py b/skyllh/i3/backgroundpdf.py index 602ac1cb02..05cd33d2fd 100644 --- a/skyllh/i3/backgroundpdf.py +++ b/skyllh/i3/backgroundpdf.py @@ -5,21 +5,33 @@ import scipy.interpolate from skyllh.core.binning import ( - BinningDefinition, - UsesBinning + UsesBinning, ) from skyllh.core.pdf import ( - EnergyPDF, IsBackgroundPDF, - SpatialPDF + SpatialPDF, +) +from skyllh.core.py import ( + classname, + int_cast, + issequence, + issequenceof, +) +from skyllh.core.storage import ( + DataFieldRecordArray, +) +from skyllh.core.timing import ( + TaskTimer, +) +from skyllh.i3.pdf import ( + I3EnergyPDF, ) -from skyllh.core.py import issequenceof -from skyllh.core.storage import DataFieldRecordArray -from skyllh.core.timing import TaskTimer -from skyllh.i3.pdf import I3EnergyPDF -class BackgroundI3SpatialPDF(SpatialPDF, UsesBinning, IsBackgroundPDF): +class BackgroundI3SpatialPDF( + SpatialPDF, + UsesBinning, + IsBackgroundPDF): """This is the base class for all IceCube specific spatial background PDF models. IceCube spatial background PDFs depend solely on the zenith angle, and hence, on the declination of the event. @@ -27,34 +39,41 @@ class BackgroundI3SpatialPDF(SpatialPDF, UsesBinning, IsBackgroundPDF): The IceCube spatial background PDF is modeled as a 1d spline function in sin(declination). 
""" - def __init__(self, data_sinDec, data_weights, sinDec_binning, - spline_order_sinDec): + def __init__( + self, + data_sin_dec, + data_weights, + sin_dec_binning, + spline_order_sin_dec): """Creates a new IceCube spatial background PDF object. Parameters ---------- - data_sinDec : 1d ndarray + data_sin_dec : 1d ndarray The array holding the sin(dec) values of the events. data_weights : 1d ndarray The array holding the weight of each event used for histogramming. - sinDec_binning : BinningDefinition + sin_dec_binning : BinningDefinition The binning definition for the sin(declination) axis. - spline_order_sinDec : int + spline_order_sin_dec : int The order of the spline function for the logarithmic values of the spatial background PDF along the sin(dec) axis. """ - super(BackgroundI3SpatialPDF, self).__init__( + super().__init__( + pmm=None, ra_range=(0, 2*np.pi), - dec_range=(np.arcsin(sinDec_binning.lower_edge), - np.arcsin(sinDec_binning.upper_edge))) + dec_range=( + np.arcsin(sin_dec_binning.lower_edge), + np.arcsin(sin_dec_binning.upper_edge))) - self.add_binning(sinDec_binning, 'sin_dec') - self.spline_order_sinDec = spline_order_sinDec + self.add_binning(sin_dec_binning, 'sin_dec') + self.spline_order_sin_dec = spline_order_sin_dec - (h, bins) = np.histogram(data_sinDec, - bins = sinDec_binning.binedges, - weights = data_weights, - range = sinDec_binning.range) + (h, bins) = np.histogram( + data_sin_dec, + bins=sin_dec_binning.binedges, + weights=data_weights, + range=sin_dec_binning.range) # Save original histogram. self._orig_hist = h @@ -63,30 +82,39 @@ def __init__(self, data_sinDec, data_weights, sinDec_binning, h = h / h.sum() / (bins[1:] - bins[:-1]) # Check if there are any NaN values. - if(np.any(np.isnan(h))): - raise ValueError('The declination histogram contains NaN values! Check your sin(dec) binning! 
The bins with NaN values are: {0}'.format(sinDec_binning.bincenters[np.isnan(h)])) - - if(np.any(h <= 0.)): - raise ValueError('Some declination histogram bins for the spatial background PDF are empty, this must not happen! The empty bins are: {0}'.format(sinDec_binning.bincenters[h <= 0.])) + if np.any(np.isnan(h)): + nan_bcs = sin_dec_binning.bincenters[np.isnan(h)] + raise ValueError( + 'The declination histogram contains NaN values! Check your ' + 'sin(dec) binning! The bins with NaN values are: ' + f'{nan_bcs}') + + if np.any(h <= 0.): + empty_bcs = sin_dec_binning.bincenters[h <= 0.] + raise ValueError( + 'Some declination histogram bins for the spatial background ' + 'PDF are empty, this must not happen! The empty bins are: ' + f'{empty_bcs}') # Create the logarithmic spline. self._log_spline = scipy.interpolate.InterpolatedUnivariateSpline( - sinDec_binning.bincenters, np.log(h), k=self.spline_order_sinDec) + sin_dec_binning.bincenters, np.log(h), k=self.spline_order_sin_dec) # Save original spline. self._orig_log_spline = self._log_spline @property - def spline_order_sinDec(self): + def spline_order_sin_dec(self): """The order (int) of the logarithmic spline function, that splines the background PDF, along the sin(dec) axis. """ - return self._spline_order_sinDec - @spline_order_sinDec.setter - def spline_order_sinDec(self, order): - if(not isinstance(order, int)): - raise TypeError('The spline_order_sinDec property must be of type int!') - self._spline_order_sinDec = order + return self._spline_order_sin_dec + + @spline_order_sin_dec.setter + def spline_order_sin_dec(self, order): + self._spline_order_sin_dec = int_cast( + order, + 'The spline_order_sin_dec property must be castable to type int!') def add_events(self, events): """Add events to spatial background PDF object and recalculate @@ -98,16 +126,18 @@ def add_events(self, events): The array holding the event data. 
The following data fields must exist: - - 'sin_dec' : float - The sin(declination) value of the event. + sin_dec : float + The sin(declination) value of the event. + """ - data = events['sin_dec'] + data_sin_dec = events['sin_dec'] - sinDec_binning = self.get_binning('sin_dec') + sin_dec_binning = self.get_binning('sin_dec') - (h_upd, bins) = np.histogram(data, - bins = sinDec_binning.binedges, - range = sinDec_binning.range) + (h_upd, bins) = np.histogram( + data_sin_dec, + bins=sin_dec_binning.binedges, + range=sin_dec_binning.range) # Construct histogram with added events. h = self._orig_hist + h_upd @@ -117,7 +147,7 @@ def add_events(self, events): # Create the updated logarithmic spline. self._log_spline = scipy.interpolate.InterpolatedUnivariateSpline( - sinDec_binning.bincenters, np.log(h), k=self.spline_order_sinDec) + sin_dec_binning.bincenters, np.log(h), k=self.spline_order_sin_dec) def reset(self): """Reset the logarithmic spline to the original function, which was @@ -125,7 +155,24 @@ def reset(self): """ self._log_spline = self._orig_log_spline - def get_prob(self, tdm, fitparams=None, tl=None): + def initialize_for_new_trial( + self, + tdm, + tl=None, + **kwargs): + """Pre-cumputes the probability density values when new trial data is + available. + """ + with TaskTimer(tl, 'Evaluating bkg log-spline.'): + log_spline_val = self._log_spline(tdm.get_data('sin_dec')) + + self._pd = 0.5 / np.pi * np.exp(log_spline_val) + + def get_pd( + self, + tdm, + params_recarray=None, + tl=None): """Calculates the spatial background probability on the sphere of each event. @@ -135,34 +182,39 @@ def get_prob(self, tdm, fitparams=None, tl=None): The TrialDataManager instance holding the trial event data for which to calculate the PDF values. The following data fields must exist: - - 'sin_dec' : float - The sin(declination) value of the event. - fitparams : None + sin_dec : float + The sin(declination) value of the event. 
+ + params_recarray : None Unused interface parameter. - tl : TimeLord instance | None + tl : instance of TimeLord | None The optional TimeLord instance that should be used to measure timing information. Returns ------- - prob : 1d ndarray - The spherical background probability of each data event. + pd : instance of numpy ndarray + The (N_events,)-shaped numpy ndarray holding the background + probability density value for each event. + grads : dict + The dictionary holding the gradients of the probability density + w.r.t. each global fit parameter. + The background PDF does not depend on any global fit parameter, + hence, this is an empty dictionary. """ - with TaskTimer(tl, 'Evaluating bkg log-spline.'): - log_spline_val = self._log_spline(tdm.get_data('sin_dec')) - - prob = 0.5 / np.pi * np.exp(log_spline_val) - - grads = np.array([], dtype=np.float64) - - return (prob, grads) + return (self._pd, dict()) -class DataBackgroundI3SpatialPDF(BackgroundI3SpatialPDF): +class DataBackgroundI3SpatialPDF( + BackgroundI3SpatialPDF): """This is the IceCube spatial background PDF, which gets constructed from experimental data. """ - def __init__(self, data_exp, sinDec_binning, spline_order_sinDec=2): + def __init__( + self, + data_exp, + sin_dec_binning, + spline_order_sin_dec=2): """Constructs a new IceCube spatial background PDF from experimental data. @@ -172,34 +224,44 @@ def __init__(self, data_exp, sinDec_binning, spline_order_sinDec=2): The instance of DataFieldRecordArray holding the experimental data. The following data fields must exist: - - 'dec' : float - The declination of the data event. + sin_dec : float + The sin(declination) of the data event. - sinDec_binning : BinningDefinition + sin_dec_binning : BinningDefinition The binning definition for the sin(declination). - spline_order_sinDec : int + spline_order_sin_dec : int The order of the spline function for the logarithmic values of the spatial background PDF along the sin(dec) axis. The default is 2. 
""" - if(not isinstance(data_exp, DataFieldRecordArray)): - raise TypeError('The data_exp argument must be of type ' - 'numpy.ndarray!') + if not isinstance(data_exp, DataFieldRecordArray): + raise TypeError( + 'The data_exp argument must be an instance of ' + 'DataFieldRecordArray! ' + f'It is of type "{classname(data_exp)}"!') - data_sinDec = np.sin(data_exp['dec']) + data_sin_dec = data_exp['sin_dec'] data_weights = np.ones((len(data_exp),)) # Create the PDF using the base class. - super(DataBackgroundI3SpatialPDF, self).__init__( - data_sinDec, data_weights, sinDec_binning, spline_order_sinDec) + super().__init__( + data_sin_dec=data_sin_dec, + data_weights=data_weights, + sin_dec_binning=sin_dec_binning, + spline_order_sin_dec=spline_order_sin_dec) -class MCBackgroundI3SpatialPDF(BackgroundI3SpatialPDF): +class MCBackgroundI3SpatialPDF( + BackgroundI3SpatialPDF): """This is the IceCube spatial background PDF, which gets constructed from monte-carlo data. """ - def __init__(self, data_mc, physics_weight_field_names, sinDec_binning, - spline_order_sinDec=2): + def __init__( + self, + data_mc, + physics_weight_field_names, + sin_dec_binning, + spline_order_sin_dec=2): """Constructs a new IceCube spatial background PDF from monte-carlo data. @@ -209,54 +271,66 @@ def __init__(self, data_mc, physics_weight_field_names, sinDec_binning, The array holding the monte-carlo data. The following data fields must exist: - - 'sin_dec' : float - The sine of the reconstructed declination of the data event. + sin_dec : float + The sine of the reconstructed declination of the data event. physics_weight_field_names : str | list of str The name or the list of names of the monte-carlo data fields, which should be used as event weights. If a list is given, the weight values of all the fields will be summed to construct the final event weight. - sinDec_binning : BinningDefinition + sin_dec_binning : BinningDefinition The binning definition for the sin(declination). 
- spline_order_sinDec : int + spline_order_sin_dec : int The order of the spline function for the logarithmic values of the spatial background PDF along the sin(dec) axis. The default is 2. """ - if(not isinstance(data_mc, DataFieldRecordArray)): - raise TypeError('The data_mc argument must be and instance of ' - 'DataFieldRecordArray!') + if not isinstance(data_mc, DataFieldRecordArray): + raise TypeError( + 'The data_mc argument must be and instance of ' + 'DataFieldRecordArray! ' + f'It is of type {classname(data_mc)}') - if(isinstance(physics_weight_field_names, str)): + if not issequence(physics_weight_field_names): physics_weight_field_names = [physics_weight_field_names] - if(not issequenceof(physics_weight_field_names, str)): - raise TypeError('The physics_weight_field_names argument must be ' - 'of type str or a sequence of type str!') + if not issequenceof(physics_weight_field_names, str): + raise TypeError( + 'The physics_weight_field_names argument must be of type str ' + 'or a sequence of type str! It is of type ' + f'"{classname(physics_weight_field_names)}"!') - data_sinDec = data_mc['sin_dec'] + data_sin_dec = data_mc['sin_dec'] # Calculate the event weights as the sum of all the given data fields # for each event. data_weights = np.zeros(len(data_mc), dtype=np.float64) for name in physics_weight_field_names: - if(name not in data_mc.field_name_list): - raise KeyError('The field "%s" does not exist in the MC ' - 'data!'%(name)) + if name not in data_mc: + raise KeyError( + f'The field "{name}" does not exist in the MC data!') data_weights += data_mc[name] # Create the PDF using the base class. 
- super(MCBackgroundI3SpatialPDF, self).__init__( - data_sinDec, data_weights, sinDec_binning, spline_order_sinDec - ) + super().__init__( + data_sin_dec=data_sin_dec, + data_weights=data_weights, + sin_dec_binning=sin_dec_binning, + spline_order_sin_dec=spline_order_sin_dec) -class DataBackgroundI3EnergyPDF(I3EnergyPDF, IsBackgroundPDF): +class DataBackgroundI3EnergyPDF( + I3EnergyPDF, + IsBackgroundPDF): """This is the IceCube energy background PDF, which gets constructed from experimental data. This class is derived from I3EnergyPDF. """ - def __init__(self, data_exp, logE_binning, sinDec_binning, - smoothing_filter=None): + def __init__( + self, + data_exp, + log10_energy_binning, + sin_dec_binning, + smoothing_filter=None): """Constructs a new IceCube energy background PDF from experimental data. @@ -266,45 +340,57 @@ def __init__(self, data_exp, logE_binning, sinDec_binning, The array holding the experimental data. The following data fields must exist: - - 'log_energy' : float - The logarithm of the reconstructed energy value of the data - event. - - 'sin_dec' : float - The sine of the reconstructed declination of the data event. + log_energy : float + The logarithm of the reconstructed energy value of the data + event. + sin_dec : float + The sine of the reconstructed declination of the data event. - logE_binning : BinningDefinition + log10_energy_binning : instance of BinningDefinition The binning definition for the binning in log10(E). - sinDec_binning : BinningDefinition + sin_dec_binning : instance of BinningDefinition The binning definition for the sin(declination). - smoothing_filter : SmoothingFilter instance | None + smoothing_filter : instance of SmoothingFilter | None The smoothing filter to use for smoothing the energy histogram. If None, no smoothing will be applied. 
""" - if(not isinstance(data_exp, DataFieldRecordArray)): - raise TypeError('The data_exp argument must be an instance of ' - 'DataFieldRecordArray!') - - data_logE = data_exp['log_energy'] - data_sinDec = data_exp['sin_dec'] + if not isinstance(data_exp, DataFieldRecordArray): + raise TypeError( + 'The data_exp argument must be an instance of ' + 'DataFieldRecordArray! ' + f'It is of type "{classname(data_exp)}"!') + + data_log10_energy = data_exp['log_energy'] + data_sin_dec = data_exp['sin_dec'] # For experimental data, the MC and physics weight are unity. data_mcweight = np.ones((len(data_exp),)) data_physicsweight = data_mcweight # Create the PDF using the base class. - super(DataBackgroundI3EnergyPDF, self).__init__( - data_logE, data_sinDec, data_mcweight, data_physicsweight, - logE_binning, sinDec_binning, smoothing_filter - ) - # Check if this PDF is valid for all the given experimental data. - self.assert_is_valid_for_exp_data(data_exp) - - -class MCBackgroundI3EnergyPDF(I3EnergyPDF, IsBackgroundPDF): + super().__init__( + pmm=None, + data_log10_energy=data_log10_energy, + data_sin_dec=data_sin_dec, + data_mcweight=data_mcweight, + data_physicsweight=data_physicsweight, + log10_energy_binning=log10_energy_binning, + sin_dec_binning=sin_dec_binning, + smoothing_filter=smoothing_filter) + + +class MCBackgroundI3EnergyPDF( + I3EnergyPDF, + IsBackgroundPDF): """This is the IceCube energy background PDF, which gets constructed from monte-carlo data. This class is derived from I3EnergyPDF. """ - def __init__(self, data_mc, physics_weight_field_names, logE_binning, - sinDec_binning, smoothing_filter=None): + def __init__( + self, + data_mc, + physics_weight_field_names, + log10_energy_binning, + sin_dec_binning, + smoothing_filter=None): """Constructs a new IceCube energy background PDF from monte-carlo data. @@ -314,52 +400,61 @@ def __init__(self, data_mc, physics_weight_field_names, logE_binning, The array holding the monte-carlo data. 
The following data fields must exist: - - 'log_energy' : float - The logarithm of the reconstructed energy value of the data - event. - - 'sin_dec' : float - The sine of the reconstructed declination of the data event. - - 'mcweight': float - The monte-carlo weight of the event. + log_energy : float + The logarithm of the reconstructed energy value of the data + event. + sin_dec : float + The sine of the reconstructed declination of the data event. + mcweight: float + The monte-carlo weight of the event. physics_weight_field_names : str | list of str The name or the list of names of the monte-carlo data fields, which should be used as physics event weights. If a list is given, the weight values of all the fields will be summed to construct the final event physics weight. - logE_binning : BinningDefinition + log10_energy_binning : BinningDefinition The binning definition for the binning in log10(E). - sinDec_binning : BinningDefinition + sin_dec_binning : BinningDefinition The binning definition for the sin(declination). smoothing_filter : SmoothingFilter instance | None The smoothing filter to use for smoothing the energy histogram. If None, no smoothing will be applied. """ - if(not isinstance(data_mc, DataFieldRecordArray)): - raise TypeError('The data_mc argument must be an instance of ' - 'DataFieldRecordArray!') + if not isinstance(data_mc, DataFieldRecordArray): + raise TypeError( + 'The data_mc argument must be an instance of ' + 'DataFieldRecordArray! 
' + f'It is of type "{classname(data_mc)}"!') - if(isinstance(physics_weight_field_names, str)): + if not issequence(physics_weight_field_names): physics_weight_field_names = [physics_weight_field_names] - if(not issequenceof(physics_weight_field_names, str)): - raise TypeError('The physics_weight_field_names argument must be ' - 'of type str or a sequence of type str!') - - data_logE = data_mc['log_energy'] - data_sinDec = data_mc['sin_dec'] + if not issequenceof(physics_weight_field_names, str): + raise TypeError( + 'The physics_weight_field_names argument must be ' + 'of type str or a sequence of type str! ' + f'It is of type {classname(physics_weight_field_names)}') + + data_log10_energy = data_mc['log_energy'] + data_sin_dec = data_mc['sin_dec'] data_mcweight = data_mc['mcweight'] # Calculate the event weights as the sum of all the given data fields # for each event. data_physicsweight = np.zeros(len(data_mc), dtype=np.float64) for name in physics_weight_field_names: - if(name not in data_mc.field_name_list): - raise KeyError('The field "%s" does not exist in the MC ' - 'data!'%(name)) + if name not in data_mc: + raise KeyError( + f'The field "{name}" does not exist in the MC data!') data_physicsweight += data_mc[name] # Create the PDF using the base class. - super(MCBackgroundI3EnergyPDF, self).__init__( - data_logE, data_sinDec, data_mcweight, data_physicsweight, - logE_binning, sinDec_binning, smoothing_filter - ) + super().__init__( + pmm=None, + data_log10_energy=data_log10_energy, + data_sin_dec=data_sin_dec, + data_mcweight=data_mcweight, + data_physicsweight=data_physicsweight, + log10_energy_binning=log10_energy_binning, + sin_dec_binning=sin_dec_binning, + smoothing_filter=smoothing_filter) diff --git a/skyllh/i3/config.py b/skyllh/i3/config.py index 68b708a082..03ceb3e073 100644 --- a/skyllh/i3/config.py +++ b/skyllh/i3/config.py @@ -3,11 +3,12 @@ """This file defines IceCube specific global configuration. 
""" -# Import everything from the core config, So the user sees the same -# functionality as if using the core config. -from skyllh.core.config import * - -# Add default analysis required data fields for experimental and monte-carlo -# data that are IceCube specific. -CFG['dataset']['analysis_required_exp_field_names'] += ['azi', 'zen', 'sin_dec'] -CFG['dataset']['analysis_required_mc_field_names'] += ['sin_true_dec'] +from skyllh.core.config import ( + add_analysis_required_exp_data_field_names, + add_analysis_required_mc_data_field_names, +) + + +def add_icecube_specific_analysis_required_data_fields(): + add_analysis_required_exp_data_field_names(['azi', 'zen', 'sin_dec']) + add_analysis_required_mc_data_field_names(['sin_true_dec']) diff --git a/skyllh/i3/dataset.py b/skyllh/i3/dataset.py index d74be79fd6..81117fe4be 100644 --- a/skyllh/i3/dataset.py +++ b/skyllh/i3/dataset.py @@ -6,25 +6,37 @@ from skyllh.core import display from skyllh.core.py import ( issequenceof, - module_classname + module_classname, ) from skyllh.core.dataset import ( Dataset, - DatasetData + DatasetData, +) +from skyllh.core.debugging import ( + get_logger, ) -from skyllh.core.debugging import get_logger from skyllh.core.storage import ( DataFieldRecordArray, - create_FileLoader + create_FileLoader, +) +from skyllh.core.timing import ( + TaskTimer, ) -from skyllh.core.timing import TaskTimer # Load the IceCube specific config defaults. # This will change the skyllh.core.config.CFG dictionary. -from skyllh.i3 import config +from skyllh.i3.config import ( + add_icecube_specific_analysis_required_data_fields, +) +# Add default analysis required data fields for experimental and monte-carlo +# data that are IceCube specific. +add_icecube_specific_analysis_required_data_fields() -class I3Dataset(Dataset): + +class I3Dataset( + Dataset, +): """The I3Dataset class is an IceCube specific Dataset class that adds IceCube specific properties to the Dataset class. 
These additional properties are: @@ -47,8 +59,10 @@ def get_combined_grl_pathfilenames(datasets): grl_pathfilenames : list The combined list of grl pathfilenames. """ - if(not issequenceof(datasets, I3Dataset)): - raise TypeError('The datasets argument must be a sequence of I3Dataset instances!') + if not issequenceof(datasets, I3Dataset): + raise TypeError( + 'The datasets argument must be a sequence of I3Dataset ' + 'instances!') grl_pathfilenames = [] for ds in datasets: @@ -56,16 +70,27 @@ def get_combined_grl_pathfilenames(datasets): return grl_pathfilenames - def __init__(self, grl_pathfilenames=None, *args, **kwargs): + def __init__( + self, + livetime=None, + grl_pathfilenames=None, + **kwargs, + ): """Creates a new IceCube specific dataset, that also can hold a list of GRL data files. Parameters ---------- + livetime : float | None + The live-time of the dataset in days. It can be ``None``, if + good-run-list data files are provided. grl_pathfilenames : str | sequence of str - + The sequence of pathfilenames pointing to the good-run-list (GRL) + data files. """ - super(I3Dataset, self).__init__(*args, **kwargs) + super().__init__( + livetime=livetime, + **kwargs) self._logger = get_logger(module_classname(self)) @@ -75,21 +100,21 @@ def __init__(self, grl_pathfilenames=None, *args, **kwargs): @property def grl_pathfilename_list(self): - """The list of file names of the good-run-list data files for this - dataset. - If a file name is given with a relative path, it will be relative to the - root_dir property of this Dataset instance. + """The list of file names of the good-run-list (GRL) data files for this + dataset. If a file name is given with a relative path, it will be + relative to the ``root_dir`` property of this Dataset instance. 
""" return self._grl_pathfilename_list + @grl_pathfilename_list.setter def grl_pathfilename_list(self, pathfilenames): - if(pathfilenames is None): + if pathfilenames is None: pathfilenames = [] - if(isinstance(pathfilenames, str)): + if isinstance(pathfilenames, str): pathfilenames = [pathfilenames] - if(not issequenceof(pathfilenames, str)): - raise TypeError('The grl_pathfilename_list property must be a ' - 'sequence of str!') + if not issequenceof(pathfilenames, str): + raise TypeError( + 'The grl_pathfilename_list property must be a sequence of str!') self._grl_pathfilename_list = list(pathfilenames) @property @@ -106,11 +131,13 @@ def grl_field_name_renaming_dict(self): keys are the old names and their values are the new names. """ return self._grl_field_name_renaming_dict + @grl_field_name_renaming_dict.setter def grl_field_name_renaming_dict(self, d): - if(not isinstance(d, dict)): - raise TypeError('The grl_field_name_renaming_dict property must ' - 'be an instance of dict!') + if not isinstance(d, dict): + raise TypeError( + 'The grl_field_name_renaming_dict property must be an ' + 'instance of dict!') self._grl_field_name_renaming_dict = d @property @@ -118,11 +145,11 @@ def exists(self): """(read-only) Flag if all the data files of this data set exists. It is ``True`` if all data files exist and ``False`` otherwise. """ - if(not super(I3Dataset,self).exists): + if not super().exists: return False for pathfilename in self.grl_abs_pathfilename_list: - if(not os.path.exists(pathfilename)): + if not os.path.exists(pathfilename): return False return True @@ -131,15 +158,15 @@ def __str__(self): """Implementation of the pretty string representation of the I3Dataset object. 
""" - s = super(I3Dataset, self).__str__() + s = super().__str__() s += '\n' s1 = '' s1 += 'GRL data:\n' s2 = '' - if(len(self._grl_pathfilename_list) > 0): + if len(self._grl_pathfilename_list) > 0: for (idx, pathfilename) in enumerate(self.grl_abs_pathfilename_list): - if(idx > 0): + if idx > 0: s2 += '\n' s2 += self._gen_datafile_pathfilename_entry(pathfilename) else: @@ -200,6 +227,9 @@ def load_grl(self, efficiency_mode=None, tl=None): efficiency_mode=efficiency_mode) grl_data.rename_fields(self._grl_field_name_renaming_dict) + with TaskTimer(tl, 'Sort grl data according to start time'): + grl_data.sort_by_field(name='start') + return grl_data def load_data( @@ -255,7 +285,7 @@ def load_data( # Load the good-run-list (GRL) data if it is provided for this dataset, # and calculate the livetime based on the GRL. data_grl = None - if(len(self._grl_pathfilename_list) > 0): + if len(self._grl_pathfilename_list) > 0: data_grl = self.load_grl( efficiency_mode=efficiency_mode, tl=tl) @@ -273,7 +303,11 @@ def load_data( return data - def prepare_data(self, data, tl=None): + def prepare_data( # noqa: C901 + self, + data, + tl=None + ): """Prepares the data for IceCube by pre-calculating the following experimental data fields: @@ -293,10 +327,25 @@ def prepare_data(self, data, tl=None): The TimeLord instance that should be used to time the data preparation. """ + # Set the livetime of the dataset from the GRL data when no livetime + # was specified previously. + if data.livetime is None and data.grl is not None: + if 'start' not in data.grl: + raise KeyError( + f'The GRL data for dataset "{self.name}" has no data ' + 'field named "start"!') + if 'stop' not in data.grl: + raise KeyError( + f'The GRL data for dataset "{self.name}" has no data ' + 'field named "stop"!') + data.livetime = np.sum(data.grl['stop'] - data.grl['start']) + # Execute all the data preparation functions for this dataset. 
- super(I3Dataset, self).prepare_data(data, tl=tl) + super().prepare_data( + data=data, + tl=tl) - if(data.exp is not None): + if data.exp is not None: # Append sin(dec) data field to the experimental data. task = 'Appending IceCube-specific data fields to exp data.' with TaskTimer(tl, task): @@ -304,7 +353,7 @@ def prepare_data(self, data, tl=None): data.exp.append_field( 'sin_dec', np.sin(data.exp['dec'])) - if(data.mc is not None): + if data.mc is not None: # Append sin(dec) and sin(true_dec) to the MC data. task = 'Appending IceCube-specific data fields to MC data.' with TaskTimer(tl, task): @@ -315,37 +364,33 @@ def prepare_data(self, data, tl=None): data.mc.append_field( 'sin_true_dec', np.sin(data.mc['true_dec'])) - # Set the livetime of the dataset from the GRL data when no livetime - # was specified previously. - if(data.livetime is None and data.grl is not None): - if('start' not in data.grl): - raise KeyError('The GRL data for dataset "{}" has no data ' - 'field named "start"!'.format(self.name)) - if('stop' not in data.grl): - raise KeyError('The GRL data for dataset "{}" has no data ' - 'field named "stop"!'.format(self.name)) - data.livetime = np.sum(data.grl['stop'] - data.grl['start']) - # Select only the experimental data which fits the good-run-list for # this dataset. - if data.grl is not None: + if (data.grl is not None) and (data.exp is not None): # Select based on run information. 
- if (('run' in data.grl) and - ('run' in data.exp)): - task = 'Selected only the experimental data that matches the '\ - 'run information in the GRL for dataset "%s".'%(self.name) + if ('run' in data.grl) and ('run' in data.exp): + task = ( + 'Select only the experimental data that matches the run ' + f'information in the GRL for dataset "{self.name}".') with TaskTimer(tl, task): runs = np.unique(data.grl['run']) mask = np.isin(data.exp['run'], runs) - data.exp = data.exp[mask] + + if np.any(~mask): + n_cut_runs = np.count_nonzero(~mask) + self._logger.info( + f'Cutting {n_cut_runs} runs from dataset ' + f'{self.name} due to GRL run information.') + data.exp = data.exp[mask] # Select based on detector on-time information. - if (('start' in data.grl) and - ('stop' in data.grl) and - ('time' in data.exp)): - task = 'Selected only the experimental data that matches the '\ - 'detector\'s on-time information in the GRL for dataset '\ - '"%s".'%(self.name) + if ('start' in data.grl) and\ + ('stop' in data.grl) and\ + ('time' in data.exp): + task = ( + 'Select only the experimental data that matches the ' + 'detector\'s on-time information in the GRL for dataset ' + f'"{self.name}".') with TaskTimer(tl, task): mask = np.zeros((len(data.exp),), dtype=np.bool_) for (start, stop) in zip(data.grl['start'], @@ -364,7 +409,9 @@ def prepare_data(self, data, tl=None): data.exp = data.exp[mask] -class I3DatasetData(DatasetData): +class I3DatasetData( + DatasetData, +): """The class provides the container for the loaded experimental and monto-carlo data of a data set. It's the IceCube specific class that also holds the good-run-list (GRL) data. @@ -393,10 +440,12 @@ def grl(self): available for this IceCube data set. 
""" return self._grl + @grl.setter def grl(self, data): - if(data is not None): - if(not isinstance(data, DataFieldRecordArray)): - raise TypeError('The grl property must be an instance of ' + if data is not None: + if not isinstance(data, DataFieldRecordArray): + raise TypeError( + 'The grl property must be an instance of ' 'DataFieldRecordArray!') self._grl = data diff --git a/skyllh/i3/detsigyield.py b/skyllh/i3/detsigyield.py index 2f1503057c..42fedb830a 100644 --- a/skyllh/i3/detsigyield.py +++ b/skyllh/i3/detsigyield.py @@ -5,71 +5,115 @@ """ import abc +from astropy import units import numpy as np import scipy.interpolate from skyllh.core import multiproc -from skyllh.core.py import issequenceof -from skyllh.core.binning import BinningDefinition -from skyllh.core.parameters import ParameterGrid +from skyllh.core.config import ( + to_internal_time_unit, +) +from skyllh.core.py import ( + classname, + issequenceof, +) +from skyllh.core.binning import ( + BinningDefinition, +) +from skyllh.core.parameters import ( + ParameterGrid, +) from skyllh.core.detsigyield import ( DetSigYield, - DetSigYieldImplMethod, - get_integrated_livetime_in_days + DetSigYieldBuilder, +) +from skyllh.core.livetime import ( + Livetime, ) -from skyllh.core.livetime import Livetime -from skyllh.physics.source import PointLikeSource -from skyllh.physics.flux import ( - FluxModel, - PowerLawFlux, - get_conversion_factor_to_internal_flux_unit +from skyllh.core.source_model import ( + PointLikeSource, ) -class I3DetSigYield(DetSigYield, metaclass=abc.ABCMeta): +class I3DetSigYield( + DetSigYield, + metaclass=abc.ABCMeta): """Abstract base class for all IceCube specific detector signal yield classes. It assumes that sin(dec) binning is required for calculating the detector effective area and hence the detector signal yield. 
""" - def __init__(self, implmethod, dataset, fluxmodel, livetime, sin_dec_binning): + def __init__( + self, + param_names, + dataset, + fluxmodel, + livetime, + sin_dec_binning, + **kwargs): """Constructor of the IceCube specific detector signal yield base class. + + Parameters + ---------- + param_names : sequence of str + The sequence of parameter names this detector signal yield depends + on. These are either fixed or floating parameters. + dataset : Dataset instance + The Dataset instance holding the monte-carlo event data. + fluxmodel : FluxModel + The flux model instance. Must be an instance of FluxModel. + livetime : float | Livetime instance + The live-time. + sin_dec_binning : BinningDefinition instance + The BinningDefinition instance defining the sin(dec) binning. """ - super(I3DetSigYield, self).__init__(implmethod, dataset, fluxmodel, livetime) + super().__init__( + param_names=param_names, + dataset=dataset, + fluxmodel=fluxmodel, + livetime=livetime, + **kwargs) self.sin_dec_binning = sin_dec_binning @property def sin_dec_binning(self): - """The BinningDefinition instance defining the sin(dec) binning - definition. + """The BinningDefinition instance defining the sin(dec) binning. """ return self._sin_dec_binning + @sin_dec_binning.setter def sin_dec_binning(self, bd): - if(not isinstance(bd, BinningDefinition)): - raise TypeError('The sin_dec_binning property must be an instance ' - 'of BinningDefinition!') + if not isinstance(bd, BinningDefinition): + raise TypeError( + 'The sin_dec_binning property must be an instance of ' + f'BinningDefinition! Its current type is {classname(bd)}.') self._sin_dec_binning = bd -class I3DetSigYieldImplMethod(DetSigYieldImplMethod, metaclass=abc.ABCMeta): +class I3DetSigYieldBuilder( + DetSigYieldBuilder, + metaclass=abc.ABCMeta): """Abstract base class for an IceCube specific detector signal yield - implementation method class. + builder class. 
""" - def __init__(self, sin_dec_binning=None, **kwargs): + def __init__( + self, + sin_dec_binning=None, + **kwargs, + ): """Constructor of the IceCube specific detector signal yield - implementation base class. + builder class. Parameters ---------- sin_dec_binning : BinningDefinition instance - The instance of BinningDefinition defining the binning of sin(dec). + The instance of BinningDefinition defining the sin(dec) binning. """ - super(I3DetSigYieldImplMethod, self).__init__(**kwargs) + super().__init__(**kwargs) self.sin_dec_binning = sin_dec_binning @@ -81,12 +125,15 @@ def sin_dec_binning(self): binning definitions. """ return self._sin_dec_binning + @sin_dec_binning.setter def sin_dec_binning(self, binning): - if((binning is not None) and - (not isinstance(binning, BinningDefinition))): - raise TypeError('The sin_dec_binning property must be None, or ' - 'an instance of BinningDefinition!') + if (binning is not None) and\ + (not isinstance(binning, BinningDefinition)): + raise TypeError( + 'The sin_dec_binning property must be None, or an instance of ' + 'BinningDefinition! ' + f'Its current type is "{classname(binning)}".') self._sin_dec_binning = binning def get_sin_dec_binning(self, dataset): @@ -95,86 +142,141 @@ def get_sin_dec_binning(self, dataset): given dataset. 
""" sin_dec_binning = self.sin_dec_binning - if(sin_dec_binning is None): - if(not dataset.has_binning_definition('sin_dec')): - raise KeyError('No binning definition named "sin_dec" is ' - 'defined in the dataset and no user defined binning ' + if sin_dec_binning is None: + if not dataset.has_binning_definition('sin_dec'): + raise KeyError( + 'No binning definition named "sin_dec" is defined in the ' + f'dataset "{dataset.name}" and no user defined binning ' 'definition was provided to this detector signal yield ' - 'implementation method!') + f'builder "{classname(self)}"!') sin_dec_binning = dataset.get_binning_definition('sin_dec') return sin_dec_binning -class PointLikeSourceI3DetSigYieldImplMethod( - I3DetSigYieldImplMethod, metaclass=abc.ABCMeta): +class PointLikeSourceI3DetSigYield( + I3DetSigYield): """Abstract base class for all IceCube specific detector signal yield - implementation methods for a point-like source. All IceCube detector signal - yield implementation methods require a sinDec binning definition for - the effective area. By default it is taken from the binning definitios - stored in the dataset, but a user-defined sinDec binning can be specified - if needed. + classes for point-like sources. """ - def __init__(self, sin_dec_binning=None, **kwargs): - """Initializes a new detector signal yield implementation method - object. + def __init__( + self, + param_names, + dataset, + fluxmodel, + livetime, + sin_dec_binning, + **kwargs): + """Constructor of the IceCube specific detector signal yield base + class for point-like sources. Parameters ---------- - sin_dec_binning : BinningDefinition | None - The BinningDefinition instance defining the sin(dec) binning that - should be used to compute the sin(dec) dependency of the detector - effective area. If set to None, the binning will be taken from the - Dataset binning definitions. + param_names : sequence of str + The sequence of parameter names this detector signal yield depends + on. 
These are either fixed or floating parameters. + implmethod : instance of DetSigYieldImplMethod + The implementation method to use for constructing and receiving + the detector signal yield. The appropriate method depends on + the used flux model. + dataset : Dataset instance + The Dataset instance holding the monte-carlo event data. + fluxmodel : FluxModel + The flux model instance. Must be an instance of FluxModel. + sin_dec_binning : BinningDefinition instance + The BinningDefinition instance defining the sin(dec) binning. """ - super(PointLikeSourceI3DetSigYieldImplMethod, self).__init__( - sin_dec_binning, **kwargs) - - # Define the supported source models. - self.supported_sourcemodels = (PointLikeSource,) - - def source_to_array(self, sources): + super().__init__( + param_names=param_names, + dataset=dataset, + fluxmodel=fluxmodel, + livetime=livetime, + sin_dec_binning=sin_dec_binning, + **kwargs) + + def sources_to_recarray(self, sources): """Converts the sequence of PointLikeSource sources into a numpy record - array holding the spatial information of the sources needed for the + array holding the information of the sources needed for the detector signal yield calculation. Parameters ---------- sources : SourceModel | sequence of SourceModel - The source model containing the spatial information of the source. + The source model(s) containing the information of the source(s). Returns ------- - arr : numpy record ndarray - The generated numpy record ndarray holding the spatial information - for each source. + recarr : numpy record ndarray + The generated (N_sources,)-shaped 1D numpy record ndarray holding + the information for each source. 
""" - if(isinstance(sources, PointLikeSource)): - sources = [ sources ] - if(not issequenceof(sources, PointLikeSource)): - raise TypeError('The source argument must be an instance of PointLikeSource!') - - arr = np.empty((len(sources),), dtype=[('dec', np.float64)]) + if isinstance(sources, PointLikeSource): + sources = [sources] + if not issequenceof(sources, PointLikeSource): + raise TypeError( + 'The sources argument must be an instance or a sequence of ' + 'instances of PointLikeSource!') + + recarr = np.empty((len(sources),), dtype=[('dec', np.float64)]) for (i, src) in enumerate(sources): - arr['dec'][i] = src.dec + recarr['dec'][i] = src.dec + + return recarr + + +class PointLikeSourceI3DetSigYieldBuilder( + I3DetSigYieldBuilder, + metaclass=abc.ABCMeta, +): + """Abstract base class for all IceCube specific detector signal yield + builders for point-like sources. All IceCube detector signal + yield builders require a sin(dec) binning definition for + the effective area. By default it is taken from the binning definitions + stored in the dataset, but a user-defined sin(dec) binning can be specified + if needed. + """ + + def __init__( + self, + sin_dec_binning=None, + **kwargs, + ): + """Initializes a new detector signal yield builder object. - return arr + Parameters + ---------- + sin_dec_binning : BinningDefinition | None + The BinningDefinition instance defining the sin(dec) binning that + should be used to compute the sin(dec) dependency of the detector + effective area. If set to None, the binning will be taken from the + Dataset binning definitions. + """ + super().__init__( + sin_dec_binning=sin_dec_binning, + **kwargs) -class FixedFluxPointLikeSourceI3DetSigYield(I3DetSigYield): - """The detector signal yield class for the - FixedFluxPointLikeSourceI3DetSigYieldImplMethod detector signal yield - implementation method. 
+class FixedFluxPointLikeSourceI3DetSigYield( + PointLikeSourceI3DetSigYield): + """The detector signal yield class for a point-source with a fixed flux. """ - def __init__(self, implmethod, dataset, fluxmodel, livetime, sin_dec_binning, log_spl_sinDec): + def __init__( + self, + param_names, + dataset, + fluxmodel, + livetime, + sin_dec_binning, + log_spl_sinDec, + **kwargs): """Constructs an IceCube detector signal yield instance for a point-like source with a fixed flux. Parameters ---------- - implmethod : FixedFluxPointLikeSourceI3DetSigYieldImplMethod instance - The instance of the detector signal yield implementation - method. + param_names : sequence of str + The sequence of parameter names this detector signal yield depends + on. These are either fixed or floating parameters. dataset : Dataset instance The instance of Dataset holding the monte-carlo data this detector signal yield is made for. @@ -189,12 +291,13 @@ def __init__(self, implmethod, dataset, fluxmodel, livetime, sin_dec_binning, lo The spline instance representing the log value of the detector signal yield as a function of sin(dec). """ - if(not isinstance(implmethod, FixedFluxPointLikeSourceI3DetSigYieldImplMethod)): - raise TypeError('The implmethod argument must be an instance of ' - 'FixedFluxPointLikeSourceI3DetSigYieldImplMethod!') - - super(FixedFluxPointLikeSourceI3DetSigYield, self).__init__( - implmethod, dataset, fluxmodel, livetime, sin_dec_binning) + super().__init__( + param_names=param_names, + dataset=dataset, + fluxmodel=fluxmodel, + livetime=livetime, + sin_dec_binning=sin_dec_binning, + **kwargs) self.log_spl_sinDec = log_spl_sinDec @@ -205,72 +308,82 @@ def log_spl_sinDec(self): yield as a function of sin(dec). 
""" return self._log_spl_sinDec + @log_spl_sinDec.setter def log_spl_sinDec(self, spl): - if(not isinstance(spl, scipy.interpolate.InterpolatedUnivariateSpline)): - raise TypeError('The log_spl_sinDec property must be an instance ' + if not isinstance(spl, scipy.interpolate.InterpolatedUnivariateSpline): + raise TypeError( + 'The log_spl_sinDec property must be an instance ' 'of scipy.interpolate.InterpolatedUnivariateSpline!') self._log_spl_sinDec = spl - def __call__(self, src, src_flux_params=None): + def __call__(self, src_recarray, src_params_recarray=None): """Retrieves the detector signal yield for the list of given sources. Parameters ---------- - src : numpy record ndarray + src_recarray : numpy record ndarray The numpy record ndarray with the field ``dec`` holding the declination of the source. - src_flux_params : None - Unused interface argument, because this implementation does not - depend on any source flux fit parameters. + src_params_recarray : None + Unused interface argument, because this detector signal yield does + not depend on any source parameters. Returns ------- values : numpy 1d ndarray The array with the detector signal yield for each source. - grads : None - Because with this implementation the detector signal yield - does not depend on any fit parameters. So there are no gradients - and None is returned. + grads : dict + This detector signal yield does not depend on any parameters. + So there are no gradients and the dictionary is empty. """ - src_dec = np.atleast_1d(src['dec']) + src_dec = np.atleast_1d(src_recarray['dec']) # Create results array. values = np.zeros_like(src_dec, dtype=np.float64) # Create mask for all source declinations which are inside the # declination range. 
- mask = (np.sin(src_dec) >= self._sin_dec_binning.lower_edge)\ - &(np.sin(src_dec) <= self._sin_dec_binning.upper_edge) + mask = ( + (np.sin(src_dec) >= self._sin_dec_binning.lower_edge) & + (np.sin(src_dec) <= self._sin_dec_binning.upper_edge) + ) values[mask] = np.exp(self._log_spl_sinDec(np.sin(src_dec[mask]))) - return (values, None) + return (values, {}) -class FixedFluxPointLikeSourceI3DetSigYieldImplMethod( - PointLikeSourceI3DetSigYieldImplMethod): - """This detector signal yield implementation method constructs a +class FixedFluxPointLikeSourceI3DetSigYieldBuilder( + PointLikeSourceI3DetSigYieldBuilder, + multiproc.IsParallelizable, +): + """This detector signal yield builder constructs a detector signal yield for a fixed flux model, assuming a point-like source. This means that the detector signal yield does not depend on - any source flux parameters, hence it is only dependent on the detector + any source parameters, hence it is only dependent on the detector effective area. It constructs a one-dimensional spline function in sin(dec), using a :class:`scipy.interpolate.InterpolatedUnivariateSpline`. - This detector signal yield implementation method works with all flux - models. + This detector signal yield builder works with all flux models. It is tailored to the IceCube detector at the South Pole, where the effective area depends soley on the zenith angle, and hence on the declination, of the source. """ - def __init__(self, sin_dec_binning=None, spline_order_sinDec=2, **kwargs): - """Creates a new IceCube detector signal yield implementation - method object for a fixed flux model. It requires a sinDec binning - definition to compute the sin(dec) dependency of the detector effective - area. 
The construct class method of this implementation method will - create a spline function of a given order in logarithmic space of the + + def __init__( + self, + sin_dec_binning=None, + spline_order_sinDec=2, + **kwargs, + ): + """Creates a new IceCube detector signal yield builder object for a + fixed flux model. It requires a sinDec binning definition to compute + the sin(dec) dependency of the detector effective area. + The construct_detsigyield class method of this builder will create a + spline function of a given order in logarithmic space of the effective area. Parameters @@ -284,10 +397,9 @@ def __init__(self, sin_dec_binning=None, spline_order_sinDec=2, **kwargs): detector signal yield along the sin(dec) axis. The default is 2. """ - super(FixedFluxPointLikeSourceI3DetSigYieldImplMethod, self).__init__( - sin_dec_binning, **kwargs) - - self.supported_fluxmodels = (FluxModel,) + super().__init__( + sin_dec_binning=sin_dec_binning, + **kwargs) self.spline_order_sinDec = spline_order_sinDec @@ -297,23 +409,127 @@ def spline_order_sinDec(self): detector signal yield, along the sin(dec) axis. """ return self._spline_order_sinDec + @spline_order_sinDec.setter def spline_order_sinDec(self, order): - if(not isinstance(order, int)): - raise TypeError('The spline_order_sinDec property must be of ' - 'type int!') + if not isinstance(order, int): + raise TypeError( + 'The spline_order_sinDec property must be of type int! ' + f'Its current type is {classname(order)}.') self._spline_order_sinDec = order - def construct_detsigyield(self, dataset, data, fluxmodel, livetime, ppbar=None): - """Constructs a detector signal yield log spline function for the - given fixed flux model. + def _create_hist( + self, + data_sin_true_dec, + data_true_energy, + sin_dec_binning, + weights, + fluxmodel, + to_internal_flux_unit_factor, + ): + """Creates a histogram of the detector signal yield with the + given sin(dec) binning for the given flux model. 
Parameters ---------- - dataset : Dataset instance - The Dataset instance holding meta information about the data. - data : DatasetData instance - The DatasetData instance holding the monte-carlo event data. + data_sin_true_dec : instance of numpy.ndarray + The (N_data,)-shaped numpy.ndarray holding the sin(true_dec) values + of the monte-carlo events. + data_true_energy : instance of numpy.ndarray + The (N_data,)-shaped numpy.ndarray holding the true energy of the + monte-carlo events. + sin_dec_binning : instance of BinningDefinition + The sin(dec) binning definition to use for the histogram. + weights : 1d ndarray + The (N_data,)-shaped numpy.ndarray holding the weight factor of + each monte-carlo event where only the flux value needs to be + multiplied with in order to get the detector signal yield. + fluxmodel : instance of FluxModel + The flux model to get the flux values from. + to_internal_flux_unit_factor : float + The conversion factor to convert the flux unit into the internal + flux unit. + + Returns + ------- + hist : instance of numpy.ndarray + The (N_sin_dec_bins,)-shaped numpy.ndarray containing the histogram + values. + """ + weights = ( + weights * + fluxmodel(E=data_true_energy).squeeze() * + to_internal_flux_unit_factor + ) + + (hist, _) = np.histogram( + data_sin_true_dec, + bins=sin_dec_binning.binedges, + weights=weights, + density=False) + + # Normalize by solid angle of each bin which is + # 2*\pi*(\Delta sin(\delta)). + hist /= (2.*np.pi * np.diff(sin_dec_binning.binedges)) + + return hist + + def _create_detsigyield_from_hist( + self, + hist, + sin_dec_binning, + **kwargs, + ): + """Create a single instance of FixedFluxPointLikeSourceI3DetSigYield + from the given histogram. + + Parameters + ---------- + hist : instance of numpy.ndarray + The (N_sin_dec_bins,)-shaped numpy.ndarray holding the normalized + histogram of the detector signal yield. 
+ sin_dec_binning : instance of BinningDefinition + The sin(dec) binning definition to use for the histogram. + **kwargs + Additional keyword arguments are passed to the constructor of the + FixedFluxPointLikeSourceI3DetSigYield class. + + Returns + ------- + detsigyield : instance of FixedFluxPointLikeSourceI3DetSigYield + The instance of FixedFluxPointLikeSourceI3DetSigYield for the given + flux model. + """ + # Create spline in ln(hist) at the histogram's bin centers. + log_spl_sinDec = scipy.interpolate.InterpolatedUnivariateSpline( + sin_dec_binning.bincenters, + np.log(hist), + k=self.spline_order_sinDec) + + detsigyield = FixedFluxPointLikeSourceI3DetSigYield( + param_names=[], + sin_dec_binning=sin_dec_binning, + log_spl_sinDec=log_spl_sinDec, + **kwargs) + + return detsigyield + + def construct_detsigyields( + self, + dataset, + data, + shgs, + ppbar=None, + ): + """Constructs a set of FixedFluxPointLikeSourceI3DetSigYield instances, + one for each provided fluxmodel. + + Parameters + ---------- + dataset : instance of Dataset + The instance of Dataset holding meta information about the data. + data : instance of DatasetData + The instance of DatasetData holding the monte-carlo event data. The numpy record ndarray holding the monte-carlo event data must contain the following data fields: @@ -325,175 +541,344 @@ def construct_detsigyield(self, dataset, data, fluxmodel, livetime, ppbar=None): The monte-carlo weight of the data event in the unit GeV cm^2 sr. - fluxmodel : FluxModel - The flux model instance. Must be an instance of FluxModel. - livetime : float | Livetime - The live-time in days to use for the detector signal yield. - ppbar : ProgressBar instance | None - The instance of ProgressBar of the optional parent progress bar. + shgs : sequence of instance of SourceHypoGroup + The sequence of instance of SourceHypoGroup specifying the + source hypothesis groups (i.e. flux model) for which the detector + signal yields should get constructed. 
+ ppbar : instance of ProgressBar | None + The optional instance of ProgressBar of the parent progress bar. Returns ------- - detsigyield : FixedFluxPointLikeSourceI3DetSigYield instance - The DetSigYield instance for point-like source with a fixed flux. + detsigyields : list of instance of FixedFluxPointLikeSourceI3DetSigYield + The list of instance of FixedFluxPointLikeSourceI3DetSigYield + providing the detector signal yield function for a point-like source + with each of the given fixed flux models. """ - # Check data types of the input arguments. - super(FixedFluxPointLikeSourceI3DetSigYieldImplMethod, self).construct_detsigyield( - dataset, data, fluxmodel, livetime) + self.assert_types_of_construct_detsigyield_arguments( + dataset=dataset, + data=data, + shgs=shgs, + ppbar=ppbar) + + # Calculate conversion factor from the flux model unit into the + # internal flux unit (usually GeV^-1 cm^-2 s^-1). + to_internal_flux_unit_factors = [ + shg.fluxmodel.get_conversion_factor_to_internal_flux_unit() + for shg in shgs + ] + + to_internal_time_unit_factor = to_internal_time_unit( + time_unit=units.day + ) # Get integrated live-time in days. - livetime_days = get_integrated_livetime_in_days(livetime) + livetime_days = Livetime.get_integrated_livetime(data.livetime) # Get the sin(dec) binning definition either as setting from this # implementation method, or from the dataset. sin_dec_binning = self.get_sin_dec_binning(dataset) - # Calculate conversion factor from the flux model unit into the internal - # flux unit GeV^-1 cm^-2 s^-1. - toGeVcm2s = get_conversion_factor_to_internal_flux_unit(fluxmodel) + data_sin_true_dec = np.sin(data.mc['true_dec']) + + # Generate a list of indices that would sort the data according to the + # sin(true_dec) values. We will sort the MC data according to it, + # because the histogram creation is much faster (2x) when the + # to-be-histogrammed values are already sorted. 
+ sorted_idxs = np.argsort(data_sin_true_dec) + + data_sin_true_dec = np.take(data_sin_true_dec, sorted_idxs) + data_true_energy = np.take(data.mc['true_energy'], sorted_idxs) + mc_weight = np.take(data.mc['mcweight'], sorted_idxs) + + weights = ( + mc_weight * + livetime_days*to_internal_time_unit_factor + ) + + args_list = [ + ((), dict( + data_sin_true_dec=data_sin_true_dec, + data_true_energy=data_true_energy, + sin_dec_binning=sin_dec_binning, + weights=weights, + fluxmodel=shg.fluxmodel, + to_internal_flux_unit_factor=to_internal_flux_unit_factor, + )) + for (shg, to_internal_flux_unit_factor) in zip( + shgs, to_internal_flux_unit_factors) + ] + + hists = multiproc.parallelize( + func=self._create_hist, + args_list=args_list, + ncpu=multiproc.get_ncpu(local_ncpu=self.ncpu), + ppbar=ppbar, + ) + + detsigyields = [ + self._create_detsigyield_from_hist( + hist=hist, + sin_dec_binning=sin_dec_binning, + dataset=dataset, + livetime=data.livetime, + fluxmodel=shg.fluxmodel, + ) + for (hist, shg) in zip(hists, shgs) + ] + + return detsigyields - # Calculate the detector signal yield contribution of each event. - # The unit of mcweight is assumed to be GeV cm^2 sr. - w = data.mc["mcweight"] * fluxmodel(data.mc["true_energy"])*toGeVcm2s * livetime_days * 86400. + def construct_detsigyield( + self, + dataset, + data, + shg, + ppbar=None, + ): + """Constructs a detector signal yield log spline function for the + given fixed flux model. - # Create a histogram along sin(true_dec). - (h, bins) = np.histogram(np.sin(data.mc["true_dec"]), - weights = w, - bins = sin_dec_binning.binedges, - density = False) + This method calls the :meth:`construct_detsigyiels` method of this + class. - # Normalize by solid angle of each bin which is - # 2*\pi*(\Delta sin(\delta)). - h /= (2.*np.pi * np.diff(sin_dec_binning.binedges)) + Parameters + ---------- + dataset : instance of Dataset + The instance of Dataset holding meta information about the data. 
+ data : instance of DatasetData + The instance of DatasetData holding the monte-carlo event data. + The numpy record ndarray holding the monte-carlo event data must + contain the following data fields: - # Create spline in ln(h) at the histogram's bin centers. - log_spl_sinDec = scipy.interpolate.InterpolatedUnivariateSpline( - sin_dec_binning.bincenters, np.log(h), k=self.spline_order_sinDec) + - 'true_dec' : float + The true declination of the data event. + - 'true_energy' : float + The true energy value of the data event. + - 'mcweight' : float + The monte-carlo weight of the data event in the unit + GeV cm^2 sr. - detsigyield = FixedFluxPointLikeSourceI3DetSigYield( - self, dataset, fluxmodel, livetime, sin_dec_binning, log_spl_sinDec) + shg : instance of SourceHypoGroup + The instance of SourceHypoGroup (i.e. sources and flux model) for + which the detector signal yield should get constructed. + ppbar : instance of ProgressBar | None + The optional instance of ProgressBar of the parent progress bar. + + Returns + ------- + detsigyield : instance of FixedFluxPointLikeSourceI3DetSigYield + The instance of FixedFluxPointLikeSourceI3DetSigYield providing the + detector signal yield function for a point-like source with a + fixed flux. + """ + detsigyield = self.construct_detsigyields( + dataset=dataset, + data=data, + shgs=[shg], + ppbar=ppbar, + )[0] return detsigyield + def get_detsigyield_construction_factory(self): + """Returns the factory callable for constructing a set of instance of + FixedFluxPointLikeSourceI3DetSigYield. -class PowerLawFluxPointLikeSourceI3DetSigYield(I3DetSigYield): - """The detector signal yield class for the - PowerLawFluxPointLikeSourceI3DetSigYieldImplMethod detector signal yield - implementation method. + Returns + ------- + factory : callable + The factory callable for constructing a set of instance of + FixedFluxPointLikeSourceI3DetSigYield. 
+ """ + factory = self.construct_detsigyields + return factory + + +class SingleParamFluxPointLikeSourceI3DetSigYield( + PointLikeSourceI3DetSigYield): + """The detector signal yield class for a flux that depends on a single + source parameter. """ - def __init__(self, implmethod, dataset, fluxmodel, livetime, - sin_dec_binning, log_spl_sinDec_gamma): + def __init__( + self, + param_name, + dataset, + fluxmodel, + livetime, + sin_dec_binning, + log_spl_sinDec_param, + **kwargs): """Constructs the detector signal yield instance. + Parameters + ---------- + param_name : str + The parameter name this detector signal yield depends + on. These are either fixed or floating parameter. + dataset : Dataset instance + The Dataset instance holding the monte-carlo event data. + fluxmodel : FluxModel + The flux model instance. Must be an instance of FluxModel. + livetime : float | Livetime instance + The live-time. + sin_dec_binning : BinningDefinition instance + The BinningDefinition instance defining the sin(dec) binning. + log_spl_sinDec_param : scipy.interpolate.RectBivariateSpline instance + The 2D spline in sin(dec) and the parameter this detector signal + yield depends on. 
""" - if(not isinstance(implmethod, PowerLawFluxPointLikeSourceI3DetSigYieldImplMethod)): - raise TypeError('The implmethod argument must be an instance of ' - 'PowerLawFluxPointLikeSourceI3DetSigYieldImplMethod!') - - super(PowerLawFluxPointLikeSourceI3DetSigYield, self).__init__( - implmethod, dataset, fluxmodel, livetime, sin_dec_binning) + super().__init__( + param_names=[param_name], + dataset=dataset, + fluxmodel=fluxmodel, + livetime=livetime, + sin_dec_binning=sin_dec_binning, + **kwargs) - self.log_spl_sinDec_gamma = log_spl_sinDec_gamma + self.log_spl_sinDec_param = log_spl_sinDec_param @property - def log_spl_sinDec_gamma(self): + def log_spl_sinDec_param(self): """The :class:`scipy.interpolate.RectBivariateSpline` instance representing the spline for the log value of the detector signal - yield as a function of sin(dec) and gamma. + yield as a function of sin(dec) and the floating parameter. """ - return self._log_spl_sinDec_gamma - @log_spl_sinDec_gamma.setter - def log_spl_sinDec_gamma(self, spl): - if(not isinstance(spl, scipy.interpolate.RectBivariateSpline)): - raise TypeError('The log_spl_sinDec_gamma property must be an ' - 'instance of scipy.interpolate.RectBivariateSpline!') - self._log_spl_sinDec_gamma = spl - - def __call__(self, src, src_flux_params): + return self._log_spl_sinDec_param + + @log_spl_sinDec_param.setter + def log_spl_sinDec_param(self, spl): + if not isinstance(spl, scipy.interpolate.RectBivariateSpline): + raise TypeError( + 'The log_spl_sinDec_param property must be an instance of ' + 'scipy.interpolate.RectBivariateSpline! ' + f'Its current type is {classname(spl)}.') + self._log_spl_sinDec_param = spl + + def __call__(self, src_recarray, src_params_recarray): """Retrieves the detector signal yield for the given list of sources and their flux parameters. 
Parameters ---------- - src : numpy record ndarray + src_recarray : numpy record ndarray The numpy record ndarray with the field ``dec`` holding the declination of the source. - src_flux_params : (N_sources,)-shaped numpy record ndarray - The numpy record ndarray containing the flux parameter ``gamma`` for - the sources. ``gamma`` can be different for the different sources. + src_params_recarray : (N_sources,)-shaped numpy record ndarray + The numpy record ndarray containing the parameter values of the + sources. The parameter values can be different for the different + sources. + The record array needs to contain two fields for each source + parameter, one named with the source's local parameter name + holding the source's local parameter value, and one named + holding the global parameter index plus one for each + source value. For values mapping to non-fit parameters, the index + should be negative. Returns ------- values : numpy (N_sources,)-shaped 1D ndarray The array with the detector signal yield for each source. - grads : numpy (N_sources,N_fitparams)-shaped 2D ndarray - The array containing the gradient values for each source and fit - parameter. Since, this implementation depends on only one fit - parameter, i.e. gamma, the array is (N_sources,1)-shaped. + grads : dict + The dictionary holding the gradient values for each global floating + parameter. The key is the global floating parameter index and the + value is the (N_sources,)-shaped numpy ndarray holding the gradient + value dY_k/dp_s. """ - src_dec = np.atleast_1d(src['dec']) - if src_flux_params is None: - # Gamma is not a fit parameter. So we take it from the - # initial flux model. - src_gamma = np.array([self.fluxmodel.gamma], dtype=np.double) - else: - src_gamma = src_flux_params['gamma'] + local_param_name = self.param_names[0] - # Create results array. 
- values = np.zeros_like(src_dec, dtype=np.float64) - grads = np.zeros_like(src_dec, dtype=np.float64) + src_dec = np.atleast_1d(src_recarray['dec']) + src_param = src_params_recarray[local_param_name] + src_param_gp_idxs = src_params_recarray[f'{local_param_name}:gpidx'] + + n_sources = len(src_dec) + + # Check for correct input format. + if not (len(src_param) == n_sources and + len(src_param_gp_idxs) == n_sources): + raise RuntimeError( + f'The length ({len(src_param)}) of the array for the ' + f'source parameter "{local_param_name}" does not match the ' + f'number of sources ({n_sources})!') # Calculate the detector signal yield only for the sources for # which we actually have detector acceptance. For the other sources, # the detector signal yield is zero. - mask = (np.sin(src_dec) >= self._sin_dec_binning.lower_edge)\ - &(np.sin(src_dec) <= self._sin_dec_binning.upper_edge) - - if len(src_gamma) == len(src_dec): - src_gamma = src_gamma[mask] - else: - src_gamma = src_gamma[0] - - values[mask] = np.exp(self._log_spl_sinDec_gamma( - np.sin(src_dec[mask]), src_gamma, grid=False)) - grads[mask] = values[mask] * self._log_spl_sinDec_gamma( - np.sin(src_dec[mask]), src_gamma, grid=False, dy=1) - - return (values, np.atleast_2d(grads)) - - -class PowerLawFluxPointLikeSourceI3DetSigYieldImplMethod( - PointLikeSourceI3DetSigYieldImplMethod, multiproc.IsParallelizable): - """This detector signal yield implementation method constructs a - detector signal yield for a variable power law flux model, which has - the spectral index gamma as fit parameter, assuming a point-like source. - It constructs a two-dimensional spline function in sin(dec) and gamma, using - a :class:`scipy.interpolate.RectBivariateSpline`. Hence, the detector signal yield - can vary with the declination and the spectral index, gamma, of the source. - - This detector signal yield implementation method works with a - PowerLawFlux flux model. 
+ src_mask = (np.sin(src_dec) >= self._sin_dec_binning.lower_edge) &\ + (np.sin(src_dec) <= self._sin_dec_binning.upper_edge) + + values = np.zeros((n_sources,), dtype=np.float64) + values[src_mask] = np.exp(self._log_spl_sinDec_param( + np.sin(src_dec[src_mask]), src_param[src_mask], grid=False)) + + # Determine the number of global parameters the local parameter is + # made of. + gfp_idxs = np.unique(src_param_gp_idxs) + gfp_idxs = gfp_idxs[gfp_idxs > 0] - 1 + + # Calculate the gradients for each global fit parameter. + grads = dict() + for gfp_idx in gfp_idxs: + # Create the gradient array of shape (n_sources,). This could be + # a masked array to save memory, when there are many sources and + # global fit parameters. + grads[gfp_idx] = np.zeros((n_sources,), dtype=np.float64) + + # Create a mask to select the sources that depend on the global + # fit parameter with index gfp_idx. + gfp_src_mask = (src_param_gp_idxs == gfp_idx+1) + + # m is a (n_sources,)-shaped ndarray, which selects only sources + # that have detector exceptance and depend on the global fit + # parameter gfp_idx. + m = src_mask & gfp_src_mask + + grads[gfp_idx][m] = values[m] * self._log_spl_sinDec_param( + np.sin(src_dec[m]), src_param[m], grid=False, dy=1) + + return (values, grads) + + +class SingleParamFluxPointLikeSourceI3DetSigYieldBuilder( + PointLikeSourceI3DetSigYieldBuilder, + multiproc.IsParallelizable, +): + """This detector signal yield builder constructs a + detector signal yield for a variable flux model with a single parameter, + assuming a point-like source. + It constructs a two-dimensional spline function in sin(dec) and the + parameter, using a :class:`scipy.interpolate.RectBivariateSpline`. + Hence, the detector signal yield can vary with the declination and the + parameter of the flux model. It is tailored to the IceCube detector at the South Pole, where the effective area depends soley on the zenith angle, and hence on the declination, of the source. 
""" def __init__( - self, gamma_grid, sin_dec_binning=None, spline_order_sinDec=2, - spline_order_gamma=2, ncpu=None): - """Creates a new IceCube detector signal yield implementation - method object for a power law flux model. It requires a sinDec binning - definition to compute the sin(dec) dependency of the detector effective - area, and a gamma parameter grid to compute the gamma dependency of the - detector signal yield. + self, + param_grid, + sin_dec_binning=None, + spline_order_sinDec=2, + spline_order_param=2, + ncpu=None, + **kwargs, + ): + """Creates a new IceCube detector signal yield builder instance for a + flux model with a single parameter. + It requires a sinDec binning definition to compute the sin(dec) + dependency of the detector effective area, and a parameter grid to + compute the parameter dependency of the detector signal yield. Parameters ---------- - gamma_grid : ParameterGrid instance - The ParameterGrid instance which defines the grid of gamma values. - sin_dec_binning : BinningDefinition | None - The BinningDefinition instance which defines the sin(dec) binning. - If set to None, the sin(dec) binning will be taken from the + param_grid : instance of ParameterGrid + The instance of ParameterGrid which defines the grid of the + parameter values. The name of the parameter is defined via the name + property of the ParameterGrid instance. + sin_dec_binning : instance of BinningDefinition | None + The instance of BinningDefinition which defines the sin(dec) + binning. If set to None, the sin(dec) binning will be taken from the dataset's binning definitions. spline_order_sinDec : int The order of the spline function for the logarithmic values of the @@ -504,30 +889,33 @@ def __init__( detector signal yield along the gamma axis. The default is 2. ncpu : int | None - The number of CPUs to utilize. Global setting will take place if - not specified, i.e. set to None. + The number of CPUs to utilize. 
If set to ``None``, global setting + will take place. """ - super(PowerLawFluxPointLikeSourceI3DetSigYieldImplMethod, self).__init__( - sin_dec_binning, ncpu=ncpu) + super().__init__( + sin_dec_binning, + ncpu=ncpu, + **kwargs) - self.supported_fluxmodels = (PowerLawFlux,) - - self.gamma_grid = gamma_grid + self.param_grid = param_grid self.spline_order_sinDec = spline_order_sinDec - self.spline_order_gamma = spline_order_gamma + self.spline_order_param = spline_order_param @property - def gamma_grid(self): - """The ParameterGrid instance for the gamma grid that should be used for - computing the gamma dependency of the detector signal yield. + def param_grid(self): + """The ParameterGrid instance for the parameter grid that should be used + for computing the parameter dependency of the detector signal yield. """ - return self._gamma_grid - @gamma_grid.setter - def gamma_grid(self, grid): - if(not isinstance(grid, ParameterGrid)): - raise TypeError('The gamma_grid property must be an instance of ' - 'ParameterGrid!') - self._gamma_grid = grid + return self._param_grid + + @param_grid.setter + def param_grid(self, grid): + if not isinstance(grid, ParameterGrid): + raise TypeError( + 'The param_grid property must be an instance of ' + 'ParameterGrid! ' + f'Its current type is {classname(grid)}.') + self._param_grid = grid @property def spline_order_sinDec(self): @@ -535,74 +923,77 @@ def spline_order_sinDec(self): detector signal yield, along the sin(dec) axis. """ return self._spline_order_sinDec + @spline_order_sinDec.setter def spline_order_sinDec(self, order): - if(not isinstance(order, int)): - raise TypeError('The spline_order_sinDec property must be of ' - 'type int!') + if not isinstance(order, int): + raise TypeError( + 'The spline_order_sinDec property must be of type int! 
' + f'Its current type is {classname(order)}.') self._spline_order_sinDec = order @property - def spline_order_gamma(self): + def spline_order_param(self): """The order (int) of the logarithmic spline function, that splines the - detector signal yield, along the gamma axis. + detector signal yield, along the parameter axis. """ - return self._spline_order_gamma - @spline_order_gamma.setter - def spline_order_gamma(self, order): - if(not isinstance(order, int)): - raise TypeError('The spline_order_gamma property must be of ' - 'type int!') - self._spline_order_gamma = order - - def _get_signal_fitparam_names(self): - """The list of signal fit parameter names the detector signal yield - depends on. - """ - return ['gamma'] + return self._spline_order_param + + @spline_order_param.setter + def spline_order_param(self, order): + if not isinstance(order, int): + raise TypeError( + 'The spline_order_param property must be of type int! ' + f'Its current type is {classname(order)}.') + self._spline_order_param = order def construct_detsigyield( - self, dataset, data, fluxmodel, livetime, ppbar=None): + self, + dataset, + data, + shg, + ppbar=None, + ): """Constructs a detector signal yield 2-dimensional log spline - function for the given power law flux model with varying gamma values. + function for the given flux model with varying parameter values. Parameters ---------- - dataset : Dataset instance - The Dataset instance holding the sin(dec) binning definition. - data : DatasetData instance - The DatasetData instance holding the monte-carlo event data. + dataset : instance of Dataset + The instance of Dataset holding the sin(dec) binning definition. + data : instance of DatasetData + The instance of DatasetData holding the monte-carlo event data. The numpy record array for the monte-carlo data of the dataset must contain the following data fields: - - 'true_dec' : float + ``'true_dec'`` : float The true declination of the data event. 
- - 'mcweight' : float + ``'mcweight'`` : float The monte-carlo weight of the data event in the unit GeV cm^2 sr. - - 'true_energy' : float + ``'true_energy'`` : float The true energy value of the data event. - fluxmodel : FluxModel - The flux model instance. Must be an instance of FluxModel. - livetime : float | Livetime instance - The live-time in days or an instance of Livetime to use for the - detector signal yield. - ppbar : ProgressBar instance | None + shg : instance of SourceHypoGroup + The instance of SourceHypoGroup for which the detector signal yield + should get constructed. + ppbar : instance of ProgressBar | None The instance of ProgressBar of the optional parent progress bar. Returns ------- - detsigyield : PowerLawFluxPointLikeSourceI3DetSigYield instance - The DetSigYield instance for a point-like source with a power law - flux with variable gamma parameter. + detsigyield : instance of SingleParamFluxPointLikeSourceI3DetSigYield + The I3DetSigYield instance for a point-like source with a flux model + of a single parameter. """ - # Check for the correct data types of the input arguments. - super(PowerLawFluxPointLikeSourceI3DetSigYieldImplMethod, self).construct_detsigyield( - dataset, data, fluxmodel, livetime) + self.assert_types_of_construct_detsigyield_arguments( + dataset=dataset, + data=data, + shgs=shg, + ppbar=ppbar) # Get integrated live-time in days. - livetime_days = get_integrated_livetime_in_days(livetime) + livetime_days = Livetime.get_integrated_livetime(data.livetime) # Get the sin(dec) binning definition either as setting from this # implementation method, or from the dataset. @@ -610,12 +1001,23 @@ def construct_detsigyield( # Calculate conversion factor from the flux model unit into the internal # flux unit GeV^-1 cm^-2 s^-1. 
- toGeVcm2s = get_conversion_factor_to_internal_flux_unit(fluxmodel) + to_internal_flux_unit_factor = shg.fluxmodel.get_conversion_factor_to_internal_flux_unit() + + to_internal_time_unit_factor = to_internal_time_unit( + time_unit=units.day + ) # Define a function that creates a detector signal yield histogram # along sin(dec) for a given flux model, i.e. for given spectral index, # gamma. - def hist(data_sin_true_dec, data_true_energy, sin_dec_binning, weights, fluxmodel): + def _create_hist( + data_sin_true_dec, + data_true_energy, + sin_dec_binning, + weights, + fluxmodel, + to_internal_flux_unit_factor, + ): """Creates a histogram of the detector signal yield with the given sin(dec) binning. @@ -633,48 +1035,90 @@ def hist(data_sin_true_dec, data_true_energy, sin_dec_binning, weights, fluxmode detector signal yield. fluxmodel : FluxModel The flux model to get the flux values from. + to_internal_flux_unit_factor : float + The conversion factor to convert the flux unit into the internal + flux unit. Returns ------- h : 1d ndarray The numpy array containing the histogram values. """ - (h, edges) = np.histogram(data_sin_true_dec, - bins = sin_dec_binning.binedges, - weights = weights * fluxmodel(data_true_energy), - density = False) + weights = ( + weights * + fluxmodel(E=data_true_energy).squeeze() * + to_internal_flux_unit_factor + ) + + (h, edges) = np.histogram( + data_sin_true_dec, + bins=sin_dec_binning.binedges, + weights=weights, + density=False) + return h - data_sin_true_dec = np.sin(data.mc["true_dec"]) - weights = data.mc["mcweight"] * toGeVcm2s * livetime_days * 86400. + data_sin_true_dec = np.sin(data.mc['true_dec']) + + # Generate a list of indices that would sort the data according to the + # sin(true_dec) values. We will sort the MC data according to it, + # because the histogram creation is much faster (2x) when the + # to-be-histogrammed values are already sorted. 
+ sorted_idxs = np.argsort(data_sin_true_dec) - # Make a copy of the gamma grid and extend the grid by one bin on each - # side. - gamma_grid = self._gamma_grid.copy() - gamma_grid.add_extra_lower_and_upper_bin() + data_sin_true_dec = np.take(data_sin_true_dec, sorted_idxs) + data_true_energy = np.take(data.mc['true_energy'], sorted_idxs) + + weights = ( + np.take(data.mc['mcweight'], sorted_idxs) * + livetime_days*to_internal_time_unit_factor + ) + + # Make a copy of the parameter grid and extend the grid by one bin on + # each side. + param_grid = self._param_grid.copy() + param_grid.add_extra_lower_and_upper_bin() # Construct the arguments for the hist function to be used in the # multiproc.parallelize function. - args_list = [ ((data_sin_true_dec, - data.mc['true_energy'], - sin_dec_binning, - weights, - fluxmodel.copy({'gamma':gamma})), {}) - for gamma in gamma_grid.grid ] + args_list = [ + ( + ( + data_sin_true_dec, + data_true_energy, + sin_dec_binning, + weights, + shg.fluxmodel.copy({param_grid.name: param_val}), + to_internal_flux_unit_factor, + ), + {} + ) + for param_val in param_grid.grid + ] h = np.vstack( multiproc.parallelize( - hist, args_list, self.ncpu, ppbar=ppbar)).T + _create_hist, args_list, self.ncpu, ppbar=ppbar)).T # Normalize by solid angle of each bin along the sin(dec) axis. - # The solid angle is given by 2*\pi*(\Delta sin(\delta)) + # The solid angle is given by 2*\pi*(\Delta sin(\delta)). h /= (2.*np.pi * np.diff(sin_dec_binning.binedges)).reshape( - (sin_dec_binning.nbins,1)) - - log_spl_sinDec_gamma = scipy.interpolate.RectBivariateSpline( - sin_dec_binning.bincenters, gamma_grid.grid, np.log(h), - kx = self.spline_order_sinDec, ky = self.spline_order_gamma, s = 0) - - detsigyield = PowerLawFluxPointLikeSourceI3DetSigYield( - self, dataset, fluxmodel, livetime, sin_dec_binning, log_spl_sinDec_gamma) + (sin_dec_binning.nbins, 1)) + + # Create the 2D spline. 
+ log_spl_sinDec_param = scipy.interpolate.RectBivariateSpline( + sin_dec_binning.bincenters, + param_grid.grid, + np.log(h), + kx=self.spline_order_sinDec, + ky=self.spline_order_param, + s=0) + + detsigyield = SingleParamFluxPointLikeSourceI3DetSigYield( + param_name=self._param_grid.name, + dataset=dataset, + fluxmodel=shg.fluxmodel, + livetime=data.livetime, + sin_dec_binning=sin_dec_binning, + log_spl_sinDec_param=log_spl_sinDec_param) return detsigyield diff --git a/skyllh/i3/livetime.py b/skyllh/i3/livetime.py index 4881245636..f261db8ace 100644 --- a/skyllh/i3/livetime.py +++ b/skyllh/i3/livetime.py @@ -2,22 +2,63 @@ import numpy as np -from skyllh.core.livetime import Livetime -from skyllh.core import storage -from skyllh.i3.dataset import I3Dataset +from skyllh.core.livetime import ( + Livetime, +) +from skyllh.core.storage import ( + create_FileLoader, +) +from skyllh.i3.dataset import ( + I3Dataset, +) -class I3Livetime(Livetime): + +class I3Livetime( + Livetime): """The I3Livetime class provides the functionality to load a Livetime object from a good-run-list data file. """ + + @classmethod + def from_grl_data(cls, grl_data): + """Creates an I3LiveTime instance from the given good-run-list (GRL) + data. + + Parameters + ---------- + grl_data : instance of numpy structured ndarray. + The numpy structured ndarray of length N_runs holding the start end + end times of the good runs. The following fields need to exist: + + start : float + The MJD of the run start. + end : float + The MJD of the run stop. + + Returns + ------- + livetime : instance of I3Livetime + The created instance of I3Livetime for the provided GRL data. 
+ """ + uptime_mjd_intervals_arr = np.hstack(( + grl_data['start'].reshape((len(grl_data), 1)), + grl_data['stop'].reshape((len(grl_data), 1)) + )) + + livetime = cls( + uptime_mjd_intervals_arr=uptime_mjd_intervals_arr) + + return livetime + @staticmethod - def from_GRL_files(pathfilenames): + def from_grl_files( + pathfilenames): """Loads an I3Livetime instance from the given good-run-list (GRL) data file. The data file needs to contain the following data fields: - - start : float + start : float The MJD of the run start. - - stop : float + stop : float The MJD of the run stop. Parameters @@ -27,17 +68,20 @@ def from_GRL_files(pathfilenames): Returns ------- - livetime : I3Livetime instance - The created I3Livetime instance for the provided GRL data. + livetime : instance of I3Livetime + The created instance of I3Livetime for the provided GRL data. """ - grl_data = storage.create_FileLoader(pathfilenames).load_data() + grl_data = create_FileLoader(pathfilenames).load_data() uptime_mjd_intervals_arr = np.hstack(( - grl_data['start'].reshape((len(grl_data),1)), - grl_data['stop'].reshape((len(grl_data),1)) + grl_data['start'].reshape((len(grl_data), 1)), + grl_data['stop'].reshape((len(grl_data), 1)) )) - return I3Livetime(uptime_mjd_intervals_arr) + livetime = I3Livetime( + uptime_mjd_intervals_arr=uptime_mjd_intervals_arr) + + return livetime @staticmethod def from_I3Dataset(ds): @@ -52,15 +96,28 @@ def from_I3Dataset(ds): Returns ------- - livetime : I3Livetime instance - The created I3Livetime instance for the GRL data from the provided - dataset. + livetime : instance of I3Livetime + The created instance of I3Livetime for the GRL data from the + provided dataset. 
+ """ + if not isinstance(ds, I3Dataset): + raise TypeError( + 'The ds argument must be an instance of I3Dataset!') + if len(ds.grl_pathfilename_list) == 0: + raise ValueError( + 'No GRL files have been defined for the given dataset!') + + livetime = I3Livetime.from_grl_files( + pathfilenames=ds.grl_pathfilename_list) + + return livetime + + def __init__( + self, + *args, + **kwargs): + """Creates a new instance of I3Livetime. """ - if(not isinstance(ds, I3Dataset)): - raise TypeError('The ds argument must be an instance of I3Dataset!') - if(len(ds.grl_pathfilename_list) == 0): - raise ValueError('No GRL files have been defined for the given dataset!') - return I3Livetime.from_GRL_files(ds.grl_pathfilename_list) - - def __init__(self, uptime_mjd_intervals_arr): - super(I3Livetime, self).__init__(uptime_mjd_intervals_arr) + super().__init__( + *args, + **kwargs) diff --git a/skyllh/i3/pdf.py b/skyllh/i3/pdf.py index 3fe6804a4f..447bb4e6e1 100644 --- a/skyllh/i3/pdf.py +++ b/skyllh/i3/pdf.py @@ -2,19 +2,31 @@ import numpy as np -from skyllh.core.binning import UsesBinning +from skyllh.core.binning import ( + UsesBinning, +) +from skyllh.core.debugging import ( + get_logger, +) from skyllh.core.pdf import ( EnergyPDF, - PDFAxis + PDFAxis, +) +from skyllh.core.py import ( + classname, ) from skyllh.core.smoothing import ( UNSMOOTH_AXIS, SmoothingFilter, HistSmoothingMethod, NoHistSmoothingMethod, - NeighboringBinHistSmoothingMethod + NeighboringBinHistSmoothingMethod, +) +from skyllh.core.timing import ( + TaskTimer, ) -from skyllh.core.timing import TaskTimer + +logger = get_logger(__name__) class I3EnergyPDF(EnergyPDF, UsesBinning): @@ -25,15 +37,27 @@ class I3EnergyPDF(EnergyPDF, UsesBinning): The IceCube energy PDF is modeled as a 1d histogram in energy, but for different sin(declination) bins, hence, stored as a 2d histogram. 
""" - def __init__(self, data_logE, data_sinDec, data_mcweight, data_physicsweight, - logE_binning, sinDec_binning, smoothing_filter): + def __init__( + self, + pmm, + data_log10_energy, + data_sin_dec, + data_mcweight, + data_physicsweight, + log10_energy_binning, + sin_dec_binning, + smoothing_filter): """Creates a new IceCube energy PDF object. Parameters ---------- - data_logE : 1d ndarray + pmm : instance of ParameterModelMapper | None + The instance of ParameterModelMapper defining the global parameters + and their mapping to local model/source parameters. + It can be ``None``, if the PDF does not depend on any parameters. + data_log10_energy : 1d ndarray The array holding the log10(E) values of the events. - data_sinDec : 1d ndarray + data_sin_dec : 1d ndarray The array holding the sin(dec) values of the events. data_mcweight : 1d ndarray The array holding the monte-carlo weights of the events. @@ -43,35 +67,40 @@ def __init__(self, data_logE, data_sinDec, data_mcweight, data_physicsweight, The array holding the physics weights of the events. The final data weight will be the product of data_mcweight and data_physicsweight. - logE_binning : BinningDefinition - The binning definition for the log(E) axis. - sinDec_binning : BinningDefinition + log10_energy_binning : instance of BinningDefinition + The binning definition for the log10(E) axis. + sin_dec_binning : instance of BinningDefinition The binning definition for the sin(declination) axis. - smoothing_filter : SmoothingFilter instance | None + smoothing_filter : instance of SmoothingFilter | None The smoothing filter to use for smoothing the energy histogram. - If None, no smoothing will be applied. + If ``None``, no smoothing will be applied. """ - super(I3EnergyPDF, self).__init__() - - #self.logger = logging.getLogger(__name__) + super().__init__( + pmm=pmm) # Define the PDF axes. 
- self.add_axis(PDFAxis(name='log_energy', - vmin=logE_binning.lower_edge, - vmax=logE_binning.upper_edge)) - self.add_axis(PDFAxis(name='sin_dec', - vmin=sinDec_binning.lower_edge, - vmax=sinDec_binning.upper_edge)) - - self.add_binning(logE_binning, 'log_energy') - self.add_binning(sinDec_binning, 'sin_dec') + self.add_axis( + PDFAxis( + name='log_energy', + vmin=log10_energy_binning.lower_edge, + vmax=log10_energy_binning.upper_edge)) + self.add_axis( + PDFAxis( + name='sin_dec', + vmin=sin_dec_binning.lower_edge, + vmax=sin_dec_binning.upper_edge)) + + self.add_binning(log10_energy_binning, 'log_energy') + self.add_binning(sin_dec_binning, 'sin_dec') # Create the smoothing method instance tailored to the energy PDF. - # We will smooth only the first axis (logE). - if((smoothing_filter is not None) and - (not isinstance(smoothing_filter, SmoothingFilter))): - raise TypeError('The smoothing_filter argument must be None or an instance of SmoothingFilter!') - if(smoothing_filter is None): + # We will smooth only the first axis (log10(E)). + if (smoothing_filter is not None) and\ + (not isinstance(smoothing_filter, SmoothingFilter)): + raise TypeError( + 'The smoothing_filter argument must be None or an instance of ' + f'SmoothingFilter! It is of type {classname(smoothing_filter)}') + if smoothing_filter is None: self.hist_smoothing_method = NoHistSmoothingMethod() else: self.hist_smoothing_method = NeighboringBinHistSmoothingMethod( @@ -83,10 +112,12 @@ def __init__(self, data_logE, data_sinDec, data_mcweight, data_physicsweight, # Create a 2D histogram with only the MC events to determine the MC # coverage. 
- (h, bins_logE, bins_sinDec) = np.histogram2d(data_logE, data_sinDec, - bins = [logE_binning.binedges, sinDec_binning.binedges], - range = [logE_binning.range, sinDec_binning.range], - normed = False) + (h, bins_logE, bins_sinDec) = np.histogram2d( + data_log10_energy, + data_sin_dec, + bins=[log10_energy_binning.binedges, sin_dec_binning.binedges], + range=[log10_energy_binning.range, sin_dec_binning.range], + density=False) h = self._hist_smoothing_method.smooth(h) self._hist_mask_mc_covered = h > 0 @@ -97,10 +128,12 @@ def __init__(self, data_logE, data_sinDec, data_mcweight, data_physicsweight, # Create a 2D histogram with only the MC events that have zero physics # contribution. Note: By construction the zero physics contribution bins # are a subset of the MC covered bins. - (h, bins_logE, bins_sinDec) = np.histogram2d(data_logE[mask], data_sinDec[mask], - bins = [logE_binning.binedges, sinDec_binning.binedges], - range = [logE_binning.range, sinDec_binning.range], - normed = False) + (h, bins_logE, bins_sinDec) = np.histogram2d( + data_log10_energy[mask], + data_sin_dec[mask], + bins=[log10_energy_binning.binedges, sin_dec_binning.binedges], + range=[log10_energy_binning.range, sin_dec_binning.range], + density=False) h = self._hist_smoothing_method.smooth(h) self._hist_mask_mc_covered_zero_physics = h > 0 @@ -108,21 +141,24 @@ def __init__(self, data_logE, data_sinDec, data_mcweight, data_physicsweight, # contribution. We will do the normalization along the logE # axis manually. 
data_weights = data_mcweight[~mask] * data_physicsweight[~mask] - (h, bins_logE, bins_sinDec) = np.histogram2d(data_logE[~mask], data_sinDec[~mask], - bins = [logE_binning.binedges, sinDec_binning.binedges], - weights = data_weights, - range = [logE_binning.range, sinDec_binning.range], - normed = False) + (h, bins_logE, bins_sinDec) = np.histogram2d( + data_log10_energy[~mask], + data_sin_dec[~mask], + bins=[log10_energy_binning.binedges, sin_dec_binning.binedges], + weights=data_weights, + range=[log10_energy_binning.range, sin_dec_binning.range], + density=False) # Calculate the normalization for each logE bin. Hence we need to sum # over the logE bins (axis 0) for each sin(dec) bin and need to divide # by the logE bin widths along the sin(dec) bins. The result array norm # is a 2D array of the same shape as h. - norms = np.sum(h, axis=(0,))[np.newaxis,...] * np.diff(logE_binning.binedges)[...,np.newaxis] + norms = np.sum(h, axis=(0,))[np.newaxis, ...] *\ + np.diff(log10_energy_binning.binedges)[..., np.newaxis] h /= norms h = self._hist_smoothing_method.smooth(h) - self._hist_logE_sinDec = h + self._hist_log10_energy_sin_dec = h @property def hist_smoothing_method(self): @@ -130,17 +166,20 @@ def hist_smoothing_method(self): energy PDF histogram. """ return self._hist_smoothing_method + @hist_smoothing_method.setter def hist_smoothing_method(self, method): - if(not isinstance(method, HistSmoothingMethod)): - raise TypeError('The hist_smoothing_method property must be an instance of HistSmoothingMethod!') + if not isinstance(method, HistSmoothingMethod): + raise TypeError( + 'The hist_smoothing_method property must be an instance of ' + f'HistSmoothingMethod! It is of type {classname(method)}') self._hist_smoothing_method = method @property def hist(self): """(read-only) The 2D logE-sinDec histogram array. 
""" - return self._hist_logE_sinDec + return self._hist_log10_energy_sin_dec @property def hist_mask_mc_covered(self): @@ -163,57 +202,80 @@ def hist_mask_mc_covered_with_physics(self): bins for which there is monte-carlo coverage and has physics contribution. """ - return self._hist_mask_mc_covered & ~self._hist_mask_mc_covered_zero_physics - - def assert_is_valid_for_exp_data(self, data_exp): - """Checks if this energy PDF is valid for all the given experimental - data. - It checks if all the data is within the logE and sin(dec) binning range. + mask = ( + self._hist_mask_mc_covered & ~self._hist_mask_mc_covered_zero_physics + ) + return mask + + def assert_is_valid_for_trial_data( + self, + tdm, + tl=None, + **kwargs): + """Checks if this energy PDF is valid for all the given trial events. + It checks if all the data is within the log10(E) and sin(dec) binning + range. Parameters ---------- - data_exp : numpy record ndarray - The array holding the experimental data. The following data fields - must exist: + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial data events. + The following data fields must exist: - - 'log_energy' : float - The logarithm of the energy value of the data event. - - 'dec' : float + log_energy : float + The base-10 logarithm of the energy value of the data event. + dec : float The declination of the data event. + tl : instance of TimeLord | None + The optional instance of TimeLord for measuring timing information. + Raises ------ ValueError - If some of the data is outside the logE or sin(dec) binning range. + If some of the data is outside the log10(E) or sin(dec) binning + range. """ - logE_binning = self.get_binning('log_energy') - sinDec_binning = self.get_binning('sin_dec') - - exp_logE = data_exp['log_energy'] - exp_sinDec = np.sin(data_exp['dec']) - - # Check if all the data is within the binning range. 
- #if(logE_binning.any_data_out_of_binning_range(exp_logE)): - #self.logger.warning('Some data is outside the logE range (%.3f, %.3f)', logE_binning.lower_edge, logE_binning.upper_edge) - #if(sinDec_binning.any_data_out_of_binning_range(exp_sinDec)): - #self.logger.warning('Some data is outside the sin(dec) range (%.3f, %.3f)', sinDec_binning.lower_edge, sinDec_binning.upper_edge) - - def get_prob(self, tdm, fitparams=None, tl=None): - """Calculates the energy probability (in logE) of each event. + log10_energy_binning = self.get_binning('log_energy') + sin_dec_binning = self.get_binning('sin_dec') + + data_log10_energy = tdm['log_energy'] + data_sin_dec = np.sin(tdm['dec']) + + if log10_energy_binning.any_data_out_of_range(data_log10_energy): + oor_data = log10_energy_binning.get_out_of_range_data( + data_log10_energy) + raise ValueError( + 'Some data is outside the log10(E) range ' + f'({log10_energy_binning.lower_edge:.3f},' + f' {log10_energy_binning.upper_edge:.3f})! ' + f'The following data values are out of range: {oor_data}') + + if sin_dec_binning.any_data_out_of_range(data_sin_dec): + oor_data = sin_dec_binning.get_out_of_range_data( + data_sin_dec) + raise ValueError( + 'Some data is outside the sin(dec) range ' + f'({sin_dec_binning.lower_edge:.3f},' + f' {sin_dec_binning.upper_edge:.3f})! ' + f'The following data values are out of range: {oor_data}') + + def get_pd(self, tdm, params_recarray=None, tl=None): + """Calculates the energy probability density of each event. Parameters ---------- tdm : instance of TrialDataManager The TrialDataManager instance holding the data events for which the - probability should be calculated for. The following data fields must - exist: + probability density should be calculated. + The following data fields must exist: - - 'log_energy' : float - The logarithm of the energy value of the event. - - 'sin_dec' : float + log_energy : float + The base-10 logarithm of the energy value of the event. 
+ sin_dec : float The sin(declination) value of the event. - fitparams : None + params_recarray : None Unused interface parameter. tl : TimeLord instance | None The optional TimeLord instance that should be used to measure @@ -221,20 +283,25 @@ def get_prob(self, tdm, fitparams=None, tl=None): Returns ------- - prob : 1D (N_events,) shaped ndarray - The array with the energy probability for each event. + pd : instance of ndarray + The 1D (N_events,)-shaped numpy ndarray with the energy probability + density for each event. + grads : dict + The dictionary holding the gradients of the probability density + w.r.t. each fit parameter. The key of the dictionary is the id + of the global fit parameter. Because this energy PDF does not depend + on any fit parameters, an empty dictionary is returned. """ - get_data = tdm.get_data - - logE_binning = self.get_binning('log_energy') - sinDec_binning = self.get_binning('sin_dec') + log10_energy_binning = self.get_binning('log_energy') + sin_dec_binning = self.get_binning('sin_dec') - logE_idx = np.digitize( - get_data('log_energy'), logE_binning.binedges) - 1 - sinDec_idx = np.digitize( - get_data('sin_dec'), sinDec_binning.binedges) - 1 + log10_energy_idx = np.digitize( + tdm['log_energy'], log10_energy_binning.binedges) - 1 + sin_dec_idx = np.digitize( + tdm['sin_dec'], sin_dec_binning.binedges) - 1 - with TaskTimer(tl, 'Evaluating logE-sinDec histogram.'): - prob = self._hist_logE_sinDec[(logE_idx,sinDec_idx)] + with TaskTimer(tl, 'Evaluating log10_energy-sin_dec histogram.'): + pd = self._hist_log10_energy_sin_dec[ + (log10_energy_idx, sin_dec_idx)] - return prob + return (pd, dict()) diff --git a/skyllh/i3/pdfratio.py b/skyllh/i3/pdfratio.py index 3c338e8524..57d67b6c4c 100644 --- a/skyllh/i3/pdfratio.py +++ b/skyllh/i3/pdfratio.py @@ -1,37 +1,55 @@ # -*- coding: utf-8 -*- -import abc import numpy as np +from numpy.lib.recfunctions import repack_fields import scipy.interpolate -from skyllh.core.parameters import 
make_params_hash -from skyllh.core.multiproc import IsParallelizable, parallelize -from skyllh.core.pdfratio import SigSetOverBkgPDFRatio, PDFRatioFillMethod, MostSignalLikePDFRatioFillMethod - -from skyllh.i3.pdf import I3EnergyPDF - - -class I3EnergySigSetOverBkgPDFRatioSpline(SigSetOverBkgPDFRatio, IsParallelizable): - """This class implements a signal over background PDF ratio spline for - I3EnergyPDF enegry PDFs. It takes an object, which is derived from PDFSet - for I3EnergyPDF PDF types, and which is derived from IsSignalPDF, as signal - PDF. Furthermore, it takes an object, which is derived from I3EnergyPDF and - IsBackgroundPDF, as background PDF, and creates a spline for the ratio of - the signal and background PDFs for a grid of different discrete energy - signal fit parameters, which are defined by the signal PDF set. +from skyllh.core.py import ( + make_dict_hash, +) +from skyllh.core.multiproc import ( + IsParallelizable, + parallelize, +) +from skyllh.core.pdfratio import ( + SigSetOverBkgPDFRatio, +) +from skyllh.core.pdfratio_fill import ( + MostSignalLikePDFRatioFillMethod, + PDFRatioFillMethod, +) + + +class SplinedI3EnergySigSetOverBkgPDFRatio( + SigSetOverBkgPDFRatio, + IsParallelizable): + """This class implements a splined signal over background PDF ratio for + enegry PDFs of type I3EnergyPDF. + It takes an instance, which is derived from PDFSet, and which is derived + from IsSignalPDF, as signal PDF. Furthermore, it takes an instance, which + is derived from I3EnergyPDF and IsBackgroundPDF, as background PDF, and + creates a spline for the ratio of the signal and background PDFs for a grid + of different discrete energy signal parameters, which are defined by the + signal PDF set. """ def __init__( - self, signalpdfset, backgroundpdf, - fillmethod=None, interpolmethod=None, ncpu=None, ppbar=None): - """Creates a new IceCube signal-over-background energy PDF ratio object. 
+ self, + sig_pdf_set, + bkg_pdf, + fillmethod=None, + interpolmethod_cls=None, + ncpu=None, + ppbar=None): + """Creates a new IceCube signal-over-background energy PDF ratio spline + instance. Parameters ---------- - signalpdfset : class instance derived from PDFSet (for PDF type + sig_pdf_set : class instance derived from PDFSet (for PDF type I3EnergyPDF), IsSignalPDF, and UsesBinning The PDF set, which provides signal energy PDFs for a set of discrete signal parameters. - backgroundpdf : class instance derived from I3EnergyPDF, and + bkg_pdf : class instance derived from I3EnergyPDF, and IsBackgroundPDF The background energy PDF object. fillmethod : instance of PDFRatioFillMethod | None @@ -39,8 +57,8 @@ def __init__( the desired ratio fill method. If set to None (default), the default ratio fill method MostSignalLikePDFRatioFillMethod will be used. - interpolmethod : class of GridManifoldInterpolationMethod - The class implementing the fit parameter interpolation method for + interpolmethod_cls : class of GridManifoldInterpolationMethod + The class implementing the parameter interpolation method for the PDF ratio manifold grid. ncpu : int | None The number of CPUs to use to create the ratio splines for the @@ -53,52 +71,63 @@ def __init__( ValueError If the signal and background PDFs use different binning. """ - super(I3EnergySigSetOverBkgPDFRatioSpline, self).__init__( - pdf_type=I3EnergyPDF, - signalpdfset=signalpdfset, backgroundpdf=backgroundpdf, - interpolmethod=interpolmethod, + super().__init__( + sig_pdf_set=sig_pdf_set, + bkg_pdf=bkg_pdf, + interpolmethod_cls=interpolmethod_cls, ncpu=ncpu) # Define the default ratio fill method. - if(fillmethod is None): + if fillmethod is None: fillmethod = MostSignalLikePDFRatioFillMethod() self.fillmethod = fillmethod # Ensure same binning of signal and background PDFs. 
- for (sigpdf_hash, sigpdf) in self.signalpdfset.items(): - if(not sigpdf.has_same_binning_as(self.backgroundpdf)): - raise ValueError('At least one signal PDF does not have the same binning as the background PDF!') - - def create_log_ratio_spline(sigpdfset, bkgpdf, fillmethod, gridfitparams): + for sig_pdf in self._sig_pdf_set.values(): + if not sig_pdf.has_same_binning_as(self._bkg_pdf): + raise ValueError( + 'At least one signal PDF does not have the same binning ' + 'as the background PDF!') + + def create_log_ratio_spline( + sig_pdf_set, + bkg_pdf, + fillmethod, + gridparams): """Creates the signal/background ratio spline for the given signal parameters. Returns ------- - log_ratio_spline : RegularGridInterpolator + log_ratio_spline : instance of RegularGridInterpolator The spline of the logarithmic PDF ratio values. """ # Get the signal PDF for the given signal parameters. - sigpdf = sigpdfset.get_pdf(gridfitparams) + sig_pdf = sig_pdf_set[gridparams] # Create the ratio array with the same shape than the background pdf # histogram. - ratio = np.ones_like(bkgpdf.hist, dtype=np.float64) + ratio = np.ones_like(bkg_pdf.hist, dtype=np.float64) # Fill the ratio array. - ratio = fillmethod.fill_ratios(ratio, - sigpdf.hist, bkgpdf.hist, - sigpdf.hist_mask_mc_covered, sigpdf.hist_mask_mc_covered_zero_physics, - bkgpdf.hist_mask_mc_covered, bkgpdf.hist_mask_mc_covered_zero_physics) + ratio = fillmethod( + ratio, + sig_pdf.hist, + bkg_pdf.hist, + sig_pdf.hist_mask_mc_covered, + sig_pdf.hist_mask_mc_covered_zero_physics, + bkg_pdf.hist_mask_mc_covered, + bkg_pdf.hist_mask_mc_covered_zero_physics) # Define the grid points for the spline. In general, we use the bin # centers of the binning, but for the first and last point of each # dimension we use the lower and upper bin edge, respectively, to # ensure full coverage of the spline across the binning range. 
points_list = [] - for binning in sigpdf.binnings: + for binning in sig_pdf.binnings: points = binning.bincenters - (points[0], points[-1]) = (binning.lower_edge, binning.upper_edge) + (points[0], points[-1]) = ( + binning.lower_edge, binning.upper_edge) points_list.append(points) # Create the spline for the ratio values. @@ -111,36 +140,58 @@ def create_log_ratio_spline(sigpdfset, bkgpdf, fillmethod, gridfitparams): return log_ratio_spline - # Get the list of fit parameter permutations on the grid for which we - # need to create PDF ratio arrays. - gridfitparams_list = self.signalpdfset.gridfitparams_list + # Get the list of parameter permutations on the grid for which we + # need to create PDF ratio splines. + gridparams_list = self._sig_pdf_set.gridparams_list - args_list = [ ((signalpdfset, backgroundpdf, self.fillmethod, gridfitparams),{}) - for gridfitparams in gridfitparams_list ] + args_list = [ + ((self._sig_pdf_set, + self._bkg_pdf, + self._fillmethod, + gridparams), + {}) + for gridparams in gridparams_list + ] log_ratio_spline_list = parallelize( - create_log_ratio_spline, args_list, self.ncpu, ppbar=ppbar) + func=create_log_ratio_spline, + args_list=args_list, + ncpu=self.ncpu, + ppbar=ppbar) # Save all the log_ratio splines in a dictionary. - self._gridfitparams_hash_log_ratio_spline_dict = dict() - for (gridfitparams, log_ratio_spline) in zip(gridfitparams_list, log_ratio_spline_list): - gridfitparams_hash = make_params_hash(gridfitparams) - self._gridfitparams_hash_log_ratio_spline_dict[gridfitparams_hash] = log_ratio_spline + self._gridparams_hash_log_ratio_spline_dict = dict() + for (gridparams, log_ratio_spline) in zip(gridparams_list, + log_ratio_spline_list): + gridparams_hash = make_dict_hash(gridparams) + self._gridparams_hash_log_ratio_spline_dict[gridparams_hash] =\ + log_ratio_spline # Save the list of data field names. 
- self._data_field_names = [ binning.name - for binning in self.backgroundpdf.binnings ] + self._data_field_names = [ + binning.name + for binning in self._bkg_pdf.binnings + ] - # Construct the instance for the fit parameter interpolation method. - self._interpolmethod_instance = self.interpolmethod(self._get_spline_value, signalpdfset.fitparams_grid_set) + # Construct the instance for the parameter interpolation method. + self._interpolmethod = self._interpolmethod_cls( + func=self._evaluate_splines, + param_grid_set=sig_pdf_set.param_grid_set) - # Create cache variables for the last ratio value and gradients in order + # Save the parameter names needed for the interpolation for later usage. + self._interpol_param_names = \ + self._sig_pdf_set.param_grid_set.params_name_list + + # Create cache variable for the last ratio values and gradients in order # to avoid the recalculation of the ratio value when the # ``get_gradient`` method is called (usually after the ``get_ratio`` # method was called). - self._cache_fitparams_hash = None - self._cache_ratio = None - self._cache_gradients = None + self._cache = self._create_cache( + trial_data_state_id=None, + interpol_params_recarray=None, + ratio=None, + grads=None + ) @property def fillmethod(self): @@ -148,59 +199,228 @@ def fillmethod(self): PDF ratio bins. """ return self._fillmethod + @fillmethod.setter def fillmethod(self, obj): - if(not isinstance(obj, PDFRatioFillMethod)): - raise TypeError('The fillmethod property must be an instance of PDFRatioFillMethod!') + if not isinstance(obj, PDFRatioFillMethod): + raise TypeError( + 'The fillmethod property must be an instance of ' + 'PDFRatioFillMethod!') self._fillmethod = obj - def _get_spline_value(self, tdm, gridfitparams, eventdata): - """Selects the spline object for the given fit parameter grid point and - evaluates the spline for all the given events. 
+ def _create_cache( + self, + trial_data_state_id, + interpol_params_recarray, + ratio, + grads): + """Creates a cache dictionary holding cache data. + + Parameters + ---------- + trial_data_state_id : int | None + The trial data state ID of the TrialDataManager. + interpol_params_recarray : instance of numpy record ndarray | None + The numpy record ndarray of length N_sources holding the parameter + names and values necessary for the interpolation for all sources. + ratio : instance of numpy ndarray + The (N_values,)-shaped numpy ndarray holding the PDF ratio values + for all sources and trial events. + grads : instance of numpy ndarray + The (D,N_values)-shaped numpy ndarray holding the gradients for each + PDF ratio value w.r.t. each interpolation parameter. + """ + cache = { + 'trial_data_state_id': trial_data_state_id, + 'interpol_params_recarray': interpol_params_recarray, + 'ratio': ratio, + 'grads': grads + } + + return cache + + def _is_cached(self, trial_data_state_id, interpol_params_recarray): + """Checks if the ratio and gradients for the given set of interpolation + parameters are already cached. """ - # Get the spline object for the given fit parameter grid values. - gridfitparams_hash = make_params_hash(gridfitparams) - spline = self._gridfitparams_hash_log_ratio_spline_dict[gridfitparams_hash] + if self._cache['trial_data_state_id'] is None: + return False + + if self._cache['trial_data_state_id'] != trial_data_state_id: + return False + + if not np.all( + self._cache['interpol_params_recarray'] == + interpol_params_recarray): + return False - # Evaluate the spline. - value = spline(eventdata) + return True - return value + def _get_spline_for_param_values(self, interpol_param_values): + """Retrieves the spline for a given set of parameter values. - def _is_cached(self, tdm, fitparams_hash): - """Checks if the ratio and gradients for the given set of fit parameters - are already cached. 
+ Parameters + ---------- + interpol_param_values : instance of numpy ndarray + The (N_interpol_params,)-shaped numpy ndarray holding the values of + the interpolation parameters. + + Returns + ------- + spline : instance of scipy.interpolate.RegularGridInterpolator + The requested spline instance. """ - if((self._cache_fitparams_hash == fitparams_hash) and - (len(self._cache_ratio) == tdm.n_selected_events) - ): - return True - return False - - def _calculate_ratio_and_gradients(self, tdm, fitparams, fitparams_hash): - """Calculates the ratio values and ratio gradients for all the events - given the fit parameters. It caches the results. + gridparams = dict( + zip(self._interpol_param_names, interpol_param_values)) + gridparams_hash = make_dict_hash(gridparams) + + spline = self._gridparams_hash_log_ratio_spline_dict[gridparams_hash] + + return spline + + def _evaluate_splines( + self, + tdm, + eventdata, + gridparams_recarray, + n_values): + """For each set of parameter values given by ``gridparams_recarray``, + the spline is retrieved and evaluated for the events suitable for that + source model. + + Parameters + ---------- + tdm : instance of TrialDataManager + The TrialDataManager instance holding the trial data and the event + mapping to the sources via the ``src_evt_idx`` property. + eventdata : instance of numpy ndarray + The (N_events,V)-shaped numpy ndarray holding the event data, where + N_events is the number of events, and V the dimensionality of the + event data. + gridparams_recarray : instance of numpy structured ndarray + The numpy structured ndarray of length N_sources with the parameter + names and values needed for the interpolation on the grid for all + sources. If the length of this record array is 1, the set of + parameters will be used for all sources. + n_values : int + The size of the output array. 
+ + Returns + ------- + values : instance of ndarray + The (N_values,)-shaped numpy ndarray holding the values for each set + of parameter values of the ``gridparams_recarray``. The length of + the array depends on the ``src_evt_idx`` property of the + TrialDataManager. In the worst case it is + ``N_sources * N_selected_events``. """ - get_data = tdm.get_data + (src_idxs, evt_idxs) = tdm.src_evt_idxs + + # Check for special case when a single set of parameters are provided. + if len(gridparams_recarray) == 1: + # We got a single parameter set. We will use it for all sources. + spline = self._get_spline_for_param_values(gridparams_recarray[0]) + + eventdata = np.take(eventdata, evt_idxs, axis=0) + values = spline(eventdata) + + return values + + values = np.empty(n_values, dtype=np.float64) + + v_start = 0 + for (sidx, param_values) in enumerate(gridparams_recarray): + spline = self._get_spline_for_param_values(param_values) + # Select the eventdata that belongs to the current source. + m = src_idxs == sidx + src_eventdata = np.take(eventdata, evt_idxs[m], axis=0) + + n = src_eventdata.shape[0] + sl = slice(v_start, v_start+n) + values[sl] = spline(src_eventdata) + + v_start += n + + return values + + def _create_interpol_params_recarray(self, src_params_recarray): + """Creates the params_recarray needed for the interpolation. It selects + The interpolation parameters from the ``params_recarray`` argument. + If all parameters have the same value for all sources, the length will + be 1. + + Parameters + ---------- + src_params_recarray : instance of numpy record ndarray + The numpy record ndarray of length N_sources holding all local + parameter names and values. + + Returns + ------- + interpol_params_recarray : instance of numpy record ndarray + The numpy record ndarray of length N_sources or 1 holding only the + parameters needed for the interpolation. 
+ """ + interpol_params_recarray = repack_fields( + src_params_recarray[self._interpol_param_names]) + + all_params_are_equal_for_all_sources = True + for pname in self._interpol_param_names: + if not np.all( + np.isclose(np.diff(interpol_params_recarray[pname]), 0)): + all_params_are_equal_for_all_sources = False + break + if all_params_are_equal_for_all_sources: + return interpol_params_recarray[:1] + + return interpol_params_recarray + + def _calculate_ratio_and_grads( + self, + tdm, + interpol_params_recarray): + """Calculates the ratio values and ratio gradients for all the sources + and trial events given the source parameter values. + The result is stored in the class member variable ``_cache``. + + Parameters + ---------- + tdm : instance of TrialDataManager + The TrialDataManager instance holding the trial data. + interpol_params_recarray : instance of numpy record ndarray + The numpy record ndarray of length N_sources holding the parameter + names and values for all sources. + It must contain only the parameters necessary for the interpolation. + """ # Create a 2D event data array holding only the needed event data fields # for the PDF ratio spline evaluation. - eventdata = np.vstack([get_data(fn) for fn in self._data_field_names]).T + eventdata = np.vstack([tdm[fn] for fn in self._data_field_names]).T + + (ratio, grads) = self._interpolmethod( + tdm=tdm, + eventdata=eventdata, + params_recarray=interpol_params_recarray) - (ratio, gradients) = self._interpolmethod_instance.get_value_and_gradients( - tdm, eventdata, fitparams) # The interpolation works on the logarithm of the ratio spline, hence # we need to transform it using the exp function, and we need to account # for the exp function in the gradients. ratio = np.exp(ratio) - gradients = ratio * gradients + grads = ratio * grads # Cache the value and the gradients. 
- self._cache_fitparams_hash = fitparams_hash - self._cache_ratio = ratio - self._cache_gradients = gradients - - def get_ratio(self, tdm, fitparams, tl=None): + self._cache = self._create_cache( + trial_data_state_id=tdm.trial_data_state_id, + interpol_params_recarray=interpol_params_recarray, + ratio=ratio, + grads=grads + ) + + def get_ratio( + self, + tdm, + src_params_recarray, + tl=None): """Retrieves the PDF ratio values for each given trial event data, given the given set of fit parameters. This method is called during the likelihood maximization process. @@ -212,51 +432,107 @@ def get_ratio(self, tdm, fitparams, tl=None): tdm : instance of TrialDataManager The TrialDataManager instance holding the trial event data for which the PDF ratio values should get calculated. - fitparams : dict - The dictionary with the fit parameter values. - tl : TimeLord instance | None + src_params_recarray : instance of numpy record ndarray | None + The (N_sources,)-shaped numpy record ndarray holding the parameter + names and values of the sources. See the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + for more information. + tl : instance of TimeLord | None The optional TimeLord instance that should be used to measure timing information. Returns ------- - ratio : 1d ndarray of float - The PDF ratio value for each given event. + ratio : instance of numpy ndarray + The (N_values,)-shaped numpy ndarray of float holding the PDF ratio + value for each source and trial event. """ - fitparams_hash = make_params_hash(fitparams) - - # Check if the ratio value is already cached. - if(self._is_cached(tdm, fitparams_hash)): - return self._cache_ratio - - self._calculate_ratio_and_gradients(tdm, fitparams, fitparams_hash) - - return self._cache_ratio - - def get_gradient(self, tdm, fitparams, fitparam_name): - """Retrieves the PDF ratio gradient for the pidx'th fit parameter. + # Select only the parameters necessary for the interpolation. 
+ interpol_params_recarray = self._create_interpol_params_recarray( + src_params_recarray) + + # Check if the ratio values are already cached. + if self._is_cached( + trial_data_state_id=tdm.trial_data_state_id, + interpol_params_recarray=interpol_params_recarray): + return self._cache['ratio'] + + self._calculate_ratio_and_grads( + tdm=tdm, + interpol_params_recarray=interpol_params_recarray) + + return self._cache['ratio'] + + def get_gradient( + self, + tdm, + src_params_recarray, + fitparam_id, + tl=None): + """Retrieves the PDF ratio gradient for the given fit parameter + ``fitparam_id``. Parameters ---------- tdm : instance of TrialDataManager The TrialDataManager instance holding the trial event data for which the PDF ratio gradient values should get calculated. - fitparams : dict - The dictionary with the fit parameter values. - fitparam_name : str - The name of the fit parameter for which the gradient should get - calculated. - """ - fitparams_hash = make_params_hash(fitparams) - - # Convert the fit parameter name into the local fit parameter index. - pidx = self.convert_signal_fitparam_name_into_index(fitparam_name) - - # Check if the gradients have been calculated already. - if(self._is_cached(tdm, fitparams_hash)): - return self._cache_gradients[pidx] - - # The gradients have not been calculated yet. - self._calculate_ratio_and_gradients(tdm, fitparams, fitparams_hash) + src_params_recarray : instance of numpy record ndarray | None + The (N_sources,)-shaped numpy record ndarray holding the local + parameter names and values of all sources. See the + :meth:`skyllh.core.parameters.ParameterModelMapper.create_src_params_recarray` + method for more information. + fitparam_id : int + The ID of the global fit parameter for which the gradient should + get calculated. + tl : instance of TimeLord | None + The optional TimeLord instance that should be used to measure + timing information. 
- return self._cache_gradients[pidx] + Returns + ------- + grad : instance of ndarray + The (N_values,)-shaped numpy ndarray holding the gradient values + for all sources and trial events w.r.t. the given global fit + parameter. + """ + # Select only the parameters necessary for the interpolation. + interpol_params_recarray = self._create_interpol_params_recarray( + src_params_recarray) + + # Calculate the gradients if necessary. + if not self._is_cached( + trial_data_state_id=tdm.trial_data_state_id, + interpol_params_recarray=interpol_params_recarray + ): + self._calculate_ratio_and_grads( + tdm=tdm, + interpol_params_recarray=interpol_params_recarray) + + tdm_n_sources = tdm.n_sources + + grad = np.zeros((tdm.get_n_values(),), dtype=np.float64) + + # Loop through the parameters of the signal PDF set and match them with + # the global fit parameter. + for (pidx, pname) in enumerate( + self._sig_pdf_set.param_grid_set.params_name_list): + if pname not in src_params_recarray.dtype.fields: + continue + p_gpidxs = src_params_recarray[f'{pname}:gpidx'] + src_mask = p_gpidxs == (fitparam_id + 1) + n_sources = np.count_nonzero(src_mask) + if n_sources == 0: + continue + if n_sources == tdm_n_sources: + # This parameter applies to all sources, hence to all values, + # and hence it's the only local parameter contributing to the + # global parameter fitparam_id. + return self._cache['grads'][pidx] + + # The current parameter does not apply to all sources. + # Create a values mask that matches a given source mask. 
+ values_mask = tdm.get_values_mask_for_source_mask(src_mask) + grad[values_mask] = self._cache['grads'][pidx][values_mask] + + return grad diff --git a/skyllh/i3/scrambling.py b/skyllh/i3/scrambling.py index f844f1afa3..d6224becb4 100644 --- a/skyllh/i3/scrambling.py +++ b/skyllh/i3/scrambling.py @@ -6,7 +6,7 @@ DataScramblingMethod, TimeScramblingMethod, ) -from skyllh.i3.coords import ( +from skyllh.i3.utils.coords import ( azi_to_ra_transform, hor_to_equ_transform, ) diff --git a/skyllh/i3/signal_generation.py b/skyllh/i3/signal_generation.py index afa4e5c8f6..afc89280cf 100644 --- a/skyllh/i3/signal_generation.py +++ b/skyllh/i3/signal_generation.py @@ -4,12 +4,18 @@ from skyllh.core.py import ( get_smallest_numpy_int_type, - float_cast + float_cast, + int_cast, +) +from skyllh.core.utils.coords import ( + rotate_spherical_vector, +) +from skyllh.core.signal_generation import ( + SignalGenerationMethod, +) +from skyllh.core.source_model import ( + PointLikeSource, ) -from skyllh.core.coords import rotate_spherical_vector -from skyllh.core.signal_generation import SignalGenerationMethod -from skyllh.physics.source import PointLikeSource -from skyllh.physics.flux import get_conversion_factor_to_internal_flux_unit def source_sin_dec_shift_linear(x, w, L, U): @@ -81,20 +87,23 @@ def source_sin_dec_shift_cubic(x, w, L, U): x = np.atleast_1d(x) m = w / (x - 0.5*(L+U))**3 - S = m * np.power(x-0.5*(L+U),3) + S = m * np.power(x-0.5*(L+U), 3) return S class PointLikeSourceI3SignalGenerationMethod(SignalGenerationMethod): - """This class provides a signal generation method for a point-like source + """This class provides a signal generation method for point-like sources seen in the IceCube detector. 
""" + def __init__( self, src_sin_dec_half_bandwidth=np.sin(np.radians(1)), src_sin_dec_shift_func=None, - energy_range=None + energy_range=None, + src_batch_size=128, + **kwargs ): """Constructs a new signal generation method instance for a point-like source detected with IceCube. @@ -113,26 +122,35 @@ def __init__( The energy range from which to take MC events into account for signal event generation. If set to None, the entire energy range [0, +inf] is used. + src_batch_size : int + The source processing batch size used for the signal event flux + calculation. """ - super(PointLikeSourceI3SignalGenerationMethod, self).__init__( - energy_range=energy_range) + super().__init__( + energy_range=energy_range, + **kwargs) self.src_sin_dec_half_bandwidth = src_sin_dec_half_bandwidth - if(src_sin_dec_shift_func is None): + if src_sin_dec_shift_func is None: src_sin_dec_shift_func = source_sin_dec_shift_linear self.src_sin_dec_shift_func = src_sin_dec_shift_func + self.src_batch_size = src_batch_size + @property def src_sin_dec_half_bandwidth(self): """The half-width of the sin(dec) band to take MC events from around a source. """ return self._src_sin_dec_half_bandwidth + @src_sin_dec_half_bandwidth.setter def src_sin_dec_half_bandwidth(self, v): - v = float_cast(v, 'The src_sin_dec_half_bandwidth property must be ' - 'castable to a float type!') + v = float_cast( + v, + 'The src_sin_dec_half_bandwidth property must be castable to type ' + 'float!') self._src_sin_dec_half_bandwidth = v @property @@ -142,13 +160,29 @@ def src_sin_dec_shift_func(self): monte-carlo events from. 
""" return self._src_sin_dec_shift_func + @src_sin_dec_shift_func.setter def src_sin_dec_shift_func(self, func): - if(not callable(func)): - raise TypeError('The src_sin_dec_shift_func property must be a ' - 'callable object!') + if not callable(func): + raise TypeError( + 'The src_sin_dec_shift_func property must be a callable ' + 'object!') self._src_sin_dec_shift_func = func + @property + def src_batch_size(self): + """The source processing batch size used for the signal event flux + calculation. + """ + return self._src_batch_size + + @src_batch_size.setter + def src_batch_size(self, v): + v = int_cast( + v, + 'The src_batch_size property must be castable to type int!') + self._src_batch_size = v + def _get_src_dec_bands(self, src_dec, max_sin_dec_range): """Calculates the minimum and maximum sin(dec) values for each source to use with a specified maximal sin(dec) range, which should get @@ -182,14 +216,15 @@ def _get_src_dec_bands(self, src_dec, max_sin_dec_range): src_sin_dec_band_max = src_sin_dec + self._src_sin_dec_half_bandwidth # Calculate the solid angle of the declination band. - src_dec_band_omega = 2*np.pi*(src_sin_dec_band_max - src_sin_dec_band_min) + src_dec_band_omega = ( + 2 * np.pi * (src_sin_dec_band_max - src_sin_dec_band_min) + ) return (src_sin_dec_band_min, src_sin_dec_band_max, src_dec_band_omega) def calc_source_signal_mc_event_flux(self, data_mc, shg): """Calculates the signal flux of each given MC event for each source - hypothesis of the given source hypothesis group. The unit of the signal - flux must be 1/(GeV cm^2 s sr). + hypothesis of the given source hypothesis group. Parameters ---------- @@ -201,18 +236,16 @@ def calc_source_signal_mc_event_flux(self, data_mc, shg): Returns ------- - indices_list : list of 1D ndarrays - The list of event indices arrays specifying which MC events have - been selected as signal candidate events for each source of the - given source hypothesis group. 
Hence, the length of that list is the - number of sources of the source hypothesis group. The length of the - different 1D ndarrays is variable and depends on the source. - flux_list : list of 1D ndarrays - The list of 1D ndarrays holding the flux value of the selected - signal candidate events. One array for each source of the given - source hypothesis group. Hence, the length of that list is the - number of sources of the source hypothesis group. The length of the - different 1D ndarrays is variable and depends on the source. + ev_idx_arr : ndarray + The (N_selected_signal_events,)-shaped 1D ndarray holding the index + of the MC event. + shg_src_idx_arr : ndarray + The (N_selected_signal_events,)-shaped 1D ndarray holding the index + of the source within the given source hypothesis group for each + signal candidate event. + flux_arr : ndarray + The (N_selected_signal_events,)-shaped 1D ndarray holding the flux + value of each signal candidate event. """ indices = np.arange( 0, len(data_mc), @@ -223,8 +256,9 @@ def calc_source_signal_mc_event_flux(self, data_mc, shg): # Get 1D array of source declination. src_dec = np.empty((n_sources,), dtype=np.float64) for (k, source) in enumerate(shg.source_list): - if(not isinstance(source, PointLikeSource)): - raise TypeError('The source instance must be an instance of ' + if not isinstance(source, PointLikeSource): + raise TypeError( + 'The source instance must be an instance of ' 'PointLikeSource!') src_dec[k] = source.dec @@ -236,37 +270,75 @@ def calc_source_signal_mc_event_flux(self, data_mc, shg): np.min(data_mc_sin_true_dec), np.max(data_mc_sin_true_dec) ) - (src_sin_dec_band_min, src_sin_dec_band_max, src_dec_band_omega) = self._get_src_dec_bands(src_dec, max_sin_dec_range) + (src_sin_dec_band_min, src_sin_dec_band_max, src_dec_band_omega) =\ + self._get_src_dec_bands(src_dec, max_sin_dec_range) - # Get the flux model of this source hypo group. + # Get the flux model of this source hypo group (SHG). 
fluxmodel = shg.fluxmodel + # Get the theoretical weights of all the sources of this SHG. + src_weights = shg.get_source_weights() + # Calculate conversion factor from the flux model unit into the internal - # flux unit GeV^-1 cm^-2 s^-1. - toGeVcm2s = get_conversion_factor_to_internal_flux_unit(fluxmodel) + # flux unit. + to_internal_flux_unit =\ + fluxmodel.get_conversion_factor_to_internal_flux_unit() # Select the events that belong to a given source. - indices_list = [] - flux_list = [] - - for k in range(n_sources): - # Create the sin(true_dec) range event mask for the source. - src_event_mask = ( - (data_mc_sin_true_dec >= src_sin_dec_band_min[k]) & - (data_mc_sin_true_dec <= src_sin_dec_band_max[k]) + ev_idx_arr = np.empty( + (0,), + dtype=get_smallest_numpy_int_type((0, len(data_mc)))) + shg_src_idx_arr = np.empty( + (0,), + dtype=get_smallest_numpy_int_type((0, n_sources))) + flux_arr = np.empty( + (0,), + dtype=np.float32) + + src_batch_size = self._src_batch_size + n_batches = int(np.ceil(n_sources / src_batch_size)) + + for bi in range(n_batches): + src_start = bi*src_batch_size + src_end = np.min([(bi+1)*src_batch_size, n_sources]) + bs = src_end - src_start + + src_slice = slice(src_start, src_end) + + # Create an event mask of shape (N_sources,N_events). + ev_mask = np.logical_and( + (data_mc_sin_true_dec >= + src_sin_dec_band_min[src_slice][:, np.newaxis]), + (data_mc_sin_true_dec <= + src_sin_dec_band_max[src_slice][:, np.newaxis]) ) - # Apply energy range cut if an energy range is defined. 
- if(self.energy_range is not None): - src_event_mask &= ( - (data_mc_true_energy >= self.energy_range[0]) & + + if self.energy_range is not None: + ev_mask &= np.logical_and( + (data_mc_true_energy >= self.energy_range[0]), (data_mc_true_energy <= self.energy_range[1]) ) - indices_list.append(indices[src_event_mask]) - flux = fluxmodel(data_mc_true_energy[src_event_mask])*toGeVcm2s / src_dec_band_omega[k] - flux_list.append(flux) + ev_idxs = np.tile(indices, bs)[ev_mask.ravel()] + shg_src_idxs = bi*src_batch_size + np.repeat( + np.arange(bs), + ev_mask.sum(axis=1) + ) + del ev_mask + + flux = ( + fluxmodel(E=data_mc_true_energy[ev_idxs]).squeeze() * + to_internal_flux_unit / + src_dec_band_omega[shg_src_idxs] + ) + if src_weights is not None: + flux *= src_weights[shg_src_idxs] - return (indices_list, flux_list) + ev_idx_arr = np.append(ev_idx_arr, ev_idxs) + shg_src_idx_arr = np.append(shg_src_idx_arr, shg_src_idxs) + flux_arr = np.append(flux_arr, flux) + + return (ev_idx_arr, shg_src_idx_arr, flux_arr) def signal_event_post_sampling_processing( self, shg, shg_sig_events_meta, shg_sig_events @@ -323,164 +395,3 @@ def signal_event_post_sampling_processing( shg_sig_events[shg_src_mask] = shg_src_sig_events return shg_sig_events - - -class MultiPointLikeSourceI3SignalGenerationMethod( - PointLikeSourceI3SignalGenerationMethod): - """This class provides a signal generation method for a multiple point-like - sources seen in the IceCube detector. - """ - def __init__( - self, - src_sin_dec_half_bandwidth=np.sin(np.radians(1)), - src_sin_dec_shift_func=None, - energy_range=None, - batch_size=200 - ): - """Constructs a new signal generation method instance for a point-like - source detected with IceCube. - - Parameters - ---------- - src_sin_dec_half_bandwidth : float - The half-width of the sin(dec) band to take MC events from around a - source. The default is sin(1deg), i.e. a 1deg half-bandwidth. 
- src_sin_dec_shift_func : callable | None - The function that provides the source sin(dec) shift needed for - constructing the source declination bands from where to draw - monte-carlo events from. If set to None, the default function - ``source_sin_dec_shift_linear`` will be used. - energy_range : 2-element tuple of float | None - The energy range from which to take MC events into account for - signal event generation. - If set to None, the entire energy range [0, +inf] is used. - batch_size : int, optional - Batch size for signal generation. - """ - super(MultiPointLikeSourceI3SignalGenerationMethod, self).__init__( - src_sin_dec_half_bandwidth=src_sin_dec_half_bandwidth, - src_sin_dec_shift_func=src_sin_dec_shift_func, - energy_range=energy_range - ) - self.batch_size = batch_size - - def calc_source_signal_mc_event_flux(self, data_mc, shg): - """Calculates the signal flux of each given MC event for each source - hypothesis of the given source hypothesis group. The unit of the signal - flux must be 1/(GeV cm^2 s sr). - - Parameters - ---------- - data_mc : numpy record ndarray - The numpy record array holding the MC events of a dataset. - shg : SourceHypoGroup instance - The source hypothesis group, which defines the list of sources, and - their flux model. - - Returns - ------- - ev_indices : 1D ndarray - Event indices array specifying which MC events have been selected as - signal candidate events for each source of the given source - hypothesis group. The length of the 1D ndarray is variable and - depends on the source. - src_indices : 1D ndarray - Source indices array specifying which source corresponds to the - event in ev_indices array. - flux_list : list of 1D ndarrays - The list of 1D ndarrays holding the flux value of the selected - signal candidate events. One array for each source of the given - source hypothesis group. Hence, the length of that list is the - number of sources of the source hypothesis group. 
The length of the - different 1D ndarrays is variable and depends on the source. - """ - indices = np.arange( - 0, len(data_mc), - dtype=get_smallest_numpy_int_type((0, len(data_mc))) - ) - n_sources = shg.n_sources - - # Get 1D array of source declination. - src_dec = np.empty((n_sources,), dtype=np.float64) - for (k, source) in enumerate(shg.source_list): - if(not isinstance(source, PointLikeSource)): - raise TypeError( - 'The source instance must be an instance of ' - 'PointLikeSource!') - src_dec[k] = source.dec - - data_mc_sin_true_dec = data_mc['sin_true_dec'] - data_mc_true_energy = data_mc['true_energy'] - - # Calculate the source declination bands and their solid angle. - max_sin_dec_range = ( - np.min(data_mc_sin_true_dec), - np.max(data_mc_sin_true_dec) - ) - (src_sin_dec_band_min, src_sin_dec_band_max, src_dec_band_omega) = self._get_src_dec_bands(src_dec, max_sin_dec_range) - - # Get the flux model and source weights of this source hypo group. - fluxmodel = shg.fluxmodel - src_weights = shg.source_weights - - # Calculate conversion factor from the flux model unit into the internal - # flux unit GeV^-1 cm^-2 s^-1. - toGeVcm2s = get_conversion_factor_to_internal_flux_unit(fluxmodel) - - # Select the events that belong to a given source. 
- ev_indices = np.empty( - (0,), dtype=get_smallest_numpy_int_type((0, len(data_mc)))) - src_indices = np.empty( - (0,), dtype=get_smallest_numpy_int_type((0, n_sources))) - flux = np.empty((0,), dtype='float32') - - n_batches = int(np.ceil(n_sources / float(self.batch_size))) - - for bi in range(n_batches): - if(bi != n_batches-1): - band_mask = np.logical_and( - (data_mc_sin_true_dec >= - src_sin_dec_band_min[bi*self.batch_size:(bi+1)*self.batch_size][:, np.newaxis]), - (data_mc_sin_true_dec <= - src_sin_dec_band_max[bi*self.batch_size:(bi+1)*self.batch_size][:, np.newaxis]) - ) - if(self.energy_range is not None): - band_mask &= np.logical_and( - (data_mc_true_energy >= self.energy_range[0]), - (data_mc_true_energy <= self.energy_range[1])) - - ev_indi = np.tile(indices, self.batch_size)[band_mask.ravel()] - src_indi = bi*self.batch_size + np.repeat( - np.arange(self.batch_size), - band_mask.sum(axis=1) - ) - del band_mask - else: - n_final_batch = int(n_sources - bi*self.batch_size) - band_mask = np.logical_and( - (data_mc_sin_true_dec >= - src_sin_dec_band_min[bi*self.batch_size:][:, np.newaxis]), - (data_mc_sin_true_dec <= - src_sin_dec_band_max[bi*self.batch_size:][:, np.newaxis]) - ) - if(self.energy_range is not None): - band_mask &= np.logical_and( - (data_mc_true_energy >= self.energy_range[0]), - (data_mc_true_energy <= self.energy_range[1])) - - ev_indi = np.tile(indices, n_final_batch)[band_mask.ravel()] - src_indi = bi*self.batch_size + np.repeat( - np.arange(n_final_batch), - band_mask.sum(axis=1) - ) - del band_mask - - if(src_weights is None): - fluxi = fluxmodel(data_mc_true_energy[ev_indi])*toGeVcm2s / src_dec_band_omega[src_indi] - else: - fluxi = src_weights[src_indi]*fluxmodel(data_mc_true_energy[ev_indi])*toGeVcm2s / src_dec_band_omega[src_indi] - - ev_indices = np.append(ev_indices, ev_indi) - src_indices = np.append(src_indices, src_indi) - flux = np.append(flux, fluxi) - return (ev_indices, src_indices, flux) diff --git 
a/skyllh/i3/signalpdf.py b/skyllh/i3/signalpdf.py index a5f737aacc..1dafc386ab 100644 --- a/skyllh/i3/signalpdf.py +++ b/skyllh/i3/signalpdf.py @@ -2,236 +2,240 @@ import numpy as np -from skyllh.core.binning import BinningDefinition +from skyllh.core.binning import ( + BinningDefinition, +) +from skyllh.core.flux_model import ( + FluxModel, +) from skyllh.core.multiproc import ( IsParallelizable, - parallelize + parallelize, ) from skyllh.core.parameters import ( ParameterGrid, - ParameterGridSet + ParameterGridSet, +) +from skyllh.core.py import ( + classname, +) +from skyllh.core.smoothing import ( + SmoothingFilter, ) -from skyllh.core.smoothing import SmoothingFilter from skyllh.core.pdf import ( PDFSet, - IsSignalPDF + IsSignalPDF, +) +from skyllh.i3.pdf import ( + I3EnergyPDF, ) -from skyllh.physics.flux import FluxModel -from skyllh.physics.source import PointLikeSource -from skyllh.i3.pdf import I3EnergyPDF class SignalI3EnergyPDFSet(PDFSet, IsSignalPDF, IsParallelizable): """This is the signal energy PDF for IceCube. It creates a set of I3EnergyPDF objects for a discrete set of energy signal parameters. Energy - signal parameters are the parameters that influence the source flux model. + signal parameters influence the source's flux model. """ - def __init__(self, data_mc, logE_binning, sinDec_binning, fluxmodel, - fitparam_grid_set, smoothing_filter=None, ncpu=None, - ppbar=None): + def __init__( + self, + data_mc, + log10_energy_binning, + sin_dec_binning, + fluxmodel, + param_grid_set, + smoothing_filter=None, + ncpu=None, + ppbar=None, + **kwargs): """Creates a new IceCube energy signal PDF for a given flux model and - a set of fit parameter grids for the flux model. + a set of parameter grids for the flux model. It creates a set of I3EnergyPDF objects for each signal parameter value - permutation and stores it inside the ``_params_hash_I3EnergyPDF_dict`` - dictionary, where the hash of the fit parameters dictionary is the key. 
+ permutation and stores it in an internal dictionary, where the hash of + the parameters dictionary is the key. Parameters ---------- data_mc : instance of DataFieldRecordArray - The array holding the monte-carlo data. The following data fields - must exist: + The instance of DataFieldRecordArray holding the monte-carlo data. + The following data fields must exist: - - 'true_energy' : float + true_energy : float The true energy value of the data event. - - 'log_energy' : float - The logarithm of the reconstructed energy value of the data - event. - - 'dec' : float + log_energy : float + The base10-logarithm of the reconstructed energy value of the + data event. + sin_dec : float The declination of the data event. - - 'mcweight' : float + mcweight : float The monte-carlo weight value of the data events in unit GeV cm^2 sr. - logE_binning : BinningDefinition - The binning definition for the binning in log10(E). - sinDec_binning : BinningDefinition - The binning definition for the sin(declination). - fluxmodel : FluxModel + log10_energy_binning : instance of BinningDefinition + The binning definition for the reconstructed energy binning in + log10(E). + sin_dec_binning : instance of BinningDefinition + The binning definition for the binning in sin(declination). + fluxmodel : instance of FluxModel The flux model to use to create the signal energy PDF. - fitparam_grid_set : ParameterGridSet | ParameterGrid - The set of parameter grids. A ParameterGrid object for each - energy fit parameter, for which an I3EnergyPDF object needs to be + param_grid_set : instance of ParameterGridSet | + instance of ParameterGrid + The set of parameter grids. A ParameterGrid instance for each + energy parameter, for which an I3EnergyPDF object needs to be created. - smoothing_filter : SmoothingFilter instance | None + smoothing_filter : instance of SmoothingFilter | None The smoothing filter to use for smoothing the energy histogram. - If None, no smoothing will be applied. 
- ncpu : int | None (default) + If ``None``, no smoothing will be applied. + ncpu : int | None The number of CPUs to use to create the different I3EnergyPDF - objects for the different fit parameter grid values. - ppbar : ProgressBar instance | None + instances for the different parameter grid values. + If set to ``None``, the configured default number of CPUs will be + used. + ppbar : instance of ProgressBar | None The instance of ProgressBar of the optional parent progress bar. """ - if(isinstance(fitparam_grid_set, ParameterGrid)): - fitparam_grid_set = ParameterGridSet([fitparam_grid_set]) - if(not isinstance(fitparam_grid_set, ParameterGridSet)): - raise TypeError('The fitparam_grid_set argument must be an ' - 'instance of ParameterGrid or ParameterGridSet!') - - # We need to extend the fit parameter grids on the lower and upper end + if isinstance(param_grid_set, ParameterGrid): + param_grid_set = ParameterGridSet([param_grid_set]) + if not isinstance(param_grid_set, ParameterGridSet): + raise TypeError( + 'The param_grid_set argument must be an instance of ' + 'ParameterGrid or ParameterGridSet! But its type is ' + f'{classname(param_grid_set)}!') + + # We need to extend the parameter grids on the lower and upper end # by one bin to allow for the calculation of the interpolation. But we # will do this on a copy of the object. 
- fitparam_grid_set = fitparam_grid_set.copy() - fitparam_grid_set.add_extra_lower_and_upper_bin() - - super(SignalI3EnergyPDFSet, self).__init__(pdf_type=I3EnergyPDF, - fitparams_grid_set=fitparam_grid_set, ncpu=ncpu) - - if(not isinstance(logE_binning, BinningDefinition)): - raise TypeError('The logE_binning argument must be an instance of ' - 'BinningDefinition!') - if(not isinstance(sinDec_binning, BinningDefinition)): - raise TypeError('The sinDec_binning argument must be an instance ' - 'of BinningDefinition!') - if(not isinstance(fluxmodel, FluxModel)): - raise TypeError('The fluxmodel argument must be an instance of ' - 'FluxModel!') - if((smoothing_filter is not None) and - (not isinstance(smoothing_filter, SmoothingFilter))): - raise TypeError('The smoothing_filter argument must be None or ' - 'an instance of SmoothingFilter!') - - # Create I3EnergyPDF objects for all permutations of the fit parameter + param_grid_set = param_grid_set.copy() + param_grid_set.add_extra_lower_and_upper_bin() + + super().__init__( + param_grid_set=param_grid_set, + ncpu=ncpu, + **kwargs) + + if not isinstance(log10_energy_binning, BinningDefinition): + raise TypeError( + 'The log10_energy_binning argument must be an instance of ' + 'BinningDefinition! ' + f'Its type is {classname(log10_energy_binning)}!') + if not isinstance(sin_dec_binning, BinningDefinition): + raise TypeError( + 'The sin_dec_binning argument must be an instance ' + 'of BinningDefinition! ' + f'Its type is {classname(sin_dec_binning)}!') + if not isinstance(fluxmodel, FluxModel): + raise TypeError( + 'The fluxmodel argument must be an instance of FluxModel! ' + f'Its type is {classname(fluxmodel)}!') + if (smoothing_filter is not None) and\ + (not isinstance(smoothing_filter, SmoothingFilter)): + raise TypeError( + 'The smoothing_filter argument must be None or ' + 'an instance of SmoothingFilter! 
' + f'Its type is {classname(smoothing_filter)}!') + + # Create I3EnergyPDF objects for all permutations of the parameter # grid values. def create_I3EnergyPDF( - data_logE, data_sinDec, data_mcweight, data_true_energy, - logE_binning, sinDec_binning, smoothing_filter, fluxmodel, - gridfitparams): + data_log10_energy, + data_sin_dec, + data_mcweight, + data_true_energy, + log10_energy_binning, + sin_dec_binning, + smoothing_filter, + fluxmodel, + flux_unit_conv_factor, + gridparams): """Creates an I3EnergyPDF object for the given flux model and flux parameters. Parameters ---------- - data_logE : 1d ndarray - The logarithm of the reconstructed energy value of the data - events. - data_sinDec : 1d ndarray + data_log10_energy : 1d ndarray + The base-10 logarithm of the reconstructed energy value of the + data events. + data_sin_dec : 1d ndarray The sin(dec) value of the the data events. data_mcweight : 1d ndarray The monte-carlo weight value of the data events. data_true_energy : 1d ndarray The true energy value of the data events. - logE_binning : BinningDefinition + log10_energy_binning : instance of BinningDefinition The binning definition for the binning in log10(E). - sinDec_binning : BinningDefinition + sin_dec_binning : instance of BinningDefinition The binning definition for the sin(declination). - smoothing_filter : SmoothingFilter instance | None + smoothing_filter : instance of SmoothingFilter | None The smoothing filter to use for smoothing the energy histogram. - If None, no smoothing will be applied. - fluxmodel : FluxModel + If ``None``, no smoothing will be applied. + fluxmodel : instance of FluxModel The flux model to use to create the signal event weights. - gridfitparams : dict + flux_unit_conv_factor : float + The factor to convert the flux unit into the internal flux unit. + gridparams : dict The dictionary holding the specific signal flux parameters. 
Returns ------- - i3energypdf : I3EnergyPDF - The created I3EnergyPDF object for the given flux model and flux - parameters. + i3energypdf : instance of I3EnergyPDF + The created I3EnergyPDF instance for the given flux model and + flux parameters. """ # Create a copy of the FluxModel with the given flux parameters. # The copy is needed to not interfer with other CPU processes. - myfluxmodel = fluxmodel.copy(newprop=gridfitparams) + myfluxmodel = fluxmodel.copy(newparams=gridparams) # Calculate the signal energy weight of the event. Note, that # because we create a normalized PDF, we can ignore all constants. # So we don't have to convert the flux unit into the internally used # flux unit. - data_physicsweight = myfluxmodel(data_true_energy) + data_physicsweight = np.squeeze(myfluxmodel(E=data_true_energy)) + data_physicsweight *= flux_unit_conv_factor i3energypdf = I3EnergyPDF( - data_logE, data_sinDec, data_mcweight, data_physicsweight, - logE_binning, sinDec_binning, smoothing_filter) + pmm=None, + data_log10_energy=data_log10_energy, + data_sin_dec=data_sin_dec, + data_mcweight=data_mcweight, + data_physicsweight=data_physicsweight, + log10_energy_binning=log10_energy_binning, + sin_dec_binning=sin_dec_binning, + smoothing_filter=smoothing_filter) return i3energypdf - data_logE = data_mc['log_energy'] - data_sinDec = np.sin(data_mc['dec']) + data_log10_energy = data_mc['log_energy'] + data_sin_dec = data_mc['sin_dec'] data_mcweight = data_mc['mcweight'] data_true_energy = data_mc['true_energy'] - args_list = [ ((data_logE, data_sinDec, data_mcweight, data_true_energy, - logE_binning, sinDec_binning, smoothing_filter, - fluxmodel, gridfitparams), {}) - for gridfitparams in self.gridfitparams_list ] + flux_unit_conv_factor =\ + fluxmodel.get_conversion_factor_to_internal_flux_unit() + + args_list = [ + ( + (data_log10_energy, + data_sin_dec, + data_mcweight, + data_true_energy, + log10_energy_binning, + sin_dec_binning, + smoothing_filter, + fluxmodel, + 
flux_unit_conv_factor, + gridparams), + {} + ) + for gridparams in self.gridparams_list + ] i3energypdf_list = parallelize( - create_I3EnergyPDF, args_list, self.ncpu, ppbar=ppbar) + func=create_I3EnergyPDF, + args_list=args_list, + ncpu=self.ncpu, + ppbar=ppbar) - # Save all the I3EnergyPDF objects in the IsSignalPDF PDF registry with + # Save all the I3EnergyPDF instances in the PDFSet registry with # the hash of the individual parameters as key. - for (gridfitparams, i3energypdf) in zip(self.gridfitparams_list, i3energypdf_list): - self.add_pdf(i3energypdf, gridfitparams) - - def assert_is_valid_for_exp_data(self, data_exp): - """Checks if this signal energy PDF is valid for all the given - experimental data. - It checks if all the data is within the logE and sin(dec) binning range. - - Parameters - ---------- - data_exp : numpy record ndarray - The array holding the experimental data. The following data fields - must exist: - - - 'log_energy' : float - The logarithm of the energy value of the data event. - - 'dec' : float - The declination of the data event. - - Raises - ------ - ValueError - If some of the data is outside the logE or sin(dec) binning range. - """ - # Since we use the same binning for all the I3EnergyPDF objects, we - # can just use an arbitrary object to verify the data. - self.get_pdf(self.pdf_keys[0]).assert_is_valid_for_exp_data(data_exp) - - def get_prob(self, tdm, gridfitparams): - """Calculates the signal energy probability (in logE) of each event for - a given set of signal fit parameters on a grid. - - Parameters - ---------- - tdm : instance of TrialDataManager - The TrialDataManager instance holding the data events for which the - probability should be calculated for. The following data fields must - exist: - - - 'log_energy' : float - The logarithm of the energy value of the event. - - 'sin_dec' : float - The sin(declination) value of the event. 
- - gridfitparams : dict - The dictionary holding the signal parameter values for which the - signal energy probability should be calculated. Note, that the - parameter values must match a set of parameter grid values for which - an I3EnergyPDF object has been created at construction time of this - SignalI3EnergyPDF object. There is no interpolation method defined - at this point to allow for arbitrary parameter values! - - Returns - ------- - prob : 1d ndarray - The array with the signal energy probability for each event. - - Raises - ------ - KeyError - If no energy PDF can be found for the given signal parameter values. - """ - i3energypdf = self.get_pdf(gridfitparams) - - prob = i3energypdf.get_prob(tdm) - return prob + for (gridparams, i3energypdf) in zip(self.gridparams_list, + i3energypdf_list): + self.add_pdf(i3energypdf, gridparams) diff --git a/skyllh/i3/utils/sensitivity.py b/skyllh/i3/utils/analysis.py similarity index 87% rename from skyllh/i3/utils/sensitivity.py rename to skyllh/i3/utils/analysis.py index 7d8b8a3f7f..c16a657fdc 100644 --- a/skyllh/i3/utils/sensitivity.py +++ b/skyllh/i3/utils/analysis.py @@ -3,9 +3,16 @@ import logging import numpy as np -from skyllh.core.analysis_utils import estimate_sensitivity, estimate_discovery_potential -from skyllh.core.progressbar import ProgressBar -from skyllh.physics.source import PointLikeSource +from skyllh.core.utils.analysis import ( + estimate_discovery_potential, + estimate_sensitivity, +) +from skyllh.core.progressbar import ( + ProgressBar, +) +from skyllh.core.source_model import ( + PointLikeSource, +) def generate_ps_sin_dec_h0_ts_values( @@ -75,11 +82,11 @@ def generate_ps_sin_dec_h0_ts_values( pbar_iter = ProgressBar(n_iter, parent=ppbar).start() for iter_idx in range(n_iter): pbar_sin_dec = ProgressBar(len(sin_dec_arr), parent=pbar_iter).start() - for (sin_dec_idx,sin_dec) in enumerate(sin_dec_arr): + for (sin_dec_idx, sin_dec) in enumerate(sin_dec_arr): source = PointLikeSource(np.pi, 
np.arcsin(sin_dec)) ana.change_source(source) - h0_ts_vals_arr[sin_dec_idx,iter_idx] = ana.do_trials( + h0_ts_vals_arr[sin_dec_idx, iter_idx] = ana.do_trials( rss, n_bkg_trials, mean_n_sig=0, bkg_kwargs=bkg_kwargs, ppbar=pbar_sin_dec)['ts'] @@ -180,7 +187,7 @@ def estimate_ps_sin_dec_sensitivity_curve( flux_scaling_arr = np.empty((len(sin_dec_arr), n_iter)) pbar_sin_dec = ProgressBar(n_iter, parent=ppbar).start() - for (sin_dec_idx,sin_dec) in enumerate(sin_dec_arr): + for (sin_dec_idx, sin_dec) in enumerate(sin_dec_arr): logger.debug( 'Estimate point-source sensitivity for sin(dec) = %g, %d times', sin_dec, n_iter) @@ -189,25 +196,25 @@ def estimate_ps_sin_dec_sensitivity_curve( pbar_iter = ProgressBar(len(sin_dec_arr), parent=pbar_sin_dec).start() for iter_idx in range(n_iter): - h0_ts_vals = h0_ts_vals_arr[sin_dec_idx,iter_idx] + h0_ts_vals = h0_ts_vals_arr[sin_dec_idx, iter_idx] mu_min = mu_min_arr[sin_dec_idx] mu_max = mu_max_arr[sin_dec_idx] (mean_ns, mean_ns_err) = estimate_sensitivity( - ana, rss, mu_range=(mu_min,mu_max), eps_p=eps_p, + ana, rss, mu_range=(mu_min, mu_max), eps_p=eps_p, h0_ts_vals=h0_ts_vals, bkg_kwargs=bkg_kwargs, sig_kwargs=sig_kwargs, ppbar=pbar_iter) - mean_ns_arr[sin_dec_idx,iter_idx] = mean_ns - mean_ns_err_arr[sin_dec_idx,iter_idx] = mean_ns_err - flux_scaling_arr[sin_dec_idx,iter_idx] = ana.calculate_fluxmodel_scaling_factor( + mean_ns_arr[sin_dec_idx, iter_idx] = mean_ns + mean_ns_err_arr[sin_dec_idx, iter_idx] = mean_ns_err + flux_scaling_arr[sin_dec_idx, iter_idx] = ana.calculate_fluxmodel_scaling_factor( mean_ns=mean_ns, fitparam_values=np.array(fitparam_values)) # A new iteration is done, update the mu range using the previous # results. 
- mu_min_arr = np.mean(mean_ns_arr[:,0:iter_idx+1]*0.8, axis=1) - mu_max_arr = np.mean(mean_ns_arr[:,0:iter_idx+1]*1.2, axis=1) + mu_min_arr = np.mean(mean_ns_arr[:, 0:iter_idx+1]*0.8, axis=1) + mu_max_arr = np.mean(mean_ns_arr[:, 0:iter_idx+1]*1.2, axis=1) rss.reseed(rss.seed+1) @@ -219,30 +226,30 @@ def estimate_ps_sin_dec_sensitivity_curve( # variance of the further iterations and check if the first estimation # deviates more than 2 times that variance. If so, recalculate the first # estimation. - if(n_iter >= 5): - mean_ns = mean_ns_arr[sin_dec_idx,0] - mean_ns_mean = np.mean(mean_ns_arr[sin_dec_idx,1:]) - mean_ns_std = np.std(mean_ns_arr[sin_dec_idx,1:]) - if(np.abs(mean_ns - mean_ns_mean) >= 2*mean_ns_std): + if n_iter >= 5: + mean_ns = mean_ns_arr[sin_dec_idx, 0] + mean_ns_mean = np.mean(mean_ns_arr[sin_dec_idx, 1:]) + mean_ns_std = np.std(mean_ns_arr[sin_dec_idx, 1:]) + if np.abs(mean_ns - mean_ns_mean) >= 2*mean_ns_std: logger.debug( 'Detected unprecise estimate for first iteration (mu=%g) ' 'for sin(dec)=%g: (|%g - %g| >= 2*%g). 
Recalculating ...', mean_ns, sin_dec, mean_ns, mean_ns_mean, mean_ns_std) iter_idx = 0 - h0_ts_vals = h0_ts_vals_arr[sin_dec_idx,iter_idx] + h0_ts_vals = h0_ts_vals_arr[sin_dec_idx, iter_idx] mu_min = mu_min_arr[sin_dec_idx] mu_max = mu_max_arr[sin_dec_idx] (mean_ns, mean_ns_err) = estimate_sensitivity( - ana, rss, mu_range=(mu_min,mu_max), eps_p=eps_p, + ana, rss, mu_range=(mu_min, mu_max), eps_p=eps_p, h0_ts_vals=h0_ts_vals, bkg_kwargs=bkg_kwargs, sig_kwargs=sig_kwargs, ppbar=pbar_sin_dec) - mean_ns_arr[sin_dec_idx,iter_idx] = mean_ns - mean_ns_err_arr[sin_dec_idx,iter_idx] = mean_ns_err - flux_scaling_arr[sin_dec_idx,iter_idx] = ana.calculate_fluxmodel_scaling_factor( + mean_ns_arr[sin_dec_idx, iter_idx] = mean_ns + mean_ns_err_arr[sin_dec_idx, iter_idx] = mean_ns_err + flux_scaling_arr[sin_dec_idx, iter_idx] = ana.calculate_fluxmodel_scaling_factor( mean_ns=mean_ns, fitparam_values=np.array(fitparam_values)) pbar_sin_dec.increment() @@ -342,32 +349,32 @@ def estimate_ps_sin_dec_discovery_potential_curve( pbar_iter = ProgressBar(n_iter, parent=ppbar).start() for iter_idx in range(n_iter): pbar = ProgressBar(len(sin_dec_arr), parent=pbar_iter).start() - for (sin_dec_idx,sin_dec) in enumerate(sin_dec_arr): + for (sin_dec_idx, sin_dec) in enumerate(sin_dec_arr): source = PointLikeSource(np.pi, np.arcsin(sin_dec)) ana.change_source(source) - h0_ts_vals = h0_ts_vals_arr[sin_dec_idx,iter_idx] + h0_ts_vals = h0_ts_vals_arr[sin_dec_idx, iter_idx] mu_min = mu_min_arr[sin_dec_idx] mu_max = mu_max_arr[sin_dec_idx] (mean_ns, mean_ns_err) = estimate_discovery_potential( ana, rss, h0_ts_quantile=h0_ts_quantile, - mu_range=(mu_min,mu_max), eps_p=eps_p, + mu_range=(mu_min, mu_max), eps_p=eps_p, h0_ts_vals=h0_ts_vals, bkg_kwargs=bkg_kwargs, sig_kwargs=sig_kwargs, ppbar=pbar, **kwargs) - mean_ns_arr[sin_dec_idx,iter_idx] = mean_ns - mean_ns_err_arr[sin_dec_idx,iter_idx] = mean_ns_err - flux_scaling_arr[sin_dec_idx,iter_idx] = ana.calculate_fluxmodel_scaling_factor( + 
mean_ns_arr[sin_dec_idx, iter_idx] = mean_ns + mean_ns_err_arr[sin_dec_idx, iter_idx] = mean_ns_err + flux_scaling_arr[sin_dec_idx, iter_idx] = ana.calculate_fluxmodel_scaling_factor( mean_ns=mean_ns, fitparam_values=np.array(fitparam_values)) pbar.increment() pbar.finish() # One iteration is done, update the mu range using the previous results. - mu_min_arr = np.mean(mean_ns_arr[:,0:iter_idx+1]*0.8, axis=1) - mu_max_arr = np.mean(mean_ns_arr[:,0:iter_idx+1]*1.2, axis=1) + mu_min_arr = np.mean(mean_ns_arr[:, 0:iter_idx+1]*0.8, axis=1) + mu_max_arr = np.mean(mean_ns_arr[:, 0:iter_idx+1]*1.2, axis=1) pbar_iter.increment() diff --git a/skyllh/i3/coords.py b/skyllh/i3/utils/coords.py similarity index 64% rename from skyllh/i3/coords.py rename to skyllh/i3/utils/coords.py index 48ba6441ec..4b288306c5 100644 --- a/skyllh/i3/coords.py +++ b/skyllh/i3/utils/coords.py @@ -5,6 +5,7 @@ import numpy as np + def azi_to_ra_transform(azi, mjd): """Rotates the given IceCube azimuth angles into right-ascention angles for the given MJD times. This function is IceCube specific and assumes that the @@ -13,31 +14,46 @@ def azi_to_ra_transform(azi, mjd): Parameters ---------- - azi : ndarray + azi : instance of numpy.ndarray The array with the azimuth angles. - mjd : ndarray + mjd : instance of numpy.ndarray The array with the MJD times for each azimuth angle. + + Returns + ------- + ra : instance of numpy.ndarray + The right-ascention values. 
""" - _sidereal_length = 0.997269566 # sidereal day = length * solar day - _sidereal_offset = 2.54199002505 # RA = offset + 2pi * (MJD/sidereal_length)%1 - azimuth - sidereal_day_residuals = ((mjd/_sidereal_length)%1) + # sidereal day = length * solar day + _sidereal_length = 0.997269566 + _sidereal_offset = 2.54199002505 + sidereal_day_residuals = (mjd / _sidereal_length) % 1 ra = _sidereal_offset + 2 * np.pi * sidereal_day_residuals - azi ra = np.mod(ra, 2*np.pi) + return ra + def ra_to_azi_transform(ra, mjd): """Rotates the given right-ascention angles to local IceCube azimuth angles. Parameters ---------- - ra : ndarray + ra : instance of numpy.ndarray The array with the right-ascention angles. - mjd : ndarray + mjd : instance of numpy.ndarray The array with the MJD times for each right-ascention angle. + Returns + ------- + azi : instance of numpy.ndarray + The azimuth angle for each right-ascention angle. """ # Use the azi_to_ra_transform function because it is symmetric. - return azi_to_ra_transform(ra, mjd) + azi = azi_to_ra_transform(ra, mjd) + + return azi + def hor_to_equ_transform(azi, zen, mjd): """Transforms the coordinate from the horizontal system (azimuth, zenith) @@ -47,18 +63,19 @@ def hor_to_equ_transform(azi, zen, mjd): Parameters ---------- - azi : ndarray + azi : instance of numpy.ndarray The azimuth angle. - zen : ndarray + zen : instance of numpy.ndarray The zenith angle. - mjd : ndarray + mjd : instance of numpy.ndarray The time in MJD. Returns ------- - (ra, dec) : (ndarray, ndarray) - The two-element tuple with the arrays of right-ascention and - declination. + ra : instance of numpy.ndarray + The right-ascention angle. + dec : instance of numpy.ndarray + The declination angle. 
""" ra = azi_to_ra_transform(azi, mjd) dec = np.pi - zen diff --git a/skyllh/physics/flux.py b/skyllh/physics/flux.py deleted file mode 100644 index b8b8424f95..0000000000 --- a/skyllh/physics/flux.py +++ /dev/null @@ -1,696 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Note: This module is deprecated and new flux models should be implemented in - `flux_model.py`. However, the framework currently doesn't support flux - models derived from `flux_model.FluxModel`. - -The ``flux`` module contains all standard flux models for a source. -The abstract class ``FluxModel`` serves as a base class for all flux model -classes. -The unit of the resulting flux value must be [energy]^-1 [length]^-2 [time]^-1. -The units are defined using the astropy.units module and can be set through -the properties ``energy_unit``, ``length_unit``, and ``time_unit``. -The default units are [energy] = GeV, [length] = cm, [time] = s. -""" -import abc -import numpy as np - -from copy import deepcopy - -from astropy import units - -from skyllh.core.py import classname, isproperty, float_cast -from skyllh.core.config import CFG - - -def get_conversion_factor_to_internal_flux_unit(fluxmodel): - """Calculates the unit conversion factor for converting the used flux - unit of the given flux model into the skyllh internally used flux unit - 1/(GeV cm2 s). - - Parameters - ---------- - fluxmodel : FluxModel - The flux model instance for which to calculate the unit conversion - factor. - - Returns - ------- - unit_conversion_factor : float - The unit conversion factor. 
- """ - fluxmodel_flux_unit = 1/( - fluxmodel.energy_unit * fluxmodel.length_unit**2 * fluxmodel.time_unit) - - internal_units = CFG['internal_units'] - internal_flux_unit = 1/( - internal_units['energy'] * internal_units['length']**2 * - internal_units['time']) - - unit_conversion_factor = (fluxmodel_flux_unit).to(internal_flux_unit).value - return unit_conversion_factor - - -class FluxModel(object, metaclass=abc.ABCMeta): - """Abstract base class for all flux models. - This base class defines the units used for the flux calculation. At this - point the function form of the flux model is not yet defined. - - Attributes - ---------- - energy_unit : str - The used unit of energy. - length_unit : str - The used unit of length. - time_unit : str - The used unit of time. - math_function_str : str - The string showing the mathematical function of the flux calculation. - """ - - def __init__(self): - super(FluxModel, self).__init__() - - # Define the default units. - self.energy_unit = units.GeV - self.length_unit = units.cm - self.time_unit = units.s - - @property - def energy_unit(self): - """The unit of energy used for the flux calculation. - """ - return self._energy_unit - @energy_unit.setter - def energy_unit(self, unit): - if(not isinstance(unit, units.UnitBase)): - raise TypeError('The property energy_unit must be of type astropy.units.UnitBase!') - self._energy_unit = unit - - @property - def length_unit(self): - """The unit of length used for the flux calculation. - """ - return self._length_unit - @length_unit.setter - def length_unit(self, unit): - if(not isinstance(unit, units.UnitBase)): - raise TypeError('The property length_unit must be of type astropy.units.UnitBase!') - self._length_unit = unit - - @property - def time_unit(self): - """The unit of length used for the flux calculation. 
- """ - return self._time_unit - @time_unit.setter - def time_unit(self, unit): - if(not isinstance(unit, units.UnitBase)): - raise TypeError('The property time_unit must be of type astropy.units.UnitBase!') - self._time_unit = unit - - @property - def unit_str(self): - """The string representation of the flux unit. - """ - return '1/(%s %s^2 %s)'%( - self.energy_unit.to_string(), self.length_unit.to_string(), - self.time_unit.to_string()) - - @property - def unit_latex_str(self): - """The latex string representation of the flux unit. - """ - return r'%s$^{-1}$ %s$^{-2}$ %s$^{-1}$'%( - self.energy_unit.to_string(), self.length_unit.to_string(), - self.time_unit.to_string()) - - @property - @abc.abstractmethod - def math_function_str(self): - """The string showing the mathematical function of the flux calculation. - """ - pass - - @abc.abstractmethod - def __call__(self, E): - """The call operator to retrieve a flux value for a given energy. - - Parameters - ---------- - E : float | 1d numpy.ndarray of float - The energy for which to retrieve the flux value. - - Returns - ------- - flux : ndarray of float - Flux at energy E in unit [energy]^-1 [length]^-2 [time]^-1. - By default that is GeV^-1 cm^-2 s^-1. - """ - pass - - def __str__(self): - """Pretty string representation of this class. - """ - return self.math_function_str + ' ' + self.unit_str - - def copy(self, newprop=None): - """Copies this flux model object by calling the copy.deepcopy function, - and sets new properties if requested. - - Parameters - ---------- - newprop : dict | None - The dictionary with the new property values to set, where the - dictionary key is the property name and the dictionary value is the - new value of the property. - """ - fluxmodel = deepcopy(self) - - # Set the new property values. - if(newprop is not None): - fluxmodel.set_properties(newprop) - - return fluxmodel - - def set_properties(self, propdict): - """Sets the properties of the flux model to the given property values. 
- - Parameters - ---------- - propdict : dict (name: value) - The dictionary holding the names of the properties and their new - values. - """ - if(not isinstance(propdict, dict)): - raise TypeError('The propdict argument must be of type dict!') - for (prop, val) in propdict.items(): - if(not hasattr(self, prop)): - raise KeyError('The flux model "%s" does not have a property named "%s"!'%(classname(self), prop)) - if(not isproperty(self, prop)): - raise TypeError('The attribute "%s" of flux model "%s" is no property!'%(classname(self), prop)) - setattr(self, prop, val) - - -class NormedFluxModel(FluxModel, metaclass=abc.ABCMeta): - """Abstract base class for all normalized flux models of the form - - dN/(dEdAdt) = Phi0 * f(E/E0), - - where Phi0 is the flux normalization at E=E0 in the flux unit - [energy]^-1 [length]^-2 [time]^-1, and f(E/E0) is the unit-less energy - dependence of the flux. - - The unit of dN/(dEdAdt) is [energy]^-1 [length]^-2 [time]^-1. - By default the unit is GeV^-1 cm^-2 s^-1. - - Attributes - ---------- - Phi0 : float - Flux value (dN/(dEdAdt)) at E0 in unit - [energy]^-1 [length]^-2 [time]^-1. - E0 : float - Normalization energy in unit of energy. - """ - - def __init__(self, Phi0, E0): - super(NormedFluxModel, self).__init__() - - self.Phi0 = Phi0 - self.E0 = E0 - - @property - def Phi0(self): - """The flux value (dN/(dEdAdt)) at energy E0 in unit - [energy]^-1 [length]^-2 [time]^-1. - """ - return self._Phi0 - - @Phi0.setter - def Phi0(self, v): - v = float_cast(v, 'Property Phi0 must be castable to type float!') - self._Phi0 = v - - @property - def E0(self): - """The normalization energy. 
- """ - return self._E0 - @E0.setter - def E0(self, v): - v = float_cast(v, 'Property E0 must be castable to type float!') - self._E0 = v - - -class SplineFluxModel(FluxModel, metaclass=abc.ABCMeta): - """Abstract base class for all flux models that are represented - numerically through photospline splinetables in .fits format - - dN/(dEdAdt) = Phi0 * f(E), - - where Phi0 is the relative flux normalization (dimensionless) - and f(E) represents the photospline in units of - [energy]^-1 [length]^-2 [time]^-1 i.e. the energy dependence of the flux. - - The unit of dN/(dEdAdt) is [energy]^-1 [length]^-2 [time]^-1. - By default the unit is GeV^-1 cm^-2 s^-1. - - Outside of support [crit_log_nu_energy_lower, crit_log_nu_energy_upper] the - flux will be set to 0. - - Attributes - ---------- - Phi0 : float - Flux normalization relative to model prediction. - psp_table : object - The photospline.SplineTable object. - crit_log_nu_energy_lower : float - Lower end of energy range (support) of spline flux. - crit_log_nu_energy_upper : float - Upper end of energy range (support) of spline flux. - """ - def __init__(self, Phi0, psp_table, crit_log_nu_energy_lower, crit_log_nu_energy_upper): - super(SplineFluxModel, self).__init__() - self._psp_table = psp_table - self._Phi0 = Phi0 - self._crit_log_nu_energy_lower = crit_log_nu_energy_lower - self._crit_log_nu_energy_upper = crit_log_nu_energy_upper - - @property - def psp_table(self): - """The photospline.SplineTable object that describes the neutrino flux - as function of neutrino energy via B-spline interpolation. - """ - return self._psp_table - @psp_table.setter - def psp_table(self, t): - self._psp_table = t - - @property - def Phi0(self): - """The relative flux normalization. Phi0=1 corresponds to the nominal - model flux. 
- """ - return self._Phi0 - @Phi0.setter - def Phi0(self, v): - v = float_cast(v, 'Property Phi0 must be castable to type float!') - self._Phi0 = v - - @property - def crit_log_nu_energy_lower(self): - """The lower bound of the support of the spline interpolator. - """ - return self._crit_log_nu_energy_lower - @crit_log_nu_energy_lower.setter - def crit_log_nu_energy_lower(self, v): - v = float_cast( - v, 'Property crit_log_nu_energy_lower must be castable to type float!') - self._crit_log_nu_energy_lower = v - - @property - def crit_log_nu_energy_upper(self): - """The upper bound of the support of the spline interpolator. - """ - return self._crit_log_nu_energy_upper - @crit_log_nu_energy_upper.setter - def crit_log_nu_energy_upper(self, v): - v = float_cast( - v, 'Property crit_log_nu_energy_upper must be castable to type float!') - self._crit_log_nu_energy_upper = v - - -class SeyfertCoreCoronaFlux(SplineFluxModel): - """Implements the Core-Corona Seyfert Galaxy neutrino flux model of - A. Kheirandish et al., Astrophys.J. 922 (2021) 45 by means of B-spline - interpolation. - - Attributes - ---------- - Phi0 : float - Flux normalization relative to model prediction. - log_xray_lumin : float - log10 of intrinsic x-ray luminosity of source in 2-10 keV band. - psp_table : object - photospline.SplineTable object - crit_log_nu_energy_lower : float - Lower end of energy range (support) of spline flux. - crit_log_nu_energy_upper : float - Upper end of energy range (support) of spline flux. - src_dist : float - Distance to source in units of Mpc. - lumin_scale : float - A relative flux scaling factor. Can correct cases when the model - calculation has a different normalization from what is desired. - crit_log_energy_flux : float - The spline is parameterized in log10(flux). This value determines - when the flux should be considered 0. 
- """ - def __init__( - self, psp_table, log_xray_lumin, src_dist, Phi0, - lumin_scale=1.0, - crit_log_energy_flux=-50, - crit_log_nu_energy_lower=2.0, - crit_log_nu_energy_upper=7.0): - - super(SeyfertCoreCoronaFlux, self).__init__( - Phi0, psp_table, crit_log_nu_energy_lower, crit_log_nu_energy_upper) - - self._lumin_scale = lumin_scale - self._crit_log_energy_flux = crit_log_energy_flux - self._src_dist = src_dist - self._log_xray_lumin = log_xray_lumin - - @property - def log_xray_lumin(self): - """The log10 of the intrinsic source luminosity in 2-10keV x-ray band. - """ - return self._log_xray_lumin - @log_xray_lumin.setter - def log_xray_lumin(self, v): - v = float_cast( - v, 'Property log_xray_lumin must be castable to type float!') - self._log_xray_lumin = v - - @property - def lumin_scale(self): - """Relative factor for model flux normalization correction. - """ - return self._lumin_scale - @lumin_scale.setter - def lumin_scale(self, v): - v = float_cast( - v, 'Property lumin_scale must be castable to type float!') - self._lumin_scale = v - - @property - def src_dist(self): - """The distance to the source in units of Mpc. - """ - return self._src_dist - @src_dist.setter - def src_dist(self, v): - v = float_cast( - v, 'Property src_dist must be castable to type float!') - self._src_dist = v - - @property - def crit_log_energy_flux(self): - """Defines critical log energy when the flux is considered to be 0. - """ - return self._crit_log_energy_flux - @crit_log_energy_flux.setter - def crit_log_energy_flux(self, v): - v = float_cast( - v, 'Property crit_log_energy_flux must be castable to type float!') - self._crit_log_energy_flux = v - - @property - def math_function_str(self): - return ( - f'dN/dE = {self.Phi0:.2f} * {self.lumin_scale:.2f} ' - f'* 10^(log10(f(E)) - 2*log10(E) - 2*log10({self.src_dist:.2f}), ' - f'with log_xray_lumin={self.log_xray_lumin:.2f}' - ) - - def __call__(self, E): - """The flux value dN/dE at energy E. 
- - Parameters - ---------- - E : float | 1D ndarray of float - Evaluation energy [GeV] - - Returns - ------- - flux : float | 1D ndarray of float - Flux at energy E in units of GeV^-1 cm^-2 s^-1. - """ - - log_enu = np.log10(E) - log_energy_flux = self.psp_table.evaluate_simple([log_enu]) - - # Convert energy flux to particle flux accounting for source distance. - flux = 10**(log_energy_flux - 2.0*log_enu - 2.0*np.log10(self.src_dist)) - - # Have to take care of very small fluxes (set to 0 beyond critical - # energy or below the critical flux). - out_of_bounds1 = log_energy_flux < self.crit_log_energy_flux - out_of_bounds2 = np.logical_or(log_enu < self.crit_log_nu_energy_lower, - log_enu > self.crit_log_nu_energy_upper) - flux[np.logical_or(out_of_bounds1, out_of_bounds2)] = 0 - - return self.Phi0 * self.lumin_scale * flux - - def __deepcopy__(self, memo): - """The photospline.SplineTable objects are strictly immutable. - Hence no copy should be required, ever! - """ - return SeyfertCoreCoronaFlux( - self.psp_table, self.log_xray_lumin, self.src_dist, self.Phi0, - self.lumin_scale, self.crit_log_energy_flux, - self.crit_log_nu_energy_lower, self.crit_log_nu_energy_upper - ) - - def __hash__(self): - """We use hash in - `skyllh.core.source_hypothesis.get_fluxmodel_to_source_mapping()` for - mapping fluxes to KDE PDFs. Seyfert model KDEs only depend on the - `log_xray_lumin` parameter. - """ - hash_arg = (self.log_xray_lumin,) - return hash(hash_arg) - - def __eq__(self, other): - return ( - self.__class__ == other.__class__ and - self.log_xray_lumin == other.log_xray_lumin - ) - - -class PowerLawFlux(NormedFluxModel): - """Power law flux of the form - - dN/(dEdAdt) = Phi0 * (E / E0)^(-gamma) - - The unit of dN/(dEdAdt) is [energy]^-1 [length]^-2 [time]^-1. - By default the unit is GeV^-1 cm^-2 s^-1. - """ - def __init__(self, Phi0, E0, gamma): - """Creates a new power law flux object. 
- - Parameters - ---------- - Phi0 : float - Flux value (dN/(dEdAdt)) at E0 in unit - [energy]^-1 [length]^-2 [time]^-1. - By default that is GeV^-1 cm^-2 s^-1. - E0 : float - Normalization energy. - gamma : float - Spectral index - """ - super(PowerLawFlux, self).__init__(Phi0, E0) - self.gamma = gamma - - @property - def gamma(self): - return self._gamma - @gamma.setter - def gamma(self, v): - v = float_cast(v, 'Property gamma must be castable to type float!') - self._gamma = v - - @property - def math_function_str(self): - return "dN/dE = %.2e * (E / %.2e %s)^-%.2f" \ - % (self.Phi0, self.E0, self.energy_unit, self.gamma) - - def __call__(self, E): - """The flux value dN/dE at energy E. - - Parameters - ---------- - E : float | 1d numpy.ndarray of float - Evaluation energy [GeV] - - Returns - ------- - flux : float | 1d ndarray of float - Flux at energy E in unit [energy]^-1 [length]^-2 [time]^-1. - By default in GeV^-1 cm^-2 s^-1. - """ - flux = self.Phi0 * np.power(E / self.E0, -self.gamma) - return flux - - def get_integral(self, E_min, E_max): - """Returns the integral value of the flux between the given energy - range. - - Parameters - ---------- - E_min : float | 1d numpy ndarray of float - The lower energy bound of the integration. - E_max : float | 1d numpy ndarray of float - The upper energy bound of the integration. - - Returns - ------- - integral : float | 1d ndarray of float - The integral value(s) of the given integral range(s). - """ - gamma = self.gamma - - # Handle special case for gamma = 1. - if(gamma == 1): - integral = self.Phi0 * self.E0 * ( - np.log(np.abs(E_max)) - np.log(np.abs(E_min))) - return integral - - integral = (self.Phi0 / ((1.-gamma)*np.power(self.E0, -gamma)) * - (np.power(E_max, 1.-gamma) - np.power(E_min, 1.-gamma))) - - return integral - - def get_inv_normed_cdf(self, x, E_min, E_max): - """Calculates the inverse cumulative distribution function value for - each given value of x, which is a number between 0 and 1. 
- - Parameters - ---------- - x : float | 1d numpy ndarray of float - The argument value(s) of the inverse cumulative distribution - function. Must be between 0 and 1. - E_min : float - The lower energy edge of the flux to be considered. - E_max : float - The upper energy edge of the flux to be considered. - - Returns - ------- - inv_normed_cdf : float | 1d numpy ndarray - The energy value(s) from the inverse normed cumulative distribution - function. - """ - gamma = self.gamma - - if(gamma == 1): - N_0 = np.log(E_max / E_min) - inv_normed_cdf = E_min * np.exp(x * N_0) - return inv_normed_cdf - - N_0 = E_max ** (1. - gamma) - E_min ** (1. - gamma) - inv_normed_cdf = np.power( - x * N_0 + E_min**(1. - gamma), - (1. / (1. - gamma))) - - return inv_normed_cdf - - -class CutoffPowerLawFlux(PowerLawFlux): - """Cut-off power law flux of the form - - dN/(dEdAdt) = Phi0 * (E / E0)^(-gamma) * exp(-E/Ecut) - - The unit of dN/(dEdAdt) is [energy]^-1 [length]^-2 [time]^-1. - By default the unit is GeV^-1 cm^-2 s^-1. - """ - def __init__(self, Phi0, E0, gamma, Ecut): - """Creates a new cut-off power law flux object. - - Parameters - ---------- - Phi0 : float - Flux value (dN/(dEdAdt)) at E0 in unit - [energy]^-1 [length]^-2 [time]^-1. By default the unit is - GeV^-1 cm^-2 s^-1. - E0 : float - Normalization energy [GeV] - gamma : float - Spectral index - Ecut : float - Cut-off energy [GeV] - """ - super(CutoffPowerLawFlux, self).__init__(Phi0, E0, gamma) - self.Ecut = Ecut - - @property - def Ecut(self): - return self._Ecut - @Ecut.setter - def Ecut(self, val): - val = float_cast(val, 'Property val must be castable to type float!') - self._Ecut = val - - @property - def math_function_str(self): - return super(CutoffPowerLawFlux, self).math_function_str + ' * exp(-E / %.2e %s)'%(self.Ecut, self.energy_unit) - - def __call__(self, E): - """The flux value dN/(dEdAdt) at energy E. - - Parameters - ---------- - E : float | 1d numpy.ndarray of float - Evaluation energy. 
- - Returns - ------- - flux : float | 1d ndarray of float - Flux at energy E in unit [energy]^-1 [length]^-2 [time]^-1. - By default that is GeV^-1 cm^-2 s^-1. - """ - flux = super(CutoffPowerLawFlux, self).__call__(E) * np.exp(-E / self.Ecut) - return flux - - -class LogParabolaPowerLawFlux(NormedFluxModel): - """Power law flux with an index which varies as a log parabola in energy of - the form - - dN/(dEdAdt) = Phi0 * (E / E0)^(-(alpha + beta*log(E / E0))) - - The unit of dN/(dEdAdt) is [energy]^-1 [length]^-2 [time]^-1. - By default the unit is GeV^-1 cm^-2 s^-1. - """ - def __init__(self, Phi0, E0, alpha, beta): - super(LogParabolaPowerLawFlux, self).__init__(Phi0, E0) - self.alpha = alpha - self.beta = beta - - @property - def alpha(self): - return self._alpha - @alpha.setter - def alpha(self, v): - v = float_cast(v, 'Property alpha must be castable to type float!') - self._alpha = v - - @property - def beta(self): - return self._beta - @beta.setter - def beta(self, v): - v = float_cast(v, 'Property beta must be castable to type float!') - self._beta = v - - @property - def math_function_str(self): - return 'dN/dE = %.2e * (E / %.2e %s)^(-(%.2e + %.2e * log(E / %.2e %s)))'%(self.Phi0, self.E0, self.energy_unit, self.alpha, self.beta, self.E0, self.energy_unit) - - def __call__(self, E): - """The flux value dN/(dEdAdt) at energy E. - - Parameters - ---------- - E : float | 1d numpy.ndarray of float - The evaluation energy. - - Returns - ------- - flux : float | 1d ndarray of float - Flux at energy E in unit [energy]^-1 [length]^-2 [time]^-1. - By default that is GeV^-1 cm^-2 s^-1. 
- """ - flux = self.Phi0 * np.power(E / self.E0, -self.alpha - self.beta * np.log(E / self.E0)) - return flux diff --git a/skyllh/physics/flux_model.py b/skyllh/physics/flux_model.py deleted file mode 100644 index 2f9eae1935..0000000000 --- a/skyllh/physics/flux_model.py +++ /dev/null @@ -1,1568 +0,0 @@ -# -*- coding: utf-8 -*- - -r"""The `flux_model` module contains classes for different flux models. The -class for the most generic flux model is `FluxModel`, which is an abstract base -class. It describes a mathematical function for the differential flux: - -.. math:: - - d^4\Phi_S(\alpha,\delta,E,t | \vec{x}_s,\vec{p}_s) / (dA d\Omega dE dt) -""" - -from __future__ import division - -import abc -import numpy as np -import scipy.stats - -from astropy import units -from copy import deepcopy - -from skyllh.core.config import CFG -from skyllh.core.math import MathFunction -from skyllh.core.model import Model -from skyllh.core.py import ( - classname, - isproperty, - issequence, - issequenceof, - float_cast -) - - -class FluxProfile(MathFunction, metaclass=abc.ABCMeta): - """The abstract base class for a flux profile math function. - """ - - def __init__(self): - super(FluxProfile, self).__init__() - - -class SpatialFluxProfile(FluxProfile, metaclass=abc.ABCMeta): - """The abstract base class for a spatial flux profile function. - """ - - def __init__( - self, angle_unit=None): - """Creates a new SpatialFluxProfile instance. - - Parameters - ---------- - angle_unit : instance of astropy.units.UnitBase | None - The used unit for angles. - If set to ``Ǹone``, the configured default angle unit for fluxes is - used. - """ - super(SpatialFluxProfile, self).__init__() - - self.angle_unit = angle_unit - - @property - def angle_unit(self): - """The set unit of angle used for this spatial flux profile. - If set to ``Ǹone`` the configured default angle unit for fluxes is used. 
- """ - return self._angle_unit - @angle_unit.setter - def angle_unit(self, unit): - if(unit is None): - unit = CFG['units']['defaults']['fluxes']['angle'] - if(not isinstance(unit, units.UnitBase)): - raise TypeError('The property angle_unit must be of type ' - 'astropy.units.UnitBase!') - self._angle_unit = unit - - @abc.abstractmethod - def __call__(self, alpha, delta, unit=None): - """This method is supposed to return the spatial profile value for the - given celestrial coordinates. - - Parameters - ---------- - alpha : float | 1d numpy ndarray of float - The right-ascention coordinate. - delta : float | 1d numpy ndarray of float - The declination coordinate. - unit : instance of astropy.units.UnitBase | None - The unit of the given celestrial angles. - If ``None``, the set angle unit of this SpatialFluxProfile is - assumed. - - Returns - ------- - values : 1D numpy ndarray - The spatial profile values. - """ - pass - - -class UnitySpatialFluxProfile(SpatialFluxProfile): - """Spatial flux profile for the constant profile function 1 for any spatial - coordinates. - """ - def __init__(self, angle_unit=None): - """Creates a new UnitySpatialFluxProfile instance. - - Parameters - ---------- - angle_unit : instance of astropy.units.UnitBase | None - The used unit for angles. - If set to ``Ǹone``, the configured default angle unit for fluxes is - used. - """ - super(UnitySpatialFluxProfile, self).__init__( - angle_unit=angle_unit) - - @property - def math_function_str(self): - return '1' - - def __call__(self, alpha, delta, unit=None): - """Returns 1 as numpy ndarray in same shape as alpha and delta. - - Parameters - ---------- - alpha : float | 1d numpy ndarray of float - The right-ascention coordinate. - delta : float | 1d numpy ndarray of float - The declination coordinate. - unit : instance of astropy.units.UnitBase | None - The unit of the given celestrial angles. - By the definition of this class this argument is ignored. 
- - Returns - ------- - values : 1D numpy ndarray - 1 in same shape as alpha and delta. - """ - (alpha, delta) = np.atleast_1d(alpha, delta) - if(alpha.shape != delta.shape): - raise ValueError('The alpha and delta arguments must be of the ' - 'same shape!') - - return np.ones_like(alpha) - - -class PointSpatialFluxProfile(SpatialFluxProfile): - """Spatial flux profile for a delta function at the celestrical coordinate - (alpha_s, delta_s). - """ - def __init__(self, alpha_s, delta_s, angle_unit=None): - """Creates a new spatial flux profile for a point. - - Parameters - ---------- - alpha_s : float - The right-ascention of the point-like source. - delta_s : float - The declination of the point-like source. - angle_unit : instance of astropy.units.UnitBase | None - The used unit for angles. - If set to ``Ǹone``, the configured default angle unit for fluxes is - used. - """ - super(PointSpatialFluxProfile, self).__init__( - angle_unit=angle_unit) - - self.alpha_s = alpha_s - self.delta_s = delta_s - - # Define the names of the parameters, which can be updated. - self.param_names = ('alpha_s', 'delta_s') - - @property - def alpha_s(self): - """The right-ascention of the point-like source. - The unit is the set angle unit of this SpatialFluxProfile instance. - """ - return self._alpha_s - @alpha_s.setter - def alpha_s(self, v): - v = float_cast(v, - 'The alpha_s property must be castable to type float!') - self._alpha_s = v - - @property - def delta_s(self): - """The declination of the point-like source. - The unit is the set angle unit of this SpatialFluxProfile instance. - """ - return self._delta_s - @delta_s.setter - def delta_s(self, v): - v = float_cast(v, - 'The delta_s property must be castable to type float!') - self._delta_s = v - - @property - def math_function_str(self): - """(read-only) The string representation of the mathematical function of - this spatial flux profile instance. 
- """ - return 'delta(alpha-%g%s)*delta(delta-%g%s)'%( - self._alpha_s, self._angle_unit.to_string(), self._delta_s, - self._angle_unit.to_string()) - - def __call__(self, alpha, delta, unit=None): - """Returns a numpy ndarray in same shape as alpha and delta with 1 if - `alpha` equals `alpha_s` and `delta` equals `delta_s`, and 0 otherwise. - - Parameters - ---------- - alpha : float | 1d numpy ndarray of float - The right-ascention coordinate at which to evaluate the spatial flux - profile. The unit must be the internally used angle unit. - delta : float | 1d numpy ndarray of float - The declination coordinate at which to evaluate the spatial flux - profile. The unit must be the internally used angle unit. - unit : instance of astropy.units.UnitBase | None - The unit of the given celestrial angles. - If set to ``None``, the set angle unit of this SpatialFluxProfile - instance is assumed. - - Returns - ------- - value : 1D numpy ndarray of int8 - A numpy ndarray in same shape as alpha and delta with 1 if `alpha` - equals `alpha_s` and `delta` equals `delta_s`, and 0 otherwise. - """ - (alpha, delta) = np.atleast_1d(alpha, delta) - if(alpha.shape != delta.shape): - raise ValueError('The alpha and delta arguments must be of the ' - 'same shape!') - - if((unit is not None) and (unit != self._angle_unit)): - angle_unit_conv_factor = unit.to(self._angle_unit) - alpha = alpha * angle_unit_conv_factor - delta = delta * angle_unit_conv_factor - - value = ((alpha == self._alpha_s) & - (delta == self._delta_s)).astype(np.int8, copy=False) - - return value - - -class EnergyFluxProfile(FluxProfile, metaclass=abc.ABCMeta): - """The abstract base class for an energy flux profile function. - """ - - def __init__(self, energy_unit=None): - """Creates a new energy flux profile with a given energy unit to be used - for flux calculation. - - Parameters - ---------- - energy_unit : instance of astropy.units.UnitBase | None - The used unit for energy. 
- If set to ``None``, the configured default energy unit for fluxes is - used. - """ - super(EnergyFluxProfile, self).__init__() - - # Set the energy unit. - self.energy_unit = energy_unit - - @property - def energy_unit(self): - """The unit of energy used for the flux profile calculation. - """ - return self._energy_unit - @energy_unit.setter - def energy_unit(self, unit): - if(unit is None): - unit = CFG['units']['defaults']['fluxes']['energy'] - if(not isinstance(unit, units.UnitBase)): - raise TypeError('The property energy_unit must be of type ' - 'astropy.units.UnitBase!') - self._energy_unit = unit - - @abc.abstractmethod - def __call__(self, E, unit=None): - """This method is supposed to return the energy profile value for the - given energy value. - - Parameters - ---------- - E : float | 1d numpy ndarray of float - The energy value for which to retrieve the energy profile value. - unit : instance of astropy.units.UnitBase | None - The unit of the given energy. - If set to ``None``, the set energy unit of this EnergyFluxProfile - is assumed. - - Returns - ------- - values : 1D numpy ndarray of float - The energy profile values for the given energies. - """ - pass - - -class UnityEnergyFluxProfile(EnergyFluxProfile): - """Energy flux profile for the constant function 1. - """ - def __init__(self, energy_unit=None): - """Creates a new UnityEnergyFluxProfile instance. - - Parameters - ---------- - energy_unit : instance of astropy.units.UnitBase | None - The used unit for energy. - If set to ``None``, the configured default energy unit for fluxes is - used. - """ - super(UnityEnergyFluxProfile, self).__init__( - energy_unit=energy_unit) - - @property - def math_function_str(self): - """The string representation of the mathematical function of this energy - flux profile. - """ - return '1' - - def __call__(self, E, unit=None): - """Returns 1 as numpy ndarray in some shape as E. 
- - Parameters - ---------- - E : float | 1D numpy ndarray of float - The energy value for which to retrieve the energy profile value. - unit : instance of astropy.units.UnitBase | None - The unit of the given energies. - By definition of this specific class, this argument is ignored. - - Returns - ------- - values : 1D numpy ndarray of int8 - 1 in same shape as E. - """ - E = np.atleast_1d(E) - - values = np.ones_like(E, dtype=np.int8) - - return values - - -class PowerLawEnergyFluxProfile(EnergyFluxProfile): - """Energy flux profile for a power law profile with a reference energy - ``E0`` and a spectral index ``gamma``. - - .. math:: - (E / E_0)^{-\gamma} - """ - def __init__(self, E0, gamma, energy_unit=None): - """Creates a new power law flux profile with the reference energy ``E0`` - and spectral index ``gamma``. - - Parameters - ---------- - E0 : castable to float - The reference energy. - gamma : castable to float - The spectral index. - energy_unit : instance of astropy.units.UnitBase | None - The used unit for energy. - If set to ``None``, the configured default energy unit for fluxes is - used. - """ - super(PowerLawEnergyFluxProfile, self).__init__( - energy_unit=energy_unit) - - self.E0 = E0 - self.gamma = gamma - - # Define the parameters which can be set via the `set_parameters` - # method. - self.parameter_names = ('E0', 'gamma',) - - @property - def E0(self): - """The reference energy in the set energy unit of this EnergyFluxProfile - instance. - """ - return self._E0 - @E0.setter - def E0(self, v): - v = float_cast(v, - 'Property E0 must be castable to type float!') - self._E0 = v - - @property - def gamma(self): - """The spectral index. - """ - return self._gamma - @gamma.setter - def gamma(self, v): - v = float_cast(v, - 'Property gamma must be castable to type float!') - self._gamma = v - - @property - def math_function_str(self): - """The string representation of this EnergyFluxProfile instance. 
- """ - return '(E / (%g %s))^-%g'%(self._E0, self._energy_unit, self._gamma) - - def __call__(self, E, unit=None): - """Returns the power law values for the given energies as numpy ndarray - in same shape as E. - - Parameters - ---------- - E : float | 1D numpy ndarray of float - The energy value for which to retrieve the energy profile value. - unit : instance of astropy.units.UnitBase | None - The unit of the given energies. - If set to ``None``, the set energy unit of this EnergyFluxProfile - instance is assumed. - - Returns - ------- - values : 1D numpy ndarray of float - The energy profile values for the given energies. - """ - E = np.atleast_1d(E) - - if((unit is not None) and (unit != self._energy_unit)): - energy_unit_conv_factor = unit.to(self._energy_unit) - E = E * energy_unit_conv_factor - - value = np.power(E / self._E0, -self._gamma) - - return value - - -class TimeFluxProfile(FluxProfile, metaclass=abc.ABCMeta): - """The abstract base class for a time flux profile function. - """ - - def __init__(self, t_start=-np.inf, t_end=np.inf, time_unit=None): - """Creates a new time flux profile instance. - - Parameters - ---------- - t_start : float - The start time of the time profile. - If set to -inf, it means, that the profile starts at the beginning - of the entire time-span of the dataset. - t_end : float - The end time of the time profile. - If set to +inf, it means, that the profile ends at the end of the - entire time-span of the dataset. - time_unit : instance of astropy.units.UnitBase | None - The used unit for time. - If set to ``None``, the configured default time unit for fluxes is - used. - """ - super(TimeFluxProfile, self).__init__() - - self.time_unit = time_unit - - self.t_start = t_start - self.t_end = t_end - - # Define the parameters which can be set via the `set_parameters` - # method. - self.parameter_names = ('t_start', 't_end') - - @property - def t_start(self): - """The start time of the time profile. 
Can be -inf which means, that - the profile starts at the beginning of the entire dataset. - """ - return self._t_start - @t_start.setter - def t_start(self, t): - t = float_cast(t, - 'The t_start property must be castable to type float!') - self._t_start = t - - @property - def t_end(self): - """The end time of the time profile. Can be +inf which means, that - the profile ends at the end of the entire dataset. - """ - return self._t_end - @t_end.setter - def t_end(self, t): - t = float_cast(t, - 'The t_end property must be castable to type float!') - self._t_end = t - - @property - def duration(self): - """(read-only) The duration of the time profile. - """ - return self._t_end - self._t_start - - @property - def time_unit(self): - """The unit of time used for the flux profile calculation. - """ - return self._time_unit - @time_unit.setter - def time_unit(self, unit): - if(unit is None): - unit = CFG['units']['defaults']['fluxes']['time'] - if(not isinstance(unit, units.UnitBase)): - raise TypeError('The property time_unit must be of type ' - 'astropy.units.UnitBase!') - self._time_unit = unit - - def get_total_integral(self): - """Calculates the total integral of the time profile from t_start to - t_end. - - Returns - ------- - integral : float - The integral value of the entire time profile. - The value is in the set time unit of this TimeFluxProfile instance. - """ - integral = self.get_integral(self._t_start, self._t_end) - - return integral - - @abc.abstractmethod - def __call__(self, t, unit=None): - """This method is supposed to return the time profile value for the - given times. - - Parameters - ---------- - t : float | 1D numpy ndarray of float - The time(s) for which to get the time flux profile values. - unit : instance of astropy.units.UnitBase | None - The unit of the given times. - If set to ``None``, the set time unit of this TimeFluxProfile - instance is assumed. - - Returns - ------- - values : 1D numpy ndarray of float - The time profile values. 
- """ - pass - - @abc.abstractmethod - def move(self, dt, unit=None): - """Abstract method to move the time profile by the given amount of time. - - Parameters - ---------- - dt : float - The time difference of how far to move the time profile in time. - This can be a positive or negative time shift value. - unit : instance of astropy.units.UnitBase | None - The unit of the given time difference. - If set to ``Ǹone``, the set time unit of this TimeFluxProfile - instance is assumed. - """ - pass - - @abc.abstractmethod - def get_integral(self, t1, t2, unit=None): - """This method is supposed to calculate the integral of the time profile - from time ``t1`` to time ``t2``. - - Parameters - ---------- - t1 : float | array of float - The start time of the integration. - t2 : float | array of float - The end time of the integration. - unit : instance of astropy.units.UnitBase | None - The unit of the given times. - If set to ``Ǹone``, the set time unit of this TimeFluxProfile - instance is assumed. - - Returns - ------- - integral : array of float - The integral value(s) of the time profile. The values are in the - set time unit of this TimeFluxProfile instance. - """ - pass - - -class UnityTimeFluxProfile(TimeFluxProfile): - """Time flux profile for the constant profile function ``1``. - """ - def __init__(self, time_unit=None): - super(UnityTimeFluxProfile, self).__init__( - time_unit=time_unit) - - @property - def math_function_str(self): - return '1' - - def __call__(self, t, unit=None): - """Returns 1 as numpy ndarray in same shape as t. - - Parameters - ---------- - t : float | 1D numpy ndarray of float - The time(s) for which to get the time flux profile values. - unit : instance of astropy.units.UnitBase | None - The unit of the given times. - By definition of this specific class, this argument is ignored. - - Returns - ------- - values : 1D numpy ndarray of int8 - 1 in same shape as ``t``. 
- """ - t = np.atleast_1d(t) - - values = np.ones_like(t, dtype=np.int8) - - return values - - def move(self, dt, unit=None): - """Moves the time profile by the given amount of time. By definition - this method does nothing, because the profile is 1 over the entire - dataset time range. - - Parameters - ---------- - dt : float - The time difference of how far to move the time profile in time. - This can be a positive or negative time shift value. - unit : instance of astropy.units.UnitBase | None - The unit of the given time difference. - If set to ``None``, the set time unit of this TimeFluxProfile - instance is assumed. - """ - pass - - def get_integral(self, t1, t2, unit=None): - """Calculates the integral of the time profile from time t1 to time t2. - - Parameters - ---------- - t1 : float | array of float - The start time of the integration. - t2 : float | array of float - The end time of the integration. - unit : instance of astropy.units.UnitBase | None - The unit of the given times. - If set to ``None``, the set time unit of this TimeFluxProfile - instance is assumed. - - Returns - ------- - integral : array of float - The integral value(s) of the time profile. The values are in the - set time unit of this TimeFluxProfile instance. - """ - if((unit is not None) and (unit != self._time_unit)): - time_unit_conv_factor = unit.to(self._time_unit) - t1 = t1 * time_unit_conv_factor - t2 = t2 * time_unit_conv_factor - - integral = t2 - t1 - - return integral - - -class BoxTimeFluxProfile(TimeFluxProfile): - """This class describes a box-shaped time flux profile. - It has the following parameters: - - t0 : float - The mid time of the box profile. - tw : float - The width of the box profile. - - The box is centered at ``t0`` and extends to +/-``tw``/2 around ``t0``. - """ - def __init__(self, t0, tw, time_unit=None): - """Creates a new box-shaped time profile instance. - - Parameters - ---------- - t0 : float - The mid time of the box profile. 
- tw : float - The width of the box profile. - time_unit : instance of astropy.units.UnitBase | None - The used unit for time. - If set to ``None``, the configured default time unit for fluxes is - used. - """ - t_start = t0 - tw/2. - t_end = t0 + tw/2. - - super(BoxTimeFluxProfile, self).__init__( - t_start=t_start, t_end=t_end, time_unit=time_unit) - - # Define the parameters which can be set via the `set_parameters` - # method. - self.parameter_names = ('t0', 'tw') - - @property - def t0(self): - """The time of the mid point of the box. - The value is in the set time unit of this TimeFluxProfile instance. - """ - return 0.5*(self._t_start + self._t_end) - @t0.setter - def t0(self, t): - old_t0 = self.t0 - dt = t - old_t0 - self.move(dt) - - @property - def tw(self): - """The time width of the box. - The value is in the set time unit of this TimeFluxProfile instance. - """ - return self._t_end - self._t_start - @tw.setter - def tw(self, w): - t0 = self.t0 - self._t_start = t0 - 0.5*w - self._t_end = t0 + 0.5*w - - @property - def math_function_str(self): - t0 = self.t0 - tw = self.tw - return '1 for t in [%g-%g/2; %g+%g/2], 0 otherwise'%( - t0, tw, t0, tw) - - def __call__(self, t, unit=None): - """Returns 1 for all t within the interval [t0-tw/2; t0+tw/2], and 0 - otherwise. - - Parameters - ---------- - t : float | 1D numpy ndarray of float - The time(s) for which to get the time flux profile values. - unit : instance of astropy.units.UnitBase | None - The unit of the given times. - If set to ``None``, the set time unit of this TimeFluxProfile - instance is assumed. - - Returns - ------- - values : 1D numpy ndarray of int8 - The value(s) of the time flux profile for the given time(s). 
- """ - t = np.atleast_1d(t) - - if((unit is not None) and (unit != self._time_unit)): - time_unit_conv_factor = unit.to(self._time_unit) - t = t * time_unit_conv_factor - - values = np.zeros((t.shape[0],), dtype=np.int8) - m = (t >= self._t_start) & (t <= self._t_end) - values[m] = 1 - - return values - - def move(self, dt, unit=None): - """Moves the box-shaped time profile by the time difference dt. - - Parameters - ---------- - dt : float - The time difference of how far to move the time profile in time. - This can be a positive or negative time shift value. - unit : instance of astropy.units.UnitBase | None - The unit of ``dt``. - If set to ``None``, the set time unit of this TimeFluxProfile - instance is assumed. - """ - if((unit is not None) and (unit != self._time_unit)): - dt = dt * unit.to(self._time_unit) - - self._t_start += dt - self._t_end += dt - - def get_integral(self, t1, t2, unit=None): - """Calculates the integral of the box-shaped time flux profile from - time t1 to time t2. - - Parameters - ---------- - t1 : float | array of float - The start time(s) of the integration. - t2 : float | array of float - The end time(s) of the integration. - unit : instance of astropy.units.UnitBase | None - The unit of the given times. - If set to ``None``, the set time unit of this TimeFluxProfile - instance is assumed. - - Returns - ------- - integral : array of float - The integral value(s). The values are in the set time unit of this - TimeFluxProfile instance. 
- """ - t1 = np.atleast_1d(t1) - t2 = np.atleast_1d(t2) - - if((unit is not None) and (unit != self._time_unit)): - time_unit_conv_factor = unit.to(self._time_unit) - t1 = t1 * time_unit_conv_factor - t2 = t2 * time_unit_conv_factor - - integral = np.zeros((t1.shape[0],), dtype=np.float64) - - m = (t2 >= self._t_start) & (t1 <= self._t_end) - N = np.count_nonzero(m) - - t1 = np.max(np.vstack((t1[m], np.repeat(self._t_start, N))).T, axis=1) - t2 = np.min(np.vstack((t2[m], np.repeat(self._t_end, N))).T, axis=1) - - integral[m] = t2 - t1 - - return integral - - -class GaussianTimeFluxProfile(TimeFluxProfile): - """This class describes a gaussian-shaped time flux profile. - It has the following parameters: - - t0 : float - The mid time of the gaussian profile. - sigma_t : float - The one-sigma width of the gaussian profile. - """ - def __init__(self, t0, sigma_t, tol=1e-12, time_unit=None): - """Creates a new gaussian-shaped time profile instance. - - Parameters - ---------- - t0 : float - The mid time of the gaussian profile. - sigma_t : float - The one-sigma width of the gaussian profile. - tol : float - The tolerance of the gaussian value. This defines the start and end - time of the gaussian profile. - time_unit : instance of astropy.units.UnitBase | None - The used unit for time. - If set to ``None``, the configured default time unit for fluxes is - used. - """ - # Calculate the start and end time of the gaussian profile, such that - # at those times the gaussian values obey the given tolerance. - dt = np.sqrt(-2*sigma_t*sigma_t*np.log(np.sqrt(2*np.pi)*sigma_t*tol)) - t_start = t0 - dt - t_end = t0 + dt - - # A Gaussian profile extends to +/- infinity by definition. - super(GaussianTimeFluxProfile, self).__init__( - t_start=t_start, t_end=t_end, time_unit=time_unit) - - self.t0 = t0 - self.sigma_t = sigma_t - - # Define the parameters which can be set via the `set_parameters` - # method. 
- self.parameter_names = ('t0', 'sigma_t') - - @property - def t0(self): - """The time of the mid point of the gaussian profile. - The unit of the value is the set time unit of this TimeFluxProfile - instance. - """ - return 0.5*(self._t_start + self._t_end) - @t0.setter - def t0(self, t): - t = float_cast(t, - 'The t0 property must be castable to type float!') - old_t0 = self.t0 - dt = t - old_t0 - self.move(dt) - - @property - def sigma_t(self): - """The one-sigma width of the gaussian profile. - The unit of the value is the set time unit of this TimeFluxProfile - instance. - """ - return self._sigma_t - @sigma_t.setter - def sigma_t(self, sigma): - sigma = float_cast(sigma, - 'The sigma property must be castable to type float!') - self._sigma_t = sigma - - def __call__(self, t, unit=None): - """Returns the gaussian profile value for the given time ``t``. - - Parameters - ---------- - t : float | 1D numpy ndarray of float - The time(s) for which to get the time flux profile values. - unit : instance of astropy.units.UnitBase | None - The unit of the given times. - If set to ``None``, the set time unit of this TimeFluxProfile is - assumed. - - Returns - ------- - values : 1D numpy ndarray of float - The value(s) of the time flux profile for the given time(s). - """ - t = np.atleast_1d(t) - - if((unit is not None) and (unit != self._time_unit)): - time_unit_conv_factor = unit.to(self._time_unit) - t = t * time_unit_conv_factor - - s = self._sigma_t - twossq = 2*s*s - t0 = 0.5*(self._t_end + self._t_start) - dt = t - t0 - - values = 1/(np.sqrt(np.pi*twossq)) * np.exp(-dt*dt/twossq) - - return values - - def move(self, dt, unit=None): - """Moves the gaussian time profile by the given amount of time. - - Parameters - ---------- - dt : float - The time difference of how far to move the time profile in time. - This can be a positive or negative time shift value. - unit : instance of astropy.units.UnitBase | None - The unit of the given time difference. 
- If set to ``None``, the set time unit of this TimeFluxProfile is - assumed. - """ - if((unit is not None) and (unit != self._time_unit)): - dt = dt * unit.to(self._time_unit) - - self._t_start += dt - self._t_end += dt - - def get_integral(self, t1, t2, unit=None): - """Calculates the integral of the gaussian time profile from time ``t1`` - to time ``t2``. - - Parameters - ---------- - t1 : float | array of float - The start time(s) of the integration. - t2 : float | array of float - The end time(s) of the integration. - unit : instance of astropy.units.UnitBase | None - The unit of the given times. - If set to ``None``, the set time unit of this TimeFluxProfile - instance is assumed. - - Returns - ------- - integral : array of float - The integral value(s). The values are in the set time unit of - this TimeFluxProfile instance. - """ - if((unit is not None) and (unit != self._time_unit)): - time_unit_conv_factor = unit.to(self._time_unit) - t1 = t1 * time_unit_conv_factor - t2 = t2 * time_unit_conv_factor - - t0 = 0.5*(self._t_end + self._t_start) - sigma_t = self._sigma_t - - c1 = scipy.stats.norm.cdf(t1, loc=t0, scale=sigma_t) - c2 = scipy.stats.norm.cdf(t2, loc=t0, scale=sigma_t) - - integral = c2 - c1 - - return integral - - -class FluxModel(MathFunction, Model, metaclass=abc.ABCMeta): - r"""Abstract base class for all flux models - :math:`\Phi_S(\alpha,\delta,E,t | \vec{x}_s,\vec{p}_s)`. - - This base class defines the units used for the flux calculation. The unit - of the flux is ([angle]^{-2} [energy]^{-1} [length]^{-2} [time]^{-1}). - - At this point the functional form of the flux model is not yet defined. - """ - - def __init__( - self, angle_unit=None, energy_unit=None, length_unit=None, - time_unit=None, **kwargs): - """Creates a new FluxModel instance and defines the user-defined units. - - Parameters - ---------- - angle_unit : instance of astropy.units.UnitBase | None - The used unit for angles. 
- If set to ``None``, the configured default angle unit for fluxes is - used. - energy_unit : instance of astropy.units.UnitBase | None - The used unit for energy. - If set to ``None``, the configured default energy unit for fluxes is - used. - length_unit : instance of astropy.units.UnitBase | None - The used unit for length. - If set to ``None``, the configured default length unit for fluxes is - used. - time_unit : instance of astropy.units.UnitBase | None - The used unit for time. - If set to ``None``, the configured default time unit for fluxes is - used. - """ - super(FluxModel, self).__init__(**kwargs) - - # Define the units. - self.angle_unit = angle_unit - self.energy_unit = energy_unit - self.length_unit = length_unit - self.time_unit = time_unit - - @property - def angle_unit(self): - """The unit of angle used for the flux calculation. - """ - return self._angle_unit - @angle_unit.setter - def angle_unit(self, unit): - if(unit is None): - unit = CFG['units']['defaults']['fluxes']['angle'] - if(not isinstance(unit, units.UnitBase)): - raise TypeError('The property angle_unit must be of type ' - 'astropy.units.UnitBase!') - self._angle_unit = unit - - @property - def energy_unit(self): - """The unit of energy used for the flux calculation. - """ - return self._energy_unit - @energy_unit.setter - def energy_unit(self, unit): - if(unit is None): - unit = CFG['units']['defaults']['fluxes']['energy'] - if(not isinstance(unit, units.UnitBase)): - raise TypeError('The property energy_unit must be of type ' - 'astropy.units.UnitBase!') - self._energy_unit = unit - - @property - def length_unit(self): - """The unit of length used for the flux calculation. 
- """ - return self._length_unit - @length_unit.setter - def length_unit(self, unit): - if(unit is None): - unit = CFG['units']['defaults']['fluxes']['length'] - if(not isinstance(unit, units.UnitBase)): - raise TypeError('The property length_unit must be of type ' - 'astropy.units.UnitBase!') - self._length_unit = unit - - @property - def time_unit(self): - """The unit of time used for the flux calculation. - """ - return self._time_unit - @time_unit.setter - def time_unit(self, unit): - if(unit is None): - unit = CFG['units']['defaults']['fluxes']['time'] - if(not isinstance(unit, units.UnitBase)): - raise TypeError('The property time_unit must be of type ' - 'astropy.units.UnitBase!') - self._time_unit = unit - - @property - def unit_str(self): - """The string representation of the flux unit. - """ - return '1/(%s %s %s^2 %s)'%( - self.energy_unit.to_string(), (self.angle_unit**2).to_string(), - self.length_unit.to_string(), self.time_unit.to_string()) - - @property - def unit_latex_str(self): - """The latex string representation of the flux unit. - """ - return r'%s$^{-1}$ %s$^{-1}$ %s$^{-2}$ %s$^{-1}$'%( - self.energy_unit.to_string(), (self.angle_unit**2).to_string(), - self.length_unit.to_string(), self.time_unit.to_string()) - - def __str__(self): - """Pretty string representation of this class. - """ - return self.math_function_str + ' ' + self.unit_str - - @abc.abstractmethod - def __call__( - self, alpha, delta, E, t, - angle_unit=None, energy_unit=None, time_unit=None): - """The call operator to retrieve a flux value for a given celestrial - position, energy, and observation time. - - Parameters - ---------- - alpha : float | (Ncoord,)-shaped 1D numpy ndarray of float - The right-ascention coordinate for which to retrieve the flux value. - delta : float | (Ncoord,)-shaped 1D numpy ndarray of float - The declination coordinate for which to retrieve the flux value. 
- E : float | (Nenergy,)-shaped 1D numpy ndarray of float - The energy for which to retrieve the flux value. - t : float | (Ntime,)-shaped 1D numpy ndarray of float - The observation time for which to retrieve the flux value. - angle_unit : instance of astropy.units.UnitBase | None - The unit of the given angles. - If ``None``, the set angle unit of the flux model is assumed. - energy_unit : instance of astropy.units.UnitBase | None - The unit of the given energies. - If ``None``, the set energy unit of the flux model is assumed. - time_unit : instance of astropy.units.UnitBase | None - The unit of the given times. - If ``None``, the set time unit of the flux model is assumed. - - Returns - ------- - flux : (Ncoord,Nenergy,Ntime)-shaped ndarray of float - The flux values are in unit of the set flux model units - [energy]^{-1} [angle]^{-2} [length]^{-2} [time]^{-1}. - """ - pass - - -class FactorizedFluxModel(FluxModel): - r"""This class describes a flux model where the spatial, energy, and time - profiles of the source factorize. That means the flux can be written as: - - .. math:: - - \Phi_S(\alpha,\delta,E,t | \vec{x}_s,\vec{p}_s) = - \Phi_0 - \Psi_{\mathrm{S}}(\alpha,\delta|\vec{p}_s) - \epsilon_{\mathrm{S}}(E|\vec{p}_s) - T_{\mathrm{S}}(t|\vec{p}_s) - - where, :math:`\Phi_0` is the normalization constant of the flux, and - :math:`\Psi_{\mathrm{S}}`, :math:`\epsilon_{\mathrm{S}}`, and - :math:`T_{\mathrm{S}}` are the spatial, energy, and time profiles of the - flux, respectively. - """ - def __init__( - self, Phi0, spatial_profile, energy_profile, time_profile, - length_unit=None, **kwargs): - """Creates a new factorized flux model. - - Parameters - ---------- - Phi0 : float - The flux normalization constant. - spatial_profile : SpatialFluxProfile instance | None - The SpatialFluxProfile instance providing the spatial profile - function of the flux. - If set to None, an instance of UnitySpatialFluxProfile will be used, - which represents the constant function 1. 
- energy_profile : EnergyFluxProfile instance | None - The EnergyFluxProfile instance providing the energy profile - function of the flux. - If set to None, an instance of UnityEnergyFluxProfile will be used, - which represents the constant function 1. - time_profile : TimeFluxProfile instance | None - The TimeFluxProfile instance providing the time profile function - of the flux. - If set to None, an instance of UnityTimeFluxProfile will be used, - which represents the constant function 1. - length_unit : instance of astropy.units.UnitBase | None - The used unit for length. - If set to ``None``, the configured default length unit for fluxes is - used. - """ - self.Phi0 = Phi0 - self.spatial_profile = spatial_profile - self.energy_profile = energy_profile - self.time_profile = time_profile - - # The base class will set the default (internally used) flux unit, which - # will be set automatically to the particular profile. - super(FactorizedFluxModel, self).__init__( - angle_unit=spatial_profile.angle_unit, - energy_unit=energy_profile.energy_unit, - time_unit=time_profile.time_unit, - length_unit=length_unit, - **kwargs - ) - - # Define the parameters which can be set via the `set_parameters` - # method. - self.parameter_names = ('Phi0',) - - @property - def Phi0(self): - """The flux normalization constant. - The unit of this normalization constant is - ([angle]^{-2} [energy]^{-1} [length]^{-2} [time]^{-1}). - """ - return self._Phi0 - @Phi0.setter - def Phi0(self, v): - v = float_cast(v, - 'The Phi0 property must be castable to type float!') - self._Phi0 = v - - @property - def spatial_profile(self): - """Instance of SpatialFluxProfile describing the spatial profile of the - flux. 
- """ - return self._spatial_profile - @spatial_profile.setter - def spatial_profile(self, profile): - if(profile is None): - profile = UnitySpatialFluxProfile() - if(not isinstance(profile, SpatialFluxProfile)): - raise TypeError('The spatial_profile property must be None, or an ' - 'instance of SpatialFluxProfile!') - self._spatial_profile = profile - - @property - def energy_profile(self): - """Instance of EnergyFluxProfile describing the energy profile of the - flux. - """ - return self._energy_profile - @energy_profile.setter - def energy_profile(self, profile): - if(profile is None): - profile = UnityEnergyFluxProfile() - if(not isinstance(profile, EnergyFluxProfile)): - raise TypeError('The energy_profile property must be None, or an ' - 'instance of EnergyFluxProfile!') - self._energy_profile = profile - - @property - def time_profile(self): - """Instance of TimeFluxProfile describing the time profile of the flux. - """ - return self._time_profile - @time_profile.setter - def time_profile(self, profile): - if(profile is None): - profile = UnityTimeFluxProfile() - if(not isinstance(profile, TimeFluxProfile)): - raise TypeError('The time_profile property must be None, or an ' - 'instance of TimeFluxProfile!') - self._time_profile = profile - - @property - def math_function_str(self): - """The string showing the mathematical function of the flux. - """ - return '%.3e * %s * %s * %s * %s'%( - self._Phi0, - self.unit_str, - self._spatial_profile.math_function_str, - self._energy_profile.math_function_str, - self._time_profile.math_function_str - ) - - @property - def angle_unit(self): - """The unit of angle used for the flux calculation. The unit is - taken and set from and to the set spatial flux profile, respectively. - """ - return self._spatial_profile.angle_unit - @angle_unit.setter - def angle_unit(self, unit): - self._spatial_profile.angle_unit = unit - - @property - def energy_unit(self): - """The unit of energy used for the flux calculation. 
The unit is - taken and set from and to the set energy flux profile, respectively. - """ - return self._energy_profile.energy_unit - @energy_unit.setter - def energy_unit(self, unit): - self._energy_profile.energy_unit = unit - - @property - def time_unit(self): - """The unit of time used for the flux calculation. The unit is - taken and set from and to the set time flux profile, respectively. - """ - return self._time_profile.time_unit - @time_unit.setter - def time_unit(self, unit): - self._time_profile.time_unit = unit - - @property - def parameter_names(self): - """The tuple holding the names of the math function's parameters. This - is the total set of parameter names for all flux profiles of this - FactorizedFluxModel instance. - """ - pnames = list(self._parameter_names) - pnames += self._spatial_profile.parameter_names - pnames += self._energy_profile.parameter_names - pnames += self._time_profile.parameter_names - - return tuple(pnames) - @parameter_names.setter - def parameter_names(self, names): - super(FactorizedFluxModel, self.__class__).parameter_names.fset(self, names) - - def __call__( - self, alpha, delta, E, t, - angle_unit=None, energy_unit=None, time_unit=None): - """Calculates the flux values for the given celestrial positions, - energies, and observation times. - - Parameters - ---------- - alpha : float | (Ncoord,)-shaped 1d numpy ndarray of float - The right-ascention coordinate for which to retrieve the flux value. - delta : float | (Ncoord,)-shaped 1d numpy ndarray of float - The declination coordinate for which to retrieve the flux value. - E : float | (Nenergy,)-shaped 1d numpy ndarray of float - The energy for which to retrieve the flux value. - t : float | (Ntime,)-shaped 1d numpy ndarray of float - The observation time for which to retrieve the flux value. - angle_unit : instance of astropy.units.UnitBase | None - The unit of the given angles. - If ``None``, the set angle unit of the spatial flux profile is - assumed. 
- energy_unit : instance of astropy.units.UnitBase | None - The unit of the given energies. - If ``None``, the set energy unit of the energy flux profile is - assumed. - time_unit : instance of astropy.units.UnitBase | None - The unit of the given times. - If ``None``, the set time unit of the time flux profile is - assumed. - - Returns - ------- - flux : (Ncoord,Nenergy,Ntime)-shaped ndarray of float - The flux values are in unit - [energy]^{-1} [angle]^{-2} [length]^{-2} [time]^{-1}. - """ - spatial_profile_values = self._spatial_profile( - alpha, delta, unit=angle_unit) - energy_profile_values = self._energy_profile( - E, unit=energy_unit) - time_profile_values = self._time_profile( - t, unit=time_unit) - - flux = ( - self._Phi0 * - spatial_profile_values[:,np.newaxis,np.newaxis] * - energy_profile_values[np.newaxis,:,np.newaxis] * - time_profile_values[np.newaxis,np.newaxis,:] - ) - - return flux - - def set_parameters(self, pdict): - """Sets the parameters of the flux model. For this factorized flux model - it means that it sets the parameters of the spatial, energy, and time - profiles. - - Parameters - ---------- - pdict : dict - The flux parameter dictionary. - - Returns - ------- - updated : bool - Flag if parameter values were actually updated. - """ - updated = False - - updated |= super(FactorizedFluxModel, self).set_parameters(pdict) - - updated |= self._spatial_profile.set_parameters(pdict) - updated |= self._energy_profile.set_parameters(pdict) - updated |= self._time_profile.set_parameters(pdict) - - return updated - - -class IsPointlikeSource(object): - """This is a classifier class that can be used by other classes to indicate - that the specific class describes a point-like source. - """ - def __init__( - self, ra_func_instance=None, get_ra_func=None, set_ra_func=None, - dec_func_instance=None, get_dec_func=None, set_dec_func=None, - **kwargs): - """Constructor method. 
Gets called when the an instance of a class is - created which derives from this IsPointlikeSource class. - - - """ - super(IsPointlikeSource, self).__init__(**kwargs) - - self._ra_func_instance = ra_func_instance - self._get_ra_func = get_ra_func - self._set_ra_func = set_ra_func - - self._dec_func_instance = dec_func_instance - self._get_dec_func = get_dec_func - self._set_dec_func = set_dec_func - - @property - def ra(self): - """The right-ascention coordinate of the point-like source. - """ - return self._get_ra_func(self._ra_func_instance) - @ra.setter - def ra(self, v): - self._set_ra_func(self._ra_func_instance, v) - - @property - def dec(self): - """The declination coordinate of the point-like source. - """ - return self._get_dec_func(self._dec_func_instance) - @dec.setter - def dec(self, v): - self._set_dec_func(self._dec_func_instance, v) - - -class PointlikeSourceFFM(FactorizedFluxModel, IsPointlikeSource): - """This class describes a factorized flux model (FFM), where the spatial - profile is modeled as a point. This class provides the base class for a flux - model of a point-like source. - """ - def __init__( - self, alpha_s, delta_s, Phi0, energy_profile, time_profile, - angle_unit=None, length_unit=None): - """Creates a new factorized flux model for a point-like source. - - Parameters - ---------- - alpha_s : float - The right-ascention of the point-like source. - delta_s : float - The declination of the point-like source. - Phi0 : float - The flux normalization constant in unit of flux. - energy_profile : EnergyFluxProfile instance | None - The EnergyFluxProfile instance providing the energy profile - function of the flux. - If set to None, an instance of UnityEnergyFluxProfile will be used, - which represents the constant function 1. - time_profile : TimeFluxProfile instance | None - The TimeFluxProfile instance providing the time profile function - of the flux. 
- If set to None, an instance of UnityTimeFluxProfile will be used, - which represents the constant function 1. - angle_unit : instance of astropy.units.UnitBase | None - The unit for angles used for the flux unit. - If set to ``None``, the configured internal angle unit is used. - length_unit : instance of astropy.units.UnitBase | None - The unit for length used for the flux unit. - If set to ``None``, the configured internal length unit is used. - """ - spatial_profile=PointSpatialFluxProfile( - alpha_s, delta_s, angle_unit=angle_unit) - - super(PointlikeSourceFFM, self).__init__( - Phi0=Phi0, - spatial_profile=spatial_profile, - energy_profile=energy_profile, - time_profile=time_profile, - length_unit=length_unit, - ra_func_instance=spatial_profile, - get_ra_func=spatial_profile.__class__.alpha_s.fget, - set_ra_func=spatial_profile.__class__.alpha_s.fset, - dec_func_instance=spatial_profile, - get_dec_func=spatial_profile.__class__.delta_s.fget, - set_dec_func=spatial_profile.__class__.delta_s.fset - ) - - -class SteadyPointlikeSourceFFM(PointlikeSourceFFM): - """This class describes a factorized flux model (FFM), where the spatial - profile is modeled as a point and the time profile as constant 1. It is - derived from the ``PointlikeSourceFFM`` class. - """ - def __init__( - self, alpha_s, delta_s, Phi0, energy_profile, - angle_unit=None, length_unit=None, time_unit=None): - """Creates a new factorized flux model for a point-like source with no - time dependance. - - Parameters - ---------- - alpha_s : float - The right-ascention of the point-like source. - delta_s : float - The declination of the point-like source. - Phi0 : float - The flux normalization constant. - energy_profile : EnergyFluxProfile instance | None - The EnergyFluxProfile instance providing the energy profile - function of the flux. - If set to None, an instance of UnityEnergyFluxProfile will be used, - which represents the constant function 1. 
- """ - super(SteadyPointlikeSourceFFM, self).__init__( - alpha_s=alpha_s, - delta_s=delta_s, - Phi0=Phi0, - energy_profile=energy_profile, - time_profile=UnityTimeFluxProfile(time_unit=time_unit), - angle_unit=angle_unit, - length_unit=length_unit - ) - - def __call__( - self, alpha, delta, E, - angle_unit=None, energy_unit=None): - """Calculates the flux values for the given celestrial positions, and - energies. - - Parameters - ---------- - alpha : float | (Ncoord,)-shaped 1d numpy ndarray of float - The right-ascention coordinate for which to retrieve the flux value. - delta : float | (Ncoord,)-shaped 1d numpy ndarray of float - The declination coordinate for which to retrieve the flux value. - E : float | (Nenergy,)-shaped 1d numpy ndarray of float - The energy for which to retrieve the flux value. - angle_unit : instance of astropy.units.UnitBase | None - The unit of the given angles. - If ``None``, the set angle unit of the spatial flux profile is - assumed. - energy_unit : instance of astropy.units.UnitBase | None - The unit of the given energies. - If ``None``, the set energy unit of the energy flux profile is - assumed. - - Returns - ------- - flux : (Ncoord,Nenergy)-shaped ndarray of float - The flux values are in unit - [energy]^{-1} [angle]^{-2} [length]^{-2} [time]^{-1}. - """ - spatial_profile_values = self._spatial_profile( - alpha, delta, unit=angle_unit) - energy_profile_values = self._energy_profile( - E, unit=energy_unit) - - flux = ( - self._Phi0 * - spatial_profile_values[:,np.newaxis] * - energy_profile_values[np.newaxis,:] - ) - - return flux diff --git a/skyllh/physics/model.py b/skyllh/physics/model.py deleted file mode 100644 index c01116cd34..0000000000 --- a/skyllh/physics/model.py +++ /dev/null @@ -1,127 +0,0 @@ -# -*- coding: utf-8 -*- - -"""This module defines the base classes for any physics models used by SkyLLH. 
-""" - -from skyllh.core.py import ( - ObjectCollection, - issequence, - str_cast -) - - -class PhysicsModel(object): - """This class provides a base class for all physics models like source - models or background models. - """ - def __init__(self, name=None): - """Creates a new PhysicsModel instance. - - Parameters - ---------- - name : str | None - The name of the physics model. If set to `None`, the id of the - object is taken as name. - """ - super(PhysicsModel, self).__init__() - - if(name is None): - name = self.id - - self.name = name - - @property - def name(self): - """The name of the physics model. - """ - return self._name - @name.setter - def name(self, n): - n = str_cast(n, 'The name property must be castable to type str!') - self._name = n - - @property - def id(self): - """(read-only) The ID of the physics model. It's an integer generated - with Python's `id` function. Hence, it's related to the memory address - of the object. - """ - return id(self) - - -class PhysicsModelCollection(ObjectCollection): - """This class describes a collection of PhysicsModel instances. It can be - used to group several physics models into a single object. - """ - @staticmethod - def cast(obj, errmsg): - """Casts the given object to a PhysicsModelCollection object. - If the cast fails, a TypeError with the given error message is raised. - - Parameters - ---------- - obj : PhysicsModel instance | sequence of PhysicsModel instances | - PhysicsModelCollection | None - The object that should be casted to PhysicsModelCollection. - errmsg : str - The error message if the cast fails. - - Raises - ------ - TypeError - If the cast fails. - - Returns - ------- - physmodelcollection : instance of PhysicsModelCollection - The created PhysicsModelCollection instance. If `obj` is already a - PhysicsModelCollection instance, it will be returned. 
- """ - if(obj is None): - obj = PhysicsModelCollection(PhysicsModel, None) - return obj - - if(isinstance(obj, PhysicsModel)): - obj = PhysicsModelCollection(PhysicsModel, [obj]) - return obj - - if(isinstance(obj, PhysicsModelCollection)): - return obj - - if(issequence(obj)): - obj = PhysicsModelCollection(PhysicsModel, obj) - return obj - - raise TypeError(errmsg) - - def __init__(self, model_type=None, models=None): - """Creates a new PhysicsModel collection. The type of the physics model - instances the collection holds can be restricted, by setting the - model_type parameter. - - Parameters - ---------- - model_type : type | None - The type of the physics model. If set to None (default), - PhysicsModel will be used. - models : sequence of model_type instances | None - The sequence of physics models this collection should be initalized - with. - """ - if(model_type is None): - model_type = PhysicsModel - super(PhysicsModelCollection, self).__init__( - obj_type=model_type, - obj_list=models) - - @property - def model_type(self): - """(read-only) The type of the physics model. - """ - return self.obj_type - - @property - def models(self): - """(read-only) The list of models of type `model_type`. - """ - return self.objects diff --git a/skyllh/physics/source.py b/skyllh/physics/source.py deleted file mode 100644 index 52b1aa3742..0000000000 --- a/skyllh/physics/source.py +++ /dev/null @@ -1,302 +0,0 @@ -# -*- coding: utf-8 -*- - -"""The ``source`` module contains the base class ``SourceModel`` for modelling a -source in the sky. What kind of properties this source has is modeled by a -derived class. The most common one is the PointLikeSource source model for a -point-like source at a given location in the sky with a given flux model. -""" - -import numpy as np - -from skyllh.core.py import ( - ObjectCollection, - classname, - float_cast, - issequence -) - - -class SourceLocation(object): - """Stores the location of a source, i.e. right-ascention and declination. 
- """ - def __init__(self, ra, dec): - self.ra = ra - self.dec = dec - - @property - def ra(self): - """The right-ascention angle in radian of the source position. - """ - return self._ra - @ra.setter - def ra(self, v): - v = float_cast(v, 'The ra property must be castable to type float!') - self._ra = v - - @property - def dec(self): - """The declination angle in radian of the source position. - """ - return self._dec - @dec.setter - def dec(self, v): - v = float_cast(v, 'The dec property must be castable to type float!') - self._dec = v - -class SourceWeights(object): - """Stores the relative weights of a source, i.e. weights and gradients. - There are two weights that should be included. one is the detector weight, - which is declination dependent, and the other is a hypothesis weight, and that - is provided by the user. - """ - def __init__(self, src_w=None, src_w_grad=None, src_w_W=None): - self.src_w = src_w - self.src_w_grad = src_w_grad - self.src_w_W = src_w_W - @property - def src_w(self): - """The relative weight of the source(s). - """ - return self._src_w - @src_w.setter - def src_w(self, v): - v = float_cast(v, 'The src_w property must be castable to type float!') - self._src_w = v - - @property - def src_w_grad(self): - """The relative weight gradients of the source(s). - """ - return self._src_w_grad - @src_w_grad.setter - def src_w_grad(self, v): - v = float_cast(v, 'The src_w_grad property must be castable to type float!') - self._src_w_grad = v - - @property - def src_w_W(self): - """The hypothesis weight of the source(s). - """ - return self._src_w_W - @src_w_W.setter - def src_w_W(self, v): - v = float_cast(v, 'The src_w_W property must be castable to type float!') - self._src_w_W = v - - -class SourceModel(object): - """The base class for all source models in Skyllh. Each source has a central - location given by a right-ascention and declination location. 
- """ - def __init__(self, ra, dec, src_w=None, src_w_grad=None, src_w_W=None): - self.loc = SourceLocation(ra, dec) - src_w = np.ones_like(self.loc.ra, dtype=np.float64) - src_w_grad = np.zeros_like(self.loc.ra, dtype=np.float64) - - if (src_w_W is None): - src_w_W = np.ones_like(self.loc.ra, dtype=np.float64) - - self.weight = SourceWeights(src_w, src_w_grad, src_w_W) - - @property - def loc(self): - """The location of the source. - """ - return self._loc - @loc.setter - def loc(self, srcloc): - if(not isinstance(srcloc, SourceLocation)): - raise TypeError('The loc property must be an instance of SourceLocation!') - self._loc = srcloc - - @property - def weight(self): - """The weight of the source. - """ - return self._weight - @weight.setter - def weight(self, w_src): - if(not isinstance(w_src, SourceWeights)): - raise TypeError('The weight property must be an instance of SourceWeights!') - self._weight = w_src - - @property - def id(self): - """(read-only) The ID of the source. It's an integer generated with the - id() function. Hence, it's related to the memory address of the object. - """ - return id(self) - - -class SourceCollection(ObjectCollection): - """This class describes a collection of sources. It can be used to group - sources into a single object, for instance for a stacking analysis. - """ - @staticmethod - def cast(obj, errmsg): - """Casts the given object to a SourceCollection object. If the cast - fails, a TypeError with the given error message is raised. - - Parameters - ---------- - obj : SourceModel | sequence of SourceModel | SourceCollection - The object that should be casted to SourceCollection. - errmsg : str - The error message if the cast fails. - - Raises - ------ - TypeError - If the cast fails. 
- """ - if(isinstance(obj, SourceModel)): - obj = SourceCollection(SourceModel, [obj]) - if(not isinstance(obj, SourceCollection)): - if(issequence(obj)): - obj = SourceCollection(SourceModel, obj) - else: - raise TypeError(errmsg) - return obj - - def __init__(self, source_type=None, sources=None): - """Creates a new source collection. - - Parameters - ---------- - source_type : type | None - The type of the source. If set to None (default), SourceModel will - be used. - sources : sequence of source_type instances | None - The sequence of sources this collection should be initalized with. - """ - if(source_type is None): - source_type = SourceModel - super(SourceCollection, self).__init__(sources, obj_type=source_type) - - @property - def source_type(self): - """(read-only) The type of the source model. - """ - return self.obj_type - - @property - def sources(self): - """(read-only) The list of sources of type ``source_type``. - """ - return self.objects - - -class Catalog(SourceCollection): - """This class describes a catalog of sources. It is derived from - SourceCollection. A catalog has a name. - """ - def __init__(self, name, source_type=None, sources=None): - """Creates a new source catalog. - - Parameters - ---------- - name : str - The name of the catalog. - source_type : type | None - The type of the source. If set to None (default), the default type - defined by SourceCollection will be used. - sources : sequence of source_type | None - The sequence of sources this catalog should be initalized with. - """ - super(Catalog, self).__init__(source_type=source_type, sources=sources) - self.name = name - - @property - def name(self): - """The name of the catalog. - """ - return self._name - @name.setter - def name(self, name): - if(not isinstance(name, str)): - raise TypeError('The name property must be of type str!') - self._name = name - - def as_source_collection(self): - """Creates a SourceCollection object for this catalog and returns it. 
- """ - source_collection = SourceCollection(source_type=self.source_type, sources=self.sources) - return source_collection - - -class PointLikeSource(SourceModel): - """The PointLikeSource class is a source model for a point-like source - object in the sky at a given location (right-ascention and declination). - """ - def __init__(self, ra, dec, src_w=None, src_w_grad=None, src_w_W=None): - super(PointLikeSource, self).__init__(ra, dec, src_w, src_w_grad, src_w_W) - - @property - def ra(self): - """(read-only) The right-ascention angle in radian of the source - position. - This is a short-cut for `self.loc.ra`. - """ - return self._loc._ra - - @property - def dec(self): - """(read-only) The declination angle in radian of the source position. - This is a short-cut for `self.loc.dec`. - """ - return self._loc._dec - - def __str__(self): - """Pretty string representation of this class instance. - """ - s = classname(self) + ': { ra=%.3f deg, dec=%.3f deg }'%( - np.rad2deg(self.ra), np.rad2deg(self.dec)) - return s - - -class PointLikeSourceCollection(SourceCollection): - """Describes a collection of point-like sources. - """ - def __init__(self, sources=None): - """Creates a new collection of PointLikeSource objects. - - Parameters - ---------- - sources : sequence of PointLikeSource instances | None - The sequence of PointLikeSource objects this collection should be - initalized with. - """ - super(PointLikeSourceCollection, self).__init__( - source_type=PointLikeSource, sources=sources) - - @property - def ra(self): - """(read-only) The ndarray with the right-ascention of all the sources. - """ - return np.array([ src.ra for src in self ]) - - @property - def dec(self): - """(read-only) The ndarray with the declination of all the sources. - """ - return np.array([ src.dec for src in self ]) - - -class PointLikeSourceCatalog(Catalog): - """Describes a catalog of point-like sources. 
The difference to a - PointLikeSourceCollection is the additional properties of a catalog, e.g. - the name. - """ - def __init__(self, name, sources=None): - """Creates a new point source catalog of the given name. - - Parameters - ---------- - name : str - The name of the point-like source catalog. - sources : sequence of PointLikeSource instances | None - The sequence of PointLikeSource instances this catalog should be - initalized with. - """ - super(PointLikeSourceCatalog, self).__init__( - name=name, source_type=PointLikeSource, sources=sources) diff --git a/skyllh/physics/time_profile.py b/skyllh/physics/time_profile.py deleted file mode 100644 index 2984f09598..0000000000 --- a/skyllh/physics/time_profile.py +++ /dev/null @@ -1,295 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import division - -import abc -import numpy as np - -from skyllh.core.py import float_cast, classname - -class TimeProfileModel(object, metaclass=abc.ABCMeta): - """Abstract base class for an emission time profile of a source. - """ - - def __init__(self, t_start, t_end): - """Creates a new time profile instance. - - Parameters - ---------- - t_start : float - The MJD start time of the box profile. - t_end : float - The MJD end time of the box profile. - """ - super(TimeProfileModel, self).__init__() - - self.t_start = t_start - self.t_end = t_end - - @property - def t_start(self): - """The MJD start time of the box profile. - """ - return self._t_start - @t_start.setter - def t_start(self, t): - t = float_cast(t, - 'The t_start property must be castable to type float!' - ) - self._t_start = t - - @property - def t_end(self): - """The MJD end time of the box profile. - """ - return self._t_end - @t_end.setter - def t_end(self, t): - t = float_cast(t, - 'The t_end property must be castable to type float!' - ) - self._t_end = t - - @property - def duration(self): - """The duration (in days) of the time profile. 
- """ - return self._t_end - self._t_start - - @abc.abstractmethod - def move(self, dt): - """Abstract method to move the time profile by the given amount of time. - - Parameters - ---------- - dt : float - The MJD time difference of how far to move the time profile in time. - This can be a positive or negative time shift. - """ - pass - - @abc.abstractmethod - def update(self, fitparams): - """This method is supposed to update the time profile based on new - fit parameter values. This method should return a boolean flag if an - update was actually performed. - - Returns - ------- - updated : bool - Flag if the time profile actually got updated because of new fit - parameter values. - """ - pass - - @abc.abstractmethod - def get_integral(self, t1, t2): - """This method is supposed to calculate the integral of the time profile - from time t1 to time t2. - - Parameters - ---------- - t1 : float | array of float - The MJD start time of the integration. - t2 : float | array of float - The MJD end time of the integration. - - Returns - ------- - integral : array of float - The integral value(s) of the time profile. - """ - pass - - @abc.abstractmethod - def get_total_integral(self): - """This method is supposed to calculate the total integral of the time - profile from t_start to t_end. - - Returns - ------- - integral : float - The integral value of the entire time profile. - """ - pass - - @abc.abstractmethod - def get_value(self, t): - """Retrieves the value of the time profile at time t. - - Parameters - ---------- - t : float - The MJD time for which the time profile value should get retrieved. - - Returns - ------- - value : float - The time profile value at the given time. - """ - pass - - -class BoxTimeProfile(TimeProfileModel): - """The BoxTimeProfile describes a box-shaped emission time profile of a - source. It has the following fit parameters: - - T0 : float - The mid MJD time of the box profile. - Tw : float - The width (days) of the box profile. 
- """ - def __init__(self, T0, Tw): - """Creates a new box-shaped time profile instance. - - Parameters - ---------- - T0 : float - The mid MJD time of the box profile. - Tw : float - The width (days) of the box profile. - """ - t_start = T0 - Tw/2. - t_end = T0 + Tw/2. - - super(BoxTimeProfile, self).__init__(t_start, t_end) - - def move(self, dt): - """Moves the box-shaped time profile by the time difference dt. - - Parameters - ---------- - dt : float - The MJD time difference of how far to move the time profile in time. - This can be a positive or negative time shift. - """ - self._t_start += dt - self._t_end += dt - - @property - def T0(self): - """The time of the mid point of the box. - """ - return 0.5*(self._t_start + self._t_end) - @T0.setter - def T0(self, t): - old_T0 = self.T0 - dt = t - old_T0 - self.move(dt) - - @property - def Tw(self): - """The time width (in days) of the box. - """ - return self._t_end - self._t_start - @Tw.setter - def Tw(self, w): - T0 = self.T0 - self._t_start = T0 - 0.5*w - self._t_end = T0 + 0.5*w - - def __str__(self): - """Pretty string representation of the BoxTimeProfile class instance. - """ - s = '%s(T0=%.6f, Tw=%.6f)'%(classname(self), self.T0, self.Tw) - return s - - def update(self, fitparams): - """Updates the box-shaped time profile with the new fit parameter - values. - - Parameters - ---------- - fitparams : dict - The dictionary with the new fit parameter values. The key must be - the name of the fit parameter and the value the new parameter value. - - Returns - ------- - updated : bool - Flag if the time profile actually got updated because of new fit - parameter values. 
- """ - updated = False - - self_T0 = self.T0 - T0 = fitparams.get('T0', self_T0) - if(T0 != self_T0): - self.T0 = T0 - updated = True - - self_Tw = self.Tw - Tw = fitparams.get('Tw', self_Tw) - if(Tw != self_Tw): - self.Tw = Tw - updated = True - - return updated - - def get_integral(self, t1, t2): - """Calculates the integral of the box-shaped time profile from MJD time - t1 to time t2. - - Parameters - ---------- - t1 : float | array of float - The MJD start time(s) of the integration. - t2 : float | array of float - The MJD end time(s) of the integration. - - Returns - ------- - integral : array of float - The integral value(s). - """ - t1 = np.atleast_1d(t1) - t2 = np.atleast_1d(t2) - - integrals = np.zeros((t1.shape[0],), dtype=np.float64) - - m = (t2 > self._t_start) & (t1 < self._t_end) - N = np.count_nonzero(m) - - t1 = np.max(np.vstack((t1[m], np.repeat(self._t_start, N))).T, axis=1) - t2 = np.min(np.vstack((t2[m], np.repeat(self._t_end, N))).T, axis=1) - - f = 1./self.duration - - integrals[m] = f*(t2-t1) - - return integrals - - def get_total_integral(self): - """Calculates the the total integral of the box-shaped time profile - from t_start to t_end. By definition it is 1. - - Returns - ------- - integral : float - The integral value of the entire time profile. - """ - return 1. - - def get_value(self, t): - """Retrieves the value of the box-shaped time profile at time t. - For a box-shaped time profile the values are all equal to 1/duration - for times within the time duration and zero otherwise. - - Parameters - ---------- - t : float | array of float - The MJD time(s) for which the time profile value(s) should get - retrieved. - - Returns - ------- - values : array of float - The time profile value(s) at the given time(s). 
- """ - t = np.atleast_1d(t) - - values = np.zeros((t.shape[0],), dtype=np.float64) - m = (t >= self._t_start) & (t < self._t_end) - values[m] = 1./self.duration - - return values diff --git a/skyllh/plotting/core/pdfratio.py b/skyllh/plotting/core/pdfratio.py index c5157b3811..9200ca5e96 100644 --- a/skyllh/plotting/core/pdfratio.py +++ b/skyllh/plotting/core/pdfratio.py @@ -10,14 +10,13 @@ from matplotlib.colors import LogNorm from skyllh.core.py import classname -from skyllh.core.source_hypothesis import SourceHypoGroupManager from skyllh.core.storage import DataFieldRecordArray from skyllh.core.trialdata import TrialDataManager -from skyllh.core.pdfratio import SpatialSigOverBkgPDFRatio +from skyllh.core.pdfratio import SigOverBkgPDFRatio -class SpatialSigOverBkgPDFRatioPlotter(object): - """Plotter class to plot a SpatialSigOverBkgPDFRatio object. +class SigOverBkgPDFRatioPlotter(object): + """Plotter class to plot a SigOverBkgPDFRatio object. """ def __init__(self, tdm, pdfratio): """Creates a new plotter object for plotting a @@ -39,10 +38,13 @@ def pdfratio(self): """The PDF ratio object to plot. """ return self._pdfratio + @pdfratio.setter def pdfratio(self, pdfratio): - if(not isinstance(pdfratio, SpatialSigOverBkgPDFRatio)): - raise TypeError('The pdfratio property must be an object of instance SpatialSigOverBkgPDFRatio!') + if not isinstance(pdfratio, SigOverBkgPDFRatio): + raise TypeError( + 'The pdfratio property must be an instance of ' + 'SigOverBkgPDFRatio!') self._pdfratio = pdfratio @property @@ -50,15 +52,22 @@ def tdm(self): """The TrialDataManager that provides the data for the PDF evaluation. 
""" return self._tdm + @tdm.setter def tdm(self, obj): - if(not isinstance(obj, TrialDataManager)): - raise TypeError('The tdm property must be an instance of ' - 'TrialDataManager!') + if not isinstance(obj, TrialDataManager): + raise TypeError( + 'The tdm property must be an instance of TrialDataManager!') self._tdm = obj - def plot(self, src_hypo_group_manager, axes, source_idx=None, log=True, - **kwargs): + def plot( + self, + src_hypo_group_manager, + axes, + source_idx=None, + log=True, + **kwargs + ): """Plots the spatial PDF ratio. If the signal PDF depends on the source, source_idx specifies the index of the source for which the PDF should get plotted. @@ -86,18 +95,20 @@ def plot(self, src_hypo_group_manager, axes, source_idx=None, log=True, img : instance of mpl.AxesImage The AxesImage instance showing the PDF ratio image. """ - if(not isinstance(axes, Axes)): - raise TypeError('The axes argument must be an instance of matplotlib.axes.Axes!') + if not isinstance(axes, Axes): + raise TypeError( + 'The axes argument must be an instance of ' + 'matplotlib.axes.Axes!') - if(source_idx is None): + if source_idx is None: source_idx = 0 # Define the binning for ra, dec, and sin_dec. delta_ra_deg = 0.5 delta_dec_deg = 0.5 - raaxis = self._pdfratio.signalpdf.axes.get_axis('ra') - decaxis = self._pdfratio.signalpdf.axes.get_axis('dec') + raaxis = self._pdfratio.signalpdf.axes['ra'] + decaxis = self._pdfratio.signalpdf.axes['dec'] # Create a grid of ratio in right-ascention and declination and fill it # with PDF ratio values from events that fall into these bins. 
@@ -105,7 +116,7 @@ def plot(self, src_hypo_group_manager, axes, source_idx=None, log=True, rabins = int(np.ceil(raaxis.length / np.deg2rad(delta_ra_deg))) decbins = int(np.ceil(decaxis.length / np.deg2rad(delta_dec_deg))) - ratios = np.zeros((rabins,decbins), dtype=np.float64) + ratios = np.zeros((rabins, decbins), dtype=np.float64) ra_binedges = np.linspace(raaxis.vmin, raaxis.vmax, rabins+1) ra_bincenters = 0.5*(ra_binedges[:-1] + ra_binedges[1:]) @@ -117,11 +128,13 @@ def plot(self, src_hypo_group_manager, axes, source_idx=None, log=True, events = DataFieldRecordArray( np.zeros( (ratios.size,), - dtype=[('ira', np.int64), ('ra', np.float64), - ('idec', np.int64), ('dec', np.float64), + dtype=[('ira', np.int64), + ('ra', np.float64), + ('idec', np.int64), + ('dec', np.float64), ('sin_dec', np.float64), ('ang_err', np.float64)])) - for (i, ((ira,ra),(idec,dec))) in enumerate(itertools.product( + for (i, ((ira, ra), (idec, dec))) in enumerate(itertools.product( enumerate(ra_bincenters), enumerate(dec_bincenters))): events['ira'][i] = ira @@ -136,15 +149,16 @@ def plot(self, src_hypo_group_manager, axes, source_idx=None, log=True, event_ratios = self._pdfratio.get_ratio(self._tdm) # Select only the ratios for the requested source. 
- if(event_ratios.ndim == 2): + if event_ratios.ndim == 2: event_ratios = event_ratios[source_idx] - ratios[events['ira'],events['idec']] = event_ratios + ratios[events['ira'], events['idec']] = event_ratios (left, right, bottom, top) = (raaxis.vmin, raaxis.vmax, decaxis.vmin, decaxis.vmax) norm = LogNorm() if log else None - img = axes.imshow(ratios.T, + img = axes.imshow( + ratios.T, extent=(left, right, bottom, top), origin='lower', norm=norm, diff --git a/skyllh/plotting/core/signalpdf.py b/skyllh/plotting/core/signalpdf.py index 13cda84b9c..d7d6a9a9ed 100644 --- a/skyllh/plotting/core/signalpdf.py +++ b/skyllh/plotting/core/signalpdf.py @@ -6,20 +6,35 @@ from matplotlib.axes import Axes from matplotlib.colors import LogNorm -from skyllh.core.py import classname -from skyllh.core.source_hypothesis import SourceHypoGroupManager -from skyllh.core.storage import DataFieldRecordArray -from skyllh.core.trialdata import TrialDataManager from skyllh.core.pdf import ( IsSignalPDF, - SpatialPDF + SpatialPDF, +) +from skyllh.core.py import ( + classname, +) +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroupManager, +) +from skyllh.core.storage import ( + DataFieldRecordArray, +) +from skyllh.core.trialdata import ( + TrialDataManager, ) -class SignalSpatialPDFPlotter(object): +class SignalSpatialPDFPlotter( + object, +): """Plotter class to plot spatial signal PDF object. """ - def __init__(self, tdm, pdf): + def __init__( + self, + tdm, + pdf, + **kwargs, + ): """Creates a new plotter object for plotting a spatial signal PDF object. @@ -31,6 +46,7 @@ def __init__(self, tdm, pdf): pdf : class instance derived from SpatialPDF and IsSignalPDF The PDF object to plot. """ + super().__init__(**kwargs) self.tdm = tdm self.pdf = pdf @@ -39,12 +55,15 @@ def pdf(self): """The PDF object to plot. 
""" return self._pdf + @pdf.setter def pdf(self, pdf): - if(not isinstance(pdf, SpatialPDF)): - raise TypeError('The pdf property must be an object of instance SpatialPDF!') - if(not isinstance(pdf, IsSignalPDF)): - raise TypeError('The pdf property must be an object of instance IsSignalPDF!') + if not isinstance(pdf, SpatialPDF): + raise TypeError( + 'The pdf property must be an object of instance SpatialPDF!') + if not isinstance(pdf, IsSignalPDF): + raise TypeError( + 'The pdf property must be an object of instance IsSignalPDF!') self._pdf = pdf @property @@ -52,15 +71,23 @@ def tdm(self): """The TrialDataManager that provides the data for the PDF evaluation. """ return self._tdm + @tdm.setter def tdm(self, obj): - if(not isinstance(obj, TrialDataManager)): - raise TypeError('The tdm property must be an instance of ' - 'TrialDataManager!') + if not isinstance(obj, TrialDataManager): + raise TypeError( + 'The tdm property must be an instance of TrialDataManager!') self._tdm = obj - def plot(self, src_hypo_group_manager, axes, source_idx=None, sin_dec=True, - log=True, **kwargs): + def plot( + self, + src_hypo_group_manager, + axes, + source_idx=None, + sin_dec=True, + log=True, + **kwargs, + ): """Plots the signal spatial PDF for the specified source. Parameters @@ -86,14 +113,16 @@ def plot(self, src_hypo_group_manager, axes, source_idx=None, sin_dec=True, img : instance of mpl.AxesImage The AxesImage instance showing the PDF ratio image. 
""" - if(not isinstance(src_hypo_group_manager, SourceHypoGroupManager)): - raise TypeError('The src_hypo_group_manager argument must be an ' + if not isinstance(src_hypo_group_manager, SourceHypoGroupManager): + raise TypeError( + 'The src_hypo_group_manager argument must be an ' 'instance of SourceHypoGroupManager!') - if(not isinstance(axes, Axes)): - raise TypeError('The axes argument must be an instance of ' + if not isinstance(axes, Axes): + raise TypeError( + 'The axes argument must be an instance of ' 'matplotlib.axes.Axes!') - if(source_idx is None): + if source_idx is None: source_idx = 0 # Define the binning for ra, dec, and sin_dec. @@ -106,13 +135,13 @@ def plot(self, src_hypo_group_manager, axes, source_idx=None, sin_dec=True, # Create a grid of signal probabilities in right-ascention and # declination/sin(declination) and fill it with probabilities from # events that fall into these bins. - raaxis = self.pdf.axes.get_axis('ra') + raaxis = self.pdf.axes['ra'] rabins = int(np.ceil(raaxis.length / np.deg2rad(delta_ra_deg))) ra_binedges = np.linspace(raaxis.vmin, raaxis.vmax, rabins+1) ra_bincenters = 0.5*(ra_binedges[:-1] + ra_binedges[1:]) - decaxis = self.pdf.axes.get_axis('dec') - if(sin_dec is True): + decaxis = self.pdf.axes['dec'] + if sin_dec is True: (dec_min, dec_max) = (np.sin(decaxis.vmin), np.sin(decaxis.vmax)) decbins = int(np.ceil((dec_max-dec_min) / delta_sin_dec)) else: @@ -121,20 +150,24 @@ def plot(self, src_hypo_group_manager, axes, source_idx=None, sin_dec=True, dec_binedges = np.linspace(dec_min, dec_max, decbins+1) dec_bincenters = 0.5*(dec_binedges[:-1] + dec_binedges[1:]) - probs = np.zeros((rabins,decbins), dtype=np.float64) + probs = np.zeros((rabins, decbins), dtype=np.float64) # Generate events that fall into the probability bins. 
- events = DataFieldRecordArray(np.zeros((probs.size,), - dtype=[('ira', np.int64), ('ra', np.float64), - ('idec', np.int64), ('dec', np.float64), - ('ang_err', np.float64)])) - for (i, ((ira,ra),(idec,dec))) in enumerate(itertools.product( + events = DataFieldRecordArray( + np.zeros( + (probs.size,), + dtype=[ + ('ira', np.int64), ('ra', np.float64), + ('idec', np.int64), ('dec', np.float64), + ('ang_err', np.float64) + ])) + for (i, ((ira, ra), (idec, dec))) in enumerate(itertools.product( enumerate(ra_bincenters), enumerate(dec_bincenters))): events['ira'][i] = ira events['ra'][i] = ra events['idec'][i] = idec - if(sin_dec is True): + if sin_dec is True: events['dec'][i] = np.arcsin(dec) else: events['dec'][i] = dec @@ -145,21 +178,26 @@ def plot(self, src_hypo_group_manager, axes, source_idx=None, sin_dec=True, event_probs = self._pdf.get_prob(self._tdm) # Select only the probabilities for the requested source. - if(event_probs.ndim == 2): + if event_probs.ndim == 2: event_probs = event_probs[source_idx] # Fill the probs grid array. 
- probs[events['ira'],events['idec']] = event_probs + probs[events['ira'], events['idec']] = event_probs (left, right, bottom, top) = (raaxis.vmin, raaxis.vmax, dec_min, dec_max) norm = None - if(log): + if log: norm = LogNorm() - img = axes.imshow(probs.T, extent=(left, right, bottom, top), - origin='lower', norm=norm, interpolation='none', **kwargs) + img = axes.imshow( + probs.T, + extent=(left, right, bottom, top), + origin='lower', + norm=norm, + interpolation='none', + **kwargs) axes.set_xlabel(raaxis.name) - if(sin_dec is True): + if sin_dec is True: axes.set_ylabel('sin('+decaxis.name+')') else: axes.set_ylabel(decaxis.name) diff --git a/skyllh/plotting/i3/backgroundpdf.py b/skyllh/plotting/i3/backgroundpdf.py index 8e9d490ede..7c5c6c0561 100644 --- a/skyllh/plotting/i3/backgroundpdf.py +++ b/skyllh/plotting/i3/backgroundpdf.py @@ -8,7 +8,9 @@ from matplotlib.colors import LogNorm from skyllh.core.py import classname -from skyllh.core.source_hypothesis import SourceHypoGroupManager +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroupManager, +) from skyllh.core.storage import DataFieldRecordArray from skyllh.core.trialdata import TrialDataManager from skyllh.i3.backgroundpdf import BackgroundI3SpatialPDF @@ -98,7 +100,7 @@ def plot(self, src_hypo_group_manager, axes): for i in range(len(events)): pdfprobs[0,i] = event_probs[i] - ra_axis = self.pdf.axes.get_axis('ra') + ra_axis = self.pdf.axes['ra'] (left, right, bottom, top) = ( ra_axis.vmin, ra_axis.vmax, sin_dec_binning.lower_edge, sin_dec_binning.upper_edge) diff --git a/skyllh/plotting/i3/pdf.py b/skyllh/plotting/i3/pdf.py index 71e2fdd0d4..8039f15322 100644 --- a/skyllh/plotting/i3/pdf.py +++ b/skyllh/plotting/i3/pdf.py @@ -10,7 +10,9 @@ from matplotlib.colors import LogNorm from skyllh.core.py import classname -from skyllh.core.source_hypothesis import SourceHypoGroupManager +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroupManager, +) from skyllh.core.storage import 
DataFieldRecordArray from skyllh.core.trialdata import TrialDataManager from skyllh.i3.pdf import I3EnergyPDF diff --git a/skyllh/plotting/i3/pdfratio.py b/skyllh/plotting/i3/pdfratio.py index 47e0707181..caa07451a0 100644 --- a/skyllh/plotting/i3/pdfratio.py +++ b/skyllh/plotting/i3/pdfratio.py @@ -10,13 +10,15 @@ from matplotlib.colors import LogNorm from skyllh.core.py import classname -from skyllh.core.source_hypothesis import SourceHypoGroupManager +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroupManager, +) from skyllh.core.storage import DataFieldRecordArray from skyllh.core.trialdata import TrialDataManager -from skyllh.i3.pdfratio import I3EnergySigSetOverBkgPDFRatioSpline +from skyllh.i3.pdfratio import SplinedI3EnergySigSetOverBkgPDFRatio -class I3EnergySigSetOverBkgPDFRatioSplinePlotter(object): +class SplinedI3EnergySigSetOverBkgPDFRatioPlotter(object): """Plotter class to plot an I3EnergySigSetOverBkgPDFRatioSpline object. """ def __init__(self, tdm, pdfratio): @@ -39,10 +41,13 @@ def pdfratio(self): """The PDF ratio object to plot. """ return self._pdfratio + @pdfratio.setter def pdfratio(self, pdfratio): - if(not isinstance(pdfratio, I3EnergySigSetOverBkgPDFRatioSpline)): - raise TypeError('The pdfratio property must be an object of instance I3EnergySigSetOverBkgPDFRatioSpline!') + if not isinstance(pdfratio, SplinedI3EnergySigSetOverBkgPDFRatio): + raise TypeError( + 'The pdfratio property must be an instance of ' + 'SplinedI3EnergySigSetOverBkgPDFRatio!') self._pdfratio = pdfratio @property @@ -50,11 +55,12 @@ def tdm(self): """The TrialDataManager that provides the data for the PDF evaluation. 
""" return self._tdm + @tdm.setter def tdm(self, obj): - if(not isinstance(obj, TrialDataManager)): - raise TypeError('The tdm property must be an instance of ' - 'TrialDataManager!') + if not isinstance(obj, TrialDataManager): + raise TypeError( + 'The tdm property must be an instance of TrialDataManager!') self._tdm = obj def plot(self, src_hypo_group_manager, axes, fitparams, **kwargs): @@ -81,15 +87,17 @@ def plot(self, src_hypo_group_manager, axes, fitparams, **kwargs): img : instance of mpl.AxesImage The AxesImage instance showing the PDF ratio image. """ - if(not isinstance(src_hypo_group_manager, SourceHypoGroupManager)): - raise TypeError('The src_hypo_group_manager argument must be an ' + if not isinstance(src_hypo_group_manager, SourceHypoGroupManager): + raise TypeError( + 'The src_hypo_group_manager argument must be an ' 'instance of SourceHypoGroupManager!') - if(not isinstance(axes, Axes)): - raise TypeError('The axes argument must be an instance of ' + if not isinstance(axes, Axes): + raise TypeError( + 'The axes argument must be an instance of ' 'matplotlib.axes.Axes!') - if(not isinstance(fitparams, dict)): - raise TypeError('The fitparams argument must be an instance of ' - 'dict!') + if not isinstance(fitparams, dict): + raise TypeError( + 'The fitparams argument must be an instance of dict!') # Get the binning for the axes. We use the background PDF to get it # from. By construction, all PDFs use the same binning. We know that @@ -99,10 +107,11 @@ def plot(self, src_hypo_group_manager, axes, fitparams, **kwargs): # Create a 2D array with the ratio values. We put one event into each # bin. 
ratios = np.zeros((xbinning.nbins, ybinning.nbins), dtype=np.float64) - events = DataFieldRecordArray(np.zeros((ratios.size,), + events = DataFieldRecordArray(np.zeros( + (ratios.size,), dtype=[('ix', np.int64), (xbinning.name, np.float64), ('iy', np.int64), (ybinning.name, np.float64)])) - for (i, ((ix,x),(iy,y))) in enumerate(itertools.product( + for (i, ((ix, x), (iy, y))) in enumerate(itertools.product( enumerate(xbinning.bincenters), enumerate(ybinning.bincenters))): events['ix'][i] = ix @@ -114,12 +123,17 @@ def plot(self, src_hypo_group_manager, axes, fitparams, **kwargs): event_ratios = self.pdfratio.get_ratio(self._tdm, fitparams) for i in range(len(events)): - ratios[events['ix'][i],events['iy'][i]] = event_ratios[i] + ratios[events['ix'][i], events['iy'][i]] = event_ratios[i] (left, right, bottom, top) = (xbinning.lower_edge, xbinning.upper_edge, ybinning.lower_edge, ybinning.upper_edge) - img = axes.imshow(ratios.T, extent=(left, right, bottom, top), - origin='lower', norm=LogNorm(), interpolation='none', **kwargs) + img = axes.imshow( + ratios.T, + extent=(left, right, bottom, top), + origin='lower', + norm=LogNorm(), + interpolation='none', + **kwargs) axes.set_xlabel(xbinning.name) axes.set_ylabel(ybinning.name) axes.set_title(classname(self._pdfratio)) diff --git a/skyllh/physics/__init__.py b/skyllh/scripting/__init__.py similarity index 100% rename from skyllh/physics/__init__.py rename to skyllh/scripting/__init__.py diff --git a/skyllh/scripting/argparser.py b/skyllh/scripting/argparser.py new file mode 100644 index 0000000000..18af340bc8 --- /dev/null +++ b/skyllh/scripting/argparser.py @@ -0,0 +1,152 @@ +# -*- coding: utf-8 -*- + +"""This module contains utility functions for the argument parser of an analysis +script. +""" + +import argparse + + +def create_argparser( + description=None, + options=True): + """Creates an argparser with the given description and adds common options + useful for analysis scripts. 
+ + Parameters + ---------- + description : str | None + The description for the argparser. + options : bool | dict | None + If set to None or False, no options will be added. + If set to True, all common analysis script options will be added. + If set to a dictionary, individual options can be turned on and off. + See the :func:`add_argparser_options` for possible options. + Default is ``True``. + """ + parser = argparse.ArgumentParser( + description=description, + formatter_class=argparse.RawTextHelpFormatter + ) + + if options is True: + options = dict() + + if isinstance(options, dict): + add_argparser_options( + parser=parser, + **options) + + return parser + + +def add_argparser_options( + parser, + config=True, + data_basepath=True, + debug_logfile=True, + enable_tracing=True, + n_cpu=True, + seed=True): + """Adds common argparser options to the given argparser instance, useful for + analysis scripts. + + Parameters + ---------- + parser : instance of ArgumentParser + The instance of ArgumentParser to which options should get added. + config : bool + If set to ``True``, the ``--config`` option of type ``str`` + will be added. + It specifies the configuration file. + The default value is ``None``. + The option destination is ``config``. + data_basepath : bool + If set to ``True``, the ``--data-basepath`` option of type ``str`` + will be added. + It specifies the base path to the data samples. + The default value is ``None``. + The option destination is ``data_basepath``. + debug_logfile : bool + If set to ``True``, the ``--debug-logfile`` option of type ``str`` + will be added. + If not ``None``, it specifies the log file for dubug information. + The default value is ``None``. + The option destination is ``debug_logfile``. + enable_tracing : bool + If set to ``True``, the ``--enable-tracing`` option of type ``bool`` + will be added. + If specified, enables the logging on the tracing level, i.e. a lot of + DEBUG messages. + The default value is ``False``. 
+ The option destination is ``enable_tracing``. + n_cpu : bool + If set to ``True``, the ``--n-cpu`` option of type ``int`` + will be added. + It specifies the number of CPUs to utilize where parallelization is + possible. + The default value is ``1``. + The option destination is ``n_cpu``. + seed : bool + If set to ``True``, the ``--seed`` option of type ``int`` + will be added. + It specifies the seed for the random number generator. + The default value is ``0``. + The option destination is ``seed``. + """ + if config: + parser.add_argument( + '--config', + dest='config', + default=None, + type=str, + help='The configuration file. ' + '(default=None)') + + if data_basepath: + parser.add_argument( + '--data-basepath', + dest='data_basepath', + default=None, + type=str, + help='The base path to the data samples. ' + '(default=None)') + + if debug_logfile: + parser.add_argument( + '--debug-logfile', + dest='debug_logfile', + default=None, + type=str, + help='If not None, it specifies the log file for dubug ' + 'information. ' + '(default=None)') + + if enable_tracing: + parser.add_argument( + '--enable-tracing', + dest='enable_tracing', + default=False, + action='store_true', + help='If specified, enables the logging on the tracing level, i.e. ' + 'a lot of DEBUG messages. ' + '(default=False)') + + if n_cpu: + parser.add_argument( + '--n-cpu', + dest='n_cpu', + default=1, + type=int, + help='The number of CPUs to utilize where parallelization is ' + 'possible. ' + '(default=1)') + + if seed: + parser.add_argument( + '--seed', + dest='seed', + default=0, + type=int, + help='The seed for the random number generator. ' + '(default=0)') diff --git a/skyllh/scripting/logging.py b/skyllh/scripting/logging.py new file mode 100644 index 0000000000..b80d14a3ec --- /dev/null +++ b/skyllh/scripting/logging.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- + +"""This module contains utility functions for logging functionalities of an +analysis script. 
+""" + +import logging + +from skyllh.core.debugging import ( + get_logger, + setup_console_handler, + setup_file_handler, +) + + +def setup_logging( + script_logger_name, + log_format=None, + log_level=logging.INFO, + debug_pathfilename=None): + """Installs console handlers for the ``skyllh`` and ``script_logger_name`` + loggers. If a debug file is specified, file handlers for debug messages + will be installed as well. + + Parameters + ---------- + script_logger_name : str + The name of the logger used by the script. + log_format : str | None + The format template of the log message. If set to ``Ǹone``, the format + will be + + ``'%(asctime)s %(processName)s %(name)s %(levelname)s: %(message)s'`` + + log_level : int + The log level of the loggers. The default is ``logging.INFO``. + debug_pathfilename : str | None + If not ``None``, file handlers for DEBUG messages will be installed and + those messages will be stored in the given file. + + Returns + ------- + script_logger : instance of logging.Logger + The logger instance of the script, specified by ``script_logger_name``. 
+ """ + if log_format is None: + log_format = '%(asctime)s %(processName)s %(name)s %(levelname)s: '\ + '%(message)s' + + setup_console_handler( + name='skyllh', + log_level=log_level, + log_format=log_format + ) + + setup_console_handler( + name=script_logger_name, + log_level=log_level, + log_format=log_format + ) + + if debug_pathfilename is not None: + setup_file_handler( + name='skyllh', + filename=debug_pathfilename, + log_format=log_format, + log_level=logging.DEBUG + ) + setup_file_handler( + name=script_logger_name, + filename=debug_pathfilename, + log_format=log_format, + log_level=logging.DEBUG + ) + + script_logger = get_logger(script_logger_name) + + return script_logger diff --git a/skyllh/utils/__init__.py b/skyllh/utils/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/skyllh/utils/spline.py b/skyllh/utils/spline.py deleted file mode 100644 index 967ba8fb09..0000000000 --- a/skyllh/utils/spline.py +++ /dev/null @@ -1,43 +0,0 @@ -# -*- coding: utf-8 -*- - -import numpy as np - -from scipy.interpolate import interp1d - - -def make_spline_1d(x, y, kind='linear', **kwargs): - """Creates a 1D spline for the function y(x) using - :class:`scipy.interpolate.interp1d`. - - Parameters - ---------- - x : array_like - The x values. - y : array_like - The y values. - kind : str - The kind of the spline. See the :class:`scipy.interpolate.interp1d` - documentation for possible values. Default is ``'linear'``. - **kwargs - Additional keyword arguments are passed to the :class:`~scipy.interpolate.interp1d` function. - - Returns - ------- - spline : - The created 1D spline instance. - """ - x = np.atleast_1d(x) - y = np.atleast_1d(y) - - # The interp1d function requires unique x values. So we need to sort x in - # increasing order and mask out repeating x values. 
- xy = np.array(sorted(zip(x, y)), dtype=y.dtype) - x = xy[:,0] - unique_x_mask = np.concatenate(([True], np.invert( - x[1:] <= x[:-1]))) - x = x[unique_x_mask] - y = xy[:,1][unique_x_mask] - - spline = interp1d(x, y, kind=kind, copy=False, assume_sorted=True, **kwargs) - - return spline diff --git a/tests/core/test_analysis.py b/tests/core/test_analysis.py deleted file mode 100644 index a85061e67a..0000000000 --- a/tests/core/test_analysis.py +++ /dev/null @@ -1,81 +0,0 @@ -# -*- coding: utf-8 -*- -# TODO: File out of date, tell flake8 to ignore -# flake8: noqa - -from __future__ import division - -import os.path -import unittest -import numpy as np - -from skyllh.core.analysis import Analysis -from skyllh.core.random import RandomStateService - -# Classes to define the source hypothesis. -from skyllh.physics.source import PointLikeSource -from skyllh.physics.flux import PowerLawFlux -from skyllh.core.source_hypo_group import SourceHypoGroup -from skyllh.core.source_hypothesis import SourceHypoGroupManager - -# Classes to define the fit parameters. -from skyllh.core.parameters import ( - SingleSourceFitParameterMapper, - FitParameter -) - -# Classes for the minimizer. -from skyllh.core.minimizer import Minimizer, LBFGSMinimizerImpl - -# Classes for defining the analysis. -from skyllh.core.test_statistic import TestStatisticWilks -#from skyllh.core.analysis import ( -# SpacialEnergyTimeIntegratedMultiDatasetSingleSourceAnalysis as Analysis -#) - -""" -class TestAnalysis(unittest.TestCase): - def setUp(self): - # path = os.path.abspath(os.path.dirname(__file__)) - # self.exp_data = np.load(os.path.join(path, 'testdata/exp_testdata.npy')) - # self.mc_data = np.load(os.path.join(path, 'testdata/mc_testdata.npy')) - # self.livetime_data = np.load(os.path.join(path, 'testdata/livetime_testdata.npy')) - - # Create the minimizer instance. - minimizer = Minimizer(LBFGSMinimizerImpl()) - - # Create a source hypothesis group manager. 
- src_hypo_group_manager = SourceHypoGroupManager(SourceHypoGroup( - source, fluxmodel, detsigeff_implmethod, sig_gen_method)) - - # Create a source fit parameter mapper and define the fit parameters. - src_fitparam_mapper = SingleSourceFitParameterMapper(rss) - src_fitparam_mapper.def_fit_parameter(fitparam_gamma) - # Define the test statistic. - test_statistic = TestStatisticWilks() - - self.analysis = Analysis() - self.rss = RandomStateService(seed=0) - - # Define the data scrambler with its data scrambling method, which is used - # for background generation. - data_scrambler = DataScrambler(UniformRAScramblingMethod(), - inplace_scrambling=True) - - # Create background generation method. - bkg_gen_method = FixedScrambledExpDataI3BkgGenMethod(data_scrambler) - - def tearDown(self): - # self.exp_data.close() - # self.mc_data.close() - # self.livetime_data.close() - pass - - def test_do_trials(self): - N = 10 - ncpu = None - - self.analysis.do_trials(N, self.rss, ncpu=ncpu) -""" - -if(__name__ == '__main__'): - unittest.main() diff --git a/tests/core/test_dataset.py b/tests/core/test_dataset.py index bad76f91d6..fca6f6e0a1 100644 --- a/tests/core/test_dataset.py +++ b/tests/core/test_dataset.py @@ -1,19 +1,19 @@ # -*- coding: utf-8 -*- -from __future__ import division - +import numpy as np import os.path import unittest -import numpy as np from skyllh.core.dataset import ( get_data_subset, + DatasetData, +) +from skyllh.core.livetime import ( + Livetime, +) +from skyllh.core.storage import ( DataFieldRecordArray, - DatasetData ) -from skyllh.core.livetime import Livetime - -from skyllh.core.py import float_cast class TestDatasetFunctions(unittest.TestCase): @@ -105,5 +105,6 @@ def test_get_data_subset(self): self.assertEqual(len(dataset_data_subset.mc), 3) self.assertAlmostEqual(livetime_subset.livetime, 0.75) -if(__name__ == '__main__'): + +if __name__ == '__main__': unittest.main() diff --git a/tests/core/test_event_selection.py 
b/tests/core/test_event_selection.py new file mode 100644 index 0000000000..afa41329ff --- /dev/null +++ b/tests/core/test_event_selection.py @@ -0,0 +1,695 @@ +# -*- coding: utf-8 -*- + +"""This test module tests classes, methods and functions of the +``core.event_selection`` module. + +Note: The `PsiFuncEventSelectionMethod` is not currently used/tested. +""" + +import unittest +from unittest.mock import Mock + +import numpy as np + +from skyllh.core.event_selection import ( + AllEventSelectionMethod, + DecBandEventSectionMethod, + RABandEventSectionMethod, + SpatialBoxEventSelectionMethod, + AngErrOfPsiEventSelectionMethod, +) +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroupManager, +) +from skyllh.core.source_model import ( + PointLikeSource, +) +from skyllh.core.storage import ( + DataFieldRecordArray, +) + +from tests.core.testdata.testdata_generator import ( + generate_testdata, +) + + +def shgm_setup(n_sources=1): + # Mock SourceHypoGroupManager class in order to pass isinstance checks and + # set its properties used by event selection methods. + shgm = Mock(spec_set=["__class__", "source_list", "n_sources"]) + shgm.__class__ = SourceHypoGroupManager + + rng = np.random.default_rng(0) + x = rng.random((n_sources, 2)) + src_ras = 2 * np.pi * x[:, 0] + src_decs = np.pi * (x[:, 1] - 0.5) + source_list = [ + PointLikeSource(*src_loc) for src_loc in zip(src_ras, src_decs) + ] + + shgm.source_list = source_list + shgm.n_sources = n_sources + + return shgm + + +def get_func_psi_ang_err(ang_err=0.5): + def func_psi_ang_err(psi): + """A dummy function for psi func event selection. + + Parameters + ---------- + psi : 1d ndarray of floats + The opening angle between the source position and the event's + reconstructed position. 
+ """ + return ang_err * np.ones_like(psi) + + return func_psi_ang_err + + +class AllEventSelectionMethod_TestCase(unittest.TestCase): + def setUp(self): + testdata = generate_testdata() + self.test_events = DataFieldRecordArray(testdata.get("exp_testdata")) + + def test_change_shg_mgr(self): + n_sources = 1 + shg_mgr = shgm_setup(n_sources=n_sources) + evt_sel_method = AllEventSelectionMethod(shg_mgr) + + self.assertEqual( + evt_sel_method.shg_mgr.source_list, + shg_mgr.source_list, + ) + self.assertEqual( + evt_sel_method.shg_mgr.n_sources, + shg_mgr.n_sources, + ) + + # Change the SourceHypoGroupManager instance. + n_sources = 2 + shg_mgr_new = shgm_setup(n_sources=n_sources) + evt_sel_method.change_shg_mgr( + shg_mgr_new + ) + + self.assertEqual( + evt_sel_method.shg_mgr.source_list, + shg_mgr_new.source_list, + ) + self.assertEqual( + evt_sel_method.shg_mgr.n_sources, + shg_mgr_new.n_sources, + ) + + def test_select_events_single_source(self): + n_sources = 1 + shg_mgr = shgm_setup(n_sources=n_sources) + evt_sel_method = AllEventSelectionMethod(shg_mgr) + + (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( + events=self.test_events) + + np.testing.assert_array_equal( + events, self.test_events) + self.assertEqual( + len(src_idxs), n_sources * len(self.test_events)) + self.assertEqual( + len(ev_idxs), n_sources * len(self.test_events)) + np.testing.assert_array_equal( + np.unique(src_idxs), np.arange(n_sources)) + np.testing.assert_array_equal( + np.unique(ev_idxs), np.arange(len(self.test_events))) + + def test_select_events_multiple_sources(self): + n_sources = 2 + shg_mgr = shgm_setup(n_sources=n_sources) + evt_sel_method = AllEventSelectionMethod(shg_mgr) + + (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( + self.test_events) + + np.testing.assert_array_equal( + events, self.test_events) + self.assertEqual( + len(src_idxs), n_sources * len(self.test_events)) + self.assertEqual( + len(ev_idxs), n_sources * len(self.test_events)) + 
np.testing.assert_array_equal( + np.unique(src_idxs), np.arange(n_sources)) + np.testing.assert_array_equal( + np.unique(ev_idxs), np.arange(len(self.test_events))) + + +class DecBandEventSectionMethod_TestCase(unittest.TestCase): + def setUp(self): + testdata = generate_testdata() + self.test_events = DataFieldRecordArray(testdata.get("events")) + + def test_sources_to_array_single_source(self): + n_sources = 1 + delta_angle = np.deg2rad(15) + shg_mgr = shgm_setup(n_sources=n_sources) + evt_sel_method = DecBandEventSectionMethod( + shg_mgr, delta_angle + ) + + src_arr = evt_sel_method.sources_to_array( + shg_mgr.source_list + ) + + src_ras = np.array( + [source.ra for source in shg_mgr.source_list] + ) + src_decs = np.array( + [source.dec for source in shg_mgr.source_list] + ) + + np.testing.assert_array_equal(src_arr["ra"], src_ras) + np.testing.assert_array_equal(src_arr["dec"], src_decs) + + def test_sources_to_array_multiple_sources(self): + n_sources = 2 + delta_angle = np.deg2rad(15) + shg_mgr = shgm_setup(n_sources=n_sources) + evt_sel_method = DecBandEventSectionMethod( + shg_mgr, delta_angle + ) + + src_arr = evt_sel_method.sources_to_array( + shg_mgr.source_list + ) + + src_ras = np.array( + [source.ra for source in shg_mgr.source_list] + ) + src_decs = np.array( + [source.dec for source in shg_mgr.source_list] + ) + + np.testing.assert_array_equal(src_arr["ra"], src_ras) + np.testing.assert_array_equal(src_arr["dec"], src_decs) + + def test_select_events_single_source(self): + n_sources = 1 + delta_angle = np.deg2rad(15) + shg_mgr = shgm_setup(n_sources=n_sources) + + dec_min = shg_mgr.source_list[0].dec - delta_angle + dec_max = shg_mgr.source_list[0].dec + delta_angle + + evt_sel_method = DecBandEventSectionMethod( + shg_mgr=shg_mgr, + delta_angle=delta_angle) + + (events, (src_idxs, evt_idxs)) = evt_sel_method.select_events( + events=self.test_events) + + self.assertTrue( + np.all(events["dec"] > dec_min), + msg="Returned selected events below 
src_dec - delta_angle.", + ) + self.assertTrue( + np.all(events["dec"] < dec_max), + msg="Returned selected events above src_dec + delta_angle.", + ) + + n_expected_events = np.sum( + (events["dec"] > dec_min) & (events["dec"] < dec_max) + ) + self.assertEqual( + len(src_idxs), n_expected_events) + self.assertEqual( + len(evt_idxs), n_expected_events) + np.testing.assert_array_equal( + np.unique(src_idxs), np.arange(n_sources)) + np.testing.assert_array_equal( + np.unique(evt_idxs), np.arange(len(events))) + + def test_select_events_multiple_sources(self): + n_sources = 2 + delta_angle = np.deg2rad(15) + shg_mgr = shgm_setup(n_sources=n_sources) + evt_sel_method = DecBandEventSectionMethod( + shg_mgr, delta_angle + ) + + src_decs = [source.dec for source in shg_mgr.source_list] + + dec_min = np.min(src_decs) - delta_angle + dec_max = np.max(src_decs) + delta_angle + + (events, (src_idxs, evt_idxs)) = evt_sel_method.select_events( + self.test_events) + + self.assertTrue( + np.all(events["dec"] > dec_min), + msg="Returned selected events below minimal src_dec - delta_angle.", + ) + self.assertTrue( + np.all(events["dec"] < dec_max), + msg="Returned selected events above maximal src_dec + delta_angle.", + ) + + for i, src_dec in enumerate(src_decs): + events_mask = src_idxs == i + dec_min = src_dec - delta_angle + dec_max = src_dec + delta_angle + + self.assertTrue( + np.all(events["dec"][evt_idxs[events_mask]] > dec_min), + msg="Returned selected events below src_dec - delta_angle.", + ) + self.assertTrue( + np.all(events["dec"][evt_idxs[events_mask]] < dec_max), + msg="Returned selected events above src_dec + delta_angle.", + ) + np.testing.assert_array_equal( + np.unique(src_idxs), np.arange(n_sources)) + np.testing.assert_array_equal( + np.unique(evt_idxs), np.arange(len(events))) + + +class RABandEventSectionMethod_TestCase(unittest.TestCase): + def setUp(self): + testdata = generate_testdata() + self.test_events = DataFieldRecordArray(testdata.get("events")) + + 
def test_select_events_single_source(self): + n_sources = 1 + delta_angle = np.deg2rad(15) + shg_mgr = shgm_setup(n_sources=n_sources) + evt_sel_method = RABandEventSectionMethod( + shg_mgr, delta_angle + ) + + src_ras = np.array( + [source.ra for source in shg_mgr.source_list] + ) + src_decs = np.array( + [source.dec for source in shg_mgr.source_list] + ) + + # Get the minus and plus declination around the sources. + src_dec_minus = np.maximum(-np.pi / 2, src_decs - delta_angle) + src_dec_plus = np.minimum(src_decs + delta_angle, np.pi / 2) + + # Calculate the cosine factor for the largest declination distance from + # the source. We use np.amin here because smaller cosine values are + # larger angles. + # cosfact is a (N_sources,)-shaped ndarray. + cosfact = np.amin(np.cos([src_dec_minus, src_dec_plus]), axis=0) + + # Calculate delta RA, which is a function of declination. + # dRA is a (N_sources,)-shaped ndarray. + dRA_half = np.amin( + [np.repeat(2 * np.pi, n_sources), np.fabs(delta_angle / cosfact)], + axis=0, + ) + + (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( + events=self.test_events) + + for i in range(n_sources): + events_mask = src_idxs == i + src_ra_max = src_ras[i] + dRA_half[i] - np.pi + + self.assertTrue( + np.all( + np.fabs(events["ra"][ev_idxs[events_mask]] - np.pi) + < src_ra_max + ), + msg="Returned selected events above maximal " + "src_ra + delta_angle/cosfact.", + ) + + src_ra_max = src_ras[0] + dRA_half[0] - np.pi + n_expected_events = np.sum((np.fabs(events["ra"] - np.pi) < src_ra_max)) + + self.assertEqual( + len(src_idxs), n_expected_events) + self.assertEqual( + len(ev_idxs), n_expected_events) + np.testing.assert_array_equal( + np.unique(src_idxs), np.arange(n_sources)) + np.testing.assert_array_equal( + np.unique(ev_idxs), np.arange(len(events))) + + def test_select_events_multiple_sources(self): + n_sources = 2 + delta_angle = np.deg2rad(15) + shg_mgr = shgm_setup(n_sources=n_sources) + evt_sel_method = 
RABandEventSectionMethod( + shg_mgr, delta_angle + ) + + src_ras = np.array( + [source.ra for source in shg_mgr.source_list] + ) + src_decs = np.array( + [source.dec for source in shg_mgr.source_list] + ) + + # Get the minus and plus declination around the sources. + src_dec_minus = np.maximum(-np.pi / 2, src_decs - delta_angle) + src_dec_plus = np.minimum(src_decs + delta_angle, np.pi / 2) + + # Calculate the cosine factor for the largest declination distance from + # the source. We use np.amin here because smaller cosine values are + # larger angles. + # cosfact is a (N_sources,)-shaped ndarray. + cosfact = np.amin(np.cos([src_dec_minus, src_dec_plus]), axis=0) + + # Calculate delta RA, which is a function of declination. + # dRA is a (N_sources,)-shaped ndarray. + dRA_half = np.amin( + [np.repeat(2 * np.pi, n_sources), np.fabs(delta_angle / cosfact)], + axis=0, + ) + + (events, (src_idxs, evt_idxs)) = evt_sel_method.select_events( + events=self.test_events) + + for i in range(n_sources): + events_mask = src_idxs == i + src_ra_max = src_ras[i] + dRA_half[i] - np.pi + + self.assertTrue( + np.all( + np.fabs(events["ra"][evt_idxs[events_mask]] - np.pi) + < src_ra_max + ), + msg="Returned selected events above maximal " + "src_ra + delta_angle/cosfact.", + ) + + np.testing.assert_array_equal( + np.unique(src_idxs), np.arange(n_sources)) + np.testing.assert_array_equal( + np.unique(evt_idxs), np.arange(len(events))) + + +class SpatialBoxEventSelectionMethod_TestCase(unittest.TestCase): + def setUp(self): + testdata = generate_testdata() + self.test_events = DataFieldRecordArray(testdata.get("events")) + + def test_select_events_single_source(self): + n_sources = 1 + delta_angle = np.deg2rad(15) + shg_mgr = shgm_setup(n_sources=n_sources) + evt_sel_method = SpatialBoxEventSelectionMethod( + shg_mgr, delta_angle + ) + + src_ras = np.array( + [source.ra for source in shg_mgr.source_list] + ) + src_decs = np.array( + [source.dec for source in shg_mgr.source_list] + ) + + 
# Get the minus and plus declination around the sources. + src_dec_minus = np.maximum(-np.pi / 2, src_decs - delta_angle) + src_dec_plus = np.minimum(src_decs + delta_angle, np.pi / 2) + + # Calculate the cosine factor for the largest declination distance from + # the source. We use np.amin here because smaller cosine values are + # larger angles. + # cosfact is a (N_sources,)-shaped ndarray. + cosfact = np.amin(np.cos([src_dec_minus, src_dec_plus]), axis=0) + + # Calculate delta RA, which is a function of declination. + # dRA is a (N_sources,)-shaped ndarray. + dRA_half = np.amin( + [np.repeat(2 * np.pi, n_sources), np.fabs(delta_angle / cosfact)], + axis=0, + ) + + (events, (src_idxs, evt_idxs)) = evt_sel_method.select_events( + events=self.test_events) + + for i in range(n_sources): + events_mask = src_idxs == i + src_ra_max = src_ras[i] + dRA_half[i] - np.pi + + self.assertTrue( + np.all( + np.fabs(events["ra"][evt_idxs[events_mask]] - np.pi) + < src_ra_max + ), + msg="Returned selected events above maximal " + "src_ra + delta_angle/cosfact.", + ) + + dec_min = src_decs[i] - delta_angle + dec_max = src_decs[i] + delta_angle + + self.assertTrue( + np.all(events["dec"][evt_idxs[events_mask]] > dec_min), + msg="Returned selected events below src_dec - delta_angle.", + ) + self.assertTrue( + np.all(events["dec"][evt_idxs[events_mask]] < dec_max), + msg="Returned selected events above src_dec + delta_angle.", + ) + + src_ra_max = src_ras[0] + dRA_half[0] - np.pi + n_expected_events = np.sum((np.fabs(events["ra"] - np.pi) < src_ra_max)) + + self.assertEqual( + len(src_idxs), n_expected_events) + self.assertEqual( + len(evt_idxs), n_expected_events) + np.testing.assert_array_equal( + np.unique(src_idxs), np.arange(n_sources)) + np.testing.assert_array_equal( + np.unique(evt_idxs), np.arange(len(events))) + + def test_select_events_multiple_sources(self): + n_sources = 2 + delta_angle = np.deg2rad(15) + shg_mgr = shgm_setup(n_sources=n_sources) + evt_sel_method = 
SpatialBoxEventSelectionMethod( + shg_mgr, delta_angle + ) + + src_ras = np.array( + [source.ra for source in shg_mgr.source_list] + ) + src_decs = np.array( + [source.dec for source in shg_mgr.source_list] + ) + + # Get the minus and plus declination around the sources. + src_dec_minus = np.maximum(-np.pi / 2, src_decs - delta_angle) + src_dec_plus = np.minimum(src_decs + delta_angle, np.pi / 2) + + # Calculate the cosine factor for the largest declination distance from + # the source. We use np.amin here because smaller cosine values are + # larger angles. + # cosfact is a (N_sources,)-shaped ndarray. + cosfact = np.amin(np.cos([src_dec_minus, src_dec_plus]), axis=0) + + # Calculate delta RA, which is a function of declination. + # dRA is a (N_sources,)-shaped ndarray. + dRA_half = np.amin( + [np.repeat(2 * np.pi, n_sources), np.fabs(delta_angle / cosfact)], + axis=0, + ) + + (events, (src_idxs, evt_idxs)) = evt_sel_method.select_events( + events=self.test_events) + + for i in range(n_sources): + events_mask = src_idxs == i + src_ra_max = src_ras[i] + dRA_half[i] - np.pi + + self.assertTrue( + np.all( + np.fabs(events["ra"][evt_idxs[events_mask]] - np.pi) + < src_ra_max + ), + msg="Returned selected events above maximal " + "src_ra + delta_angle/cosfact.", + ) + + dec_min = src_decs[i] - delta_angle + dec_max = src_decs[i] + delta_angle + + self.assertTrue( + np.all(events["dec"][evt_idxs[events_mask]] > dec_min), + msg="Returned selected events below src_dec - delta_angle.", + ) + self.assertTrue( + np.all(events["dec"][evt_idxs[events_mask]] < dec_max), + msg="Returned selected events above src_dec + delta_angle.", + ) + np.testing.assert_array_equal( + np.unique(src_idxs), np.arange(n_sources)) + np.testing.assert_array_equal( + np.unique(evt_idxs), np.arange(len(events))) + + +class AngErrOfPsiAndSpatialBoxEventSelectionMethod_TestCase( + unittest.TestCase): + + def setUp(self): + testdata = generate_testdata() + self.test_events = 
DataFieldRecordArray(testdata.get("events")) + + def test_select_events_single_source(self): + """Check if the event selection without a psi cut returns an identical + result to the `SpatialBoxEventSelectionMethod`. + """ + n_sources = 1 + delta_angle = np.deg2rad(15) + shg_mgr = shgm_setup(n_sources=n_sources) + func = get_func_psi_ang_err(ang_err=0) + + evt_sel_method_angerr = AngErrOfPsiEventSelectionMethod( + shg_mgr=shg_mgr, + func=func, + psi_floor=0.) + + evt_sel_method_sb = SpatialBoxEventSelectionMethod( + shg_mgr=shg_mgr, + delta_angle=delta_angle) + + evt_sel_method = evt_sel_method_sb & evt_sel_method_angerr + + (events, (src_idxs, evt_idxs)) = evt_sel_method.select_events( + events=self.test_events) + (events_sb, (src_idxs_sb, ev_idxs_sb)) = evt_sel_method_sb.select_events( + events=self.test_events) + + np.testing.assert_array_equal( + events.as_numpy_record_array(), + events_sb.as_numpy_record_array(), + ) + np.testing.assert_array_equal( + src_idxs, src_idxs_sb) + np.testing.assert_array_equal( + evt_idxs, ev_idxs_sb) + np.testing.assert_array_equal( + np.unique(src_idxs), np.arange(n_sources)) + np.testing.assert_array_equal( + np.unique(evt_idxs), np.arange(len(events))) + + def test_select_events_multiple_sources(self): + """Check if the event selection without a psi cut returns an identical + result to the `SpatialBoxEventSelectionMethod`. + """ + n_sources = 2 + delta_angle = np.deg2rad(15) + shg_mgr = shgm_setup(n_sources=n_sources) + func = get_func_psi_ang_err(ang_err=0) + + evt_sel_method_angerr = AngErrOfPsiEventSelectionMethod( + shg_mgr=shg_mgr, + func=func, + psi_floor=0.) 
+ + evt_sel_method_sb = SpatialBoxEventSelectionMethod( + shg_mgr=shg_mgr, + delta_angle=delta_angle) + + evt_sel_method = evt_sel_method_sb & evt_sel_method_angerr + + (evts, (src_idxs, evt_idxs)) = evt_sel_method.select_events( + events=self.test_events) + (evts_sb, (src_idxs_sb, evt_idxs_sb)) = evt_sel_method_sb.select_events( + events=self.test_events) + + np.testing.assert_array_equal( + evts.as_numpy_record_array(), + evts_sb.as_numpy_record_array()) + np.testing.assert_array_equal( + src_idxs, src_idxs_sb) + np.testing.assert_array_equal( + evt_idxs, evt_idxs_sb) + np.testing.assert_array_equal( + np.unique(src_idxs), np.arange(n_sources)) + np.testing.assert_array_equal( + np.unique(evt_idxs), np.arange(len(evts))) + + def test_select_events_single_source_psi_func(self): + n_sources = 1 + delta_angle = np.deg2rad(15) + shg_mgr = shgm_setup(n_sources=n_sources) + ang_err = 3.0 + func = get_func_psi_ang_err(ang_err) + + evt_sel_method_sb = SpatialBoxEventSelectionMethod( + shg_mgr=shg_mgr, + delta_angle=delta_angle) + + evt_sel_method_angerr = AngErrOfPsiEventSelectionMethod( + shg_mgr=shg_mgr, + func=func, + psi_floor=0.) 
+ + evt_sel_method = evt_sel_method_sb & evt_sel_method_angerr + + (evts, (src_idxs, evt_idxs)) = evt_sel_method.select_events( + events=self.test_events) + (evts_sb, (src_idxs_sb, evt_idxs_sb)) = evt_sel_method_sb.select_events( + events=self.test_events) + + mask_psi_cut = evts_sb["ang_err"] > ang_err + + np.testing.assert_array_equal( + evts.as_numpy_record_array(), + evts_sb[mask_psi_cut].as_numpy_record_array(), + ) + np.testing.assert_array_equal( + np.unique(src_idxs), np.arange(n_sources)) + np.testing.assert_array_equal( + np.unique(evt_idxs), np.arange(len(evts))) + + def test_select_events_multiple_sources_psi_func(self): + n_sources = 2 + delta_angle = np.deg2rad(15) + shg_mgr = shgm_setup(n_sources=n_sources) + ang_err = 3.0 + func = get_func_psi_ang_err(ang_err) + + evt_sel_method_sb = SpatialBoxEventSelectionMethod( + shg_mgr=shg_mgr, + delta_angle=delta_angle) + + evt_sel_method_angerr = AngErrOfPsiEventSelectionMethod( + shg_mgr=shg_mgr, + func=func, + psi_floor=0.) + + evt_sel_method = evt_sel_method_sb & evt_sel_method_angerr + + (evts, (src_idxs, evt_idxs)) = evt_sel_method.select_events( + events=self.test_events) + (evts_sb, (src_idxs_sb, evt_idxs_sb)) = evt_sel_method_sb.select_events( + events=self.test_events) + + for i in range(n_sources): + evts_mask = src_idxs == i + evts_mask_sb = src_idxs_sb == i + + mask_psi_cut = ( + evts_sb[evt_idxs_sb[evts_mask_sb]]["ang_err"] > ang_err + ) + + np.testing.assert_array_equal( + evts[evt_idxs[evts_mask]].as_numpy_record_array(), + evts_sb[evt_idxs_sb[evts_mask_sb]] + [mask_psi_cut].as_numpy_record_array(), + ) + + np.testing.assert_array_equal( + np.unique(src_idxs), np.arange(n_sources)) + np.testing.assert_array_equal( + np.unique(evt_idxs), np.arange(len(evts))) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/core/test_flux_model.py b/tests/core/test_flux_model.py new file mode 100644 index 0000000000..a4b3d7d402 --- /dev/null +++ b/tests/core/test_flux_model.py @@ -0,0 +1,284 
@@ +# -*- coding: utf-8 -*- + +import unittest +import numpy as np + +from astropy import units + +from skyllh.core.flux_model import ( + BoxTimeFluxProfile, + CutoffPowerLawEnergyFluxProfile, + LogParabolaPowerLawEnergyFluxProfile, + PointSpatialFluxProfile, + PowerLawEnergyFluxProfile, + UnitySpatialFluxProfile, +) + + +class UnitySpatialFluxProfileTestCase( + unittest.TestCase, +): + def setUp(self): + self.fluxprofile = UnitySpatialFluxProfile() + + def test_math_function_str(self): + self.assertEqual( + self.fluxprofile.math_function_str, + '1' + ) + + def test_call(self): + alpha = 1.5 + delta = 2.5 + + np.testing.assert_array_equal( + self.fluxprofile(alpha, delta), + np.array([1])) + + alpha = np.array([1.5, 2]) + delta = np.array([2.5, 3]) + + np.testing.assert_array_equal(self.fluxprofile(alpha, delta), + np.array([1, 1])) + + +class PointSpatialFluxProfileTestCase( + unittest.TestCase, +): + def setUp(self): + self.ra = 1.5 + self.dec = 2.5 + self.angle_unit = units.radian + self.fluxprofile = PointSpatialFluxProfile( + self.ra, self.dec, angle_unit=units.radian) + + def test_init(self): + self.assertEqual(self.fluxprofile.ra, self.ra) + self.assertEqual(self.fluxprofile.dec, self.dec) + + def test_param_names(self): + param_names = self.fluxprofile.param_names + self.assertEqual(len(param_names), 2) + self.assertEqual(param_names[0], 'ra') + self.assertEqual(param_names[1], 'dec') + + def test_angle_unit(self): + self.assertEqual(self.fluxprofile.angle_unit, units.radian) + + def test_ra(self): + ra = 5.5 + self.fluxprofile.ra = ra + self.assertAlmostEqual(self.fluxprofile.ra, ra) + + def test_dec(self): + dec = -1.4 + self.fluxprofile.dec = dec + self.assertAlmostEqual(self.fluxprofile.dec, dec) + + def test_math_function_str(self): + self.assertEqual( + self.fluxprofile.math_function_str, + (f'delta(ra-{self.ra:g}{self.angle_unit})*' + f'delta(dec-{self.dec:g}{self.angle_unit})') + ) + + def test_call(self): + ra = 1.5 + dec = 2.5 + + 
np.testing.assert_array_equal( + self.fluxprofile(ra, dec), + np.array([1])) + + ra = np.array([1.5, 2]) + dec = np.array([2.5, 3]) + + np.testing.assert_array_equal( + self.fluxprofile(ra, dec), + np.array([1, 0])) + + +class PowerLawEnergyFluxProfileTestCase( + unittest.TestCase, +): + def setUp(self): + self.E0 = 2.5 + self.gamma = 2.7 + self.energy_unit = units.GeV + self.fluxprofile = PowerLawEnergyFluxProfile( + E0=self.E0, + gamma=self.gamma, + energy_unit=self.energy_unit) + + def test_gamma(self): + self.assertEqual(self.fluxprofile.gamma, self.gamma) + + gamma = 2.6 + self.fluxprofile.gamma = gamma + self.assertEqual(self.fluxprofile.gamma, gamma) + + def test_math_function_str(self): + self.assertEqual( + self.fluxprofile.math_function_str, + f'(E / ({self.E0:g} {self.energy_unit}))^-{self.gamma:g}' + ) + + def test_call(self): + E = np.array([5]) + values = np.power(E / self.E0, -self.gamma) + + self.assertEqual( + self.fluxprofile(E=E, unit=self.energy_unit), + values) + + +class CutoffPowerLawEnergyFluxProfileTestCase( + unittest.TestCase, +): + def setUp(self): + self.E0 = 2.5 + self.gamma = 2.7 + self.Ecut = 2 + self.energy_unit = units.GeV + self.fluxprofile = CutoffPowerLawEnergyFluxProfile( + E0=self.E0, + gamma=self.gamma, + Ecut=self.Ecut, + energy_unit=self.energy_unit) + + def test_Ecut(self): + self.assertEqual(self.fluxprofile.Ecut, self.Ecut) + + Ecut = 2.5 + self.fluxprofile.Ecut = Ecut + self.assertEqual(self.fluxprofile.Ecut, Ecut) + + def test_math_function_str(self): + self.assertEqual( + self.fluxprofile.math_function_str, + (f'(E / ({self.E0:g} {self.energy_unit}))^-{self.gamma:g} ' + f'exp(-E / ({self.Ecut:g} {self.energy_unit}))')) + + def test_call(self): + E = np.array([5]) + values = np.power(E / self.E0, -self.gamma) * np.exp(-E / self.Ecut) + + self.assertEqual( + self.fluxprofile(E=E, unit=self.energy_unit), + values) + + +class LogParabolaPowerLawEnergyFluxProfileTestCase( + unittest.TestCase, +): + def setUp(self): + 
self.E0 = 2.5 + self.alpha = 1 + self.beta = 2 + self.energy_unit = units.GeV + self.fluxprofile = LogParabolaPowerLawEnergyFluxProfile( + E0=self.E0, + alpha=self.alpha, + beta=self.beta, + energy_unit=self.energy_unit) + + def test_alpha(self): + self.assertEqual(self.fluxprofile.alpha, self.alpha) + + alpha = 1.5 + self.fluxprofile.alpha = alpha + self.assertEqual(self.fluxprofile.alpha, alpha) + + def test_beta(self): + self.assertEqual(self.fluxprofile.beta, self.beta) + + beta = 2.5 + self.fluxprofile.beta = beta + self.assertEqual(self.fluxprofile.beta, beta) + + def test_math_funciton_str(self): + s_E0 = f'{self.E0:g} {self.energy_unit}' + test_string = ( + f'(E / {s_E0})' + f'^(-({self.alpha:g} + {self.beta:g} log(E / {s_E0})))' + ) + self.assertEqual( + self.fluxprofile.math_function_str, + test_string) + + +class BoxTimeFluxProfileTestCase( + unittest.TestCase, +): + def setUp(self): + self.t0 = 58430 # MJD time 2018.11.08 + self.tw = 2 # 2 day width of the box profile + self.time_unit = units.day + self.profile = BoxTimeFluxProfile( + t0=self.t0, + tw=self.tw, + time_unit=self.time_unit) + + def test_move(self): + dt = 5 + self.profile.move(dt=dt) + self.assertEqual(self.profile.t0, self.t0 + dt) + self.assertEqual(self.profile.tw, self.tw) + + def test_t0(self): + self.assertEqual(self.profile.t0, self.t0) + + t0 = 0 + self.profile.t0 = t0 + self.assertEqual(self.profile.t0, t0) + + def test_tw(self): + self.assertEqual(self.profile.tw, self.tw) + + tw = 2.5 + self.profile.tw = tw + self.assertEqual(self.profile.tw, tw) + + def test_get_integral(self): + t1 = self.t0 + t2 = self.t0 + self.tw/2 + times1 = np.array([self.t0 - self.tw, + self.t0 - self.tw/2, + self.t0]) + times2 = np.array([self.t0 + self.tw, + self.t0 + self.tw/2, + self.t0 + self.tw/2]) + + self.assertEqual(self.profile.get_integral(t1, t1), 0) + self.assertEqual(self.profile.get_integral(t1, t2), 1.0) + np.testing.assert_array_equal( + self.profile.get_integral(times1, times2), + 
np.array([2, 2, 1])) + + # Test cases when t1 > t2. + self.assertEqual( + self.profile.get_integral(t2, t1), -1) + np.testing.assert_array_equal( + self.profile.get_integral(times2, times1), + np.array([0, -2, -1])) + + def test_get_total_integral(self): + self.assertEqual(self.profile.get_total_integral(), 2) + + def test_call(self): + self.assertEqual(self.profile(t=self.t0 - self.tw), 0) + self.assertEqual(self.profile(t=self.t0), 1) + self.assertEqual(self.profile(t=self.t0 + self.tw), 0) + + times = np.array([ + self.t0 - self.tw, + self.t0, + self.t0 + self.tw + ]) + np.testing.assert_array_equal( + self.profile(t=times), + np.array([0, 1, 0])) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/core/test_interpolate.py b/tests/core/test_interpolate.py new file mode 100644 index 0000000000..d1f26a7bd2 --- /dev/null +++ b/tests/core/test_interpolate.py @@ -0,0 +1,427 @@ +# -*- coding: utf-8 -*- + +"""This test module tests classes, methods and functions of the +skyllh.core.interpolate module. +""" + +import numpy as np + +import unittest +from unittest.mock import Mock + +from skyllh.core.interpolate import ( + Linear1DGridManifoldInterpolationMethod, + NullGridManifoldInterpolationMethod, + Parabola1DGridManifoldInterpolationMethod, +) +from skyllh.core.parameters import ( + ParameterGrid, + ParameterGridSet, +) +from skyllh.core.trialdata import ( + TrialDataManager, +) + + +def line_manifold_func( + tdm, + eventdata, + gridparams_recarray, + n_values): + """This function will calculate the line value of f=2p+1 for the parameter + p. The values will be the same for each event. + """ + def line(m, p, b): + return m*p + b + + # Check for special case, when only one set of parameters is provided for + # all sources. 
+ if len(gridparams_recarray) == 1: + gridparams_recarray = np.tile(gridparams_recarray, tdm.n_sources) + + p = gridparams_recarray['p'] + + n_selected_events = eventdata.shape[0] + + values = np.repeat(line(m=2, p=p, b=1), n_selected_events) + + assert len(values) == len(gridparams_recarray)*n_selected_events + + return values + + +def param_product_func( + tdm, + eventdata, + gridparams_recarray, + n_values): + """This function calculates the product of two parameter values p1 and p2. + The result will be the same for each event. + """ + def product(p1, p2): + return p1 * p2 + + # Check for special case, when only one set of parameters is provided for + # all sources. + if len(gridparams_recarray) == 1: + gridparams_recarray = np.tile(gridparams_recarray, tdm.n_sources) + + p1 = gridparams_recarray['p1'] + p2 = gridparams_recarray['p2'] + + n_selected_events = eventdata.shape[0] + + values = np.repeat(product(p1, p2), n_selected_events) + + assert len(values) == len(gridparams_recarray)*n_selected_events + + return values + + +def create_tdm(n_sources, n_selected_events): + """Creates a Mock instance mimicing a TrialDataManager instance with a + given number of sources and selected events. 
+ """ + tdm = Mock(spec_set=[ + '__class__', + 'trial_data_state_id', + 'get_n_values', + 'src_evt_idxs', + 'n_sources', + 'n_selected_events', + 'broadcast_params_recarray_to_values_array', + 'broadcast_arrays_to_values_array', + 'broadcast_sources_array_to_values_array', + 'broadcast_sources_arrays_to_values_arrays', + ]) + + def tdm_broadcast_params_recarray_to_values_array(params_recarray): + return TrialDataManager.broadcast_params_recarray_to_values_array( + tdm, params_recarray) + + def tdm_broadcast_arrays_to_values_array(arrays): + return TrialDataManager.broadcast_arrays_to_values_array( + tdm, arrays) + + def tdm_broadcast_sources_array_to_values_array(*args, **kwargs): + return TrialDataManager.broadcast_sources_array_to_values_array( + tdm, *args, **kwargs) + + def tdm_broadcast_sources_arrays_to_values_arrays(*args, **kwargs): + return TrialDataManager.broadcast_sources_arrays_to_values_arrays( + tdm, *args, **kwargs) + + tdm.__class__ = TrialDataManager + tdm.trial_data_state_id = 1 + tdm.get_n_values = lambda: n_sources*n_selected_events + tdm.src_evt_idxs = ( + np.repeat(np.arange(n_sources), n_selected_events), + np.tile(np.arange(n_selected_events), n_sources) + ) + tdm.n_sources = n_sources + tdm.n_selected_events = n_selected_events + tdm.broadcast_params_recarray_to_values_array =\ + tdm_broadcast_params_recarray_to_values_array + tdm.broadcast_arrays_to_values_array =\ + tdm_broadcast_arrays_to_values_array + tdm.broadcast_sources_array_to_values_array =\ + tdm_broadcast_sources_array_to_values_array + tdm.broadcast_sources_arrays_to_values_arrays =\ + tdm_broadcast_sources_arrays_to_values_arrays + + return tdm + + +class NullGridManifoldInterpolationMethod_TestCase(unittest.TestCase): + def setUp(self): + param1_grid = ParameterGrid.from_range('p1', -3, 3, 0.1) + param2_grid = ParameterGrid.from_range('p2', -1.5, 2.3, 0.1) + + self.interpolmethod = NullGridManifoldInterpolationMethod( + func=param_product_func, + 
param_grid_set=ParameterGridSet((param1_grid, param2_grid))) + + self.tdm = create_tdm(n_sources=3, n_selected_events=2) + + self.eventdata = np.zeros( + (self.tdm.n_selected_events, 1), dtype=np.float64) + + def test__call__with_different_source_values(self): + """Test for when the interpolation parameters have different values for + different sources. + """ + params_recarray = np.empty( + (self.tdm.n_sources,), + dtype=[('p1', np.float64), ('p2', np.float64)]) + params_recarray['p1'] = [-2.12, 1.36, 2.4] + params_recarray['p2'] = [-1.06, 2.1, 1.33] + + (values, grads) = self.interpolmethod( + tdm=self.tdm, + eventdata=self.eventdata, + params_recarray=params_recarray) + + np.testing.assert_almost_equal( + values, + [2.31, 2.31, 2.94, 2.94, 3.12, 3.12]) + np.testing.assert_almost_equal( + grads, + [[0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0.]]) + + def test__call__with_same_source_values(self): + """Test for when the interpolation parameter has the same values for all + sources. + """ + params_recarray = np.empty( + (self.tdm.n_sources,), + dtype=[('p1', np.float64), ('p2', np.float64)]) + params_recarray['p1'] = [2.12, 2.12, 2.12] + params_recarray['p2'] = [-1.06, -1.06, -1.06] + + (values, grads) = self.interpolmethod( + tdm=self.tdm, + eventdata=self.eventdata, + params_recarray=params_recarray) + + np.testing.assert_almost_equal( + values, + [-2.31, -2.31, -2.31, -2.31, -2.31, -2.31]) + np.testing.assert_almost_equal( + grads, + [[0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0.]]) + + def test__call__with_single_value(self): + """Test for when the interpolation parameters have the same values for + all sources and is provided as a single set. 
+ """ + params_recarray = np.empty( + (self.tdm.n_sources,), + dtype=[('p1', np.float64), ('p2', np.float64)]) + params_recarray['p1'] = [2.12] + params_recarray['p2'] = [-1.06] + + (values, grads) = self.interpolmethod( + tdm=self.tdm, + eventdata=self.eventdata, + params_recarray=params_recarray) + + np.testing.assert_almost_equal( + values, + [-2.31, -2.31, -2.31, -2.31, -2.31, -2.31]) + np.testing.assert_almost_equal( + grads, + [[0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0.]]) + + def test__call__with_grid_edge_values(self): + """Test for when the interpolation parameters fall on the grid edges. + """ + params_recarray = np.empty( + (self.tdm.n_sources,), + dtype=[('p1', np.float64), ('p2', np.float64)]) + params_recarray['p1'] = [-2.1, 1.4, 2.4] + params_recarray['p2'] = [-1.1, 2.1, 1.3] + + (values, grads) = self.interpolmethod( + tdm=self.tdm, + eventdata=self.eventdata, + params_recarray=params_recarray) + + np.testing.assert_almost_equal( + values, + [2.31, 2.31, 2.94, 2.94, 3.12, 3.12]) + np.testing.assert_almost_equal( + grads, + [[0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0.]]) + + +class Linear1DGridManifoldInterpolationMethod_TestCase(unittest.TestCase): + def setUp(self): + param_grid = ParameterGrid.from_range('p', -3, 3, 0.1) + + self.interpolmethod = Linear1DGridManifoldInterpolationMethod( + func=line_manifold_func, + param_grid_set=ParameterGridSet((param_grid,))) + + self.tdm = create_tdm(n_sources=3, n_selected_events=2) + + self.eventdata = np.zeros( + (self.tdm.n_selected_events, 1), dtype=np.float64) + + def test__call__with_different_source_values(self): + """Test for when the interpolation parameter has different values for + different sources. 
+ """ + params_recarray = np.empty( + (self.tdm.n_sources,), dtype=[('p', np.float64)]) + params_recarray['p'] = [-2.12, 1.36, 2.4] + + (values, grads) = self.interpolmethod( + tdm=self.tdm, + eventdata=self.eventdata, + params_recarray=params_recarray) + + np.testing.assert_almost_equal( + values, + [-3.24, -3.24, 3.72, 3.72, 5.8, 5.8]) + np.testing.assert_almost_equal( + grads, + [[2., 2., 2., 2., 2., 2.]]) + + def test__call__with_same_source_values(self): + """Test for when the interpolation parameter has the same values for all + sources. + """ + params_recarray = np.empty( + (self.tdm.n_sources,), dtype=[('p', np.float64)]) + params_recarray['p'] = [1.36, 1.36, 1.36] + + (values, grads) = self.interpolmethod( + tdm=self.tdm, + eventdata=self.eventdata, + params_recarray=params_recarray) + + np.testing.assert_almost_equal( + values, + [3.72, 3.72, 3.72, 3.72, 3.72, 3.72]) + np.testing.assert_almost_equal( + grads, + [[2., 2., 2., 2., 2., 2.]]) + + def test__call__with_single_value(self): + """Test for when the interpolation parameter has the same values for all + sources and is provided as a single value. + """ + params_recarray = np.empty( + (self.tdm.n_sources,), dtype=[('p', np.float64)]) + params_recarray['p'] = [1.36] + + (values, grads) = self.interpolmethod( + tdm=self.tdm, + eventdata=self.eventdata, + params_recarray=params_recarray) + + np.testing.assert_almost_equal( + values, + [3.72, 3.72, 3.72, 3.72, 3.72, 3.72]) + np.testing.assert_almost_equal( + grads, + [[2., 2., 2., 2., 2., 2.]]) + + def test__call__with_grid_edge_values(self): + """Test for when the interpolation parameters fall on the grid edges. 
+ """ + params_recarray = np.empty( + (self.tdm.n_sources,), dtype=[('p', np.float64)]) + params_recarray['p'] = [-3., 0, 3] + + (values, grads) = self.interpolmethod( + tdm=self.tdm, + eventdata=self.eventdata, + params_recarray=params_recarray) + np.testing.assert_almost_equal( + values, + [-5., -5., 1., 1., 7., 7.]) + np.testing.assert_almost_equal( + grads, + [[2., 2., 2., 2., 2., 2.]]) + + +class Parabola1DGridManifoldInterpolationMethod_TestCase(unittest.TestCase): + def setUp(self): + param_grid = ParameterGrid.from_range('p', -3, 3, 0.1) + + self.interpolmethod = Parabola1DGridManifoldInterpolationMethod( + func=line_manifold_func, + param_grid_set=ParameterGridSet((param_grid,))) + + self.tdm = create_tdm(n_sources=3, n_selected_events=2) + + self.eventdata = np.zeros( + (self.tdm.n_selected_events, 1), dtype=np.float64) + + def test__call__with_different_source_values(self): + """Test for when the interpolation parameter has different values for + different sources. + """ + params_recarray = np.empty( + (self.tdm.n_sources,), dtype=[('p', np.float64)]) + params_recarray['p'] = [-2.12, 1.36, 2.4] + + (values, grads) = self.interpolmethod( + tdm=self.tdm, + eventdata=self.eventdata, + params_recarray=params_recarray) + + # A parabola approximation of a line will be a line again. + np.testing.assert_almost_equal( + values, + [-3.24, -3.24, 3.72, 3.72, 5.8, 5.8]) + np.testing.assert_almost_equal( + grads, + [[2., 2., 2., 2., 2., 2.]]) + + def test__call__with_same_source_values(self): + """Test for when the interpolation parameter has the same values for all + sources. 
+ """ + params_recarray = np.empty( + (self.tdm.n_sources,), dtype=[('p', np.float64)]) + params_recarray['p'] = [1.36, 1.36, 1.36] + + (values, grads) = self.interpolmethod( + tdm=self.tdm, + eventdata=self.eventdata, + params_recarray=params_recarray) + + np.testing.assert_almost_equal( + values, + [3.72, 3.72, 3.72, 3.72, 3.72, 3.72]) + np.testing.assert_almost_equal( + grads, + [[2., 2., 2., 2., 2., 2.]]) + + def test__call__with_single_value(self): + """Test for when the interpolation parameter has the same values for all + sources and is provided as a single value. + """ + params_recarray = np.empty( + (self.tdm.n_sources,), dtype=[('p', np.float64)]) + params_recarray['p'] = [1.36] + + (values, grads) = self.interpolmethod( + tdm=self.tdm, + eventdata=self.eventdata, + params_recarray=params_recarray) + + np.testing.assert_almost_equal( + values, + [3.72, 3.72, 3.72, 3.72, 3.72, 3.72]) + np.testing.assert_almost_equal( + grads, + [[2., 2., 2., 2., 2., 2.]]) + + def test__call__with_grid_edge_values(self): + """Test for when the interpolation parameters fall on the grid edges. 
+ """ + params_recarray = np.empty( + (self.tdm.n_sources,), dtype=[('p', np.float64)]) + params_recarray['p'] = [-3., 0, 3] + + (values, grads) = self.interpolmethod( + tdm=self.tdm, + eventdata=self.eventdata, + params_recarray=params_recarray) + np.testing.assert_almost_equal( + values, + [-5., -5., 1., 1., 7., 7.]) + np.testing.assert_almost_equal( + grads, + [[2., 2., 2., 2., 2., 2.]]) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/core/test_model.py b/tests/core/test_model.py index c71c792693..fa9b2af641 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -57,7 +57,7 @@ def test_cast(self): with self.assertRaises(TypeError): modelcoll = ModelCollection.cast('A str instance.') with self.assertRaises(TypeError): - modelcoll = ModelCollection.cast(('str1','str2')) + modelcoll = ModelCollection.cast(('str1', 'str2')) def test_model_type(self): self.assertTrue(issubclass(self.modelcoll.model_type, Model)) @@ -68,5 +68,5 @@ def test_models(self): self.assertEqual(self.modelcoll.models[1], self.model2) -if(__name__ == '__main__'): +if __name__ == '__main__': unittest.main() diff --git a/tests/core/test_optimize.py b/tests/core/test_optimize.py deleted file mode 100644 index 7113980633..0000000000 --- a/tests/core/test_optimize.py +++ /dev/null @@ -1,886 +0,0 @@ -# -*- coding: utf-8 -*- - -"""This test module tests classes, methods and functions of the -``core.optimize`` module. - -Note: The `PsiFuncEventSelectionMethod` is not currently used/tested. 
-""" - -import unittest -from unittest.mock import Mock - -import numpy as np - -from skyllh.core.optimize import ( - AllEventSelectionMethod, - DecBandEventSectionMethod, - # PsiFuncEventSelectionMethod, - RABandEventSectionMethod, - SpatialBoxAndPsiFuncEventSelectionMethod, - SpatialBoxEventSelectionMethod, -) -from skyllh.core.source_hypothesis import SourceHypoGroupManager -from skyllh.core.storage import DataFieldRecordArray -from skyllh.physics.source import PointLikeSource - -from tests.core.testdata.testdata_generator import generate_testdata - - -def shgm_setup(n_sources=1): - # Mock SourceHypoGroupManager class in order to pass isinstance checks and - # set its properties used by event selection methods. - shgm = Mock(spec_set=["__class__", "source_list", "n_sources"]) - shgm.__class__ = SourceHypoGroupManager - - rng = np.random.default_rng(0) - x = rng.random((n_sources, 2)) - src_ras = 2 * np.pi * x[:, 0] - src_decs = np.pi * (x[:, 1] - 0.5) - source_list = [ - PointLikeSource(*src_loc) for src_loc in zip(src_ras, src_decs) - ] - - shgm.source_list = source_list - shgm.n_sources = n_sources - - return shgm - - -def get_func_psi_ang_err(ang_err=0.5): - def func_psi_ang_err(psi): - """A dummy function for psi func event selection. - - Parameters - ---------- - psi : 1d ndarray of floats - The opening angle between the source position and the event's - reconstructed position. 
- """ - return ang_err * np.ones_like(psi) - - return func_psi_ang_err - - -class AllEventSelectionMethod_TestCase(unittest.TestCase): - def setUp(self): - testdata = generate_testdata() - self.test_events = DataFieldRecordArray(testdata.get("exp_testdata")) - - def test_change_source_hypo_group_manager(self): - n_sources = 1 - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - evt_sel_method = AllEventSelectionMethod(src_hypo_group_manager) - - self.assertEqual( - evt_sel_method.src_hypo_group_manager.source_list, - src_hypo_group_manager.source_list, - ) - self.assertEqual( - evt_sel_method.src_hypo_group_manager.n_sources, - src_hypo_group_manager.n_sources, - ) - - # Change the SourceHypoGroupManager instance. - n_sources = 2 - src_hypo_group_manager_new = shgm_setup(n_sources=n_sources) - evt_sel_method.change_source_hypo_group_manager( - src_hypo_group_manager_new - ) - - self.assertEqual( - evt_sel_method.src_hypo_group_manager.source_list, - src_hypo_group_manager_new.source_list, - ) - self.assertEqual( - evt_sel_method.src_hypo_group_manager.n_sources, - src_hypo_group_manager_new.n_sources, - ) - - def test_select_events_single_source(self): - n_sources = 1 - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - evt_sel_method = AllEventSelectionMethod(src_hypo_group_manager) - - # Test with `ret_src_ev_idxs=False`. - (events, idxs) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=False - ) - - np.testing.assert_array_equal(events, self.test_events) - self.assertIsNone(idxs) - - # Test with `ret_src_ev_idxs=True`. 
- (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=True - ) - - np.testing.assert_array_equal(events, self.test_events) - self.assertEqual(len(src_idxs), n_sources * len(self.test_events)) - self.assertEqual(len(ev_idxs), n_sources * len(self.test_events)) - np.testing.assert_array_equal(np.unique(src_idxs), np.arange(n_sources)) - np.testing.assert_array_equal( - np.unique(ev_idxs), np.arange(len(self.test_events)) - ) - - def test_select_events_multiple_sources(self): - n_sources = 2 - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - evt_sel_method = AllEventSelectionMethod(src_hypo_group_manager) - - # Test with `ret_src_ev_idxs=False`. - (events, idxs) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=False - ) - - np.testing.assert_array_equal(events, self.test_events) - self.assertIsNone(idxs) - - # Test with `ret_src_ev_idxs=True`. - (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=True - ) - - np.testing.assert_array_equal(events, self.test_events) - self.assertEqual(len(src_idxs), n_sources * len(self.test_events)) - self.assertEqual(len(ev_idxs), n_sources * len(self.test_events)) - np.testing.assert_array_equal(np.unique(src_idxs), np.arange(n_sources)) - np.testing.assert_array_equal( - np.unique(ev_idxs), np.arange(len(self.test_events)) - ) - - -class DecBandEventSectionMethod_TestCase(unittest.TestCase): - def setUp(self): - testdata = generate_testdata() - self.test_events = DataFieldRecordArray(testdata.get("events")) - - def test_source_to_array_single_source(self): - n_sources = 1 - delta_angle = np.deg2rad(15) - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - evt_sel_method = DecBandEventSectionMethod( - src_hypo_group_manager, delta_angle - ) - - src_arr = evt_sel_method.source_to_array( - src_hypo_group_manager.source_list - ) - - src_ras = np.array( - [source.ra for source in 
src_hypo_group_manager.source_list] - ) - src_decs = np.array( - [source.dec for source in src_hypo_group_manager.source_list] - ) - - np.testing.assert_array_equal(src_arr["ra"], src_ras) - np.testing.assert_array_equal(src_arr["dec"], src_decs) - - def test_source_to_array_multiple_sources(self): - n_sources = 2 - delta_angle = np.deg2rad(15) - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - evt_sel_method = DecBandEventSectionMethod( - src_hypo_group_manager, delta_angle - ) - - src_arr = evt_sel_method.source_to_array( - src_hypo_group_manager.source_list - ) - - src_ras = np.array( - [source.ra for source in src_hypo_group_manager.source_list] - ) - src_decs = np.array( - [source.dec for source in src_hypo_group_manager.source_list] - ) - - np.testing.assert_array_equal(src_arr["ra"], src_ras) - np.testing.assert_array_equal(src_arr["dec"], src_decs) - - def test_select_events_single_source(self): - n_sources = 1 - delta_angle = np.deg2rad(15) - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - evt_sel_method = DecBandEventSectionMethod( - src_hypo_group_manager, delta_angle - ) - - # Test with `ret_src_ev_idxs=False`. - (events, idxs) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=False - ) - dec_min = src_hypo_group_manager.source_list[0].dec - delta_angle - dec_max = src_hypo_group_manager.source_list[0].dec + delta_angle - - self.assertTrue( - np.all(events["dec"] > dec_min), - msg="Returned selected events below src_dec - delta_angle.", - ) - self.assertTrue( - np.all(events["dec"] < dec_max), - msg="Returned selected events above src_dec + delta_angle.", - ) - self.assertIsNone(idxs) - - # Test with `ret_src_ev_idxs=True`. 
- (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=True - ) - - self.assertTrue( - np.all(events["dec"] > dec_min), - msg="Returned selected events below src_dec - delta_angle.", - ) - self.assertTrue( - np.all(events["dec"] < dec_max), - msg="Returned selected events above src_dec + delta_angle.", - ) - - n_expected_events = np.sum( - (events["dec"] > dec_min) & (events["dec"] < dec_max) - ) - self.assertEqual(len(src_idxs), n_expected_events) - self.assertEqual(len(ev_idxs), n_expected_events) - np.testing.assert_array_equal(np.unique(src_idxs), np.arange(n_sources)) - np.testing.assert_array_equal( - np.unique(ev_idxs), np.arange(len(events)) - ) - - def test_select_events_multiple_sources(self): - n_sources = 2 - delta_angle = np.deg2rad(15) - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - evt_sel_method = DecBandEventSectionMethod( - src_hypo_group_manager, delta_angle - ) - - src_decs = [source.dec for source in src_hypo_group_manager.source_list] - - dec_min = np.min(src_decs) - delta_angle - dec_max = np.max(src_decs) + delta_angle - - # Test with `ret_src_ev_idxs=False`. - (events, idxs) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=False - ) - - self.assertTrue( - np.all(events["dec"] > dec_min), - msg="Returned selected events below minimal src_dec - delta_angle.", - ) - self.assertTrue( - np.all(events["dec"] < dec_max), - msg="Returned selected events above maximal src_dec + delta_angle.", - ) - self.assertIsNone(idxs) - - # Test with `ret_src_ev_idxs=True`. 
- (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=True - ) - - self.assertTrue( - np.all(events["dec"] > dec_min), - msg="Returned selected events below minimal src_dec - delta_angle.", - ) - self.assertTrue( - np.all(events["dec"] < dec_max), - msg="Returned selected events above maximal src_dec + delta_angle.", - ) - - for i, src_dec in enumerate(src_decs): - events_mask = src_idxs == i - dec_min = src_dec - delta_angle - dec_max = src_dec + delta_angle - - self.assertTrue( - np.all(events["dec"][ev_idxs[events_mask]] > dec_min), - msg="Returned selected events below src_dec - delta_angle.", - ) - self.assertTrue( - np.all(events["dec"][ev_idxs[events_mask]] < dec_max), - msg="Returned selected events above src_dec + delta_angle.", - ) - np.testing.assert_array_equal(np.unique(src_idxs), np.arange(n_sources)) - np.testing.assert_array_equal( - np.unique(ev_idxs), np.arange(len(events)) - ) - - -class RABandEventSectionMethod_TestCase(unittest.TestCase): - def setUp(self): - testdata = generate_testdata() - self.test_events = DataFieldRecordArray(testdata.get("events")) - - def test_select_events_single_source(self): - n_sources = 1 - delta_angle = np.deg2rad(15) - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - evt_sel_method = RABandEventSectionMethod( - src_hypo_group_manager, delta_angle - ) - - src_ras = np.array( - [source.ra for source in src_hypo_group_manager.source_list] - ) - src_decs = np.array( - [source.dec for source in src_hypo_group_manager.source_list] - ) - - # Get the minus and plus declination around the sources. - src_dec_minus = np.maximum(-np.pi / 2, src_decs - delta_angle) - src_dec_plus = np.minimum(src_decs + delta_angle, np.pi / 2) - - # Calculate the cosine factor for the largest declination distance from - # the source. We use np.amin here because smaller cosine values are - # larger angles. - # cosfact is a (N_sources,)-shaped ndarray. 
- cosfact = np.amin(np.cos([src_dec_minus, src_dec_plus]), axis=0) - - # Calculate delta RA, which is a function of declination. - # dRA is a (N_sources,)-shaped ndarray. - dRA_half = np.amin( - [np.repeat(2 * np.pi, n_sources), np.fabs(delta_angle / cosfact)], - axis=0, - ) - - # Test with `ret_src_ev_idxs=False`. - (events, idxs) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=False - ) - - for i in range(n_sources): - src_ra_max = src_ras[i] + dRA_half[i] - np.pi - - self.assertTrue( - np.all(np.fabs(events["ra"] - np.pi) < src_ra_max), - msg="Returned selected events above maximal " - "src_ra + delta_angle/cosfact.", - ) - self.assertIsNone(idxs) - - # Test with `ret_src_ev_idxs=True`. - (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=True - ) - - for i in range(n_sources): - events_mask = src_idxs == i - src_ra_max = src_ras[i] + dRA_half[i] - np.pi - - self.assertTrue( - np.all( - np.fabs(events["ra"][ev_idxs[events_mask]] - np.pi) - < src_ra_max - ), - msg="Returned selected events above maximal " - "src_ra + delta_angle/cosfact.", - ) - - src_ra_max = src_ras[0] + dRA_half[0] - np.pi - n_expected_events = np.sum((np.fabs(events["ra"] - np.pi) < src_ra_max)) - - self.assertEqual(len(src_idxs), n_expected_events) - self.assertEqual(len(ev_idxs), n_expected_events) - np.testing.assert_array_equal(np.unique(src_idxs), np.arange(n_sources)) - np.testing.assert_array_equal( - np.unique(ev_idxs), np.arange(len(events)) - ) - - def test_select_events_multiple_sources(self): - n_sources = 2 - delta_angle = np.deg2rad(15) - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - evt_sel_method = RABandEventSectionMethod( - src_hypo_group_manager, delta_angle - ) - - src_ras = np.array( - [source.ra for source in src_hypo_group_manager.source_list] - ) - src_decs = np.array( - [source.dec for source in src_hypo_group_manager.source_list] - ) - - # Get the minus and plus declination around the 
sources. - src_dec_minus = np.maximum(-np.pi / 2, src_decs - delta_angle) - src_dec_plus = np.minimum(src_decs + delta_angle, np.pi / 2) - - # Calculate the cosine factor for the largest declination distance from - # the source. We use np.amin here because smaller cosine values are - # larger angles. - # cosfact is a (N_sources,)-shaped ndarray. - cosfact = np.amin(np.cos([src_dec_minus, src_dec_plus]), axis=0) - - # Calculate delta RA, which is a function of declination. - # dRA is a (N_sources,)-shaped ndarray. - dRA_half = np.amin( - [np.repeat(2 * np.pi, n_sources), np.fabs(delta_angle / cosfact)], - axis=0, - ) - - # Test with `ret_src_ev_idxs=False`. - (events, idxs) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=False - ) - - # TODO: Can't really test events of multiple sources selection without - # idxs. - self.assertIsNone(idxs) - - # Test with `ret_src_ev_idxs=True`. - (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=True - ) - - for i in range(n_sources): - events_mask = src_idxs == i - src_ra_max = src_ras[i] + dRA_half[i] - np.pi - - self.assertTrue( - np.all( - np.fabs(events["ra"][ev_idxs[events_mask]] - np.pi) - < src_ra_max - ), - msg="Returned selected events above maximal " - "src_ra + delta_angle/cosfact.", - ) - np.testing.assert_array_equal(np.unique(src_idxs), np.arange(n_sources)) - np.testing.assert_array_equal( - np.unique(ev_idxs), np.arange(len(events)) - ) - - -class SpatialBoxEventSelectionMethod_TestCase(unittest.TestCase): - def setUp(self): - testdata = generate_testdata() - self.test_events = DataFieldRecordArray(testdata.get("events")) - - def test_select_events_single_source(self): - n_sources = 1 - delta_angle = np.deg2rad(15) - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - evt_sel_method = SpatialBoxEventSelectionMethod( - src_hypo_group_manager, delta_angle - ) - - src_ras = np.array( - [source.ra for source in 
src_hypo_group_manager.source_list] - ) - src_decs = np.array( - [source.dec for source in src_hypo_group_manager.source_list] - ) - - # Get the minus and plus declination around the sources. - src_dec_minus = np.maximum(-np.pi / 2, src_decs - delta_angle) - src_dec_plus = np.minimum(src_decs + delta_angle, np.pi / 2) - - # Calculate the cosine factor for the largest declination distance from - # the source. We use np.amin here because smaller cosine values are - # larger angles. - # cosfact is a (N_sources,)-shaped ndarray. - cosfact = np.amin(np.cos([src_dec_minus, src_dec_plus]), axis=0) - - # Calculate delta RA, which is a function of declination. - # dRA is a (N_sources,)-shaped ndarray. - dRA_half = np.amin( - [np.repeat(2 * np.pi, n_sources), np.fabs(delta_angle / cosfact)], - axis=0, - ) - - # Test with `ret_src_ev_idxs=False`. - (events, idxs) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=False - ) - - for i in range(n_sources): - src_ra_max = src_ras[i] + dRA_half[i] - np.pi - - self.assertTrue( - np.all(np.fabs(events["ra"] - np.pi) < src_ra_max), - msg="Returned selected events above maximal " - "src_ra + delta_angle/cosfact.", - ) - - dec_min = src_decs[i] - delta_angle - dec_max = src_decs[i] + delta_angle - - self.assertTrue( - np.all(events["dec"] > dec_min), - msg="Returned selected events below src_dec - delta_angle.", - ) - self.assertTrue( - np.all(events["dec"] < dec_max), - msg="Returned selected events above src_dec + delta_angle.", - ) - self.assertIsNone(idxs) - - # Test with `ret_src_ev_idxs=True`. 
- (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=True - ) - - for i in range(n_sources): - events_mask = src_idxs == i - src_ra_max = src_ras[i] + dRA_half[i] - np.pi - - self.assertTrue( - np.all( - np.fabs(events["ra"][ev_idxs[events_mask]] - np.pi) - < src_ra_max - ), - msg="Returned selected events above maximal " - "src_ra + delta_angle/cosfact.", - ) - - dec_min = src_decs[i] - delta_angle - dec_max = src_decs[i] + delta_angle - - self.assertTrue( - np.all(events["dec"][ev_idxs[events_mask]] > dec_min), - msg="Returned selected events below src_dec - delta_angle.", - ) - self.assertTrue( - np.all(events["dec"][ev_idxs[events_mask]] < dec_max), - msg="Returned selected events above src_dec + delta_angle.", - ) - - src_ra_max = src_ras[0] + dRA_half[0] - np.pi - n_expected_events = np.sum((np.fabs(events["ra"] - np.pi) < src_ra_max)) - self.assertEqual(len(src_idxs), n_expected_events) - self.assertEqual(len(ev_idxs), n_expected_events) - np.testing.assert_array_equal(np.unique(src_idxs), np.arange(n_sources)) - np.testing.assert_array_equal( - np.unique(ev_idxs), np.arange(len(events)) - ) - - def test_select_events_multiple_sources(self): - n_sources = 2 - delta_angle = np.deg2rad(15) - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - evt_sel_method = SpatialBoxEventSelectionMethod( - src_hypo_group_manager, delta_angle - ) - - src_ras = np.array( - [source.ra for source in src_hypo_group_manager.source_list] - ) - src_decs = np.array( - [source.dec for source in src_hypo_group_manager.source_list] - ) - - # Get the minus and plus declination around the sources. - src_dec_minus = np.maximum(-np.pi / 2, src_decs - delta_angle) - src_dec_plus = np.minimum(src_decs + delta_angle, np.pi / 2) - - # Calculate the cosine factor for the largest declination distance from - # the source. We use np.amin here because smaller cosine values are - # larger angles. - # cosfact is a (N_sources,)-shaped ndarray. 
- cosfact = np.amin(np.cos([src_dec_minus, src_dec_plus]), axis=0) - - # Calculate delta RA, which is a function of declination. - # dRA is a (N_sources,)-shaped ndarray. - dRA_half = np.amin( - [np.repeat(2 * np.pi, n_sources), np.fabs(delta_angle / cosfact)], - axis=0, - ) - - # Test with `ret_src_ev_idxs=False`. - (events, idxs) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=False - ) - - dec_min = np.min(src_decs) - delta_angle - dec_max = np.max(src_decs) + delta_angle - - self.assertTrue( - np.all(events["dec"] > dec_min), - msg="Returned selected events below src_dec - delta_angle.", - ) - self.assertTrue( - np.all(events["dec"] < dec_max), - msg="Returned selected events above src_dec + delta_angle.", - ) - self.assertIsNone(idxs) - - # Test with `ret_src_ev_idxs=True`. - (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=True - ) - - for i in range(n_sources): - events_mask = src_idxs == i - src_ra_max = src_ras[i] + dRA_half[i] - np.pi - - self.assertTrue( - np.all( - np.fabs(events["ra"][ev_idxs[events_mask]] - np.pi) - < src_ra_max - ), - msg="Returned selected events above maximal " - "src_ra + delta_angle/cosfact.", - ) - - dec_min = src_decs[i] - delta_angle - dec_max = src_decs[i] + delta_angle - - self.assertTrue( - np.all(events["dec"][ev_idxs[events_mask]] > dec_min), - msg="Returned selected events below src_dec - delta_angle.", - ) - self.assertTrue( - np.all(events["dec"][ev_idxs[events_mask]] < dec_max), - msg="Returned selected events above src_dec + delta_angle.", - ) - np.testing.assert_array_equal(np.unique(src_idxs), np.arange(n_sources)) - np.testing.assert_array_equal( - np.unique(ev_idxs), np.arange(len(events)) - ) - - -class SpatialBoxAndPsiFuncEventSelectionMethod_TestCase(unittest.TestCase): - def setUp(self): - testdata = generate_testdata() - self.test_events = DataFieldRecordArray(testdata.get("events")) - - def test_select_events_single_source(self): - 
"""Check if the event selection without a psi cut returns an identical - result to the `SpatialBoxEventSelectionMethod`. - """ - n_sources = 1 - delta_angle = np.deg2rad(15) - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - func = get_func_psi_ang_err(ang_err=0) - evt_sel_method = SpatialBoxAndPsiFuncEventSelectionMethod( - src_hypo_group_manager=src_hypo_group_manager, - delta_angle=delta_angle, - psi_name="psi", - func=func, - axis_name_list=["psi"], - ) - - evt_sel_method_sb = SpatialBoxEventSelectionMethod( - src_hypo_group_manager, delta_angle - ) - - # Test with `ret_src_ev_idxs=False`. - (events, idxs) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=False - ) - (events_sb, idxs_sb) = evt_sel_method_sb.select_events( - self.test_events, ret_src_ev_idxs=False - ) - - np.testing.assert_array_equal( - events.as_numpy_record_array(), - events_sb.as_numpy_record_array(), - ) - self.assertIsNone(idxs) - self.assertIsNone(idxs_sb) - - # Test with `ret_src_ev_idxs=True`. - (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=True - ) - ( - events_sb, - (src_idxs_sb, ev_idxs_sb), - ) = evt_sel_method_sb.select_events( - self.test_events, ret_src_ev_idxs=True) - - np.testing.assert_array_equal( - events.as_numpy_record_array(), - events_sb.as_numpy_record_array(), - ) - np.testing.assert_array_equal(src_idxs, src_idxs_sb) - np.testing.assert_array_equal(ev_idxs, ev_idxs_sb) - np.testing.assert_array_equal(np.unique(src_idxs), np.arange(n_sources)) - np.testing.assert_array_equal( - np.unique(ev_idxs), np.arange(len(events)) - ) - - def test_select_events_multiple_sources(self): - """Check if the event selection without a psi cut returns an identical - result to the `SpatialBoxEventSelectionMethod`. 
- """ - n_sources = 2 - delta_angle = np.deg2rad(15) - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - func = get_func_psi_ang_err(ang_err=0) - evt_sel_method = SpatialBoxAndPsiFuncEventSelectionMethod( - src_hypo_group_manager=src_hypo_group_manager, - delta_angle=delta_angle, - psi_name="psi", - func=func, - axis_name_list=["psi"], - ) - - evt_sel_method_sb = SpatialBoxEventSelectionMethod( - src_hypo_group_manager, delta_angle - ) - - # Test with `ret_src_ev_idxs=False`. - (events, idxs) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=False - ) - (events_sb, idxs_sb) = evt_sel_method_sb.select_events( - self.test_events, ret_src_ev_idxs=False - ) - - np.testing.assert_array_equal( - events.as_numpy_record_array(), - events_sb.as_numpy_record_array(), - ) - self.assertIsNone(idxs) - self.assertIsNone(idxs_sb) - - # Test with `ret_src_ev_idxs=True`. - (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=True - ) - ( - events_sb, - (src_idxs_sb, ev_idxs_sb), - ) = evt_sel_method_sb.select_events( - self.test_events, ret_src_ev_idxs=True) - - np.testing.assert_array_equal( - events.as_numpy_record_array(), - events_sb.as_numpy_record_array(), - ) - np.testing.assert_array_equal(src_idxs, src_idxs_sb) - np.testing.assert_array_equal(ev_idxs, ev_idxs_sb) - np.testing.assert_array_equal(np.unique(src_idxs), np.arange(n_sources)) - np.testing.assert_array_equal( - np.unique(ev_idxs), np.arange(len(events)) - ) - - def test_select_events_single_source_psi_func(self): - n_sources = 1 - delta_angle = np.deg2rad(15) - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - ang_err = 3.0 - - func = get_func_psi_ang_err(ang_err) - evt_sel_method = SpatialBoxAndPsiFuncEventSelectionMethod( - src_hypo_group_manager=src_hypo_group_manager, - delta_angle=delta_angle, - psi_name="psi", - func=func, - axis_name_list=["psi"], - psi_floor=0.0, - ) - - evt_sel_method_sb = SpatialBoxEventSelectionMethod( - 
src_hypo_group_manager, delta_angle - ) - - # Test with `ret_src_ev_idxs=False`. - (events, idxs) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=False - ) - (events_sb, idxs_sb) = evt_sel_method_sb.select_events( - self.test_events, ret_src_ev_idxs=False - ) - - mask_psi_cut = events_sb["ang_err"] > ang_err - - np.testing.assert_array_equal( - events.as_numpy_record_array(), - events_sb[mask_psi_cut].as_numpy_record_array(), - ) - self.assertIsNone(idxs) - self.assertIsNone(idxs_sb) - - # Test with `ret_src_ev_idxs=True`. - (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=True - ) - ( - events_sb, - (src_idxs_sb, ev_idxs_sb), - ) = evt_sel_method_sb.select_events( - self.test_events, ret_src_ev_idxs=True) - - mask_psi_cut = events_sb["ang_err"] > ang_err - - np.testing.assert_array_equal( - events.as_numpy_record_array(), - events_sb[mask_psi_cut].as_numpy_record_array(), - ) - np.testing.assert_array_equal(np.unique(src_idxs), np.arange(n_sources)) - np.testing.assert_array_equal( - np.unique(ev_idxs), np.arange(len(events)) - ) - - def test_select_events_multiple_sources_psi_func(self): - n_sources = 2 - delta_angle = np.deg2rad(15) - src_hypo_group_manager = shgm_setup(n_sources=n_sources) - ang_err = 3.0 - - func = get_func_psi_ang_err(ang_err) - evt_sel_method = SpatialBoxAndPsiFuncEventSelectionMethod( - src_hypo_group_manager=src_hypo_group_manager, - delta_angle=delta_angle, - psi_name="psi", - func=func, - axis_name_list=["psi"], - psi_floor=0.0, - ) - - evt_sel_method_sb = SpatialBoxEventSelectionMethod( - src_hypo_group_manager, delta_angle - ) - - # Test with `ret_src_ev_idxs=False`. 
- (events, idxs) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=False - ) - (events_sb, idxs_sb) = evt_sel_method_sb.select_events( - self.test_events, ret_src_ev_idxs=False - ) - - mask_psi_cut = events_sb["ang_err"] > ang_err - - np.testing.assert_array_equal( - events.as_numpy_record_array(), - events_sb[mask_psi_cut].as_numpy_record_array(), - ) - self.assertIsNone(idxs) - self.assertIsNone(idxs_sb) - - # Test with `ret_src_ev_idxs=True`. - (events, (src_idxs, ev_idxs)) = evt_sel_method.select_events( - self.test_events, ret_src_ev_idxs=True - ) - ( - events_sb, - (src_idxs_sb, ev_idxs_sb), - ) = evt_sel_method_sb.select_events( - self.test_events, ret_src_ev_idxs=True) - - for i in range(n_sources): - events_mask = src_idxs == i - events_mask_sb = src_idxs_sb == i - mask_psi_cut = ( - events_sb[ev_idxs_sb[events_mask_sb]]["ang_err"] > ang_err - ) - - np.testing.assert_array_equal( - events[ev_idxs[events_mask]].as_numpy_record_array(), - events_sb[ev_idxs_sb[events_mask_sb]][ - mask_psi_cut - ].as_numpy_record_array(), - ) - - np.testing.assert_array_equal(np.unique(src_idxs), np.arange(n_sources)) - np.testing.assert_array_equal( - np.unique(ev_idxs), np.arange(len(events)) - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/core/test_parameters.py b/tests/core/test_parameters.py index 476327de81..5a4a216571 100644 --- a/tests/core/test_parameters.py +++ b/tests/core/test_parameters.py @@ -5,30 +5,25 @@ """ import numpy as np -import os.path -import sys import unittest -from skyllh.core.binning import BinningDefinition -from skyllh.core.model import Model +from skyllh.core.binning import ( + BinningDefinition, +) +from skyllh.core.model import ( + Model, +) from skyllh.core.parameters import ( - HypoParameterDefinition, Parameter, - ParameterSet, - ParameterSetArray, ParameterGrid, ParameterGridSet, - SingleModelParameterMapper, - MultiModelParameterMapper + ParameterModelMapper, + ParameterSet, ) -from skyllh.core.py 
import const -from skyllh.core.random import RandomStateService -sys.path.append(os.path.join(os.path.split(__file__)[0], '..')) -from utils import isAlmostEqual GAMMA_GRID = [ - 1. , 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2. + 1., 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2. ] ECUT_GRID = [ @@ -100,10 +95,10 @@ def test_name(self): self.assertEqual(self.floating_param.name, 'floating_param') def test_initial(self): - self.assertTrue(isAlmostEqual( - self.fixed_param.initial, self.fixed_param_initial)) - self.assertTrue(isAlmostEqual( - self.floating_param.initial, self.floating_param_initial)) + np.testing.assert_almost_equal( + self.fixed_param.initial, self.fixed_param_initial) + np.testing.assert_almost_equal( + self.floating_param.initial, self.floating_param_initial) def test_isfixed(self): self.assertTrue(self.fixed_param.isfixed) @@ -111,19 +106,19 @@ def test_isfixed(self): def test_valmin(self): self.assertEqual(self.fixed_param.valmin, None) - self.assertTrue(isAlmostEqual( - self.floating_param.valmin, self.floating_param_valmin)) + np.testing.assert_almost_equal( + self.floating_param.valmin, self.floating_param_valmin) def test_valmax(self): self.assertEqual(self.fixed_param.valmax, None) - self.assertTrue(isAlmostEqual( - self.floating_param.valmax, self.floating_param_valmax)) + np.testing.assert_almost_equal( + self.floating_param.valmax, self.floating_param_valmax) def test_value(self): - self.assertTrue(isAlmostEqual( - self.fixed_param.value, self.fixed_param_initial)) - self.assertTrue(isAlmostEqual( - self.floating_param.value, self.floating_param_initial)) + np.testing.assert_almost_equal( + self.fixed_param.value, self.fixed_param_initial) + np.testing.assert_almost_equal( + self.floating_param.value, self.floating_param_initial) # Try to change the value of a fixed parameter. 
with self.assertRaises(ValueError): @@ -135,11 +130,9 @@ def test_value(self): self.floating_param.value = self.fixed_param_initial def test_str(self): - try: - str(self.fixed_param) - str(self.floating_param) - except: - self.fail('The __str__ method raised an exception!') + # Make sure the __str__ methods don't raise exceptions. + str(self.fixed_param) + str(self.floating_param) def test_as_linear_grid(self): grid_delta = 0.1 @@ -147,25 +140,25 @@ def test_as_linear_grid(self): self.fixed_param.as_linear_grid(grid_delta) param_grid = self.floating_param.as_linear_grid(grid_delta) - self.assertTrue(np.all(isAlmostEqual( - param_grid.grid, self.floating_param_grid))) + np.testing.assert_almost_equal( + param_grid.grid, self.floating_param_grid) def test_change_fixed_value(self): with self.assertRaises(ValueError): self.floating_param.change_fixed_value(self.fixed_param_initial) self.fixed_param.change_fixed_value(self.floating_param_initial) - self.assertTrue(isAlmostEqual( - self.fixed_param.initial, self.floating_param_initial)) - self.assertTrue(isAlmostEqual( - self.fixed_param.value, self.floating_param_initial)) + np.testing.assert_almost_equal( + self.fixed_param.initial, self.floating_param_initial) + np.testing.assert_almost_equal( + self.fixed_param.value, self.floating_param_initial) def test_make_fixed(self): self.floating_param.make_fixed(self.fixed_param_initial) - self.assertTrue(isAlmostEqual( - self.floating_param.initial, self.fixed_param_initial)) - self.assertTrue(isAlmostEqual( - self.floating_param.value, self.fixed_param_initial)) + np.testing.assert_almost_equal( + self.floating_param.initial, self.fixed_param_initial) + np.testing.assert_almost_equal( + self.floating_param.value, self.fixed_param_initial) def test_make_floating(self): with self.assertRaises(ValueError): @@ -185,14 +178,14 @@ def test_make_floating(self): initial=self.floating_param_initial, valmin=self.floating_param_valmin, valmax=self.floating_param_valmax) - 
self.assertTrue(isAlmostEqual( - self.fixed_param.initial, self.floating_param_initial)) - self.assertTrue(isAlmostEqual( - self.fixed_param.value, self.floating_param_initial)) - self.assertTrue(isAlmostEqual( - self.fixed_param.valmin, self.floating_param_valmin)) - self.assertTrue(isAlmostEqual( - self.fixed_param.valmax, self.floating_param_valmax)) + np.testing.assert_almost_equal( + self.fixed_param.initial, self.floating_param_initial) + np.testing.assert_almost_equal( + self.fixed_param.value, self.floating_param_initial) + np.testing.assert_almost_equal( + self.fixed_param.valmin, self.floating_param_valmin) + np.testing.assert_almost_equal( + self.fixed_param.valmax, self.floating_param_valmax) class ParameterSet_TestCase(unittest.TestCase): @@ -207,8 +200,8 @@ def test_union(self): p0 = Parameter('p0', 2.3) p1 = Parameter('p1', 1.1, valmin=0.5, valmax=1.6) p2 = Parameter('p2', 3.2, valmin=2.3, valmax=4.7) - paramset0 = ParameterSet((p0,p2)) - paramset1 = ParameterSet((p1,p2)) + paramset0 = ParameterSet((p0, p2)) + paramset1 = ParameterSet((p1, p2)) paramset_union = ParameterSet.union(paramset0, paramset1) params = paramset_union.params self.assertEqual(len(params), 3) @@ -217,7 +210,7 @@ def test_union(self): self.assertEqual(params[2], p1) def test_params(self, paramset=None): - if(paramset is None): + if paramset is None: paramset = self.paramset params = self.paramset.params self.assertEqual(len(params), 2) @@ -255,40 +248,48 @@ def test_n_fixed_params(self): def test_n_floating_params(self): self.assertEqual(self.paramset.n_floating_params, 1) - def test_fixed_param_name_list(self): - names = self.paramset.fixed_param_name_list + def test_fixed_params_idxs(self): + idxs = self.paramset.fixed_params_idxs + self.assertEqual(len(idxs), 1) + np.testing.assert_equal(idxs, [0]) + + def test_fixed_params_name_list(self): + names = self.paramset.fixed_params_name_list self.assertEqual(len(names), 1) self.assertEqual(names, ['p0']) - def 
test_floating_param_name_list(self): - names = self.paramset.floating_param_name_list + def test_floating_params_name_list(self): + names = self.paramset.floating_params_name_list self.assertEqual(len(names), 1) self.assertEqual(names, ['p1']) + def test_floating_params_idxs(self): + idxs = self.paramset.floating_params_idxs + self.assertEqual(len(idxs), 1) + np.testing.assert_equal(idxs, [1]) + def test_fixed_param_values(self): values = self.paramset.fixed_param_values - self.assertTrue(isAlmostEqual(values, [2.3])) + np.testing.assert_almost_equal(values, [2.3]) def test_floating_param_initials(self): initials = self.paramset.floating_param_initials - self.assertTrue(isAlmostEqual(initials, [1.1])) + np.testing.assert_almost_equal(initials, [1.1]) def test_floating_param_bounds(self): bounds = self.paramset.floating_param_bounds - self.assertTrue(isAlmostEqual(bounds[0], [0.5, 1.6])) + np.testing.assert_almost_equal(bounds[0], [0.5, 1.6]) def test_len(self): self.assertEqual(len(self.paramset), 2) def test_iter(self): - for (i,param) in enumerate(self.paramset): - self.assertEqual(param.name, 'p%d'%(i)) + for (i, param) in enumerate(self.paramset): + self.assertEqual(param.name, f'p{i}') def test_str(self): - try: - str(self.paramset) - except: - self.fail('The __str__ method raised exception!') + # Ensure that __str__ method does not raise an exception. 
+ str(self.paramset) def test_get_fixed_pidx(self): self.assertEqual(self.paramset.get_fixed_pidx('p0'), 0) @@ -322,7 +323,7 @@ def test_make_params_fixed(self): self.assertEqual(self.paramset.n_floating_params, 0) self.test_params() values = self.paramset.fixed_param_values - self.assertTrue(isAlmostEqual(values, [2.3, 0.4])) + np.testing.assert_almost_equal(values, [2.3, 0.4]) self.assertEqual(self.paramset.params[1].valmin, None) self.assertEqual(self.paramset.params[1].valmax, None) @@ -335,9 +336,9 @@ def test_make_params_fixed(self): self.assertEqual(self.paramset.n_floating_params, 0) self.test_params() values = self.paramset.fixed_param_values - self.assertTrue(isAlmostEqual(values, [2.3, 1.1])) - self.assertTrue(isAlmostEqual(self.paramset.params[1].valmin, 0.5)) - self.assertTrue(isAlmostEqual(self.paramset.params[1].valmax, 1.6)) + np.testing.assert_almost_equal(values, [2.3, 1.1]) + np.testing.assert_almost_equal(self.paramset.params[1].valmin, 0.5) + np.testing.assert_almost_equal(self.paramset.params[1].valmax, 1.6) def test_make_params_floating(self): # Already floating parameters cannot be made floating. 
@@ -351,17 +352,17 @@ def test_make_params_floating(self): self.paramset.make_params_floating({'p0': 1.2}) self.paramset.make_params_floating({'p0': (1.2, 1.0, 1.3)}) self.assertTrue(self.paramset.has_floating_param('p0')) - self.assertTrue(isAlmostEqual(self.paramset.params[0].initial, 1.2)) - self.assertTrue(isAlmostEqual(self.paramset.params[0].valmin, 1.0)) - self.assertTrue(isAlmostEqual(self.paramset.params[0].valmax, 1.3)) + np.testing.assert_almost_equal(self.paramset.params[0].initial, 1.2) + np.testing.assert_almost_equal(self.paramset.params[0].valmin, 1.0) + np.testing.assert_almost_equal(self.paramset.params[0].valmax, 1.3) def test_update_fixed_param_value_cache(self): self.assertAlmostEqual(self.paramset.params[0].value, 2.3) self.fixed_param.change_fixed_value(3.1) self.assertAlmostEqual(self.paramset.params[0].value, 3.1) - self.assertTrue(isAlmostEqual(self.paramset.fixed_param_values, [2.3])) + np.testing.assert_almost_equal(self.paramset.fixed_param_values, [2.3]) self.paramset.update_fixed_param_value_cache() - self.assertTrue(isAlmostEqual(self.paramset.fixed_param_values, [3.1])) + np.testing.assert_almost_equal(self.paramset.fixed_param_values, [3.1]) def test_copy(self): new_paramset = self.paramset.copy() @@ -397,109 +398,30 @@ def test_has_param(self): self.assertFalse(self.paramset.has_param(Parameter('p', 0.0))) def test_floating_param_values_to_dict(self): - param_dict = self.paramset.floating_param_values_to_dict(np.array([1.3])) - self.assertAlmostEqual(param_dict['p0'], 2.3) + param_dict = self.paramset.get_floating_params_dict(np.array([1.3])) + self.assertTrue(len(param_dict), 1) self.assertAlmostEqual(param_dict['p1'], 1.3) -class ParameterSetArray_TestCase(unittest.TestCase): - """This test case tests the ParameterSetArray class. 
- """ - def setUp(self): - self.fixed_param0 = Parameter( - 'fixed_param0', 2.3) - self.fixed_param1 = Parameter( - 'fixed_param1', 0.1) - self.floating_param0 = Parameter( - 'floating_param0', 1.1, valmin=0.5, valmax=1.6) - self.floating_param1 = Parameter( - 'floating_param1', 13.5, valmin=10.5, valmax=15) - self.paramset0 = ParameterSet((self.fixed_param0, self.floating_param0)) - self.paramset1 = ParameterSet((self.floating_param1, self.fixed_param1)) - - self.paramsetarr = ParameterSetArray( - (const(self.paramset0), const(self.paramset1))) - - def test__init__(self): - paramset0 = ParameterSet((self.fixed_param0, self.floating_param0)) - paramset1 = ParameterSet((self.floating_param1, self.fixed_param1)) - with self.assertRaises(TypeError): - paramsetarr = ParameterSetArray( - (paramset0, paramset1)) - - def test_paramset_list(self): - paramset_list = self.paramsetarr.paramset_list - self.assertEqual(len(paramset_list), 2) - self.assertEqual(paramset_list[0], self.paramset0) - self.assertEqual(paramset_list[1], self.paramset1) - - def test_n_params(self): - self.assertEqual(self.paramsetarr.n_params, 4) - - def test_n_fixed_params(self): - self.assertEqual(self.paramsetarr.n_fixed_params, 2) - - def test_n_floating_params(self): - self.assertEqual(self.paramsetarr.n_floating_params, 2) - - def test_floating_param_initials(self): - initials = self.paramsetarr.floating_param_initials - self.assertEqual(initials.shape, (2,)) - self.assertAlmostEqual(initials[0], 1.1) - self.assertAlmostEqual(initials[1], 13.5) - - def test_floating_param_bounds(self): - bounds = self.paramsetarr.floating_param_bounds - self.assertEqual(bounds.shape, (2,2)) - np.testing.assert_almost_equal(bounds[0], (0.5, 1.6)) - np.testing.assert_almost_equal(bounds[1], (10.5, 15)) - - def test__str__(self): - try: - str(self.paramsetarr) - except: - self.fail('The __str__ method raised exception!') - - def test_generate_random_initials(self): - rss_ref = RandomStateService(42) - rn = 
rss_ref.random.uniform(size=2) - - rss = RandomStateService(42) - initials = self.paramsetarr.generate_random_initials(rss) - np.testing.assert_almost_equal(initials, - (0.5+rn[0]*(1.6-0.5), - 10.5+rn[1]*(15-10.5))) - - def test_split_floating_param_values(self): - fl_param_values = np.array([0.9, 14.2]) - fl_param_values_list = self.paramsetarr.split_floating_param_values( - fl_param_values) - self.assertEqual(len(fl_param_values_list), 2) - self.assertEqual(len(fl_param_values_list[0]), 1) - self.assertEqual(len(fl_param_values_list[1]), 1) - np.testing.assert_almost_equal(fl_param_values_list[0], [0.9]) - np.testing.assert_almost_equal(fl_param_values_list[1], [14.2]) - - class ParameterGrid_TestCase(unittest.TestCase): """This test case tests the ParameterGrid class. """ def setUp(self): - self.paramgrid_gamma1 = ParameterGrid('gamma1', [ 1.5, 2., 2.5, 3., 3.5]) + self.paramgrid_gamma1 = ParameterGrid('gamma1', [1.5, 2., 2.5, 3., 3.5]) self.paramgrid_gamma2 = ParameterGrid('gamma2', GAMMA_GRID) - self.paramgrid_gamma3 = ParameterGrid('gamma3', [ 1.05, 1.15, 1.25, 1.35]) + self.paramgrid_gamma3 = ParameterGrid('gamma3', [1.05, 1.15, 1.25, 1.35]) def test_from_BinningDefinition(self): binning = BinningDefinition(name='gamma', binedges=GAMMA_GRID) param_grid = ParameterGrid.from_BinningDefinition(binning) self.assertEqual(param_grid.name, binning.name) - self.assertTrue(isAlmostEqual(param_grid.grid, GAMMA_GRID)) + np.testing.assert_almost_equal(param_grid.grid, GAMMA_GRID) def test_delta(self): - self.assertTrue(isAlmostEqual(self.paramgrid_gamma1.delta, 0.5)) - self.assertTrue(isAlmostEqual(self.paramgrid_gamma2.delta, 0.1)) - self.assertTrue(isAlmostEqual(self.paramgrid_gamma3.delta, 0.1)) + np.testing.assert_almost_equal(self.paramgrid_gamma1.delta, 0.5) + np.testing.assert_almost_equal(self.paramgrid_gamma2.delta, 0.1) + np.testing.assert_almost_equal(self.paramgrid_gamma3.delta, 0.1) def test_decimals(self): self.assertTrue(self.paramgrid_gamma1.decimals >= 
1) @@ -509,11 +431,12 @@ def test_decimals(self): def test_round_to_nearest_grid_point(self): # Test values outside the grid range. x = 1.49999999999 - with self.assertRaises(ValueError): - gp = self.paramgrid_gamma1.round_to_nearest_grid_point(x) + gp = self.paramgrid_gamma1.round_to_nearest_grid_point(x) + np.testing.assert_almost_equal(gp, [1.5]) + x = 3.50000000001 - with self.assertRaises(ValueError): - gp = self.paramgrid_gamma1.round_to_nearest_grid_point(x) + gp = self.paramgrid_gamma1.round_to_nearest_grid_point(x) + np.testing.assert_almost_equal(gp, [3.5]) # Test a value between two grid points. x = [2.1, 2.4, 2.2, 2.3] @@ -522,27 +445,27 @@ def test_round_to_nearest_grid_point(self): x = [1.051, 1.14] gp = self.paramgrid_gamma3.round_to_nearest_grid_point(x) - self.assertTrue(isAlmostEqual(gp, [1.05, 1.15])) + np.testing.assert_almost_equal(gp, [1.05, 1.15]) # Test a value on a grid point. x = [1.05, 1.35] gp = self.paramgrid_gamma3.round_to_nearest_grid_point(x) - self.assertTrue(isAlmostEqual(gp, [1.05, 1.35])) + np.testing.assert_almost_equal(gp, [1.05, 1.35]) def test_round_to_lower_grid_point(self): # Test a value between two grid points. x = 2.4 gp = self.paramgrid_gamma1.round_to_lower_grid_point(x) - self.assertTrue(isAlmostEqual(gp, 2.)) + np.testing.assert_almost_equal(gp, 2.) # Test a value at a grid point. x = 2. gp = self.paramgrid_gamma1.round_to_lower_grid_point(x) - self.assertTrue(isAlmostEqual(gp, 2.)) + np.testing.assert_almost_equal(gp, 2.) x = 1.6 gp = self.paramgrid_gamma2.round_to_lower_grid_point(x) - self.assertTrue(isAlmostEqual(gp, 1.6)) + np.testing.assert_almost_equal(gp, 1.6) x = [1.05, 1.15, 1.25, 1.35] gp = self.paramgrid_gamma3.round_to_lower_grid_point(x) @@ -552,16 +475,16 @@ def test_round_to_upper_grid_point(self): # Test a value between two grid points. 
x = 2.4 gp = self.paramgrid_gamma1.round_to_upper_grid_point(x) - self.assertTrue(isAlmostEqual(gp, 2.5)) + np.testing.assert_almost_equal(gp, 2.5) # Test a value at a grid point. x = 2. gp = self.paramgrid_gamma1.round_to_upper_grid_point(x) - self.assertTrue(isAlmostEqual(gp, 2.5)) + np.testing.assert_almost_equal(gp, 2.5) x = 1.6 gp = self.paramgrid_gamma2.round_to_upper_grid_point(x) - self.assertTrue(isAlmostEqual(gp, 1.7)) + np.testing.assert_almost_equal(gp, 1.7) x = [1.05, 1.15, 1.25, 1.35] gp = self.paramgrid_gamma3.round_to_upper_grid_point(x) @@ -584,156 +507,103 @@ def test_ndim(self): self.assertEqual( self.paramgridset.ndim, 2) - def test_parameter_names(self): + def test_param_names(self): self.assertEqual( - self.paramgridset.parameter_names, ['gamma', 'Ecut']) + self.paramgridset.params_name_list, ['gamma', 'Ecut']) def test_parameter_permutation_dict_list(self): perm_dict_list = self.paramgridset.parameter_permutation_dict_list - self.assertTrue(isAlmostEqual( - [ d['gamma'] for d in perm_dict_list ], + np.testing.assert_almost_equal( + [d['gamma'] for d in perm_dict_list], np.repeat(np.array(GAMMA_GRID), len(ECUT_GRID)) - )) - self.assertTrue(isAlmostEqual( - [ d['Ecut'] for d in perm_dict_list ], + ) + np.testing.assert_almost_equal( + [d['Ecut'] for d in perm_dict_list], list(ECUT_GRID)*len(GAMMA_GRID) - )) - - def test_index(self): - self.assertEqual( - self.paramgridset.index(self.paramgrid_gamma), 0) - - self.assertEqual( - self.paramgridset.index(self.paramgrid_Ecut), 1) - - def test_index_by_name(self): - self.assertEqual( - self.paramgridset.index_by_name('gamma'), 0) - - self.assertEqual( - self.paramgridset.index_by_name('Ecut'), 1) - - def test_pop_and_add(self): - paramgrid_gamma = self.paramgridset.pop('gamma') - self.assertEqual(paramgrid_gamma.name, 'gamma') - - paramgrid_Ecut = self.paramgridset.pop() - self.assertEqual(paramgrid_Ecut.name, 'Ecut') - - self.paramgridset.add(paramgrid_gamma) - self.paramgridset.add(paramgrid_Ecut) 
- - # The altered ParameterGridSet instance should be the same as the - # initial ParameterGridSet instance. So just run all the tests on that - # altered one. - self.test_ndim() - self.test_parameter_names() - self.test_parameter_permutation_dict_list() - self.test_index() - self.test_index_by_name() - - -class SingleModelParameterMapperTestCase(unittest.TestCase): - def setUp(self): - self.fixed_param = Parameter('p1', 42) - self.floating_param = Parameter('p2', 4, 1, 6) - self.model = Model('the_src') - self.mpm = SingleModelParameterMapper( - 'signal', self.model) - - def test_name(self): - self.assertEqual(self.mpm.name, 'signal') - - def test_models(self): - self.assertEqual(len(self.mpm.models), 1) - self.assertEqual(self.mpm.models[0], self.model) - - def test_n_models(self): - self.assertEqual(self.mpm.n_models, 1) - - def test_str(self): - # Add some parameters to the model parameter mapper. - self.test_def_param() - try: - str(self.mpm) - except: - self.fail('The __str__ method raised an exception!') - - def test_def_param(self): - self.mpm.def_param(self.fixed_param) - self.mpm.def_param(self.floating_param, 'fp') - self.assertEqual(self.mpm.n_global_params, 2) - self.assertEqual(self.mpm.n_global_fixed_params, 1) - self.assertEqual(self.mpm.n_global_floating_params, 1) - - def test_get_model_param_dict(self): - # Add some parameters to the model parameter mapper. 
- self.test_def_param() - model_param_dict = self.mpm.get_model_param_dict(np.array([2.4])) - self.assertEqual(len(model_param_dict), 2) - self.assertTrue('p1' in model_param_dict) - self.assertTrue('fp' in model_param_dict) - self.assertAlmostEqual(model_param_dict['p1'], 42) - self.assertAlmostEqual(model_param_dict['fp'], 2.4) + ) -class MultiModelParameterMapperTestCase(unittest.TestCase): +class ParameterModelMapperTestCase(unittest.TestCase): def setUp(self): self.fixed_param0 = Parameter('p0', 42) self.floating_param0 = Parameter('p1', 4, 1, 6) self.floating_param1 = Parameter('p2', 13, 10, 15) self.model0 = Model('m0') self.model1 = Model('m1') - self.mpm = MultiModelParameterMapper( - 'signal', (self.model0, self.model1)) - - def test_name(self): - self.assertEqual(self.mpm.name, 'signal') + self.pmm = ParameterModelMapper( + models=(self.model0, self.model1)) def test_models(self): - self.assertEqual(len(self.mpm.models), 2) - self.assertEqual(self.mpm.models[0], self.model0) - self.assertEqual(self.mpm.models[1], self.model1) + self.assertEqual(len(self.pmm.models), 2) + self.assertEqual(self.pmm.models[0], self.model0) + self.assertEqual(self.pmm.models[1], self.model1) def test_n_models(self): - self.assertEqual(self.mpm.n_models, 2) + self.assertEqual(self.pmm.n_models, 2) def test_str(self): # Add some parameters. self.test_def_param() - try: - str(self.mpm) - except: - self.fail('The __str__ method raised an exception!') + + # Ensure that __str__ does not raise an exception. 
+ str(self.pmm) + + def test_unique_model_param_names(self): + self.pmm.def_param( + param=self.fixed_param0, + models=(self.model0,), + model_param_names='p') + self.pmm.def_param( + param=self.floating_param0, + models=(self.model1,), + model_param_names='p') + self.pmm.def_param( + param=self.floating_param1) + names = self.pmm.unique_model_param_names + self.assertEqual(len(names), 2) + np.testing.assert_equal(names, ['p', 'p2']) def test_def_param(self): - self.mpm.def_param(self.fixed_param0, models=(self.model1,)) - self.mpm.def_param(self.floating_param0, 'fp', models=(self.model0,self.model1)) - self.mpm.def_param(self.floating_param1, models=(self.model1)) - self.assertEqual(self.mpm.n_global_params, 3) - self.assertEqual(self.mpm.n_global_fixed_params, 1) - self.assertEqual(self.mpm.n_global_floating_params, 2) + self.pmm.def_param( + param=self.fixed_param0, + models=(self.model1,)) + self.pmm.def_param( + param=self.floating_param0, + models=(self.model0, self.model1), + model_param_names='fp') + self.pmm.def_param( + param=self.floating_param1, + models=(self.model1,)) + self.assertEqual(self.pmm.n_global_params, 3) + self.assertEqual(self.pmm.n_global_fixed_params, 1) + self.assertEqual(self.pmm.n_global_floating_params, 2) # The models cannot be an empty set. with self.assertRaises(ValueError): - self.mpm.def_param(self.fixed_param0, 'fp', models=()) + self.pmm.def_param( + param=self.fixed_param0, + models=(), + model_param_names='fp') # A model parameter can only be defined once for a given model. with self.assertRaises(KeyError): - self.mpm.def_param(self.fixed_param0, 'fp', models=(self.model0)) + self.pmm.def_param( + param=self.fixed_param0, + models=(self.model0,), + model_param_names='fp') - def test_get_model_param_dict(self): + def test_create_model_params_dict(self): # Add some parameters to the model parameter mapper. 
self.test_def_param() - m0_param_dict = self.mpm.get_model_param_dict( - np.array([2.4, 11.1]), model_idx=0) + m0_param_dict = self.pmm.create_model_params_dict( + np.array([2.4, 11.1]), model=0) self.assertEqual(len(m0_param_dict), 1) self.assertTrue('fp' in m0_param_dict) self.assertAlmostEqual(m0_param_dict['fp'], 2.4) - m1_param_dict = self.mpm.get_model_param_dict( - np.array([2.4, 11.1]), model_idx=1) + m1_param_dict = self.pmm.create_model_params_dict( + np.array([2.4, 11.1]), model=1) self.assertEqual(len(m1_param_dict), 3) self.assertTrue('p0' in m1_param_dict) self.assertTrue('fp' in m1_param_dict) @@ -742,128 +612,14 @@ def test_get_model_param_dict(self): self.assertAlmostEqual(m1_param_dict['p2'], 11.1) self.assertAlmostEqual(m1_param_dict['p0'], 42) + def test_get_local_param_is_global_floating_param_mask(self): + # Add some parameters to the model parameter mapper. + self.test_def_param() -class HypoParameterDefinitionTestCase(unittest.TestCase): - def setUp(self): - self.fixed_param0 = Parameter('fixed_param0', 42) - self.fixed_param1 = Parameter('fixed_param1', 11) - self.floating_param0 = Parameter('floating_param0', 4, 1, 6) - self.floating_param1 = Parameter('floating_param1', 0.3, 0.1, 1) - - self.model0 = Model('m0') - self.model1 = Model('m1') - - self.mpm0 = SingleModelParameterMapper( - 'mpm0', self.model0) - self.mpm0.def_param(self.fixed_param0) - self.mpm0.def_param(self.floating_param0, 'fp0') - - self.mpm1 = SingleModelParameterMapper( - 'mpm1', self.model1) - self.mpm1.def_param(self.fixed_param1) - self.mpm1.def_param(self.floating_param1, 'fp1') - - self.hpdef = HypoParameterDefinition((self.mpm0, self.mpm1)) - - def test_model_param_mapper_list(self): - mpm_list = self.hpdef.model_param_mapper_list - self.assertEqual(len(mpm_list), 2) - self.assertEqual(mpm_list[0], self.mpm0) - self.assertEqual(mpm_list[1], self.mpm1) - - def test__str__(self): - try: - str(self.hpdef) - except: - self.fail('The __str__ method raised an 
exception!') - - def test_copy(self): - # Check to raise on bad input type. - with self.assertRaises(TypeError): - hpdef = self.hpdef.copy('not of type dict') - - # Create an exact copy. - hpdef = self.hpdef.copy() - self.assertTrue(isinstance(hpdef, HypoParameterDefinition)) - mpm0 = hpdef['mpm0'] - mpm1 = hpdef['mpm1'] - self.assertTrue((mpm0 is not self.mpm0) and (mpm0 is not self.mpm1)) - self.assertTrue((mpm1 is not self.mpm0) and (mpm1 is not self.mpm1)) - - self.assertEqual(mpm0.global_paramset.n_params, 2) - self.assertEqual(mpm0.global_paramset.n_fixed_params, 1) - self.assertEqual(mpm0.global_paramset.n_floating_params, 1) - self.assertTrue(mpm0.global_paramset.params[0] == self.fixed_param0) - self.assertTrue(mpm0.global_paramset.params[1] == self.floating_param0) - - self.assertEqual(mpm1.global_paramset.n_params, 2) - self.assertEqual(mpm1.global_paramset.n_fixed_params, 1) - self.assertEqual(mpm1.global_paramset.n_floating_params, 1) - self.assertTrue(mpm1.global_paramset.params[0] == self.fixed_param1) - self.assertTrue(mpm1.global_paramset.params[1] == self.floating_param1) - - self.setUp() - - # Create a copy where we fix one of the floating parameters. 
- hpdef = self.hpdef.copy({'floating_param1':2.3}) - self.assertTrue(isinstance(hpdef, HypoParameterDefinition)) - mpm0 = hpdef['mpm0'] - mpm1 = hpdef['mpm1'] - self.assertTrue((mpm0 is not self.mpm0) and (mpm0 is not self.mpm1)) - self.assertTrue((mpm1 is not self.mpm0) and (mpm1 is not self.mpm1)) - - self.assertEqual(mpm0.global_paramset.n_params, 2) - self.assertEqual(mpm0.global_paramset.n_fixed_params, 1) - self.assertEqual(mpm0.global_paramset.n_floating_params, 1) - self.assertTrue(mpm0.global_paramset.params[0] == self.fixed_param0) - self.assertTrue(mpm0.global_paramset.params[1] == self.floating_param0) - - self.assertEqual(mpm1.global_paramset.n_params, 2) - self.assertEqual(mpm1.global_paramset.n_fixed_params, 2) - self.assertEqual(mpm1.global_paramset.n_floating_params, 0) - self.assertTrue(mpm1.global_paramset.params[0] == self.fixed_param1) - self.assertFalse(mpm1.global_paramset.params[1] == self.floating_param1) - self.assertTrue(mpm1.global_paramset.params[1].isfixed) - self.assertAlmostEqual(mpm1.global_paramset.params[1].initial, 2.3) - self.assertAlmostEqual(mpm1.global_paramset.params[1].value, 2.3) - - def test_create_ParameterSetArray(self): - paramsetarr = self.hpdef.create_ParameterSetArray() - self.assertTrue(isinstance(paramsetarr, ParameterSetArray)) - - -class TestParameters(unittest.TestCase): - def test_MultiSourceFitParameterMapper(self): - from skyllh.physics.source import PointLikeSource - from skyllh.core.parameters import ( - FitParameter, - MultiSourceFitParameterMapper - ) - - # Define a list of point-like sources. - sources = [ - PointLikeSource(np.deg2rad(120), np.deg2rad(-23)), - PointLikeSource(np.deg2rad(266), np.deg2rad(61)), - ] - - # Define the fit parameters 'gamma1' and 'gamma2' which map to the - # 'gamma' source parameter of the first and second source, respectively. 
- sfpm = MultiSourceFitParameterMapper(sources) - sfpm.def_fit_parameter(FitParameter('gamma1', 1, 4, 2.0), 'gamma', sources[0]) - sfpm.def_fit_parameter(FitParameter('gamma2', 1, 4, 2.1), 'gamma', sources[1]) - - # Check the initial values. - self.assertTrue(np.all(sfpm.fitparamset.initials == np.array([2.0, 2.1]))) - - # Get the source parameters for the first source (feed it with the - # initials). - fitparams = sfpm.get_src_fitparams(sfpm.fitparamset.initials, 0) - self.assertEqual(fitparams, {'gamma': 2.0}) + mask = self.pmm.get_local_param_is_global_floating_param_mask( + ['p0', 'fp', 'p2']) + np.testing.assert_equal(mask, [False, True, True]) - # Get the source parameters for the second source (feed it with the - # initials). - fitparams = sfpm.get_src_fitparams(sfpm.fitparamset.initials, 1) - self.assertEqual(fitparams, {'gamma': 2.1}) -if(__name__ == '__main__'): +if __name__ == '__main__': unittest.main() diff --git a/tests/core/test_py.py b/tests/core/test_py.py index 1713bad542..79611ea0cf 100644 --- a/tests/core/test_py.py +++ b/tests/core/test_py.py @@ -5,6 +5,7 @@ from skyllh.core.py import ( ConstPyQualifier, + NamedObjectCollection, const, issequenceof ) @@ -14,7 +15,12 @@ class A(object): def __init__(self, name=None): super(A, self).__init__() - self.name = name + self._name = name + + @property + def name(self): + return self._name + class B(object): def __init__(self, name=None): @@ -52,5 +58,66 @@ def test_pyqualifiers(self): self.assertFalse(issequenceof(seq, A, const)) -if(__name__ == '__main__'): +class NamedObjectCollection_TestCase( + unittest.TestCase): + def setUp(self): + self.a1 = A('a1') + self.a2 = A('a2') + self.a3 = A('a3') + self.noc = NamedObjectCollection([self.a1, self.a2, self.a3]) + + def test_name_list(self): + self.assertEqual(self.noc.name_list, ['a1', 'a2', 'a3']) + + def test__contains__(self): + self.assertTrue('a1' in self.noc) + self.assertTrue('a2' in self.noc) + self.assertTrue('a3' in self.noc) + 
self.assertFalse('a4' in self.noc) + + def test__getitem__(self): + self.assertTrue(self.noc['a1'] is self.a1) + self.assertTrue(self.noc['a2'] is self.a2) + self.assertTrue(self.noc['a3'] is self.a3) + + self.assertTrue(self.noc[0] is self.a1) + self.assertTrue(self.noc[1] is self.a2) + self.assertTrue(self.noc[2] is self.a3) + + def test_get_index_by_name(self): + self.assertEqual(self.noc.get_index_by_name('a1'), 0) + self.assertEqual(self.noc.get_index_by_name('a2'), 1) + self.assertEqual(self.noc.get_index_by_name('a3'), 2) + + def test_add(self): + a4 = A('a4') + self.noc.add(a4) + + self.assertEqual(self.noc.name_list, ['a1', 'a2', 'a3', 'a4']) + self.assertEqual(self.noc.get_index_by_name('a4'), 3) + self.assertTrue(self.noc['a4'] is a4) + + def test_pop(self): + obj = self.noc.pop() + self.assertTrue(obj is self.a3) + self.assertEqual(self.noc.name_list, ['a1', 'a2']) + self.assertEqual(self.noc.get_index_by_name('a1'), 0) + self.assertEqual(self.noc.get_index_by_name('a2'), 1) + + def test_pop_with_int(self): + obj = self.noc.pop(1) + self.assertTrue(obj is self.a2) + self.assertEqual(self.noc.name_list, ['a1', 'a3']) + self.assertEqual(self.noc.get_index_by_name('a1'), 0) + self.assertEqual(self.noc.get_index_by_name('a3'), 1) + + def test_pop_with_str(self): + obj = self.noc.pop('a2') + self.assertTrue(obj is self.a2) + self.assertEqual(self.noc.name_list, ['a1', 'a3']) + self.assertEqual(self.noc.get_index_by_name('a1'), 0) + self.assertEqual(self.noc.get_index_by_name('a3'), 1) + + +if __name__ == '__main__': unittest.main() diff --git a/tests/core/test_signal_generator.py b/tests/core/test_signal_generator.py index f46484c55b..00706befa6 100644 --- a/tests/core/test_signal_generator.py +++ b/tests/core/test_signal_generator.py @@ -1,42 +1,120 @@ # -*- coding: utf-8 -*- import numpy as np -import os.path import unittest - +from skyllh.core.flux_model import ( + PowerLawEnergyFluxProfile, + SteadyPointlikeFFM, +) from skyllh.core.parameters import ( 
ParameterGrid, ) +from skyllh.core.services import ( + DatasetSignalWeightFactorsService, + DetSigYieldService, + SrcDetSigYieldWeightsService, +) from skyllh.core.signal_generator import ( - SignalGenerator, + MCMultiDatasetSignalGenerator, ) -from skyllh.core.source_hypothesis import ( +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroup, SourceHypoGroupManager, ) -from skyllh.core.source_hypo_group import ( - SourceHypoGroup, +from skyllh.core.source_model import ( + PointLikeSource, ) from skyllh.i3.detsigyield import ( - PowerLawFluxPointLikeSourceI3DetSigYieldImplMethod, + SingleParamFluxPointLikeSourceI3DetSigYieldBuilder, ) from skyllh.i3.signal_generation import ( PointLikeSourceI3SignalGenerationMethod, ) -from skyllh.physics.source import ( - PointLikeSource, -) -from skyllh.physics.flux import ( - PowerLawFlux, -) DATA_SAMPLES_IMPORTED = True try: from i3skyllh.datasets import repository from i3skyllh.datasets import data_samples -except: +except Exception: DATA_SAMPLES_IMPORTED = False +if DATA_SAMPLES_IMPORTED is True: + repository.setup_repository() + + +def create_signal_generator( + sig_generator_cls, + sig_gen_method, +): + """Creates a SignalGenerator instance of the given class + ``sig_generator_cls`` using the signal generation method instance + ``sig_gen_method``. + + Parameters + ---------- + sig_generator_cls : class of SignalGenerator + The class object of the signal generator. + sig_gen_method : instance of SignalGenerationMethod + The SignalGenerationMethod instance that should be used for the + signal generation. + + Returns + ------- + sig_gen : instance of ``sig_generator_cls`` + The created instance of ``sig_generator_cls`` which is derived from + SignalGenerator. 
+ """ + dataset_name = 'PointSourceTracks_v004p00' + dsc = data_samples[dataset_name].create_dataset_collection() + ds_list = dsc.get_datasets(['IC86, 2018', 'IC86, 2019']) + + data_list = [ds.load_and_prepare_data() for ds in ds_list] + + sources = [ + PointLikeSource(ra=np.deg2rad(0), dec=np.deg2rad(10)), + PointLikeSource(ra=np.deg2rad(30), dec=np.deg2rad(2)), + ] + + fluxmodel = SteadyPointlikeFFM( + Phi0=1, + energy_profile=PowerLawEnergyFluxProfile(E0=1000, gamma=2) + ) + + gamma_grid = ParameterGrid(name='gamma', grid=np.arange(1, 4.1, 0.1)) + detsigyield_builder = SingleParamFluxPointLikeSourceI3DetSigYieldBuilder( + param_grid=gamma_grid) + + shg_mgr = SourceHypoGroupManager( + SourceHypoGroup( + sources=sources, + fluxmodel=fluxmodel, + detsigyield_builders=detsigyield_builder, + sig_gen_method=sig_gen_method)) + + detsigyield_service = DetSigYieldService( + shg_mgr=shg_mgr, + dataset_list=ds_list, + data_list=data_list, + ) + + src_detsigyield_weights_service = SrcDetSigYieldWeightsService( + detsigyield_service=detsigyield_service, + ) + + ds_sig_weight_factors_service = DatasetSignalWeightFactorsService( + src_detsigyield_weights_service=src_detsigyield_weights_service, + ) + + sig_gen = sig_generator_cls( + shg_mgr=shg_mgr, + dataset_list=ds_list, + data_list=data_list, + ds_sig_weight_factors_service=ds_sig_weight_factors_service, + ) + + return sig_gen + class TestSignalGenerator(unittest.TestCase): @classmethod @@ -46,39 +124,9 @@ def setUpClass(cls): if not DATA_SAMPLES_IMPORTED: return - dataset_name = 'PointSourceTracks_v004p00' - dsc = data_samples[dataset_name].create_dataset_collection() - ds_list = dsc.get_datasets(['IC86, 2018', 'IC86, 2019']) - - data_list = [ds.load_and_prepare_data() for ds in ds_list] - - sources = [ - PointLikeSource(ra=np.deg2rad(0), dec=np.deg2rad(10)), - PointLikeSource(ra=np.deg2rad(30), dec=np.deg2rad(2)), - ] - - fluxmodel = PowerLawFlux( - Phi0=1, - E0=1000, - gamma=2) - - gamma_grid = 
ParameterGrid(name='gamma', grid=np.arange(1, 4.1, 0.1)) - detsigyield_builder = PowerLawFluxPointLikeSourceI3DetSigYieldImplMethod( - gamma_grid=gamma_grid) - - sig_gen_method = PointLikeSourceI3SignalGenerationMethod() - - shg_mgr = SourceHypoGroupManager( - SourceHypoGroup( - sources=sources, - fluxmodel=fluxmodel, - detsigyield_implmethods=detsigyield_builder, - sig_gen_method=sig_gen_method)) - - cls._sig_gen = SignalGenerator( - src_hypo_group_manager=shg_mgr, - dataset_list=ds_list, - data_list=data_list) + cls._sig_gen = create_signal_generator( + sig_generator_cls=MCMultiDatasetSignalGenerator, + sig_gen_method=PointLikeSourceI3SignalGenerationMethod()) @unittest.skipIf(not DATA_SAMPLES_IMPORTED, 'Data samples not imported!') def testSigCandidatesArray(self): @@ -134,16 +182,18 @@ def testSigCandidatesArray(self): 'shg_src_idxs shape' ) self.assertTrue( - np.all(np.equal(shg_src_idxs, np.array([0,1]))), + np.all(np.equal(shg_src_idxs, np.array([0, 1]))), 'shg_idx values' ) @unittest.skipIf(not DATA_SAMPLES_IMPORTED, 'Data samples not imported!') def testSigCandidatesWeightSum(self): + weight_sum = type(self)._sig_gen._sig_candidates_weight_sum self.assertTrue( - type(self)._sig_gen._sig_candidates_weight_sum == 7884630181096259, + np.isclose(weight_sum, 7884630181096259), + f'weight sum is {weight_sum}' ) -if(__name__ == '__main__'): +if __name__ == '__main__': unittest.main() diff --git a/tests/core/test_signalpdf.py b/tests/core/test_signalpdf.py new file mode 100644 index 0000000000..c48972b2aa --- /dev/null +++ b/tests/core/test_signalpdf.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- + +"""The unit tests in this module test classes of the skyllh.core.signalpdf +module. 
+""" + +import numpy as np + +import unittest +from unittest.mock import Mock + +from skyllh.core.flux_model import ( + BoxTimeFluxProfile, +) +from skyllh.core.livetime import ( + Livetime, +) +from skyllh.core.parameters import ( + ParameterModelMapper, +) +from skyllh.core.signalpdf import ( + SignalTimePDF, +) +from skyllh.core.source_model import ( + SourceModel, +) +from skyllh.core.trialdata import ( + TrialDataManager, +) + + +def create_tdm(n_sources, n_selected_events): + """Creates a Mock instance mimicing a TrialDataManager instance with a + given number of sources and selected events. + """ + tdm = Mock(spec_set=[ + '__class__', + 'trial_data_state_id', + 'get_n_values', + 'src_evt_idxs', + 'n_sources', + 'n_selected_events', + 'get_data']) + + def tdm_get_data(key): + if n_selected_events == 3: + return np.array([0, 5, 9.7]) + raise ValueError( + f'Value n_selected_events={n_selected_events} is not supported!') + + tdm.__class__ = TrialDataManager + tdm.trial_data_state_id = 1 + tdm.get_n_values = lambda: n_sources*n_selected_events + tdm.src_evt_idxs = ( + np.repeat(np.arange(n_sources), n_selected_events), + np.tile(np.arange(n_selected_events), n_sources) + ) + tdm.n_sources = n_sources + tdm.n_selected_events = n_selected_events + tdm.get_data = tdm_get_data + + return tdm + + +class SignalTimePDFTestCase( + unittest.TestCase, +): + def setUp(self): + self.pmm = ParameterModelMapper( + models=[ + SourceModel(), + SourceModel()]) + + self.livetime = Livetime(np.array([ + [0, 1], + [1.3, 4.6], + [7.7, 10], + ])) + + self.S = (1-0) + (4.6-1.3) + (10-7.7) + + self.time_flux_profile = BoxTimeFluxProfile( + t0=5, + tw=10) + + self.sig_time_pdf = SignalTimePDF( + pmm=self.pmm, + livetime=self.livetime, + time_flux_profile=self.time_flux_profile) + + def test__str__(self): + str(self.sig_time_pdf) + + def test__calculate_sum_of_ontime_time_flux_profile_integrals(self): + S = self.sig_time_pdf._calculate_sum_of_ontime_time_flux_profile_integrals() + 
self.assertEqual(S, self.S) + + def test_get_pd(self): + tdm = create_tdm(n_sources=self.pmm.n_sources, n_selected_events=3) + src_params_recarray = self.pmm.create_src_params_recarray(gflp_values=[]) + + (pd, grads) = self.sig_time_pdf.get_pd( + tdm=tdm, + params_recarray=src_params_recarray) + + np.testing.assert_almost_equal( + pd, + np.array([ + 1/self.S, + 0., + 1/self.S, + 1/self.S, + 0., + 1/self.S, + ])) + + self.assertEqual(grads, {}) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/core/test_source_model.py b/tests/core/test_source_model.py new file mode 100644 index 0000000000..8e3e1f3c1e --- /dev/null +++ b/tests/core/test_source_model.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- + +import unittest + +from skyllh.core.catalog import ( + SourceCatalog, +) +from skyllh.core.source_model import ( + PointLikeSource, + SourceModel, + SourceModelCollection, +) + + +class SourceModelTestCase( + unittest.TestCase +): + def setUp(self): + self.name = 'MySource' + self.classification = 'MyClassification' + self.weight = 1.1 + + self.source_model = SourceModel( + name=self.name, + classification=self.classification, + weight=self.weight) + + def test_name(self): + self.assertEqual(self.source_model.name, self.name) + + def test_classification(self): + self.assertEqual(self.source_model.classification, self.classification) + + def test_weight(self): + self.assertEqual(self.source_model.weight, self.weight) + + +class SourceModelCollectionTestCase( + unittest.TestCase, +): + def setUp(self): + self.ra = 0 + self.dec = 1 + + def test_SourceModelCollection(self): + source_model1 = SourceModel(self.ra, self.dec) + source_model2 = SourceModel(self.ra, self.dec) + + source_collection_casted = SourceModelCollection.cast( + source_model1, + "Could not cast SourceModel to SourceCollection") + source_collection = SourceModelCollection( + source_type=SourceModel, + sources=[source_model1, source_model2]) + + self.assertIsInstance(source_collection_casted, 
SourceModelCollection) + self.assertEqual(source_collection.source_type, SourceModel) + self.assertIsInstance(source_collection.sources[0], SourceModel) + self.assertIsInstance(source_collection.sources[1], SourceModel) + + +class SourceCatalogTestCase( + unittest.TestCase, +): + def setUp(self): + self.name = 'MySourceCatalog' + self.ra = 0.1 + self.dec = 1.1 + self.source1 = SourceModel(self.ra, self.dec) + self.source2 = SourceModel(self.ra, self.dec) + + self.catalog = SourceCatalog( + name=self.name, + sources=[self.source1, self.source2], + source_type=SourceModel) + + def test_name(self): + self.assertEqual(self.catalog.name, self.name) + + def test_as_SourceModelCollection(self): + sc = self.catalog.as_SourceModelCollection() + self.assertIsInstance(sc, SourceModelCollection) + + +class PointLikeSourceTestCase( + unittest.TestCase, +): + def setUp(self): + self.name = 'MyPointLikeSource' + self.ra = 0.1 + self.dec = 1.1 + self.source = PointLikeSource( + name=self.name, + ra=self.ra, + dec=self.dec) + + def test_name(self): + self.assertEqual(self.source.name, self.name) + + def test_ra(self): + self.assertEqual(self.source.ra, self.ra) + + def test_dec(self): + self.assertEqual(self.source.dec, self.dec) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/core/test_storage.py b/tests/core/test_storage.py index 3037be525f..0905897a69 100644 --- a/tests/core/test_storage.py +++ b/tests/core/test_storage.py @@ -13,9 +13,9 @@ def setUp(self): self.field2 = np.array([2.5, 2.1, 2.3, 2.4, 2.2], dtype=np.float64) self.field3 = np.array([3.2, 3.5, 3.1, 3.3, 3.4], dtype=np.float64) data = dict( - field1 = self.field1, - field2 = self.field2, - field3 = self.field3 + field1=self.field1, + field2=self.field2, + field3=self.field3 ) self.arr = DataFieldRecordArray(data) self.arr_len = 5 @@ -37,7 +37,7 @@ def test__getitem__(self): assert_array_almost_equal(self.arr['field3'], self.field3) # Access rows of the dataset via indices. 
- idx = np.array([1,4,2]) + idx = np.array([1, 4, 2]) sub_arr = self.arr[idx] assert_array_almost_equal(sub_arr['field1'], self.field1[idx]) assert_array_almost_equal(sub_arr['field2'], self.field2[idx]) @@ -65,11 +65,11 @@ def test__setitem__(self): self.setUp() # Set selected rows with new values by indices. - idx = np.array([1,4,2]) + idx = np.array([1, 4, 2]) new_data = dict( - field1 = self.field1[idx], - field2 = new_field2[idx], - field3 = self.field3[idx] + field1=self.field1[idx], + field2=new_field2[idx], + field3=self.field3[idx] ) new_arr = DataFieldRecordArray(new_data) self.arr[idx] = new_arr @@ -85,9 +85,9 @@ def test__setitem__(self): # Set selected rows with new values by mask. mask = np.array([True, True, False, True, False]) new_data = dict( - field1 = self.field1[mask], - field2 = new_field2[mask], - field3 = self.field3[mask] + field1=self.field1[mask], + field2=new_field2[mask], + field3=self.field3[mask] ) new_arr = DataFieldRecordArray(new_data) self.arr[mask] = new_arr @@ -112,7 +112,7 @@ def test__setitem__(self): def test__str__(self): try: str(self.arr) - except: + except Exception: self.fail('The __str__ method raised an exception!') def test_field_name_list(self): @@ -163,7 +163,7 @@ def test_tidy_up(self): # Reset the array. 
self.setUp() - self.arr.tidy_up(('field2','field3')) + self.arr.tidy_up(('field2', 'field3')) self.assertEqual(len(self.arr.field_name_list), 2) self.assertTrue('field2' in self.arr.field_name_list) self.assertTrue('field3' in self.arr.field_name_list) @@ -171,5 +171,6 @@ def test_tidy_up(self): self.assertTrue('field2' in self.arr) self.assertTrue('field3' in self.arr) -if(__name__ == '__main__'): + +if __name__ == '__main__': unittest.main() diff --git a/tests/core/test_weights.py b/tests/core/test_weights.py new file mode 100644 index 0000000000..a8ce18a8c3 --- /dev/null +++ b/tests/core/test_weights.py @@ -0,0 +1,565 @@ +# -*- coding: utf-8 -*- + +"""The unit tests in this module test classes of the skyllh.core.weights module. +""" + +import numpy as np +import unittest +from unittest.mock import ( + Mock, +) + +from skyllh.core.detsigyield import ( + DetSigYield, + DetSigYieldBuilder, +) +from skyllh.core.flux_model import ( + SteadyPointlikeFFM, +) +from skyllh.core.parameters import ( + Parameter, + ParameterModelMapper, +) +from skyllh.core.services import ( + DatasetSignalWeightFactorsService, + DetSigYieldService, + SrcDetSigYieldWeightsService, +) +from skyllh.core.source_hypo_grouping import ( + SourceHypoGroup, + SourceHypoGroupManager, +) +from skyllh.core.source_model import ( + PointLikeSource, +) + + +# Define a DetSigYield class that is a simple function of the source declination +# position. +class SimpleDetSigYieldWithoutGrads( + DetSigYield): + def __init__(self, scale=1, **kwargs): + self._scale = scale + + def sources_to_recarray(self, sources): + recarr = np.empty((len(sources),), dtype=[('dec', np.double)]) + for (i, src) in enumerate(sources): + recarr[i]['dec'] = src.dec + return recarr + + def __call__(self, src_recarray, src_params_recarray): + """ + Parameters + ---------- + src_recarray : (N_sources,)-shaped numpy record ndarray + The numpy record array containing the information of the sources. 
+            The required fields of this record array are implementation
+            dependent. In the most generic case for a point-like source, it
+            must contain the following two fields: ra, dec.
+        src_params_recarray : (N_sources,)-shaped numpy record ndarray
+            The numpy record ndarray containing the parameter values of the
+            sources. The parameter values can be different for the different
+            sources.
+            The record array needs to contain two fields for each source
+            parameter, one named with the source's local parameter name
+            holding the source's local parameter value, and one named
+            holding the global parameter index plus one for each
+            source value. For values mapping to non-fit parameters, the index
+            should be negative.
+
+        Returns
+        -------
+        values : (N_sources,)-shaped 1D ndarray of float
+            The array with the mean number of signal in the detector for each
+            given source.
+        grads : dict
+            The dictionary holding the gradient values for each global fit
+            parameter. The key is the global fit parameter index and the value
+            is the (N_sources,)-shaped numpy ndarray holding the gradient value
+            dY_k/dp_s.
+        """
+        values = self._scale * np.rad2deg(src_recarray['dec'])
+        grads = dict()
+
+        return (values, grads)
+
+
+class SimpleDetSigYieldWithGrads(
+        SimpleDetSigYieldWithoutGrads):
+    def __init__(self, pname, **kwargs):
+        super().__init__(**kwargs)
+
+        self.param_names = (pname,)
+
+    def __call__(self, src_recarray, src_params_recarray):
+        """
+        Parameters
+        ----------
+        src_recarray : (N_sources,)-shaped numpy record ndarray
+            The numpy record array containing the information of the sources.
+            The required fields of this record array are implementation
+            dependent. In the most generic case for a point-like source, it
+            must contain the following two fields: ra, dec.
+        src_params_recarray : (N_sources,)-shaped numpy record ndarray
+            The numpy record ndarray containing the parameter values of the
+            sources. The parameter values can be different for the different
+            sources.
+ The record array needs to contain two fields for each source + parameter, one named with the source's local parameter name + holding the source's local parameter value, and one named + holding the global parameter index plus one for each + source value. For values mapping to non-fit parameters, the index + should be negative. + + Returns + ------- + values : (N_sources,)-shaped 1D ndarray of float + The array with the mean number of signal in the detector for each + given source. + grads : dict + The dictionary holding the gradient values for each global fit + parameter. The key is the global fit parameter index and the value + is the (N_sources,)-shaped numpy ndarray holding the gradient value + dY_k/dp_s. + """ + local_param_name = self.param_names[0] + + src_dec = np.atleast_1d(src_recarray['dec']) + src_param = src_params_recarray[local_param_name] + src_param_gp_idxs = src_params_recarray[f'{local_param_name}:gpidx'] + + n_sources = len(src_dec) + + # Check for correct input format. + if not (len(src_param) == n_sources and + len(src_param_gp_idxs) == n_sources): + raise RuntimeError( + f'The length ({len(src_param)}) of the array for the ' + f'source parameter "{local_param_name}" does not match the ' + f'number of sources ({n_sources})!') + + values = self._scale * np.rad2deg(src_dec) + + # Determine the number of global parameters the local parameter is + # made of. + gfp_idxs = np.unique(src_param_gp_idxs) + gfp_idxs = gfp_idxs[gfp_idxs > 0] - 1 + + grads = dict() + for gfp_idx in gfp_idxs: + grads[gfp_idx] = np.zeros((n_sources,), dtype=np.double) + + # Create a mask to select the sources that depend on the global + # fit parameter with index gfp_idx. + m = (src_param_gp_idxs == gfp_idx+1) + + grads[gfp_idx][m] = src_param[m] + + return (values, grads) + + +# Define placeholder class to satisfy type checks. 
+class NoDetSigYieldBuilder(
+        DetSigYieldBuilder):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def construct_detsigyield(self, **kwargs):
+        pass
+
+
+def create_shg_mgr_and_pmm():
+    """Creates a SourceHypoGroupManager and a ParameterModelMapper instance.
+    """
+    sources = [
+        PointLikeSource(
+            name='PS1', ra=0, dec=np.deg2rad(10), weight=1),
+        PointLikeSource(
+            name='PS2', ra=0, dec=np.deg2rad(20), weight=2),
+        PointLikeSource(
+            name='PS3', ra=0, dec=np.deg2rad(30), weight=3),
+    ]
+    fluxmodel = SteadyPointlikeFFM(Phi0=1, energy_profile=None)
+
+    shg_mgr = SourceHypoGroupManager(
+        SourceHypoGroup(
+            sources=sources,
+            fluxmodel=fluxmodel,
+            detsigyield_builders=NoDetSigYieldBuilder(),
+            sig_gen_method=None))
+
+    p1 = Parameter('p1', 141, 99, 199)
+    p2 = Parameter('p2', 142, 100, 200)
+
+    pmm = ParameterModelMapper(models=sources)
+    pmm.def_param(p1).def_param(p2)
+
+    return (shg_mgr, pmm)
+
+
+def create_DetSigYieldService(shg_mgr, detsigyield_arr):
+    """Creates a Mock instance mimicking a DetSigYieldService instance with a
+    given detsigyield array.
+    """
+    detsigyield_service = Mock(spec_set=[
+        '__class__',
+        'arr',
+        'shg_mgr',
+        'n_datasets',
+        'n_shgs',
+    ])
+
+    detsigyield_service.__class__ = DetSigYieldService
+    detsigyield_service.arr = detsigyield_arr
+    detsigyield_service.shg_mgr = shg_mgr
+    detsigyield_service.n_datasets = detsigyield_arr.shape[0]
+    detsigyield_service.n_shgs = detsigyield_arr.shape[1]
+
+    return detsigyield_service
+
+
+class TestSourceDetectorWeights(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        """This class method will run only once for this TestCase.
+        """
+        (cls._shg_mgr, cls._pmm) = create_shg_mgr_and_pmm()
+
+    def test_without_grads(self):
+        """Tests the __call__ method of the SourceDetectorWeights class
+        using a DetSigYield instance without any gradients.
+        """
+        # detsigyield_arr is (N_datasets, N_shgs)-shaped.
+ detsigyield_arr = np.array([ + [SimpleDetSigYieldWithoutGrads()], + [SimpleDetSigYieldWithoutGrads()] + ]) + + detsigyield_service = create_DetSigYieldService( + shg_mgr=type(self)._shg_mgr, + detsigyield_arr=detsigyield_arr) + + src_detsigyield_weights_service = SrcDetSigYieldWeightsService( + detsigyield_service=detsigyield_service) + + gflp_values = np.array([120.0, 177.7]) + src_params_recarray = type(self)._pmm.create_src_params_recarray( + gflp_values) + src_detsigyield_weights_service.calculate(src_params_recarray) + (a_jk, a_jk_grads) = src_detsigyield_weights_service.get_weights() + + self.assertIsInstance( + a_jk, np.ndarray, + 'instance of a_jk') + + self.assertEqual( + a_jk.shape, (2, 3), + 'a_jk.shape') + + np.testing.assert_allclose( + a_jk, + np.array([ + [1*10, 2*20, 3*30], + [1*10, 2*20, 3*30] + ]), + err_msg='a_jk values') + + self.assertIsInstance( + a_jk_grads, dict, + 'instance of a_jk_grads') + + self.assertEqual( + len(a_jk_grads), 0, + 'length of a_jk_grads') + + def test_with_grads_p1(self): + """Tests the __call__ method of the SourceDetectorWeights class + using a DetSigYield instance with gradients for the parameter p1. + """ + # detsigyield_arr is (N_datasets, N_shgs)-shaped. 
+ detsigyield_arr = np.array([ + [SimpleDetSigYieldWithGrads(pname='p1')], + [SimpleDetSigYieldWithGrads(pname='p1')] + ]) + + detsigyield_service = create_DetSigYieldService( + shg_mgr=type(self)._shg_mgr, + detsigyield_arr=detsigyield_arr) + + src_detsigyield_weights_service = SrcDetSigYieldWeightsService( + detsigyield_service=detsigyield_service) + + gflp_values = np.array([120.0, 177.7]) + src_params_recarray = type(self)._pmm.create_src_params_recarray( + gflp_values) + src_detsigyield_weights_service.calculate(src_params_recarray) + (a_jk, a_jk_grads) = src_detsigyield_weights_service.get_weights() + + self.assertIsInstance( + a_jk, np.ndarray, + 'instance of a_jk') + + self.assertEqual( + a_jk.shape, (2, 3), + 'a_jk.shape') + + np.testing.assert_allclose( + a_jk, + [ + [1*10, 2*20, 3*30], + [1*10, 2*20, 3*30] + ], + err_msg='a_jk values') + + self.assertIsInstance( + a_jk_grads, dict, + 'instance of a_jk_grads') + + self.assertEqual( + len(a_jk_grads), 1, + 'length of a_jk_grads') + + self.assertIn( + 0, a_jk_grads, + '0 in a_jk_grads') + + np.testing.assert_allclose( + a_jk_grads[0], + [ + [1*120., 2*120., 3*120.], + [1*120., 2*120., 3*120.] + ], + err_msg='a_jk_grads[0] values') + + def test_with_grads_p2(self): + """Tests the __call__ method of the SourceDetectorWeights class + using a DetSigYield instance with gradients for the parameter p2. + """ + # detsigyield_arr is (N_datasets, N_shgs)-shaped. 
+ detsigyield_arr = np.array([ + [SimpleDetSigYieldWithGrads(pname='p2')], + [SimpleDetSigYieldWithGrads(pname='p2')] + ]) + + detsigyield_service = create_DetSigYieldService( + shg_mgr=type(self)._shg_mgr, + detsigyield_arr=detsigyield_arr) + + src_detsigyield_weights_service = SrcDetSigYieldWeightsService( + detsigyield_service=detsigyield_service) + + gflp_values = np.array([120., 177.]) + src_params_recarray = type(self)._pmm.create_src_params_recarray( + gflp_values) + src_detsigyield_weights_service.calculate(src_params_recarray) + (a_jk, a_jk_grads) = src_detsigyield_weights_service.get_weights() + + self.assertIsInstance( + a_jk, np.ndarray, + 'instance of a_jk') + + self.assertEqual( + a_jk.shape, (2, 3), + 'a_jk.shape') + + np.testing.assert_allclose( + a_jk, + [ + [1*10, 2*20, 3*30], + [1*10, 2*20, 3*30] + ], + err_msg='a_jk values') + + self.assertIsInstance( + a_jk_grads, dict, + 'instance of a_jk_grads') + + self.assertEqual( + len(a_jk_grads), 1, + 'length of a_jk_grads') + + self.assertIn( + 1, a_jk_grads, + '1 in a_jk_grads') + + np.testing.assert_allclose( + a_jk_grads[1], + [ + [1*177., 2*177., 3*177.], + [1*177., 2*177., 3*177.] + ], + err_msg='a_jk_grads[1] values') + + +class TestDatasetSignalWeightFactors(unittest.TestCase): + @classmethod + def setUpClass(cls): + """This class method will run only once for this TestCase. + """ + (cls._shg_mgr, cls._pmm) = create_shg_mgr_and_pmm() + + def test_without_grads(self): + """Tests the __call__ method of the DatasetSignalWeightFactors class + using a DetSigYield instance without any gradients. + """ + # detsigyield_arr is (N_datasets, N_shgs)-shaped. 
+ detsigyield_arr = np.array([ + [SimpleDetSigYieldWithoutGrads(scale=1)], + [SimpleDetSigYieldWithoutGrads(scale=2)] + ]) + + detsigyield_service = create_DetSigYieldService( + shg_mgr=type(self)._shg_mgr, + detsigyield_arr=detsigyield_arr) + + src_detsigyield_weights_service = SrcDetSigYieldWeightsService( + detsigyield_service=detsigyield_service) + + ds_sig_weight_factors_service = DatasetSignalWeightFactorsService( + src_detsigyield_weights_service=src_detsigyield_weights_service) + + gflp_values = np.array([120.0, 177.7]) + src_params_recarray = type(self)._pmm.create_src_params_recarray( + gflp_values) + src_detsigyield_weights_service.calculate(src_params_recarray) + ds_sig_weight_factors_service.calculate() + (f_j, f_j_grads) = ds_sig_weight_factors_service.get_weights() + + self.assertIsInstance( + f_j, np.ndarray, + 'instance of f_j') + + self.assertEqual( + f_j.shape, (2,), + 'f_j.shape') + + np.testing.assert_allclose( + f_j, + [1/3, 2/3], + err_msg='f_j values') + + self.assertIsInstance( + f_j_grads, dict, + 'instance of f_j_grads') + + self.assertEqual( + len(f_j_grads), 0, + 'length of f_j_grads') + + def test_with_grads_p1(self): + """Tests the __call__ method of the DatasetSignalWeightFactors class + using a DetSigYield instance with gradients for the parameter p1. + """ + # detsigyield_arr is (N_datasets, N_shgs)-shaped. 
+ detsigyield_arr = np.array([ + [SimpleDetSigYieldWithGrads(pname='p1', scale=1)], + [SimpleDetSigYieldWithGrads(pname='p1', scale=2)] + ]) + + detsigyield_service = create_DetSigYieldService( + shg_mgr=type(self)._shg_mgr, + detsigyield_arr=detsigyield_arr) + + src_detsigyield_weights_service = SrcDetSigYieldWeightsService( + detsigyield_service=detsigyield_service) + + ds_sig_weight_factors_service = DatasetSignalWeightFactorsService( + src_detsigyield_weights_service=src_detsigyield_weights_service) + + gflp_values = np.array([120.0, 177.7]) + src_params_recarray = type(self)._pmm.create_src_params_recarray( + gflp_values) + src_detsigyield_weights_service.calculate(src_params_recarray) + ds_sig_weight_factors_service.calculate() + (f_j, f_j_grads) = ds_sig_weight_factors_service.get_weights() + + self.assertIsInstance( + f_j, np.ndarray, + 'instance of f_j') + + self.assertEqual( + f_j.shape, (2,), + 'f_j.shape') + + np.testing.assert_allclose( + f_j, + [1/3, 2/3], + err_msg='f_j values') + + self.assertIsInstance( + f_j_grads, dict, + 'instance of f_j_grads') + + self.assertEqual( + len(f_j_grads), 1, + 'length of f_j_grads') + + self.assertIn( + 0, f_j_grads, + '0 in f_j_grads') + + np.testing.assert_allclose( + f_j_grads[0], + [0.57142857, -0.57142857], + err_msg='f_j_grads[0] values') + + def test_with_grads_p2(self): + """Tests the __call__ method of the DatasetSignalWeightFactors class + using a DetSigYield instance with gradients for the parameter p2. + """ + # detsigyield_arr is (N_datasets, N_shgs)-shaped. 
+ detsigyield_arr = np.array([ + [SimpleDetSigYieldWithGrads(pname='p2', scale=1)], + [SimpleDetSigYieldWithGrads(pname='p2', scale=2)] + ]) + + detsigyield_service = create_DetSigYieldService( + shg_mgr=type(self)._shg_mgr, + detsigyield_arr=detsigyield_arr) + + src_detsigyield_weights_service = SrcDetSigYieldWeightsService( + detsigyield_service=detsigyield_service) + + ds_sig_weight_factors_service = DatasetSignalWeightFactorsService( + src_detsigyield_weights_service=src_detsigyield_weights_service) + + gflp_values = np.array([120.0, 177.7]) + src_params_recarray = type(self)._pmm.create_src_params_recarray( + gflp_values) + src_detsigyield_weights_service.calculate(src_params_recarray) + ds_sig_weight_factors_service.calculate() + (f_j, f_j_grads) = ds_sig_weight_factors_service.get_weights() + + self.assertIsInstance( + f_j, np.ndarray, + 'instance of f_j') + + self.assertEqual( + f_j.shape, (2,), + 'f_j.shape') + + np.testing.assert_allclose( + f_j, + [1/3, 2/3], + err_msg='f_j values') + + self.assertIsInstance( + f_j_grads, dict, + 'instance of f_j_grads') + + self.assertEqual( + len(f_j_grads), 1, + 'length of f_j_grads') + + self.assertIn( + 1, f_j_grads, + '1 in f_j_grads') + + np.testing.assert_allclose( + f_j_grads[1], + [0.84619048, -0.84619048], + err_msg='f_j_grads[1] values') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/core/testdata/testdata_generator.py b/tests/core/testdata/testdata_generator.py index a5898e7948..87455c4741 100644 --- a/tests/core/testdata/testdata_generator.py +++ b/tests/core/testdata/testdata_generator.py @@ -8,39 +8,61 @@ def generate_testdata(): - exp_testdata_dtype = np.dtype( - [('time', ' t2 - self.assertEqual(self.box_time_profile.get_integral(t2, t1), 0) - np.testing.assert_array_equal(self.box_time_profile.get_integral(times2, times1), np.zeros_like(values)) - - def test_get_total_integral(self): - self.assertEqual(self.box_time_profile.get_total_integral(), 1) - - def test_get_value(self): - 
value = 1/self.Tw - times = np.array([self.T0 - self.Tw, - self.T0, - self.T0 + self.Tw]) - values = np.array([0, - value, - 0]) - - self.assertEqual(self.box_time_profile.get_value(self.T0 - self.Tw), 0) - self.assertEqual(self.box_time_profile.get_value(self.T0), value) - self.assertEqual(self.box_time_profile.get_value(self.T0 + self.Tw), 0) - np.testing.assert_array_equal(self.box_time_profile.get_value(times), values) - - -if(__name__ == '__main__'): - unittest.main() diff --git a/tests/run.sh b/tests/run.sh index b44515e116..67d6613038 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -4,6 +4,5 @@ rcode=0 /usr/bin/env python -m unittest discover tests/core || rcode=$? /usr/bin/env python -m unittest discover tests/i3 || rcode=$? -/usr/bin/env python -m unittest discover tests/physics || rcode=$? exit $rcode diff --git a/tests/utils.py b/tests/utils.py deleted file mode 100644 index d6502e0305..0000000000 --- a/tests/utils.py +++ /dev/null @@ -1,9 +0,0 @@ -# -*- coding: utf-8 -*- -# Author: Dr. Martin Wolf - -import numpy as np - -def isAlmostEqual(a, b, decimals=9): - a = np.atleast_1d(a) - b = np.atleast_1d(b) - return np.all(np.around(np.abs(a - b), decimals) == 0)