Skip to content

Commit

Permalink
expose inmemory to startup
Browse files Browse the repository at this point in the history
  • Loading branch information
diehlbw committed Jun 27, 2024
1 parent f6d490f commit 9bdaf40
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 8 deletions.
1 change: 1 addition & 0 deletions changelog/34.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* seismometer.run_startup() can now accept preloaded prediction and event dataframes that take precedence over loading from configuration
23 changes: 20 additions & 3 deletions src/seismometer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
import logging
from pathlib import Path
from typing import Optional

import pandas as pd

from seismometer._version import __version__
from seismometer.core.logger import add_log_formatter, set_default_logger_config


def run_startup(*, config_path: str | Path = None, output_path: str | Path = None, log_level: int = logging.WARN):
def run_startup(
*,
config_path: str | Path = None,
output_path: str | Path = None,
predictions_frame: Optional[pd.DataFrame] = None,
events_frame: Optional[pd.DataFrame] = None,
definitions: Optional[dict] = None,
log_level: int = logging.WARN,
):
"""
Runs the required startup for instantiating seismometer.
Expand All @@ -16,6 +27,12 @@ def run_startup(*, config_path: str | Path = None, output_path: str | Path = Non
output_path : Optional[str | Path], optional
An output path to write data to, overwriting the default path specified by info_dir in config.yml,
by default None.
predictions_frame : Optional[pd.DataFrame], optional
An optional DataFrame containing the fully loaded predictions data, by default None.
By default, when not specified here, these data will be loaded based on configuration.
events_frame : Optional[pd.DataFrame], optional
An optional DataFrame containing the fully loaded events data, by default None.
By default, when not specified here, these data will be loaded based on configuration.
log_level : logging._Level, optional
The log level to set, by default logging.WARN.
"""
Expand All @@ -31,8 +48,8 @@ def run_startup(*, config_path: str | Path = None, output_path: str | Path = Non
logger.setLevel(log_level)
logger.info(f"seismometer version {__version__} starting")

sg = Seismogram(config_path, output_path)
sg.load_data()
sg = Seismogram(config_path, output_path, definitions=definitions)
sg.load_data(predictions=predictions_frame, events=events_frame)

# Surface api into namespace
s_module = importlib.import_module("seismometer._api")
Expand Down
7 changes: 7 additions & 0 deletions src/seismometer/configuration/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ class ConfigProvider:
Specifies the template notebook name to use during building, by default None; it uses "template" from the
primary config file.
This is the template that will be used as a base for building the final notebook.
definitions : Optional[dict], optional
A dictionary of definitions to use instead of loading those specified by configuration, by default None.
"""

Expand All @@ -47,6 +49,7 @@ def __init__(
info_dir: str | Path = None,
data_dir: str | Path = None,
template_notebook: Option = None,
definitions: dict = None,
):
self._config: OtherInfo = None
self._usage: DataUsage = None
Expand All @@ -55,6 +58,10 @@ def __init__(
self._output_dir: Path = None
self._output_notebook: str = ""

if definitions is not None:
self._prediction_defs = PredictionDictionary(predictions=definitions.pop("predictions", []))
self._event_defs = EventDictionary(events=definitions.pop("events", None))

self._load_config_config(config_config)
self._resolve_other_paths(usage_config, info_dir, data_dir)
self._override_template(template_notebook)
Expand Down
51 changes: 46 additions & 5 deletions src/seismometer/seismogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ class Seismogram(object, metaclass=Singleton):
output_list: list[str]
""" The list of columns representing model outputs."""

def __init__(self, config_path: Optional[str | Path] = None, output_path: Optional[str | Path] = None):
def __init__(
self,
config_path: Optional[str | Path] = None,
output_path: Optional[str | Path] = None,
definitions: Optional[dict] = None,
):
"""
Constructor for Seismogram, which can only be instantiated once.
Expand All @@ -63,6 +68,8 @@ def __init__(self, config_path: Optional[str | Path] = None, output_path: Option
output_path : str or Path, optional
Override location to place resulting data and report files.
Defaults to the config.yml info_dir, and then the notebook's output directory.
definitions : dict, optional
Additional definitions to be used instead of loading based on configuration, by default None.
"""
if config_path is None:
Expand All @@ -73,13 +80,36 @@ def __init__(self, config_path: Optional[str | Path] = None, output_path: Option
self.cohort_cols: list[str] = []
self.config_path = config_path

self.load_config(config_path)
self.load_config(config_path, definitions=definitions)

self.config.set_output(output_path)
self.config.output_dir.mkdir(parents=True, exist_ok=True)
self.dataloader = loader_factory(self.config)

def load_data(self, predictions=None, events=None):
def load_data(
self, *, predictions: Optional[pd.DataFrame] = None, events: Optional[pd.DataFrame] = None, reset: bool = False
):
"""
Loads the seismogram data.
Uses the passed in frames if they are specified, otherwise uses configuration to load data.
If data is already loaded, does not change state unless reset is true.
Parameters
----------
predictions : pd.DataFrame, optional
The fully prepared predictions dataframe, by default None.
Uses this when specified, otherwise loads based on configuration.
events : pd.DataFrame, optional
The pre-loaded events dataframe, by default None.
Uses this when specified, otherwise loads based on configuration.
reset : bool, optional
Flag when set to true will overwrite existing dataframe, by default False
"""
if self.dataframe and not reset:
logger.debug("Data already loaded; pass reset=True to clear data and re-evaluate.")
return

self._load_metadata()

self.dataframe = self.dataloader.load_data(predictions, events)
Expand Down Expand Up @@ -273,8 +303,19 @@ def score_bins(self):
# endregion

# region initialization and preprocessing (this region knows about config)
def load_config(self, config_path: Path):
self.config = ConfigProvider(config_path)
def load_config(self, config_path: Path, definitions: Optional[dict] = None):
"""
Loads the base configuration and alerting configuration
Parameters
----------
config_path : Path
The location of the main configuration file.
definitions : Optional[dict], optional
An optional dictionary containing both events and predictions lists, by default None.
If not passed, these will be loaded based on configuration.
"""
self.config = ConfigProvider(config_path, definitions=definitions)
self.alert_config = AlertConfigProvider(config_path)

if len(self.config.cohorts) == 0:
Expand Down

0 comments on commit 9bdaf40

Please sign in to comment.