Skip to content

Commit

Permalink
Merge 62b9dce into f03688c
Browse files Browse the repository at this point in the history
  • Loading branch information
wiwski authored Jun 30, 2023
2 parents f03688c + 62b9dce commit 50eb18a
Show file tree
Hide file tree
Showing 20 changed files with 372 additions and 211 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/release-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,13 @@ jobs:
- name: Build CLI with Nuitka
run: |
.\.venv\Scripts\activate.ps1
nuitka --standalone --assume-yes-for-downloads --include-package=h5py --include-data-files=lst_config.yml=lst_config.yml new_aglae_data_converter/converter.py
nuitka --standalone --assume-yes-for-downloads --include-package=h5py --include-data-files=config.yml=config.yml new_aglae_data_converter/converter.py
shell: pwsh

- name: Build GUI with Nuitka
run: |
.\.venv\Scripts\activate.ps1
nuitka --standalone --assume-yes-for-downloads --enable-plugin=pyside6 --include-package=h5py --include-data-files=lst_config.yml=lst_config.yml new_aglae_data_converter/gui.py
nuitka --standalone --assume-yes-for-downloads --enable-plugin=pyside6 --include-package=h5py --include-data-files=config.yml=config.yml new_aglae_data_converter/gui.py
shell: pwsh

- name: Zip build
Expand Down Expand Up @@ -157,10 +157,10 @@ jobs:
run: make build_rs

- name: Build CLI
run: poetry run nuitka3 --standalone --assume-yes-for-downloads --include-data-files=lst_config.yml=lst_config.yml --clang new_aglae_data_converter/converter.py
run: poetry run nuitka3 --standalone --assume-yes-for-downloads --include-data-files=config.yml=config.yml --clang new_aglae_data_converter/converter.py

- name: Build GUI
run: poetry run nuitka3 --standalone --assume-yes-for-downloads --enable-plugin=pyside6 --include-data-files=lst_config.yml=lst_config.yml --clang new_aglae_data_converter/gui.py
run: poetry run nuitka3 --standalone --assume-yes-for-downloads --enable-plugin=pyside6 --include-data-files=config.yml=config.yml --clang new_aglae_data_converter/gui.py

- name: Zip build
run: |
Expand Down
Binary file added 20230227_0002_Std_SASHI_IBA.hdf5
Binary file not shown.
Binary file added 20230510_0001_Std_COLORSOURCES_IBA.hdf5
Binary file not shown.
53 changes: 53 additions & 0 deletions config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
x: 256
y: 512
detectors:
x1:
adc: 1
channels: 2048
x2:
adc: 2
channels: 2048
x3:
adc: 4
channels: 2048
x4:
adc: 8
channels: 2048
x0:
adc: 16
channels: 2048
GAMMA:
adc: 32
channels: 4096
file_extension: g7
GAMMA_20:
adc: 1024
channels: 1024
file_extension: g20
GAMMA_70:
adc: 2048
channels: 4096
file_extension: g70
# GAMMA_7244: # TODO: add channels & file_extension
# file_extension: g27
RBS:
adc: 64
channels: 512
file_extension: r8
RBS_135:
adc: 64
channels: 4096
file_extension: r135
RBS_150:
adc: 128
channels: 4096
file_extension: r150
computed_detectors:
x10:
detectors: ["x1", "x2", "x3", "x4"]
x11:
detectors: ["x1", "x2"]
x12:
detectors: ["x3", "x4"]
x13:
detectors: ["x1", "x2", "x3"]
Binary file added globals.hdf5
Binary file not shown.
31 changes: 0 additions & 31 deletions lst_config.yml

This file was deleted.

14 changes: 11 additions & 3 deletions lstrs.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,24 @@ class ParsingResult:
class Detector:
adc: int
channels: int
file_extension: str | None

def __init__(self, adc: int, channels: int) -> None: ...
def __init__(self, adc: int, channels: int, file_extension: str | None) -> None: ...

class LstConfig:
class ComputedDetector:
detectors: list[str]
file_extension: str | None

def __init__(self, detectors: list[str], file_extension: str | None) -> None: ...

class Config:
x: int
y: int
detectors: dict[str, Detector]
computed_detectors: dict[str, ComputedDetector]

def __init__(
self, x: int, y: int, detectors: dict[str, Detector], computed_detectors: dict[str, list[str]]
) -> None: ...

def parse_lst(filename: str, config: LstConfig) -> ParsingResult: ...
def parse_lst(filename: str, config: Config) -> ParsingResult: ...
34 changes: 34 additions & 0 deletions new_aglae_data_converter/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import logging
import pathlib

import lstrs
import yaml

logger = logging.getLogger(__name__)


def parse_config(config_file: pathlib.Path) -> lstrs.Config:
    """
    Parse a YAML detector-configuration file into an ``lstrs.Config``.

    :param config_file: Path to the YAML config file (see ``config.yml``).
    :return: Populated configuration object.
    :raises KeyError: If a required key ("x", "y", "detectors", or a
        detector's "adc"/"channels") is missing from the file.
    """
    with open(config_file, "r") as f:
        # Lazy %-style args keep the formatting cost out of non-debug runs.
        logger.debug("Opening config file: %s", config_file)
        config = yaml.safe_load(f)

    detectors: dict[str, lstrs.Detector] = {}
    computed_detectors: dict[str, lstrs.ComputedDetector] = {}

    for key, value in config["detectors"].items():
        # "file_extension" is optional; detectors without one fall back to
        # their name elsewhere in the pipeline.
        detectors[key] = lstrs.Detector(value["adc"], value["channels"], value.get("file_extension"))
    # "computed_detectors" is an optional section: tolerate its absence
    # instead of raising KeyError on minimal config files.
    for key, value in config.get("computed_detectors", {}).items():
        computed_detectors[key] = lstrs.ComputedDetector(
            value["detectors"],
            value.get("file_extension"),
        )

    return lstrs.Config(
        config["x"],
        config["y"],
        detectors=detectors,
        computed_detectors=computed_detectors,
    )
25 changes: 20 additions & 5 deletions new_aglae_data_converter/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,32 +6,44 @@
from globals.converter import convert_globals_to_hdf5
from lst.converter import convert_lst_to_hdf5

from new_aglae_data_converter.config import parse_config

logger = logging.getLogger(__name__)


def convert(
extraction_types: tuple[ExtractionType, ...],
data_path: pathlib.Path,
output_path: pathlib.Path,
lst_config_path: pathlib.Path | None = None,
config_path: pathlib.Path | None = None,
):
"""
Extract data files included in `extraction_types` from `data_path` and
convert them to HDF5 files saved to `output_path`.
:param extraction_types: Types of extraction to perform.
:param data_path: Path to the folder containing the data files.
:param output_path: Path to the folder where the HDF5 files should be saved.
:param lst_config_path: Path to a config file for lst parsing.
:param config_path: Path to a config file for lst parsing.
:return: Number of processed files.
"""
# Check that the paths exist. Raise FileNotFoundError if not.
data_path.resolve(strict=True)
output_path.resolve(strict=True)
logger.info("Reading from : %s", data_path)
logger.info("Saving files to : %s", output_path)

# Throw error if no config file is provided
if not config_path:
config_path = pathlib.Path(__file__).parents[1] / "config.yml"
if not config_path.exists():
raise ValueError("Default config file is missing. Provide a config file.")
config = parse_config(config_path)

processed_files_num = 0
if ExtractionType.GLOBALS in extraction_types or ExtractionType.STANDARDS in extraction_types:
processed_files_num += convert_globals_to_hdf5(extraction_types, data_path, output_path)
processed_files_num += convert_globals_to_hdf5(extraction_types, data_path, output_path, config)
if ExtractionType.LST in extraction_types:
processed_files_num += convert_lst_to_hdf5(data_path, output_path, lst_config_path)
processed_files_num += convert_lst_to_hdf5(data_path, output_path, config)

return processed_files_num

Expand All @@ -45,6 +57,7 @@ def convert(
type=str,
nargs="+",
choices=("lst", "globals", "standards"),
default=["lst", "globals", "standards"],
help="The data types to extract and convert. "
"Choices are 'lst', 'globals' and 'standards'. "
"Example: python converter.py -e lst globals -d ... -o ...",
Expand All @@ -55,13 +68,15 @@ def convert(
metavar="Data path",
type=pathlib.Path,
help="Path to the the globals data folder.",
required=True,
)
parser.add_argument(
"--output-path",
"-o",
metavar="Output path",
type=pathlib.Path,
help="Path to the the globals data folder.",
required=True,
)
parser.add_argument(
"--config",
Expand All @@ -82,6 +97,6 @@ def convert(
extraction_types=tuple(ExtractionType[ext_type.upper()] for ext_type in args.extraction_types),
data_path=args.data_path,
output_path=args.output_path,
lst_config_path=args.config,
config_path=args.config,
)
logger.debug(f"Processed %s files.", processed_files_cnt)
81 changes: 38 additions & 43 deletions new_aglae_data_converter/globals/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,19 @@
import pathlib

import h5py

import lstrs
from enums import ExtractionType
from globals.parsers import RBSParser, SpectrumParser
from globals.parsers import BaseParser, RBSParser, SpectrumParser

logger = logging.getLogger(__name__)

# List of valid file extensions for global data files
GLOBALS_FILE_EXTENSIONS = [
"g7",
"g27",
"r8",
"r9",
"x0",
"x1",
"x2",
"x3",
"x4",
"x10",
"x11",
"x12",
"x13",
]
logger = logging.getLogger(__name__)


def convert_globals_to_hdf5(
extraction_types: tuple[ExtractionType, ...],
data_path: pathlib.Path,
output_path: pathlib.Path,
config: lstrs.Config,
) -> int:
"""
Convert global data files to HDF5 format and save them to the specified output path.
Expand All @@ -49,7 +34,7 @@ def convert_globals_to_hdf5(
standards_file = h5py.File(output_path / "std.hdf5", mode="w")

# Get global data files in the specified folder
data_files = get_global_files(data_path)
data_files = get_global_files(data_path, config)
logger.info("Starting reading files...")
num_processed_files = 0
for global_file in data_files:
Expand Down Expand Up @@ -85,49 +70,59 @@ def insert_global_file_in_hdf5(hdf5_group: h5py.Group, global_file: pathlib.Path
# Create a group for the measure point in the HDF5 file
measure_point_group = hdf5_group.require_group(measure_point)
# Add attributes to the measure point group
measure_point_group.attrs.create("start date", start_date)
measure_point_group.attrs.create("ref object", ref_object)
measure_point_group.attrs.create("start_date", start_date)
measure_point_group.attrs.create("object_ref", ref_object)
# Return if the file is empty
if os.stat(global_file).st_size == 0:
return
# Populate the detector dataset with the data from the global file
populate_detector_dataset(measure_point_group, detector, global_file)


def populate_detector_dataset(parent_group: h5py.Group, detector_name: str, global_file: pathlib.Path):
"""
Populate an HDF5 dataset with the data from a global file.
:param parent_group: HDF5 group containing the dataset.
:param detector_name: Name of the detector.
:param global_file: Global file to extract the data from.
"""
with open(global_file, "r", encoding="utf-8") as file:
# Extract attributes from header
if global_file.suffix in [".r8", ".r9"]:
if global_file.suffix in [".r8", ".r9", ".r150", ".r135"]:
parser = RBSParser(file)
else:
parser = SpectrumParser(file)

header = parser.parse_header()
populate_measure_point_group_attributes(measure_point_group, parser)

# Create dataset and populate data
detector_dataset = parent_group.create_dataset(detector_name, data=parser.parse_dataset(), compression="gzip")
# Add attributes to dataset
for key, value in header:
detector_dataset.attrs.create(key, value)
# Populate the detector dataset with the data from the global file
measure_point_group.create_dataset(detector, data=parser.parse_dataset(), compression="gzip")


def get_global_files(folder: pathlib.Path):
def populate_measure_point_group_attributes(measure_point_group: h5py.Group, parser: BaseParser):
    """
    Copy a parsed file header onto the measure-point group's attributes.

    The header's nested "experiment_information" mapping is flattened into the
    top level before being written. Attributes are only (re)written when the
    group currently holds fewer attributes than the flattened header provides,
    so an already fully populated group is left untouched.

    :param measure_point_group: HDF5 group receiving the attributes.
    :param parser: Parser whose ``parse_header`` yields the header mapping
        (may be empty/None for headerless files).
    """
    header = parser.parse_header()
    if not header:
        return
    # "experiment_information" may be absent (RBS files do not carry the full
    # attribute set) — default to an empty mapping instead of raising KeyError.
    experiment_information = header.pop("experiment_information", {})
    flatten_header = {**header, **experiment_information}

    measure_point_attrs_keys = measure_point_group.attrs.keys()
    # If the group is empty or only partially populated, write every attribute
    # from this (fuller) header. Only spectrum files carry all attributes.
    if not measure_point_attrs_keys or len(measure_point_attrs_keys) < len(flatten_header):
        for key, value in flatten_header.items():
            measure_point_group.attrs.create(key, value)


def get_global_files(folder: pathlib.Path, config: lstrs.Config) -> list[pathlib.Path]:
    """
    Get all global data files in the specified folder (recursively).

    A file qualifies when its extension (without the leading dot) matches one
    of the file extensions declared by the detectors in *config*.

    :param folder: Folder to search for global data files.
    :param config: Detector configuration supplying the valid file extensions.
    :return: Sorted list of matching file paths.
    """
    files = folder.glob("**/*")
    global_extensions = _get_global_file_extensions(config)
    global_files: list[pathlib.Path] = [file for file in files if file.suffix[1:] in global_extensions]
    return sorted(global_files)


def is_file_std(filename: str) -> bool:
    """Return True when *filename* marks a standards ("_std_") measurement."""
    lowered = filename.lower()
    return lowered.find("_std_") != -1


def _get_global_file_extensions(config: lstrs.Config) -> set[str]:
detector_extensions = [detector.file_extension or name for name, detector in config.detectors.items()]
computed_detector_extensions = [
detector.file_extension or name for name, detector in config.computed_detectors.items()
]
return set(detector_extensions + computed_detector_extensions)
Loading

0 comments on commit 50eb18a

Please sign in to comment.