Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix typo and best practices violations. #59

Merged
merged 7 commits on Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ dynamic = ["version"]
"Source" = "https://github.com/ScicatProject/scicat-filewriter-ingest"

[project.scripts]
scicat_ingestor = "scicat_ingestor:main"
background_ingestor = "background_ingestor:main"
scicat_ingestor = "scicat_online_ingestor:main"
background_ingestor = "scicat_offline_ingestor:main"

[project.entry-points."scicat_ingestor.metadata_extractor"]
max = "numpy:max"
Expand Down
2 changes: 1 addition & 1 deletion resources/base.imsc.json.example
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"name" : "Generic metadata schema"
"instrument" : "",
"selector" : "filename:starts_with:/ess/data",
"variables" : {
"variables" : {
"pid": {
"source": "NXS",
"path": "/entry/entry_identifier_uuid",
Expand Down
61 changes: 37 additions & 24 deletions src/scicat_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,20 @@ def _load_config(config_file: Any) -> dict:


def _merge_config_options(
config_dict: dict,
input_args_dict: dict,
keys: list[str] | None = None
config_dict: dict, input_args_dict: dict, keys: list[str] | None = None
) -> dict:
"""Merge configuration from the configuration file and input arguments."""

if keys == None:
keys = config_dict.keys();
if keys is None:
keys = config_dict.keys()

return {
**config_dict.setdefault("options", {}),
**{key: input_args_dict[key] for key in keys if input_args_dict[key] is not None},
**{
key: input_args_dict[key]
for key in keys
if input_args_dict[key] is not None
},
}


Expand Down Expand Up @@ -75,7 +77,8 @@ def build_online_arg_parser() -> argparse.ArgumentParser:
type=str,
)
group.add_argument(
"-d", "--dry-run",
"-d",
"--dry-run",
dest="dry_run",
help="Dry run. Does not produce any output file nor modify entry in SciCat",
action="store_true",
Expand Down Expand Up @@ -255,6 +258,7 @@ class FileHandlingOptions:
message_to_file: bool = True
message_file_extension: str = "message.json"


@dataclass
class IngestionOptions:
file_handling: FileHandlingOptions
Expand All @@ -274,13 +278,13 @@ def from_configurations(cls, config: dict) -> "IngestionOptions":

@dataclass
class DatasetOptions:
check_by_job_id: bool = True,
allow_dataset_pid: bool = True,
generate_dataset_pid: bool = False,
dataset_pid_prefix: str = "20.500.12269",
default_instrument_id: str = "",
default_proposal_id: str = "",
default_owner_group: str = "",
check_by_job_id: bool = (True,)
allow_dataset_pid: bool = (True,)
generate_dataset_pid: bool = (False,)
dataset_pid_prefix: str = ("20.500.12269",)
default_instrument_id: str = ("",)
default_proposal_id: str = ("",)
default_owner_group: str = ("",)
default_access_groups: list[str] = field(default_factory=list)

@classmethod
Expand All @@ -302,9 +306,7 @@ class SciCatOptions:
def from_configurations(cls, config: dict) -> "SciCatOptions":
"""Create SciCatOptions from a dictionary."""
options = cls(**config)
options.headers = {
"Authorization": "Bearer {}".format(options.token)
}
options.headers = {"Authorization": f"Bearer {options.token}"}
return options


Expand Down Expand Up @@ -335,16 +337,22 @@ def to_dict(self) -> dict:
)


def build_scicat_online_ingestor_config(input_args: argparse.Namespace) -> OnlineIngestorConfig:
def build_scicat_online_ingestor_config(
input_args: argparse.Namespace,
) -> OnlineIngestorConfig:
"""Merge configuration from the configuration file and input arguments."""
config_dict = _load_config(input_args.config_file)
logging_dict = _merge_config_options(config_dict.setdefault("logging",{}), vars(input_args))
ingestion_dict = _merge_config_options(config_dict.setdefault("ingestion",{}), vars(input_args), ["dry-run"])
logging_dict = _merge_config_options(
config_dict.setdefault("logging", {}), vars(input_args)
)
ingestion_dict = _merge_config_options(
config_dict.setdefault("ingestion", {}), vars(input_args), ["dry-run"]
)

# Wrap configuration in a dataclass
return OnlineIngestorConfig(
original_dict=_freeze_dict_items(config_dict),
dataset=DatasetOptions(**config_dict.setdefault("dataset",{})),
dataset=DatasetOptions(**config_dict.setdefault("dataset", {})),
ingestion=IngestionOptions.from_configurations(ingestion_dict),
kafka=KafkaOptions(**config_dict.setdefault("kafka", {})),
logging=LoggingOptions(**logging_dict),
Expand All @@ -359,6 +367,7 @@ class OfflineRunOptions:
done_writing_message_file: str
"""Full path of the done writing message file that match the ``nexus_file``."""


@dataclass
class OfflineIngestorConfig(OnlineIngestorConfig):
offline_run: OfflineRunOptions
Expand Down Expand Up @@ -388,8 +397,12 @@ def build_scicat_offline_ingestor_config(
"""Merge configuration from the configuration file and input arguments."""
config_dict = _load_config(input_args.config_file)
input_args_dict = vars(input_args)
logging_dict = _merge_config_options(config_dict.setdefault("logging",{}), input_args_dict)
ingestion_dict = _merge_config_options(config_dict.setdefault("ingestion",{}), input_args_dict, ["dry-run"])
logging_dict = _merge_config_options(
config_dict.setdefault("logging", {}), input_args_dict
)
ingestion_dict = _merge_config_options(
config_dict.setdefault("ingestion", {}), input_args_dict, ["dry-run"]
)
offline_run_option_dict = {
"nexus_file": input_args_dict.pop("nexus_file"),
"done_writing_message_file": input_args_dict.pop("done_writing_message_file"),
Expand All @@ -398,7 +411,7 @@ def build_scicat_offline_ingestor_config(
# Wrap configuration in a dataclass
return OfflineIngestorConfig(
original_dict=_freeze_dict_items(config_dict),
dataset=DatasetOptions(**config_dict.setdefault("dataset",{})),
dataset=DatasetOptions(**config_dict.setdefault("dataset", {})),
ingestion=IngestionOptions.from_configurations(ingestion_dict),
kafka=KafkaOptions(**config_dict.setdefault("kafka", {})),
logging=LoggingOptions(**logging_dict),
Expand Down
11 changes: 6 additions & 5 deletions src/scicat_dataset.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
import datetime
import json
import pathlib
from types import MappingProxyType
from typing import Any

from scicat_configuration import FileHandlingOptions
from scicat_schemas import (
load_datafilelist_item_schema_template,
load_dataset_schema_template,
load_origdatablock_schema_template,
load_datafilelist_item_schema_template,
)


Expand Down Expand Up @@ -114,7 +115,7 @@ def build_single_datafile_instance(
perm: str,
checksum: str = "",
) -> str:
return load_single_datafile_template().render(
return load_datafilelist_item_schema_template().render(
file_absolute_path=file_absolute_path,
file_size=file_size,
datetime_isoformat=datetime_isoformat,
Expand Down Expand Up @@ -167,7 +168,7 @@ def build_single_data_file_desc(
"""
Build the description of a single data file.
"""
single_file_template = load_single_datafile_template()
single_file_template = load_datafilelist_item_schema_template()

return json.loads(
single_file_template.render(
Expand Down Expand Up @@ -204,9 +205,9 @@ def save_and_build_single_hash_file_desc(
import datetime
import json

from scicat_schemas import load_single_datafile_template
from scicat_schemas import load_datafilelist_item_schema_template

single_file_template = load_single_datafile_template()
single_file_template = load_datafilelist_item_schema_template()
file_hash: str = original_file_desciption["chk"]
hash_path = _build_hash_file_path(
original_file_path=original_file_desciption["path"],
Expand Down
8 changes: 4 additions & 4 deletions src/scicat_kafka.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
from collections.abc import Generator

from confluent_kafka import Consumer
from scicat_configuration import MessageSavingOptions, kafkaOptions
from scicat_configuration import KafkaOptions
from streaming_data_types import deserialise_wrdn
from streaming_data_types.finished_writing_wrdn import (
FILE_IDENTIFIER as WRDN_FILE_IDENTIFIER,
)
from streaming_data_types.finished_writing_wrdn import WritingFinished


def collect_consumer_options(options: kafkaOptions) -> dict:
def collect_consumer_options(options: KafkaOptions) -> dict:
"""Build a Kafka consumer and configure it according to the ``options``."""
from dataclasses import asdict

Expand All @@ -35,7 +35,7 @@ def collect_consumer_options(options: kafkaOptions) -> dict:
return config_dict


def collect_kafka_topics(options: kafkaOptions) -> list[str]:
def collect_kafka_topics(options: KafkaOptions) -> list[str]:
"""Return the Kafka topics as a list."""
if isinstance(options.topics, str):
return options.topics.split(",")
Expand All @@ -45,7 +45,7 @@ def collect_kafka_topics(options: kafkaOptions) -> list[str]:
raise TypeError("The topics must be a list or a comma-separated string.")


def build_consumer(kafka_options: kafkaOptions, logger: logging.Logger) -> Consumer:
def build_consumer(kafka_options: KafkaOptions, logger: logging.Logger) -> Consumer:
"""Build a Kafka consumer and configure it according to the ``options``."""
consumer_options = collect_consumer_options(kafka_options)
logger.info("Connecting to Kafka with the following parameters:")
Expand Down
4 changes: 2 additions & 2 deletions src/scicat_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
import logging.handlers

import graypy
from scicat_configuration import OnlineIngestorConfig, OfflineIngestorConfig
from scicat_configuration import OfflineIngestorConfig, OnlineIngestorConfig


def build_logger(
config: OnlineIngestorConfig | OfflineIngestorConfig
config: OnlineIngestorConfig | OfflineIngestorConfig,
) -> logging.Logger:
"""Build a logger and configure it according to the ``config``."""
logging_options = config.logging
Expand Down
Loading