Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new command: annotate #292

Merged
merged 15 commits into from
Aug 1, 2022
17 changes: 17 additions & 0 deletions sssom/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from sssom.constants import (
DEFAULT_VALIDATION_TYPES,
MAPPING_SET_SLOTS,
MAPPING_SLOTS,
PREFIX_MAP_MODES,
SchemaValidationType,
Expand All @@ -34,6 +35,7 @@
from . import __version__
from .cliques import split_into_cliques, summarize_cliques
from .io import (
annotate_file,
convert_file,
filter_file,
parse_file,
Expand Down Expand Up @@ -644,5 +646,20 @@ def filter(input: str, output: TextIO, **kwargs):
filter_file(input=input, output=output, **kwargs)


# Registers `annotate` as a sub-command of `main`.
# NOTE(review): the option decorator presumably generates one CLI option per
# entry of MAPPING_SET_SLOTS; confirm against its definition.
@main.command()
@input_argument
@output_option
@dynamically_generate_sssom_options(MAPPING_SET_SLOTS)
def annotate(input: str, output: TextIO, **kwargs):
    """Annotate metadata of a mapping set.

    :param input: Input path of the SSSOM tsv file.
    :param output: Output location.
    :param **kwargs: Options provided by user
        which are added to the metadata (e.g.: --mapping_set_id http://example.org/abcd)
    """
    # Thin CLI wrapper: forwards every argument unchanged to annotate_file.
    annotate_file(input=input, output=output, **kwargs)


# Run the CLI entry point only when this module is executed as a script.
if __name__ == "__main__":
    main()
30 changes: 30 additions & 0 deletions sssom/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from sssom.validators import validate

from .constants import (
MAPPING_SET_SLOTS,
PREFIX_MAP_MODE_MERGED,
PREFIX_MAP_MODE_METADATA_ONLY,
PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY,
Expand Down Expand Up @@ -361,3 +362,32 @@ def filter_file(input: str, output: TextIO, **kwargs) -> MappingSetDataFrame:
if multiple_params and idx != len(params):
query += " AND ("
return run_sql_query(query=query, inputs=[input], output=output)


def annotate_file(input: str, output: TextIO, **kwargs) -> MappingSetDataFrame:
    """Annotate a file i.e. add custom metadata to the mapping set.

    :param input: Input path of the SSSOM tsv file.
    :param output: Output location.
    :param **kwargs: Options provided by user
        which are added to the metadata (e.g.: --mapping_set_id http://example.org/abcd).
        Each value may be a list/tuple of values or a single scalar value.
    :raises ValueError: If parameter provided is invalid.
    :return: Annotated MappingSetDataFrame object.
    """
    # Keep only the options that actually carry a value (drops None, "", ()).
    params = {k: v for k, v in kwargs.items() if v}

    # Single pass: collect the offending keys and fail if there are any.
    invalids = [p for p in params if p not in MAPPING_SET_SLOTS]
    if invalids:
        raise ValueError(
            f"The params are invalid: {invalids}. Should be any of the following: {MAPPING_SET_SLOTS}"
        )

    input_msdf = parse_sssom_table(input)

    # Do not silently discard the requested annotations when the parsed
    # mapping set carries no metadata yet; start from an empty mapping.
    if input_msdf.metadata is None:
        input_msdf.metadata = {}

    for k, v in params.items():
        if isinstance(v, (list, tuple)):
            # A one-element collection is stored as a scalar, several
            # elements as a list.
            input_msdf.metadata[k] = v[0] if len(v) == 1 else list(v)
        else:
            # Scalar supplied directly (e.g. a plain string from a
            # programmatic call): store as-is instead of indexing or
            # splitting it into characters.
            input_msdf.metadata[k] = v

    write_table(input_msdf, output)
    return input_msdf
Loading