Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Flake8 Round 3: We're going to make you an overnight celebrity #164

Merged
merged 9 commits into from
Oct 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,13 @@ ignore =
S408 # don't worry about unsafe xml
S318 # don't worry about unsafe xml
S310 # TODO remove this later and switch to using requests
# The following are documentation things (remove one at a time in future PRs)
D100
D101
D102
exclude =
sssom/sssom_datamodel.py
sssom/cliquesummary.py

##########################
# Darglint Configuration #
##########################
[darglint]
docstring_style = sphinx
strictness = short
2 changes: 1 addition & 1 deletion sssom/__main__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

"""Entrypoint module, in case you use `python -m term_expando`.
"""Entrypoint module, in case you use `python -m sssom`.

Why does this file exist, and why __main__? For more info, read:

Expand Down
34 changes: 20 additions & 14 deletions sssom/cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
"""Command line interface for SSSOM.

Why does this file exist, and why not put this in ``__main__``? You might be tempted to import things from ``__main__``
later, but that will cause problems--the code will get executed twice:

- When you run ``python3 -m sssom`` Python will execute ``__main__.py`` as a script. That means there won't be any
``sssom.__main__`` in ``sys.modules``.
- When you import __main__ it will get executed again (as a module) because
there's no ``sssom.__main__`` in ``sys.modules``.
matentzn marked this conversation as resolved.
Show resolved Hide resolved

.. seealso:: https://click.palletsprojects.com/en/8.0.x/setuptools/
"""

import logging
import re
import sys
Expand Down Expand Up @@ -96,7 +109,7 @@ def convert(input: str, output: TextIO, output_format: str):

Example:
sssom convert --input my.sssom.tsv --output-format rdfxml --output my.sssom.owl
"""
""" # noqa: DAR101
convert_file(input_path=input, output=output, output_format=output_format)


Expand Down Expand Up @@ -134,16 +147,7 @@ def parse(
clean_prefixes: bool,
output: TextIO,
):
"""Parse a file in one of the supported formats (such as obographs) into an SSSOM TSV file.

Args:
input: The path to the input file in one of the legal formats, eg obographs, aligmentapi-xml
input_format: The string denoting the input format.
metadata: The path to a file containing the sssom metadata (including prefix_map) to be used during parse.
prefix_map_mode: prefix map mode.
clean_prefixes: If True (default), records with unknown prefixes are removed from the SSSOM file.
output: The path to the SSSOM TSV output file.
"""
"""Parse a file in one of the supported formats (such as obographs) into an SSSOM TSV file."""
parse_file(
input_path=input,
output=output,
Expand Down Expand Up @@ -221,7 +225,7 @@ def dosql(query: str, inputs: List[str], output: TextIO):
Example:
`sssom dosql -q "SELECT file1.*,file2.object_id AS ext_object_id, file2.object_label AS ext_object_label \
FROM file1 INNER JOIN file2 WHERE file1.object_id = file2.subject_id" FROM file1.sssom.tsv file2.sssom.tsv`
"""
""" # noqa: DAR101
# should start with from_tsv and MOST should return write_sssom
n = 1
while len(inputs) >= n:
Expand Down Expand Up @@ -288,7 +292,7 @@ def diff(inputs: Tuple[str, str], output: TextIO):

The output is a new SSSOM file with the union of all mappings, and
injected comments indicating uniqueness to set1 or set2.
"""
""" # noqa: DAR101,DAR401
input1, input2 = inputs
msdf1 = read_sssom_table(input1)
msdf2 = read_sssom_table(input2)
Expand Down Expand Up @@ -424,7 +428,7 @@ def merge(inputs: Sequence[str], output: TextIO, reconcile: bool = True):
reconcile (if msdf contains a higher confidence _negative_ mapping,
then remove lower confidence positive one. If confidence is the same,
prefer HumanCurated. If both HumanCurated, prefer negative mapping).
"""
""" # noqa: DAR101
(input1, input2) = inputs[:2]
msdf1 = read_sssom_table(input1)
msdf2 = read_sssom_table(input2)
Expand Down Expand Up @@ -463,6 +467,8 @@ def rewire(

Example:
sssom rewire -I xml -i tests/data/cob.owl -m tests/data/cob-to-external.tsv --precedence PR

# noqa: DAR101
"""
msdf = read_sssom_table(mapping_file)
g = Graph()
Expand Down
2 changes: 2 additions & 0 deletions sssom/cliques.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Utilities for identifying and working with cliques/SCCs in mappings graphs."""

import hashlib
import statistics
from collections import defaultdict
Expand Down
2 changes: 2 additions & 0 deletions sssom/context.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Utilities for loading JSON-LD contexts."""

import json
import logging
from typing import Optional
Expand Down
2 changes: 2 additions & 0 deletions sssom/external_context.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""This module contains an autogenerated copy of the external SSSOM context."""

# This is autogenerated and super hacky.
sssom_external_context = """
{
Expand Down
2 changes: 2 additions & 0 deletions sssom/internal_context.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""This module contains an autogenerated copy of the internal SSSOM context."""

# This is autogenerated and super hacky.
sssom_context = """
{
Expand Down
44 changes: 19 additions & 25 deletions sssom/io.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""I/O utilities for SSSOM."""

import logging
from pathlib import Path
from typing import Optional, TextIO, Union
Expand All @@ -16,10 +18,9 @@ def convert_file(
) -> None:
"""Convert a file.

Args:
input_path: The path to the input SSSOM tsv file
output: The path to the output file. If none is given, will default to using stdout.
output_format: The format to which the the SSSOM TSV should be converted.
:param input_path: The path to the input SSSOM tsv file
:param output: The path to the output file. If none is given, will default to using stdout.
:param output_format: The format to which the SSSOM TSV should be converted.
"""
raise_for_bad_path(input_path)
doc = read_sssom_table(input_path)
Expand All @@ -40,15 +41,14 @@ def parse_file(
) -> None:
"""Parse an SSSOM metadata file and write to a table.

Args:
input_path: The path to the input file in one of the legal formats, eg obographs, aligmentapi-xml
output: The path to the output file.
input_format: The string denoting the input format.
metadata_path: The path to a file containing the sssom metadata (including prefix_map)
to be used during parse.
prefix_map_mode: Defines whether the prefix map in the metadata should be extended or replaced with
the SSSOM default prefix map. Must be one of metadata_only, sssom_default_only, merged
clean_prefixes: If True (default), records with unknown prefixes are removed from the SSSOM file.
:param input_path: The path to the input file in one of the legal formats, eg obographs, aligmentapi-xml
:param output: The path to the output file.
:param input_format: The string denoting the input format.
:param metadata_path: The path to a file containing the sssom metadata (including prefix_map)
to be used during parse.
:param prefix_map_mode: Defines whether the prefix map in the metadata should be extended or replaced with
the SSSOM default prefix map. Must be one of metadata_only, sssom_default_only, merged
:param clean_prefixes: If True (default), records with unknown prefixes are removed from the SSSOM file.
"""
raise_for_bad_path(input_path)
metadata = get_metadata_and_prefix_map(
Expand All @@ -65,14 +65,10 @@ def parse_file(


def validate_file(input_path: str) -> bool:
"""
Validate the incoming SSSOM TSV according to the SSSOM specification.
"""Validate the incoming SSSOM TSV according to the SSSOM specification.

Args:
input_path: The path to the input file in one of the legal formats, eg obographs, aligmentapi-xml

Returns:
Boolean. True if valid SSSOM, false otherwise.
:param input_path: The path to the input file in one of the legal formats, eg obographs, aligmentapi-xml
:returns: True if valid SSSOM, false otherwise.
"""
try:
read_sssom_table(file_path=input_path)
Expand All @@ -83,12 +79,10 @@ def validate_file(input_path: str) -> bool:


def split_file(input_path: str, output_directory: Union[str, Path]) -> None:
"""
Split an SSSOM TSV by prefixes and relations.
"""Split an SSSOM TSV by prefixes and relations.

Args:
input_path: The path to the input file in one of the legal formats, eg obographs, aligmentapi-xml
output_directory: The directory to which the split file should be exported.
:param input_path: The path to the input file in one of the legal formats, eg obographs, aligmentapi-xml
:param output_directory: The directory to which the split file should be exported.
"""
raise_for_bad_path(input_path)
msdf = read_sssom_table(input_path)
Expand Down
13 changes: 8 additions & 5 deletions sssom/parsers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""SSSOM parsers."""

import json
import logging
import re
Expand Down Expand Up @@ -175,9 +177,9 @@ def from_sssom_dataframe(
) -> MappingSetDataFrame:
"""Convert a dataframe to a MappingSetDataFrame.

:param df:
:param prefix_map:
:param meta:
:param df: A mappings dataframe
:param prefix_map: A prefix map
:param meta: A metadata dictionary
:return: MappingSetDataFrame
"""
prefix_map = _ensure_prefix_map(prefix_map)
Expand Down Expand Up @@ -318,9 +320,10 @@ def from_alignment_minidom(
"""Read a minidom Document object.

:param dom: XML (minidom) object
:param prefix_map:
:param prefix_map: A prefix map
:param meta: Optional meta data
:return: MappingSetDocument
:raises ValueError: If the alignment's ``xml`` element is present but not set to ``yes`` (only XML is supported).
"""
# FIXME: should be prefix_map = _check_prefix_map(prefix_map)
_ensure_prefix_map(prefix_map)
Expand Down Expand Up @@ -349,7 +352,7 @@ def from_alignment_minidom(

elif node_name == "xml":
if e.firstChild.nodeValue != "yes":
raise Exception(
raise ValueError(
"Alignment format: xml element said, but not set to yes. Only XML is supported!"
)
elif node_name == "onto1":
Expand Down
6 changes: 6 additions & 0 deletions sssom/rdf_util.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Rewriting functionality for RDFlib graphs."""

import logging
from typing import Any, Dict, List, Optional

Expand All @@ -7,6 +9,10 @@
from .sssom_datamodel import EntityId, Mapping
from .util import MappingSetDataFrame

__all__ = [
"rewire_graph",
]

matentzn marked this conversation as resolved.
Show resolved Hide resolved

def rewire_graph(
g: Graph,
Expand Down
9 changes: 9 additions & 0 deletions sssom/sparql_util.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Utilities for querying mappings with SPARQL."""

import logging
from dataclasses import dataclass
from typing import Dict, List, Mapping, Optional
Expand All @@ -9,9 +11,16 @@

from .util import MappingSetDataFrame

__all__ = [
"EndpointConfig",
"query_mappings",
]


@dataclass
class EndpointConfig:
"""A container for a SPARQL endpoint's configuration."""

url: str
graph: URIRef
predmap: Dict[str, str]
Expand Down
6 changes: 6 additions & 0 deletions sssom/sssom_document.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
"""Additional SSSOM object models."""

from dataclasses import dataclass

from .sssom_datamodel import MappingSet
from .typehints import PrefixMap

__all__ = [
"MappingSetDocument",
]


@dataclass()
class MappingSetDocument:
Expand Down
2 changes: 2 additions & 0 deletions sssom/typehints.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,7 @@


class Metadata(NamedTuple):
"""A pair of a prefix map and associated metadata."""

prefix_map: PrefixMap
metadata: MetadataType
19 changes: 13 additions & 6 deletions sssom/util.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Utilities for SSSOM."""

import hashlib
import json
import logging
Expand Down Expand Up @@ -116,6 +118,7 @@ def __str__(self) -> str: # noqa:D105
return description

def clean_prefix_map(self) -> None:
"""Remove unused prefixes from the internal prefix map based on the internal dataframe."""
prefixes_in_map = get_prefixes_used_in_table(self.df)
new_prefixes: PrefixMap = dict()
missing_prefixes = []
Expand Down Expand Up @@ -316,7 +319,11 @@ def group_mappings(df: pd.DataFrame) -> Dict[EntityPair, List[pd.Series]]:
def compare_dataframes(df1: pd.DataFrame, df2: pd.DataFrame) -> MappingSetDiff:
"""Perform a diff between two SSSOM dataframes.

Currently does not discriminate between mappings with different predicates
:param df1: A mapping dataframe
:param df2: A mapping dataframe
:returns: A mapping set diff

.. warning:: currently does not discriminate between mappings with different predicates
"""
mappings1 = group_mappings(df1.copy())
mappings2 = group_mappings(df2.copy())
Expand Down Expand Up @@ -465,9 +472,7 @@ def merge_msdf(
then remove lower confidence positive one. If confidence is the same,
prefer HumanCurated. If both HumanCurated, prefer negative mapping).
Defaults to True.

Returns:
MappingSetDataFrame: Merged MappingSetDataFrame.
:returns: Merged MappingSetDataFrame.
"""
# Inject metadata of msdf into df
msdf1 = inject_metadata_into_df(msdf=msdf1)
Expand Down Expand Up @@ -506,6 +511,7 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame:

:param df: Merged Pandas DataFrame
:return: Pandas DataFrame with negations addressed
:raises ValueError: If the dataframe is ``None`` after assigning default confidence
"""
"""
1. Mappings in mapping1 trump mappings in mapping2 (if mapping2 contains a conflicting mapping in mapping1,
Expand All @@ -527,7 +533,7 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame:
df, nan_df = assign_default_confidence(df)

if df is None:
raise Exception(
raise ValueError(
"The dataframe, after assigning default confidence, appears empty (deal_with_negation"
)

Expand Down Expand Up @@ -783,7 +789,7 @@ def to_mapping_set_dataframe(doc: MappingSetDocument) -> MappingSetDataFrame:


class NoCURIEException(ValueError):
pass
"""An exception raised when a CURIE cannot be parsed with a given prefix map."""


CURIE_RE = re.compile(r"[A-Za-z0-9_]+[:][A-Za-z0-9_]")
Expand Down Expand Up @@ -908,6 +914,7 @@ def prepare_context_str(prefix_map: Optional[PrefixMap] = None, **kwargs) -> str
"""Prepare a JSON-LD context and dump to a string.

:param prefix_map: Prefix map, defaults to None
:param kwargs: Keyword arguments to pass through to :func:`json.dumps`
:return: Context in str format
"""
return json.dumps(prepare_context(prefix_map), **kwargs)
Expand Down
2 changes: 2 additions & 0 deletions sssom/writers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Serialization functions for SSSOM."""

import json
import logging
from pathlib import Path
Expand Down
10 changes: 10 additions & 0 deletions tests/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""Shared filesystem locations used by the test cases."""

import os
import pathlib

# Absolute directory that contains this module; the other paths hang off it.
cwd = pathlib.Path(__file__).parent.resolve()

# ``data`` is the sibling directory named for test inputs; ``tmp`` is the
# sibling directory used as the test output location. Both are plain strings.
data_dir, test_out_dir = (os.path.join(cwd, leaf) for leaf in ("data", "tmp"))

# Create the output directory at import time so it exists before any test runs.
os.makedirs(test_out_dir, exist_ok=True)
Loading