Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add static type checking with MyPy #143

Merged
merged 30 commits into from
Sep 22, 2021
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
597cb47
Add configuration for static type checking with mypy
cthoyt Sep 13, 2021
59e4072
Merge branch 'master' into add-mypy
cthoyt Sep 13, 2021
32bba5b
Update type annotations
cthoyt Sep 13, 2021
7c6d2c3
updated gitignore
hrshdhgd Sep 14, 2021
6975f42
mypy: changed Set[str] to Set[EntityPair]
hrshdhgd Sep 14, 2021
2a1c39f
added type Dict[Tuple[Any]]
hrshdhgd Sep 14, 2021
1e8880e
updated type: # type: Dict[Any, Any]
hrshdhgd Sep 14, 2021
384fb42
meta type: Dict[str, str]
hrshdhgd Sep 14, 2021
f0cd16f
switched to max_conf: Dict[Any, Any] = {}
hrshdhgd Sep 14, 2021
6ee9b3e
type declaration and existence checks
hrshdhgd Sep 14, 2021
0250856
util.py is mypy compliant
hrshdhgd Sep 14, 2021
cd2f983
Merge branch 'master' into add-mypy
cthoyt Sep 15, 2021
056924d
util mypy compliant
hrshdhgd Sep 15, 2021
ab19cb7
changes during meeting with Nico
hrshdhgd Sep 20, 2021
c30f575
edits for mypy compatibility
hrshdhgd Sep 20, 2021
d7dbb36
mypy recommendations addressed
hrshdhgd Sep 20, 2021
41a6379
Merge branch 'master' into add-mypy
hrshdhgd Sep 20, 2021
98fa859
mypy compliant
hrshdhgd Sep 20, 2021
2a47034
Merge branch 'master' into add-mypy
matentzn Sep 21, 2021
a25b3f9
Finish type checking
cthoyt Sep 21, 2021
0e74ca8
Fix typing
cthoyt Sep 21, 2021
fde5fda
Update cli.py
cthoyt Sep 21, 2021
b5bf96d
change type
hrshdhgd Sep 21, 2021
2574a52
flipped variables for correct capture
hrshdhgd Sep 21, 2021
812e521
determined type for invdict
hrshdhgd Sep 21, 2021
81ffa5c
narrowed down on type.
hrshdhgd Sep 21, 2021
a3c37b0
removed unused import
hrshdhgd Sep 21, 2021
76d1e32
Tuple type declared properly
hrshdhgd Sep 21, 2021
f75b724
Add mypy goal to Makefile
cthoyt Sep 22, 2021
778bfca
Make sure mypy is run with `tox`
cthoyt Sep 22, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/qc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,7 @@ jobs:
run: pip install tox
- name: Lint with flake8
run: tox -e flake8
- name: Test with MyPy
run: tox -e mypy
- name: Test with pytest
run: tox -e py
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ sphinx/_build
sphinx/_static
sphinx/_templates
docs/_build
.vscode/*
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,10 @@ target-version = ['py36', 'py37', 'py38', 'py39']
[tool.isort]
profile = "black"
multi_line_output = 3

[[tool.mypy.overrides]]
module = [
'sssom.sssom_datamodel',
'sssom.cliquesummary'
]
ignore_errors = true
4 changes: 2 additions & 2 deletions sssom/context.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import logging
from typing import Any, Mapping, Tuple
from typing import Any, Dict, Mapping, Tuple

from .external_context import sssom_external_context
from .internal_context import sssom_context
Expand Down Expand Up @@ -49,7 +49,7 @@ def get_default_metadata() -> Tuple[Mapping[str, Any], Mapping[str, Any]]:
contxt = get_jsonld_context()
contxt_external = get_external_jsonld_context()
curie_map = {}
meta = {}
meta: Dict[str, str] = {}
for key in contxt["@context"]:
v = contxt["@context"][key]
if isinstance(v, str):
Expand Down
72 changes: 40 additions & 32 deletions sssom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import sys
from dataclasses import dataclass
from io import FileIO, StringIO
from typing import Any, Dict, List, Mapping, Optional, Set, Union
from typing import Any, Dict, List, Mapping, Optional, Set, Tuple
from urllib.request import urlopen

import numpy as np
Expand Down Expand Up @@ -63,17 +63,20 @@ class MappingSetDataFrame:
A collection of mappings represented as a DataFrame, together with additional metadata
"""

df: pd.DataFrame = None # Mappings
prefixmap: Dict[str, str] = None # maps CURIE prefixes to URI bases
df: Optional[pd.DataFrame] = None # Mappings
prefixmap: Dict[str, Any] = None # maps CURIE prefixes to URI bases
metadata: Optional[Dict[str, str]] = None # header metadata excluding prefixes

def merge(self, msdf2, inplace=True):
def merge(
self, msdf2: "MappingSetDataFrame", inplace: bool = True
) -> "MappingSetDataFrame":
"""Merges two MappingSetDataframes

Args:
msdf2 (MappingSetDataFrame): Secondary MappingSetDataFrame (self => primary)
inplace (bool): if true, msdf2 is merged into the calling MappingSetDataFrame, if false, it simply return
the merged data frame.
msdf: Secondary MappingSetDataFrame (self => primary)
inplace:
if true, msdf2 is merged into the calling MappingSetDataFrame, if false, it simply return
the merged data frame.

Returns:
MappingSetDataFrame: Merged MappingSetDataFrame
Expand All @@ -83,6 +86,7 @@ def merge(self, msdf2, inplace=True):
self.df = msdf.df
self.prefixmap = msdf.prefixmap
self.metadata = msdf.metadata
# FIXME should return self if inplace
return msdf

def __str__(self):
Expand Down Expand Up @@ -142,9 +146,9 @@ class MappingSetDiff:
this is considered a mapping in common.
"""

unique_tuples1: Optional[Set[str]] = None
unique_tuples2: Optional[Set[str]] = None
common_tuples: Optional[Set[str]] = None
unique_tuples1: Optional[Set[EntityPair]] = None
unique_tuples2: Optional[Set[EntityPair]] = None
common_tuples: Optional[Set[EntityPair]] = None

combined_dataframe: Optional[pd.DataFrame] = None
"""
Expand All @@ -171,6 +175,8 @@ def load(self, filename) -> None:
self.df = read_pandas(filename)

def convert(self) -> Dict[str, Any]:
if self.df is None:
cthoyt marked this conversation as resolved.
Show resolved Hide resolved
raise RuntimeError("dataframe is not loaded properly")
# note that 'mapping' is both a metaproperty and a property of this model...
cslots = {
"mappings": {
Expand All @@ -180,7 +186,7 @@ def convert(self) -> Dict[str, Any]:
},
"id": {"description": "CURIE or IRI identifier", "identifier": True},
}
classes = {
classes: Dict[str, Any] = {
"mapping set": {
"description": "Represents a set of mappings",
"slots": ["mappings"],
Expand Down Expand Up @@ -310,7 +316,7 @@ def filter_redundant_rows(df: pd.DataFrame, ignore_predicate=False) -> pd.DataFr
key = [SUBJECT_ID, OBJECT_ID, PREDICATE_ID]
dfmax: pd.DataFrame
dfmax = df.groupby(key, as_index=False)[CONFIDENCE].apply(max).drop_duplicates()
max_conf = {}
max_conf: Dict[Any, Any] = {}
cthoyt marked this conversation as resolved.
Show resolved Hide resolved
for _, row in dfmax.iterrows():
if ignore_predicate:
max_conf[(row[SUBJECT_ID], row[OBJECT_ID])] = row[CONFIDENCE]
Expand Down Expand Up @@ -586,13 +592,15 @@ def merge_msdf(

merged_msdf = MappingSetDataFrame()
# If msdf2 has a DataFrame
if msdf2.df is not None:
if msdf1.df is not None and msdf2.df is not None:
# 'outer' join in pandas == FULL JOIN in SQL
merged_msdf.df = msdf1.df.merge(msdf2.df, how="outer")
else:
merged_msdf.df = msdf1.df
# merge the non DataFrame elements
merged_msdf.prefixmap = dict_merge(msdf2.prefixmap, msdf1.prefixmap, "prefixmap")
merged_msdf.prefixmap = dict_merge(
source=msdf2.prefixmap, target=msdf1.prefixmap, dict_name="prefixmap"
)
# After a Slack convo with @matentzn, commented out below.
# merged_msdf.metadata = dict_merge(msdf2.metadata, msdf1.metadata, 'metadata')

Expand Down Expand Up @@ -677,15 +685,13 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame:
)[CONFIDENCE].max()

# If same confidence prefer "HumanCurated".
reconciled_df_subset: pd.DataFrame
reconciled_df_subset = pd.DataFrame(columns=combined_normalized_subset.columns)
for _, row_1 in max_confidence_df.iterrows():
match_condition_1 = (
(combined_normalized_subset[SUBJECT_ID] == row_1[SUBJECT_ID])
& (combined_normalized_subset[OBJECT_ID] == row_1[OBJECT_ID])
& (combined_normalized_subset[CONFIDENCE] == row_1[CONFIDENCE])
)
match_condition_1: Union[bool, ...]
# match_condition_1[match_condition_1] gives the list of 'True's.
# In other words, the rows that match the condition (rules declared).
# Ideally, there should be 1 row. If not apply an extra rule to look for 'HumanCurated'.
Expand Down Expand Up @@ -715,12 +721,10 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame:
& (reconciled_df_subset[OBJECT_ID] == row_2[OBJECT_ID])
& (reconciled_df_subset[CONFIDENCE] == row_2[CONFIDENCE])
)
match_condition_2: Union[bool, ...]
reconciled_df_subset.loc[
match_condition_2[match_condition_2].index, PREDICATE_ID
] = row_2[PREDICATE_ID]

reconciled_df: pd.DataFrame
reconciled_df = pd.DataFrame(columns=df.columns)
for _, row_3 in reconciled_df_subset.iterrows():
match_condition_3 = (
Expand All @@ -729,28 +733,31 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame:
& (df[CONFIDENCE] == row_3[CONFIDENCE])
& (df[PREDICATE_ID] == row_3[PREDICATE_ID])
)
match_condition_3: Union[bool, ...]
reconciled_df = reconciled_df.append(
df.loc[match_condition_3[match_condition_3].index, :]
)
return_df = reconciled_df.append(nan_df).drop_duplicates()
return return_df


def dict_merge(source: Dict, target: Dict, dict_name: str) -> Dict:
def dict_merge(
*,
source: Optional[Dict[str, Any]] = None,
target: Dict[str, Any],
dict_name: str,
) -> Dict[str, Any]:
"""
Takes 2 MappingSetDataFrame elements (prefixmap OR metadata) and merges source => target

Args:
source (Dict): MappingSetDataFrame.prefixmap / MappingSetDataFrame.metadata
target (Dict): MappingSetDataFrame.prefixmap / MappingSetDataFrame.metadata
dict_name (str): prefixmap or metadata
source: MappingSetDataFrame.prefixmap / MappingSetDataFrame.metadata
target: MappingSetDataFrame.prefixmap / MappingSetDataFrame.metadata
dict_name: prefixmap or metadata

Returns:
Dict: merged MappingSetDataFrame.prefixmap / MappingSetDataFrame.metadata
"""
if source is not None:
k: str
for k, v in source.items():
if k not in target:
if v not in list(target.values()):
Expand All @@ -777,7 +784,7 @@ def inject_metadata_into_df(msdf: MappingSetDataFrame) -> MappingSetDataFrame:
Returns:
MappingSetDataFrame: MappingSetDataFrame with metadata as columns
"""
if bool(msdf.metadata):
if msdf.metadata is not None and msdf.df is not None:
for k, v in msdf.metadata.items():
if k not in msdf.df.columns:
msdf.df[k] = v
Expand Down Expand Up @@ -878,13 +885,14 @@ def to_mapping_set_dataframe(doc: MappingSetDocument) -> MappingSetDataFrame:
# convert MappingSetDocument into MappingSetDataFrame
###
data = []
for mapping in doc.mapping_set.mappings:
mdict = mapping.__dict__
m = {}
for key in mdict:
if mdict[key]:
m[key] = mdict[key]
data.append(m)
if doc.mapping_set.mappings is not None:
cthoyt marked this conversation as resolved.
Show resolved Hide resolved
for mapping in doc.mapping_set.mappings:
mdict = mapping.__dict__
m = {}
for key in mdict:
if mdict[key]:
m[key] = mdict[key]
data.append(m)
df = pd.DataFrame(data=data)
meta = extract_global_metadata(doc)
meta.pop("curie_map", None)
Expand Down
6 changes: 6 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,9 @@ deps =
flake8-bugbear
flake8-isort
description = Run the flake8 code quality checker.

[testenv:mypy]
deps = mypy
skip_install = true
commands = mypy --install-types --non-interactive --ignore-missing-imports sssom/ setup.py
description = Run the mypy tool to check static typing on the project.