diff --git a/.github/workflows/qc.yml b/.github/workflows/qc.yml index 4c599ee4..95a4425e 100644 --- a/.github/workflows/qc.yml +++ b/.github/workflows/qc.yml @@ -27,5 +27,7 @@ jobs: pip install --upgrade tox - name: Lint with flake8 run: tox -e flake8 + - name: Test with MyPy + run: tox -e mypy - name: Test with pytest run: tox -e py diff --git a/.gitignore b/.gitignore index d53a9366..e8667fc6 100644 --- a/.gitignore +++ b/.gitignore @@ -19,4 +19,4 @@ sphinx/_build sphinx/_static sphinx/_templates docs/_build -.vscode/ +.vscode/* diff --git a/Makefile b/Makefile index 4b55a4f7..ca7fcc13 100644 --- a/Makefile +++ b/Makefile @@ -74,6 +74,11 @@ lint: pip install tox tox -e lint +.PHONY: mypy +mypy: + pip install tox + tox -e mypy + .PHONY: sphinx sphinx: cd sphinx &&\ diff --git a/pyproject.toml b/pyproject.toml index 5ad18245..61bf6afe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,3 +5,10 @@ target-version = ['py36', 'py37', 'py38', 'py39'] [tool.isort] profile = "black" multi_line_output = 3 + +[[tool.mypy.overrides]] +module = [ + 'sssom.sssom_datamodel', + 'sssom.cliquesummary' +] +ignore_errors = true diff --git a/sssom/cli.py b/sssom/cli.py index a4ea72b5..be1b9dad 100644 --- a/sssom/cli.py +++ b/sssom/cli.py @@ -2,7 +2,7 @@ import re import sys from pathlib import Path -from typing import Dict, List, TextIO, Tuple +from typing import Dict, List, Sequence, TextIO, Tuple import click import pandas as pd @@ -313,25 +313,9 @@ def sparql( prefix: List[Dict[str, str]], output: TextIO, ): - """Run a SPARQL query. - - Args: - - url (str): - config (str): - graph (str): - limit (int): - object_labels (bool): - prefix (List): - output (str): Output TSV/SSSOM file. - - - Returns: - - None. - """ - - endpoint = EndpointConfig() + """Run a SPARQL query.""" + # FIXME this usage needs _serious_ refactoring + endpoint = EndpointConfig() # type: ignore if config is not None: for k, v in yaml.safe_load(config).items(): setattr(endpoint, k, v) @@ -377,9 +361,16 @@ def diff(inputs: Tuple[str, str], output: TextIO): msdf1 = read_sssom_table(input1) msdf2 = read_sssom_table(input2) d = compare_dataframes(msdf1.df, msdf2.df) - logging.info( - f"COMMON: {len(d.common_tuples)} UNIQUE_1: {len(d.unique_tuples1)} UNIQUE_2: {len(d.unique_tuples2)}" - ) + if d.combined_dataframe is None: + raise RuntimeError + if ( + d.common_tuples is not None + and d.unique_tuples1 is not None + and d.unique_tuples2 is not None + ): + logging.info( + f"COMMON: {len(d.common_tuples)} UNIQUE_1: {len(d.unique_tuples1)} UNIQUE_2: {len(d.unique_tuples2)}" + ) d.combined_dataframe.to_csv(output, sep="\t", index=False) @@ -549,7 +540,7 @@ def correlations(input: str, output: TextIO, transpose: bool, fields: Tuple): help="Boolean indicating the need for reconciliation of the SSSOM tsv file.", ) @output_option -def merge(inputs: Tuple[str, str], output: TextIO, reconcile: bool = True): +def merge(inputs: Sequence[str], output: TextIO, reconcile: bool = True): """ Merging msdf2 into msdf1, if reconcile=True, then dedupe(remove redundant lower confidence mappings) and diff --git a/sssom/cliques.py b/sssom/cliques.py index df01594d..1f401271 100644 --- a/sssom/cliques.py +++ b/sssom/cliques.py @@ -1,12 +1,12 @@ -import collections import hashlib import statistics +from typing import Any, Dict import networkx as nx import pandas as pd from .parsers import to_mapping_set_document -from .sssom_datamodel import MappingSet +from .sssom_datamodel import Mapping, MappingSet from .sssom_document import MappingSetDocument from .util import 
MappingSetDataFrame @@ -18,41 +18,44 @@ def to_networkx(msdf: MappingSetDataFrame) -> nx.DiGraph: # m = { # "owl:subClassOf", # } - for mapping in doc.mapping_set.mappings: - s = mapping.subject_id - o = mapping.object_id - p = mapping.predicate_id - # TODO: this is copypastad from export_ptable - - pi = None - - if p == "owl:equivalentClass": - pi = 2 - elif p == "skos:exactMatch": - pi = 2 - elif p == "skos:closeMatch": - # TODO: consider distributing - pi = 2 - elif p == "owl:subClassOf": - pi = 0 - elif p == "skos:broadMatch": - pi = 0 - elif p == "inverseOf(owl:subClassOf)": - pi = 1 - elif p == "skos:narrowMatch": - pi = 1 - elif p == "owl:differentFrom": - pi = 3 - elif p == "dbpedia-owl:different": - pi = 3 - - if pi == 0: - g.add_edge(o, s) - elif pi == 1: - g.add_edge(s, o) - elif pi == 2: - g.add_edge(s, o) - g.add_edge(o, s) + if doc.mapping_set.mappings is not None: + for mapping in doc.mapping_set.mappings: + if not isinstance(mapping, Mapping): + raise TypeError + s = mapping.subject_id + o = mapping.object_id + p = mapping.predicate_id + # TODO: this is copy-pasted from export_ptable + + pi = None + + if p == "owl:equivalentClass": + pi = 2 + elif p == "skos:exactMatch": + pi = 2 + elif p == "skos:closeMatch": + # TODO: consider distributing + pi = 2 + elif p == "owl:subClassOf": + pi = 0 + elif p == "skos:broadMatch": + pi = 0 + elif p == "inverseOf(owl:subClassOf)": + pi = 1 + elif p == "skos:narrowMatch": + pi = 1 + elif p == "owl:differentFrom": + pi = 3 + elif p == "dbpedia-owl:different": + pi = 3 + + if pi == 0: + g.add_edge(o, s) + elif pi == 1: + g.add_edge(s, o) + elif pi == 2: + g.add_edge(s, o) + g.add_edge(o, s) return g @@ -65,7 +68,6 @@ def split_into_cliques(msdf: MappingSetDataFrame): comp_id = 0 newdocs = [] for comp in sorted(gen, key=len, reverse=True): - comp: collections.Iterable for n in comp: node_to_comp[n] = comp_id comp_id += 1 @@ -75,15 +77,21 @@ ) ) + if not isinstance(doc.mapping_set.mappings, list): + raise TypeError for m in doc.mapping_set.mappings: + if not isinstance(m, Mapping): + raise TypeError comp_id = node_to_comp[m.subject_id] subdoc = newdocs[comp_id] + if not isinstance(subdoc.mapping_set.mappings, list): + raise TypeError subdoc.mapping_set.mappings.append(m) return newdocs -def invert_dict(d: dict) -> dict: - invdict = {} +def invert_dict(d: Dict[str, str]) -> Dict[str, Any]: + invdict: Dict[str, Any] = {} for k, v in d.items(): if v not in invdict: invdict[v] = [] diff --git a/sssom/context.py b/sssom/context.py index b3391be7..737ec497 100644 --- a/sssom/context.py +++ b/sssom/context.py @@ -1,13 +1,16 @@ import json import logging -from typing import Any, Mapping, Tuple +from typing import Optional from .external_context import sssom_external_context from .internal_context import sssom_context +from .typehints import Metadata, MetadataType, PrefixMap # HERE = pathlib.Path(__file__).parent.resolve() # DEFAULT_CONTEXT_PATH = HERE / "sssom.context.jsonld" # EXTERNAL_CONTEXT_PATH = HERE / "sssom.external.context.jsonld" + + SSSOM_BUILT_IN_PREFIXES = ["sssom", "owl", "rdf", "rdfs", "skos"] @@ -19,7 +22,7 @@ def get_external_jsonld_context(): return json.loads(sssom_external_context, strict=False) -def get_built_in_prefix_map(): +def get_built_in_prefix_map() -> PrefixMap: contxt = get_jsonld_context() curie_map = {} for key in contxt["@context"]: @@ -30,7 +33,9 @@ return curie_map -def add_built_in_prefixes_to_prefix_map(prefixmap): +def 
add_built_in_prefixes_to_prefix_map( + prefixmap: Optional[PrefixMap] = None, +) -> PrefixMap: builtinmap = get_built_in_prefix_map() if not prefixmap: prefixmap = builtinmap @@ -45,27 +50,27 @@ return prefixmap -def get_default_metadata() -> Tuple[Mapping[str, Any], Mapping[str, Any]]: +def get_default_metadata() -> Metadata: contxt = get_jsonld_context() contxt_external = get_external_jsonld_context() - curie_map = {} - meta = {} + prefix_map = {} + metadata: MetadataType = {} for key in contxt["@context"]: v = contxt["@context"][key] if isinstance(v, str): - curie_map[key] = v + prefix_map[key] = v elif isinstance(v, dict): if "@id" in v and "@prefix" in v: if v["@prefix"]: - curie_map[key] = v["@id"] + prefix_map[key] = v["@id"] for key in contxt_external["@context"]: v = contxt_external["@context"][key] if isinstance(v, str): - if key not in curie_map: - curie_map[key] = v + if key not in prefix_map: + prefix_map[key] = v else: - if curie_map[key] != v: + if prefix_map[key] != v: logging.warning( - f"{key} is already in curie map ({curie_map[key]}, but with a different value than {v}" + f"{key} is already in curie map ({prefix_map[key]}), but with a different value than {v}" ) - return meta, curie_map + return Metadata(prefix_map=prefix_map, metadata=metadata) diff --git a/sssom/io.py b/sssom/io.py index 8215d25d..7ade31d7 100644 --- a/sssom/io.py +++ b/sssom/io.py @@ -3,6 +3,7 @@ from .context import get_default_metadata from .parsers import get_parsing_function, read_sssom_table, split_dataframe +from .typehints import Metadata from .util import raise_for_bad_path, read_metadata from .writers import get_writer_function, write_table, write_tables @@ -24,7 +25,8 @@ def convert_file( write_func, fileformat = get_writer_function( output_format=output_format, output=output ) - write_func(doc, output, serialisation=fileformat) + # TODO cthoyt figure out how to use protocols for this + write_func(doc, output, serialisation=fileformat) # type:ignore def parse_file( @@ -48,11 +50,13 @@ clean_prefixes: If True (default), records with unknown prefixes are removed from the SSSOM file. """ raise_for_bad_path(input_path) - meta, curie_map = get_metadata_and_curie_map( + metadata = get_metadata_and_curie_map( metadata_path=metadata_path, curie_map_mode=curie_map_mode ) parse_func = get_parsing_function(input_format, input_path) - doc = parse_func(input_path, curie_map=curie_map, meta=meta) + doc = parse_func( + input_path, curie_map=metadata.prefix_map, meta=metadata.metadata + ) if clean_prefixes: # We do this because we got a lot of prefixes from the default SSSOM prefixes! doc.clean_prefix_map() @@ -92,8 +96,8 @@ def split_file(input_path: str, output_directory: str) -> None: def get_metadata_and_curie_map( - metadata_path: Optional[str] = None, curie_map_mode: str = "metadata_only" -): + metadata_path: Optional[str] = None, curie_map_mode: Optional[str] = None +) -> Metadata: """ Load SSSOM metadata from a file, and then augments it with default prefixes. 
@@ -103,15 +107,17 @@ def get_metadata_and_curie_map( """ if metadata_path is None: return get_default_metadata() - - meta, curie_map = read_metadata(metadata_path) + if curie_map_mode is None: + curie_map_mode = "metadata_only" + prefix_map, metadata = read_metadata(metadata_path) + # TODO reduce complexity by flipping conditionals + # and returning eagerly (it's fine if there are multiple returns) if curie_map_mode != "metadata_only": meta_sssom, curie_map_sssom = get_default_metadata() if curie_map_mode == "sssom_default_only": - curie_map = curie_map_sssom + prefix_map = curie_map_sssom elif curie_map_mode == "merged": for prefix, uri_prefix in curie_map_sssom.items(): - if prefix not in curie_map: - curie_map[prefix] = uri_prefix - - return meta, curie_map + if prefix not in prefix_map: + prefix_map[prefix] = uri_prefix + return Metadata(prefix_map=prefix_map, metadata=metadata) diff --git a/sssom/parsers.py b/sssom/parsers.py index 80af85c7..82147bfd 100644 --- a/sssom/parsers.py +++ b/sssom/parsers.py @@ -2,7 +2,8 @@ import logging import re import typing -from typing import Any, Dict, Optional, Set, TextIO, Union +from collections import Counter +from typing import Any, Dict, List, Optional, Set, TextIO, Union, cast from urllib.request import urlopen from xml.dom import Node, minidom from xml.dom.minidom import Document @@ -17,6 +18,7 @@ from .context import add_built_in_prefixes_to_prefix_map, get_default_metadata from .sssom_datamodel import Mapping, MappingSet from .sssom_document import MappingSetDocument +from .typehints import Metadata, MetadataType, PrefixMap from .util import ( SSSOM_DEFAULT_RDF_SERIALISATION, URI_SSSOM_MAPPINGS, @@ -33,7 +35,9 @@ def read_sssom_table( - file_path: str, curie_map: Dict[str, str] = None, meta: Dict[str, str] = None + file_path: str, + curie_map: Optional[PrefixMap] = None, + meta: Optional[MetadataType] = None, ) -> MappingSetDataFrame: """ parses a TSV -> MappingSetDocument -> MappingSetDataFrame @@ -76,14 +80,14 @@ def read_sssom_rdf( parses a TSV -> MappingSetDocument -> MappingSetDataFrame """ raise_for_bad_path(file_path) - curie_map, meta = _get_curie_map_and_metadata(curie_map=curie_map, meta=meta) + metadata = _get_curie_map_and_metadata(curie_map=curie_map, meta=meta) g = Graph() g.load(file_path, format=serialisation) # json_obj = json.loads(g.serialize(format="json-ld")) # print(json_obj) # msdf = from_sssom_json(json_obj, curie_map=curie_map, meta=meta) - msdf = from_sssom_rdf(g, curie_map=curie_map, meta=meta) + msdf = from_sssom_rdf(g, curie_map=metadata.prefix_map, meta=metadata.metadata) return msdf @@ -94,11 +98,13 @@ def read_sssom_json( parses a TSV -> MappingSetDocument -> MappingSetDataFrame """ raise_for_bad_path(file_path) - curie_map, meta = _get_curie_map_and_metadata(curie_map=curie_map, meta=meta) + metadata = _get_curie_map_and_metadata(curie_map=curie_map, meta=meta) with open(file_path) as json_file: jsondoc = json.load(json_file) - msdf = from_sssom_json(jsondoc=jsondoc, curie_map=curie_map, meta=meta) + msdf = from_sssom_json( + jsondoc=jsondoc, curie_map=metadata.prefix_map, meta=metadata.metadata + ) return msdf @@ -117,48 +123,54 @@ def read_obographs_json( """ raise_for_bad_path(file_path) - curie_map, meta = _get_curie_map_and_metadata(curie_map=curie_map, meta=meta) + _xmetadata = _get_curie_map_and_metadata(curie_map=curie_map, meta=meta) with open(file_path) as json_file: jsondoc = json.load(json_file) - return from_obographs(jsondoc, curie_map, meta) + return from_obographs( + jsondoc, 
curie_map=_xmetadata.prefix_map, meta=_xmetadata.metadata + ) -def _get_curie_map_and_metadata(curie_map: Dict, meta: Dict): - default_meta, default_curie_map = get_default_metadata() +def _get_curie_map_and_metadata( + curie_map: Optional[PrefixMap] = None, meta: Optional[MetadataType] = None +) -> Metadata: + default_metadata = get_default_metadata() - if not curie_map: + if curie_map is None: logging.warning( "No curie map provided (not recommended), trying to use defaults.." ) - curie_map = default_curie_map + curie_map = default_metadata.prefix_map - if not meta: - meta = default_meta + if meta is None: + meta = default_metadata.metadata else: if curie_map and "curie_map" in meta: logging.info( - "Curie map prvoided as parameter, but SSSOM file provides its own CURIE map. " + "Curie map provided as parameter, but SSSOM file provides its own CURIE map. " "CURIE map provided externally is disregarded in favour of the curie map in the SSSOM file." ) - curie_map = meta["curie_map"] + curie_map = cast(PrefixMap, meta["curie_map"]) - return curie_map, meta + return Metadata(prefix_map=curie_map, metadata=meta) def read_alignment_xml( - file_path: str, curie_map: Dict[str, str] = None, meta: Dict[str, str] = None + file_path: str, curie_map: Dict[str, str], meta: Dict[str, str] ) -> MappingSetDataFrame: """ parses a TSV -> MappingSetDocument -> MappingSetDataFrame """ raise_for_bad_path(file_path) - curie_map, meta = _get_curie_map_and_metadata(curie_map=curie_map, meta=meta) + metadata = _get_curie_map_and_metadata(curie_map=curie_map, meta=meta) logging.info("Loading from alignment API") xmldoc = minidom.parse(file_path) - msdf = from_alignment_minidom(xmldoc, curie_map, meta) + msdf = from_alignment_minidom( + xmldoc, curie_map=metadata.prefix_map, meta=metadata.metadata + ) return msdf @@ -166,7 +178,10 @@ def read_alignment_xml( def from_sssom_dataframe( - df: pd.DataFrame, curie_map: Dict[str, str], meta: Dict[str, str] + df: pd.DataFrame, + *, + curie_map: Optional[PrefixMap] = None, + meta: Optional[MetadataType] = None, ) -> MappingSetDataFrame: """ Converts a dataframe to a MappingSetDataFrame @@ -175,15 +190,14 @@ def from_sssom_dataframe( :param meta: :return: MappingSetDataFrame """ - - _check_curie_map(curie_map) + curie_map = _ensure_prefix_map(curie_map) if "confidence" in df.columns: df["confidence"].replace(r"^\s*$", np.NaN, regex=True, inplace=True) - mlist = [] + mlist: List[Mapping] = [] ms = MappingSet() - bad_attrs = {} + bad_attrs: typing.Counter[str] = Counter() for _, row in df.iterrows(): mdict = {} for k, v in row.items(): @@ -199,16 +213,14 @@ def from_sssom_dataframe( ms[k] = v ok = True if not ok: - if k not in bad_attrs: - bad_attrs[k] = 1 - else: - bad_attrs[k] += 1 - m = _prepare_mapping(Mapping(**mdict)) + bad_attrs[k] += 1 + mlist.append(_prepare_mapping(Mapping(**mdict))) - mlist.append(m) - for k, v in bad_attrs.items(): + for k, v in bad_attrs.most_common(): logging.warning(f"No attr for {k} [{v} instances]") - ms.mappings = mlist + # the autogenerated code's type annotations are _really_ messy. 
This is in fact okay, + # so with a heavy heart we employ type:ignore + ms.mappings = mlist # type:ignore _set_metadata_in_mapping_set(mapping_set=ms, metadata=meta) doc = MappingSetDocument(mapping_set=ms, curie_map=curie_map) return to_mapping_set_dataframe(doc) @@ -216,14 +228,14 @@ def from_sssom_dataframe( def from_sssom_rdf( g: Graph, - curie_map: Dict[str, str] = None, - meta: Dict[str, str] = None, - mapping_predicates: Set[str] = None, + curie_map: Optional[PrefixMap] = None, + meta: Optional[MetadataType] = None, + mapping_predicates: Optional[Set[str]] = None, ) -> MappingSetDataFrame: """ Converts an SSSOM RDF graph into a SSSOM data table Args: - g: the Grah (rdflib) + g: the Graph (rdflib) curie_map: A dictionary conatining the prefix map meta: Potentially additional metadata mapping_predicates: A set of predicates that should be extracted from the RDF graph Returns: """ - curie_map = _check_curie_map(curie_map) + curie_map = _ensure_prefix_map(curie_map) if mapping_predicates is None: # FIXME unused mapping_predicates = _get_default_mapping_predicates() ms = MappingSet() - mlist = [] + mlist: List[Mapping] = [] for sx, px, ox in g.triples((None, URIRef(URI_SSSOM_MAPPINGS), None)): mdict = {} @@ -283,28 +295,30 @@ f"This usually happens when a critical curie_map entry is missing." ) - ms.mappings = mlist + ms.mappings = mlist # type: ignore _set_metadata_in_mapping_set(mapping_set=ms, metadata=meta) mdoc = MappingSetDocument(mapping_set=ms, curie_map=curie_map) return to_mapping_set_dataframe(mdoc) def from_sssom_json( - jsondoc: Union[str, dict, TextIO], curie_map: Dict, meta: Dict[str, str] = None + jsondoc: Union[str, dict, TextIO], + *, + curie_map: Dict[str, str], + meta: Optional[Dict[str, str]] = None, ) -> MappingSetDataFrame: - _check_curie_map(curie_map) + _ensure_prefix_map(curie_map) # noinspection PyTypeChecker ms = JSONLoader().load(source=jsondoc, target_class=MappingSet) _set_metadata_in_mapping_set(ms, metadata=meta) - ms: MappingSet mdoc = MappingSetDocument(mapping_set=ms, curie_map=curie_map) return to_mapping_set_dataframe(mdoc) def from_alignment_minidom( - dom: Document, curie_map: Dict[str, str] = None, meta: Dict[str, str] = None + dom: Document, *, curie_map: PrefixMap, meta: MetadataType ) -> MappingSetDataFrame: """ Reads a minidom Document object @@ -313,10 +327,11 @@ :param dom: :param meta: Optional meta data :return: MappingSetDocument """ - _check_curie_map(curie_map) + # FIXME: should be curie_map = _ensure_prefix_map(curie_map) + _ensure_prefix_map(curie_map) ms = MappingSet() - mlist = [] + mlist: List[Mapping] = [] # bad_attrs = {} alignments = dom.getElementsByTagName("Alignment") @@ -351,14 +366,14 @@ elif node_name == "uri2": ms["object_source"] = e.firstChild.nodeValue - ms.mappings = mlist + ms.mappings = mlist # type: ignore _set_metadata_in_mapping_set(mapping_set=ms, metadata=meta) mdoc = MappingSetDocument(mapping_set=ms, curie_map=curie_map) return to_mapping_set_dataframe(mdoc) def from_obographs( - jsondoc: Dict, curie_map: Dict[str, str], meta: Dict[str, str] = None + jsondoc: Dict, *, curie_map: PrefixMap, meta: Optional[MetadataType] = None ) -> MappingSetDataFrame: """ Converts a obographs json object to an SSSOM data frame Args: jsondoc: The JSON object representing the ontology in obographs format curie_map: The curie map to be used meta: Any additional metadata that needs to be added to the resulting SSSOM data frame Returns: An SSSOM data frame (MappingSetDataFrame) """ - _check_curie_map(curie_map) + _ensure_prefix_map(curie_map) ms = MappingSet() - mlist = [] + mlist: List[Mapping] = [] # 
bad_attrs = {} allowed_properties = [ @@ -436,7 +451,7 @@ def from_obographs( else: raise Exception("No graphs element in obographs file, wrong format?") - ms.mappings = mlist + ms.mappings = mlist # type: ignore _set_metadata_in_mapping_set(mapping_set=ms, metadata=meta) mdoc = MappingSetDocument(mapping_set=ms, curie_map=curie_map) return to_mapping_set_dataframe(mdoc) @@ -463,7 +478,7 @@ def get_parsing_function(input_format, filename): raise Exception(f"Unknown input format: {input_format}") -def _check_curie_map(curie_map): +def _ensure_prefix_map(curie_map: Optional[PrefixMap] = None) -> PrefixMap: if not curie_map: raise Exception("No valid curie_map provided") else: @@ -508,9 +523,9 @@ def _swap_object_subject(mapping: Mapping) -> Mapping: return mapping -def _read_metadata_from_table(filename: str) -> typing.Mapping[str, Any]: - if validators.url(filename): - response = urlopen(filename) +def _read_metadata_from_table(path: str) -> Dict[str, Any]: + if validators.url(path): + response = urlopen(path) yamlstr = "" for lin in response: line = lin.decode("utf-8") @@ -519,9 +534,9 @@ def _read_metadata_from_table(filename: str) -> typing.Mapping[str, Any]: else: break else: - with open(filename, "r") as s: + with open(path) as file: yamlstr = "" - for line in s: + for line in file: if line.startswith("#"): yamlstr += re.sub("^#", "", line) else: @@ -537,8 +552,10 @@ def _is_valid_mapping(m: Mapping) -> bool: return bool(m.predicate_id and m.object_id and m.subject_id) -def _set_metadata_in_mapping_set(mapping_set: MappingSet, metadata: dict) -> None: - if not metadata: +def _set_metadata_in_mapping_set( + mapping_set: MappingSet, metadata: Optional[MetadataType] = None +) -> None: + if metadata is None: logging.info("Tried setting metadata but none provided.") else: for k, v in metadata.items(): @@ -546,7 +563,7 @@ def _set_metadata_in_mapping_set(mapping_set: MappingSet, metadata: dict) -> Non mapping_set[k] = v -def _cell_element_values(cell_node, curie_map: dict) -> Optional[Mapping]: +def _cell_element_values(cell_node, curie_map: PrefixMap) -> Optional[Mapping]: mdict = {} for child in cell_node.childNodes: if child.nodeType == Node.ELEMENT_NODE: @@ -577,6 +594,8 @@ def _cell_element_values(cell_node, curie_map: dict) -> Optional[Mapping]: m = Mapping(**mdict) if _is_valid_mapping(m): return m + else: + return None # The following methods dont really belong in the parser package.. 
@@ -587,44 +606,47 @@ def to_mapping_set_document(msdf: MappingSetDataFrame) -> MappingSetDocument: if not msdf.prefixmap: raise Exception("No valid curie_map provided") - mlist = [] + mlist: List[Mapping] = [] ms = MappingSet() bad_attrs = {} - for _, row in msdf.df.iterrows(): - mdict = {} - for k, v in row.items(): - ok = False - if k: - k = str(k) - if hasattr(Mapping, k): - mdict[k] = v - ok = True - if hasattr(MappingSet, k): - ms[k] = v - ok = True - if not ok: - if k not in bad_attrs: - bad_attrs[k] = 1 - else: - bad_attrs[k] += 1 - m = _prepare_mapping(Mapping(**mdict)) - mlist.append(m) + if msdf.df is not None: + for _, row in msdf.df.iterrows(): + mdict = {} + for k, v in row.items(): + ok = False + if k: + k = str(k) + if hasattr(Mapping, k): + mdict[k] = v + ok = True + if hasattr(MappingSet, k): + ms[k] = v + ok = True + if not ok: + if k not in bad_attrs: + bad_attrs[k] = 1 + else: + bad_attrs[k] += 1 + m = _prepare_mapping(Mapping(**mdict)) + mlist.append(m) for k, v in bad_attrs.items(): logging.warning(f"No attr for {k} [{v} instances]") - ms.mappings = mlist - for k, v in msdf.metadata.items(): - if k != "curie_map": - ms[k] = v + ms.mappings = mlist # type: ignore + if msdf.metadata is not None: + for k, v in msdf.metadata.items(): + if k != "curie_map": + ms[k] = v return MappingSetDocument(mapping_set=ms, curie_map=msdf.prefixmap) def split_dataframe( msdf: MappingSetDataFrame, ) -> typing.Mapping[str, MappingSetDataFrame]: - df = msdf.df - subject_prefixes = set(df["subject_id"].str.split(":", 1, expand=True)[0]) - object_prefixes = set(df["object_id"].str.split(":", 1, expand=True)[0]) - relations = set(df["predicate_id"]) + if msdf.df is None: + raise RuntimeError + subject_prefixes = set(msdf.df["subject_id"].str.split(":", 1, expand=True)[0]) + object_prefixes = set(msdf.df["object_id"].str.split(":", 1, expand=True)[0]) + relations = set(msdf.df["predicate_id"]) return split_dataframe_by_prefix( msdf=msdf, subject_prefixes=subject_prefixes, @@ -654,12 +676,12 @@ def split_dataframe_by_prefix( relpre = rel.split(":")[0] relppost = rel.split(":")[1] split_name = f"{pre_subj.lower()}_{relppost.lower()}_{pre_obj.lower()}" - - dfs = df[ - (df["subject_id"].str.startswith(pre_subj + ":")) - & (df["predicate_id"] == rel) - & (df["object_id"].str.startswith(pre_obj + ":")) - ] + if df is not None: + dfs = df[ + (df["subject_id"].str.startswith(pre_subj + ":")) + & (df["predicate_id"] == rel) + & (df["object_id"].str.startswith(pre_obj + ":")) + ] if pre_subj in curie_map and pre_obj in curie_map and len(dfs) > 0: cm = { pre_subj: curie_map[pre_subj], diff --git a/sssom/rdf_util.py b/sssom/rdf_util.py index 599def70..031fcf39 100644 --- a/sssom/rdf_util.py +++ b/sssom/rdf_util.py @@ -1,38 +1,42 @@ import logging -from typing import List +from typing import Any, Dict, List, Optional from rdflib import Graph, URIRef -from rdflib.plugins.memory import Any from .parsers import to_mapping_set_document -from .sssom_datamodel import Mapping +from .sssom_datamodel import EntityId, Mapping from .util import MappingSetDataFrame def rewire_graph( g: Graph, mset: MappingSetDataFrame, - subject_to_object=True, - precedence: List[str] = None, -) -> str: + subject_to_object: bool = True, + precedence: Optional[List[str]] = None, +) -> int: """ rewires an RDF Graph replacing using equivalence mappings """ pm = mset.prefixmap mdoc = to_mapping_set_document(mset) - rewire_map = {} + rewire_map: Dict[EntityId, EntityId] = {} - def expand_curie(curie: str): + def expand_curie(curie: str) 
-> URIRef: pfx, local = curie.split(":") return URIRef(f"{pm[pfx]}{local}") + if mdoc.mapping_set.mappings is None: + raise TypeError for m in mdoc.mapping_set.mappings: - m: Mapping + if not isinstance(m, Mapping): + continue if m.predicate_id in {"owl:equivalentClass", "owl:equivalentProperty"}: if subject_to_object: src, tgt = m.subject_id, m.object_id else: src, tgt = m.object_id, m.subject_id + if not isinstance(src, EntityId) or not isinstance(tgt, EntityId): + raise TypeError if src in rewire_map: curr_tgt = rewire_map[src] logging.info(f"Ambiguous: {src} -> {tgt} vs {curr_tgt}") @@ -49,12 +53,15 @@ def expand_curie(curie: str): raise ValueError(f"Ambiguous: {src} -> {tgt} vs {curr_tgt}") else: rewire_map[src] = tgt - rewire_map = {expand_curie(k): expand_curie(v) for k, v in rewire_map.items()} + + uri_ref_rewire_map: Dict[URIRef, URIRef] = { + expand_curie(k): expand_curie(v) for k, v in rewire_map.items() + } def rewire_node(n: Any): if isinstance(n, URIRef): - if n in rewire_map: - return rewire_map[n] + if n in uri_ref_rewire_map: + return uri_ref_rewire_map[n] else: return n else: diff --git a/sssom/sparql_util.py b/sssom/sparql_util.py index 4f954ad3..f83124a9 100644 --- a/sssom/sparql_util.py +++ b/sssom/sparql_util.py @@ -1,6 +1,6 @@ import logging from dataclasses import dataclass -from typing import Dict, List, Optional +from typing import Dict, List, Mapping, Optional import pandas as pd from rdflib import URIRef @@ -12,12 +12,12 @@ @dataclass class EndpointConfig: - url: str = None - graph: URIRef = None - predmap: Dict[str, str] = None - predicates: Optional[List[str]] = None - limit: Optional[int] = None - curie_map: Optional[Dict[str, str]] = None + url: str + graph: URIRef + predmap: Dict[str, str] + predicates: Optional[List[str]] + limit: Optional[int] + curie_map: Optional[Dict[str, str]] include_object_labels: bool = False @@ -28,20 +28,21 @@ def query_mappings(config: EndpointConfig) -> MappingSetDataFrame: sparql = SPARQLWrapper(config.url) if config.graph is None: g = "?g" + elif isinstance(config.graph, str): + g = URIRef(config.graph).n3() else: - g = config.graph - if isinstance(g, str): - g = URIRef(g) - g = g.n3() - preds = config.predicates - if preds is None: - preds = {SKOS.exactMatch, SKOS.closeMatch} + g = config.graph.n3() + if config.predicates is None: + predicates = [SKOS.exactMatch, SKOS.closeMatch] else: - preds = [expand_curie(p, config) for p in preds] - predstr = " ".join([p.n3() for p in preds]) - limitstr = "" + predicates = [ + expand_curie(predicate, config) for predicate in config.predicates + ] + predstr = " ".join(URIRef(predicate).n3() for predicate in predicates) if config.limit is not None: limitstr = f"LIMIT {config.limit}" + else: + limitstr = "" cols = [ "subject_id", "subject_label", @@ -57,7 +58,7 @@ def query_mappings(config: EndpointConfig) -> MappingSetDataFrame: if config.include_object_labels else "" ) - q = f""" + q = f"""\ PREFIX rdfs: {RDFS.uri.n3()} SELECT {colstr} WHERE {{ @@ -79,14 +80,13 @@ def query_mappings(config: EndpointConfig) -> MappingSetDataFrame: row = {k: v["value"] for k, v in result.items()} rows.append(curiefy_row(row, config)) df = pd.DataFrame(rows) + if config.curie_map is None: + raise TypeError return MappingSetDataFrame(df=df, prefixmap=config.curie_map) -def curiefy_row(row: Dict[str, str], config: EndpointConfig) -> Dict[str, str]: - new_row = {} - for k, v in row.items(): - new_row[k] = contract_uri(v, config) - return new_row +def curiefy_row(row: Mapping[str, str], config: EndpointConfig) 
-> Dict[str, str]: + return {k: contract_uri(v, config) for k, v in row.items()} def contract_uri(uristr: str, config: EndpointConfig) -> str: diff --git a/sssom/sssom_document.py b/sssom/sssom_document.py index 71fbdd7f..52b045bd 100644 --- a/sssom/sssom_document.py +++ b/sssom/sssom_document.py @@ -1,7 +1,7 @@ from dataclasses import dataclass -from typing import Dict from .sssom_datamodel import MappingSet +from .typehints import PrefixMap @dataclass() @@ -17,7 +17,7 @@ class MappingSetDocument: The main part of the document: a set of mappings plus metadata """ - curie_map: Dict[str, str] + curie_map: PrefixMap """ Mappings between ID prefixes and URI Bases, used to map CURIEs to URIs. Note that the CURIE map is not part of the core SSSOM model, hence it belongs here in the document diff --git a/sssom/typehints.py b/sssom/typehints.py new file mode 100644 index 00000000..d3eda828 --- /dev/null +++ b/sssom/typehints.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- + +"""Type hints for SSSOM.""" + +from typing import Any, Dict, NamedTuple + +__all__ = [ + "PrefixMap", + "MetadataType", + "Metadata", +] + +PrefixMap = Dict[str, str] + +#: TODO replace this with something more specific +MetadataType = Dict[str, Any] + + +class Metadata(NamedTuple): + prefix_map: PrefixMap + metadata: MetadataType diff --git a/sssom/util.py b/sssom/util.py index d95a23ae..882c3eef 100644 --- a/sssom/util.py +++ b/sssom/util.py @@ -3,9 +3,21 @@ import logging import os import re -from dataclasses import dataclass +from collections import defaultdict +from dataclasses import dataclass, field from io import FileIO, StringIO -from typing import Any, Dict, List, Mapping, Optional, Set, TextIO, Union +from typing import ( + Any, + DefaultDict, + Dict, + List, + Mapping, + Optional, + Set, + TextIO, + Tuple, + Union, +) from urllib.request import urlopen import numpy as np @@ -16,6 +28,7 @@ from .context import get_default_metadata, get_jsonld_context from .sssom_datamodel import Entity, slots from .sssom_document import MappingSetDocument +from .typehints import Metadata, MetadataType, PrefixMap SSSOM_READ_FORMATS = [ "tsv", @@ -61,17 +74,21 @@ class MappingSetDataFrame: A collection of mappings represented as a DataFrame, together with additional metadata """ - df: pd.DataFrame = None # Mappings - prefixmap: Dict[str, str] = None # maps CURIE prefixes to URI bases - metadata: Optional[Dict[str, str]] = None # header metadata excluding prefixes + df: Optional[pd.DataFrame] = None # Mappings + #: maps CURIE prefixes to URI bases + prefixmap: PrefixMap = field(default_factory=dict) + metadata: Optional[MetadataType] = None # header metadata excluding prefixes - def merge(self, msdf2, inplace=True): + def merge( + self, msdf2: "MappingSetDataFrame", inplace: bool = True + ) -> "MappingSetDataFrame": """Merges two MappingSetDataframes Args: - msdf2 (MappingSetDataFrame): Secondary MappingSetDataFrame (self => primary) - inplace (bool): if true, msdf2 is merged into the calling MappingSetDataFrame, if false, it simply return - the merged data frame. + msdf2: Secondary MappingSetDataFrame (self => primary) + inplace: + if true, msdf2 is merged into the calling MappingSetDataFrame, if false, it simply returns + the merged data frame. 
Returns: MappingSetDataFrame: Merged MappingSetDataFrame @@ -81,6 +98,7 @@ def merge(self, msdf2, inplace=True): self.df = msdf.df self.prefixmap = msdf.prefixmap self.metadata = msdf.metadata + # FIXME should return self if inplace return msdf def __str__(self): @@ -94,9 +112,9 @@ def __str__(self): description += self.df.tail().to_string() + "\n" return description - def clean_prefix_map(self): + def clean_prefix_map(self) -> None: prefixes_in_map = get_prefixes_used_in_table(self.df) - new_prefixes = dict() + new_prefixes: PrefixMap = dict() missing_prefix = [] for prefix in prefixes_in_map: if prefix in self.prefixmap: @@ -140,9 +158,9 @@ class MappingSetDiff: this is considered a mapping in common. """ - unique_tuples1: Optional[Set[str]] = None - unique_tuples2: Optional[Set[str]] = None - common_tuples: Optional[Set[str]] = None + unique_tuples1: Optional[Set[EntityPair]] = None + unique_tuples2: Optional[Set[EntityPair]] = None + common_tuples: Optional[Set[EntityPair]] = None combined_dataframe: Optional[pd.DataFrame] = None """ @@ -169,6 +187,8 @@ def load(self, filename) -> None: self.df = read_pandas(filename) def convert(self) -> Dict[str, Any]: + if self.df is None: + raise RuntimeError("dataframe is not loaded properly") # note that 'mapping' is both a metaproperty and a property of this model... cslots = { "mappings": { @@ -178,7 +198,7 @@ def convert(self) -> Dict[str, Any]: }, "id": {"description": "CURIE or IRI identifier", "identifier": True}, } - classes = { + classes: Dict[str, Any] = { "mapping set": { "description": "Represents a set of mappings", "slots": ["mappings"], @@ -252,7 +272,7 @@ def convert_and_save(self, fn: str) -> None: yaml.safe_dump(obj, stream, sort_keys=False) -def parse(filename) -> pd.DataFrame: +def parse(filename: str) -> pd.DataFrame: """ parses a TSV to a pandas frame """ @@ -262,7 +282,7 @@ def parse(filename) -> pd.DataFrame: # return read_pandas(filename) -def collapse(df): +def collapse(df: pd.DataFrame) -> pd.DataFrame: """ collapses rows with same S/P/O and combines confidence """ @@ -274,7 +294,7 @@ def collapse(df): return df2 -def sort_sssom_columns(columns: list) -> list: +def sort_sssom_columns(columns: List[str]) -> List[str]: # Ideally, the order of the sssom column names is parsed strictly from sssom.yaml logging.warning("SSSOM sort columns not implemented") @@ -289,7 +309,9 @@ def sort_sssom(df: pd.DataFrame) -> pd.DataFrame: return df -def filter_redundant_rows(df: pd.DataFrame, ignore_predicate=False) -> pd.DataFrame: +def filter_redundant_rows( + df: pd.DataFrame, ignore_predicate: bool = False +) -> pd.DataFrame: """ removes rows if there is another row with same S/O and higher confidence @@ -308,7 +330,7 @@ def filter_redundant_rows(df: pd.DataFrame, ignore_predicate=False) -> pd.DataFr key = [SUBJECT_ID, OBJECT_ID, PREDICATE_ID] dfmax: pd.DataFrame dfmax = df.groupby(key, as_index=False)[CONFIDENCE].apply(max).drop_duplicates() - max_conf = {} + max_conf: Dict[Tuple[str, ...], float] = {} for _, row in dfmax.iterrows(): if ignore_predicate: max_conf[(row[SUBJECT_ID], row[OBJECT_ID])] = row[CONFIDENCE] @@ -337,7 +359,7 @@ def filter_redundant_rows(df: pd.DataFrame, ignore_predicate=False) -> pd.DataFr return return_df -def assign_default_confidence(df: pd.DataFrame): +def assign_default_confidence(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]: # Get rows having numpy.NaN as confidence if df is not None and "confidence" not in df.columns: df["confidence"] = np.NaN @@ -357,7 +379,7 @@ def remove_unmatched(df: 
pd.DataFrame) -> pd.DataFrame: return df[df[PREDICATE_ID] != "noMatch"] -def create_entity(row, eid: str, mappings: Dict) -> Entity: +def create_entity(row, eid: str, mappings: Dict[str, Any]) -> Entity: logging.warning(f"create_entity() has row parameter ({row}), but not used.") e = Entity(id=eid) for k, v in mappings.items(): @@ -366,37 +388,32 @@ def create_entity(row, eid: str, mappings: Dict) -> Entity: return e -def group_mappings(df: pd.DataFrame) -> Dict[EntityPair, List]: +def group_mappings(df: pd.DataFrame) -> Dict[EntityPair, List[pd.Series]]: """ group mappings by EntityPairs """ - mappings: Dict = {} + mappings: DefaultDict[EntityPair, List[pd.Series]] = defaultdict(list) for _, row in df.iterrows(): - sid = row[SUBJECT_ID] - oid = row[OBJECT_ID] - s = create_entity( + subject_entity = create_entity( row, - sid, + row[SUBJECT_ID], { "label": SUBJECT_LABEL, "category": SUBJECT_CATEGORY, "source": SUBJECT_SOURCE, }, ) - o = create_entity( + object_entity = create_entity( row, - oid, + row[OBJECT_ID], { "label": OBJECT_LABEL, "category": OBJECT_CATEGORY, "source": OBJECT_SOURCE, }, ) - pair = EntityPair(s, o) - if pair not in mappings: - mappings[pair] = [] - mappings[pair].append(row) - return mappings + mappings[EntityPair(subject_entity, object_entity)].append(row) + return dict(mappings) def compare_dataframes(df1: pd.DataFrame, df2: pd.DataFrame) -> MappingSetDiff: @@ -569,13 +586,15 @@ def merge_msdf( merged_msdf = MappingSetDataFrame() # If msdf2 has a DataFrame - if msdf2.df is not None: + if msdf1.df is not None and msdf2.df is not None: # 'outer' join in pandas == FULL JOIN in SQL merged_msdf.df = msdf1.df.merge(msdf2.df, how="outer") else: merged_msdf.df = msdf1.df # merge the non DataFrame elements - merged_msdf.prefixmap = dict_merge(msdf2.prefixmap, msdf1.prefixmap, "prefixmap") + merged_msdf.prefixmap = dict_merge( + source=msdf2.prefixmap, target=msdf1.prefixmap, dict_name="prefixmap" + ) # After a Slack convo with @matentzn, commented out below. # merged_msdf.metadata = dict_merge(msdf2.metadata, msdf1.metadata, 'metadata') @@ -660,7 +679,6 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame: )[CONFIDENCE].max() # If same confidence prefer "HumanCurated". - reconciled_df_subset: pd.DataFrame reconciled_df_subset = pd.DataFrame(columns=combined_normalized_subset.columns) for _, row_1 in max_confidence_df.iterrows(): match_condition_1 = ( @@ -668,7 +686,6 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame: & (combined_normalized_subset[OBJECT_ID] == row_1[OBJECT_ID]) & (combined_normalized_subset[CONFIDENCE] == row_1[CONFIDENCE]) ) - match_condition_1: Union[bool, ...] # match_condition_1[match_condition_1] gives the list of 'True's. # In other words, the rows that match the condition (rules declared). # Ideally, there should be 1 row. If not apply an extra rule to look for 'HumanCurated'. @@ -698,12 +715,10 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame: & (reconciled_df_subset[OBJECT_ID] == row_2[OBJECT_ID]) & (reconciled_df_subset[CONFIDENCE] == row_2[CONFIDENCE]) ) - match_condition_2: Union[bool, ...] 
reconciled_df_subset.loc[ match_condition_2[match_condition_2].index, PREDICATE_ID ] = row_2[PREDICATE_ID] - reconciled_df: pd.DataFrame reconciled_df = pd.DataFrame(columns=df.columns) for _, row_3 in reconciled_df_subset.iterrows(): match_condition_3 = ( @@ -712,7 +727,6 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame: & (df[CONFIDENCE] == row_3[CONFIDENCE]) & (df[PREDICATE_ID] == row_3[PREDICATE_ID]) ) - match_condition_3: Union[bool, ...] reconciled_df = reconciled_df.append( df.loc[match_condition_3[match_condition_3].index, :] ) @@ -720,20 +734,24 @@ def deal_with_negation(df: pd.DataFrame) -> pd.DataFrame: return return_df -def dict_merge(source: Dict, target: Dict, dict_name: str) -> Dict: +def dict_merge( + *, + source: Optional[Dict[str, Any]] = None, + target: Dict[str, Any], + dict_name: str, +) -> Dict[str, Any]: """ Takes 2 MappingSetDataFrame elements (prefixmap OR metadata) and merges source => target Args: - source (Dict): MappingSetDataFrame.prefixmap / MappingSetDataFrame.metadata - target (Dict): MappingSetDataFrame.prefixmap / MappingSetDataFrame.metadata - dict_name (str): prefixmap or metadata + source: MappingSetDataFrame.prefixmap / MappingSetDataFrame.metadata + target: MappingSetDataFrame.prefixmap / MappingSetDataFrame.metadata + dict_name: prefixmap or metadata Returns: Dict: merged MappingSetDataFrame.prefixmap / MappingSetDataFrame.metadata """ if source is not None: - k: str for k, v in source.items(): if k not in target: if v not in list(target.values()): @@ -760,15 +778,18 @@ def inject_metadata_into_df(msdf: MappingSetDataFrame) -> MappingSetDataFrame: Returns: MappingSetDataFrame: MappingSetDataFrame with metadata as columns """ - if bool(msdf.metadata): + if msdf.metadata is not None and msdf.df is not None: for k, v in msdf.metadata.items(): if k not in msdf.df.columns: msdf.df[k] = v return msdf -def get_file_extension(file: TextIO) -> str: - filename = file.name +def get_file_extension(file: Union[str, TextIO]) -> str: + if isinstance(file, str): + filename = file + else: + filename = file.name parts = filename.split(".") if len(parts) > 0: f_format = parts[-1] @@ -793,7 +814,7 @@ def read_csv(filename, comment="#", sep=","): return pd.read_csv(StringIO(lines), sep=sep) -def read_metadata(filename): +def read_metadata(filename: str) -> Metadata: """ Read a metadata file (yaml) that is supplied separately from a TSV. @@ -811,35 +832,27 @@ def read_metadata(filename): meta = m except yaml.YAMLError as exc: print(exc) # FIXME this clobbers the exception. Remove try/except - return meta, curie_map + return Metadata(prefix_map=curie_map, metadata=meta) -def read_pandas(filename: str, sep: Optional[str] = "\t") -> pd.DataFrame: +def read_pandas(file: Union[str, TextIO], sep: Optional[str] = None) -> pd.DataFrame: """ Read a tabular data file by wrapping func:`pd.read_csv` to handles comment lines correctly. - :param filename: + :param file: The file to read. If no separator is given, this file should be named. 
:param sep: File separator in pandas (\t or ,) - :return: + :return: A pandas dataframe """ - if not sep: - extension = get_file_extension(filename) - sep = "\t" + if sep is None: + extension = get_file_extension(file) if extension == "tsv": sep = "\t" elif extension == "csv": sep = "," else: + sep = "\t" logging.warning("Cannot automatically determine table format, trying tsv.") - - # from tempfile import NamedTemporaryFile - # with NamedTemporaryFile("r+") as tmp: - # with open(filename, "r") as f: - # for line in f: - # if not line.startswith('#'): - # tmp.write(line + "\n") - # tmp.seek(0) - return read_csv(filename, comment="#", sep=sep).fillna("") + return read_csv(file, comment="#", sep=sep).fillna("") def extract_global_metadata(msdoc: MappingSetDocument): @@ -862,13 +875,14 @@ def to_mapping_set_dataframe(doc: MappingSetDocument) -> MappingSetDataFrame: # convert MappingSetDocument into MappingSetDataFrame ### data = [] - for mapping in doc.mapping_set.mappings: - mdict = mapping.__dict__ - m = {} - for key in mdict: - if mdict[key]: - m[key] = mdict[key] - data.append(m) + if doc.mapping_set.mappings is not None: + for mapping in doc.mapping_set.mappings: + mdict = mapping.__dict__ + m = {} + for key in mdict: + if mdict[key]: + m[key] = mdict[key] + data.append(m) df = pd.DataFrame(data=data) meta = extract_global_metadata(doc) meta.pop("curie_map", None) @@ -932,7 +946,7 @@ def filter_out_prefixes(df: pd.DataFrame, filter_prefixes) -> pd.DataFrame: return pd.DataFrame(columns=KEY_FEATURES) -def guess_file_format(filename): +def guess_file_format(filename: Union[str, TextIO]) -> str: extension = get_file_extension(filename) if extension in ["owl", "rdf"]: return SSSOM_DEFAULT_RDF_SERIALISATION @@ -944,11 +958,10 @@ def guess_file_format(filename): ) -def prepare_context_from_curie_map(curie_map: dict): - meta, default_curie_map = get_default_metadata() +def prepare_context_from_curie_map(curie_map: Optional[PrefixMap] = None) -> str: context = get_jsonld_context() - if not curie_map: - curie_map = default_curie_map + if curie_map is None: + curie_map = get_default_metadata().prefix_map for k, v in curie_map.items(): if isinstance(v, str): diff --git a/sssom/writers.py b/sssom/writers.py index 18ed536c..cb0c5ded 100644 --- a/sssom/writers.py +++ b/sssom/writers.py @@ -1,7 +1,7 @@ import json import logging import os -from typing import Callable, Optional, TextIO, Tuple +from typing import Any, Callable, Dict, Optional, TextIO, Tuple import pandas as pd import yaml @@ -43,15 +43,16 @@ def write_table(msdf: MappingSetDataFrame, file: TextIO, serialisation="tsv") -> """ dataframe 2 tsv """ + if msdf.df is None: + raise TypeError sep = _get_separator(serialisation) # df = to_dataframe(msdf) + meta: Dict[str, Any] = {} if msdf.metadata is not None: - meta = {k: v for k, v in msdf.metadata.items()} - else: - meta = {} + meta.update(msdf.metadata) if msdf.prefixmap is not None: meta["curie_map"] = msdf.prefixmap @@ -125,9 +126,9 @@ def write_owl( def to_dataframe(msdf: MappingSetDataFrame) -> pd.DataFrame: data = [] - doc = to_mapping_set_document(msdf) - + if doc.mapping_set.mappings is None: + raise TypeError for mapping in doc.mapping_set.mappings: mdict = mapping.__dict__ m = {} diff --git a/tests/test_parsers.py b/tests/test_parsers.py index d14e3cde..890cda52 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -52,7 +52,7 @@ def setUp(self) -> None: self.alignmentxml_file = f"{test_data_dir}/oaei-ordo-hp.rdf" self.alignmentxml = minidom.parse(self.alignmentxml_file) 
- self.metadata, self.curie_map = get_default_metadata() + self.curie_map, self.metadata = get_default_metadata() def test_parse_sssom_dataframe(self): input_path = f"{test_data_dir}/basic.tsv" diff --git a/tox.ini b/tox.ini index 46acdbde..ae7fb47a 100644 --- a/tox.ini +++ b/tox.ini @@ -7,6 +7,7 @@ envlist = lint flake8 + mypy py [testenv] @@ -40,3 +41,9 @@ deps = flake8-bugbear flake8-isort description = Run the flake8 code quality checker. + +[testenv:mypy] +deps = mypy +skip_install = true +commands = mypy --install-types --non-interactive --ignore-missing-imports sssom/ setup.py +description = Run the mypy tool to check static typing on the project.