diff --git a/src/koza/io/writer/tsv_writer.py b/src/koza/io/writer/tsv_writer.py index 9e9d6dc..8c1f79a 100644 --- a/src/koza/io/writer/tsv_writer.py +++ b/src/koza/io/writer/tsv_writer.py @@ -84,24 +84,22 @@ def get_new_fh_path(base_dir, filename, category): if split: base_dir, filename = Path(fh.name).parent, getattr(self, f"{record_type}s_file_name").name if record_type == "node": - category = record.get("category", ["UnknownNodeCategory"])[0].split(":")[-1] + category = record.get("category", [""])[0].split(":")[-1] else: subject_category = ( - record.get("subject_category", "UnknownSubjectCategory").split(":")[-1] + record.get("subject_category", "").split(":")[-1] if record.get("subject_category") - else "UnknownSubjectCategory" + else "UnknownCategory" ) object_category = ( - record.get("object_category", "UnknownObjectCategory").split(":")[-1] + record.get("object_category", "").split(":")[-1] if record.get("object_category") - else "UnknownObjectCategory" + else "UnknownCategory" ) edge_category = ( - record.get("category", ["UnknownEdgeCategory"])[0].split(":")[-1] - if record.get("category") - else "UnknownEdgeCategory" + record.get("category", [""])[0].split(":")[-1] if record.get("category") else "UnknownCategory" ) category = subject_category + edge_category + object_category diff --git a/tests/unit/test_tsvwriter_node_and_edge.py b/tests/unit/test_tsvwriter_node_and_edge.py index c8d7d7f..692564f 100644 --- a/tests/unit/test_tsvwriter_node_and_edge.py +++ b/tests/unit/test_tsvwriter_node_and_edge.py @@ -40,8 +40,8 @@ def test_tsv_writer_split(): """ Writes a test tsv file """ - g1 = Gene(id="HGNC:11603", name="TBX4") - d1 = Disease(id="MONDO:0005002", name="chronic obstructive pulmonary disease") + g1 = Gene(id="HGNC:11603", name="TBX4", category=["biolink:Gene"]) + d1 = Disease(id="MONDO:0005002", name="chronic obstructive pulmonary disease", category=["biolink:Disease"]) a1 = GeneToDiseaseAssociation( id="uuid:5b06e86f-d768-4cd9-ac27-abe31e95ab1e", subject=g1.id, @@ -49,8 +49,10 @@ def test_tsv_writer_split(): predicate="biolink:contributes_to", knowledge_level="not_provided", agent_type="not_provided", + subject_category="biolink:Gene", + object_category="biolink:Disease", ) - g2 = Gene(id="HGNC:11604", name="TBX5") + g2 = Gene(id="HGNC:11604", name="TBX5", category=["biolink:Gene"]) d2 = Disease(id="MONDO:0005003", name="asthma") a2 = GeneToDiseaseAssociation( id="uuid:5b06e86f-d768-4cd9-ac27-abe31e95ab1f", @@ -61,7 +63,7 @@ def test_tsv_writer_split(): agent_type="not_provided", ) g3 = Gene(id="HGNC:11605", name="TBX6") - d3 = Disease(id="MONDO:0005004", name="lung cancer") + d3 = Disease(id="MONDO:0005004", name="lung cancer", category=["biolink:Disease"]) a3 = GeneToDiseaseAssociation( id="uuid:5b06e86f-d768-4cd9-ac27-abe31e95ab1g", subject=g3.id, @@ -70,14 +72,25 @@ def test_tsv_writer_split(): knowledge_level="not_provided", agent_type="not_provided", ) - ents = [[g1, d1, a1], [g2, d2, a2], [g3, d3, a3]] + g4 = Gene(id="HGNC:11606", name="TBX7") + d4 = Disease(id="MONDO:0005005", name="pulmonary fibrosis") + a4 = GeneToDiseaseAssociation( + id="uuid:5b06e86f-d768-4cd9-ac27-abe31e95ab1h", + subject=g4.id, + object=d4.id, + predicate="biolink:contributes_to", + knowledge_level="not_provided", + agent_type="not_provided", + ) + + ents = [[g1, d1, a1], [g2, d2, a2], [g3, d3, a3], [g4, d4, a4]] node_properties = ["id", "category", "symbol", "in_taxon", "provided_by", "source"] edge_properties = ["id", "subject", "predicate", "object", "category" "qualifiers", "publications", "provided_by"] outdir = "output/tests/split-examples" outfile = "tsvwriter" - split_edge_file_substring = "UnknownSubjectCategoryGeneToDiseaseAssociationUnknownObjectCategory" + split_edge_file_substring = "UnknownCategoryGeneToDiseaseAssociationUnknownCategory" t = TSVWriter(outdir, outfile, node_properties, edge_properties) for ent in ents: