diff --git a/kgx/cli/cli_utils.py b/kgx/cli/cli_utils.py index 03b3479e..59b34aa5 100644 --- a/kgx/cli/cli_utils.py +++ b/kgx/cli/cli_utils.py @@ -219,7 +219,7 @@ def validate( }, # streaming processing throws the graph data away # ... Second, we inject the Inspector into the transform() call, # for the underlying Transformer.process() to use... - inspector=validator + inspector=validator, ) if output: diff --git a/tests/resources/test-transform-rdf-tsv.yaml b/tests/resources/test-transform-rdf-tsv.yaml new file mode 100644 index 00000000..cb5dc01f --- /dev/null +++ b/tests/resources/test-transform-rdf-tsv.yaml @@ -0,0 +1,25 @@ +configuration: + output_directory: ../target + checkpoint: false +transform: + source: + test_graph: + name: "Test Graph" + input: + format: nt + filename: + - test-transform-rdf.nt + output: + format: tsv + filename: + - test-transform-rdf + valid_graph: + name: "Valid JSON Graph" + input: + format: json + filename: + - valid.json + output: + format: tsv + filename: + - valid_graph diff --git a/tests/resources/test-transform-rdf.nt b/tests/resources/test-transform-rdf.nt new file mode 100644 index 00000000..3d566ca2 --- /dev/null +++ b/tests/resources/test-transform-rdf.nt @@ -0,0 +1,15 @@ + . + . + . + . + "Test Gene 123" . + "This is a Test Gene 123" . + "Test Dataset" . + . + . + . + . + "Test Gene 456" . + "This is a Test Gene 456" . + "Test Dataset" . + . 
\ No newline at end of file diff --git a/tests/resources/test-transform-tsv-rdf.yaml b/tests/resources/test-transform-tsv-rdf.yaml new file mode 100644 index 00000000..ef578793 --- /dev/null +++ b/tests/resources/test-transform-tsv-rdf.yaml @@ -0,0 +1,26 @@ +configuration: + output_directory: ../target + checkpoint: false +transform: + source: + test_graph: + name: "Test Graph" + input: + format: tsv + filename: + - test2_nodes.tsv + - test2_edges.tsv + output: + format: nt + filename: + - test-tranform-tsv-rdf.nt + valid_graph: + name: "Valid JSON Graph" + input: + format: json + filename: + - valid.json + output: + format: tsv + filename: + - valid_graph diff --git a/tests/unit/test_cli_utils.py b/tests/unit/test_cli_utils.py index 1b4b4f92..47279fea 100644 --- a/tests/unit/test_cli_utils.py +++ b/tests/unit/test_cli_utils.py @@ -866,6 +866,25 @@ def test_transform2(): assert os.path.exists(os.path.join(RESOURCE_DIR, "graph_edges.tsv")) +def test_transform_rdf_to_tsv(): + """ + Transform an RDF (N-Triples) source to TSV via a transform YAML; expect node and edge TSVs in TARGET_DIR. + """ + transform_config = os.path.join(RESOURCE_DIR, "test-transform-rdf-tsv.yaml") + transform(inputs=None, transform_config=transform_config) + assert os.path.exists(os.path.join(TARGET_DIR, "test-transform-rdf_edges.tsv")) + assert os.path.exists(os.path.join(TARGET_DIR, "test-transform-rdf_nodes.tsv")) + + +def test_transform_tsv_to_rdf(): + """ + Transform a TSV source to RDF (N-Triples) via a transform YAML; expect the .nt file in TARGET_DIR. + """ + transform_config = os.path.join(RESOURCE_DIR, "test-transform-tsv-rdf.yaml") + transform(inputs=None, transform_config=transform_config) + assert os.path.exists(os.path.join(TARGET_DIR, "test-tranform-tsv-rdf.nt")) + + def test_merge1(): """ Transform from test merge YAML. 
diff --git a/tests/unit/test_source/test_json_source.py b/tests/unit/test_source/test_json_source.py index 488e1a08..fc1ecc6e 100644 --- a/tests/unit/test_source/test_json_source.py +++ b/tests/unit/test_source/test_json_source.py @@ -40,6 +40,31 @@ def test_read_json1(): assert e["relation"] == "RO:0004013" +def test_read_json_filter(): + """ + Read from a JSON using JsonSource with a node filter on category biolink:Disease; every node read must have that category. + """ + t = Transformer() + s = JsonSource(t) + filters = { + "category": {"biolink:Disease"} + } + s.set_node_filters(filters) + g = s.parse(os.path.join(RESOURCE_DIR, "valid.json")) + nodes = {} + edges = {} + for rec in g: + if rec: + if len(rec) == 4: + edges[(rec[0], rec[1])] = rec[3] + else: + nodes[rec[0]] = rec[1] + + for node in nodes: + n = nodes[node] + assert n["category"] == ["biolink:Disease"] + + def test_read_json2(): """ Read from a JSON using JsonSource.