Skip to content

Commit

Permalink
Infer ontology format from file extension (#699)
Browse files Browse the repository at this point in the history
  • Loading branch information
kysrpex authored Aug 27, 2021
1 parent 46c9885 commit 3fe52c3
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 13 deletions.
24 changes: 15 additions & 9 deletions osp/core/ontology/parser/owl/parser.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
"""Parses OWL ontologies."""

from typing import Tuple, Set, Dict, Optional
import io
import logging
import os.path
from rdflib import Graph, URIRef
from typing import Dict, Set, Tuple, Optional

import rdflib
import requests
import yaml
from osp.core.ontology.parser.parser import OntologyParser
from rdflib import Graph, URIRef
from rdflib.util import guess_format

import osp.core.ontology.parser.owl.keywords as keywords
from osp.core.ontology.parser.parser import OntologyParser

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -82,8 +85,7 @@ def reference_style(self) -> bool:
def graph(self) -> Graph:
"""Fetch the ontology graph from the ontology file."""
if not self._graph:
file_format = self._yaml_config.get(keywords.FILE_FORMAT_KEY,
'xml')
file_format = self._yaml_config.get(keywords.FILE_FORMAT_KEY, None)
self._graph = self._read_ontology_graph(self._yaml_config,
self._file_path,
file_format)
Expand Down Expand Up @@ -179,24 +181,28 @@ def _validate_yaml_config(doc: list):
"identifier: %s." % doc[keywords.IDENTIFIER_KEY])

@staticmethod
def _read_ontology_graph(yaml_config_doc: list,
def _read_ontology_graph(yaml_config_doc: dict,
yaml_config_path: str,
file_format: str) -> Graph:
file_format: Optional[str] = None) -> Graph:
"""Get the ontology from the file specified in the configuration file.
Args:
yaml_config_doc (list): The YAML doc resulting from loading the
yaml_config_doc: The YAML doc resulting from loading the
YAML config file for OWL ontologies. The doc must have been
validated with `_validate_yaml_config` before being passed to
this function.
yaml_config_path (str): the path where the YAML config file was
yaml_config_path: The path where the YAML config file was
read. It is used to resolve the relative path to the ontology
file.
file_format: The format of the file containing the ontology graph.
When not provided, it will be guessed using `guess_format`
from `rdflib.util`.
Returns:
Graph: The ontology graph.
"""
rdf_file_location = yaml_config_doc[keywords.RDF_FILE_KEY]
file_format = file_format or guess_format(rdf_file_location)
if rdf_file_location.startswith(('http://', 'https://')):
logger.info(f"Downloading {rdf_file_location}.")
content = requests.get(rdf_file_location).content.decode('utf-8')
Expand Down
34 changes: 30 additions & 4 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
"""Test the default parser used for parsing OWL ontologies."""

import os
import yaml
import rdflib
import re
import shutil
import unittest2 as unittest
import tempfile
import yaml
from pathlib import Path

import responses
import rdflib
import unittest2 as unittest
from rdflib.compare import isomorphic

from osp.core.ontology.namespace_registry import NamespaceRegistry
from osp.core.ontology.parser import Parser
from osp.core.ontology.parser.parser import OntologyParser
from osp.core.ontology.namespace_registry import NamespaceRegistry


RDF_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
Expand Down Expand Up @@ -215,6 +219,28 @@ def test_parse(self):
g1.parse(RDF_FILE, format="ttl")
self.assertTrue(parser.graph, g1)

def test_parse_guess_format(self):
    """Test parsing a file without providing the format.

    A copy of the YAML configuration file with the 'format' keyword
    stripped is written next to the original file, so the parser has to
    guess the format of the ontology file. The resulting graph is then
    compared with the one obtained by parsing the RDF file directly. The
    copy is removed afterwards, even when the test fails.
    """
    original_path = Path(YML_FILE)
    modified_yml_config_path = str(original_path.with_name(
        original_path.stem + '_mod' + original_path.suffix))
    try:
        # Create a copy of YML_FILE and remove the 'format' keyword.
        with open(YML_FILE, 'r') as yml_config:
            stripped_config = re.sub(r'^[\s]*format:[\s].*', '',
                                     yml_config.read(),
                                     flags=re.MULTILINE)
        with open(modified_yml_config_path, 'w') as modified_yml_config:
            modified_yml_config.write(stripped_config)

        parser = OntologyParser.get_parser(modified_yml_config_path)
        g1 = rdflib.Graph()
        g1.parse(RDF_FILE, format="ttl")
        # `assertTrue(a, b)` uses `b` only as the failure message, so it
        # never compared the graphs. Compare them explicitly instead.
        self.assertTrue(isomorphic(parser.graph, g1))
    finally:
        if os.path.exists(modified_yml_config_path):
            os.remove(modified_yml_config_path)


if __name__ == "__main__":
unittest.main()

0 comments on commit 3fe52c3

Please sign in to comment.