diff --git a/etk/knowledge_graph/graph.py b/etk/knowledge_graph/graph.py
index 69abff1b..ec387593 100644
--- a/etk/knowledge_graph/graph.py
+++ b/etk/knowledge_graph/graph.py
@@ -47,7 +47,10 @@ def serialize(self, format='ttl', namespace_manager=None, **kwargs):
             b_string = self._g.serialize(format=format, contexts=namespace_manager, **kwargs)
         else:
             b_string = self._g.serialize(format=format, **kwargs)
-        return b_string.decode('UTF-8')
+        # serialize() may hand back bytes or str; normalise so callers always get str.
+        if isinstance(b_string, bytes):
+            return b_string.decode('UTF-8')
+        return b_string
 
     @lru_cache()
     def _resolve_uri(self, uri: URI) -> rdflib.URIRef:
diff --git a/etk/knowledge_graph/node.py b/etk/knowledge_graph/node.py
index 22f5b98a..cd56328f 100644
--- a/etk/knowledge_graph/node.py
+++ b/etk/knowledge_graph/node.py
@@ -3,7 +3,7 @@
 from datetime import date, datetime
 from xml.dom.minidom import Document, DocumentFragment
 from etk.etk_exceptions import InvalidGraphNodeValueError, UnknownLiteralType
-
+import re
 
 class Node(object):
     def __init__(self, value):
@@ -118,6 +118,13 @@ def __getattr__(self, item):
 
 
 class LiteralType(URI, metaclass=__Type):
+
+    valid_time_pattern = re.compile(r"[\-]?(\d{4})-((0[1-9])|(1[0-2]))-(0[1-9]|[12][0-9]|3[01])T(0[0-9]|1[0-9]|2[0-3]):(0[0-9]|1[0-9]|2[0-9]|3[0-9]|4[0-9]|5[0-9]):(0[0-9]|1[0-9]|2[0-9]|3[0-9]|4[0-9]|5[0-9])")
+    valid_month_pattern = re.compile(r"[\-]?(\d{4})-((0[1-9])|(1[0-2]))-(00)T00:00:00")
+    valid_year_decade_millennium_pattern = re.compile(r"[\-]?(\d{4})-(00)-(00)T00:00:00")
+    valid_hundred_thousand_years_pattern = re.compile(r"[\-]?(\d{6,7})-(0[01])-(0[01])T00:00:00")
+    valid_million_billion_years = re.compile(r"[\-]?(\d{8}\d+)-(0[01])-(0[01])T00:00:00")
+
     def __init__(self, s, common_check=True):
         self.common_check = common_check
         self._type = None
@@ -172,6 +179,7 @@ def is_value_valid(self, s):
 
     @staticmethod
     def _is_valid_date_time(s):
+
         if isinstance(s, datetime):
             return True
 
@@ -193,7 +201,12 @@ def _is_valid_date_time(s):
         except:
             pass
 
-        return False
+        validity_list = [LiteralType.valid_time_pattern.match(s),
+                         LiteralType.valid_month_pattern.match(s),
+                         LiteralType.valid_year_decade_millennium_pattern.match(s),
+                         LiteralType.valid_hundred_thousand_years_pattern.match(s),
+                         LiteralType.valid_million_billion_years.match(s)]
+        return any(validity_list)
 
     xsd = 'http://www.w3.org/2001/XMLSchema#'
     rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
diff --git a/etk/wikidata/value.py b/etk/wikidata/value.py
index 255b284e..35181e1d 100644
--- a/etk/wikidata/value.py
+++ b/etk/wikidata/value.py
@@ -77,7 +77,12 @@ def __build_full_value(self):
         self.full_value.add_property(URI('wikibase:timePrecision'), self._precision)
         self.full_value.add_property(URI('wikibase:timeTimezone'), self._time_zone)
         self.full_value.add_property(URI('wikibase:timeCalendarModel'), self._calendar.value)
-        self.full_value.add_property(URI('wikibase:timeValue'), self.value)
+        self.full_value.add_property(URI('wikibase:timeValue'), self.value)
+        # TODO fix import bug
+        # if not self.value.startswith("+"):
+        #     self.full_value.add_property(URI('wikibase:timeValue'), self.value)
+        # else:
+        #     self.full_value.add_property(URI('wikibase:timeValue'), self.value[1:])
 
     def _v_name(self):
         time = self.value.value.replace(':', '').replace(' ', '-')
diff --git a/setup.py b/setup.py
index 477ad889..0253b2c3 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
 
 setuptools.setup(
     name="etk",
-    version="2.2.4",
+    version="2.2.5",
     author="Amandeep Singh",
     author_email="amandeep.s.saggu@gmail.com",
     description="extraction toolkit",