dice-group · Jean-KOUAGOU · Nov 5, 2024 · Nov 5, 2024 · Nov 14, 2024 · Nov 14, 2024
diff --git a/README.md b/README.md
@@ -43,7 +43,7 @@ wget https://files.dice-research.org/projects/Ontolearn/KGs.zip -O ./KGs.zip &&
 wget https://files.dice-research.org/projects/Ontolearn/LPs.zip -O ./LPs.zip && unzip LPs.zip
 ```
 
-## Learning OWL Class Expression
+## Learning OWL Class Expressions
 ```python
 from ontolearn.learners import TDL
 from ontolearn.triple_store import TripleStore
@@ -95,7 +95,7 @@ weighted avg       1.00      1.00      1.00         4
 """
 ```
 
-## Learning OWL Class Expression over DBpedia
+## Learning OWL Class Expressions over DBpedia
 ```python
 from ontolearn.learners import TDL
 from ontolearn.triple_store import TripleStore
@@ -118,6 +118,20 @@ print(owl_expression_to_sparql(expression=h))
 save_owl_class_expressions(expressions=h,path="owl_prediction")
 ```
 
+- Running the DBpedia examples with a single command:
+1. For TDL
+```bash
+git clone https://github.com/dice-group/Ontolearn.git && cd Ontolearn && git checkout large_scale_cel && conda create -n venv python=3.10.14 --no-default-packages --y && conda activate venv && pip install -e . && wget https://files.dice-research.org/projects/Ontolearn/LPs.zip -O ./LPs.zip && unzip LPs.zip && python examples/dbpedia_concept_learning_with_ontolearn.py tdl
+```
+2. For Drill
+```bash
+git clone https://github.com/dice-group/Ontolearn.git && cd Ontolearn && git checkout large_scale_cel && conda create -n venv python=3.10.14 --no-default-packages --y && conda activate venv && pip install -e . && wget https://files.dice-research.org/projects/Ontolearn/LPs.zip -O ./LPs.zip && unzip LPs.zip && python examples/dbpedia_concept_learning_with_ontolearn.py drill
+```
+3. For DL-Learner
+```bash
+git clone https://github.com/dice-group/Ontolearn.git && cd Ontolearn && git checkout large_scale_cel && conda create -n venv python=3.10.14 --no-default-packages --y && conda activate venv && pip install -e . && wget https://github.com/SmartDataAnalytics/DL-Learner/releases/download/1.4.0/dllearner-1.4.0.zip -O ./dllearner-1.4.0.zip && unzip dllearner-1.4.0.zip && wget https://files.dice-research.org/projects/Ontolearn/LPs.zip -O ./LPs.zip && unzip LPs.zip && python examples/dbpedia_concept_learning_with_dllearner.py
+```
+
 Fore more please refer to  the [examples](https://github.com/dice-group/Ontolearn/tree/develop/examples) folder.
 
 ## ontolearn-webservice 

diff --git a/examples/dbpedia_concept_learning_with_dllearner.py b/examples/dbpedia_concept_learning_with_dllearner.py
@@ -0,0 +1,37 @@
+"""Learn OWL class expressions over the DBpedia SPARQL endpoint with DL-Learner (CELOE).
+
+Expects the DL-Learner 1.4.0 CLI under ./dllearner-1.4.0 and the learning problems under ./LPs.
+"""
+from ontolearn.binders import DLLearnerBinder
+from owlapy.iri import IRI
+from owlapy.owl_individual import OWLNamedIndividual
+import json
+
+from ontolearn.learning_problem import PosNegLPStandard
+
+# (1) SPARQL endpoint as knowledge source: supported only by DL-Learner-1.4.0
+kb_path = "https://dbpedia.data.dice-research.org/sparql"
+
+# DL-Learner releases can be downloaded from https://github.com/SmartDataAnalytics/DL-Learner/releases.
+dl_learner_binary_path = "./dllearner-1.4.0/bin/cli"
+
+
+def _to_individuals(iris):
+    """Turn an iterable of IRI strings into a set of OWLNamedIndividual objects."""
+    return {OWLNamedIndividual(IRI.create(iri)) for iri in iris}
+
+
+# (2) Read learning problem file; JSON is UTF-8, so do not depend on the platform default encoding
+with open("./LPs/DBpedia2022-12/lps.json", encoding="utf-8") as f:
+    lps = json.load(f)
+# (3) Start class expression learning
+for i, item in enumerate(lps, start=1):
+    print(f"\nLP {i}/{len(lps)} ==> Target expression: ", item["target expression"], "\n")
+    examples = item["examples"]
+    lp = PosNegLPStandard(pos=_to_individuals(examples["positive examples"]),
+                          neg=_to_individuals(examples["negative examples"]))
+
+    celoe = DLLearnerBinder(binary_path=dl_learner_binary_path, kb_path=kb_path, model='celoe')
+    print("\nStarting class expression learning with DL-Learner")
+    best_pred_celoe = celoe.fit(lp, use_sparql=True).best_hypothesis()
+    print("\nLearned expression: ", best_pred_celoe)
diff --git a/examples/dbpedia_concept_learning_with_ontolearn.py b/examples/dbpedia_concept_learning_with_ontolearn.py
@@ -0,0 +1,56 @@
+"""Learn OWL class expressions over the DBpedia SPARQL endpoint with Drill or TDL.
+
+Usage: python examples/dbpedia_concept_learning_with_ontolearn.py {tdl,drill}
+"""
+import json
+import os
+import sys
+
+from owlapy.owl_individual import OWLNamedIndividual, IRI
+from ontolearn.learners import Drill, TDL
+from ontolearn.learning_problem import PosNegLPStandard
+from ontolearn.triple_store import TripleStore
+from ontolearn.utils.static_funcs import save_owl_class_expressions
+from owlapy.render import DLSyntaxObjectRenderer
+
+if len(sys.argv) < 2:
+    print("You need to provide the model name; either tdl or drill")
+    sys.exit(1)
+
+model_name = sys.argv[1]
+# Explicit check instead of `assert`: assertions are stripped when Python runs with -O.
+if model_name.lower() not in ("drill", "tdl"):
+    sys.exit("Currently, only Drill and TDL are supported")
+
+
+def _to_individuals(iris):
+    """Turn an iterable of IRI strings into a set of OWLNamedIndividual objects."""
+    return {OWLNamedIndividual(IRI.create(iri)) for iri in iris}
+
+
+# (1) Initialize knowledge source with TripleStore
+kb = TripleStore(url="https://dbpedia.data.dice-research.org/sparql")
+# (2) Initialize a DL renderer.
+renderer = DLSyntaxObjectRenderer()
+# (3) Initialize a learner.
+if model_name.lower() == "drill":
+    model = Drill(knowledge_base=kb, max_runtime=240)
+else:
+    model = TDL(knowledge_base=kb)
+# (4) Solve learning problems
+with open("./LPs/DBpedia2022-12/lps.json", encoding="utf-8") as f:
+    lps = json.load(f)
+# Create the output directory once; exist_ok replaces the per-iteration exists()/mkdir() pair.
+output_dir = f"./learned_owl_expressions_{model_name}"
+os.makedirs(output_dir, exist_ok=True)
+for i, item in enumerate(lps):
+    print("\nTarget expression: ", item["target expression"], "\n")
+    examples = item["examples"]
+    lp = PosNegLPStandard(pos=_to_individuals(examples["positive examples"]),
+                          neg=_to_individuals(examples["negative examples"]))
+    # (5) Learn description logic concepts best fitting
+    h = model.fit(learning_problem=lp).best_hypotheses()
+    str_concept = renderer.render(h)
+    print("Concept:", str_concept)  # e.g.  ∃ predecessor.WikicatPeopleFromBerlin
+    # (6) Save e.g., ∃ predecessor.WikicatPeopleFromBerlin into disk
+    save_owl_class_expressions(expressions=h, path=f"{output_dir}/owl_prediction_{i}")
diff --git a/examples/generate_lps_dbpedia.py b/examples/generate_lps_dbpedia.py
@@ -0,0 +1,22 @@
+"""Generate learning problems over the DBpedia SPARQL endpoint with LPGen."""
+from ontolearn.lp_generator import LPGen
+
+PATH = 'https://dbpedia.data.dice-research.org/sparql'
+STORAGE_DIR = 'DBpedia_LPs'
+
+
+def generate_lps():
+    """Instantiate LPGen for the endpoint PATH with storage directory STORAGE_DIR and run it."""
+    generator_options = dict(
+        kb_path=PATH,
+        storage_dir=STORAGE_DIR,
+        refinement_expressivity=1e-7,
+        use_triple_store=True,
+        sample_fillers_count=1,
+        num_sub_roots=1,
+    )
+    LPGen(**generator_options).generate()
+
+
+if __name__ == '__main__':
+    generate_lps()
diff --git a/examples/large_scale_learning_problem_generation.py b/examples/large_scale_learning_problem_generation.py
@@ -0,0 +1,136 @@
+"""Generate learning problems from randomly composed class expressions over DBpedia.
+
+Each expression is parsed, translated to SPARQL and evaluated against the endpoint to retrieve
+positive examples; its negation provides the negative examples.
+"""
+import requests
+from requests import Response
+from requests.exceptions import RequestException, JSONDecodeError
+from owlapy.converter import owl_expression_to_sparql
+from owlapy.parser import DLSyntaxParser
+from ontolearn.triple_store import TripleStoreKnowledgeBase
+import random
+import numpy as np
+import json
+
+# Fixed seeds make the random choices below repeatable.
+random.seed(42)
+np.random.seed(42)
+
+sparql_endpoint = "https://dbpedia.data.dice-research.org/sparql"
+
+rdfs_prefix = "PREFIX  rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n "
+
+namespace = "http://dbpedia.org/ontology/"
+
+dls_parser = DLSyntaxParser(namespace=namespace)
+
+kb = TripleStoreKnowledgeBase(url=sparql_endpoint)
+
+# Named classes the random expressions are composed from.
+selected_concepts_str = [
+    'http://dbpedia.org/ontology/Journalist',
+    'http://dbpedia.org/ontology/HistoricPlace',
+    'http://dbpedia.org/ontology/Lipid',
+    'http://dbpedia.org/ontology/Profession',
+    'http://dbpedia.org/ontology/Model',
+    'http://dbpedia.org/ontology/President',
+    'http://dbpedia.org/ontology/Academic',
+    'http://dbpedia.org/ontology/Actor',
+    'http://dbpedia.org/ontology/Place',
+    'http://dbpedia.org/ontology/FootballMatch',
+]
+
+
+def query_func(query):
+    """Run a SPARQL query against the endpoint and yield the URI values found in its result.
+
+    Args:
+        query: SPARQL query string, sent as the ``query`` form field of a POST request.
+
+    Yields:
+        The value of every URI-typed binding. When there is none, a single ``None`` is
+        yielded instead, so ``list(query_func(q))`` is never empty.
+
+    Raises:
+        RequestException: If the request fails or the endpoint returns an HTTP error status.
+    """
+    try:
+        response = requests.post(sparql_endpoint, data={"query": query}, timeout=300)
+        # Report HTTP errors here instead of failing later while decoding the body.
+        response.raise_for_status()
+    except RequestException as e:
+        raise RequestException(
+            f"Make sure the server is running on the `triplestore_address` = '{sparql_endpoint}'"
+            f". Check the error below:"
+            f"\n  -->Error: {e}"
+        ) from e
+
+    json_results = response.json()
+    vars_ = list(json_results["head"]["vars"])
+    inds = []
+    for b in json_results["results"]["bindings"]:
+        for v in vars_:
+            # SPARQL JSON results leave unbound variables out of a binding.
+            if v in b and b[v]["type"] == "uri":
+                inds.append(b[v]["value"])
+
+    if inds:
+        yield from inds
+    else:
+        yield None
+
+
+def generate_lps(num_candidates=200, output_path="Large_scale_lps.json"):
+    """Sample random class expressions and save those with enough examples as learning problems.
+
+    Args:
+        num_candidates: Number of random expressions to try.
+        output_path: JSON file the resulting list of learning problems is written to.
+    """
+    all_obj_props = list(kb.ontology.object_properties_in_signature())
+    connectors = ['⊔', '⊓']
+    neg = "¬"
+    quantifiers = ['∃', '∀']
+
+    all_lps = []
+
+    for _ in range(num_candidates):
+        # The order of the random draws is kept unchanged so a given seed produces the same expressions.
+        expression = f"<{random.choice(selected_concepts_str)}> {random.choice(connectors)} <{random.choice(selected_concepts_str)}>"
+
+        if random.random() > 0.9:
+            # NOTE(review): no parentheses are added here, so the negation presumably applies to the first operand only - confirm this is intended.
+            expression = f"{neg}{expression}"
+
+        if random.random() > 0.8:
+            expression = f"{random.choice(quantifiers)} <{random.choice(all_obj_props).str}>.({expression})"
+
+        neg_expression = neg + f"({expression})"
+        concept = dls_parser.parse(expression)
+        concept_neg = dls_parser.parse(neg_expression)
+
+        sparql_query = owl_expression_to_sparql(concept) + "\nLIMIT 100"
+        sparql_query_neg = owl_expression_to_sparql(concept_neg) + "\nLIMIT 100"
+
+        print(sparql_query)
+        print("\nNeg query")
+        print(sparql_query_neg)
+
+        pos_inds = list(query_func(sparql_query))
+        neg_inds = list(query_func(sparql_query_neg))
+
+        # Require at least two examples per side; this also discards the [None] placeholder of an empty result.
+        if len(pos_inds) <= 1 or len(neg_inds) <= 1:
+            continue
+
+        all_lps.append({"target expression": expression,
+                        "examples": {"positive examples": pos_inds,
+                                     "negative examples": neg_inds}})
+
+    with open(output_path, "w", encoding="utf-8") as f:
+        json.dump(all_lps, f)
+
+
+if __name__ == "__main__":
+    generate_lps()
diff --git a/ontolearn/binders.py b/ontolearn/binders.py
@@ -47,10 +47,13 @@ class DLLearnerBinder:
     """
 
     def __init__(self, binary_path=None, model=None, kb_path=None, storage_path=".", max_runtime=3):
-        assert binary_path, f"binary_path must be given {binary_path}"
-        assert os.path.exists(binary_path), f"binary path {binary_path} does not exist"
-        assert model, "model must be given"
-        assert kb_path, "kb_path must be given"
+        try:
+            assert binary_path
+            assert model
+            assert kb_path
+        except AssertionError:
+            print(f'binary_path:{binary_path}, model:{model}, kb_path{kb_path} cannot be None')
+            raise
         self.binary_path = binary_path
         self.kb_path = kb_path
         self.name = model
@@ -62,7 +65,7 @@ def __init__(self, binary_path=None, model=None, kb_path=None, storage_path=".",
         self.best_predictions = None
         self.config_name_identifier = None
 
-    def write_dl_learner_config(self, pos: List[str], neg: List[str]) -> str:
+    def write_dl_learner_config(self, pos: List[str], neg: List[str], use_sparql=False) -> str:
         """Writes config file for dl-learner.
 
         Args:
@@ -95,14 +98,24 @@ def write_dl_learner_config(self, pos: List[str], neg: List[str]) -> str:
         Text.append("// knowledge source definition")
 
         Text.append("cli.type = \"org.dllearner.cli.CLI\"")
-        Text.append("ks.type = \"OWL File\"")
         Text.append("\n")
 
         Text.append("// knowledge source definition")
-        Text.append(
+        if use_sparql:
+            Text.append(
+            "ks.url = \"" + self.kb_path + '\"')
+            Text.append("ks.type = \"SPARQL endpoint\"")
+            Text.append("reasoner.type = \"SPARQL Reasoner\"")
+            Text.append("op.type = \"tdtop\"")
+        else:
+            Text.append(
             "ks.fileName = \"" + self.kb_path + '\"')
-        Text.append("\n")
-        Text.append("reasoner.type = \"closed world reasoner\"")
+            Text.append("ks.type = \"OWL File\"")
+            Text.append("reasoner.type = \"closed world reasoner\"")
+            Text.append("op.type = \"rho\"")
+            Text.append("op.useNumericDatatypes = \"false\"")
+            Text.append("op.useCardinalityRestrictions = \"false\"")
+
         Text.append("reasoner.sources = { ks }")
         Text.append("\n")
 
@@ -115,10 +128,6 @@ def write_dl_learner_config(self, pos: List[str], neg: List[str]) -> str:
         Text.append("\n")
         Text.append("alg.writeSearchTree = \"true\"")
 
-        Text.append("op.type = \"rho\"")
-        Text.append("op.useNumericDatatypes = \"false\"")
-        Text.append("op.useCardinalityRestrictions = \"false\"")
-
         if self.name == 'celoe':
             Text.append("alg.type = \"celoe\"")
             Text.append("alg.stopOnFirstDefinition = \"true\"")
@@ -143,7 +152,7 @@ def write_dl_learner_config(self, pos: List[str], neg: List[str]) -> str:
                 wb.write("\n".encode("utf-8"))
         return pathToConfig
 
-    def fit(self, lp: PosNegLPStandard, max_runtime: int = None):
+    def fit(self, lp: PosNegLPStandard, max_runtime: int = None, use_sparql=False):
         """Fit dl-learner model on a given positive and negative examples.
 
         Args:
@@ -159,7 +168,8 @@ def fit(self, lp: PosNegLPStandard, max_runtime: int = None):
             self.max_runtime = max_runtime
 
         pathToConfig = self.write_dl_learner_config(pos=[i.str for i in lp.pos],
-                                                    neg=[i.str for i in lp.neg])
+                                                    neg=[i.str for i in lp.neg],
+                                                   use_sparql=use_sparql)
         total_runtime = time.time()
         res = subprocess.run([self.binary_path, pathToConfig], capture_output=True, universal_newlines=True)
         total_runtime = round(time.time() - total_runtime, 3)

diff --git a/ontolearn/triple_store.py b/ontolearn/triple_store.py
@@ -291,7 +291,7 @@ def __repr__(self):
 
 
 class TripleStoreReasoner(AbstractOWLReasoner):
-    __slots__ = "ontology"
+    #__slots__ = "ontology"
 
     def __init__(self, ontology: TripleStoreOntology):
         self.ontology = ontology