Large scale cel #495
base: develop
Changes from 16 commits
@@ -0,0 +1,26 @@
import json

from ontolearn.binders import DLLearnerBinder
from ontolearn.learning_problem import PosNegLPStandard
from owlapy.iri import IRI
from owlapy.owl_individual import OWLNamedIndividual

# (1) SPARQL endpoint as the knowledge source; supported only by DL-Learner-1.4.0.
kb_path = "https://dbpedia.data.dice-research.org/sparql"

# DL-Learner can be downloaded from https://github.com/SmartDataAnalytics/DL-Learner/releases.
dl_learner_binary_path = "./dllearner-1.4.0/bin/cli"

# (2) Read the learning problem file.
with open("./LPs/DBpedia2022-12/lps.json") as f:
    lps = json.load(f)

# (3) Start class expression learning.
for i, item in enumerate(lps):
    print(f"\nLP {i + 1}/{len(lps)} ==> Target expression: ", item["target expression"], "\n")
    lp = PosNegLPStandard(pos=set(map(OWLNamedIndividual, map(IRI.create, item["examples"]["positive examples"]))),
                          neg=set(map(OWLNamedIndividual, map(IRI.create, item["examples"]["negative examples"]))))

    celoe = DLLearnerBinder(binary_path=dl_learner_binary_path, kb_path=kb_path, model='celoe')
    print("\nStarting class expression learning with DL-Learner")
    best_pred_celoe = celoe.fit(lp, use_sparql=True).best_hypothesis()
    print("\nLearned expression: ", best_pred_celoe)
@@ -0,0 +1,36 @@
import json, os, sys
from owlapy.owl_individual import OWLNamedIndividual, IRI
from ontolearn.learners import Drill, TDL
from ontolearn.learning_problem import PosNegLPStandard
from ontolearn.triple_store import TripleStore
from ontolearn.utils.static_funcs import save_owl_class_expressions
from owlapy.render import DLSyntaxObjectRenderer

if len(sys.argv) < 2:
    print("You need to provide the model name; either tdl or drill")
    sys.exit(1)

model_name = sys.argv[1]
assert model_name.lower() in ["drill", "tdl"], "Currently, only Drill and TDL are supported"

# (1) Initialize the knowledge source as a SPARQL triple store.
kb = TripleStore(url="https://dbpedia.data.dice-research.org/sparql")
# (2) Initialize a DL syntax renderer.
renderer = DLSyntaxObjectRenderer()
# (3) Initialize a learner.
model = Drill(knowledge_base=kb, max_runtime=240) if model_name.lower() == "drill" else TDL(knowledge_base=kb)
# (4) Solve the learning problems.
with open("./LPs/DBpedia2022-12/lps.json") as f:
    lps = json.load(f)
for i, item in enumerate(lps):
    print("\nTarget expression: ", item["target expression"], "\n")
    lp = PosNegLPStandard(pos=set(map(OWLNamedIndividual, map(IRI.create, item["examples"]["positive examples"]))),
                          neg=set(map(OWLNamedIndividual, map(IRI.create, item["examples"]["negative examples"]))))
    # (5) Learn a description logic concept that best fits the examples.
    h = model.fit(learning_problem=lp).best_hypotheses()
    str_concept = renderer.render(h)
    print("Concept:", str_concept)  # e.g. ∃ predecessor.WikicatPeopleFromBerlin
    # (6) Save the learned expression, e.g. ∃ predecessor.WikicatPeopleFromBerlin, to disk.
    if not os.path.exists(f"./learned_owl_expressions_{model_name}"):
        os.mkdir(f"./learned_owl_expressions_{model_name}")
    save_owl_class_expressions(expressions=h, path=f"./learned_owl_expressions_{model_name}/owl_prediction_{i}")
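To spot-check which individuals a learned expression actually covers on the endpoint, the owlapy-to-SPARQL conversion used in the generation script further down in this diff can be reused. A minimal sketch, assuming h is the single class expression returned by best_hypotheses() above; the LIMIT is an arbitrary cap.

import requests
from owlapy.converter import owl_expression_to_sparql

# Convert the learned OWL class expression into a SPARQL SELECT query and cap the result size.
retrieval_query = owl_expression_to_sparql(h) + "\nLIMIT 100"
response = requests.post("https://dbpedia.data.dice-research.org/sparql",
                         data={"query": retrieval_query}, timeout=300)
# Print every URI bound in the result set (same JSON layout that query_func below iterates over).
results = response.json()
for binding in results["results"]["bindings"]:
    for var in results["head"]["vars"]:
        if binding[var]["type"] == "uri":
            print(binding[var]["value"])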
@@ -0,0 +1,11 @@
from ontolearn.lp_generator import LPGen

PATH = 'https://dbpedia.data.dice-research.org/sparql'
STORAGE_DIR = 'DBpedia_LPs'


def generate_lps():
    lp_gen = LPGen(kb_path=PATH, storage_dir=STORAGE_DIR, refinement_expressivity=1e-7, use_triple_store=True,
                   sample_fillers_count=1, num_sub_roots=1)
    lp_gen.generate()


if __name__ == '__main__':
    generate_lps()
I don't think that there is a need to merge a script that doesn't work into the develop branch.

Removed this file.
@@ -0,0 +1,103 @@
import requests
from requests.exceptions import RequestException
from owlapy.converter import owl_expression_to_sparql
from owlapy.parser import DLSyntaxParser
from ontolearn.triple_store import TripleStoreKnowledgeBase
import random
import numpy as np
import json

random.seed(42)
np.random.seed(42)

sparql_endpoint = "https://dbpedia.data.dice-research.org/sparql"

rdfs_prefix = "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n "

namespace = "http://dbpedia.org/ontology/"

dls_parser = DLSyntaxParser(namespace=namespace)

kb = TripleStoreKnowledgeBase(url=sparql_endpoint)

# print(kb.ontology)

selected_concepts_str = ['http://dbpedia.org/ontology/Journalist', 'http://dbpedia.org/ontology/HistoricPlace',
                         'http://dbpedia.org/ontology/Lipid', 'http://dbpedia.org/ontology/Profession',
                         'http://dbpedia.org/ontology/Model', 'http://dbpedia.org/ontology/President',
                         'http://dbpedia.org/ontology/Academic', 'http://dbpedia.org/ontology/Actor',
                         'http://dbpedia.org/ontology/Place', 'http://dbpedia.org/ontology/FootballMatch']


def query_func(query):
    """Run a SELECT query against the endpoint and yield the URIs of all bound variables."""
    try:
        response = requests.post(sparql_endpoint, data={"query": query}, timeout=300)
    except RequestException as e:
        raise RequestException(
            f"Make sure the server is running on the `triplestore_address` = '{sparql_endpoint}'."
            f" Check the error below:"
            f"\n -->Error: {e}"
        )

    json_results = response.json()
    vars_ = list(json_results["head"]["vars"])
    inds = []
    for b in json_results["results"]["bindings"]:
        val = []
        for v in vars_:
            if b[v]["type"] == "uri":
                val.append(b[v]["value"])
        inds.extend(val)

    if inds:
        yield from inds
    else:
        # Yield a single None so that callers can detect empty result sets via `len(...) <= 1`.
        yield None


def generate_lps():
    # Placeholder, not implemented yet.
    pass


if __name__ == "__main__":
    all_obj_props = list(kb.ontology.object_properties_in_signature())

    all_lps = []

    connectors = ['⊔', '⊓']
    neg = "¬"
    quantifiers = ['∃', '∀']

    for i in range(200):
        # Build a random class expression by connecting two of the selected concepts.
        expression = f"<{random.choice(selected_concepts_str)}> {random.choice(connectors)} <{random.choice(selected_concepts_str)}>"

        # Occasionally negate the whole expression.
        if random.random() > 0.9:
            expression = f"{neg}{expression}"

        # Occasionally wrap it in an existential or universal restriction over a random object property.
        if random.random() > 0.8:
            expression = f"{random.choice(quantifiers)} <{random.choice(all_obj_props).str}>.({expression})"

        neg_expression = neg + f"({expression})"
        concept = dls_parser.parse(expression)
        concept_neg = dls_parser.parse(neg_expression)

        sparql_query = owl_expression_to_sparql(concept) + "\nLIMIT 100"
        sparql_query_neg = owl_expression_to_sparql(concept_neg) + "\nLIMIT 100"

        print(sparql_query)
        print("\nNeg query")
        print(sparql_query_neg)

        # Retrieve positive examples from the expression and negative examples from its negation.
        pos_inds = list(query_func(sparql_query))
        neg_inds = list(query_func(sparql_query_neg))

        # Skip expressions with too few positive or negative examples.
        if len(pos_inds) <= 1 or len(neg_inds) <= 1:
            continue

        if pos_inds and neg_inds:
            lp = {"target expression": expression,
                  "examples": {"positive examples": pos_inds,
                               "negative examples": neg_inds}}

            all_lps.append(lp)

    with open("Large_scale_lps.json", "w") as f:
        json.dump(all_lps, f)
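Once Large_scale_lps.json has been written, a quick sanity check of the generated problems can be run before feeding them to the learner scripts above. A minimal sketch; only the file name written above and the key layout used there are assumed.

import json

# Load the generated learning problems and report how many examples each one has.
with open("Large_scale_lps.json") as f:
    all_lps = json.load(f)

print(f"Generated {len(all_lps)} learning problems")
for i, item in enumerate(all_lps):
    n_pos = len(item["examples"]["positive examples"])
    n_neg = len(item["examples"]["negative examples"])
    print(f"LP {i}: {item['target expression']}  (#pos={n_pos}, #neg={n_neg})")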
Your changes in the init decrease the quality of code by

Resolved, see below.

Please remove these lines. There is no need to repeat the exact same commands multiple times.

Should I remove all three commands? From line 121 onwards.

Done. I removed the three commands.