Skip to content

Commit

Permalink
Merge pull request #8 from Lucaterre/experiment_#4_one-shot_infos
Browse files Browse the repository at this point in the history
Experiment #4 one shot infos
  • Loading branch information
Lucaterre authored Jul 8, 2022
2 parents 39c894a + 2e3c3a8 commit ed1c74d
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 11 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -134,4 +134,7 @@ dmypy.json

# other files
test.py
test_universe.json
test_universe.json

LEGACY/
datatest/
23 changes: 13 additions & 10 deletions spacyfishing/entity_fishing_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def process_response(response: requests.models.Response) -> Tuple[dict, dict]:
return res_json, metadata

@staticmethod
def prepare_data(text: str, terms: str, entities: list, language: dict) -> dict:
def prepare_data(text: str, terms: str, entities: list, language: dict, full: bool = False) -> dict:
"""Preprocess data before call Entity-Fishing service.
Parameters:
Expand All @@ -181,6 +181,7 @@ def prepare_data(text: str, terms: str, entities: list, language: dict) -> dict:
e.g. "ONU Barack Obama president ...".
entities (list): Specific entities to disambiguate.
language (dict): Type of language.
full (bool): Whether to retrieve extra information on each entity. Defaults to `False`.
Returns:
dict (dict): data ready to send.
Expand All @@ -198,7 +199,8 @@ def prepare_data(text: str, terms: str, entities: list, language: dict) -> dict:
} for ent in entities
],
"mentions": [],
"customisation": "generic"
"customisation": "generic",
"full": "true" if full else "false"
})
}

Expand All @@ -216,20 +218,21 @@ def updated_entities(self, doc: Doc, response: list) -> None:
span = doc[entity['offsetStart']:entity['offsetEnd']]
try:
span._.kb_qid = str(entity['wikidataId'])
span._.url_wikidata = self.wikidata_url_base + span._.kb_qid
except KeyError:
pass
try:
span._.wikipedia_page_ref = str(entity["wikipediaExternalRef"])
# if flag + wikipediaextref => search extra infos
# if flag_extra : search other info on entity
# => attach extra entity info to span
if self.flag_extra:
self.look_extra_informations_on_entity(span)
self.look_extra_informations_on_entity(span, entity)
except KeyError:
pass
try:
span._.nerd_score = entity['confidence_score']
except KeyError:
pass
span._.url_wikidata = self.wikidata_url_base + span._.kb_qid

# ~ Entity-fishing call service methods ~:

Expand Down Expand Up @@ -263,16 +266,15 @@ def disambiguate_text(self, files: dict) -> requests.Response:
files=files,
verbose=self.verbose)

def look_extra_informations_on_entity(self, span: Span) -> None:
"""Find and attach to span extra information:
def look_extra_informations_on_entity(self, span: Span, res_desc: dict) -> None:
"""Attach to span extra information:
normalised term name, description, description source,
other identifiers (statements attached to the QID).
Parameters:
span (Span): spaCy span object to which the extra information is attached.
res_desc (dict): dict that contains the extra information on the entity.
"""
req_desc = self.concept_look_up(span._.wikipedia_page_ref)
res_desc = req_desc.json()
# normalised term name
try:
span._.normal_term = res_desc['preferredTerm']
Expand Down Expand Up @@ -335,7 +337,8 @@ def main_disambiguation_process(self,
data_to_post = self.prepare_data(text=text,
terms=terms,
entities=entities,
language=self.language)
language=self.language,
full=self.flag_extra)
req = self.disambiguate_text(files=data_to_post)
res, metadata = self.process_response(response=req)
try:
Expand Down

0 comments on commit ed1c74d

Please sign in to comment.