Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes pipeline issue 380 by editing the header, taxon, and type of the GP… #682

Merged
merged 3 commits
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions ontobio/io/entityparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,10 @@ def gpi_version(self) -> str:
else:
return self.default_version

def parse_line(self, line):
def parse_line(self, line) -> (str, List[Dict]):
"""Parses a single line of a GPI.

Return a tuple `(processed_line, entities)`. Typically
Return a tuple `(processed_line, entities)`. Typically,
there will be a single entity, but in some cases there
may be none (invalid line) or multiple (disjunctive clause in
annotation extensions)
Expand Down Expand Up @@ -301,6 +301,7 @@ def line_as_entity_subject(self, line: str):
for entity in entity_dicts:
entity_types = []
if self.gpi_version() == "2.0":

entity_types = [association.Curie.from_str(t) for t in entity["type"]]
if any(c.is_error() for c in entity_types):
logger.error("Skipping `{}` due to malformed CURIE in entity type: `{}`".format(line, entity["type"]))
Expand Down
34 changes: 21 additions & 13 deletions ontobio/io/entitywriter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
"""
Classes for exporting entities.

So far only one implementation
"""
"""Classes for exporting entities."""
import re
from datetime import datetime

from ontobio.model.association import map_gp_type_label_to_curie

external_taxon = re.compile("taxon:([0-9]+)")
internal_taxon = re.compile("NCBITaxon:([0-9]+)")


def stringify(s):
if s is None:
Expand All @@ -13,8 +16,6 @@ def stringify(s):
else:
return s

external_taxon = re.compile("taxon:([0-9]+)")
internal_taxon = re.compile("NCBITaxon:([0-9]+)")

def normalize_taxon(taxon):
global internal_taxon
Expand Down Expand Up @@ -100,6 +101,8 @@ def __init__(self, file=None, version=None):
if self.file:
if self.version == "2.0":
self.file.write("!gpi-version: 2.0\n")
self.file.write("!date_generated: " + datetime.now().strftime("%Y-%m-%dT%H:%M") + "\n")
self.file.write("!generated_by: GO Central\n")
else:
self.file.write("!gpi-version: 1.2\n")

Expand Down Expand Up @@ -140,14 +143,19 @@ def write_entity(self, entity):

"""

taxon = entity.get("taxon").get("id")
if normalize_taxon(taxon).startswith("taxon:"):
taxon = taxon.replace("taxon:", "NCBITaxon:")

if self.version == "2.0":
vals = [
entity.get('id'), # DB_Object_ID
entity.get('label'), # DB_Object_symbol
entity.get('full_name'), # DB_Object_Name
entity.get('synonyms'), # DB_Object_Synonyms
entity.get('type'), # DB_Object_Type
normalize_taxon(entity.get("taxon").get("id")), # DB_Object_Taxon
# GPI spec says this is single valued, GpiParser returns list, so take the first element here.
str(map_gp_type_label_to_curie(entity.get('type')[0])), # DB_Object_Type to curie vs. label
taxon, # DB_Object_Taxon, normalized to NCBITaxon prefix
"", # Encoded_by
entity.get('parents'), # Parent_Protein
"", # Protein_Containing_Complex_Members
Expand All @@ -160,10 +168,10 @@ def write_entity(self, entity):
prefix, # DB
local_id, # DB_Object_ID
entity.get('label'), # DB_Object_Symbol
entity.get('full_name'), # DB_Object_Symbol
entity.get('synonyms'), # DB_Object_Name
entity.get('type'), # DB_Object_Synonyms
normalize_taxon(entity.get("taxon").get("id")), # taxon
entity.get('full_name'), # DB_Object_Full_Name
entity.get('synonyms'), # DB_Object_Synonyms
entity.get('type'), # DB_Object_Type
normalize_taxon(entity.get("taxon").get("id")), # taxon in gpi 1.2 was prefixed by `taxon:`
entity.get('parents'), # Parent_Object_ID
entity.get('xrefs'), # DB_Xref(s)
entity.get('properties') # Properties
Expand Down
Loading
Loading