Skip to content

Commit

Permalink
Edges from complexes are now represented as edges from the individual…
Browse files Browse the repository at this point in the history
… components of the complex, with a 'complex_context' edge property to record the name of the complex the edge was derived from.
  • Loading branch information
beaslejt committed Oct 3, 2023
1 parent 08aa891 commit 7d15da1
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 35 deletions.
92 changes: 69 additions & 23 deletions parsers/Reactome/src/loadReactome.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,12 @@
# GenomeEncodedEntity: EntityWITHACCESSIONEDSEQUENCE(Protein), EntityWITHACCESSIONEDSEQUENCE(Gene and transcript), EntityWITHACCESSIONEDSEQUENCE(DNA), EntityWITHACCESSIONEDSEQUENCE(RNA)
CROSS_MAPPING = ('EntityWithAccessionedSequence', 'GenomeEncodedEntity', 'SimpleEntity', 'Drug', 'Complex', 'Polymer')

# Values of the "Decision" column of the critical-triples CSV. extract_data
# compares each row's decision against these tuples to choose which Cypher
# query to build for that (subject, predicate, object) triple.
TO_WRITE = ('Provenance/Include', 'Attribute/Include')  # Descriptive features of other existing nodes eg Summation
TO_MAP = ('IDMapping/Include', )  # Maps the external identifier of a node from another node
TO_INCLUDE = ('Include',)
RDF_EDGES_TO_INCLUDE = ('RDF_edges/Include',)
# The triple involves a Complex: the query follows hasComponent from the
# complex and returns the component in place of the complex, with the
# complex's name returned as complex_context.
MOLE_COMPLEX = ('Include/Complex',)
# Same complex expansion as MOLE_COMPLEX, but with the a/b node roles swapped
# in the query (as TO_SWITCH does for plain triples).
TO_SWITCH_MOLE_COMPLEX = ('Include/SwitchSO/Complex',)
TO_SWITCH = ('Include/SwitchSO', )

##############
Expand Down Expand Up @@ -173,6 +175,18 @@ def extract_data(self, neo4j_driver) -> dict:
f"AND any(x in labels(b) WHERE x in ['Complex','GenomeEncodedEntity','EntityWithAccessionedSequence']) " \
f"RETURN a, labels(a) as a_labels, id(a) as a_id, type(r) as r_type, b, labels(b) as b_labels, id(b) as b_id, labels(d) as regulationType"
queries_to_include.append(cypher_query)
elif line[INCLUDE_COLUMN] in TO_SWITCH_MOLE_COMPLEX:
cypher_query = f"MATCH (b)<-[r:hasComponent]-(c:{line[SUBJECT_COLUMN]})-[r1:{line[PREDICATE_COLUMN]}]->(a:{line[OBJECT_COLUMN]}) " \
f"RETURN a, labels(a) as a_labels, id(a) as a_id, type(r) as r_type, b, labels(b) as b_labels, id(b) as b_id, c.name as complex_context"
# Remove if map_ids is not needed.
#cypher_query = self.map_ids(line[SUBJECT_COLUMN], line[PREDICATE_COLUMN], line[OBJECT_COLUMN], switch=True)
queries_to_include.append(cypher_query)
elif line[INCLUDE_COLUMN] in MOLE_COMPLEX:
cypher_query = f"MATCH (a)<-[r:hasComponent]-(c:{line[SUBJECT_COLUMN]})-[r1:{line[PREDICATE_COLUMN]}]->(b:{line[OBJECT_COLUMN]}) " \
f"RETURN a, labels(a) as a_labels, id(a) as a_id, type(r) as r_type, b, labels(b) as b_labels, id(b) as b_id, c.name as complex_context"
# Remove if map_ids is not needed.
#cypher_query = self.map_ids(line[SUBJECT_COLUMN], line[PREDICATE_COLUMN], line[OBJECT_COLUMN], switch=True)
queries_to_include.append(cypher_query)
elif line[INCLUDE_COLUMN] in TO_SWITCH:
cypher_query = f"MATCH (b:{line[SUBJECT_COLUMN]})-[r:{line[PREDICATE_COLUMN]}]->(a:{line[OBJECT_COLUMN]}) " \
f"RETURN a, labels(a) as a_labels, id(a) as a_id, type(r) as r_type, b, labels(b) as b_labels, id(b) as b_id"
Expand Down Expand Up @@ -238,7 +252,7 @@ def extract_data(self, neo4j_driver) -> dict:
def get_reference_entity_mapping(self, neo4j_session):
reference_entity_mapping = {}
# The following line excludes Pathways from ID mapping because we only want to map them to GO terms, like 2 lines below.
reference_entity_query = "MATCH (a)-[:referenceEntity|crossReference]->(b) WHERE NOT('Pathway' in labels(a)) return id(a) as identity, b as reference, labels(b) as ref_labels"
reference_entity_query = "MATCH (a)-[r:referenceEntity|crossReference]->(b) WHERE NOT('Pathway' in labels(a)) return id(a) as identity, b as reference, labels(b) as ref_labels"
goBioProcess_query = "MATCH (a:Pathway)-[r:goBiologicalProcess]->(b:GO_Term) WHERE replace(toLower(a.displayName),'-',' ') = replace(toLower(b.displayName),'-',' ') return id(a) as identity, b as reference, labels(b) as ref_labels"
reference_entity_result = neo4j_session.run(reference_entity_query)
goBioProcess_query = neo4j_session.run(goBioProcess_query)
Expand Down Expand Up @@ -289,11 +303,20 @@ def write_neo4j_result_to_file(self, result: neo4j.Result, reference_entity_mapp
if node_a_id and node_b_id:
if "regulationType" in record_data.keys():
if any("positive" in x.lower() for x in record_data['regulationType']):
self.process_edge_from_neo4j(node_a_id, record_data['r_type'], node_b_id, regulationType='positive')
if "complex_context" in record_data.keys():
self.process_edge_from_neo4j(node_a_id, record_data['r_type'], node_b_id, regulationType='positive', complex_context=record_data['complex_context'])
else:
self.process_edge_from_neo4j(node_a_id, record_data['r_type'], node_b_id, regulationType='positive')
elif any("negative" in x.lower() for x in record_data['regulationType']):
self.process_edge_from_neo4j(node_a_id, record_data['r_type'], node_b_id, regulationType='negative')
if "complex_context" in record_data.keys():
self.process_edge_from_neo4j(node_a_id, record_data['r_type'], node_b_id, regulationType='negative', complex_context=record_data['complex_context'])
else:
self.process_edge_from_neo4j(node_a_id, record_data['r_type'], node_b_id, regulationType='negative')
else:
self.process_edge_from_neo4j(node_a_id, record_data['r_type'], node_b_id, regulationType=None)
if "complex_context" in record_data.keys():
self.process_edge_from_neo4j(node_a_id, record_data['r_type'], node_b_id, regulationType=None, complex_context=record_data['complex_context'])
else:
self.process_edge_from_neo4j(node_a_id, record_data['r_type'], node_b_id, regulationType=None)
record_count += 1
else:
skipped_record_count += 1
Expand Down Expand Up @@ -380,32 +403,55 @@ def process_node_from_neo4j(self, reference_entity_mapping, node_identity, node:
self.dbid_to_node_id_lookup[node['dbId']] = node_id
"""

def process_edge_from_neo4j(self, subject_id: str, relationship_type: str, object_id: str, regulationType=None, complex_context=None):
    """Write one KGX edge for a Neo4j relationship whose type maps to a predicate.

    :param subject_id: identifier used as the edge subject
    :param relationship_type: Neo4j relationship type, looked up in PREDICATE_MAPPING
    :param object_id: identifier used as the edge object
    :param regulationType: None for a plain edge, or 'positive' / 'negative' to
        add the causes / increases-or-decreases / expression qualifiers
    :param complex_context: when not None, stored on the edge under the
        'complex_context' property
    :return: None. The edge is handed to self.output_file_writer. If the
        relationship type has no predicate mapping, or regulationType is an
        unrecognized value, a warning is logged and no edge is written.
    """
    predicate = PREDICATE_MAPPING.get(relationship_type, None)
    if not predicate:
        self.logger.warning(f'A predicate could not be mapped for relationship type {relationship_type}')
        return

    edge_properties = {}
    if regulationType is not None:
        direction = {'positive': 'increases', 'negative': 'decreases'}.get(regulationType)
        if direction is None:
            # Previously this path failed with UnboundLocalError on `direction`;
            # report the bad value and skip the edge, as for unmapped predicates.
            self.logger.warning(f'Unrecognized regulationType {regulationType!r} for relationship type {relationship_type}')
            return
        edge_properties['qualified_predicate'] = 'biolink:causes'
        edge_properties['object_direction_qualifier'] = direction
        edge_properties['object_aspect_qualifier'] = 'expression'
    if complex_context is not None:
        edge_properties['complex_context'] = complex_context

    # Only pass edgeprops when there is something to record, so a plain edge
    # keeps relying on kgxedge's own default for that argument.
    optional_args = {'edgeprops': edge_properties} if edge_properties else {}
    output_edge = kgxedge(
        subject_id=subject_id,
        object_id=object_id,
        predicate=predicate,
        primary_knowledge_source=self.provenance_id,
        **optional_args
    )
    self.output_file_writer.write_kgx_edge(output_edge)
Expand Down
16 changes: 4 additions & 12 deletions parsers/Reactome/src/reactomeContents_CriticalTriples.csv
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,10 @@ EntityWithAccessionedSequence,cellType,CellType,Include,
Drug,cellType,CellType,Include,
Pathway,disease,Disease,Include/SwitchSO,
ReactionLikeEvent,disease,Disease,Include/SwitchSO,
Complex,hasComponent,EntityWithAccessionedSequence,Include,
Complex,species,Species,Include,
Complex,compartment,GO_Term,Include,
Complex,hasComponent,Complex,Include,
ReactionLikeEvent,output,Complex,Include,
ReactionLikeEvent,input,Complex,Include,
Complex,hasComponent,SimpleEntity,Include,
Complex,hasComponent,GenomeEncodedEntity,Include,
Complex,includedLocation,GO_Term,Include,
Complex,disease,Disease,Include/SwitchSO,
Complex,hasComponent,Polymer,Include,
Regulation,regulator,Complex,RDF_edges/Include,
ReactionLikeEvent,output,Complex,Include/Complex,
ReactionLikeEvent,input,Complex,Include/Complex,
Complex,disease,Disease,Include/SwitchSO/Complex,
Regulation,regulator,Complex,Include/Complex,
Regulation,regulator,EntityWithAccessionedSequence,RDF_edges/Include,
Regulation,regulator,SimpleEntity,RDF_edges/Include,
Regulation,regulator,Drug,RDF_edges/Include,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
Subject,Predicate,Object,Decision,Notes:
Pathway,hasEvent,ReactionLikeEvent,Include,
ReactionLikeEvent,input,SimpleEntity,Include,
ReactionLikeEvent,output,SimpleEntity,Include,
ReactionLikeEvent,precedingEvent,ReactionLikeEvent,Include,
ReactionLikeEvent,input,EntityWithAccessionedSequence,Include,
ReactionLikeEvent,output,EntityWithAccessionedSequence,Include,
ReactionLikeEvent,input,GenomeEncodedEntity,Include,
ReactionLikeEvent,output,GenomeEncodedEntity,Include,
Pathway,hasEvent,Pathway,Include,
ReactionLikeEvent,compartment,GO_Term,Include,
Pathway,compartment,GO_Term,Include,
Pathway,goBiologicalProcess,GO_BiologicalProcess,Include,
ReactionLikeEvent,goBiologicalProcess,GO_BiologicalProcess,Include,
ReactionLikeEvent,precedingEvent,ReactionLikeEvent,Include,
Pathway,precedingEvent,Pathway,Include,
ReactionLikeEvent,input,Polymer,Include,
ReactionLikeEvent,output,Polymer,Include,
ReactionLikeEvent,input,Drug,Include,
ReactionLikeEvent,output,Drug,Include,
SimpleEntity,cellType,CellType,Include,
EntityWithAccessionedSequence,cellType,CellType,Include,
Drug,cellType,CellType,Include,
Pathway,disease,Disease,Include/SwitchSO,
ReactionLikeEvent,disease,Disease,Include/SwitchSO,
Complex,hasComponent,EntityWithAccessionedSequence,Include,
Complex,species,Species,Include,
Complex,compartment,GO_Term,Include,
Complex,hasComponent,Complex,Include,
ReactionLikeEvent,output,Complex,Include,
ReactionLikeEvent,input,Complex,Include,
Complex,hasComponent,SimpleEntity,Include,
Complex,hasComponent,GenomeEncodedEntity,Include,
Complex,includedLocation,GO_Term,Include,
Complex,disease,Disease,Include/SwitchSO,
Complex,hasComponent,Polymer,Include,
Regulation,regulator,Complex,RDF_edges/Include,
Regulation,regulator,EntityWithAccessionedSequence,RDF_edges/Include,
Regulation,regulator,SimpleEntity,RDF_edges/Include,
Regulation,regulator,Drug,RDF_edges/Include,
Regulation,regulator,GenomeEncodedEntity,RDF_edges/Include,

0 comments on commit 7d15da1

Please sign in to comment.