From 0abf23cb418ab94cc74c6d51cb231f58193e5cc5 Mon Sep 17 00:00:00 2001 From: Jon-Michael Beasley Date: Thu, 31 Oct 2024 11:30:51 -0400 Subject: [PATCH 1/3] Added script to collapse object qualifier statements to the edge predicates. --- Common/build_manager.py | 17 +++++-- Common/collapse_qualifiers.py | 91 +++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 5 deletions(-) create mode 100644 Common/collapse_qualifiers.py diff --git a/Common/build_manager.py b/Common/build_manager.py index 0f378927..f0c1d547 100644 --- a/Common/build_manager.py +++ b/Common/build_manager.py @@ -23,6 +23,7 @@ NODES_FILENAME = 'nodes.jsonl' EDGES_FILENAME = 'edges.jsonl' REDUNDANT_EDGES_FILENAME = 'redundant_edges.jsonl' +COLLAPSED_QUALIFIERS_FILENAME = 'collapsed_qualifier_edges.jsonl' class GraphBuilder: @@ -115,6 +116,17 @@ def build_graph(self, graph_id: str): output_formats = graph_spec.graph_output_format.lower().split('+') if graph_spec.graph_output_format else [] nodes_filepath = os.path.join(graph_output_dir, NODES_FILENAME) edges_filepath = os.path.join(graph_output_dir, EDGES_FILENAME) + + if 'redundant_jsonl' in output_formats: + self.logger.info(f'Generating redundant edge KG for {graph_id}...') + redundant_filepath = edges_filepath.replace(EDGES_FILENAME, REDUNDANT_EDGES_FILENAME) + generate_redundant_kg(edges_filepath, redundant_filepath) + + if 'collapsed_qualifiers_jsonl' in output_formats: + self.logger.info(f'Generating redundant edge KG for {graph_id}...') + redundant_filepath = edges_filepath.replace(EDGES_FILENAME, COLLAPSED_QUALIFIERS_FILENAME) + generate_redundant_kg(edges_filepath, redundant_filepath) + if 'neo4j' in output_formats: self.logger.info(f'Starting Neo4j dump pipeline for {graph_id}...') dump_success = create_neo4j_dump(nodes_filepath=nodes_filepath, @@ -128,11 +140,6 @@ def build_graph(self, graph_id: str): graph_output_url = self.get_graph_output_URL(graph_id, graph_version) 
graph_metadata.set_dump_url(f'{graph_output_url}graph_{graph_version}.db.dump') - if 'redundant_jsonl' in output_formats: - self.logger.info(f'Generating redundant edge KG for {graph_id}...') - redundant_filepath = edges_filepath.replace(EDGES_FILENAME, REDUNDANT_EDGES_FILENAME) - generate_redundant_kg(edges_filepath, redundant_filepath) - def build_dependencies(self, graph_spec: GraphSpec): for subgraph_source in graph_spec.subgraphs: subgraph_id = subgraph_source.id diff --git a/Common/collapse_qualifiers.py b/Common/collapse_qualifiers.py new file mode 100644 index 00000000..afd466ff --- /dev/null +++ b/Common/collapse_qualifiers.py @@ -0,0 +1,91 @@ +try: + from tqdm import tqdm + TQDM_AVAILABLE = True +except ImportError: + TQDM_AVAILABLE = False + +from Common.biolink_constants import OBJECT_ASPECT_QUALIFIER, OBJECT_DIRECTION_QUALIFIER, SPECIES_CONTEXT_QUALIFIER, \ + QUALIFIED_PREDICATE, PREDICATE +from Common.utils import quick_jsonl_file_iterator +from Common.kgx_file_writer import KGXFileWriter + +### The goal of this script is to collapse the qualifiers, which are in edge properties, into a single statement, then replace the +### existing predicate label with the collapsed qualifier statement. 
+ + +# TODO - really we should get the full list of qualifiers from Common/biolink_constants.py, +# but because we currently cannot deduce the association types of edges and/or permissible value enumerators, +# we have to hard code qualifier handling anyway, we might as well check against a smaller list +QUALIFIER_KEYS = [OBJECT_DIRECTION_QUALIFIER, OBJECT_ASPECT_QUALIFIER] +# we do have these qualifiers but we cant do any redundancy with them so ignore for now: +# QUALIFIED_PREDICATE - +# SPECIES_CONTEXT_QUALIFIER - + +def write_edge_no_q(edge, predicate): + tmp_edge = edge.copy() + tmp_edge[PREDICATE] = f"{predicate}" + tmp_edge.pop(OBJECT_DIRECTION_QUALIFIER, None) + tmp_edge.pop(OBJECT_ASPECT_QUALIFIER, None) + tmp_edge.pop(QUALIFIED_PREDICATE, None) + return tmp_edge + +# +def object_direction_qualifier_semantic_adjustment(object_direction_qualifier): + object_direction_conversion_map = { + 'increased': 'increases', + 'decreased': 'decreases', + 'upregulated': 'upregulates', + 'downregulated': 'downregulates', + } + try: + object_direction_conversion = object_direction_conversion_map[object_direction_qualifier] + except KeyError: + object_direction_conversion = object_direction_qualifier + return object_direction_conversion + +def object_aspect_qualifier_semantic_adjustment(object_aspect_qualifier): + if object_aspect_qualifier.aplit('_')[-1] -- 'interaction': + object_aspect_conversion = object_aspect_qualifier + "_with" + else: + object_aspect_conversion = object_aspect_qualifier + "_of" + return object_aspect_conversion + +def generate_collapsed_qualifiers_kg(infile, edges_file_path): + + with KGXFileWriter(edges_output_file_path=edges_file_path) as kgx_file_writer: + for edge in tqdm(quick_jsonl_file_iterator(infile)) if TQDM_AVAILABLE else quick_jsonl_file_iterator(infile): + + try: + edge_predicate = edge['predicate'] + except KeyError: + print(f"Collapsed Qualifiers Graph Failed - missing predicate on edge: {edge}") + break + + # qualifiers = 
check_qualifier(edge) <- it would be better to do something like this but because we're not + # handling other qualifiers anyway it's faster to just do the following: + qualifiers = [qualifier for qualifier in QUALIFIER_KEYS if qualifier in edge] + + qualifier_statement = "" + + object_direction_qualifier_exists = False + # The following crafts a new collapsed qualifier statement to replace the edge predicate, but needs to do some semantic adjustment. + if OBJECT_DIRECTION_QUALIFIER in qualifiers: + object_direction_qualifier_exists = True + qualifier_statement+= object_direction_qualifier_semantic_adjustment(edge[OBJECT_DIRECTION_QUALIFIER]) + + if OBJECT_ASPECT_QUALIFIER in qualifiers: + if object_direction_qualifier_exists == True: + qualifier_statement+= "_" + else: # Don't think we'll need this, but breaking the loop if we find something unexpected. + print(f"Collapsed Qualifiers Graph Failed - missing object_direction_qualifier while trying to add object_aspect_qualifer on edge: {edge}") + break + qualifier_statement+= object_aspect_qualifier_semantic_adjustment(edge[OBJECT_ASPECT_QUALIFIER]) + + edges_to_write = [] + + # Either rewrite the original edge if no qualifier collapsing happened, or rewrite with new predicate from qualifier_statement. + if qualifier_statement != "": + edges_to_write.append(write_edge_no_q(edge, qualifier_statement)) + else: + edges_to_write(edge) + kgx_file_writer.write_normalized_edges(edges_to_write) From 0839bcc9560d6a21d37af35024049a5d164f2bf8 Mon Sep 17 00:00:00 2001 From: Jon-Michael Beasley Date: Thu, 31 Oct 2024 13:42:46 -0400 Subject: [PATCH 2/3] Updated to fix code and add option to create collapsed qualifier Neo4j dump. 
--- Common/build_manager.py | 39 ++++++++++++++++++++++++++++++++--- Common/collapse_qualifiers.py | 11 +++++----- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/Common/build_manager.py b/Common/build_manager.py index f0c1d547..1b199b7f 100644 --- a/Common/build_manager.py +++ b/Common/build_manager.py @@ -19,6 +19,7 @@ from Common.biolink_constants import PRIMARY_KNOWLEDGE_SOURCE, AGGREGATOR_KNOWLEDGE_SOURCES, PREDICATE, PUBLICATIONS from Common.meta_kg import MetaKnowledgeGraphBuilder, META_KG_FILENAME, TEST_DATA_FILENAME from Common.redundant_kg import generate_redundant_kg +from Common.collapse_qualifiers import generate_collapsed_qualifiers_kg NODES_FILENAME = 'nodes.jsonl' EDGES_FILENAME = 'edges.jsonl' @@ -121,11 +122,43 @@ def build_graph(self, graph_id: str): self.logger.info(f'Generating redundant edge KG for {graph_id}...') redundant_filepath = edges_filepath.replace(EDGES_FILENAME, REDUNDANT_EDGES_FILENAME) generate_redundant_kg(edges_filepath, redundant_filepath) - - if 'collapsed_qualifiers_jsonl' in output_formats: + + if 'redundant_neo4j' in output_formats: self.logger.info(f'Generating redundant edge KG for {graph_id}...') - redundant_filepath = edges_filepath.replace(EDGES_FILENAME, COLLAPSED_QUALIFIERS_FILENAME) + redundant_filepath = edges_filepath.replace(EDGES_FILENAME, REDUNDANT_EDGES_FILENAME) generate_redundant_kg(edges_filepath, redundant_filepath) + self.logger.info(f'Starting Neo4j dump pipeline for redundant {graph_id}...') + dump_success = create_neo4j_dump(nodes_filepath=nodes_filepath, + edges_filepath=redundant_filepath, + output_directory=graph_output_dir, + graph_id=graph_id, + graph_version=graph_version, + logger=self.logger) + + if dump_success: + graph_output_url = self.get_graph_output_URL(graph_id, graph_version) + graph_metadata.set_dump_url(f'{graph_output_url}graph_{graph_version}_redundant.db.dump') + + if 'collapsed_qualifiers_jsonl' in output_formats: + self.logger.info(f'Generating collapsed qualifier 
predicates KG for {graph_id}...') + collapsed_qualifiers_filepath = edges_filepath.replace(EDGES_FILENAME, COLLAPSED_QUALIFIERS_FILENAME) + generate_collapsed_qualifiers_kg(edges_filepath, collapsed_qualifiers_filepath) + + if 'collapsed_qualifiers_neo4j' in output_formats: + self.logger.info(f'Generating collapsed qualifier predicates KG for {graph_id}...') + collapsed_qualifiers_filepath = edges_filepath.replace(EDGES_FILENAME, COLLAPSED_QUALIFIERS_FILENAME) + generate_collapsed_qualifiers_kg(edges_filepath, collapsed_qualifiers_filepath) + self.logger.info(f'Starting Neo4j dump pipeline for {graph_id} with collapsed qualifiers...') + dump_success = create_neo4j_dump(nodes_filepath=nodes_filepath, + edges_filepath=collapsed_qualifiers_filepath, + output_directory=graph_output_dir, + graph_id=graph_id, + graph_version=graph_version, + logger=self.logger) + + if dump_success: + graph_output_url = self.get_graph_output_URL(graph_id, graph_version) + graph_metadata.set_dump_url(f'{graph_output_url}graph_{graph_version}_collapsed_qualifiers.db.dump') if 'neo4j' in output_formats: self.logger.info(f'Starting Neo4j dump pipeline for {graph_id}...') diff --git a/Common/collapse_qualifiers.py b/Common/collapse_qualifiers.py index afd466ff..af66de36 100644 --- a/Common/collapse_qualifiers.py +++ b/Common/collapse_qualifiers.py @@ -44,7 +44,8 @@ def object_direction_qualifier_semantic_adjustment(object_direction_qualifier): return object_direction_conversion def object_aspect_qualifier_semantic_adjustment(object_aspect_qualifier): - if object_aspect_qualifier.aplit('_')[-1] -- 'interaction': + # TODO check if other object aspect qualifiers besides molecular interaction need to be treated differently. 
+ if object_aspect_qualifier.split('_')[-1] == 'molecular_interaction': object_aspect_conversion = object_aspect_qualifier + "_with" else: object_aspect_conversion = object_aspect_qualifier + "_of" @@ -76,9 +77,8 @@ def generate_collapsed_qualifiers_kg(infile, edges_file_path): if OBJECT_ASPECT_QUALIFIER in qualifiers: if object_direction_qualifier_exists == True: qualifier_statement+= "_" - else: # Don't think we'll need this, but breaking the loop if we find something unexpected. - print(f"Collapsed Qualifiers Graph Failed - missing object_direction_qualifier while trying to add object_aspect_qualifer on edge: {edge}") - break + else: # Currently, we'll just say "affects_something" if no direction is specified. + qualifier_statement+= "affects_" qualifier_statement+= object_aspect_qualifier_semantic_adjustment(edge[OBJECT_ASPECT_QUALIFIER]) edges_to_write = [] @@ -87,5 +87,6 @@ def generate_collapsed_qualifiers_kg(infile, edges_file_path): if qualifier_statement != "": edges_to_write.append(write_edge_no_q(edge, qualifier_statement)) else: - edges_to_write(edge) + edges_to_write.append(edge) + kgx_file_writer.write_normalized_edges(edges_to_write) From 7bbdc00c1a5d7919bfab1ddda315cafdafa758b3 Mon Sep 17 00:00:00 2001 From: beasleyjonm <85600465+beasleyjonm@users.noreply.github.com> Date: Thu, 7 Nov 2024 12:35:08 -0500 Subject: [PATCH 3/3] Update collapse_qualifiers.py --- Common/collapse_qualifiers.py | 181 ++++++++++++++++++++++++---------- 1 file changed, 130 insertions(+), 51 deletions(-) diff --git a/Common/collapse_qualifiers.py b/Common/collapse_qualifiers.py index af66de36..f3563a0a 100644 --- a/Common/collapse_qualifiers.py +++ b/Common/collapse_qualifiers.py @@ -4,52 +4,53 @@ except ImportError: TQDM_AVAILABLE = False -from Common.biolink_constants import OBJECT_ASPECT_QUALIFIER, OBJECT_DIRECTION_QUALIFIER, SPECIES_CONTEXT_QUALIFIER, \ - QUALIFIED_PREDICATE, PREDICATE +from Common.biolink_constants import PREDICATE, QUALIFIED_PREDICATE, 
SUBJECT_DERIVATIVE_QUALIFIER, SUBJECT_FORM_OR_VARIANT_QUALIFIER, SUBJECT_PART_QUALIFIER, \ + SUBJECT_DIRECTION_QUALIFIER, SUBJECT_ASPECT_QUALIFIER, OBJECT_DERIVATIVE_QUALIFIER, OBJECT_FORM_OR_VARIANT_QUALIFIER, \ + OBJECT_PART_QUALIFIER, OBJECT_DIRECTION_QUALIFIER, OBJECT_ASPECT_QUALIFIER, CAUSAL_MECHANISM_QUALIFIER, \ + ANATOMICAL_CONTEXT_QUALIFIER, SPECIES_CONTEXT_QUALIFIER +from Common.biolink_utils import get_biolink_model_toolkit from Common.utils import quick_jsonl_file_iterator from Common.kgx_file_writer import KGXFileWriter ### The goal of this script is to collapse the qualifiers, which are in edge properties, into a single statement, then replace the ### existing predicate label with the collapsed qualifier statement. +### Call the biolink model toolkit to get the list of all qualifiers. This may change, but the way qualifiers are handled is currently hard-coded in this script. +bmt = get_biolink_model_toolkit() -# TODO - really we should get the full list of qualifiers from Common/biolink_constants.py, -# but because we currently cannot deduce the association types of edges and/or permissible value enumerators, -# we have to hard code qualifier handling anyway, we might as well check against a smaller list -QUALIFIER_KEYS = [OBJECT_DIRECTION_QUALIFIER, OBJECT_ASPECT_QUALIFIER] -# we do have these qualifiers but we cant do any redundancy with them so ignore for now: -# QUALIFIED_PREDICATE - -# SPECIES_CONTEXT_QUALIFIER - - -def write_edge_no_q(edge, predicate): +def write_edge_no_q(edge, predicate, qualifiers): tmp_edge = edge.copy() tmp_edge[PREDICATE] = f"{predicate}" - tmp_edge.pop(OBJECT_DIRECTION_QUALIFIER, None) - tmp_edge.pop(OBJECT_ASPECT_QUALIFIER, None) - tmp_edge.pop(QUALIFIED_PREDICATE, None) + for qualifier in qualifiers.keys(): + tmp_edge.pop(qualifier, None) return tmp_edge -# -def object_direction_qualifier_semantic_adjustment(object_direction_qualifier): - object_direction_conversion_map = { - 'increased': 'increases', - 'decreased': 
'decreases', - 'upregulated': 'upregulates', - 'downregulated': 'downregulates', - } - try: - object_direction_conversion = object_direction_conversion_map[object_direction_qualifier] - except KeyError: - object_direction_conversion = object_direction_qualifier - return object_direction_conversion - -def object_aspect_qualifier_semantic_adjustment(object_aspect_qualifier): - # TODO check if other object aspect qualifiers besides molecular interaction need to be treated differently. - if object_aspect_qualifier.split('_')[-1] == 'molecular_interaction': - object_aspect_conversion = object_aspect_qualifier + "_with" +def aspect_qualifier_semantic_adjustment(aspect_qualifier): + # TODO check if other aspect qualifiers besides molecular interaction need to be treated differently. + if aspect_qualifier.split('_')[-1] == 'interaction': + aspect_conversion = aspect_qualifier + "_with" else: - object_aspect_conversion = object_aspect_qualifier + "_of" - return object_aspect_conversion + aspect_conversion = aspect_qualifier + "_of" + return aspect_conversion + +def form_or_variant_qualifier_semantic_adjustment(form_or_variant_qualifier): + # TODO check if any form_or_variant qualifiers need to be treated differently (currently "_of" is always appended). + form_or_variant_conversion = form_or_variant_qualifier + "_of" + return form_or_variant_conversion + +def causal_mechanism_qualifier_semantic_adjustment(causal_mechanism_qualifier): + # TODO check if any causal_mechanism qualifiers need to be treated differently (currently "via_" is always prepended). 
+ causal_mechanism_qualifier = "via_"+ causal_mechanism_qualifier + return causal_mechanism_qualifier + +def species_context_qualifier_semantic_adjustment(species_context_qualifier): + species_context_qualifier = "in_"+ species_context_qualifier + return species_context_qualifier + +def anatomical_context_qualifier_semantic_adjustment(anatomical_context_qualifier, species_context_qualifier=False): + if species_context_qualifier == False: + anatomical_context_qualifier = "in_"+ anatomical_context_qualifier + return anatomical_context_qualifier def generate_collapsed_qualifiers_kg(infile, edges_file_path): @@ -62,30 +63,108 @@ def generate_collapsed_qualifiers_kg(infile, edges_file_path): print(f"Collapsed Qualifiers Graph Failed - missing predicate on edge: {edge}") break - # qualifiers = check_qualifier(edge) <- it would be better to do something like this but because we're not - # handling other qualifiers anyway it's faster to just do the following: - qualifiers = [qualifier for qualifier in QUALIFIER_KEYS if qualifier in edge] + qualifiers = {key:value for key, value in edge.items() if bmt.is_qualifier(key)} + # Count the number of qualifiers and print a warning if number of qualifiers we handle in the next section doesn't match number of qualifiers detected. + # This will help warn us if new qualifiers are added in the future while giving us the option to still run the script as is. + qualifier_count = len(qualifiers.keys()) + counted_qualifiers = 0 - qualifier_statement = "" + # The following section crafts a new collapsed qualifier statement to replace the edge predicate, but needs to do some semantic adjustment. + # This is where to edit if the biolink model ever changes and handles qualifiers differently. 
+ # Take guidance from: https://biolink.github.io/biolink-model/reading-a-qualifier-based-statement/ + # Example jsonl edge used here: {"subject":"UNII:7PK6VC94OU","predicate":"biolink:affects","object":"NCBIGene:6531","primary_knowledge_source":"infores:ctd","description":"decreases activity of","NCBITaxon":"9606","publications":["PMID:30776375"],"knowledge_level":"knowledge_assertion","agent_type":"manual_agent","subject_direction_qualifier":"increased","subject_aspect_qualifier":"abundance","subject_form_or_variant_qualifier":"mutant_form","subject_derivative_qualifier":"transcript","subject_part_qualifier":"polyA_tail","object_aspect_qualifier":"activity","object_direction_qualifier":"upregulated","object_form_or_variant_qualifier":"wildtype_form","object_derivative_qualifier":"protein","object_part_qualifier":"catalytic_site","causal_mechanism_qualifier":"phosphorylation","species_context_qualifier":"human","anatomical_context_qualifier":"liver","qualified_predicate":"biolink:causes"} - object_direction_qualifier_exists = False - # The following crafts a new collapsed qualifier statement to replace the edge predicate, but needs to do some semantic adjustment. - if OBJECT_DIRECTION_QUALIFIER in qualifiers: - object_direction_qualifier_exists = True - qualifier_statement+= object_direction_qualifier_semantic_adjustment(edge[OBJECT_DIRECTION_QUALIFIER]) + qualifier_statement = "" - if OBJECT_ASPECT_QUALIFIER in qualifiers: - if object_direction_qualifier_exists == True: - qualifier_statement+= "_" - else: # Currently, we'll just say "affects_something" if no direction is specified. - qualifier_statement+= "affects_" - qualifier_statement+= object_aspect_qualifier_semantic_adjustment(edge[OBJECT_ASPECT_QUALIFIER]) + # Add on subject direction and aspect qualifiers first. eg. 
"increased_abundance_of_" + if SUBJECT_DIRECTION_QUALIFIER in qualifiers.keys(): + counted_qualifiers+= 1 + qualifier_statement+= qualifiers[SUBJECT_DIRECTION_QUALIFIER] + qualifier_statement+= "_" + if SUBJECT_ASPECT_QUALIFIER in qualifiers.keys(): + counted_qualifiers+= 1 + qualifier_statement+= aspect_qualifier_semantic_adjustment(qualifiers[SUBJECT_ASPECT_QUALIFIER]) + qualifier_statement+= "_" + # Add on subject form_or_variant qualifiers. eg. "increased_abundance_of_mutant_form_of_" + if SUBJECT_FORM_OR_VARIANT_QUALIFIER in qualifiers.keys(): + counted_qualifiers+= 1 + qualifier_statement+= form_or_variant_qualifier_semantic_adjustment(qualifiers[SUBJECT_FORM_OR_VARIANT_QUALIFIER]) + qualifier_statement+= "_" + # Add placeholder slot for subject node. eg. "increased_abundance_of_mutant_form_of_" + qualifier_statement+= "_" + # Add on subject derivative and part qualifiers. eg. "increased_abundance_of_mutant_form_of_transcript_poly_A_tail" + if SUBJECT_DERIVATIVE_QUALIFIER in qualifiers.keys(): + counted_qualifiers+= 1 + qualifier_statement+= qualifiers[SUBJECT_DERIVATIVE_QUALIFIER] + qualifier_statement+= "_" + if SUBJECT_PART_QUALIFIER in qualifiers.keys(): + counted_qualifiers+= 1 + qualifier_statement+= qualifiers[SUBJECT_PART_QUALIFIER] + qualifier_statement+= "_" + + # Add the qualified predicate. eg. "increased_abundance_of_mutant_form_of__transcript_poly_A_tail_causes" + if QUALIFIED_PREDICATE in qualifiers.keys(): + counted_qualifiers+= 1 + qualifier_statement+= qualifiers[QUALIFIED_PREDICATE].replace("biolink:","") + qualifier_statement+= "_" + + # Add on object direction and aspect qualifiers. eg. 
"increased_abundance_of_mutant_form_of_transcript_poly_A_tail_causes_upregulated_activity_of" + if OBJECT_DIRECTION_QUALIFIER in qualifiers.keys(): + counted_qualifiers+= 1 + qualifier_statement+= qualifiers[OBJECT_DIRECTION_QUALIFIER] + qualifier_statement+= "_" + if OBJECT_ASPECT_QUALIFIER in qualifiers.keys(): + counted_qualifiers+= 1 + qualifier_statement+= aspect_qualifier_semantic_adjustment(qualifiers[OBJECT_ASPECT_QUALIFIER]) + qualifier_statement+= "_" + # Add on object form_or_variant qualifiers. eg. "increased_abundance_of_mutant_form_of_transcript_poly_A_tail_causes_upregulated_activity_of_mutant_form_of" + if OBJECT_FORM_OR_VARIANT_QUALIFIER in qualifiers.keys(): + counted_qualifiers+= 1 + qualifier_statement+= form_or_variant_qualifier_semantic_adjustment(qualifiers[OBJECT_FORM_OR_VARIANT_QUALIFIER]) + qualifier_statement+= "_" + # Add placeholder slot for object node. eg. "increased_abundance_of_mutant_form_of_transcript_poly_A_tail_causes_upregulated_activity_of_mutant_form_of_" + qualifier_statement+= "" + + # Add on object derivative and part qualifiers. eg. "increased_abundance_of_mutant_form_of_transcript_poly_A_tail_causes_upregulated_activity_of_mutant_form_of__protein_catalytic_site" + # Need to start putting "_" before each qualifier as any given one could be the last in the statement. + if OBJECT_DERIVATIVE_QUALIFIER in qualifiers.keys(): + counted_qualifiers+= 1 + qualifier_statement+= "_" + qualifier_statement+= qualifiers[OBJECT_DERIVATIVE_QUALIFIER] + if OBJECT_PART_QUALIFIER in qualifiers.keys(): + counted_qualifiers+= 1 + qualifier_statement+= "_" + qualifier_statement+= qualifiers[OBJECT_PART_QUALIFIER] + + # Add on mechanism qualifiers. eg. 
"increased_abundance_of_mutant_form_of_transcript_poly_A_tail_causes_upregulated_activity_of_mutant_form_of__protein_catalytic_site_via_phosphorylation" + if CAUSAL_MECHANISM_QUALIFIER in qualifiers.keys(): + counted_qualifiers+= 1 + qualifier_statement+= "_" + qualifier_statement+= causal_mechanism_qualifier_semantic_adjustment(qualifiers[CAUSAL_MECHANISM_QUALIFIER]) + + # Add on species qualifiers. eg. "increased_abundance_of_mutant_form_of_transcript_poly_A_tail_causes_upregulated_activity_of_mutant_form_of__protein_catalytic_site_via_phosphorylation_in_human" + if SPECIES_CONTEXT_QUALIFIER in qualifiers.keys(): + counted_qualifiers+= 1 + qualifier_statement+= "_" + qualifier_statement+= species_context_qualifier_semantic_adjustment(qualifiers[SPECIES_CONTEXT_QUALIFIER]) + + # Add on anatomical context qualifiers. eg. "increased_abundance_of_mutant_form_of_transcript_poly_A_tail_causes_upregulated_activity_of_mutant_form_of__protein_catalytic_site_via_phosphorylation_in_human_liver" + if ANATOMICAL_CONTEXT_QUALIFIER in qualifiers.keys(): + counted_qualifiers+= 1 + qualifier_statement+= "_" + if SPECIES_CONTEXT_QUALIFIER in qualifiers.keys(): + species_qualifier = True + else: + species_qualifier = False + qualifier_statement+= anatomical_context_qualifier_semantic_adjustment(qualifiers[ANATOMICAL_CONTEXT_QUALIFIER], species_qualifier) - edges_to_write = [] + if counted_qualifiers < qualifier_count: + print(f"Qualifiers on edge: {edge} are not all being handled correctly. Please revise collapse_qualifiers.py to handle all qualifiers.") # Either rewrite the original edge if no qualifier collapsing happened, or rewrite with new predicate from qualifier_statement. + edges_to_write = [] if qualifier_statement != "": - edges_to_write.append(write_edge_no_q(edge, qualifier_statement)) + edges_to_write.append(write_edge_no_q(edge, qualifier_statement, qualifiers)) else: edges_to_write.append(edge)