-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(#605): split guidelines by lookup keys
- Loading branch information
Showing
15 changed files
with
181 additions
and
92 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
*.json | ||
*.base64 | ||
temp/ | ||
|
||
.venv/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
import bson | ||
|
||
def get_object_id():
    """Return a freshly created `bson.ObjectId` rendered as a string."""
    new_id = bson.ObjectId()
    return str(new_id)
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
from common.get_data import get_data, get_information_key, get_guideline_by_id, \ | ||
get_phenotype_value_lengths, get_phenotype_value, get_phenotype_key | ||
from common.write_data import write_data | ||
from common.constants import SCRIPT_POSTFIXES | ||
from common.cpic_data import get_phenotype_map | ||
from common.remove_history import remove_history | ||
from common.mongo import get_object_id | ||
|
||
# Rename `cpicData` in guidelines to `externalData` (#582) | ||
# Add `source` field to `externalData` with value 'CPIC' (#582) | ||
def rename_external_data(guideline):
    """Move a guideline's `cpicData` entry to `externalData` (#582).

    The moved entry additionally gets a `source` field set to 'CPIC'.
    A guideline without a `cpicData` key is returned untouched. The
    guideline is modified in place and also returned.
    """
    legacy_key, current_key = 'cpicData', 'externalData'
    if legacy_key not in guideline:
        return guideline
    external_data = guideline.pop(legacy_key)
    guideline[current_key] = external_data
    # Same object as guideline[current_key], so this tags the moved entry
    external_data['source'] = 'CPIC'
    return guideline
|
||
# Change `externalData` to array (#597) | ||
def enlist_external_data(guideline):
    """Ensure that a guideline's `externalData` is a list (#597).

    A non-list value is wrapped into a single-element list; a value that
    already is a list (or an instance of a list subclass) is left as is.
    The guideline is modified in place and also returned.

    Raises:
        KeyError: if the guideline has no `externalData` key.
    """
    external_data = guideline['externalData']
    # isinstance (rather than an exact type comparison) keeps list
    # subclasses from being wrapped a second time
    if not isinstance(external_data, list):
        guideline['externalData'] = [external_data]
    return guideline
|
||
# Add phenotypes for guideline (#602) | ||
def add_phenotypes(guideline, phenotype_map):
    """Attach a `phenotypes` entry to a guideline that lacks one (#602).

    For every gene symbol in the guideline's `lookupkey`, each gene result
    is translated via `phenotype_map[gene_symbol][gene_result]`; the
    translated values are stored per gene symbol under `phenotypes`.
    A guideline that already has `phenotypes` is returned unchanged. The
    guideline is modified in place and also returned.
    """
    if 'phenotypes' in guideline:
        return guideline
    guideline['phenotypes'] = {
        gene_symbol: [
            phenotype_map[gene_symbol][gene_result]
            for gene_result in gene_results
        ]
        for gene_symbol, gene_results in guideline['lookupkey'].items()
    }
    return guideline
|
||
# Chain single guideline migrations together | ||
def migrate_guideline(guideline, phenotype_map):
    """Run all single-guideline migrations in order and return the result.

    Order: rename `cpicData` to `externalData`, turn `externalData` into
    a list, then add `phenotypes` using `phenotype_map`.
    """
    renamed = rename_external_data(guideline)
    enlisted = enlist_external_data(renamed)
    return add_phenotypes(enlisted, phenotype_map)
|
||
# Contract external data by phenotypes (#597) | ||
# Split up previously contracted phenotypes (#604) | ||
# Contraction is implemented here analogous to | ||
# anni/src/common/database/helpers/cpic-constructors.py | ||
def contract_phenotypes_per_drug(guidelines):
    """Split guidelines per lookup key and group the resulting pieces.

    Each guideline is copied once per phenotype index; every copy gets a
    new `_id` and the `lookupkey` / `phenotypes` values for that index.
    The copies are grouped by phenotype key and then by information key.

    NOTE: the grouping is not used yet (see TODO below); the function
    currently returns a new list holding the guidelines it was given.
    """
    # phenotype key -> information key -> list of split guidelines
    grouped = {}
    for guideline in guidelines:
        split_count = get_phenotype_value_lengths(
            guideline, expect_same_length=True)
        for index in range(split_count):
            single = guideline.copy()
            single['_id'] = get_object_id()
            single['lookupkey'] = get_phenotype_value(
                guideline['lookupkey'], index)
            single['phenotypes'] = get_phenotype_value(
                guideline['phenotypes'], index)
            phenotype_key = get_phenotype_key(single)
            information_key = get_information_key(single)
            same_phenotype = grouped.setdefault(phenotype_key, {})
            same_phenotype.setdefault(information_key, []).append(single)
    # TODO: Contract grouped guidelines
    return list(guidelines)
|
||
# Migrate data | ||
def migrate_data():
    """Load the data, migrate all guidelines, and write the result.

    Steps:
      1. Load the data and strip its history.
      2. Apply the single-guideline migrations to every entry of
         `data['Guideline']`.
      3. If the data predates #602 (first guideline has no `phenotypes`),
         additionally contract the guidelines per drug (#604).
      4. Apply the same steps to the guidelines embedded in `AppData`,
         if that key is present.
      5. Write the data using the 'migrate' script postfix.
    """
    data = remove_history(get_data())
    phenotype_map = get_phenotype_map()

    guidelines = data['Guideline']

    # If phenotypes are not present initially (data was created before
    # #602), assume that guidelines also need to be contracted by
    # phenotypes (#604). With no guidelines there is nothing to contract,
    # and indexing [0] would fail.
    contract_by_phenotypes = (
        bool(guidelines) and 'phenotypes' not in guidelines[0])

    # Migrate single guidelines first; contraction per drug afterwards
    # needs the phenotypes. migrate_guideline changes the guideline in
    # place, so its return value does not need to be stored.
    for guideline in guidelines:
        migrate_guideline(guideline, phenotype_map)

    if contract_by_phenotypes:
        migrated_guidelines = []
        for drug in data['Drug']:
            drug_guidelines = [
                get_guideline_by_id(data, guideline_id)
                for guideline_id in drug['guidelines']
            ]
            # extend (not append): keep data['Guideline'] a flat list of
            # guidelines instead of a list of per-drug lists
            migrated_guidelines.extend(
                contract_phenotypes_per_drug(drug_guidelines))
        data['Guideline'] = migrated_guidelines

    if 'AppData' in data:
        for row in data['AppData']:
            for drug in row['drugs']:
                for guideline in drug['guidelines']:
                    migrate_guideline(guideline, phenotype_map)
                if contract_by_phenotypes:
                    # contract_phenotypes_per_drug takes the guidelines
                    # only; store the result back on the drug so it is
                    # part of the written data
                    drug['guidelines'] = contract_phenotypes_per_drug(
                        drug['guidelines'])

    write_data(data, postfix=SCRIPT_POSTFIXES['migrate'])
|
||
# Run the migration only when this file is executed as a script
if __name__ == '__main__':
    migrate_data()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
pymongo==3.5.1 |