Skip to content

Commit

Permalink
feat(#605): add phenotypes
Browse files Browse the repository at this point in the history
  • Loading branch information
tamslo committed May 4, 2023
1 parent 9c586ce commit 7c44cd0
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 25 deletions.
3 changes: 2 additions & 1 deletion anni/scripts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@ containing a zipped JSON.
Run `python3 migrate.py <PATH_TO_BACKUP>[.json|.base64]` to receive
`<PATH_TO_BACKUP>_migrated_<TIMESTAMP>.base64`.

Breaking changes covered:
(Breaking) changes covered:

* [Add new medications (FDA)](https://github.com/hpi-dhc/PharMe/pull/582)
* [One annotation per phenotype](https://github.com/hpi-dhc/PharMe/pull/597)
* [Zipped Anni backup](https://github.com/hpi-dhc/PharMe/pull/599)
* [Use phenotypes from cpic](https://github.com/hpi-dhc/PharMe/pull/602)
* [Contract by phenotype first](https://github.com/hpi-dhc/PharMe/pull/604)

## Decode Base64

Expand Down
29 changes: 29 additions & 0 deletions anni/scripts/common/cpic_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import json
import urllib.request
import urllib.parse


def get_cpic_data(endpoint, params):
    """Fetch and decode a JSON response from the CPIC API.

    `endpoint` is appended to the API base URL and `params` is URL-encoded
    into the query string. Returns the parsed JSON body of the response.
    """
    api_root = 'https://api.cpicpgx.org/v1/'
    query_string = urllib.parse.urlencode(params)
    request_url = ''.join([api_root, endpoint, '?', query_string])
    with urllib.request.urlopen(request_url) as response:
        return json.load(response)

def get_phenotype_map():
    """Build a lookup from gene and lookup-key value to phenotype.

    Requests `lookupkey` and `phenotypes` from the CPIC `recommendation`
    endpoint and returns a nested dict of the form
    `{gene: {lookupkey_value: phenotype}}`.

    If a recommendation has no phenotype for a gene, the lookup-key value
    itself is used as the phenotype. The first phenotype seen for a
    gene/value pair is kept; later recommendations do not overwrite it.
    """
    # Would get gene but list of activity scores is not complete
    lookup_data = get_cpic_data('recommendation', params={
        'select': 'lookupkey,phenotypes',
    })
    phenotype_map = {}
    for result in lookup_data:
        for gene, gene_result in result['lookupkey'].items():
            # Fall back to the lookup-key value when no phenotype is given
            phenotype = result['phenotypes'].get(gene, gene_result)
            # setdefault keeps the first entry for each gene/value pair
            phenotype_map.setdefault(gene, {}).setdefault(
                gene_result, phenotype)
    return phenotype_map

70 changes: 46 additions & 24 deletions anni/scripts/migrate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from common.get_data import get_data
from common.write_data import write_data
from common.constants import SCRIPT_POSTFIXES
from common.cpic_data import get_phenotype_map

# Rename `cpicData` in guidelines to `externalData` (#582)
# Add `source` field to `externalData` with value 'CPIC' (#582)
Expand All @@ -19,30 +20,51 @@ def enlist_external_data(guideline):
return guideline

# Add phenotypes for guideline (#602)
def add_phenotypes(guideline):
# TODO: get phenotypes from CPIC API based on lookupkey
def add_phenotypes(guideline, phenotype_map):
    """Add a `phenotypes` entry to `guideline` if it does not have one.

    For every gene in `guideline['lookupkey']`, each lookup-key value is
    translated via `phenotype_map[gene][value]`, and the resulting lists
    are stored under `guideline['phenotypes']`, keyed by gene. A guideline
    that already has `phenotypes` is left untouched.

    The guideline is modified in place and also returned. Raises
    `KeyError` if a gene or lookup-key value is missing from
    `phenotype_map`; in that case the guideline is not modified.
    """
    if 'phenotypes' not in guideline:
        # Built completely before assignment, so a failed lookup leaves
        # the guideline unchanged
        guideline['phenotypes'] = {
            gene_symbol: [
                phenotype_map[gene_symbol][gene_result]
                for gene_result in gene_results
            ]
            for gene_symbol, gene_results in guideline['lookupkey'].items()
        }
    return guideline

# Do not contract different phenotypes (#604)
def split_phenotypes(guidelines):
    """Return `guidelines` unchanged; placeholder for the #604 migration."""
    # TODO: split up and copy guidelines per phenotype (combination)
    return guidelines

# Chain guideline migrations together
def migrate_guideline(guideline):
    """Run the guideline migrations in sequence and return the result.

    The guideline is passed through `rename_external_data`,
    `enlist_external_data` and `add_phenotypes`, in that order.
    """
    renamed = rename_external_data(guideline)
    enlisted = enlist_external_data(renamed)
    return add_phenotypes(enlisted)

# Load the backup; guidelines are migrated below and the result is written
data = get_data()

# Iterate data for migration of content
for table_name, table_content in data.items():
    if table_name.startswith('AppData'):
        # App data nests guidelines as rows -> drugs -> guidelines
        for row in table_content:
            for drug in row['drugs']:
                for guideline in drug['guidelines']:
                    migrate_guideline(guideline)
    if table_name.startswith('Guideline'):
        # Guideline tables hold the guidelines directly
        for guideline in table_content:
            migrate_guideline(guideline)

write_data(data, postfix=SCRIPT_POSTFIXES['migrate'])
def migrate_guideline(guideline, phenotype_map):
    """Run all per-guideline migrations on a single guideline.

    The guideline is passed through `rename_external_data`,
    `enlist_external_data` and `add_phenotypes` (in that order), and the
    result is returned.

    `split_phenotypes` is deliberately not applied here: it operates on a
    list of guidelines, and `migrate_data` applies it to each guideline
    list after the per-guideline migrations have run.
    """
    renamed = rename_external_data(guideline)
    enlisted = enlist_external_data(renamed)
    return add_phenotypes(enlisted, phenotype_map)

# Migrate data
def migrate_data():
    """Load the backup, migrate all guidelines, and write the result.

    Guidelines are migrated in two places: nested under each drug of each
    row in tables whose name starts with 'AppData', and directly in tables
    whose name starts with 'Guideline'. Each guideline list is migrated
    item by item with `migrate_guideline`, passed through
    `split_phenotypes`, and stored back into the data before writing.
    """
    data = get_data()
    phenotype_map = get_phenotype_map()

    # Iterate data for migration of content; snapshot the table names
    # because table contents are reassigned inside the loop
    for table_name in list(data.keys()):
        table_content = data[table_name]
        if table_name.startswith('AppData'):
            for row in table_content:
                for drug in row['drugs']:
                    migrated = [
                        migrate_guideline(guideline, phenotype_map)
                        for guideline in drug['guidelines']
                    ]
                    # Store the result: split_phenotypes may return a new
                    # list, which rebinding a local name would discard
                    drug['guidelines'] = split_phenotypes(migrated)
        if table_name.startswith('Guideline'):
            migrated = [
                migrate_guideline(guideline, phenotype_map)
                for guideline in table_content
            ]
            data[table_name] = split_phenotypes(migrated)

    write_data(data, postfix=SCRIPT_POSTFIXES['migrate'])

# Only run the migration when executed as a script, not when imported
if __name__ == '__main__':
    migrate_data()

0 comments on commit 7c44cd0

Please sign in to comment.