Skip to content

Commit

Permalink
feat(#605): simplify migration
Browse files Browse the repository at this point in the history
  • Loading branch information
tamslo committed May 5, 2023
1 parent 10ebaeb commit b2084d4
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 33 deletions.
2 changes: 1 addition & 1 deletion scripts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ containing a zipped JSON.
Run `python migrate.py <PATH_TO_BACKUP>[.json|.base64]` to receive
`<PATH_TO_BACKUP>_migrated_<TIMESTAMP>.base64`.

**⚠️ Migrating data will remove the data history!**
**⚠️ Migrating data will remove the data history, including published versions!**

(Breaking) changes covered:

Expand Down
10 changes: 6 additions & 4 deletions scripts/common/get_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,18 @@ def get_phenotype_value_lengths(guideline, expect_same_length = False):
def get_phenotype_value(phenotype_values, index):
phenotype_value = {}
for gene in phenotype_values:
phenotype_value[gene] = phenotype_values[gene][index]
phenotype_value[gene] = [phenotype_values[gene][index]]
return phenotype_value

def dict_to_key(dictionary):
def dict_to_key(dictionary, format_value=lambda value: value):
return ' '.join(map(
lambda key: f'{key} {dictionary[key]}',
lambda key: f'{key} {format_value(dictionary[key])}',
dict(sorted(dictionary.items())).keys()))

def get_phenotype_key(guideline):
return dict_to_key(guideline['phenotypes'])
return dict_to_key(
guideline['phenotypes'],
lambda phenotype_value: ''.join(phenotype_value))

def get_information_key(guideline):
# Length of guideline['externalData'] should always be 1 as we just migrated
Expand Down
2 changes: 2 additions & 0 deletions scripts/common/remove_history.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
def remove_history(data):
    """Empty the history tables of a backup in place.

    Sets ``data['AppData']`` (when that key is present) and every entry
    whose key ends with ``_History`` to an empty list. All other entries
    are left untouched.

    Args:
        data: Dict keyed by table name; it is modified in place.

    Returns:
        None.
    """
    # NOTE(review): AppData presumably holds the published versions that the
    # README warns about losing — confirm against the backup schema.
    if 'AppData' in data:
        data['AppData'] = []
    # Only values of existing keys are replaced, so the dict never changes
    # size while it is being iterated.
    for table_name in data:
        if table_name.endswith('_History'):
            data[table_name] = []
Expand Down
55 changes: 27 additions & 28 deletions scripts/migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,34 +40,40 @@ def migrate_guideline(guideline, phenotype_map):
enlist_external_data(rename_external_data(guideline)),
phenotype_map)

# Contract external data by phenotypes (#597)
# Split up previously contracted phenotypes (#604)
# Contraction is implemented here analogous to
# anni/src/common/database/helpers/cpic-constructors.py
def contract_phenotypes_per_drug(guidelines):
# Split up by lookupkeys and group by phenotype and external information
# Migrate single guidelines; then split up by lookupkeys and re-contract by
# phenotype and external information (according to #597 and #604)
def migrate_drug_guidelines(guidelines, phenotype_map):
phenotype_guideline_map = {}
for guideline in guidelines:
guideline = migrate_guideline(guideline, phenotype_map)
contracted_guideline_number = get_phenotype_value_lengths(
guideline, expect_same_length=True)
for phenotype_index in range(0, contracted_guideline_number):
decontracted_guideline = guideline.copy()
decontracted_guideline['_id'] = get_object_id()
del decontracted_guideline['_id']
decontracted_guideline['lookupkey'] = get_phenotype_value(
guideline['lookupkey'], phenotype_index)
decontracted_guideline['phenotypes'] = get_phenotype_value(
guideline['phenotypes'], phenotype_index)
# Contraction is implemented analogous to Anni
# (see cpic-constructors.ts)
phenotype_key = get_phenotype_key(decontracted_guideline)
information_key = get_information_key(decontracted_guideline)
if not phenotype_key in phenotype_guideline_map:
phenotype_guideline_map[phenotype_key] = {}
phenotype_guidelines = phenotype_guideline_map[phenotype_key]
if not information_key in phenotype_guidelines:
phenotype_guidelines[information_key] = []
phenotype_guidelines[information_key].append(
decontracted_guideline)
# TODO: Contract grouped guidelines
return list(guidelines)
phenotype_guidelines[information_key].append(decontracted_guideline)
# Re-contract guidelines and assign new IDs
recontracted_guidelines = []
for phenotype_guidelines in phenotype_guideline_map.values():
# TODO: Contract lookupkeys per phenotype
# TODO: Contract unique external data per phenotype
print(phenotype_guidelines.values())
print('')

return recontracted_guidelines

# Migrate data
def migrate_data():
Expand All @@ -81,28 +87,21 @@ def migrate_data():
# Iterate data for migration of single guidelines and contract guidelines
# per drug afterwards (needs phenotypes)

for guideline in data['Guideline']:
guideline = migrate_guideline(guideline, phenotype_map)

if contract_by_phenotypes:
migrated_guidelines = []
for drug in data['Drug']:
migrated_guidelines.append(contract_phenotypes_per_drug(
list(map(
lambda id: get_guideline_by_id(data, id),
drug['guidelines']))))
drug_guidelines = list(map(
lambda id: get_guideline_by_id(data, id),
drug['guidelines']))
migrated_drug_guidelines = migrate_drug_guidelines(
drug_guidelines, phenotype_map)
migrated_guidelines += migrated_drug_guidelines
drug['guidelines'] = list(map(
lambda guideline: guideline['_id'],
migrated_drug_guidelines
))
data['Guideline'] = migrated_guidelines

if 'AppData' in data:
for row in data['AppData']:
for drug in row['drugs']:
guidelines = drug['guidelines']
for guideline in guidelines:
guideline = migrate_guideline(guideline, phenotype_map)
if contract_by_phenotypes:
guidelines = contract_phenotypes_per_drug(
guidelines, phenotype_map)

write_data(data, postfix=SCRIPT_POSTFIXES['migrate'])

if __name__ == '__main__':
Expand Down

0 comments on commit b2084d4

Please sign in to comment.