Skip to content

Commit

Permalink
Os update 8 2020 (#12)
Browse files Browse the repository at this point in the history
## Major Updates
* Extend configuration to allow the user to set new collections to use in matching and trial curations. Set new defaults in `config.json` using `prior_treatments` as an example.

## Bug Fixes
* Check for patients who have aged in/out of trial eligibility since the last time the patient was run against the trial, and ensure those patients are added to runs
* Allow user to specify custom `trial_status` field instead of defaulting to using `_summary.status` to determine trial open/closed status
* When using a Mongo `_id` as a trial identifier, remove `_id` from trial match pre-processing to avoid `_id` collisions 
* Use `BIRTH_DATE_INT` field instead of `BIRTH_DATE` to calculate age eligibilities (`BIRTH_DATE_INT` is the patient's birth date as an integer using the format YYYYMMDD)

## Minor Updates
* Update Jenkinsfile to include graph viz lib
* Update README
* Move debug flag higher in `init` to avoid missing extra logs
* Add `bypass_warnings` flag to continue runs when run flags have been toggled
* Add `try/catch` to `asyncio` init 
* Add comments and additional logging
* Chunk all queries to avoid hitting MongoDB query size limits 
* Add tests to verify that patients can age in/out of trial eligibilities 
* Various performance improvements 

Co-authored-by: Eric Marriott <marriott@ds.dfci.harvard.edu>
Co-authored-by: esiegel <esiegel@jimmy.harvard.edu>
Co-authored-by: Eric Marriott <marriott@bcb.dfci.harvard.edu>
Co-authored-by: Roshni Biswas <rbiswas@ds.dfci.harvard.edu>
Co-authored-by: emarriott <Limpitl0g>
  • Loading branch information
5 people authored Aug 10, 2020
1 parent 37fb5f2 commit ad5ddc6
Show file tree
Hide file tree
Showing 35 changed files with 1,155 additions and 602 deletions.
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ node {

sh "cat SECRETS_JSON.json"

sh 'apt-get update && apt-get install -y graphviz'
sh 'apt-get update && apt-get install -y libgraphviz-dev graphviz'

sh """
python setup.py install && \
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ Welcome to the documentation for the matchengine! The Matchengine is a system de
The matchengine can be used on local instances that provide access to private data. If you are interested in the development of new features, or in setting up a local instance of the MatchMiner system, please see the documentation, or contact [matchminer@dfci.harvard.edu](https://app.gitbook.com/@matchminer/s/matchminer)

# Documentation
[https://matchminer.gitbook.io/](https://matchminer.gitbook.io/)
[https://app.gitbook.com/@matchminer/s/matchminer](https://app.gitbook.com/@matchminer/s/matchminer)
253 changes: 140 additions & 113 deletions matchengine/config/dfci_config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,135 @@
{
"match_criteria": {
"trial_collection": "trial",
"trial_identifier": "protocol_no",
"match_trial_link_id": "protocol_no",
"trial_status_key": {
"key_name": "status",
"open_to_accrual_values": ["open to accrual"]
},
"ctml_collection_mappings": {
"clinical": {
"query_collection": "clinical",
"join_field": "_id",
"id_field": "_id",
"trial_key_mappings": {
"AGE_NUMERICAL": {
"sample_key": "BIRTH_DATE_INT",
"sample_value": "age_range_to_date_int_query"
},
"ONCOTREE_PRIMARY_DIAGNOSIS": {
"sample_key": "ONCOTREE_PRIMARY_DIAGNOSIS_NAME",
"sample_value": "external_file_mapping",
"file": "oncotree_mapping.json"
},
"GENDER": {
"sample_key": "GENDER",
"sample_value": "nomap"
},
"TMB_NUMERICAL": {
"sample_key": "TUMOR_MUTATIONAL_BURDEN_PER_MEGABASE",
"sample_value": "tmb_range_to_query"
},
"HER2_STATUS": {
"ignore": true
},
"PR_STATUS": {
"ignore": true
},
"ER_STATUS": {
"ignore": true
},
"DISEASE_STATUS": {
"ignore": true
}
}
},
"genomic": {
"query_collection": "genomic",
"join_field": "CLINICAL_ID",
"id_field": "_id",
"trial_key_mappings": {
"HUGO_SYMBOL": {
"sample_key": "TRUE_HUGO_SYMBOL",
"sample_value": "nomap"
},
"EXON": {
"sample_key": "TRUE_TRANSCRIPT_EXON",
"sample_value": "nomap"
},
"PROTEIN_CHANGE": {
"sample_key": "TRUE_PROTEIN_CHANGE",
"sample_value": "nomap"
},
"WILDCARD_PROTEIN_CHANGE": {
"sample_key": "TRUE_PROTEIN_CHANGE",
"sample_value": "wildcard_regex"
},
"VARIANT_CLASSIFICATION": {
"sample_key": "TRUE_VARIANT_CLASSIFICATION",
"sample_value": "nomap"
},
"VARIANT_CATEGORY": {
"sample_key": "VARIANT_CATEGORY",
"sample_value": "variant_category_map"
},
"CNV_CALL": {
"sample_key": "CNV_CALL",
"sample_value": "cnv_map"
},
"WILDTYPE": {
"sample_key": "WILDTYPE",
"sample_value": "nomap"
},
"MMR_STATUS": {
"sample_key": "MMR_STATUS",
"sample_value": "mmr_ms_map"
},
"MS_STATUS": {
"sample_key": "MMR_STATUS",
"sample_value": "mmr_ms_map"
},
"APOBEC_SIGNATURE": {
"sample_key": "APOBEC_STATUS",
"sample_value": "nomap"
},
"POLE_SIGNATURE": {
"sample_key": "POLE_STATUS",
"sample_value": "nomap"
},
"TOBACCO_SIGNATURE": {
"sample_key": "TABACCO_STATUS",
"sample_value": "nomap"
},
"TEMOZOLOMIDE_SIGNATURE": {
"sample_key": "TEMOZOLOMIDE_STATUS",
"sample_value": "nomap"
},
"UVA_SIGNATURE": {
"sample_key": "UVA_STATUS",
"sample_value": "nomap"
},
"DISPLAY_NAME": {
"ignore": true
},
"FUSION_PARTNER_HUGO_SYMBOL": {
"sample_key": "FUSION_PARTNER_HUGO_SYMBOL",
"sample_value": "nomap"
}
}
},
"prior_treatments": {
"query_collection": "prior_treatments",
"join_field": "CLINICAL_ID",
"id_field": "_id",
"trial_key_mappings": {
"DRUG": {
"sample_key": "DRUG",
"sample_value": "nomap"
}
}
}
},
"projections": {
"genomic": [
"TRUE_CDNA_CHANGE",
"REFERENCE_ALLELE",
Expand Down Expand Up @@ -29,6 +159,9 @@
"RIGHT_PARTNER_GENE",
"STRUCTURAL_VARIANT_TYPE"
],
"prior_treatments": [
"DRUG"
],
"clinical": [
"REPORT_DATE",
"GENDER",
Expand All @@ -46,14 +179,6 @@
"_summary"
]
},
"extra_initial_mapping_fields": {
"clinical": [
[
"PANEL_VERSION",
"raw"
]
]
},
"extra_initial_lookup_fields": {
"clinical": [
[
Expand All @@ -62,108 +187,6 @@
]
]
},
"trial_key_mappings": {
"genomic": {
"HUGO_SYMBOL": {
"sample_key": "TRUE_HUGO_SYMBOL",
"sample_value": "nomap"
},
"EXON": {
"sample_key": "TRUE_TRANSCRIPT_EXON",
"sample_value": "nomap"
},
"PROTEIN_CHANGE": {
"sample_key": "TRUE_PROTEIN_CHANGE",
"sample_value": "nomap"
},
"WILDCARD_PROTEIN_CHANGE": {
"sample_key": "TRUE_PROTEIN_CHANGE",
"sample_value": "wildcard_regex"
},
"VARIANT_CLASSIFICATION": {
"sample_key": "TRUE_VARIANT_CLASSIFICATION",
"sample_value": "nomap"
},
"VARIANT_CATEGORY": {
"sample_key": "VARIANT_CATEGORY",
"sample_value": "variant_category_map"
},
"CNV_CALL": {
"sample_key": "CNV_CALL",
"sample_value": "cnv_map"
},
"WILDTYPE": {
"sample_key": "WILDTYPE",
"sample_value": "nomap"
},
"MMR_STATUS": {
"sample_key": "MMR_STATUS",
"sample_value": "mmr_ms_map"
},
"MS_STATUS": {
"sample_key": "MMR_STATUS",
"sample_value": "mmr_ms_map"
},
"APOBEC_SIGNATURE": {
"sample_key": "APOBEC_STATUS",
"sample_value": "nomap"
},
"POLE_SIGNATURE": {
"sample_key": "POLE_STATUS",
"sample_value": "nomap"
},
"TOBACCO_SIGNATURE": {
"sample_key": "TABACCO_STATUS",
"sample_value": "nomap"
},
"TEMOZOLOMIDE_SIGNATURE": {
"sample_key": "TEMOZOLOMIDE_STATUS",
"sample_value": "nomap"
},
"UVA_SIGNATURE": {
"sample_key": "UVA_STATUS",
"sample_value": "nomap"
},
"DISPLAY_NAME": {
"ignore": true
},
"FUSION_PARTNER_HUGO_SYMBOL": {
"sample_key": "FUSION_PARTNER_HUGO_SYMBOL",
"sample_value": "nomap"
}
},
"clinical": {
"AGE_NUMERICAL": {
"sample_key": "BIRTH_DATE",
"sample_value": "age_range_to_date_query"
},
"ONCOTREE_PRIMARY_DIAGNOSIS": {
"sample_key": "ONCOTREE_PRIMARY_DIAGNOSIS_NAME",
"sample_value": "external_file_mapping",
"file": "oncotree_mapping.json"
},
"GENDER": {
"sample_key": "GENDER",
"sample_value": "nomap"
},
"TMB_NUMERICAL": {
"sample_key": "TUMOR_MUTATIONAL_BURDEN_PER_MEGABASE",
"sample_value": "tmb_range_to_query"
},
"HER2_STATUS": {
"ignore": true
},
"PR_STATUS": {
"ignore": true
},
"ER_STATUS": {
"ignore": true
},
"DISEASE_STATUS": {
"ignore": true
}
}
},
"trial_match_sorting": [
{
"show_in_ui": {
Expand Down Expand Up @@ -249,15 +272,19 @@
"POLE_STATUS",
"TABACCO_STATUS",
"TEMOZOLOMIDE_STATUS",
"UVA_STATUS"
"UVA_STATUS",
"LEFT_PARTNER_GENE",
"RIGHT_PARTNER_GENE",
"TRUE_HUGO_SYMBOL"
],
"clinical": [
"GENDER",
"SAMPLE_ID",
"MRN",
"ONCOTREE_PRIMARY_DIAGNOSIS_NAME",
"TUMOR_MUTATIONAL_BURDEN_PER_MEGABASE",
"VITAL_STATUS"
"VITAL_STATUS",
"BIRTH_DATE_INT"
],
"trial_match": [
"hash",
Expand Down
Loading

0 comments on commit ad5ddc6

Please sign in to comment.