Skip to content

Commit

Permalink
Merge pull request #4 from kids-first/feature/mb-update-config
Browse files Browse the repository at this point in the history
✨ Echtvar Update
  • Loading branch information
migbro authored Feb 13, 2024
2 parents 4adef10 + a71485c commit 0f4f9e0
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 65 deletions.
4 changes: 2 additions & 2 deletions docs/CUSTOM_GNOMAD_REF.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ First, create a config JSON file. See [here](https://github.com/brentp/echtvar#c
```sh
echtvar \
encode \
gnomad.v3.1.1.chr16_custom.echtvar.zip \
gnomad.v3.1.1.custom.echtvar.zip \
gnomad_update.json \
gnomad.genomes.v3.1.1.sites.chr16.custom.INFO_added.vcf.gz
*.custom.INFO_added.vcf.gz
```
56 changes: 28 additions & 28 deletions docs/gnomad_update.json
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
[
{"field": "AC", "alias": "gnomad_3_1_1_AC", "missing_value": -2147483648},
{"field": "AN", "alias": "gnomad_3_1_1_AN", "missing_value": -2147483648},
{"field": "AF", "alias": "gnomad_3_1_1_AF", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "nhomalt", "alias": "gnomad_3_1_1_nhomalt", "missing_value": -2147483648},
{"field": "AC_popmax", "alias": "gnomad_3_1_1_AC_popmax", "missing_value": -2147483648},
{"field": "AN_popmax", "alias": "gnomad_3_1_1_AN_popmax", "missing_value": -2147483648},
{"field": "AF_popmax", "alias": "gnomad_3_1_1_AF_popmax", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "nhomalt_popmax", "alias": "gnomad_3_1_1_nhomalt_popmax", "missing_value": -2147483648},
{"field": "AC_controls_and_biobanks", "alias": "gnomad_3_1_1_AC_controls_and_biobanks", "missing_value": -2147483648},
{"field": "AN_controls_and_biobanks", "alias": "gnomad_3_1_1_AN_controls_and_biobanks", "missing_value": -2147483648},
{"field": "AF_controls_and_biobanks", "alias": "gnomad_3_1_1_AF_controls_and_biobanks", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer", "alias": "gnomad_3_1_1_AF_non_cancer", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "primate_ai_score", "alias": "gnomad_3_1_1_primate_ai_score", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "splice_ai_consequence", "alias": "gnomad_3_1_1_splice_ai_consequence", "missing_string": "."},
{"field": "GNOMAD_FILTER", "alias": "gnomad_3_1_1_FILTER", "missing_string": "."},
{"field": "AF_non_cancer_afr", "alias": "gnomad_3_1_1_AF_non_cancer_afr", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_ami", "alias": "gnomad_3_1_1_AF_non_cancer_ami", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_asj", "alias": "gnomad_3_1_1_AF_non_cancer_asj", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_eas", "alias": "gnomad_3_1_1_AF_non_cancer_eas", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_fin", "alias": "gnomad_3_1_1_AF_non_cancer_fin", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_mid", "alias": "gnomad_3_1_1_AF_non_cancer_mid", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_nfe", "alias": "gnomad_3_1_1_AF_non_cancer_nfe", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_oth", "alias": "gnomad_3_1_1_AF_non_cancer_oth", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_raw", "alias": "gnomad_3_1_1_AF_non_cancer_raw", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_sas", "alias": "gnomad_3_1_1_AF_non_cancer_sas", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_amr", "alias": "gnomad_3_1_1_AF_non_cancer_amr", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_popmax", "alias": "gnomad_3_1_1_AF_non_cancer_popmax", "multiplier": 2000000,"missing_value": 2139095041},
{"field": "AF_non_cancer_all_popmax", "alias": "gnomad_3_1_1_AF_non_cancer_all_popmax", "multiplier": 2000000, "missing_value": 2139095041}
{"field": "AC", "alias": "gnomad_3_1_1_AC", "description": "Alternate allele count", "missing_value": -2147483648},
{"field": "AN", "alias": "gnomad_3_1_1_AN", "description": "Total number of alleles", "missing_value": -2147483648},
{"field": "AF", "alias": "gnomad_3_1_1_AF", "description": "Alternate allele frequency", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "nhomalt", "alias": "gnomad_3_1_1_nhomalt", "description": "Count of homozygous individuals", "missing_value": -2147483648},
{"field": "AC_popmax", "alias": "gnomad_3_1_1_AC_popmax", "description": "Allele count in the population with the maximum allele frequency", "missing_value": -2147483648},
{"field": "AN_popmax", "alias": "gnomad_3_1_1_AN_popmax", "description": "Total number of alleles in the population with the maximum allele frequency", "missing_value": -2147483648},
{"field": "AF_popmax", "alias": "gnomad_3_1_1_AF_popmax", "description": "Maximum allele frequency across populations", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "nhomalt_popmax", "alias": "gnomad_3_1_1_nhomalt_popmax", "description": "Count of homozygous individuals in the population with the maximum allele frequency", "missing_value": -2147483648},
{"field": "AC_controls_and_biobanks", "alias": "gnomad_3_1_1_AC_controls_and_biobanks", "description": "Alternate allele count in controls_and_biobanks subset", "missing_value": -2147483648},
{"field": "AN_controls_and_biobanks", "alias": "gnomad_3_1_1_AN_controls_and_biobanks", "description": "Total number of alleles in controls_and_biobanks subset", "missing_value": -2147483648},
{"field": "AF_controls_and_biobanks", "alias": "gnomad_3_1_1_AF_controls_and_biobanks", "description": "Alternate allele frequency in controls_and_biobanks subset", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer", "alias": "gnomad_3_1_1_AF_non_cancer", "description": "Alternate allele frequency in non_cancer subset", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "primate_ai_score", "alias": "gnomad_3_1_1_primate_ai_score", "description": "PrimateAI's deleteriousness score from 0 (less deleterious) to 1 (more deleterious).", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "splice_ai_consequence", "alias": "gnomad_3_1_1_splice_ai_consequence", "description": "The consequence term associated with the max delta score in 'splice_ai_max_ds'.", "missing_string": "."},
{"field": "GNOMAD_FILTER", "alias": "gnomad_3_1_1_FILTER", "description": "Value of FILTER for gnomAD variant. Use to include/exclude non-PASS variants", "missing_string": "."},
{"field": "AF_non_cancer_afr", "alias": "gnomad_3_1_1_AF_non_cancer_afr", "description": "Alternate allele frequency in samples of African/African-American ancestry in non_cancer subset", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_ami", "alias": "gnomad_3_1_1_AF_non_cancer_ami", "description": "Alternate allele frequency in samples of Amish ancestry in non_cancer subset", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_asj", "alias": "gnomad_3_1_1_AF_non_cancer_asj", "description": "Alternate allele frequency in samples of Ashkenazi Jewish ancestry in non_cancer subset", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_eas", "alias": "gnomad_3_1_1_AF_non_cancer_eas", "description": "Alternate allele frequency in samples of East Asian ancestry in non_cancer subset", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_fin", "alias": "gnomad_3_1_1_AF_non_cancer_fin", "description": "Alternate allele frequency in samples of Finnish ancestry in non_cancer subset", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_mid", "alias": "gnomad_3_1_1_AF_non_cancer_mid", "description": "Alternate allele frequency in samples of Middle Eastern ancestry in non_cancer subset", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_nfe", "alias": "gnomad_3_1_1_AF_non_cancer_nfe", "description": "Alternate allele frequency in samples of Non-Finnish European ancestry in non_cancer subset", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_oth", "alias": "gnomad_3_1_1_AF_non_cancer_oth", "description": "Alternate allele frequency in samples of Other ancestry in non_cancer subset", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_raw", "alias": "gnomad_3_1_1_AF_non_cancer_raw", "description": "Alternate allele frequency in samples, before removing low-confidence genotypes in non_cancer subset", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_sas", "alias": "gnomad_3_1_1_AF_non_cancer_sas", "description": "Alternate allele frequency in samples of South Asian ancestry in non_cancer subset", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_amr", "alias": "gnomad_3_1_1_AF_non_cancer_amr", "description": "Alternate allele frequency in samples of Latino ancestry in non_cancer subset", "multiplier": 2000000, "missing_value": 2139095041},
{"field": "AF_non_cancer_popmax", "alias": "gnomad_3_1_1_AF_non_cancer_popmax", "description": "Max AF of non-bottleneck populations in AF_non_cancer", "multiplier": 2000000,"missing_value": 2139095041},
{"field": "AF_non_cancer_all_popmax", "alias": "gnomad_3_1_1_AF_non_cancer_all_popmax", "description": "Max AF of populations in AF_non_cancer INCLUDING bottleneck", "multiplier": 2000000, "missing_value": 2139095041}
]
19 changes: 11 additions & 8 deletions tools/bcftools_annotate.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,21 @@ arguments:
- position: 0
shellQuote: false
valueFrom: >-
--annotations $(inputs.annotation_vcf.path)
--columns $(inputs.columns)
-o $(inputs.output_basename).$(inputs.tool_name).bcf_annotated.vcf.gz
-O z
--threads $(inputs.threads)
$(inputs.input_vcf.path)
- position: 2
shellQuote: false
valueFrom: >-
&& tabix $(inputs.output_basename).$(inputs.tool_name).bcf_annotated.vcf.gz
inputs:
input_vcf: {type: 'File', secondaryFiles: ['.tbi']}
annotation_vcf: {type: 'File', secondaryFiles: ['.tbi'], doc: "bgzipped annotation vcf file"}
columns: {type: 'string', doc: "csv string of columns from annotation to port into the input vcf, i.e INFO/AF"}
threads: {type: 'int?', doc: "Number of compression/decompression threads", default: 4}
input_vcf: { type: 'File', secondaryFiles: ['.tbi'],
inputBinding: { position: 1 } }
annotation_vcf: { type: 'File', secondaryFiles: ['.tbi'], doc: "bgzipped annotation vcf file",
inputBinding: { position: 0, prefix: "--annotations"} }
columns: { type: 'string', doc: "csv string of columns from annotation to port into the input vcf, e.g. INFO/AF",
inputBinding: { position: 0, prefix: "--columns" } }
threads: { type: 'int?', doc: "Number of compression/decompression threads", default: 4,
inputBinding: { position: 0, prefix: "--threads" } }
output_basename: string
tool_name: string

Expand Down
2 changes: 1 addition & 1 deletion tools/bcftools_strip_ann.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ inputs:
input_vcf: {type: File, secondaryFiles: ['.tbi']}
output_basename: string
tool_name: string
strip_info: {type: ['null', string], doc: "If given, remove previous annotation information based on INFO file, i.e. to strip VEP info, use INFO/ANN"}
strip_info: {type: 'string?', doc: "If given, remove previous annotation information based on INFO field, e.g. to strip VEP info, use INFO/ANN"}

outputs:
stripped_vcf:
Expand Down
4 changes: 2 additions & 2 deletions tools/echtvar_anno.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ requirements:
- class: InlineJavascriptRequirement
- class: ShellCommandRequirement
- class: DockerRequirement
dockerPull: "pgc-images.sbgenomics.com/d3b-bixu/echtvar:0.1.9"
dockerPull: "pgc-images.sbgenomics.com/d3b-bixu/echtvar:0.2.0"
- class: ResourceRequirement
coresMin: $(inputs.cpu)
ramMin: $(inputs.ram * 1000)
Expand All @@ -22,7 +22,7 @@ arguments:
prefix: "&&"
shellQuote: false
valueFrom: >-
bcftools index
bcftools index --threads $(inputs.cpu)
- position: 19
shellQuote: false
valueFrom: >-
Expand Down
8 changes: 4 additions & 4 deletions tools/gatk_variant_filter.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ arguments:
- position: 0
shellQuote: false
valueFrom: >-
-R $(inputs.reference.path)
-V $(inputs.input_vcf.path)
-O $(inputs.output_basename).$(inputs.tool_name).gatk.soft_filtered.vcf.gz
${
var args = "";
Expand All @@ -28,8 +26,10 @@ arguments:
}

inputs:
input_vcf: {type: 'File', secondaryFiles: ['.tbi']}
reference: {type: 'File', secondaryFiles: [^.dict, .fai]}
input_vcf: { type: 'File', secondaryFiles: ['.tbi'],
inputBinding: { position: 0, prefix: "-V" } }
reference: { type: 'File', secondaryFiles: [^.dict, .fai],
inputBinding: { position: 0, prefix: "-R"} }
filter_name: {type: 'string[]', doc: "Array of names for each filter tag to add"}
filter_expression: {type: 'string[]', doc: "Array of filter expressions to establish criteria to tag variants with. See https://gatk.broadinstitute.org/hc/en-us/articles/360036730071-VariantFiltration for clues"}
threads: {type: 'int?', doc: "Number of compression/decompression threads", default: 4}
Expand Down
11 changes: 2 additions & 9 deletions tools/kf_mskcc_vcf2maf.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,6 @@ arguments:
&& perl vcf2maf.pl
--input-vcf input_file.vcf
--output-maf $(inputs.output_basename).$(inputs.tool_name).vep.maf
${
if(inputs.maf_center){
return "--maf-center \"" + inputs.maf_center + "\""
}
else{
return "";
}
}

inputs:
reference: { type: 'File', secondaryFiles: [.fai], doc: "Fasta genome assembly with index",
Expand All @@ -51,7 +43,8 @@ inputs:
inputBinding: {position: 6, prefix: "--retain-ann"} }
custom_enst: { type: 'File?', doc: "Use a file with ens tx IDs for each gene to override VEP PICK",
inputBinding: {position: 7, prefix: "--custom-enst"} }
maf_center: {type: 'string?', doc: "Sequencing center of variant called", default: "."}
maf_center: {type: 'string?', doc: "Sequencing center of variant called", default: ".",
inputBinding: { position: 8, prefix: "--maf-center"} }

outputs:
output_maf:
Expand Down
20 changes: 9 additions & 11 deletions workflows/kfdrc-germline-snv-annot-workflow.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,8 @@ inputs:
# bcftools annotate if more to do
bcftools_annot_clinvar_columns: {type: 'string?', doc: "csv string of columns from annotation to port into the input vcf", default: "INFO/ALLELEID,INFO/CLNDN,INFO/CLNDNINCL,INFO/CLNDISDB,INFO/CLNDISDBINCL,INFO/CLNHGVS,INFO/CLNREVSTAT,INFO/CLNSIG,INFO/CLNSIGCONF,INFO/CLNSIGINCL,INFO/CLNVC,INFO/CLNVCSO,INFO/CLNVI"}
clinvar_annotation_vcf: {type: 'File?', secondaryFiles: ['.tbi'], doc: "additional bgzipped annotation vcf file"}
echtvar_anno_zips: {type: 'File[]?', doc: "Annotation ZIP files for echtvar anno"}
echtvar_anno_zips: { type: 'File[]?', doc: "Annotation ZIP files for echtvar anno",
"sbg:suggestedValue": [{class: File, path: 65c64d847dab7758206248c6, name: gnomad.v3.1.1.custom.echtvar.zip}] }
# VEP-specific
disable_vep_annotation: {type: 'boolean?', doc: "Disable VEP Annotation and skip this task.", default: false}
vep_ram: {type: 'int?', default: 48, doc: "In GB, may need to increase this value depending on the size/complexity of input"}
Expand Down Expand Up @@ -318,7 +319,7 @@ steps:
output_filename:
source: [output_basename, tool_name]
valueFrom: |
$(self[0]).$(self[1]).bcf_annotated.vcf.gz
$(self[0]).$(self[1]).echtvar_annotated.vcf.gz
out: [annotated_vcf]

bcftools_clinvar_annotate:
Expand All @@ -341,17 +342,14 @@ steps:
in:
input_files:
source: [bcftools_clinvar_annotate/bcftools_annotated_vcf, echtvar_anno/annotated_vcf, vep_annotate_vcf/output_vcf]
valueFrom: |
${
var first_non_null = self.filter(function(e) { return e != null }).shift();
return [first_non_null, first_non_null.secondaryFiles[0]];
}
}"
valueFrom: |
${ var first_non_null = self.filter(function(e) { return e != null }).shift();
return [first_non_null, first_non_null.secondaryFiles[0]];
}
rename_to:
source: [output_basename, tool_name]
valueFrom: |
${
var pro_vcf = '.'.join([self[0], self[1], 'vcf.gz']);
${ var pro_vcf = [self[0], self[1], 'vcf.gz'].join('.');
return [pro_vcf, pro_vcf + '.tbi'];
}
out: [renamed_files]
Expand All @@ -363,6 +361,6 @@ sbg:license: Apache License 2.0
sbg:publisher: KFDRC

"sbg:links":
- id: 'https://github.com/kids-first/kids-first/kf-annotation-tools/releases/tag/v1.0.0'
- id: 'https://github.com/kids-first/kf-annotation-tools/releases/tag/v1.1.0'
label: github-release

0 comments on commit 0f4f9e0

Please sign in to comment.