SciLifeLab · maxulysse · Nov 27, 2018 · Nov 19, 2018 · Nov 19, 2018 · Nov 26, 2018
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 -   [#671](https://github.com/SciLifeLab/Sarek/pull/671) - New `publishDirMode` param and docs
 -   [#673](https://github.com/SciLifeLab/Sarek/pull/673), [#675](https://github.com/SciLifeLab/Sarek/pull/675),  [#676](https://github.com/SciLifeLab/Sarek/pull/676) - Profiles for BinAC and CFC clusters in Tübingen
 -   [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Add container for `CreateIntervalBeds`
+-   [#692](https://github.com/SciLifeLab/Sarek/pull/692) - Add AWS iGenomes support (currently available as `iGRCh37` and `iGRCh38`)
 -   [#694](https://github.com/SciLifeLab/Sarek/pull/694) - Add monochrome and grey logos for light or dark background
 
 ### `Changed`

diff --git a/conf/aws-batch.config b/conf/aws-batch.config
@@ -8,7 +8,7 @@
  */
 
 params {
-  genome_base = params.genome == 'GRCh37' ? "s3://sarek-references/Homo_sapiens/GATK/GRCh37" : params.genome == 'iGRCh38' ? "s3://sarek-references/Homo_sapiens/GATK/GRCh38" : "s3://sarek-references/small"
+  genome_base = params.genome == 'iGRCh37' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh37" : params.genome == 'iGRCh38' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38" : "s3://sarek-references/small"
   publishDirMode = 'copy'
 }
 

diff --git a/conf/genomes.config b/conf/genomes.config
@@ -42,6 +42,19 @@ params {
 			//AF_files			= "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf"
 			//AF_indexes		= "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf.idx"
     }
+    'iGRCh37' {
+      acLoci           = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci"
+      dbsnp            = "${params.genome_base}/Annotation/GATKBundle/dbsnp_138.b37.vcf"
+      dbsnpIndex       = "${params.genome_base}/Annotation/GATKBundle/dbsnp_138.b37.vcf.idx"
+      genomeFile       = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta"
+      genomeDict       = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict"
+      genomeIndex      = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai"
+      bwaIndex         = "${params.genome_base}/Sequence/BWAIndex/human_g1k_v37_decoy.fasta.{amb,ann,bwt,pac,sa}"
+      intervals        = "${params.genome_base}/Annotation/intervals/wgs_calling_regions_CAW.list"
+      knownIndels      = "${params.genome_base}/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf"
+      knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.idx"
+      snpeffDb         = "GRCh37.75"
+    }
     'iGRCh38' {
       acLoci           = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci"
       dbsnp            = "${params.genome_base}/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz"
@@ -51,8 +64,8 @@ params {
       genomeIndex      = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai"
       bwaIndex         = "${params.genome_base}/Sequence/BWAIndex/Homo_sapiens_assembly38.fasta.64.{alt,amb,ann,bwt,pac,sa}"
       intervals        = "${params.genome_base}/Annotation/intervals/wgs_calling_regions.hg38.bed"
-      knownIndels      = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,Homo_sapiens_assembly38.known_indels}.vcf.gz"
-      knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi"
+      knownIndels      = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz"
+      knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi"
       snpeffDb         = "GRCh38.86"
     }
     'smallGRCh37' {

diff --git a/docs/REFERENCES.md b/docs/REFERENCES.md
@@ -1,6 +1,7 @@
 # Genomes and reference files
 
-Sarek currently uses GRCh38 by default. The settings are in `genomes.config`, they can be tailored to your needs. The [`buildReferences.nf`](#buildreferencesnf) script can be use to build the indexes based on the reference files.
+Sarek currently uses GRCh38 by default. The settings are in `genomes.config`, they can be tailored to your needs.
+The [`buildReferences.nf`](#buildreferencesnf) script is used to build the indexes for the test reference genome.
 
 ## GRCh37
 
@@ -36,7 +37,7 @@ The MD5SUM of `Homo_sapiens_assembly38.fasta` included in that file is 7ff134953
 If you download the data from the FTP servers `beta/` directory, which seems to be an older version of the bundle, only `Homo_sapiens_assembly38.known_indels.vcf` is needed. Also, you can omit `dbsnp_138_` and `dbsnp_144` files as we use `dbsnp_146`. The old ones also use the wrong chromosome naming convention. The Google Cloud mirror has all data in the `v0` directory, but requires you to remove the `resources_broad_hg38_v0_` prefixes from all files.
 
 The following files need to be downloaded:
- 
+
 - 3884c62eb0e53fa92459ed9bff133ae6 - 'Homo_sapiens_assembly38.dict'
 - 7ff134953dcca8c8997453bbb80b6b5e - 'Homo_sapiens_assembly38.fasta'
 - b07e65aa4425bc365141756f5c98328c - 'Homo_sapiens_assembly38.fasta.64.alt'
@@ -64,7 +65,7 @@ You can create your own cosmic reference for any human reference as specified be
 
 ## COSMIC files
 
-To annotate with COSMIC variants during MuTect1/2 Variant Calling you need to create a compatible VCF file. 
+To annotate with COSMIC variants during MuTect1/2 Variant Calling you need to create a compatible VCF file.
 Download the coding and non-coding VCF files from [COSMIC](http://cancer.sanger.ac.uk/cosmic/download) and
 process them with the [Create\_Cosmic.sh](https://github.com/SciLifeLab/Sarek/tree/master/scripts/Create_Cosmic.sh)
 script for either GRCh37 or GRCh38. The script requires a fasta index `.fai`, of the reference file you are using.
@@ -88,6 +89,10 @@ igvtools index <cosmicvxx.vcf>
 
 Use `--genome smallGRCh37` to map against a small reference genome based on GRCh37. `smallGRCh37` is the default genome for the testing profile (`-profile testing`).
 
+## AWS iGenomes
+Sarek uses [AWS iGenomes](https://ewels.github.io/AWS-iGenomes/), which facilitates storing and sharing references.
+Both `GRCh37` and `GRCh38` are available, with `--genome iGRCh37` or `--genome iGRCh38` respectively; they contain all the data previously detailed.
+
 ## buildReferences.nf
 
 The `buildReferences.nf` script can download and build the files needed for smallGRCh37, or build the references for GRCh37/smallGRCh37.