Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for fastq.gz.spring-files as input #1534

Merged
merged 23 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
b5b766d
Merge pull request #1484 from nf-core/dev
maxulysse May 7, 2024
85905b3
Adding support for spring-compressed fastq.gz as input
asp8200 May 21, 2024
5e9a3df
Improve error msg
asp8200 May 21, 2024
3967a8c
Adding test config
asp8200 May 22, 2024
37e2fa7
Aligning profile-name and config-name for test alignment_from_fastq_a…
asp8200 May 22, 2024
c239c37
Improving names of variables and module instances
asp8200 May 22, 2024
2944812
pleasing the linter
asp8200 May 22, 2024
1719e39
Merge branch 'nf-core:master' into spring_II
asp8200 May 27, 2024
b6421a7
Merge branch 'dev' into spring_II
asp8200 May 27, 2024
c11092f
setup test of alignment from bam,fastq and spring in one input-csv
asp8200 May 27, 2024
1863b61
Updating changelog
asp8200 May 27, 2024
85f03c1
Adding pytest alignment_from_everything
asp8200 May 29, 2024
e434c1c
fix typo
asp8200 May 29, 2024
8515a83
prettier
asp8200 May 29, 2024
1a649a7
Disabling default publishing of fastq.gz-files from SPRING_DECOMPRESS
asp8200 May 30, 2024
9504c26
Reduce code-duplication by introducing function addReadgroupToMeta
asp8200 Jun 10, 2024
0a60386
Merge branch 'dev' into spring_II
asp8200 Jun 10, 2024
2e16ba1
Adding some docs on fastq.gz.spring-files as input
asp8200 Jun 10, 2024
5be6cd5
prettier
asp8200 Jun 10, 2024
84abe8d
Update workflows/sarek/main.nf
asp8200 Jun 17, 2024
b1a4ac0
Very minor update of usage.md
asp8200 Jun 18, 2024
4a9b22c
variable names all lowercase
asp8200 Jun 18, 2024
67e3f02
Merge branch 'spring_II' of https://github.com/asp8200/sarek into spr…
asp8200 Jun 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- [#1502](https://github.com/nf-core/sarek/pull/1502) - export CNVs into VCF format in `bam_variant_calling_cnvkit`
- [#1534](https://github.com/nf-core/sarek/pull/1534) - Handling `.fastq.gz.spring` files as input

### Changed

Expand Down
40 changes: 37 additions & 3 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,20 @@
"pattern": "^\\S+$",
"unique": ["patient", "sample"],
"anyOf": [
{
"dependentRequired": ["bam"]
},
{
"dependentRequired": ["fastq_1"]
},
{
"dependentRequired": ["bam"]
"dependentRequired": ["spring_1"]
}
],
"meta": ["lane"]
},
"fastq_1": {
"errorMessage": "FastQ file for reads 1 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
"errorMessage": "Gzipped FastQ file for reads 1 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
"anyOf": [
{
"type": "string",
Expand All @@ -72,7 +75,7 @@
"exists": true
},
"fastq_2": {
"errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
"errorMessage": "Gzipped FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
"dependentRequired": ["fastq_1"],
"anyOf": [
{
Expand All @@ -87,6 +90,37 @@
"format": "file-path",
"exists": true
},
"spring_1": {
"errorMessage": "Gzipped and spring-compressed FastQ file for reads 1 cannot contain spaces and must have extension '.fq.gz.spring' or '.fastq.gz.spring'",
"anyOf": [
{
"type": "string",
"pattern": "^\\S+\\.f(ast)?q\\.gz.spring$"
},
{
"type": "string",
"maxLength": 0
}
],
"format": "file-path",
"exists": true
},
"spring_2": {
"errorMessage": "Gzipped and spring-compressed FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz.spring' or '.fastq.gz.spring'",
"dependentRequired": ["spring_1"],
"anyOf": [
{
"type": "string",
"pattern": "^\\S+\\.f(ast)?q\\.gz.spring$"
},
{
"type": "string",
"maxLength": 0
}
],
"format": "file-path",
"exists": true
},
"table": {
"errorMessage": "Recalibration table cannot contain spaces and must have extension '.table'",
"anyOf": [
Expand Down
7 changes: 7 additions & 0 deletions conf/modules/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ process {
]
}

// Applies to every process whose full name matches NFCORE_SAREK:SAREK:SPRING_DECOMPRESS_.*
withName: 'NFCORE_SAREK:SAREK:SPRING_DECOMPRESS_.*' {
// Output prefix is derived from the `simpleName` of the `spring` input variable
ext.prefix = { "${spring.simpleName}" }
// Publishing is disabled for these processes, so their outputs are not copied to the results directory
// NOTE(review): presumably the decompressed fastq.gz files are only intermediates for mapping — confirm
publishDir = [
enabled: false
]
}

withName: 'MOSDEPTH' {
ext.args = { !params.wes ? "-n --fast-mode --by 500" : ""}
ext.prefix = {
Expand Down
15 changes: 15 additions & 0 deletions conf/test/alignment_from_everything.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.

Use as follows:
nextflow run nf-core/sarek -profile test,<extra_test_profile>,<docker/singularity> --outdir <OUTDIR>
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

params {
// Samplesheet for this test profile; the file name suggests it mixes BAM, FastQ and spring inputs
input = "${projectDir}/tests/csv/3.0/bam_and_fastq_and_spring.csv"
// No tools are selected in this profile
tools = null
}
44 changes: 28 additions & 16 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,27 +80,29 @@ Output from Variant Calling and/or Annotation will be in a specific directory fo

### Overview: Samplesheet Columns

| Column | Description |
| --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `patient` | **Custom patient ID**; designates the patient/subject; must be unique for each patient, but one patient can have multiple samples (e.g. normal and tumor). <br /> _Required_ |
| `sex` | **Sex chromosomes of the patient**; i.e. XX, XY..., only used for Copy-Number Variation analysis in a tumor/pair<br /> _Optional, Default: `NA`_ |
| `status` | **Normal/tumor status of sample**; can be `0` (normal) or `1` (tumor).<br /> _Optional, Default: `0`_ |
| `sample` | **Custom sample ID** for each tumor and normal sample; more than one tumor sample for each subject is possible, i.e. a tumor and a relapse; samples can have multiple lanes for which the _same_ ID must be used to merge them later (see also `lane`). Sample IDs must be unique for unique biological samples <br /> _Required_ |
| `lane` | Lane ID, used when the `sample` is multiplexed on several lanes. Must be unique for each lane in the same sample (but does not need to be the original lane name), and must contain at least one character <br /> _Required for `--step mapping`_ |
| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension `.fastq.gz` or `.fq.gz`. |
| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension `.fastq.gz` or `.fq.gz`. |
| `bam` | Full path to (u)BAM file |
| `bai` | Full path to BAM index file |
| `cram` | Full path to CRAM file |
| `crai` | Full path to CRAM index file |
| `table` | Full path to recalibration table file |
| `vcf` | Full path to vcf file |
| Column | Description |
| ---------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `patient` | **Custom patient ID**; designates the patient/subject; must be unique for each patient, but one patient can have multiple samples (e.g. normal and tumor). <br /> _Required_ |
| `sex` | **Sex chromosomes of the patient**; i.e. XX, XY..., only used for Copy-Number Variation analysis in a tumor/pair<br /> _Optional, Default: `NA`_ |
| `status` | **Normal/tumor status of sample**; can be `0` (normal) or `1` (tumor).<br /> _Optional, Default: `0`_ |
| `sample` | **Custom sample ID** for each tumor and normal sample; more than one tumor sample for each subject is possible, i.e. a tumor and a relapse; samples can have multiple lanes for which the _same_ ID must be used to merge them later (see also `lane`). Sample IDs must be unique for unique biological samples <br /> _Required_ |
| `lane` | Lane ID, used when the `sample` is multiplexed on several lanes. Must be unique for each lane in the same sample (but does not need to be the original lane name), and must contain at least one character <br /> _Required for `--step mapping`_ |
| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension `.fastq.gz` or `.fq.gz`. |
| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension `.fastq.gz` or `.fq.gz`. |
| `spring_1` | Full path to spring-compressed, gzipped FastQ file for read 1 or for reads 1 and 2. The FastQ file has to be first gzipped, then spring-compressed, and the resulting file must have the extension `.fastq.gz.spring` or `.fq.gz.spring`. |
| `spring_2` | Full path to spring-compressed, gzipped FastQ file for read 2. The FastQ file has to be first gzipped, then spring-compressed, and the resulting file must have the extension `.fastq.gz.spring` or `.fq.gz.spring`. |
| `bam` | Full path to (u)BAM file |
| `bai` | Full path to BAM index file |
| `cram` | Full path to CRAM file |
| `crai` | Full path to CRAM index file |
| `table` | Full path to recalibration table file |
| `vcf` | Full path to vcf file |

An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.

### Start with mapping (`--step mapping` [default])

This step can be started either from FastQ files or (u)BAMs. The CSV must contain at least the columns `patient`, `sample`, `lane`, and either `fastq_1/fastq_2` or `bam`.
This step can be started either from FastQ files (gzip-compressed or gzip+spring-compressed) or (u)BAMs. The CSV must contain at least the columns `patient`, `sample`, `lane`, and one of `fastq_1/fastq_2`, `spring_1/spring_2` or `bam`.
asp8200 marked this conversation as resolved.
Show resolved Hide resolved

#### Examples

Expand All @@ -111,6 +113,16 @@ patient,sample,lane,fastq_1,fastq_2
patient1,test_sample,lane_1,test_1.fastq.gz,test_2.fastq.gz
```

```bash
patient,sample,lane,spring_1
patient1,test_sample,lane_1,test_R1_and_R2.fastq.gz.spring
```

```bash
patient,sample,lane,spring_1,spring_2
patient1,test_sample,lane_1,test_R1.fastq.gz.spring,test_R2.fastq.gz.spring
```

```bash
patient,sample,lane,bam
patient1,test_sample,lane_1,test.bam
Expand Down
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,11 @@
"git_sha": "2f3db6f45147ebbb56b371536e31bdf622b5bfee",
"installed_by": ["modules", "vcf_annotate_snpeff"]
},
"spring/decompress": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"installed_by": ["modules"]
},
"strelka/germline": {
"branch": "master",
"git_sha": "e8f2c77a6e4174ee0a48d073d4cc8ff06c44bb4c",
Expand Down
7 changes: 7 additions & 0 deletions modules/nf-core/spring/decompress/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 40 additions & 0 deletions modules/nf-core/spring/decompress/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions modules/nf-core/spring/decompress/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ profiles {
}

// Extra test profiles for more complete CI
alignment_from_everything { includeConfig 'conf/test/alignment_from_everything.config' }
alignment_to_fastq { includeConfig 'conf/test/alignment_to_fastq.config' }
annotation { includeConfig 'conf/test/annotation.config' }
markduplicates_bam { includeConfig 'conf/test/markduplicates_bam.config' }
Expand Down
Loading
Loading