diff --git a/docs/pipelines.rst b/docs/pipelines.rst index c3bffc21..3e430729 100644 --- a/docs/pipelines.rst +++ b/docs/pipelines.rst @@ -623,7 +623,7 @@ NOTES: - If you're a VSC user, you might want to add the ``vsc`` profile. - The final output (FASTQ files) will available in ``out/data/sra`` - If you're downloading 10x Genomics scATAC-seq data, make sure to set ``params.sratoolkit.includeTechnicalReads = true`` and properly set ``params.utils.sra_normalize_fastqs.fastq_read_suffixes``. In the case of downloading the scATAC-seq samples of SRP254409, ``fastq_read_suffixes`` would be set to ``["R1", "R2", "I1", "I2"]``. - +- The download of SRA files is by default limited to 20 GB. If this limit needs to be increased, please set ``params.sratoolkit.maxSize`` (value in KB) accordingly. This limit can effectively be removed by setting the parameter to an arbitrarily high number (e.g.: 9999999999999). Now we can run it with the following command: diff --git a/src/sratoolkit/processes/downloadFastQ.nf b/src/sratoolkit/processes/downloadFastQ.nf index 538f252f..cd34e7c5 100644 --- a/src/sratoolkit/processes/downloadFastQ.nf +++ b/src/sratoolkit/processes/downloadFastQ.nf @@ -31,7 +31,11 @@ process DOWNLOAD_FASTQS_FROM_SRA_ACC_ID { rm \${SRA_FILE_LOCK} fi # Fetch SRA file - prefetch -v -p 1 ${sraId} + prefetch \ + -v \ + -p 1 \ + ${params.sratoolkit?.maxSize ? '--max-size '+ params.sratoolkit.maxSize: ''} \ + ${sraId} # Convert SRA file to FASTQ files fasterq-dump \ -S \ diff --git a/src/sratoolkit/processes/fixAndCompressFastQ.nf b/src/sratoolkit/processes/fixAndCompressFastQ.nf index 94eab8cb..33becf72 100644 --- a/src/sratoolkit/processes/fixAndCompressFastQ.nf +++ b/src/sratoolkit/processes/fixAndCompressFastQ.nf @@ -39,7 +39,7 @@ process FIX_AND_COMPRESS_SRA_FASTQ { echo "Removing all uncompressed FASTQ files" for FASTQ in *.fastq; do echo "Removing uncompressed FASTQ file \${FASTQ}..." - rm "$(readlink -f \${FASTQ})" + rm "\$(readlink -f \${FASTQ})" done echo "Done." 
""" diff --git a/src/sratoolkit/sratoolkit.config b/src/sratoolkit/sratoolkit.config index 486b8365..b2d3e6e4 100644 --- a/src/sratoolkit/sratoolkit.config +++ b/src/sratoolkit/sratoolkit.config @@ -4,6 +4,8 @@ params { // --include-technical option (fasterq-dump) // This option should be set to 'true' if data you're downloading is e.g.: 10x Genomics scATAC-seq includeTechnicalReads = false + // --max-size (prefetch), maximum file size to download in KB (exclusive). Default: 20G + maxSize = 20000000 } }