Skip to content

Commit

Permalink
Merge pull request nf-core#1673 from maxulysse/fix_previously_silent_…
Browse files Browse the repository at this point in the history
…error

print warnings instead of erroring
  • Loading branch information
maxulysse authored Oct 3, 2024
2 parents 9e0fda5 + 70b10c3 commit 2be5d14
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 25 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed

- [1669](https://github.com/nf-core/sarek/pull/1669) - Better nf-test pipeline level tests

### Fixed

- [1656](https://github.com/nf-core/sarek/pull/1656) - Retiring parameter `snpeff_genome`
- [1657](https://github.com/nf-core/sarek/pull/1657) - Update all actions used in the GHA CI
- [1661](https://github.com/nf-core/sarek/pull/1661) - nf-test pipeline level tests
- [1669](https://github.com/nf-core/sarek/pull/1669) - Better nf-test pipeline level tests
- [1673](https://github.com/nf-core/sarek/pull/1673) - Print warning message instead of silent error with Nextflow versions prior to 24.08.0-edge

### Removed

Expand Down
67 changes: 43 additions & 24 deletions workflows/sarek/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -937,45 +937,64 @@ workflow SAREK {
FUNCTIONS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// Add readgroup to meta and remove lane
def addReadgroupToMeta(meta, files) {
    // Builds the @RG read-group string for a sample/lane, stores it in meta under
    // 'read_group', and drops the 'lane' key from meta.
    //   meta  : map read for sample, lane and patient
    //   files : list of FASTQ paths; only files[0] is inspected for the flowcell ID
    // Returns [ meta, files ] with the updated meta.

    // Optional sequencing-center tag, only emitted when params.seq_center is set
    def CN = params.seq_center ? "CN:${params.seq_center}\\t" : ''

    // Here we're assuming that fastq_1 and fastq_2 are from the same flowcell:
    def flowcell = flowcellLaneFromFastq(files[0])
    // If we cannot read the flowcell ID from the fastq file, then we don't use it.
    // Reuse the value read above rather than parsing the FASTQ a second time
    // (a second call would also repeat any warning it logs).
    def sample_lane_id = flowcell ? "${flowcell}.${meta.sample}.${meta.lane}" : "${meta.sample}.${meta.lane}"
    // TO-DO: Would it perhaps be better to also call flowcellLaneFromFastq(files[1]) and check that we get the same flowcell-id?

    // Don't use a random element for ID, it breaks resuming
    def read_group = "\"@RG\\tID:${sample_lane_id}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\""
    meta = meta - meta.subMap('lane') + [read_group: read_group.toString()]
    return [ meta, files ]
}

// Parse the first line of a FASTQ file and return the flowcell ID.
def flowcellLaneFromFastq(path) {
    // Returns the flowcell ID parsed from the first line of the FASTQ at `path`,
    // or null when no first line is available or its format is not recognised.

    // First line of FASTQ file contains sequence identifier plus optional description
    def firstLine = readFirstLineOfFastq(path)
    def flowcell_id = null

    // Expected format from ILLUMINA
    // cf https://en.wikipedia.org/wiki/FASTQ_format#Illumina_sequence_identifiers
    // Five fields:
    // @<instrument>:<lane>:<tile>:<x-pos>:<y-pos>...
    // Seven fields or more (from CASAVA 1.8+):
    // "@<instrument>:<run number>:<flowcell ID>:<lane>:<tile>:<x-pos>:<y-pos>..."

    // Declared with `def` so the variable stays local to this call instead of
    // leaking into the script binding, where concurrent calls would share it.
    def fields = firstLine ? firstLine.split(':') : []
    if (fields.size() == 5) {
        // Get the instrument name as flowcell ID (substring(1) strips the leading '@')
        flowcell_id = fields[0].substring(1)
    } else if (fields.size() >= 7) {
        // Get the actual flowcell ID
        flowcell_id = fields[2]
    } else if (fields.size() != 0) {
        // A line was read but matches neither layout: warn and leave the ID null
        log.warn "FASTQ file(${path}): Cannot extract flowcell ID from ${firstLine}"
    }
    return flowcell_id
}

// Get first line of a FASTQ file
def readFirstLineOfFastq(path) {
    // Reads and returns the first line of the gzip-compressed FASTQ at `path`.
    // Exceptions raised while streaming are logged as warnings; the function then
    // returns whatever `line` held at that point (null if nothing was read).
    def line = null
    try {
        path.withInputStream {
            InputStream gzipStream = new java.util.zip.GZIPInputStream(it)
            Reader decoder = new InputStreamReader(gzipStream, 'ASCII')
            BufferedReader buffered = new BufferedReader(decoder)
            line = buffered.readLine()
            // NOTE(review): a failed Groovy assert raises an AssertionError, which
            // `catch (Exception e)` below does not appear to cover - confirm whether
            // a first line without '@' should abort the run or only warn.
            assert line.startsWith('@')
        }
    } catch (Exception e) {
        log.warn "FASTQ file(${path}): Error streaming"
        log.warn "${e.message}"
    }
    return line
}

/*
Expand Down

0 comments on commit 2be5d14

Please sign in to comment.