diff --git a/artic/align_trim.py b/artic/align_trim.py
index 7898294..3c31b03 100755
--- a/artic/align_trim.py
+++ b/artic/align_trim.py
@@ -60,7 +60,7 @@ def find_primer(bed, pos, direction, threshold=20):
     return closest


-def trim(segment, primer_pos, end, debug):
+def trim(segment, primer_pos, end, verbose=False):
     """Soft mask an alignment to fit within primer start/end sites.

     Parameters
@@ -71,7 +71,7 @@ def trim(segment, primer_pos, end, debug):
         The position in the reference to soft mask up to (equates to the start/end position of the primer in the reference)
     end : bool
         If True, the segment is being masked from the end (i.e. for the reverse primer)
-    debug : bool
+    verbose : bool
         If True, will print soft masking info during trimming
     """
     # get a copy of the cigar tuples to work with
@@ -93,13 +93,14 @@ def trim(segment, primer_pos, end, debug):
                 flag, length = cigar.pop()
             else:
                 flag, length = cigar.pop(0)
-            if debug:
+            if verbose:
                 print("Chomped a %s, %s" % (flag, length), file=sys.stderr)
         except IndexError:
-            print(
-                "Ran out of cigar during soft masking - completely masked read will be ignored",
-                file=sys.stderr,
-            )
+            if verbose:
+                print(
+                    "Ran out of cigar during soft masking - completely masked read will be ignored",
+                    file=sys.stderr,
+                )
             break

         # if the CIGAR operation consumes the reference sequence, increment/decrement the position by the CIGAR operation length
@@ -121,10 +122,10 @@ def trim(segment, primer_pos, end, debug):

     # calculate how many extra matches are needed in the CIGAR
     extra = abs(pos - primer_pos)
-    if debug:
+    if verbose:
         print("extra %s" % (extra), file=sys.stderr)
     if extra:
-        if debug:
+        if verbose:
             print("Inserted a %s, %s" % (0, extra), file=sys.stderr)
         if end:
             cigar.append((0, extra))
@@ -137,12 +138,12 @@ def trim(segment, primer_pos, end, debug):
         # update the position of the leftmost mappinng base
         segment.pos = pos - extra
-        if debug:
+        if verbose:
             print("New pos: %s" % (segment.pos), file=sys.stderr)

         # if proposed softmask leads straight into a deletion, shuffle leftmost mapping base along and ignore the deletion
         if cigar[0][0] == 2:
-            if debug:
+            if verbose:
                 print(
                     "softmask created a leading deletion in the CIGAR, shuffling the alignment",
                     file=sys.stderr,
                 )
@@ -188,16 +189,19 @@ def handle_segment(

     # filter out unmapped and supplementary alignment segments
     if segment.is_unmapped:
-        print("%s skipped as unmapped" % (segment.query_name), file=sys.stderr)
+        if args.verbose:
+            print("%s skipped as unmapped" % (segment.query_name), file=sys.stderr)
         return False
     if segment.is_supplementary:
-        print("%s skipped as supplementary" % (segment.query_name), file=sys.stderr)
+        if args.verbose:
+            print("%s skipped as supplementary" % (segment.query_name), file=sys.stderr)
         return False
     if segment.mapping_quality < min_mapq:
-        print(
-            "%s skipped as mapping quality below threshold" % (segment.query_name),
-            file=sys.stderr,
-        )
+        if args.verbose:
+            print(
+                "%s skipped as mapping quality below threshold" % (segment.query_name),
+                file=sys.stderr,
+            )
         return False

     # locate the nearest primers to this alignment segment
@@ -205,17 +209,19 @@ def handle_segment(
     p2 = find_primer(bed, segment.reference_end, "-", args.primer_match_threshold)

     if not p1 or not p2:
-        print(
-            "%s skipped as no primer found for segment" % (segment.query_name),
-            file=sys.stderr,
-        )
+        if args.verbose:
+            print(
+                "%s skipped as no primer found for segment" % (segment.query_name),
+                file=sys.stderr,
+            )
         return False

     # check if primers are correctly paired and then assign read group
     # NOTE: removed this as a function as only called once
     # TODO: will try improving this / moving it to the primer scheme processing code
-    correctly_paired = p1[2]["Primer_ID"].split("_")[1] == p2[2]["Primer_ID"].split("_")[1]
-
+    correctly_paired = (
+        p1[2]["Primer_ID"].split("_")[1] == p2[2]["Primer_ID"].split("_")[1]
+    )

     if not args.no_read_groups:
         if correctly_paired:
@@ -247,10 +253,11 @@ def handle_segment(
         report_writer.writerow(report)

     if args.remove_incorrect_pairs and not correctly_paired:
-        print(
-            "%s skipped as not correctly paired" % (segment.query_name),
-            file=sys.stderr,
-        )
+        if args.verbose:
+            print(
+                "%s skipped as not correctly paired" % (segment.query_name),
+                file=sys.stderr,
+            )
         return False

     if args.verbose:
@@ -306,11 +313,12 @@ def handle_segment(

     # check the the alignment still contains bases matching the reference
     if "M" not in segment.cigarstring:
-        print(
-            "%s dropped as does not match reference post masking"
-            % (segment.query_name),
-            file=sys.stderr,
-        )
+        if args.verbose:
+            print(
+                "%s dropped as does not match reference post masking"
+                % (segment.query_name),
+                file=sys.stderr,
+            )
         return False

     return (amplicon, segment)
@@ -364,7 +372,7 @@ def generate_amplicons(bed: list):
     return amplicons


-def normalise(trimmed_segments: dict, normalise: int, bed: list):
+def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool = False):
     """Normalise the depth of the trimmed segments to a given value. Perform per-amplicon normalisation using numpy vector maths to determine whether the segment in question would take the depth closer to the desired depth accross the amplicon.

     Args:
@@ -397,10 +405,11 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list):
         amplicon_depth = np.zeros((amplicons[amplicon]["length"],), dtype=int)

         if not segments:
-            print(
-                f"No segments assigned to amplicon {amplicon}, skipping",
-                file=sys.stderr,
-            )
+            if verbose:
+                print(
+                    f"No segments assigned to amplicon {amplicon}, skipping",
+                    file=sys.stderr,
+                )
             continue

         random.shuffle(segments)
@@ -507,7 +516,7 @@ def go(args):
     # normalise if requested
     if args.normalise:
         output_segments, mean_amp_depths = normalise(
-            trimmed_segments, args.normalise, bed
+            trimmed_segments, args.normalise, bed, args.verbose
         )

         # write mean amplicon depths to file
@@ -567,6 +576,7 @@ def main():
     parser.add_argument("--remove-incorrect-pairs", action="store_true")

     args = parser.parse_args()
+
     go(args)
diff --git a/artic/minion.py b/artic/minion.py
index 59798bb..a7a09ac 100755
--- a/artic/minion.py
+++ b/artic/minion.py
@@ -134,11 +134,19 @@ def run(parser, args):
         normalise_string = ""

     cmds.append(
-        f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --remove-incorrect-pairs --min-mapq {args.min_mapq} --report {args.sample}.alignreport.csv < {args.sample}.sorted.bam 2> {args.sample}.alignreport.er | samtools sort -T {args.sample} - -o {args.sample}.trimmed.rg.sorted.bam"
+        f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --remove-incorrect-pairs --min-mapq {args.min_mapq} --report {args.sample}.alignreport.csv < {args.sample}.sorted.bam > {args.sample}.trimmed.rg.bam"
     )

     cmds.append(
-        f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --min-mapq {args.min_mapq} --remove-incorrect-pairs --trim-primers --report {args.sample}.alignreport.csv --amp-depth-report {args.sample}.amplicon_depths.tsv < {args.sample}.sorted.bam 2> {args.sample}.alignreport.er | samtools sort -T {args.sample} - -o {args.sample}.primertrimmed.rg.sorted.bam"
+        f"samtools sort -T {args.sample} {args.sample}.trimmed.rg.bam -o {args.sample}.trimmed.rg.sorted.bam"
+    )
+
+    cmds.append(
+        f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --min-mapq {args.min_mapq} --remove-incorrect-pairs --trim-primers --report {args.sample}.alignreport.csv --amp-depth-report {args.sample}.amplicon_depths.tsv < {args.sample}.sorted.bam > {args.sample}.primertrimmed.rg.bam"
+    )
+
+    cmds.append(
+        f"samtools sort -T {args.sample} {args.sample}.primertrimmed.rg.bam -o {args.sample}.primertrimmed.rg.sorted.bam"
     )

     cmds.append(f"samtools index {args.sample}.trimmed.rg.sorted.bam")
diff --git a/setup.cfg b/setup.cfg
index 1f2cf00..72b235c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = artic
-version = 1.5.5
+version = 1.5.6
 author = Nick Loman
 author_email = n.j.loman@bham.ac.uk
 maintainer = Sam Wilkinson
diff --git a/tests/align_trim_unit_test.py b/tests/align_trim_unit_test.py
index ad298ea..d8bebab 100644
--- a/tests/align_trim_unit_test.py
+++ b/tests/align_trim_unit_test.py
@@ -1,7 +1,6 @@
 # align_trim_unit_test.py contains unit tests for alignment trimming
 import os
 import pysam
-import pytest

 from artic import align_trim
 from artic import utils
@@ -114,7 +113,7 @@ def testRunner(seg, expectedCIGAR):

     # trim the forward primer
     try:
-        align_trim.trim(seg, p1_position, False, False)
+        align_trim.trim(seg, p1_position, False)
     except Exception as e:
         raise Exception(
             "problem soft masking left primer in {} (error: {})".format(
@@ -132,7 +131,7 @@ def testRunner(seg, expectedCIGAR):

     # trim the reverse primer
     try:
-        align_trim.trim(seg, p2_position, True, False)
+        align_trim.trim(seg, p2_position, True)
     except Exception as e:
         raise Exception(
             "problem soft masking right primer in {} (error: {})".format(
diff --git a/tests/minion_validator.py b/tests/minion_validator.py
index 259394c..d036ed7 100644
--- a/tests/minion_validator.py
+++ b/tests/minion_validator.py
@@ -184,9 +184,6 @@ def genCommand(sampleID, workflow):
         "--scheme-directory",
         dataDir + "primer-schemes",
     ]
-    if workflow == "medaka":
-        cmd.append("--model")
-        cmd.append("r941_min_high_g351")

     if workflow == "clair3":
         cmd.append("--model")
@@ -255,6 +252,7 @@ def runner(workflow, sampleID):
         args = parser.parse_args(cmd)
     except SystemExit:
         sys.stderr.write("failed to parse valid command for `artic minion`")
+        assert False

     # run the minion pipeline
     try:
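
A minimal sketch (not part of the patch above) of the renamed `trim()` signature: the positional `debug` flag becomes an optional `verbose` keyword, so existing positional callers such as the unit tests can simply drop the fourth argument. The BAM path and primer position below are hypothetical.

```python
# Illustration only: calling align_trim.trim() with the new verbose keyword.
# "example.sorted.bam" and the primer coordinate 400 are hypothetical values.
import pysam

from artic import align_trim

with pysam.AlignmentFile("example.sorted.bam", "rb") as bam:
    for segment in bam:
        # mirror the filters applied in handle_segment()
        if segment.is_unmapped or segment.is_supplementary:
            continue
        # soft mask the read from its start up to the primer coordinate;
        # each chomped CIGAR operation is printed to stderr because verbose=True
        align_trim.trim(segment, 400, False, verbose=True)
        print(segment.cigarstring)
```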
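The minion.py change drops the `align_trim ... | samtools sort` pipes (and the `2> ...alignreport.er` redirect) in favour of discrete steps: align_trim writes an unsorted BAM, then samtools sorts it. A rough standalone equivalent of the primer-trimming stage, with a hypothetical sample name, scheme BED, and mapping-quality cutoff, might look like:

```python
# Illustration only: the reworked two-step stage run outside the pipeline.
# "sample1", "scheme.primer.bed" and the mapq cutoff of 20 are hypothetical;
# minion.py builds the real strings from args.sample, the scheme BED and args.min_mapq.
import subprocess

sample = "sample1"
bed = "scheme.primer.bed"

cmds = [
    # step 1: align_trim now writes an unsorted BAM instead of piping into samtools sort
    f"align_trim {bed} --min-mapq 20 --remove-incorrect-pairs --trim-primers "
    f"--report {sample}.alignreport.csv --amp-depth-report {sample}.amplicon_depths.tsv "
    f"< {sample}.sorted.bam > {sample}.primertrimmed.rg.bam",
    # step 2: sorting and indexing happen as their own commands
    f"samtools sort -T {sample} {sample}.primertrimmed.rg.bam -o {sample}.primertrimmed.rg.sorted.bam",
    f"samtools index {sample}.primertrimmed.rg.sorted.bam",
]

for cmd in cmds:
    subprocess.run(cmd, shell=True, check=True)
```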