Skip to content

Commit

Permalink
add option to ignore input vs output sanity check
Browse files Browse the repository at this point in the history
  • Loading branch information
adamewing committed Apr 3, 2018
1 parent 3d17e97 commit 310dbbf
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 55 deletions.
113 changes: 60 additions & 53 deletions bamsurgeon/aligners.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def checkoptions(name, options, picardjar, sv=False):
raise ValueError("ERROR\t'--aligner bowtie2' requires '--alignopts bowtie2ref:bowtie2_ref_basepath\n")


def remap_bam(name, bamfn, fastaref, options, mutid='null', threads=1, paired=True, picardjar=None):
def remap_bam(name, bamfn, fastaref, options, mutid='null', threads=1, paired=True, picardjar=None, insane=False):
''' remap bam file with supported alignment method. "options" param is a dict of aligner-specific required options '''

checkoptions(name, options, picardjar)
Expand All @@ -64,25 +64,25 @@ def remap_bam(name, bamfn, fastaref, options, mutid='null', threads=1, paired=Tr
remap_backtrack_bam(bamfn, threads, fastaref, mutid=mutid, paired=paired)

if name == 'mem':
remap_bwamem_bam(bamfn, threads, fastaref, picardjar, mutid=mutid, paired=paired)
remap_bwamem_bam(bamfn, threads, fastaref, picardjar, mutid=mutid, paired=paired, insane=insane)

if name == 'bwakit':
remap_bwakit_bam(bamfn, threads, fastaref, picardjar, mutid=mutid, paired=paired)
remap_bwakit_bam(bamfn, threads, fastaref, picardjar, mutid=mutid, paired=paired, insane=insane)

if name == 'novoalign':
remap_novoalign_bam(bamfn, threads, fastaref, picardjar, options['novoref'], mutid=mutid, paired=paired)
remap_novoalign_bam(bamfn, threads, fastaref, picardjar, options['novoref'], mutid=mutid, paired=paired, insane=insane)

if name == 'gsnap':
remap_gsnap_bam(bamfn, threads, fastaref, picardjar, options['gsnaprefdir'], options['gsnaprefname'], mutid=mutid, paired=paired)
remap_gsnap_bam(bamfn, threads, fastaref, picardjar, options['gsnaprefdir'], options['gsnaprefname'], mutid=mutid, paired=paired, insane=insane)

if name == 'STAR':
remap_STAR_bam(bamfn, threads, fastaref, picardjar, options['STARrefdir'], mutid=mutid, paired=paired)
remap_STAR_bam(bamfn, threads, fastaref, picardjar, options['STARrefdir'], mutid=mutid, paired=paired, insane=insane)

if name == 'bowtie2':
remap_bowtie2_bam(bamfn, threads, fastaref, picardjar, options['bowtie2ref'], mutid=mutid, paired=paired)
remap_bowtie2_bam(bamfn, threads, fastaref, picardjar, options['bowtie2ref'], mutid=mutid, paired=paired, insane=insane)

if name == 'tmap':
remap_tmap_bam(bamfn, threads, fastaref, picardjar, mutid=mutid, paired=paired)
remap_tmap_bam(bamfn, threads, fastaref, picardjar, mutid=mutid, paired=paired, insane=insane)


def remap_backtrack_bam(bamfn, threads, fastaref, mutid='null', paired=True):
Expand Down Expand Up @@ -154,7 +154,7 @@ def remap_backtrack_bam(bamfn, threads, fastaref, mutid='null', paired=True):
os.remove(samfn)


def remap_bwamem_bam(bamfn, threads, fastaref, picardjar, mutid='null', paired=True, deltmp=True):
def remap_bwamem_bam(bamfn, threads, fastaref, picardjar, mutid='null', paired=True, deltmp=True, insane=False):
""" call bwa mem and samtools to remap .bam
"""
assert os.path.exists(picardjar)
Expand Down Expand Up @@ -206,14 +206,15 @@ def remap_bwamem_bam(bamfn, threads, fastaref, picardjar, mutid='null', paired=T
subprocess.call(idx_cmd)

# check if BAM readcount looks sane
if bamreadcount(bamfn) < fastqreadcount(fastq):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
if not insane:
if bamreadcount(bamfn) < fastqreadcount(fastq):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")

sys.stdout.write("INFO\t" + now() + "\t" + mutid + "\tremoving " + fastq + "\n")
if deltmp: os.remove(fastq)


def remap_bwakit_bam(bamfn, threads, fastaref, picardjar, mutid='null', paired=True, deltmp=True):
def remap_bwakit_bam(bamfn, threads, fastaref, picardjar, mutid='null', paired=True, deltmp=True, insane=False):
""" call bwa kit and samtools to remap .bam
"""
assert os.path.exists(picardjar)
Expand Down Expand Up @@ -263,12 +264,13 @@ def remap_bwakit_bam(bamfn, threads, fastaref, picardjar, mutid='null', paired=T
subprocess.call(idx_cmd)

# check if BAM readcount looks sane
if paired:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]) + fastqreadcount(fastq[1]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
else:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
if not insane:
if paired:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]) + fastqreadcount(fastq[1]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
else:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")

if paired:
for fq in fastq:
Expand All @@ -279,7 +281,7 @@ def remap_bwakit_bam(bamfn, threads, fastaref, picardjar, mutid='null', paired=T
os.remove(fastq[0])


def remap_novoalign_bam(bamfn, threads, fastaref, picardjar, novoref, mutid='null', paired=True):
def remap_novoalign_bam(bamfn, threads, fastaref, picardjar, novoref, mutid='null', paired=True, insane=False):
""" call novoalign and samtools to remap .bam
"""
assert os.path.exists(picardjar)
Expand Down Expand Up @@ -332,12 +334,13 @@ def remap_novoalign_bam(bamfn, threads, fastaref, picardjar, novoref, mutid='nul
subprocess.call(idx_cmd)

# check if BAM readcount looks sane
if paired:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]) + fastqreadcount(fastq[1]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
else:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
if not insane:
if paired:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]) + fastqreadcount(fastq[1]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
else:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")

if paired:
for fq in fastq:
Expand All @@ -348,7 +351,7 @@ def remap_novoalign_bam(bamfn, threads, fastaref, picardjar, novoref, mutid='nul
os.remove(fastq[0])


def remap_gsnap_bam(bamfn, threads, fastaref, picardjar, gsnaprefdir, gsnaprefname, mutid='null', paired=True):
def remap_gsnap_bam(bamfn, threads, fastaref, picardjar, gsnaprefdir, gsnaprefname, mutid='null', paired=True, insane=False):
""" call gsnap and samtools to remap .bam
"""
assert os.path.exists(picardjar)
Expand Down Expand Up @@ -403,12 +406,13 @@ def remap_gsnap_bam(bamfn, threads, fastaref, picardjar, gsnaprefdir, gsnaprefna
subprocess.call(idx_cmd)

# check if BAM readcount looks sane
if paired:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]) + fastqreadcount(fastq[1]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
else:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
if not insane:
if paired:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]) + fastqreadcount(fastq[1]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
else:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")

if paired:
for fq in fastq:
Expand All @@ -418,7 +422,7 @@ def remap_gsnap_bam(bamfn, threads, fastaref, picardjar, gsnaprefdir, gsnaprefna
sys.stdout.write("INFO\t" + now() + "\t" + mutid + "\tremoving " + fastq[0] + "\n")
os.remove(fastq[0])

def remap_STAR_bam(bamfn, threads, fastaref, picardjar, STARrefdir, mutid='null', paired=True):
def remap_STAR_bam(bamfn, threads, fastaref, picardjar, STARrefdir, mutid='null', paired=True, insane=False):
""" call gsnap and samtools to remap .bam
"""
assert os.path.exists(picardjar)
Expand Down Expand Up @@ -470,12 +474,13 @@ def remap_STAR_bam(bamfn, threads, fastaref, picardjar, STARrefdir, mutid='null'
# check if BAM readcount looks sane
# STAR may not report unmapped reads in BAM

if paired:
if bamreadcount(bamfn) < .5*(fastqreadcount(fastq[0]) + fastqreadcount(fastq[1])):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
else:
if bamreadcount(bamfn) < .5*fastqreadcount(fastq[0]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
if not insane:
if paired:
if bamreadcount(bamfn) < .5*(fastqreadcount(fastq[0]) + fastqreadcount(fastq[1])):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
else:
if bamreadcount(bamfn) < .5*fastqreadcount(fastq[0]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")

if paired:
for fq in fastq:
Expand All @@ -485,7 +490,7 @@ def remap_STAR_bam(bamfn, threads, fastaref, picardjar, STARrefdir, mutid='null'
sys.stdout.write("INFO\t" + now() + "\t" + mutid + "\tremoving " + fastq[0] + "\n")
os.remove(fastq[0])

def remap_bowtie2_bam(bamfn, threads, fastaref, picardjar, bowtie2ref, mutid='null', paired=True):
def remap_bowtie2_bam(bamfn, threads, fastaref, picardjar, bowtie2ref, mutid='null', paired=True, insane=False):
""" call bowtie2 and samtools to remap .bam
"""

Expand Down Expand Up @@ -536,12 +541,13 @@ def remap_bowtie2_bam(bamfn, threads, fastaref, picardjar, bowtie2ref, mutid='nu
subprocess.call(idx_cmd)

# check if BAM readcount looks sane
if paired:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]) + fastqreadcount(fastq[1]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
else:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
if not insane:
if paired:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]) + fastqreadcount(fastq[1]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
else:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")

if paired:
for fq in fastq:
Expand All @@ -552,7 +558,7 @@ def remap_bowtie2_bam(bamfn, threads, fastaref, picardjar, bowtie2ref, mutid='nu
os.remove(fastq[0])


def remap_tmap_bam(bamfn, threads, fastaref, picardjar, mutid='null', paired=False):
def remap_tmap_bam(bamfn, threads, fastaref, picardjar, mutid='null', paired=False, insane=False):
""" call bowtie2 and samtools to remap .bam
"""

Expand Down Expand Up @@ -602,12 +608,13 @@ def remap_tmap_bam(bamfn, threads, fastaref, picardjar, mutid='null', paired=Fal
subprocess.call(idx_cmd)

# check if BAM readcount looks sane
if paired:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]) + fastqreadcount(fastq[1]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
else:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
if not insane:
if paired:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]) + fastqreadcount(fastq[1]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")
else:
if bamreadcount(bamfn) < fastqreadcount(fastq[0]):
raise ValueError("ERROR\t" + now() + "\t" + mutid + "\tbam readcount < fastq readcount, alignment sanity check failed!\n")

if paired:
for fq in fastq:
Expand Down
3 changes: 2 additions & 1 deletion bin/addindel.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def makemut(args, chrom, start, end, vaf, ins, avoid, alignopts):

if not hasSNP or args.force:
outbam_muts.close()
aligners.remap_bam(args.aligner, tmpoutbamname, args.refFasta, alignopts, mutid=mutid, paired=(not args.single), picardjar=args.picardjar)
aligners.remap_bam(args.aligner, tmpoutbamname, args.refFasta, alignopts, mutid=mutid, paired=(not args.single), picardjar=args.picardjar, insane=args.insane)

outbam_muts = pysam.Samfile(tmpoutbamname,'rb')
coverwindow = 1
Expand Down Expand Up @@ -414,6 +414,7 @@ def run():
parser.add_argument('--nomut', action='store_true', default=False, help="dry run")
parser.add_argument('--det', action='store_true', default=False, help="deterministic base changes: make transitions only")
parser.add_argument('--force', action='store_true', default=False, help="force mutation to happen regardless of nearby SNP or low coverage")
parser.add_argument('--insane', action='store_true', default=False, help="ignore sanity check enforcing input read count = output read count in realignment")
parser.add_argument('--single', action='store_true', default=False, help="input BAM is simgle-ended (default is paired-end)")
parser.add_argument('--maxopen', dest='maxopen', default=1000, help="maximum number of open files during merge (default 1000)")
parser.add_argument('--requirepaired', action='store_true', default=False, help='skip mutations if unpaired reads are present')
Expand Down
3 changes: 2 additions & 1 deletion bin/addsnv.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def makemut(args, hc, avoid, alignopts):
if not hasSNP or args.force:
outbam_muts.close()

aligners.remap_bam(args.aligner, tmpoutbamname, args.refFasta, alignopts, mutid=hapstr, paired=(not args.single), picardjar=args.picardjar)
aligners.remap_bam(args.aligner, tmpoutbamname, args.refFasta, alignopts, mutid=hapstr, paired=(not args.single), picardjar=args.picardjar, insane=args.insane)

outbam_muts = pysam.Samfile(tmpoutbamname,'rb')
coverwindow = 1
Expand Down Expand Up @@ -496,6 +496,7 @@ def run():
parser.add_argument('--ignoresnps', action='store_true', default=False, help="make mutations even if there are non-reference alleles sharing the relevant reads")
parser.add_argument('--ignoreref', action='store_true', default=False, help="make mutations even if the mutation is back to the reference allele")
parser.add_argument('--force', action='store_true', default=False, help="force mutation to happen regardless of nearby SNP or low coverage")
parser.add_argument('--insane', action='store_true', default=False, help="ignore sanity check enforcing input read count = output read count in realignment")
parser.add_argument('--single', action='store_true', default=False, help="input BAM is simgle-ended (default is paired-end)")
parser.add_argument('--maxopen', dest='maxopen', default=1000, help="maximum number of open files during merge (default 1000)")
parser.add_argument('--requirepaired', action='store_true', default=False, help='skip mutations if unpaired reads are present')
Expand Down

0 comments on commit 310dbbf

Please sign in to comment.