Skip to content

Commit

Permalink
allow per-variant SV VAF adjustment via varfile
Browse files Browse the repository at this point in the history
  • Loading branch information
adamewing committed Jan 24, 2017
1 parent 6ee1f8e commit b096aca
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 14 deletions.
43 changes: 34 additions & 9 deletions bin/addsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ def makemut(args, bedline, alignopts):
chrom = c[0]
start = int(c[1])
end = int(c[2])
araw = c[3:len(c)] # INV, DEL, INS seqfile.fa TSDlength, DUP
araw = c[3:] # INV, DEL, INS, DUP, TRN

# translocation specific
trn_chrom = None
Expand All @@ -355,6 +355,10 @@ def makemut(args, bedline, alignopts):
is_transloc = c[3] == 'TRN'

if is_transloc:
araw = [c[3]]
if len(c) > 7:
araw += c[7:]

start -= 3000
end += 3000
if start < 0: start = 0
Expand All @@ -366,16 +370,18 @@ def makemut(args, bedline, alignopts):

actions = map(lambda x: x.strip(),' '.join(araw).split(','))

svfrac = float(args.svfrac) # default, can be overridden by cnv file
svfrac = float(args.svfrac) # default, can be overridden by cnv file or per-variant

cn = 1.0

if cnv: # CNV file is present
if chrom in cnv.contigs:
for cnregion in cnv.fetch(chrom,start,end):
cn = float(cnregion.strip().split()[3]) # expect chrom,start,end,CN
sys.stdout.write("INFO\t" + now() + "\t" + mutid + "\t" + ' '.join(("copy number in sv region:",chrom,str(start),str(end),"=",str(cn))) + "\n")
svfrac = 1.0/float(cn)
assert svfrac <= 1.0
sys.stdout.write("INFO\t" + now() + "\t" + mutid + "\tadjusted MAF: " + str(svfrac) + "\n")
svfrac = svfrac/float(cn)
assert svfrac <= 1.0, 'copy number from %s must be at least 1: %s' % (args.cnvfile, snregion.stri[()])
sys.stdout.write("INFO\t" + now() + "\t" + mutid + "\tadjusted default MAF: " + str(svfrac) + "\n")

print "INFO\t" + now() + "\t" + mutid + "\tinterval:", c
print "INFO\t" + now() + "\t" + mutid + "\tlength:", end-start
Expand Down Expand Up @@ -485,22 +491,28 @@ def makemut(args, bedline, alignopts):
assert len(a) > 1 # insertion syntax: INS <file.fa> [optional TSDlen]
insseqfile = a[1]
if not (os.path.exists(insseqfile) or insseqfile == 'RND'): # not a file... is it a sequence? (support indel ins.)
assert re.search('^[ATGCatgc]*$',insseqfile) # make sure it's a sequence
assert re.search('^[ATGCatgc]*$',insseqfile), "cannot determine SV type: %s" % insseqfile # make sure it's a sequence
insseq = insseqfile.upper()
insseqfile = None
if len(a) > 2: # field 5 for insertion is TSD Length
tsdlen = int(a[2])

if len(a) > 3: # field 5 for insertion is motif, format = 'NNNN/NNNN where / is cut site
if len(a) > 3: # field 6 for insertion is motif, format = 'NNNN^NNNN where ^ is cut site
ins_motif = a[3]
assert '^' in ins_motif, 'insertion motif specification requires cut site defined by ^'

if len(a) > 4: # field 7 is VAF
svfrac = float(a[4])/cn

if action == 'DUP':
if len(a) > 1:
ndups = int(a[1])
else:
ndups = 1

if len(a) > 2: # VAF
svfrac = float(a[2])/cn

if action == 'DEL':
if len(a) > 1:
dsize = float(a[1])
Expand All @@ -511,9 +523,19 @@ def makemut(args, bedline, alignopts):
else:
dsize = 1.0

if len(a) > 2: # VAF
svfrac = float(a[2])/cn

if action == 'TRN':
pass
if len(a) > 1:
svfrac = float(a[1])/cn

if action == 'INV':
if len(a) > 1:
svfrac = float(a[1])/cn


print "INFO\t" + now() + "\t" + mutid + "\tfinal VAF accounting for copy number %f: %f" % (cn, svfrac)

logfile.write(">" + chrom + ":" + str(refstart) + "-" + str(refend) + " BEFORE\n" + str(mutseq) + "\n")

Expand Down Expand Up @@ -579,6 +601,9 @@ def makemut(args, bedline, alignopts):
print "INFO\t" + now() + "\t" + mutid + "\tset paired end mean distance: " + str(args.ismean)
print "INFO\t" + now() + "\t" + mutid + "\tset paired end distance stddev: " + str(args.issd)




# simulate reads
(fq1, fq2) = runwgsim(maxcontig, mutseq.seq, svfrac, actions, exclude, pemean, pesd, args.tmpdir, mutid=mutid, seed=args.seed, trn_contig=trn_maxcontig)

Expand Down Expand Up @@ -748,7 +773,7 @@ def main(args):
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='adds SVs to reads, outputs modified reads as .bam along with mates')
parser.add_argument('-v', '--varfile', dest='varFileName', required=True,
help='whitespace-delimited target regions to try and add a SV: chrom,start,stop,action,seqfile (if insertion),TSDlength (if insertion)')
help='whitespace-delimited target regions for SV spike-in, see manual for syntax')
parser.add_argument('-f', '--bamfile', dest='bamFileName', required=True,
help='sam/bam file from which to obtain reads')
parser.add_argument('-r', '--reference', dest='refFasta', required=True,
Expand Down
10 changes: 5 additions & 5 deletions test_data/test_sv.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
22 34171055 34184700 INS ACTAACCTGCACAATGTGCACATGTACCCTAAAACTTAGAGTATAATAAAAAAAA 13 AA^TTTT
22 33607508 33607508 TRN 22 34314981 34314981
22 33871043 33884754 DEL 0.9
22 34071043 34084754 INV
22 34271043 34284754 DUP 3
22 34171055 34184700 INS ACTAACCTGCACAATGTGCACATGTACCCTAAAACTTAGAGTATAATAAAAAAAA 13 AA^TTTT 0.8
22 33607508 33607508 TRN 22 34314981 34314981 0.9
22 33871043 33884754 DEL 0.9 0.75
22 34071043 34084754 INV 0.9
22 34271043 34284754 DUP 3 0.9

0 comments on commit b096aca

Please sign in to comment.