Skip to content

Commit

Permalink
Add adjustments to fix depth biases
Browse files Browse the repository at this point in the history
  • Loading branch information
rrwick committed Aug 12, 2019
1 parent cb51b11 commit ad69a0d
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 45 deletions.
38 changes: 29 additions & 9 deletions badread/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def simulate(args, output=sys.stderr):
ref_seqs, ref_depths, ref_circular = load_reference(args.reference, output)
rev_comp_ref_seqs = {name: reverse_complement(seq) for name, seq in ref_seqs.items()}
frag_lengths = FragmentLengths(args.mean_frag_length, args.frag_length_stdev, output)
adjust_depths(ref_seqs, ref_depths, ref_circular, frag_lengths, args)
identities = Identities(args.mean_identity, args.identity_stdev, args.max_identity, output)
error_model = ErrorModel(args.error_model, output)
qscore_model = QScoreModel(args.qscore_model, output)
Expand Down Expand Up @@ -157,7 +158,7 @@ def get_fragment(frag_lengths, ref_seqs, rev_comp_ref_seqs, ref_contigs, ref_con
# repeatedly until we get a result.
for _ in range(100):
seq, info = get_real_fragment(fragment_length, ref_seqs, rev_comp_ref_seqs, ref_contigs,
ref_contig_weights, ref_circular, args)
ref_contig_weights, ref_circular)
if seq != '':
return seq, info
sys.exit('Error: failed to generate any sequence fragments - are your read lengths '
Expand All @@ -180,7 +181,7 @@ def get_fragment_type(args):


def get_real_fragment(fragment_length, ref_seqs, rev_comp_ref_seqs, ref_contigs,
ref_contig_weights, ref_circular, args):
ref_contig_weights, ref_circular):

if len(ref_contigs) == 1:
contig = ref_contigs[0]
Expand All @@ -200,14 +201,10 @@ def get_real_fragment(fragment_length, ref_seqs, rev_comp_ref_seqs, ref_contigs,
info.append('0-' + str(len(seq)))
return seq, info

# If the reference contig is circular and the fragment length is too long, then we either
# fail to get the read (if --small_plasmid_bias was used) or bring the fragment size back
# down to the contig size.
# If the reference contig is circular and the fragment length is too long, then we fail to get
# the read.
if fragment_length > len(seq) and ref_circular[contig]:
if args.small_plasmid_bias:
return '', ''
else:
fragment_length = len(seq)
return '', ''

start_pos = random.randint(0, len(seq)-1)
end_pos = start_pos + fragment_length
Expand Down Expand Up @@ -489,3 +486,26 @@ def print_intro(output):
print('', file=output)
print(f'Badread v{__version__}', file=output)
print(f'long read simulation', file=output)


def adjust_depths(ref_seqs, ref_depths, ref_circular, frag_lengths, args):
sampled_lengths = [frag_lengths.get_fragment_length() for x in range(100000)]
total = sum(sampled_lengths)
for ref_name, ref_seq in ref_seqs.items():
ref_len = len(ref_seq)
ref_circ = ref_circular[ref_name]

# Circular plasmids may have to have their depth increased due compensate for misses.
if not args.small_plasmid_bias and ref_circ:
passing_total = sum(length for length in sampled_lengths if length <= ref_len)
if passing_total == 0:
sys.exit('Error: fragment length distribution incompatible with reference lengths '
'- try running with --small_plasmid_bias to avoid this error')
adjustment = total / passing_total
ref_depths[ref_name] *= adjustment

# Linear plasmids may have to have their depth increased due compensate for truncations.
if not ref_circ:
passing_total = sum(min(ref_len, length) for length in sampled_lengths)
adjustment = total / passing_total
ref_depths[ref_name] *= adjustment
2 changes: 1 addition & 1 deletion badread/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
If not, see <http://www.gnu.org/licenses/>.
"""

__version__ = '0.1.3'
__version__ = '0.1.4'
22 changes: 0 additions & 22 deletions test/test_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,28 +322,6 @@ def setUp(self):
def tearDown(self):
self.null.close()

def test_bias_off(self):
# When bias is off, the fragments end up at the reference length (even though the frag
# length distribution wants them bigger).
start_adapt_rate, start_adapt_amount = 0.0, 0.0
end_adapt_rate, end_adapt_amount = 0.0, 0.0
Args = collections.namedtuple('Args', ['start_adapter_seq', 'end_adapter_seq',
'junk_reads', 'random_reads', 'chimeras',
'glitch_rate', 'glitch_size', 'glitch_skip',
'small_plasmid_bias'])
args = Args(start_adapter_seq='', end_adapter_seq='',
junk_reads=0, random_reads=0, chimeras=0,
glitch_rate=0, glitch_size=0, glitch_skip=0,
small_plasmid_bias=False)
for _ in range(self.trials):
fragment, info = \
badread.simulate.build_fragment(self.lengths, self.ref_seqs, self.rev_comp_ref_seqs,
self.ref_contigs, self.ref_contig_weights,
self.ref_circular, args, start_adapt_rate,
start_adapt_amount, end_adapt_rate,
end_adapt_amount)
self.assertEqual(len(fragment), 1000)

def test_bias_on(self):
# When bias is on, the program quits with an error, because it can't make any fragments.
start_adapt_rate, start_adapt_amount = 0.0, 0.0
Expand Down
13 changes: 0 additions & 13 deletions test/test_simulate2.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,19 +134,6 @@ def test_counts_3(self):
self.assertGreater(ratio, 1.6)
self.assertLess(ratio, 2.4)

def test_plasmid_bias_off(self):
# Contig H is 1% the size of contig G but 100x the depth, so it should get about the same
# number of reads.
ref_filename = os.path.join(os.path.dirname(__file__), 'test_ref_5.fasta')
with badread.misc.captured_output() as (out, err):
sequence(ref_filename)
out, err = out.getvalue().strip(), err.getvalue().strip()
counts = count_ref_contigs(out)
g_count, h_count = counts['G'], counts['H']
ratio = h_count / g_count
self.assertGreater(ratio, 0.6)
self.assertLess(ratio, 1.4)

def test_plasmid_bias_on(self):
# When --small_plasmid_bias is on, contig H gets few to no reads, because its length
# (50 bp) is much less than the mean fragment length (100 bp).
Expand Down

0 comments on commit ad69a0d

Please sign in to comment.