Skip to content

Commit

Permalink
Test should not be OK again!
Browse files Browse the repository at this point in the history
  • Loading branch information
stela2502 committed Jan 22, 2024
1 parent f7e65f9 commit 016f1df
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 31 deletions.
14 changes: 14 additions & 0 deletions News.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# 1.2.5

The mapper has significantly improved: both the false positive and the false negative rates have improved.
The improvement was made possible by using a Needleman-Wunsch-inspired algorithm.
The 32 bp matches are now tolerant to not only bp mismatches, but also insertions and deletions.

There is no longer a need to exclude polyA containing reads.

Compared to plain bp-replacement matching, we find, for example, almost 10x more reads from the Ighm locus in the test data.
None of the reads I have seen so far looked like a non-Ighm transcript. All the reads I checked were also mapped to the Ighm transcripts using NCBI BLAST (I only checked the strange-looking ones).

I have added the Ighm reads that were detected with both settings to this repository.


# 1.2.4

PolyA-containing R2 reads are now filtered out if a PolyA stretch of at least 15 A's is detected in the last 30 bp of an R2 read.
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@ You can inspect the state of the program using this [deatiled comparison between

## News

The mapper has significantly improved: both the false positive and the false negative rates have improved.
The improvement was made possible by using a Needleman-Wunsch-inspired algorithm.
The 32 bp matches are now tolerant to not only bp mismatches, but also insertions and deletions.

Compared to plain bp-replacement matching, we find, for example, almost 10x more reads from the Ighm locus in the test data.
None of the reads I have seen so far looked like a non-Ighm transcript. All the reads I checked were also mapped to the Ighm transcripts using NCBI BLAST (I only checked the strange-looking ones).

I have added the Ighm reads that were detected with both settings to this repository.


quantify_rhapsody has finally gotten a multi-processor upgrade: quantify_rhapsody_multi.
I have not tested it completely yet, but am confident it works correctly. (Famous last words - I know). Only the PCR duplicates are not collected correctly, as they can now only be measured within each chunk of the data. But UMIs are all this tool measures.
So even if the PCR duplicates are not counted correctly they will nevertheless be excluded from the final data.
Expand Down
4 changes: 2 additions & 2 deletions src/fast_mapper/mapper_entries/mapper_entries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ impl MapperEntry{
map,
only :0,
hamming_cut :2,
needleman_wunsch_cut: 25
needleman_wunsch_cut: 20
}
}

Expand Down Expand Up @@ -108,7 +108,7 @@ impl MapperEntry{
//eprintln!("Distance is {dist}");
//if dist <= self.hamming_cut {
if dist <= self.needleman_wunsch_cut {
//eprintln!( "{seq:?} did match to {:?} should that be right?", self.map[i].0);
println!( "{seq} did match to \n{} ({}) should that be right?", self.map[i].0, dist);
ret.push( &self.map[i].1 );
dists.push( dist );
if dist < min_dist{
Expand Down
4 changes: 2 additions & 2 deletions src/fast_mapper/mapper_entries/second_seq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,12 +147,12 @@ impl SecondSeq {
}

// Uncomment the following lines to print the alignment matrix
/*for i in 0..rows {
for i in 0..rows {
for j in 0..cols {
print!("{:4} ", matrix[i][j].score);
}
println!();
}*/
}

(size as i32 - matrix[rows - 1][cols - 1].score).abs() as u32
}
Expand Down
33 changes: 14 additions & 19 deletions tests/fast_mapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,20 +33,13 @@ mod tests {

//assert_eq!( mapper.with_data, 51 );

assert_eq!( mapper.get( b"ATATTATTTGGTATCTTTTACTTACCTGCTTGAATACTTGAATAAACCATTC", &mut tool ), Some(vec![0]) );
assert_eq!( mapper.get( b"AAAAGAAGAGGAtggagagggagtgggaaggaGAAAAGTCG", &mut tool ), Some(vec![1]) );
assert_eq!( mapper.get( b"GTTGTATATTATTTGGTATCTTTTACTTACCTGCTTGAATACTTG", &mut tool ), Some(vec![0]) );
assert_eq!( mapper.get( b"GGGCTCCGGAGCCAGGAAAAGAAGAGGAtggagagggagt", &mut tool ), Some(vec![1]) );

//adding the same sequence as Gene2 with the name Gene3 should not make the next search return None
mapper.add( &b"CGATTACTTCTGTTCCATCGCCCACACCTTTGAACCCTAGGGCTGGGTTGAACATCTTCTGTCTCCTAGGTCTGC".to_vec(), "Gene3".to_string(),EMPTY_VEC.clone() );
mapper.add( &b"CCAAGAATGGTTCCTGTGTTGTATATTATTTGGTATCTTTTACTTACCTGCTTGAATACTTGAATAAACCATTCACCGGTTTTAATCCTTTTACTTCAAAACTTACACATACTGACCTAC".to_vec(), "Gene3".to_string(),EMPTY_VEC.clone() );

assert_eq!( mapper.get( b"CGATTACTTCTGTTCCATCGCCCACACCCTCAGAAGCACATCGACTTCTCCCTCCGTTCTCCTTATGGCGGCGGC", &mut tool ), None );

let mut gnames = Vec::<String>::with_capacity(3);
gnames.push( "Gene1".to_string() );
gnames.push( "Gene2".to_string() );
gnames.push( "Gene3".to_string() );
assert_eq!( mapper.names_store, gnames );
mapper.print();
assert_eq!( mapper.get( b"GTTGTATATTATTTGGTATCTTTTACTTACCTGCTTGAATACTTG", &mut tool ), None );

}
#[test]
Expand Down Expand Up @@ -162,10 +155,12 @@ mod tests {
#[test]
fn check_samples_shifted() {
let mut mapper = FastMapper::new( 32, 10 );
mapper.change_start_id( 10 );
// "AGGAGGCCCCGCGTGAGAGTGATCAATCCAGGATACATTCCCGTC"
//mapper.change_start_id( 10 );
// samples[0] "AAGAGTCGACTGCCATGTCCCCTCCGCGGGTCCGTGCCCCCCAAG"
// "GGCAAGGTGTCACATTGGGCTACCGCGGGAGGTCGACCAGATCCT"
let sample2 = b"GTTGTCAAGATGCTACCGTTCAGAGGGCAAGGTGTCACATTGGGCTACCGCGGGAAGTCGACCAGATCCTA";
//let sample = b"GTTGTCAAGATGCTACCGTTCTGAGGGCAAGGTGTCACTTTGGGCTACCGCGGGAAGTCGACCAGATCCTA";
//
let sample_real = b"GTTGTCAAGATGCTACCGTTCAGAGAAGAGTCGACTGCCATGTCCCCTCCGCGGGTCCGTGCCCCCCAAGAAAA";
let sequences = [
b"AAGAGTCGACTGCCATGTCCCCTCCGCGGGTCCGTGCCCCCCAAG", b"ACCGATTAGGTGCGAGGCGCTATAGTCGTACGTCGTTGCCGTGCC",
Expand All @@ -183,17 +178,17 @@ mod tests {
}
let mut tool = IntToStr::new( b"AAGGCCTT".to_vec(), 27);

assert_eq!( mapper.get_strict( sequences[0], &mut tool ), Some(vec![10]) );
assert_eq!( mapper.get_strict( sequences[0], &mut tool ), Some(vec![0]) );
println!("\n");
assert_eq!( mapper.get_strict( sequences[1], &mut tool ), Some(vec![11]) );
assert_eq!( mapper.get_strict( sequences[1], &mut tool ), Some(vec![1]) );
println!("\n");
assert_eq!( mapper.get( sample2, &mut tool ), None );
assert_eq!( mapper.get( sample2, &mut tool ), Some(vec![4]) );
println!("\n");
assert_eq!( mapper.get_strict( &sequences[0][7..], &mut tool ), Some(vec![10]) );
assert_eq!( mapper.get_strict( &sequences[0][7..], &mut tool ), Some(vec![0]) );
println!("\n");
assert_eq!( mapper.get_strict( &sequences[11][7..], &mut tool ), Some(vec![21]) );
assert_eq!( mapper.get_strict( &sequences[11][7..], &mut tool ), Some(vec![11]) );
println!("\n");
assert_eq!( mapper.get_strict( sample_real, &mut tool ), Some(vec![10]) );
assert_eq!( mapper.get_strict( sample_real, &mut tool ), Some(vec![0]) );
println!("\n");
}

Expand Down
2 changes: 1 addition & 1 deletion tests/int_to_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ mod tests {
while let Some(_entries) = tool.next(){
i+=1;
}
assert_eq!( i,45, "A total of 54 fragments!")
assert_eq!( i,66, "A total of 54 fragments!")
}

}
14 changes: 7 additions & 7 deletions tests/second_seq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,22 +54,22 @@ mod tests {
}


// works somehow. Catty - thanky you!
// works somehow. Chatty - thanky you!
#[test]
fn test_needleman_wunsch() {
let seq1 = SecondSeq(0b101010, 20);
let seq2 = SecondSeq(0b101010, 20);
assert_eq!( seq1.needleman_wunsch( &seq2 ), 20 );
assert_eq!( seq1.needleman_wunsch( &seq2 ), 1 );
let seq3 = SecondSeq(0b011010, 20);
assert_eq!( seq1.needleman_wunsch( &seq3 ), 18 );
assert_eq!( seq1.needleman_wunsch( &seq3 ), 3 );
let seq4 = SecondSeq(0b001010, 20);
assert_eq!( seq1.needleman_wunsch( &seq4 ), 18 );
assert_eq!( seq1.needleman_wunsch( &seq4 ), 3 );
let seq5 = SecondSeq(0b011001, 20);
assert_eq!( seq1.needleman_wunsch( &seq5 ), 16 );
assert_eq!( seq1.needleman_wunsch( &seq5 ), 5 );
let seq6 = SecondSeq(0b0, 20);
assert_eq!( seq1.needleman_wunsch( &seq6 ), 14 );
assert_eq!( seq1.needleman_wunsch( &seq6 ), 7 );
let seq7 = SecondSeq(0b101010, 15);
assert_eq!( seq1.needleman_wunsch( &seq7 ), 5 );
assert_eq!( seq1.needleman_wunsch( &seq7 ), 1 );
}

#[test]
Expand Down

0 comments on commit 016f1df

Please sign in to comment.