From 703b103239fc76ae1fc64bc191da650a07ee0e19 Mon Sep 17 00:00:00 2001 From: Jared Simpson Date: Fri, 14 Apr 2023 10:21:26 -0400 Subject: [PATCH 01/12] implement htslib basemod API --- src/bam/record.rs | 265 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 265 insertions(+) diff --git a/src/bam/record.rs b/src/bam/record.rs index 9c48ccdd2..4b41b8de1 100644 --- a/src/bam/record.rs +++ b/src/bam/record.rs @@ -1020,6 +1020,40 @@ impl Record { } } + /// Access the base modifications associated with this Record through the MM tag. + /// Example: + /// ``` + /// if let Ok(mods) = record.basemods_iter() { + /// // print metadata for the modifications present in this record + /// for mod_code in mods.recorded() { + /// if let Ok(mod_metadata) = mods.query_type(*mod_code) { + /// println!("mod found with code {}/{} flags: [{} {} {}]", + /// mod_code, *mod_code as u8 as char, + /// mod_metadata.strand, mod_metadata.implicit, mod_metadata.canonical as u8 as char); + /// } + /// } + /// + /// // iterate over the modifications in this record + /// // the modifications are returned as a tuple with the + /// // position within SEQ and an hts_base_mod struct + /// for res in mods { + /// if let Ok( (position, m) ) = res { + /// println!("{} {},{}", position, m.modified_base as u8 as char, m.qual); + /// } + /// } + /// } + /// ``` + pub fn basemods_iter(&self) -> Result { + BaseModificationsIter::new(self) + } + + /// An iterator that returns all of the modifications for each position as a vector. + /// This is useful for the case where multiple possible modifications can be annotated + /// at a single position (for example a C could be 5-mC or 5-hmC) + pub fn basemods_position_iter(&self) -> Result { + BaseModificationsPositionIter::new(self) + } + /// Infer read pair orientation from record. Returns `SequenceReadPairOrientation::None` if record /// is not paired, mates are not mapping to the same contig, or mates start at the /// same position. @@ -2171,6 +2205,237 @@ impl fmt::Display for CigarStringView { } } +pub struct BaseModificationMetadata +{ + pub strand: i32, + pub implicit: i32, + pub canonical: i8 +} + +/// struct containing the internal state required to access +/// the base modifications for a bam::Record +pub struct BaseModificationState<'a> +{ + record: &'a Record, + state: *mut htslib::hts_base_mod_state, + buffer: Vec, + buffer_pos: i32 +} + +impl BaseModificationState<'_> { + + /// Initialize a new BaseModification struct from a bam::Record + /// This function allocates memory for the state structure + /// and initializes the iterator to the start of the modification + /// records. + fn new<'a>(r: &'a Record) -> Result> { + let mut bm = unsafe { + BaseModificationState { + record: r, + state: hts_sys::hts_base_mod_state_alloc(), + buffer: Vec::new(), + buffer_pos: -1 + } + }; + + if bm.state.is_null() { + panic!("Unable to allocate memory for hts_base_mod_state"); + } + + // parse the MM tag to initialize the state + unsafe { + let ret = hts_sys::bam_parse_basemod(bm.record.inner_ptr(), bm.state); + if ret != 0 { + return Err(Error::BamBaseModificationTagNotFound); + } + } + + let types = bm.recorded(); + bm.buffer.reserve(types.len()); + return Ok(bm); + } + + pub fn buffer_next_mods(&mut self) -> Result { + + unsafe { + let ret = hts_sys::bam_next_basemod(self.record.inner_ptr(), + self.state, + self.buffer.as_mut_ptr(), + self.buffer.capacity() as i32, + &mut self.buffer_pos); + + if ret < 0 { + return Err(Error::BamBaseModificationIterationFailed); + } + + // the htslib API won't write more than buffer.capacity() mods to the output array but it will + // return the actual number of modifications found. We return an error to the caller + // in the case where there was insufficient storage to return all mods. + if ret as usize > self.buffer.capacity() { + return Err(Error::BamBaseModificationTooManyMods); + } + + // we read the modifications directly into the vector, which does + // not update the length so needs to be manually set + self.buffer.set_len(ret as usize); + + return Ok(ret as usize) + } + } + + /// Return an array containing the modification codes listed for this record. + /// Positive values are ascii character codes (eg m), negative values are chEBI codes. + pub fn recorded<'a>(&self) -> &'a [i32] { + unsafe { + let mut n: i32 = 0; + let data_ptr: *const i32 = hts_sys::bam_mods_recorded(self.state, &mut n); + + // htslib should not return a null pointer, even when there are no base mods + if data_ptr.is_null() { + panic!("Unable to obtain pointer to base modifications"); + } + assert!(n >= 0); + return slice::from_raw_parts(data_ptr, n as usize); + } + } + + /// Return metadata for the specified character code indicating the strand + /// the base modification was called on, whether the tag uses implicit mode + /// and the ascii code for the canonical base. + pub fn query_type<'a>(&self, code: i32) -> Result { + unsafe { + let mut strand: i32 = 0; + let mut implicit: i32 = 0; + let mut canonical: i8 = 0; + + let ret = hts_sys::bam_mods_query_type(self.state, code, &mut strand, &mut implicit, &mut canonical); + if ret == -1 { + return Err(Error::BamBaseModificationTypeNotFound); + } else { + return Ok(BaseModificationMetadata { + strand: strand, + implicit: implicit, + canonical: canonical + }); + } + } + } +} + +impl Drop for BaseModificationState<'_> { + fn drop<'a>(&mut self) { + unsafe { hts_sys::hts_base_mod_state_free(self.state); } + } +} + +/// Iterator over the base modifications that returns +/// a vector for all of the mods at each position +pub struct BaseModificationsPositionIter<'a> +{ + mod_state: BaseModificationState<'a> +} + +impl BaseModificationsPositionIter<'_> +{ + fn new<'a>(r: &'a Record) -> Result> { + let state = BaseModificationState::new(r)?; + Ok(BaseModificationsPositionIter { mod_state: state }) + } + + pub fn recorded<'a>(&self) -> &'a [i32] { + return self.mod_state.recorded(); + } + + pub fn query_type<'a>(&self, code: i32) -> Result { + return self.mod_state.query_type(code); + } +} + +impl<'a> Iterator for BaseModificationsPositionIter<'a> { + type Item = Result< (i32, Vec) >; + + fn next(&mut self) -> Option< Self::Item > { + let ret = self.mod_state.buffer_next_mods(); + + // Three possible things happened in buffer_next_mods: + // 1. the htslib API call was successful but there are no more mods + // 2. ths htslib API call was successful and we read some mods + // 3. the htslib API call failed, we propogate the error wrapped in an option + match ret { + Ok(num_mods) => { + if num_mods == 0 { + return None; + } + else { + let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone()); + return Some(Ok(data)); + } + }, + Err(e) => { + return Some(Err(e)) + } + } + } +} + +/// Iterator over the base modifications that returns +/// the next modification found, one by one +pub struct BaseModificationsIter<'a> +{ + mod_state: BaseModificationState<'a>, + buffer_idx: usize +} + +impl BaseModificationsIter<'_> +{ + fn new<'a>(r: &'a Record) -> Result> { + let state = BaseModificationState::new(r)?; + Ok(BaseModificationsIter { mod_state: state, buffer_idx: 0 }) + } + + pub fn recorded<'a>(&self) -> &'a [i32] { + return self.mod_state.recorded(); + } + + pub fn query_type<'a>(&self, code: i32) -> Result { + return self.mod_state.query_type(code); + } +} + +impl<'a> Iterator for BaseModificationsIter<'a> { + type Item = Result< (i32, hts_sys::hts_base_mod) >; + + fn next(&mut self) -> Option< Self::Item > { + + if self.buffer_idx == self.mod_state.buffer.len() { + // need to use the internal state to read the next + // set of modifications into the buffer + let ret = self.mod_state.buffer_next_mods(); + + match ret { + Ok(num_mods) => { + if num_mods == 0 { + // done iterating + return None; + } else { + // we read some mods, reset the position in the buffer then fall through + self.buffer_idx = 0; + } + }, + Err(e) => { + return Some(Err(e)) + } + } + } + + // if we got here when there are mods buffered that we haven't emitted yet + assert!(self.buffer_idx < self.mod_state.buffer.len()); + let data = (self.mod_state.buffer_pos, self.mod_state.buffer[self.buffer_idx]); + self.buffer_idx += 1; + return Some(Ok(data)); + } +} + #[cfg(test)] mod tests { use super::*; From 65bb4c7bf70f8e4fdad5e7f7702afeb646ca3a1b Mon Sep 17 00:00:00 2001 From: Jared Simpson Date: Fri, 14 Apr 2023 10:54:28 -0400 Subject: [PATCH 02/12] add error types for basemod API --- src/errors.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/errors.rs b/src/errors.rs index 680bd5f33..8545c684f 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -86,6 +86,17 @@ pub enum Error { #[error("failed to add aux field, tag is already present")] BamAuxTagAlreadyPresent, + // Errors for base modification fields + #[error("no base modification tag found for record")] + BamBaseModificationTagNotFound, + #[error("no base modification with the specified code found in record")] + BamBaseModificationTypeNotFound, + #[error("base modification iteration failed")] + BamBaseModificationIterationFailed, + #[error("base modification found too many modifications")] + BamBaseModificationTooManyMods, + + // Errors for BCF #[error("error allocating internal data structure for BCF/VCF reader (out of memory?)")] BcfAllocationError, From f3432cbf9716f5df70e428a29e78ab5fee66aef5 Mon Sep 17 00:00:00 2001 From: Jared Simpson Date: Mon, 12 Jun 2023 10:54:27 -0400 Subject: [PATCH 03/12] fix formatting with cargo fmt --- src/bam/record.rs | 94 ++++++++++++++++++++++++----------------------- src/errors.rs | 1 - 2 files changed, 48 insertions(+), 47 deletions(-) diff --git a/src/bam/record.rs b/src/bam/record.rs index 4b41b8de1..a6f986efa 100644 --- a/src/bam/record.rs +++ b/src/bam/record.rs @@ -2205,25 +2205,22 @@ impl fmt::Display for CigarStringView { } } -pub struct BaseModificationMetadata -{ +pub struct BaseModificationMetadata { pub strand: i32, pub implicit: i32, - pub canonical: i8 + pub canonical: i8, } /// struct containing the internal state required to access /// the base modifications for a bam::Record -pub struct BaseModificationState<'a> -{ +pub struct BaseModificationState<'a> { record: &'a Record, state: *mut htslib::hts_base_mod_state, buffer: Vec, - buffer_pos: i32 + buffer_pos: i32, } impl BaseModificationState<'_> { - /// Initialize a new BaseModification struct from a bam::Record /// This function allocates memory for the state structure /// and initializes the iterator to the start of the modification @@ -2234,7 +2231,7 @@ impl BaseModificationState<'_> { record: r, state: hts_sys::hts_base_mod_state_alloc(), buffer: Vec::new(), - buffer_pos: -1 + buffer_pos: -1, } }; @@ -2256,13 +2253,14 @@ impl BaseModificationState<'_> { } pub fn buffer_next_mods(&mut self) -> Result { - unsafe { - let ret = hts_sys::bam_next_basemod(self.record.inner_ptr(), - self.state, - self.buffer.as_mut_ptr(), - self.buffer.capacity() as i32, - &mut self.buffer_pos); + let ret = hts_sys::bam_next_basemod( + self.record.inner_ptr(), + self.state, + self.buffer.as_mut_ptr(), + self.buffer.capacity() as i32, + &mut self.buffer_pos, + ); if ret < 0 { return Err(Error::BamBaseModificationIterationFailed); @@ -2279,7 +2277,7 @@ impl BaseModificationState<'_> { // not update the length so needs to be manually set self.buffer.set_len(ret as usize); - return Ok(ret as usize) + return Ok(ret as usize); } } @@ -2308,15 +2306,21 @@ impl BaseModificationState<'_> { let mut implicit: i32 = 0; let mut canonical: i8 = 0; - let ret = hts_sys::bam_mods_query_type(self.state, code, &mut strand, &mut implicit, &mut canonical); + let ret = hts_sys::bam_mods_query_type( + self.state, + code, + &mut strand, + &mut implicit, + &mut canonical, + ); if ret == -1 { return Err(Error::BamBaseModificationTypeNotFound); } else { return Ok(BaseModificationMetadata { - strand: strand, - implicit: implicit, - canonical: canonical - }); + strand: strand, + implicit: implicit, + canonical: canonical, + }); } } } @@ -2324,19 +2328,19 @@ impl BaseModificationState<'_> { impl Drop for BaseModificationState<'_> { fn drop<'a>(&mut self) { - unsafe { hts_sys::hts_base_mod_state_free(self.state); } + unsafe { + hts_sys::hts_base_mod_state_free(self.state); + } } } /// Iterator over the base modifications that returns /// a vector for all of the mods at each position -pub struct BaseModificationsPositionIter<'a> -{ - mod_state: BaseModificationState<'a> +pub struct BaseModificationsPositionIter<'a> { + mod_state: BaseModificationState<'a>, } -impl BaseModificationsPositionIter<'_> -{ +impl BaseModificationsPositionIter<'_> { fn new<'a>(r: &'a Record) -> Result> { let state = BaseModificationState::new(r)?; Ok(BaseModificationsPositionIter { mod_state: state }) @@ -2352,9 +2356,9 @@ impl BaseModificationsPositionIter<'_> } impl<'a> Iterator for BaseModificationsPositionIter<'a> { - type Item = Result< (i32, Vec) >; + type Item = Result<(i32, Vec)>; - fn next(&mut self) -> Option< Self::Item > { + fn next(&mut self) -> Option { let ret = self.mod_state.buffer_next_mods(); // Three possible things happened in buffer_next_mods: @@ -2365,32 +2369,30 @@ impl<'a> Iterator for BaseModificationsPositionIter<'a> { Ok(num_mods) => { if num_mods == 0 { return None; - } - else { + } else { let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone()); return Some(Ok(data)); } - }, - Err(e) => { - return Some(Err(e)) } + Err(e) => return Some(Err(e)), } } } /// Iterator over the base modifications that returns /// the next modification found, one by one -pub struct BaseModificationsIter<'a> -{ +pub struct BaseModificationsIter<'a> { mod_state: BaseModificationState<'a>, - buffer_idx: usize + buffer_idx: usize, } -impl BaseModificationsIter<'_> -{ +impl BaseModificationsIter<'_> { fn new<'a>(r: &'a Record) -> Result> { let state = BaseModificationState::new(r)?; - Ok(BaseModificationsIter { mod_state: state, buffer_idx: 0 }) + Ok(BaseModificationsIter { + mod_state: state, + buffer_idx: 0, + }) } pub fn recorded<'a>(&self) -> &'a [i32] { @@ -2403,10 +2405,9 @@ impl BaseModificationsIter<'_> } impl<'a> Iterator for BaseModificationsIter<'a> { - type Item = Result< (i32, hts_sys::hts_base_mod) >; - - fn next(&mut self) -> Option< Self::Item > { + type Item = Result<(i32, hts_sys::hts_base_mod)>; + fn next(&mut self) -> Option { if self.buffer_idx == self.mod_state.buffer.len() { // need to use the internal state to read the next // set of modifications into the buffer @@ -2421,16 +2422,17 @@ impl<'a> Iterator for BaseModificationsIter<'a> { // we read some mods, reset the position in the buffer then fall through self.buffer_idx = 0; } - }, - Err(e) => { - return Some(Err(e)) } + Err(e) => return Some(Err(e)), } } // if we got here when there are mods buffered that we haven't emitted yet assert!(self.buffer_idx < self.mod_state.buffer.len()); - let data = (self.mod_state.buffer_pos, self.mod_state.buffer[self.buffer_idx]); + let data = ( + self.mod_state.buffer_pos, + self.mod_state.buffer[self.buffer_idx], + ); self.buffer_idx += 1; return Some(Ok(data)); } diff --git a/src/errors.rs b/src/errors.rs index 8545c684f..1c7dd2692 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -96,7 +96,6 @@ pub enum Error { #[error("base modification found too many modifications")] BamBaseModificationTooManyMods, - // Errors for BCF #[error("error allocating internal data structure for BCF/VCF reader (out of memory?)")] BcfAllocationError, From f41e91a1ff86f2751af2c38e0ebad93235b1cdff Mon Sep 17 00:00:00 2001 From: Jared Simpson Date: Tue, 13 Jun 2023 10:03:40 -0400 Subject: [PATCH 04/12] fix linter warnings --- src/bam/record.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bam/record.rs b/src/bam/record.rs index a6f986efa..47e7e5418 100644 --- a/src/bam/record.rs +++ b/src/bam/record.rs @@ -2317,9 +2317,9 @@ impl BaseModificationState<'_> { return Err(Error::BamBaseModificationTypeNotFound); } else { return Ok(BaseModificationMetadata { - strand: strand, - implicit: implicit, - canonical: canonical, + strand, + implicit, + canonical, }); } } From 4f7c76beb0444dbbcd359343598147a8594ced1f Mon Sep 17 00:00:00 2001 From: David Laehnemann Date: Wed, 14 Jun 2023 09:56:46 +0200 Subject: [PATCH 05/12] update `hts-sys` to new release `2.1.0` --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7b8be41e3..c4e5c9512 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ bio-types = ">=0.9" byteorder = "1.3" custom_derive = "0.1" derive-new = "0.5" -hts-sys = {version = "2.0.3", default-features = false} +hts-sys = {version = "2.1.0", default-features = false} ieee754 = "0.2" lazy_static = "1.4" libc = "0.2" From d2762312a3b1c38d2e964e85ec2b955cae6f6c6f Mon Sep 17 00:00:00 2001 From: David Laehnemann Date: Wed, 14 Jun 2023 16:06:22 +0200 Subject: [PATCH 06/12] update to available version of `hts-sys`, now `2.1.1` --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c4e5c9512..88f30c681 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ bio-types = ">=0.9" byteorder = "1.3" custom_derive = "0.1" derive-new = "0.5" -hts-sys = {version = "2.1.0", default-features = false} +hts-sys = {version = "2.1.1", default-features = false} ieee754 = "0.2" lazy_static = "1.4" libc = "0.2" From 36a4dc4d0231ec44b6003a60fe935f4fc4c3b6e9 Mon Sep 17 00:00:00 2001 From: Jared Simpson Date: Wed, 14 Jun 2023 10:51:37 -0400 Subject: [PATCH 07/12] ask rustdoc to ignore example code --- src/bam/record.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bam/record.rs b/src/bam/record.rs index 47e7e5418..a26f701c3 100644 --- a/src/bam/record.rs +++ b/src/bam/record.rs @@ -1022,7 +1022,7 @@ impl Record { /// Access the base modifications associated with this Record through the MM tag. /// Example: - /// ``` + /// ```ignore /// if let Ok(mods) = record.basemods_iter() { /// // print metadata for the modifications present in this record /// for mod_code in mods.recorded() { From 0228945b88e33d92bed8e1b9b9465953336943e7 Mon Sep 17 00:00:00 2001 From: Jared Simpson Date: Wed, 14 Jun 2023 12:47:16 -0400 Subject: [PATCH 08/12] hide code in doctest instead of ignore --- src/bam/record.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/bam/record.rs b/src/bam/record.rs index a26f701c3..9b356caaa 100644 --- a/src/bam/record.rs +++ b/src/bam/record.rs @@ -1022,7 +1022,9 @@ impl Record { /// Access the base modifications associated with this Record through the MM tag. /// Example: - /// ```ignore + /// ``` + /// # use rust_htslib::bam::Record; + /// # let record = Record::new(); /// if let Ok(mods) = record.basemods_iter() { /// // print metadata for the modifications present in this record /// for mod_code in mods.recorded() { From f18bedb00bf9b34991377e438b040422f1a2d906 Mon Sep 17 00:00:00 2001 From: David Laehnemann Date: Thu, 15 Jun 2023 10:31:40 +0200 Subject: [PATCH 09/12] try the fix that doctest failure suggests --- src/bam/record.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bam/record.rs b/src/bam/record.rs index 9b356caaa..12deb50e8 100644 --- a/src/bam/record.rs +++ b/src/bam/record.rs @@ -1043,7 +1043,7 @@ impl Record { /// println!("{} {},{}", position, m.modified_base as u8 as char, m.qual); /// } /// } - /// } + /// }; /// ``` pub fn basemods_iter(&self) -> Result { BaseModificationsIter::new(self) From f17711658db2e8097ca8f4d4a821a2020f51c21a Mon Sep 17 00:00:00 2001 From: Jared Simpson Date: Thu, 15 Jun 2023 14:45:36 -0400 Subject: [PATCH 10/12] import basemod doctest, add test data from htslib --- src/bam/record.rs | 42 ++++++++++++++++++++---------------- test/base_mods/MM-orient.sam | 6 ++++++ 2 files changed, 30 insertions(+), 18 deletions(-) create mode 100644 test/base_mods/MM-orient.sam diff --git a/src/bam/record.rs b/src/bam/record.rs index 12deb50e8..b6507eb37 100644 --- a/src/bam/record.rs +++ b/src/bam/record.rs @@ -1023,27 +1023,33 @@ impl Record { /// Access the base modifications associated with this Record through the MM tag. /// Example: /// ``` - /// # use rust_htslib::bam::Record; - /// # let record = Record::new(); - /// if let Ok(mods) = record.basemods_iter() { - /// // print metadata for the modifications present in this record - /// for mod_code in mods.recorded() { - /// if let Ok(mod_metadata) = mods.query_type(*mod_code) { - /// println!("mod found with code {}/{} flags: [{} {} {}]", - /// mod_code, *mod_code as u8 as char, - /// mod_metadata.strand, mod_metadata.implicit, mod_metadata.canonical as u8 as char); + /// use rust_htslib::bam::{Read, Reader, Record}; + /// let mut bam = Reader::from_path(&"test/base_mods/MM-orient.sam").unwrap(); + /// let mut mod_count = 0; + /// for r in bam.records() { + /// let record = r.unwrap(); + /// if let Ok(mods) = record.basemods_iter() { + /// // print metadata for the modifications present in this record + /// for mod_code in mods.recorded() { + /// if let Ok(mod_metadata) = mods.query_type(*mod_code) { + /// println!("mod found with code {}/{} flags: [{} {} {}]", + /// mod_code, *mod_code as u8 as char, + /// mod_metadata.strand, mod_metadata.implicit, mod_metadata.canonical as u8 as char); + /// } /// } - /// } /// - /// // iterate over the modifications in this record - /// // the modifications are returned as a tuple with the - /// // position within SEQ and an hts_base_mod struct - /// for res in mods { - /// if let Ok( (position, m) ) = res { - /// println!("{} {},{}", position, m.modified_base as u8 as char, m.qual); + /// // iterate over the modifications in this record + /// // the modifications are returned as a tuple with the + /// // position within SEQ and an hts_base_mod struct + /// for res in mods { + /// if let Ok( (position, m) ) = res { + /// println!("{} {},{}", position, m.modified_base as u8 as char, m.qual); + /// mod_count += 1; + /// } /// } - /// } - /// }; + /// }; + /// } + /// assert_eq!(mod_count, 14); /// ``` pub fn basemods_iter(&self) -> Result { BaseModificationsIter::new(self) diff --git a/test/base_mods/MM-orient.sam b/test/base_mods/MM-orient.sam new file mode 100644 index 000000000..363e7c2be --- /dev/null +++ b/test/base_mods/MM-orient.sam @@ -0,0 +1,6 @@ +@CO Testing mods on top and bottom strand, but also in +@CO original vs reverse-complemented orientation +top-fwd 0 * 0 0 * * 0 0 AGGATCTCTAGCGGATCGGCGGGGGATATGCCATAT * Mm:Z:C+m,1,3,0; Ml:B:C,128,153,179 +top-rev 16 * 0 0 * * 0 0 ATATGGCATATCCCCCGCCGATCCGCTAGAGATCCT * Mm:Z:C+m,1,3,0; Ml:B:C,128,153,179 +bot-fwd 0 * 0 0 * * 0 0 AGGATCTCTAGCGGATCGGCGGGGGATATGCCATAT * Mm:Z:G-m,0,0,4,3; Ml:B:C,115,141,166,192 +bot-rev 16 * 0 0 * * 0 0 ATATGGCATATCCCCCGCCGATCCGCTAGAGATCCT * Mm:Z:G-m,0,0,4,3; Ml:B:C,115,141,166,192 From 0f2ec92c801f8a7ca08215c777b89aaf42d45289 Mon Sep 17 00:00:00 2001 From: Jared Simpson Date: Mon, 19 Jun 2023 15:44:53 -0400 Subject: [PATCH 11/12] add additional tests for basemod API --- src/bam/record.rs | 78 ++++++++++++++++++++++++++++++++++++ test/base_mods/MM-double.sam | 3 ++ 2 files changed, 81 insertions(+) create mode 100644 test/base_mods/MM-double.sam diff --git a/src/bam/record.rs b/src/bam/record.rs index b6507eb37..64bb4d978 100644 --- a/src/bam/record.rs +++ b/src/bam/record.rs @@ -2308,6 +2308,8 @@ impl BaseModificationState<'_> { /// Return metadata for the specified character code indicating the strand /// the base modification was called on, whether the tag uses implicit mode /// and the ascii code for the canonical base. + /// If there are multiple modifications with the same code this will return the data + /// for the first mod. See https://github.com/samtools/htslib/issues/1635 pub fn query_type<'a>(&self, code: i32) -> Result { unsafe { let mut strand: i32 = 0; @@ -2891,3 +2893,79 @@ mod alignment_cigar_tests { } } } + +#[cfg(test)] +mod basemod_tests { + use crate::bam::{Read, Reader}; + + #[test] + pub fn test_count_recorded() { + let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap(); + + for r in bam.records() { + let record = r.unwrap(); + if let Ok(mods) = record.basemods_iter() { + let n = mods.recorded().len(); + assert_eq!(n, 3); + }; + } + } + + #[test] + pub fn test_query_type() { + let mut bam = Reader::from_path(&"test/base_mods/MM-orient.sam").unwrap(); + + let mut n_fwd = 0; + let mut n_rev = 0; + + for r in bam.records() { + let record = r.unwrap(); + if let Ok(mods) = record.basemods_iter() { + for mod_code in mods.recorded() { + if let Ok(mod_metadata) = mods.query_type(*mod_code) { + if mod_metadata.strand == 0 { n_fwd += 1; } + if mod_metadata.strand == 1 { n_rev += 1; } + } + } + }; + } + assert_eq!(n_fwd, 2); + assert_eq!(n_rev, 2); + } + + #[test] + pub fn test_mod_iter() { + let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap(); + let expected_positions = [ 1, 7, 12, 13, 13, 22, 30, 31 ]; + let mut i = 0; + + for r in bam.records() { + let record = r.unwrap(); + for res in record.basemods_iter().unwrap() { + if let Ok( (position, _m) ) = res { + assert_eq!(position, expected_positions[i]); + i += 1; + } + } + } + } + + #[test] + pub fn test_position_iter() { + let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap(); + let expected_positions = [ 1, 7, 12, 13, 22, 30, 31 ]; + let expected_counts = [ 1, 1, 1, 2, 1, 1, 1 ]; + let mut i = 0; + + for r in bam.records() { + let record = r.unwrap(); + for res in record.basemods_position_iter().unwrap() { + if let Ok( (position, elements) ) = res { + assert_eq!(position, expected_positions[i]); + assert_eq!(elements.len(), expected_counts[i]); + i += 1; + } + } + } + } +} diff --git a/test/base_mods/MM-double.sam b/test/base_mods/MM-double.sam new file mode 100644 index 000000000..608516fc1 --- /dev/null +++ b/test/base_mods/MM-double.sam @@ -0,0 +1,3 @@ +@CO Modifications called on both strands of the same record, +@CO including potentially at the same location simultaneously. +* 0 * 0 0 * * 0 0 AGGATCTCTAGCGGATCGGCGGGGGATATGCCATAT * Mm:Z:C+m,1,3,0;G-m,0,2,0,4;G+o,4; Ml:B:C,128,153,179,115,141,166,192,102 From cee863593358643277f6589d904c32df9349122a Mon Sep 17 00:00:00 2001 From: Jared Simpson Date: Tue, 20 Jun 2023 08:35:02 -0400 Subject: [PATCH 12/12] update base mod tests with cargo fmt --- src/bam/record.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/bam/record.rs b/src/bam/record.rs index 64bb4d978..9df4a168f 100644 --- a/src/bam/record.rs +++ b/src/bam/record.rs @@ -2923,8 +2923,12 @@ mod basemod_tests { if let Ok(mods) = record.basemods_iter() { for mod_code in mods.recorded() { if let Ok(mod_metadata) = mods.query_type(*mod_code) { - if mod_metadata.strand == 0 { n_fwd += 1; } - if mod_metadata.strand == 1 { n_rev += 1; } + if mod_metadata.strand == 0 { + n_fwd += 1; + } + if mod_metadata.strand == 1 { + n_rev += 1; + } } } }; @@ -2936,13 +2940,13 @@ mod basemod_tests { #[test] pub fn test_mod_iter() { let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap(); - let expected_positions = [ 1, 7, 12, 13, 13, 22, 30, 31 ]; + let expected_positions = [1, 7, 12, 13, 13, 22, 30, 31]; let mut i = 0; for r in bam.records() { let record = r.unwrap(); for res in record.basemods_iter().unwrap() { - if let Ok( (position, _m) ) = res { + if let Ok((position, _m)) = res { assert_eq!(position, expected_positions[i]); i += 1; } @@ -2953,14 +2957,14 @@ mod basemod_tests { #[test] pub fn test_position_iter() { let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap(); - let expected_positions = [ 1, 7, 12, 13, 22, 30, 31 ]; - let expected_counts = [ 1, 1, 1, 2, 1, 1, 1 ]; + let expected_positions = [1, 7, 12, 13, 22, 30, 31]; + let expected_counts = [1, 1, 1, 2, 1, 1, 1]; let mut i = 0; for r in bam.records() { let record = r.unwrap(); for res in record.basemods_position_iter().unwrap() { - if let Ok( (position, elements) ) = res { + if let Ok((position, elements)) = res { assert_eq!(position, expected_positions[i]); assert_eq!(elements.len(), expected_counts[i]); i += 1;