Skip to content

Commit

Permalink
2024/10/25-12:57:27 (Linux cray unknown)
Browse files Browse the repository at this point in the history
  • Loading branch information
pbenner committed Oct 25, 2024
1 parent c523447 commit ea5edbe
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 20 deletions.
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ pub mod track_granges;
pub mod track_simple;
pub mod track_simple_bedgraph;
pub mod track_simple_bigwig;
pub mod track_simple_coverage;
pub mod track_simple_wig;
pub mod track_statistics;

Expand Down
1 change: 1 addition & 0 deletions src/track.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ pub trait Track {
/* -------------------------------------------------------------------------- */

pub trait MutableTrack : Track {
fn as_track(&self) -> &dyn Track;
fn get_sequence_mut(&mut self, seqname: &str) -> Result<TrackMutableSequence, Box<dyn Error>>;
}

Expand Down
3 changes: 1 addition & 2 deletions src/track_coverage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ use crate::error::ArgumentError;

use crate::coverage::{CoverageError, CoverageConfig, OptionCoverage, FraglenEstimate};
use crate::read_stream::ReadStream;
use crate::track_generic::GenericMutableTrack;
use crate::track_simple::SimpleTrack;
use crate::track_statistics::estimate_fragment_length;

Expand Down Expand Up @@ -198,7 +197,7 @@ pub fn bam_coverage(
let fraglen_treatment_arg : Vec<usize> = treatment_fraglen_estimates.iter().map(|x| x.fraglen as usize).collect();
let fraglen_control_arg : Vec<usize> = control_fraglen_estimates.iter().map(|x| x.fraglen as usize).collect();

let track = GenericMutableTrack::coverage_from_bam(config, filenames_treatment, filenames_control, &fraglen_treatment_arg, &fraglen_control_arg, genome).map_err(
let track = SimpleTrack::coverage_from_bam(config, filenames_treatment, filenames_control, &fraglen_treatment_arg, &fraglen_control_arg, genome).map_err(
|e| CoverageError::new(e, treatment_fraglen_estimates.clone(), control_fraglen_estimates.clone())
)?;

Expand Down
36 changes: 18 additions & 18 deletions src/track_generic_coverage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,24 @@ use futures::executor::block_on_stream;
use crate::coverage::CoverageConfig;
use crate::genome::Genome;
use crate::read_stream::ReadStream;
use crate::track::MutableTrack;
use crate::track_generic::GenericMutableTrack;
use crate::bam::BamFile;
use crate::track_simple::SimpleTrack;

/* -------------------------------------------------------------------------- */

impl<'a> GenericMutableTrack<'a> {

pub fn coverage_from_bam(
mut config : CoverageConfig,
track1 : GenericMutableTrack,
track2_arg : Option<GenericMutableTrack>,
filenames_treatment: &Vec<&str>,
filenames_control : &Vec<&str>,
fraglen_treatment : &Vec<usize>,
fraglen_control : &Vec<usize>,
genome : Genome,
) -> Result<SimpleTrack, Box<dyn Error>> {
) -> Result<(), Box<dyn Error>> {
// Treatment data
let mut track1 = SimpleTrack::alloc("treatment".to_string(), genome.clone(), config.initial_value, config.bin_size);
let mut n_treatment = 0;
let mut n_control = 0;

Expand Down Expand Up @@ -76,7 +75,7 @@ impl<'a> GenericMutableTrack<'a> {
}
});

n_treatment += GenericMutableTrack::wrap(&mut track1).add_reads(treatment_iter, fraglen, &config.binning_method);
n_treatment += track1.add_reads(treatment_iter, fraglen, &config.binning_method);

if let Some(err) = err_opt {
return Err(Box::new(err));
Expand All @@ -87,20 +86,19 @@ impl<'a> GenericMutableTrack<'a> {
if config.normalize_track == "rpkm" {
log!(config.logger, "Normalizing treatment track (rpkm)");
let c = 1_000_000.0 / (n_treatment as f64 * config.bin_size as f64);
GenericMutableTrack::wrap(&mut track1).map(|_name, _i, x| c * x)?;
track1.map(|_name, _i, x| c * x)?;
config.pseudocounts[0] *= c;
}

if config.normalize_track == "cpm" {
log!(config.logger, "Normalizing treatment track (cpm)");
let c = 1_000_000.0 / n_treatment as f64;
GenericMutableTrack::wrap(&mut track1).map(|_name, _i, x| c * x)?;
track1.map(|_name, _i, x| c * x)?;
config.pseudocounts[0] *= c;
}

if !filenames_control.is_empty() {
if let Some(track2) = track2_arg {
// Control data
let mut track2 = SimpleTrack::alloc("control".to_string(), genome.clone(), config.initial_value, config.bin_size);

for (i, filename) in filenames_control.iter().enumerate() {
let mut err_opt = None;
Expand Down Expand Up @@ -131,7 +129,7 @@ impl<'a> GenericMutableTrack<'a> {
}
});

n_control += GenericMutableTrack::wrap(&mut track2).add_reads(control_iter, fraglen, &config.binning_method);
n_control += track2.add_reads(control_iter, fraglen, &config.binning_method);

if let Some(err) = err_opt {
return Err(Box::new(err));
Expand All @@ -142,32 +140,33 @@ impl<'a> GenericMutableTrack<'a> {
if config.normalize_track == "rpkm" {
log!(config.logger, "Normalizing control track (rpkm)");
let c = 1_000_000.0 / (n_control as f64 * config.bin_size as f64);
GenericMutableTrack::wrap(&mut track2).map(|_name, _i, x| c * x)?;
track2.map(|_name, _i, x| c * x)?;
config.pseudocounts[1] *= c;
}

if config.normalize_track == "cpm" {
log!(config.logger, "Normalizing control track (cpm)");
let c = 1_000_000.0 / n_control as f64;
GenericMutableTrack::wrap(&mut track2).map(|_name, _i, x| c * x)?;
track2.map(|_name, _i, x| c * x)?;
config.pseudocounts[1] *= c;
}

if config.smoothen_control {
GenericMutableTrack::wrap(&mut track2).smoothen(config.smoothen_min, config.smoothen_sizes.clone())?;
track2.smoothen(config.smoothen_min, config.smoothen_sizes.clone())?;
}

log!(config.logger, "Combining treatment and control tracks...");
GenericMutableTrack::wrap(&mut track1).normalize(&track2, config.pseudocounts[0], config.pseudocounts[1], config.log_scale)?;
track1.normalize(
track2.track.as_track(), config.pseudocounts[0], config.pseudocounts[1], config.log_scale)?;
} else {
// No control data
if config.pseudocounts[0] != 0.0 {
log!(config.logger, "Adding pseudocount `{}`", config.pseudocounts[0]);
GenericMutableTrack::wrap(&mut track1).map(|_name, _i, x| x + config.pseudocounts[0])?;
track1.map(|_name, _i, x| x + config.pseudocounts[0])?;
}
if config.log_scale {
log!(config.logger, "Log-transforming data");
GenericMutableTrack::wrap(&mut track1).map(|_name, _i, x| x.ln())?;
track1.map(|_name, _i, x| x.ln())?;
}
}

Expand All @@ -183,7 +182,7 @@ impl<'a> GenericMutableTrack<'a> {
if !config.filter_chroms.is_empty() {
log!(config.logger, "Removing all reads from `{}`", config.filter_chroms.join(", "));
for chr in &config.filter_chroms {
if let Ok(mut s) = track1.get_sequence_mut(chr) {
if let Ok(mut s) = track1.track.get_sequence_mut(chr) {
for i in 0..s.n_bins() {
s.set_bin(i, 0.0);
}
Expand All @@ -192,7 +191,8 @@ impl<'a> GenericMutableTrack<'a> {
}
}

Ok(track1)
Ok(())

}

}
Expand Down
4 changes: 4 additions & 0 deletions src/track_simple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,10 @@ impl Track for SimpleTrack {

impl MutableTrack for SimpleTrack {

fn as_track(&self) -> &dyn Track {
self
}

fn get_sequence_mut(&mut self, query: &str) -> Result<TrackMutableSequence, Box<dyn Error>> {
match self.data.get_mut(query) {
Some(seq) => Ok(TrackMutableSequence::new(seq.clone(), self.bin_size)),
Expand Down
74 changes: 74 additions & 0 deletions src/track_simple_coverage.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright (C) 2024 Philipp Benner
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the “Software”), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

use std::error::Error;

use crate::coverage::CoverageConfig;
use crate::genome::Genome;
use crate::track_generic::GenericMutableTrack;
use crate::track_simple::SimpleTrack;

/* -------------------------------------------------------------------------- */

impl SimpleTrack {

pub fn coverage_from_bam(
config : CoverageConfig,
filenames_treatment: &Vec<&str>,
filenames_control : &Vec<&str>,
fraglen_treatment : &Vec<usize>,
fraglen_control : &Vec<usize>,
genome : Genome,
) -> Result<SimpleTrack, Box<dyn Error>> {

let mut track1 = SimpleTrack::alloc("treatment".to_string(), genome.clone(), config.initial_value, config.bin_size);
if !filenames_control.is_empty() {
// Control data
let mut track2 = SimpleTrack::alloc("control".to_string(), genome.clone(), config.initial_value, config.bin_size);

GenericMutableTrack::coverage_from_bam(
config,
GenericMutableTrack::wrap(&mut track1),
Some(GenericMutableTrack::wrap(&mut track2)),
filenames_treatment,
filenames_control,
fraglen_treatment,
fraglen_control,
genome)?;

} else {

GenericMutableTrack::coverage_from_bam(
config,
GenericMutableTrack::wrap(&mut track1),
None,
filenames_treatment,
filenames_control,
fraglen_treatment,
fraglen_control,
genome)?;

};

Ok(track1)

}

}

0 comments on commit ea5edbe

Please sign in to comment.