diff --git a/src/contact_matrix/contact_matrix_dense_safe_impl.hpp b/src/contact_matrix/contact_matrix_dense_safe_impl.hpp index 59666d73..3f2773bd 100644 --- a/src/contact_matrix/contact_matrix_dense_safe_impl.hpp +++ b/src/contact_matrix/contact_matrix_dense_safe_impl.hpp @@ -175,7 +175,7 @@ ContactMatrixDense ContactMatrixDense::blur(const double sigma, const const auto lck = this->lock(); if (tpool) { - auto fut = tpool->template parallelize_loop(usize(0), this->ncols(), apply_kernel); + auto fut = tpool->template submit_blocks(usize(0), this->ncols(), apply_kernel); fut.wait(); } else { apply_kernel(0, this->ncols()); @@ -220,7 +220,7 @@ ContactMatrixDense ContactMatrixDense::diff_of_gaussians( const auto lck = this->lock(); if (tpool) { - auto fut = tpool->template parallelize_loop(usize(0), this->ncols(), compute_gauss_diff); + auto fut = tpool->template submit_blocks(usize(0), this->ncols(), compute_gauss_diff); fut.wait(); } else { compute_gauss_diff(0, this->ncols()); diff --git a/src/libmodle/cpu/context_manager_impl.hpp b/src/libmodle/cpu/context_manager_impl.hpp index 5f40c607..0c463c75 100644 --- a/src/libmodle/cpu/context_manager_impl.hpp +++ b/src/libmodle/cpu/context_manager_impl.hpp @@ -197,13 +197,13 @@ inline usize ContextManager::num_io_threads() const noexcept { template template inline void ContextManager::spawn_worker_thread(TaskLambda lambda) { - this->_futures.emplace_back(this->_worker_tpool.submit(lambda)); + this->_futures.emplace_back(this->_worker_tpool.submit_task(lambda)); } template template inline void ContextManager::spawn_io_thread(TaskLambda lambda) { - this->_futures.emplace_back(this->_io_tpool.submit(lambda)); + this->_futures.emplace_back(this->_io_tpool.submit_task(lambda)); } template @@ -212,9 +212,9 @@ inline void ContextManager::shutdown() { this->close_queue(); spdlog::debug(FMT_STRING("waiting for worker threads to return...")); - this->_worker_tpool.wait_for_tasks(); + this->_worker_tpool.wait(); spdlog::debug(FMT_STRING("waiting for io threads to return...")); - this->_io_tpool.wait_for_tasks(); + this->_io_tpool.wait(); spdlog::debug( FMT_STRING("all background threads returned! Checking if any exception have been raised...")); diff --git a/src/libmodle/cpu/include/modle/simulation.hpp b/src/libmodle/cpu/include/modle/simulation.hpp index 9e57ed82..0ef298cd 100644 --- a/src/libmodle/cpu/include/modle/simulation.hpp +++ b/src/libmodle/cpu/include/modle/simulation.hpp @@ -59,7 +59,8 @@ class Simulation { struct Task { // NOLINT(altera-struct-pack-align) enum class Status : u8f { PENDING, RUNNING, COMPLETED, FAILED }; usize id{}; - GenomicInterval* interval{}; + const GenomicInterval* interval{}; + GenomicIntervalData* interval_data{}; usize cell_id{}; usize num_target_epochs{}; usize num_target_contacts{}; @@ -336,18 +337,21 @@ class Simulation { /// Register contacts for chromosome \p interval using the position the extrusion units of the /// LEFs in \p lefs whose index is present in \p selected_lef_idx. /// Return the actual number of contacts registered - usize register_contacts_loop(GenomicInterval& interval, absl::Span lefs, - usize num_sampling_events, random::PRNG_t& rand_eng) const; + usize register_contacts_loop(const GenomicInterval& interval, GenomicIntervalData& interval_data, + absl::Span lefs, usize num_sampling_events, + random::PRNG_t& rand_eng) const; usize register_contacts_loop(bp_t start_pos, bp_t end_pos, ContactMatrixDense& contacts, absl::Span lefs, usize num_sampling_events, random::PRNG_t& rand_eng) const; - usize register_contacts_tad(GenomicInterval& interval, absl::Span lefs, - usize num_sampling_events, random::PRNG_t& rand_eng) const; + usize register_contacts_tad(const GenomicInterval& interval, GenomicIntervalData& interval_data, + absl::Span lefs, usize num_sampling_events, + random::PRNG_t& rand_eng) const; usize register_contacts_tad(bp_t start_pos, bp_t end_pos, ContactMatrixDense& contacts, absl::Span lefs, usize num_sampling_events, random::PRNG_t& rand_eng) const; - usize register_1d_lef_occupancy(GenomicInterval& interval, absl::Span lefs, + usize register_1d_lef_occupancy(const GenomicInterval& interval, + GenomicIntervalData& interval_data, absl::Span lefs, usize num_sampling_events, random::PRNG_t& rand_eng) const; usize register_1d_lef_occupancy(bp_t start_pos, bp_t end_pos, diff --git a/src/libmodle/cpu/register_contacts.cpp b/src/libmodle/cpu/register_contacts.cpp index d562a2e4..3c470400 100644 --- a/src/libmodle/cpu/register_contacts.cpp +++ b/src/libmodle/cpu/register_contacts.cpp @@ -109,13 +109,14 @@ void Simulation::sample_and_register_contacts(State& s, usize num_sampling_event const auto num_tad_contacts = num_sampling_events - num_loop_contacts; assert(s.interval); - s.num_contacts += - this->register_contacts_loop(*s.interval, s.get_lefs(), num_loop_contacts, s.rand_eng); - s.num_contacts += - this->register_contacts_tad(*s.interval, s.get_lefs(), num_tad_contacts, s.rand_eng); + s.num_contacts += this->register_contacts_loop(*s.interval, *s.interval_data, s.get_lefs(), + num_loop_contacts, s.rand_eng); + s.num_contacts += this->register_contacts_tad(*s.interval, *s.interval_data, s.get_lefs(), + num_tad_contacts, s.rand_eng); if (c().track_1d_lef_position) { - this->register_1d_lef_occupancy(*s.interval, s.get_lefs(), num_sampling_events, s.rand_eng); + this->register_1d_lef_occupancy(*s.interval, *s.interval_data, s.get_lefs(), + num_sampling_events, s.rand_eng); } assert(s.num_contacts <= s.num_target_contacts); @@ -233,26 +234,31 @@ usize Simulation::register_1d_lef_occupancy(bp_t start_pos, bp_t end_pos, return num_successful_sampling_events; } -usize Simulation::register_contacts_loop(GenomicInterval& interval, +usize Simulation::register_contacts_loop(const GenomicInterval& interval, + GenomicIntervalData& interval_data, const absl::Span lefs, usize num_sampling_events, random::PRNG_t& rand_eng) const { - return this->register_contacts_loop(interval.start() + 1, interval.end() - 1, interval.contacts(), - lefs, num_sampling_events, rand_eng); + return this->register_contacts_loop(interval.start() + 1, interval.end() - 1, + interval_data.contacts(), lefs, num_sampling_events, + rand_eng); } -usize Simulation::register_contacts_tad(GenomicInterval& interval, absl::Span lefs, - usize num_sampling_events, random::PRNG_t& rand_eng) const { - return this->register_contacts_tad(interval.start() + 1, interval.end() - 1, interval.contacts(), - lefs, num_sampling_events, rand_eng); +usize Simulation::register_contacts_tad(const GenomicInterval& interval, + GenomicIntervalData& interval_data, + absl::Span lefs, usize num_sampling_events, + random::PRNG_t& rand_eng) const { + return this->register_contacts_tad(interval.start() + 1, interval.end() - 1, + interval_data.contacts(), lefs, num_sampling_events, rand_eng); } -usize Simulation::register_1d_lef_occupancy(GenomicInterval& interval, absl::Span lefs, - usize num_sampling_events, +usize Simulation::register_1d_lef_occupancy(const GenomicInterval& interval, + GenomicIntervalData& interval_data, + absl::Span lefs, usize num_sampling_events, random::PRNG_t& rand_eng) const { return this->register_1d_lef_occupancy(interval.start() + 1, interval.end() - 1, - interval.lef_1d_occupancy(), lefs, num_sampling_events, - rand_eng); + interval_data.lef_1d_occupancy(), lefs, + num_sampling_events, rand_eng); } } // namespace modle diff --git a/src/libmodle/cpu/scheduler_simulate.cpp b/src/libmodle/cpu/scheduler_simulate.cpp index d59e8f0b..ba77cbd3 100644 --- a/src/libmodle/cpu/scheduler_simulate.cpp +++ b/src/libmodle/cpu/scheduler_simulate.cpp @@ -103,14 +103,14 @@ void Simulation::run_simulate() { std::unique_ptr xxh_state{XXH3_createState()}; // Loop over chromosomes - for (auto& interval : this->_genome) { + for (auto& [interval, interval_data] : this->_genome) { this->_ctx.check_exceptions(); auto sleep_time = std::chrono::microseconds(50); auto rand_eng = random::PRNG(interval.hash(*xxh_state, c().seed)); // Don't bother simulating intervals without barriers - if (!c().simulate_chromosomes_wo_barriers && interval.num_barriers() == 0) { + if (!c().simulate_chromosomes_wo_barriers && interval_data.num_barriers() == 0) { spdlog::info(FMT_STRING("{} has 0 barriers... SKIPPING!"), interval); Task t{}; t.interval = &interval; @@ -128,7 +128,7 @@ void Simulation::run_simulate() { // Compute # of LEFs to be simulated based on interval.sizes const auto nlefs = this->compute_num_lefs(interval.size()); - const auto npixels = interval.npixels(); + const auto npixels = interval_data.npixels(); const auto tot_target_contacts = static_cast(std::round(static_cast(npixels) * c().target_contact_density)); @@ -144,6 +144,7 @@ void Simulation::run_simulate() { Task t{taskid++, &interval, + &interval_data, cellid, c().target_simulation_epochs, num_target_contacts, diff --git a/src/libmodle/cpu/simulation.cpp b/src/libmodle/cpu/simulation.cpp index 0aaa18fa..46eff320 100644 --- a/src/libmodle/cpu/simulation.cpp +++ b/src/libmodle/cpu/simulation.cpp @@ -49,8 +49,8 @@ namespace modle { static void override_extrusion_barrier_occupancy(Genome& genome, const double barrier_occupied_stp, const double barrier_not_occupied_stp) { - for (auto& interval : genome) { - auto& barriers = interval.barriers(); + for (auto& [interval, interval_data] : genome) { + auto& barriers = interval_data.barriers(); std::transform(barriers.begin(), barriers.end(), barriers.begin(), [&](const auto& barrier) { return ExtrusionBarrier{barrier.pos, barrier_occupied_stp, barrier_not_occupied_stp, barrier.blocking_direction.complement()}; @@ -60,7 +60,7 @@ static void override_extrusion_barrier_occupancy(Genome& genome, const double ba static void issue_warnings_for_small_intervals(const Genome& genome, const bp_t diagonal_width) { std::vector warnings; - for (const auto& interval : genome) { + for (const auto& [interval, interval_data] : genome) { if (interval.size() < diagonal_width) { warnings.emplace_back(fmt::format(FMT_STRING("{}: {};"), interval, interval.size())); } @@ -75,8 +75,8 @@ static void issue_warnings_for_small_intervals(const Genome& genome, const bp_t template static void issue_warnings_for_small_matrices(const Genome& genome) { std::vector warnings; - for (const auto& interval : genome) { - if (const auto npixels = interval.npixels(); npixels < num_pixels_threshold) { + for (const auto& [interval, interval_data] : genome) { + if (const auto npixels = interval_data.npixels(); npixels < num_pixels_threshold) { warnings.emplace_back(fmt::format(FMT_STRING("{}: {} pixels"), interval, npixels)); } } @@ -136,13 +136,13 @@ usize Simulation::simulated_size() const { return this->_genome.simulated_size() return bwf; } -static void write_contact_matrix_to_cooler(hictk::cooler::File& cf, - const GenomicInterval& interval) { +static void write_contact_matrix_to_cooler(hictk::cooler::File& cf, const GenomicInterval& interval, + const GenomicIntervalData& interval_data) { if (!cf) { return; } - const auto& matrix = interval.contacts(); + const auto& matrix = interval_data.contacts(); if (matrix.npixels() != 0) { const auto t0 = absl::Now(); spdlog::info(FMT_STRING("[io] writing contacts for {} to file \"{}\"..."), interval, cf.uri()); @@ -165,8 +165,9 @@ static void write_contact_matrix_to_cooler(hictk::cooler::File& cf, } static void write_lef_occupancy_to_bwig(io::bigwig::Writer& bw, const GenomicInterval& interval, + const GenomicIntervalData& interval_data, const bp_t resolution, std::vector& buff) { - if (!bw || interval.npixels() == 0) { + if (!bw || interval_data.npixels() == 0) { return; } @@ -174,12 +175,12 @@ static void write_lef_occupancy_to_bwig(io::bigwig::Writer& bw, const GenomicInt const auto t0 = absl::Now(); spdlog::info(FMT_STRING("[io]: writing 1D LEF occupancy profile for {} to file {}..."), interval, bw.path()); - buff.resize(interval.lef_1d_occupancy().size()); - const auto max_element = - std::max_element(interval.lef_1d_occupancy().begin(), interval.lef_1d_occupancy().end()) - ->load(); + buff.resize(interval_data.lef_1d_occupancy().size()); + const auto max_element = std::max_element(interval_data.lef_1d_occupancy().begin(), + interval_data.lef_1d_occupancy().end()) + ->load(); - std::transform(interval.lef_1d_occupancy().begin(), interval.lef_1d_occupancy().end(), + std::transform(interval_data.lef_1d_occupancy().begin(), interval_data.lef_1d_occupancy().end(), buff.begin(), [&](const auto& n) { return static_cast(static_cast(n.load()) / static_cast(max_element)); @@ -228,14 +229,15 @@ void Simulation::simulate_io(std::chrono::milliseconds wait_time) { auto last_interval = this->_genome.end(); auto ctok = this->_ctx.register_consumer(); - absl::btree_map task_map{}; + absl::btree_map, usize> task_map{}; std::vector bw_buff{}; while (!!this->_ctx && current_interval != last_interval) { - if (auto it = task_map.find(&(*current_interval)); it != task_map.end() && it->second == 0) { + const auto key = std::make_pair(¤t_interval->first, ¤t_interval->second); + if (auto it = task_map.find(key); it != task_map.end() && it->second == 0) { // All tasks for the current interval successfully completed! - write_contact_matrix_to_cooler(cf, *it->first); - write_lef_occupancy_to_bwig(bw, *it->first, c().bin_size, bw_buff); - it->first->deallocate(); + write_contact_matrix_to_cooler(cf, *it->first.first, *it->first.second); + write_lef_occupancy_to_bwig(bw, *it->first.first, *it->first.second, c().bin_size, bw_buff); + it->first.second->deallocate(); task_map.erase(it); ++current_interval; continue; @@ -246,7 +248,8 @@ void Simulation::simulate_io(std::chrono::milliseconds wait_time) { task = *task_opt; // Add interval to task_map if not already present. // Then decrement the number of tasks still pending. - auto [it, _] = task_map.try_emplace(task.interval, c().num_cells); + auto [it, _] = + task_map.try_emplace(std::make_pair(task.interval, task.interval_data), c().num_cells); --it->second; } } @@ -432,8 +435,10 @@ void Simulation::rank_lefs(const absl::Span lefs, } if (MODLE_LIKELY(ranks_are_partially_sorted)) { - cppsort::split_sort(rev_lef_rank_buff.begin(), rev_lef_rank_buff.end(), rev_comparator); - cppsort::split_sort(fwd_lef_rank_buff.begin(), fwd_lef_rank_buff.end(), fwd_comparator); + cppsort::split_adapter{}(rev_lef_rank_buff.begin(), + rev_lef_rank_buff.end(), rev_comparator); + cppsort::split_adapter{}(fwd_lef_rank_buff.begin(), + fwd_lef_rank_buff.end(), fwd_comparator); } else { // Fallback to pattern-defeating quicksort we have no information regarding the level of // pre-sortedness of LEFs @@ -747,13 +752,14 @@ Simulation::State& Simulation::State::operator=(const Task& task) { this->id = task.id; this->interval = task.interval; + this->interval_data = task.interval_data; this->cell_id = task.cell_id; this->num_target_epochs = task.num_target_epochs; this->num_target_contacts = task.num_target_contacts; this->num_lefs = task.num_lefs; - if (task.interval) { - this->barriers = - ExtrusionBarriers{task.interval->barriers().begin(), task.interval->barriers().end()}; + if (task.interval_data) { + this->barriers = ExtrusionBarriers{task.interval_data->barriers().begin(), + task.interval_data->barriers().end()}; } this->rand_eng = task.rand_eng; @@ -1087,11 +1093,12 @@ usize Simulation::compute_num_lefs(const usize size_bp) const noexcept { void Simulation::print_status_update(const Task& t) const noexcept { assert(t.interval); - auto tot_target_epochs = this->compute_tot_target_epochs(t.num_lefs, t.interval->npixels()); + assert(t.interval_data); + auto tot_target_epochs = this->compute_tot_target_epochs(t.num_lefs, t.interval_data->npixels()); spdlog::info(FMT_STRING("begin processing {}: simulating ~{} epochs across {} cells using {} " "LEFs and {} barriers (~{} epochs per cell)..."), *t.interval, tot_target_epochs, c().num_cells, t.num_lefs, - t.interval->barriers().size(), tot_target_epochs / c().num_cells); + t.interval_data->barriers().size(), tot_target_epochs / c().num_cells); } usize Simulation::compute_num_worker_threads(usize num_threads, usize num_intervals, diff --git a/src/libmodle/cpu/simulation_impl.hpp b/src/libmodle/cpu/simulation_impl.hpp index af77af8e..77be667d 100644 --- a/src/libmodle/cpu/simulation_impl.hpp +++ b/src/libmodle/cpu/simulation_impl.hpp @@ -117,7 +117,7 @@ auto fmt::formatter::format(const modle::Simulation::Ta assert(t.interval); return fmt::format_to(ctx.out(), FMT_STRING("{}\t{}\t{}\t{}\t{}\t{}\t{}"), t.id, t.interval->chrom(), t.cell_id, t.num_target_epochs, t.num_target_contacts, - t.num_lefs, !!t.interval ? t.interval->barriers().size() : 0); + t.num_lefs, !!t.interval_data ? t.interval_data->barriers().size() : 0); } constexpr auto fmt::formatter::parse(format_parse_context& ctx) diff --git a/src/libmodle/internal/genome.cpp b/src/libmodle/internal/genome.cpp index 65d722fe..46b2dae9 100644 --- a/src/libmodle/internal/genome.cpp +++ b/src/libmodle/internal/genome.cpp @@ -188,15 +188,12 @@ u64 Chromosome::hash(XXH3_state_t& state, u64 seed) const { return this->hash(state); } -GenomicInterval::GenomicInterval(usize id, const std::shared_ptr& chrom, - bp_t contact_matrix_resolution, bp_t diagonal_width) - : GenomicInterval(id, chrom, 0, chrom->size(), contact_matrix_resolution, diagonal_width) {} +GenomicInterval::GenomicInterval(usize id, const std::shared_ptr& chrom) + : GenomicInterval(id, chrom, 0, chrom->size()) {} GenomicInterval::GenomicInterval(usize id, std::shared_ptr chrom, bp_t start, - bp_t end, bp_t contact_matrix_resolution, bp_t diagonal_width) - : GenomicInterval(id, std::move(chrom), start, end, contact_matrix_resolution, diagonal_width, - static_cast(nullptr), - static_cast(nullptr)) {} + bp_t end) + : _id(id), _chrom(std::move(chrom)), _start(start), _end(end) {} u64 GenomicInterval::hash(XXH3_state_t& state) const { auto handle_errors = [&](const auto& status) { @@ -228,57 +225,65 @@ const Chromosome& GenomicInterval::chrom() const noexcept { return *this->_chrom; } -usize GenomicInterval::num_barriers() const { return this->_barriers.size(); } +GenomicIntervalData::GenomicIntervalData(bp_t start, bp_t end, bp_t contact_matrix_resolution, + bp_t diagonal_width) + : _contacts(end - start, diagonal_width, contact_matrix_resolution), + _lef_1d_occupancy(end - start, contact_matrix_resolution) { + assert(start <= end); +} + +usize GenomicIntervalData::num_barriers() const { return this->_barriers.size(); } -auto GenomicInterval::barriers() const noexcept -> const std::vector& { +auto GenomicIntervalData::barriers() const noexcept -> const std::vector& { return this->_barriers; } -auto GenomicInterval::barriers() noexcept -> std::vector& { +auto GenomicIntervalData::barriers() noexcept -> std::vector& { return this->_barriers; } -auto GenomicInterval::contacts() const noexcept -> const ContactMatrix& { +auto GenomicIntervalData::contacts() const noexcept -> const ContactMatrix& { return this->_contacts(); } -auto GenomicInterval::contacts() noexcept -> ContactMatrix& { return this->_contacts(); } +auto GenomicIntervalData::contacts() noexcept -> ContactMatrix& { return this->_contacts(); } -auto GenomicInterval::lef_1d_occupancy() const noexcept -> const std::vector>& { +auto GenomicIntervalData::lef_1d_occupancy() const noexcept + -> const std::vector>& { return this->_lef_1d_occupancy(); } -auto GenomicInterval::lef_1d_occupancy() noexcept -> std::vector>& { +auto GenomicIntervalData::lef_1d_occupancy() noexcept -> std::vector>& { return this->_lef_1d_occupancy(); } -void GenomicInterval::deallocate() noexcept { +void GenomicIntervalData::deallocate() noexcept { this->_contacts.deallocate(); this->_lef_1d_occupancy.deallocate(); } -struct ComputeBarrierStpResult { - double stp_active; - double stp_inactive; -}; - -[[nodiscard]] static constexpr ComputeBarrierStpResult compute_barrier_stp( - double score, double default_stp_active, double default_stp_inactive) { +[[nodiscard]] static constexpr auto compute_barrier_stp(double score, double default_stp_active, + double default_stp_inactive) { + struct Result { + double stp_active; + double stp_inactive; + }; if (score != 0.0) { // When the score field is zero (i.e. when the extr. barrier does not have a custom // occupancy), use the default self-transition probabilities const auto occupancy = score; const auto stp_active = ExtrusionBarrier::compute_stp_active_from_occupancy(default_stp_inactive, occupancy); - return {stp_active, default_stp_inactive}; + return Result{stp_active, default_stp_inactive}; } - return {default_stp_active, default_stp_inactive}; + return Result{default_stp_active, default_stp_inactive}; } -void GenomicInterval::add_extrusion_barrier(const bed::BED& record, - const double default_barrier_stp_active, - const double default_barrier_stp_inactive) { +void GenomicIntervalData::add_extrusion_barrier(const GenomicInterval& interval, + const bed::BED& record, + const double default_barrier_stp_active, + const double default_barrier_stp_inactive) { assert(record.strand == '+' || record.strand == '-' || record.strand == '.'); const auto pos = (record.chrom_start + record.chrom_end + 1) / 2; - if (pos < this->start() || pos >= this->end()) { + if (pos < interval.start() || pos >= interval.end()) { return; } @@ -288,11 +293,12 @@ void GenomicInterval::add_extrusion_barrier(const bed::BED& record, this->_barriers.emplace_back(pos, barrier_stp_active, barrier_stp_inactive, record.strand); } -void GenomicInterval::add_extrusion_barriers(std::vector barriers) { +void GenomicIntervalData::add_extrusion_barriers([[maybe_unused]] const GenomicInterval& interval, + std::vector barriers) { if constexpr (utils::ndebug_not_defined()) { for ([[maybe_unused]] const auto& barrier : barriers) { - assert(barrier.pos >= this->start()); - assert(barrier.pos < this->end()); + assert(barrier.pos >= interval.start()); + assert(barrier.pos < interval.end()); } } this->_barriers.insert(this->_barriers.end(), std::make_move_iterator(barriers.begin()), @@ -315,7 +321,7 @@ Genome::Genome(const std::filesystem::path& path_to_chrom_sizes, })), _simulated_size(std::accumulate( _intervals.begin(), _intervals.end(), usize(0), - [&](auto accumulator, const GenomicInterval& gi) { return accumulator + gi.size(); })) { + [&](auto accumulator, const auto& gi) { return accumulator + gi.first.size(); })) { assert(!path_to_extr_barriers.empty()); const auto t0 = absl::Now(); spdlog::info(FMT_STRING("importing extrusion barriers from {}..."), path_to_extr_barriers); @@ -367,21 +373,23 @@ std::vector> Genome::import_chromosomes( return buffer; } -absl::btree_set Genome::import_genomic_intervals( +absl::btree_map Genome::import_genomic_intervals( const std::filesystem::path& path_to_bed, const std::vector>& chromosomes, bp_t contact_matrix_resolution, bp_t diagonal_width) { assert(!chromosomes.empty()); - absl::btree_set buffer{}; + absl::btree_map buffer{}; if (path_to_bed.empty()) { spdlog::debug( FMT_STRING("path to genomic regions to simulate is empty. Assuming whole chromosomes are " "to be simulated!")); - std::transform(chromosomes.begin(), chromosomes.end(), std::inserter(buffer, buffer.begin()), - [&](const auto& chrom_ptr) { - return GenomicInterval{chrom_ptr->id(), chrom_ptr, contact_matrix_resolution, - diagonal_width}; - }); + std::transform( + chromosomes.begin(), chromosomes.end(), std::inserter(buffer, buffer.begin()), + [&](const auto& chrom_ptr) { + return std::make_pair( + GenomicInterval{chrom_ptr->id(), chrom_ptr}, + GenomicIntervalData{0, chrom_ptr->size(), contact_matrix_resolution, diagonal_width}); + }); return buffer; } @@ -405,12 +413,10 @@ absl::btree_set Genome::import_genomic_intervals( } std::transform(overlaps.begin(), overlaps.end(), std::inserter(buffer, buffer.end()), [&](const auto& interval) { - return GenomicInterval{id++, - chrom, - interval.chrom_start, - interval.chrom_end, - contact_matrix_resolution, - diagonal_width}; + return std::make_pair( + GenomicInterval{id++, chrom, interval.chrom_start, interval.chrom_end}, + GenomicIntervalData{interval.chrom_start, interval.chrom_end, + contact_matrix_resolution, diagonal_width}); }); } @@ -475,13 +481,13 @@ absl::btree_set Genome::import_genomic_intervals( return buff; } -usize Genome::map_barriers_to_intervals(absl::btree_set& intervals, - const bed::BED_tree<>& barriers_bed, - double default_barrier_pbb, double default_barrier_puu, - bool interpret_name_field_as_puu) { +usize Genome::map_barriers_to_intervals( + absl::btree_map& intervals, + const bed::BED_tree<>& barriers_bed, double default_barrier_pbb, double default_barrier_puu, + bool interpret_name_field_as_puu) { assert(!intervals.empty()); usize tot_num_barriers = 0; - for (auto& interval : intervals) { + for (auto& [interval, interval_data] : intervals) { auto barriers = generate_barriers_from_bed_records( barriers_bed.find_overlaps(std::string{interval.chrom().name()}, utils::conditional_static_cast(interval.start()), @@ -489,7 +495,7 @@ usize Genome::map_barriers_to_intervals(absl::btree_set& interv default_barrier_pbb, default_barrier_puu, interpret_name_field_as_puu); tot_num_barriers += barriers.size(); - interval.add_extrusion_barriers(std::move(barriers)); + interval_data.add_extrusion_barriers(interval, std::move(barriers)); } return tot_num_barriers; } @@ -511,29 +517,29 @@ auto Genome::find(const GenomicInterval& query) const -> const_iterator { auto Genome::find(usize query) -> iterator { return std::find_if(this->_intervals.begin(), this->_intervals.end(), - [&](const auto& gi) { return gi.id() == query; }); + [&](const auto& gi) { return gi.first.id() == query; }); } auto Genome::find(usize query) const -> const_iterator { return std::find_if(this->_intervals.begin(), this->_intervals.end(), - [&](const auto& gi) { return gi.id() == query; }); + [&](const auto& gi) { return gi.first.id() == query; }); } auto Genome::find(const Chromosome& query) -> iterator { return std::find_if(this->_intervals.begin(), this->_intervals.end(), - [&](const auto& gi) { return gi.chrom() == query; }); + [&](const auto& gi) { return gi.first.chrom() == query; }); } auto Genome::find(const Chromosome& query) const -> const_iterator { return std::find_if(this->_intervals.begin(), this->_intervals.end(), - [&](const auto& gi) { return gi.chrom() == query; }); + [&](const auto& gi) { return gi.first.chrom() == query; }); } auto Genome::find(std::string_view query) -> iterator { return std::find_if(this->_intervals.begin(), this->_intervals.end(), - [&](const auto& gi) { return gi.chrom().name() == query; }); + [&](const auto& gi) { return gi.first.chrom().name() == query; }); } auto Genome::find(std::string_view query) const -> const_iterator { return std::find_if(this->_intervals.begin(), this->_intervals.end(), - [&](const auto& gi) { return gi.chrom().name() == query; }); + [&](const auto& gi) { return gi.first.chrom().name() == query; }); } bool Genome::contains(const GenomicInterval& query) const { @@ -569,17 +575,19 @@ const Chromosome& Genome::longest_chromosome() const noexcept { const GenomicInterval& Genome::longest_interval() const noexcept { assert(!this->_intervals.empty()); - return *std::max_element( - this->_intervals.begin(), this->_intervals.end(), - [&](const auto& gi1, const auto& gi2) { return gi1.size() < gi2.size(); }); + return std::max_element( + this->_intervals.begin(), this->_intervals.end(), + [&](const auto& gi1, const auto& gi2) { return gi1.first.size() < gi2.first.size(); }) + ->first; } const GenomicInterval& Genome::interval_with_most_barriers() const noexcept { assert(!this->_intervals.empty()); - return *std::max_element(this->_intervals.begin(), this->_intervals.end(), - [&](const auto& gi1, const auto& gi2) { - return gi1.barriers().size() < gi2.barriers().size(); - }); + return std::max_element(this->_intervals.begin(), this->_intervals.end(), + [&](const auto& gi1, const auto& gi2) { + return gi1.second.barriers().size() < gi2.second.barriers().size(); + }) + ->first; } usize Genome::max_target_contacts(usize bin_size, usize diagonal_width, diff --git a/src/libmodle/internal/genome_impl.hpp b/src/libmodle/internal/genome_impl.hpp index 3f77db96..99246fa3 100644 --- a/src/libmodle/internal/genome_impl.hpp +++ b/src/libmodle/internal/genome_impl.hpp @@ -50,27 +50,6 @@ constexpr bool Chromosome::operator>=(const Chromosome& other) const noexcept { return this->_id >= other._id; } -template -inline GenomicInterval::GenomicInterval(usize id, const std::shared_ptr& chrom, - bp_t contact_matrix_resolution, bp_t diagonal_width, - It first_barrier, It last_barrier) - : GenomicInterval(id, chrom, contact_matrix_resolution, diagonal_width, 0, chrom->size(), - first_barrier, last_barrier) {} - -template -inline GenomicInterval::GenomicInterval(usize id, std::shared_ptr chrom, - bp_t start, bp_t end, bp_t contact_matrix_resolution, - bp_t diagonal_width, It first_barrier, It last_barrier) - : _id(id), - _chrom(std::move(chrom)), - _start(start), - _end(end), - _barriers(first_barrier, last_barrier), - _contacts(end - start, diagonal_width, contact_matrix_resolution), - _lef_1d_occupancy(end - start, contact_matrix_resolution) { - assert(start <= end); -} - constexpr bool GenomicInterval::operator==(const GenomicInterval& other) const noexcept { return this->_id == other._id; } @@ -94,13 +73,24 @@ constexpr usize GenomicInterval::id() const noexcept { return this->_id; } constexpr bp_t GenomicInterval::start() const noexcept { return this->_start; } constexpr bp_t GenomicInterval::end() const noexcept { return this->_end; } constexpr bp_t GenomicInterval::size() const noexcept { return this->_end - this->_start; } -constexpr u64 GenomicInterval::npixels() const noexcept { return this->_contacts.npixels(); } template H AbslHashValue(H h, const GenomicInterval& c) { return H::combine(std::move(h), c._id); } +template +inline GenomicIntervalData::GenomicIntervalData(bp_t start, bp_t end, + bp_t contact_matrix_resolution, bp_t diagonal_width, + It first_barrier, It last_barrier) + : _barriers(first_barrier, last_barrier), + _contacts(end - start, diagonal_width, contact_matrix_resolution), + _lef_1d_occupancy(end - start, contact_matrix_resolution) { + assert(start <= end); +} + +constexpr u64 GenomicIntervalData::npixels() const noexcept { return this->_contacts.npixels(); } + constexpr const std::vector>& Genome::chromosomes() const noexcept { return this->_chroms; diff --git a/src/libmodle/internal/include/modle/genome.hpp b/src/libmodle/internal/include/modle/genome.hpp index f955a669..22a050fd 100644 --- a/src/libmodle/internal/include/modle/genome.hpp +++ b/src/libmodle/internal/include/modle/genome.hpp @@ -124,7 +124,6 @@ class Chromosome { class GenomicInterval { public: - using ContactMatrix = internal::ContactMatrixLazy::ContactMatrix; friend class Genome; private: @@ -133,33 +132,10 @@ class GenomicInterval { bp_t _start{}; bp_t _end{}; - std::vector _barriers{}; - - internal::ContactMatrixLazy _contacts{}; - internal::Occupancy1DLazy _lef_1d_occupancy{}; - public: GenomicInterval() = default; - GenomicInterval(usize id, const std::shared_ptr& chrom, - bp_t contact_matrix_resolution, bp_t diagonal_width); - GenomicInterval(usize id, std::shared_ptr chrom, bp_t start, bp_t end, - bp_t contact_matrix_resolution, bp_t diagonal_width); - template - GenomicInterval(usize id, const std::shared_ptr& chrom, - bp_t contact_matrix_resolution, bp_t diagonal_width, It first_barrier, - It last_barrier); - template - GenomicInterval(usize id, std::shared_ptr chrom, bp_t start, bp_t end, - bp_t contact_matrix_resolution, bp_t diagonal_width, It first_barrier, - It last_barrier); - - ~GenomicInterval() = default; - - GenomicInterval(const GenomicInterval& other) = delete; - GenomicInterval(GenomicInterval&& other) noexcept = default; - - GenomicInterval& operator=(const GenomicInterval& other) = delete; - GenomicInterval& operator=(GenomicInterval&& other) noexcept = default; + GenomicInterval(usize id, const std::shared_ptr& chrom); + GenomicInterval(usize id, std::shared_ptr chrom, bp_t start, bp_t end); [[nodiscard]] constexpr bool operator==(const GenomicInterval& other) const noexcept; [[nodiscard]] constexpr bool operator!=(const GenomicInterval& other) const noexcept; @@ -168,10 +144,6 @@ class GenomicInterval { [[nodiscard]] constexpr bool operator>(const GenomicInterval& other) const noexcept; [[nodiscard]] constexpr bool operator>=(const GenomicInterval& other) const noexcept; - void add_extrusion_barrier(const bed::BED& record, double default_barrier_stp_active, - double default_barrier_stp_inactive); - void add_extrusion_barriers(std::vector barriers); - [[nodiscard]] constexpr usize id() const noexcept; [[nodiscard]] u64 hash(XXH3_state_t& state) const; [[nodiscard]] u64 hash(XXH3_state_t& state, u64 seed) const; @@ -179,7 +151,42 @@ class GenomicInterval { [[nodiscard]] constexpr bp_t start() const noexcept; [[nodiscard]] constexpr bp_t end() const noexcept; [[nodiscard]] constexpr bp_t size() const noexcept; - [[nodiscard]] constexpr u64 npixels() const noexcept; + + template + inline friend H AbslHashValue(H h, const GenomicInterval& c); +}; + +class GenomicIntervalData { + public: + using ContactMatrix = internal::ContactMatrixLazy::ContactMatrix; + friend class Genome; + + private: + std::vector _barriers{}; + + internal::ContactMatrixLazy _contacts{}; + internal::Occupancy1DLazy _lef_1d_occupancy{}; + + public: + GenomicIntervalData(bp_t start, bp_t end, bp_t contact_matrix_resolution, bp_t diagonal_width); + template + GenomicIntervalData(bp_t start, bp_t end, bp_t contact_matrix_resolution, bp_t diagonal_width, + It first_barrier, It last_barrier); + + ~GenomicIntervalData() = default; + + GenomicIntervalData(const GenomicIntervalData& other) = delete; + GenomicIntervalData(GenomicIntervalData&& other) noexcept = default; + + GenomicIntervalData& operator=(const GenomicIntervalData& other) = delete; + GenomicIntervalData& operator=(GenomicIntervalData&& other) noexcept = default; + + void add_extrusion_barrier(const GenomicInterval& interval, const bed::BED& record, + double default_barrier_stp_active, + double default_barrier_stp_inactive); + void add_extrusion_barriers(const GenomicInterval& interval, + std::vector barriers); + [[nodiscard]] usize num_barriers() const; [[nodiscard]] auto barriers() const noexcept -> const std::vector&; [[nodiscard]] auto barriers() noexcept -> std::vector&; @@ -187,15 +194,13 @@ class GenomicInterval { [[nodiscard]] auto contacts() noexcept -> ContactMatrix&; [[nodiscard]] auto lef_1d_occupancy() const noexcept -> const std::vector>&; [[nodiscard]] auto lef_1d_occupancy() noexcept -> std::vector>&; + [[nodiscard]] constexpr u64 npixels() const noexcept; void deallocate() noexcept; - - template - inline friend H AbslHashValue(H h, const GenomicInterval& c); }; class Genome { std::vector> _chroms{}; - absl::btree_set _intervals{}; + absl::btree_map _intervals{}; usize _size{}; usize _simulated_size{}; @@ -262,17 +267,17 @@ class Genome { /// Import genomic intervals from a BED file. If path to BED file is empty, assume entire /// chromosomes are to be simulated. - absl::btree_set import_genomic_intervals( + absl::btree_map import_genomic_intervals( const std::filesystem::path& path_to_bed, const std::vector>& chromosomes, bp_t contact_matrix_resolution, bp_t diagonal_width); /// Parse a BED file containing the genomic coordinates of extrusion barriers and add them to /// the Genome - static usize map_barriers_to_intervals(absl::btree_set& intervals, - const bed::BED_tree<>& barriers_bed, - double default_barrier_pbb, double default_barrier_puu, - bool interpret_name_field_as_puu); + static usize map_barriers_to_intervals( + absl::btree_map& intervals, + const bed::BED_tree<>& barriers_bed, double default_barrier_pbb, double default_barrier_puu, + bool interpret_name_field_as_puu); }; } // namespace modle diff --git a/src/modle_tools/eval.cpp b/src/modle_tools/eval.cpp index 02547fcf..d664cf2a 100644 --- a/src/modle_tools/eval.cpp +++ b/src/modle_tools/eval.cpp @@ -745,9 +745,9 @@ void eval_subcmd(const modle::tools::eval_config &c) { if (c.normalize) { const auto t00 = absl::Now(); spdlog::info(FMT_STRING("Normalizing contact matrices for {}..."), coord_str); - return_codes[0] = tpool.submit([&]() { ref_matrix.normalize_inplace(); }); - return_codes[1] = tpool.submit([&]() { tgt_matrix.normalize_inplace(); }); - tpool.wait_for_tasks(); + return_codes[0] = tpool.submit_task([&]() { ref_matrix.normalize_inplace(); }); + return_codes[1] = tpool.submit_task([&]() { tgt_matrix.normalize_inplace(); }); + tpool.wait(); try { // Handle exceptions thrown inside worker threads std::ignore = return_codes[0]; @@ -763,15 +763,15 @@ void eval_subcmd(const modle::tools::eval_config &c) { } using d = StripeDirection; - return_codes[0] = tpool.submit([&, interval = interval]() { + return_codes[0] = tpool.submit_task([&, interval = interval]() { run_task(c.metric, interval, writers, ref_matrix, tgt_matrix, c.exclude_zero_pxls, bin_size, weights); }); - return_codes[1] = tpool.submit([&, interval = interval]() { + return_codes[1] = tpool.submit_task([&, interval = interval]() { run_task(c.metric, interval, writers, ref_matrix, tgt_matrix, c.exclude_zero_pxls, bin_size, weights); }); - tpool.wait_for_tasks(); + tpool.wait(); // Raise exceptions thrown inside worker threads (if any) std::ignore = return_codes[0]; std::ignore = return_codes[1]; diff --git a/src/modle_tools/transform.cpp b/src/modle_tools/transform.cpp index f7771b70..5021ab45 100644 --- a/src/modle_tools/transform.cpp +++ b/src/modle_tools/transform.cpp @@ -230,7 +230,7 @@ void transform_subcmd(const modle::tools::transform_config& c) { } } - tpool.wait_for_tasks(); + tpool.wait(); spdlog::info(FMT_STRING("DONE! Processed {} chromosomes in {}!"), input_cooler.chromosomes().size(), absl::FormatDuration(absl::Now() - t0)); spdlog::info(FMT_STRING("Transformed contacts have been saved to file {}"), c.output_cooler_uri); diff --git a/test/units/simulation_cpu/common.hpp b/test/units/simulation_cpu/common.hpp index fe18c4da..399c498e 100644 --- a/test/units/simulation_cpu/common.hpp +++ b/test/units/simulation_cpu/common.hpp @@ -150,11 +150,11 @@ inline void require_that_lefs_are_sorted_by_idx(const LefCollection& lefs, [[nodiscard]] inline GenomicInterval init_interval( std::string name, bp_t chrom_size, bp_t chrom_start = 0, - bp_t chrom_end = (std::numeric_limits::max)(), bp_t resolution = 1, bp_t diag_width = 1) { + bp_t chrom_end = (std::numeric_limits::max)()) { chrom_end = std::min(chrom_end, chrom_size); assert(chrom_start < chrom_end); auto chrom = std::make_shared(0, std::move(name), chrom_size); - return {0, chrom, chrom_start, chrom_end, resolution, diag_width}; + return {0, chrom, chrom_start, chrom_end}; } // This function is here as a compatibility layer between the old and new way to construct extrusion