diff --git a/src/cooler_file_writer.cpp b/src/cooler_file_writer.cpp index 6d63e52..1d0e542 100644 --- a/src/cooler_file_writer.cpp +++ b/src/cooler_file_writer.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -113,8 +114,8 @@ void CoolerFileWriter::add_pixels(const nb::object &df) { var); } -void CoolerFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str, - std::size_t chunk_size, std::size_t update_freq) { +hictk::File CoolerFileWriter::finalize(std::string_view log_lvl_str, std::size_t chunk_size, + std::size_t update_freq) { if (_finalized) { throw std::runtime_error( fmt::format(FMT_STRING("finalize() was already called on file \"{}\""), _path)); @@ -151,6 +152,8 @@ void CoolerFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str, _w.reset(); std::filesystem::remove(sclr_path); // NOLINT // NOLINTEND(*-unchecked-optional-access) + + return hictk::File{_path.string()}; } hictk::cooler::SingleCellFile CoolerFileWriter::create_file(std::string_view path, @@ -212,7 +215,7 @@ void CoolerFileWriter::bind(nb::module_ &m) { // NOLINTBEGIN(*-avoid-magic-numbers) writer.def("finalize", &hictkpy::CoolerFileWriter::finalize, nb::arg("log_lvl") = "WARN", nb::arg("chunk_size") = 500'000, nb::arg("update_frequency") = 10'000'000, - "Write interactions to file."); + "Write interactions to file.", nb::rv_policy::move); // NOLINTEND(*-avoid-magic-numbers) } } // namespace hictkpy diff --git a/src/file.cpp b/src/file.cpp index 8606de7..29ba846 100644 --- a/src/file.cpp +++ b/src/file.cpp @@ -84,8 +84,9 @@ bool is_cooler(const std::filesystem::path &uri) { bool is_hic(const std::filesystem::path &uri) { return hictk::hic::utils::is_hic_file(uri); } -static hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, +static hictkpy::PixelSelector fetch(const hictk::File &f, std::optional range1, + std::optional range2, + std::optional normalization, std::string_view count_type, bool join, std::string_view query_type) { if (count_type != "float" && count_type != "int") { @@ -96,15 +97,17 @@ static hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range throw std::runtime_error("query_type should be either UCSC or BED"); } - if (normalization != "NONE") { + const hictk::balancing::Method normalization_method{normalization.value_or("NONE")}; + + if (normalization_method != hictk::balancing::Method::NONE()) { count_type = "float"; } - if (range1.empty()) { - assert(range2.empty()); + if (!range1.has_value() || range1->empty()) { + assert(!range2.has_value() || range2->empty()); return std::visit( [&](const auto &ff) { - auto sel = ff.fetch(hictk::balancing::Method{normalization}); + auto sel = ff.fetch(normalization_method); using SelT = decltype(sel); return hictkpy::PixelSelector(std::make_shared(std::move(sel)), count_type, join); @@ -112,20 +115,21 @@ static hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range f.get()); } - if (range2.empty()) { + if (!range2.has_value() || range2->empty()) { range2 = range1; } const auto query_type_ = query_type == "UCSC" ? hictk::GenomicInterval::Type::UCSC : hictk::GenomicInterval::Type::BED; - const auto gi1 = hictk::GenomicInterval::parse(f.chromosomes(), std::string{range1}, query_type_); - const auto gi2 = hictk::GenomicInterval::parse(f.chromosomes(), std::string{range2}, query_type_); + const auto gi1 = + hictk::GenomicInterval::parse(f.chromosomes(), std::string{*range1}, query_type_); + const auto gi2 = + hictk::GenomicInterval::parse(f.chromosomes(), std::string{*range2}, query_type_); return std::visit( [&](const auto &ff) { - // Workaround bug fixed in https://github.com/paulsengroup/hictk/pull/158 - auto sel = ff.fetch(fmt::format(FMT_STRING("{}"), gi1), fmt::format(FMT_STRING("{}"), gi2), - hictk::balancing::Method(normalization)); + auto sel = ff.fetch(gi1.chrom().name(), gi1.start(), gi1.end(), gi2.chrom().name(), + gi2.start(), gi2.end(), normalization_method); using SelT = decltype(sel); return hictkpy::PixelSelector(std::make_shared(std::move(sel)), count_type, @@ -307,9 +311,9 @@ void declare_file_class(nb::module_ &m) { file.def("attributes", &file::attributes, "Get file attributes as a dictionary.", nb::rv_policy::take_ownership); - file.def("fetch", &file::fetch, nb::keep_alive<0, 1>(), nb::arg("range1") = "", - nb::arg("range2") = "", nb::arg("normalization") = "NONE", nb::arg("count_type") = "int", - nb::arg("join") = false, nb::arg("query_type") = "UCSC", + file.def("fetch", &file::fetch, nb::keep_alive<0, 1>(), nb::arg("range1") = nb::none(), + nb::arg("range2") = nb::none(), nb::arg("normalization") = nb::none(), + nb::arg("count_type") = "int", nb::arg("join") = false, nb::arg("query_type") = "UCSC", "Fetch interactions overlapping a region of interest.", nb::rv_policy::move); file.def("avail_normalizations", &file::avail_normalizations, diff --git a/src/hic_file_writer.cpp b/src/hic_file_writer.cpp index f25363d..a5eadea 100644 --- a/src/hic_file_writer.cpp +++ b/src/hic_file_writer.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -73,7 +74,7 @@ HiCFileWriter::HiCFileWriter(const std::filesystem::path &path_, const hictkpy:: assembly, n_threads, chunk_size, tmpdir, compression_lvl, skip_all_vs_all_matrix) {} -void HiCFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str) { +hictk::File HiCFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str) { if (_finalized) { throw std::runtime_error( fmt::format(FMT_STRING("finalize() was already called on file \"{}\""), _w.path())); @@ -93,14 +94,25 @@ void HiCFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str) { } SPDLOG_INFO(FMT_STRING("successfully finalized \"{}\"!"), _w.path()); spdlog::default_logger()->set_level(previous_lvl); + + return hictk::File{std::string{_w.path()}, _w.resolutions().front()}; } std::filesystem::path HiCFileWriter::path() const noexcept { return std::filesystem::path{_w.path()}; } -const std::vector &HiCFileWriter::resolutions() const noexcept { - return _w.resolutions(); +auto HiCFileWriter::resolutions() const { + using WeightVector = nb::ndarray, nb::c_contig, std::uint32_t>; + + // NOLINTNEXTLINE + auto *resolutions_ptr = new std::vector(_w.resolutions()); + + auto capsule = nb::capsule(resolutions_ptr, [](void *vect_ptr) noexcept { + delete reinterpret_cast *>(vect_ptr); // NOLINT + }); + + return WeightVector{resolutions_ptr->data(), {resolutions_ptr->size()}, capsule}; } const hictk::Reference &HiCFileWriter::chromosomes() const { return _w.chromosomes(); } @@ -171,7 +183,7 @@ void HiCFileWriter::bind(nb::module_ &m) { writer.def("path", &hictkpy::HiCFileWriter::path, "Get the file path.", nb::rv_policy::move); writer.def("resolutions", &hictkpy::HiCFileWriter::resolutions, - "Get the list of resolutions in bp.", nb::rv_policy::move); + "Get the list of resolutions in bp.", nb::rv_policy::take_ownership); writer.def("chromosomes", &get_chromosomes_from_object, nb::arg("include_ALL") = false, "Get chromosomes sizes as a dictionary mapping names to sizes.", @@ -185,7 +197,7 @@ void HiCFileWriter::bind(nb::module_ &m) { "either with columns=[bin1_id, bin2_id, count] or with columns=[chrom1, start1, end1, " "chrom2, start2, end2, count]."); writer.def("finalize", &hictkpy::HiCFileWriter::finalize, nb::arg("log_lvl") = "WARN", - "Write interactions to file."); + "Write interactions to file.", nb::rv_policy::move); } } // namespace hictkpy diff --git a/src/include/hictkpy/cooler_file_writer.hpp b/src/include/hictkpy/cooler_file_writer.hpp index 456da33..ec90b4b 100644 --- a/src/include/hictkpy/cooler_file_writer.hpp +++ b/src/include/hictkpy/cooler_file_writer.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -43,7 +44,8 @@ class CoolerFileWriter { void add_pixels(const nanobind::object& df); - void finalize(std::string_view log_lvl_str, std::size_t chunk_size, std::size_t update_frequency); + [[nodiscard]] hictk::File finalize(std::string_view log_lvl_str, std::size_t chunk_size, + std::size_t update_frequency); [[nodiscard]] std::string repr() const; static void bind(nanobind::module_& m); diff --git a/src/include/hictkpy/dynamic_1d_array.hpp b/src/include/hictkpy/dynamic_1d_array.hpp deleted file mode 100644 index e592e89..0000000 --- a/src/include/hictkpy/dynamic_1d_array.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (C) 2024 Roberto Rossini -// -// SPDX-License-Identifier: MIT - -#pragma once - -#include -#include -#include - -#include "hictkpy/nanobind.hpp" - -namespace hictkpy { - -template -struct Dynamic1DA { - private: - using BufferT = nanobind::ndarray, T>; - using VectorT = decltype(std::declval().view()); - nanobind::object _dtype{}; - nanobind::object _np_array{}; - BufferT _buff{}; - VectorT _vector{}; - - std::int64_t _size{}; - std::int64_t _capacity{}; - - public: - explicit Dynamic1DA(std::size_t size_ = 1000); - void push_back(T x); - void emplace_back(T &&x); - void resize(std::int64_t new_size); - void grow(); - void shrink_to_fit(); - [[nodiscard]] auto operator()() -> BufferT; - - private: - [[nodiscard]] static nanobind::object np(); -}; - -} // namespace hictkpy - -#include "./impl/dynamic_1d_array_impl.hpp" diff --git a/src/include/hictkpy/hic_file_writer.hpp b/src/include/hictkpy/hic_file_writer.hpp index ddb70c2..1041b72 100644 --- a/src/include/hictkpy/hic_file_writer.hpp +++ b/src/include/hictkpy/hic_file_writer.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -40,14 +41,14 @@ class HiCFileWriter { bool skip_all_vs_all_matrix); [[nodiscard]] std::filesystem::path path() const noexcept; - [[nodiscard]] const std::vector& resolutions() const noexcept; + [[nodiscard]] auto resolutions() const; [[nodiscard]] const hictk::Reference& chromosomes() const; [[nodiscard]] hictkpy::BinTable bins(std::uint32_t resolution) const; void add_pixels(const nanobind::object& df); - void finalize(std::string_view log_lvl_str); + [[nodiscard]] hictk::File finalize(std::string_view log_lvl_str); [[nodiscard]] std::string repr() const; diff --git a/src/include/hictkpy/impl/dynamic_1d_array_impl.hpp b/src/include/hictkpy/impl/dynamic_1d_array_impl.hpp deleted file mode 100644 index ea83905..0000000 --- a/src/include/hictkpy/impl/dynamic_1d_array_impl.hpp +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (C) 2024 Roberto Rossini -// -// SPDX-License-Identifier: MIT - -#pragma once - -#include -#include -#include -#include - -#include "hictkpy/common.hpp" -#include "hictkpy/nanobind.hpp" - -namespace hictkpy { - -template -inline Dynamic1DA::Dynamic1DA(std::size_t size_) - : _dtype(np().attr("dtype")(map_type_to_dtype())), - _np_array( - np().attr("empty")(static_cast(size_), nanobind::arg("dtype") = _dtype)), - _buff(nanobind::cast(_np_array)), - _vector(_buff.view()), - _capacity(static_cast(size_)) {} - -template -inline void Dynamic1DA::push_back(T x) { - if (_capacity == _size) { - grow(); - } - _vector(_size++) = x; -} - -template -inline void Dynamic1DA::emplace_back(T &&x) { - if (_capacity == _size) { - grow(); - } - _vector(_size++) = std::move(x); -} - -template -inline void Dynamic1DA::resize(std::int64_t new_size) { - if (_capacity == new_size) { - return; - } - auto new_array = np().attr("empty")(new_size, nanobind::arg("dtype") = _dtype); - auto new_buff = nanobind::cast(new_array); - auto new_vector = new_buff.view(); - - _capacity = new_size; - _size = std::min(_capacity, _size); - std::copy(_vector.data(), _vector.data() + static_cast(_size), new_vector.data()); - - std::swap(new_array, _np_array); - std::swap(new_buff, _buff); - std::swap(new_vector, _vector); -} - -template -inline void Dynamic1DA::grow() { - resize(static_cast(_buff.size() * 2)); -} - -template -inline void Dynamic1DA::shrink_to_fit() { - resize(_size); -} - -template -[[nodiscard]] auto Dynamic1DA::operator()() -> BufferT { - shrink_to_fit(); - return _buff; -} - -template -[[nodiscard]] nanobind::object Dynamic1DA::np() { - return nanobind::module_::import_("numpy"); -} - -} // namespace hictkpy diff --git a/src/multires_file.cpp b/src/multires_file.cpp index 15dace7..f8b7121 100644 --- a/src/multires_file.cpp +++ b/src/multires_file.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include "hictkpy/nanobind.hpp" #include "hictkpy/reference.hpp" @@ -27,6 +28,19 @@ static std::string repr(const hictk::MultiResFile& mrf) { static std::filesystem::path get_path(const hictk::MultiResFile& mrf) { return mrf.path(); } +[[nodiscard]] static auto get_resolutions(const hictk::MultiResFile& f) { + using WeightVector = nb::ndarray, nb::c_contig, std::uint32_t>; + + // NOLINTNEXTLINE + auto* resolutions_ptr = new std::vector(f.resolutions()); + + auto capsule = nb::capsule(resolutions_ptr, [](void* vect_ptr) noexcept { + delete reinterpret_cast*>(vect_ptr); // NOLINT + }); + + return WeightVector{resolutions_ptr->data(), {resolutions_ptr->size()}, capsule}; +} + static nb::dict get_attrs(const hictk::hic::File& hf) { nb::dict py_attrs; @@ -59,7 +73,7 @@ static nb::dict get_attrs(const hictk::cooler::MultiResFile& mclr) { static nb::dict attributes(const hictk::MultiResFile& f) { auto attrs = f.is_hic() ? get_attrs(f.open(f.resolutions().front()).get()) : get_attrs(hictk::cooler::MultiResFile{f.path()}); - attrs["resolutions"] = f.resolutions(); + attrs["resolutions"] = get_resolutions(f); return attrs; } @@ -84,8 +98,8 @@ void declare_multires_file_class(nb::module_& m) { nb::arg("include_ALL") = false, "Get chromosomes sizes as a dictionary mapping names to sizes.", nb::rv_policy::take_ownership); - mres_file.def("resolutions", &hictk::MultiResFile::resolutions, - "Get the list of available resolutions.", nb::rv_policy::copy); + mres_file.def("resolutions", &get_resolutions, "Get the list of available resolutions.", + nb::rv_policy::take_ownership); mres_file.def("attributes", &multires_file::attributes, "Get file attributes as a dictionary.", nb::rv_policy::take_ownership); mres_file.def("__getitem__", &hictk::MultiResFile::open, diff --git a/test/test_file_creation_cool.py b/test/test_file_creation_cool.py index 4a77605..1685326 100644 --- a/test/test_file_creation_cool.py +++ b/test/test_file_creation_cool.py @@ -62,7 +62,7 @@ def test_file_creation_thin_pixel(self, file, resolution, tmpdir): end = start + chunk_size w.add_pixels(df[start:end]) - w.finalize("info", 100_000, 100_000) + f = w.finalize("info", 100_000, 100_000) with pytest.raises(Exception): w.add_pixels(df) with pytest.raises(Exception): @@ -71,7 +71,6 @@ def test_file_creation_thin_pixel(self, file, resolution, tmpdir): del w gc.collect() - f = hictkpy.File(path, resolution) assert f.fetch().sum() == expected_sum def test_file_creation(self, file, resolution, tmpdir): @@ -90,7 +89,7 @@ def test_file_creation(self, file, resolution, tmpdir): end = start + chunk_size w.add_pixels(df[start:end]) - w.finalize("info", 100_000, 100_000) + f = w.finalize("info", 100_000, 100_000) with pytest.raises(Exception): w.add_pixels(df) with pytest.raises(Exception): @@ -99,7 +98,6 @@ def test_file_creation(self, file, resolution, tmpdir): del w gc.collect() - f = hictkpy.File(path, resolution) assert f.fetch().sum() == expected_sum def test_file_creation_bin_table(self, file, resolution, tmpdir): @@ -116,7 +114,7 @@ def test_file_creation_bin_table(self, file, resolution, tmpdir): end = start + chunk_size w.add_pixels(df[start:end]) - w.finalize("info", 100_000, 100_000) + f = w.finalize("info", 100_000, 100_000) with pytest.raises(Exception): w.add_pixels(df) with pytest.raises(Exception): @@ -125,7 +123,6 @@ def test_file_creation_bin_table(self, file, resolution, tmpdir): del w gc.collect() - f = hictkpy.File(path, resolution) assert f.fetch().sum() == expected_sum def test_file_creation_float_counts(self, file, resolution, tmpdir): @@ -145,7 +142,7 @@ def test_file_creation_float_counts(self, file, resolution, tmpdir): end = start + chunk_size w.add_pixels(df[start:end]) - w.finalize("info", 100_000, 100_000) + f = w.finalize("info", 100_000, 100_000) with pytest.raises(Exception): w.add_pixels(df) with pytest.raises(Exception): @@ -154,5 +151,4 @@ def test_file_creation_float_counts(self, file, resolution, tmpdir): del w gc.collect() - f = hictkpy.File(path, resolution) assert pytest.approx(f.fetch(count_type="float").sum()) == expected_sum diff --git a/test/test_file_creation_hic.py b/test/test_file_creation_hic.py index dc03e73..347ef20 100644 --- a/test/test_file_creation_hic.py +++ b/test/test_file_creation_hic.py @@ -61,7 +61,7 @@ def test_file_creation_thin_pixel(self, file, resolution, tmpdir): end = start + chunk_size w.add_pixels(df[start:end]) - w.finalize() + f = w.finalize() with pytest.raises(Exception): w.add_pixels(df) with pytest.raises(Exception): @@ -70,7 +70,6 @@ def test_file_creation_thin_pixel(self, file, resolution, tmpdir): del w gc.collect() - f = hictkpy.File(path, resolution) assert f.fetch().sum() == expected_sum def test_file_creation(self, file, resolution, tmpdir): @@ -89,7 +88,7 @@ def test_file_creation(self, file, resolution, tmpdir): end = start + chunk_size w.add_pixels(df[start:end]) - w.finalize() + f = w.finalize() with pytest.raises(Exception): w.add_pixels(df) with pytest.raises(Exception): @@ -98,7 +97,6 @@ def test_file_creation(self, file, resolution, tmpdir): del w gc.collect() - f = hictkpy.File(path, resolution) assert f.fetch().sum() == expected_sum def test_file_creation_bin_table(self, file, resolution, tmpdir): @@ -120,7 +118,7 @@ def test_file_creation_bin_table(self, file, resolution, tmpdir): end = start + chunk_size w.add_pixels(df[start:end]) - w.finalize() + f = w.finalize() with pytest.raises(Exception): w.add_pixels(df) with pytest.raises(Exception): @@ -129,5 +127,4 @@ def test_file_creation_bin_table(self, file, resolution, tmpdir): del w gc.collect() - f = hictkpy.File(path, resolution) assert f.fetch().sum() == expected_sum diff --git a/test/test_multires_file_accessors.py b/test/test_multires_file_accessors.py index 6255afb..4e1e1c5 100644 --- a/test/test_multires_file_accessors.py +++ b/test/test_multires_file_accessors.py @@ -32,16 +32,16 @@ def test_accessors(self, file, format): if f.is_hic(): resolutions = [100_000] - assert f.resolutions() == resolutions + assert (f.resolutions() == resolutions).all() assert f.attributes()["format"] == "HIC" assert f.attributes()["format-version"] == 9 - assert f.attributes()["resolutions"] == resolutions + assert (f.attributes()["resolutions"] == resolutions).all() else: resolutions = [100_000, 1_000_000] - assert f.resolutions() == resolutions + assert (f.resolutions() == resolutions).all() assert f.attributes()["format"] == "HDF5::MCOOL" assert f.attributes()["format-version"] == 2 - assert f.attributes()["resolutions"] == resolutions + assert (f.attributes()["resolutions"] == resolutions).all() assert f[100_000].resolution() == 100_000