Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix: query mirroring #43

Merged
merged 5 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/hictkpy_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1, std:
: hictk::GenomicInterval::parse_bed(f.chromosomes(), range2);
bool mirror = false;

if (gi1 > gi2) {
if (gi1 > gi2 || (gi1.chrom() == gi2.chrom() && gi1.start() > gi2.start())) {
mirror = true;
std::swap(gi1, gi2);
}
Expand Down
11 changes: 6 additions & 5 deletions src/hictkpy_pixel_selector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,18 +202,19 @@ nb::object PixelSelector::to_numpy() const {

const auto mirror_matrix = coord1().bin1.chrom() == coord2().bin1.chrom();

const auto row_offset = static_cast<std::int64_t>(coord1().bin1.id());
const auto col_offset = static_cast<std::int64_t>(coord2().bin1.id());

return std::visit(
[&, num_rows, num_cols, mirror_matrix](const auto& s) {
[&, num_rows, num_cols, mirror_matrix, row_offset, col_offset](const auto& s) {
if (int_pixels()) {
using T = std::int32_t;
return pixel_iterators_to_numpy(s->template begin<T>(), s->template end<T>(), num_rows,
num_cols, mirror_matrix, coord1().bin1.id(),
coord2().bin1.id());
num_cols, mirror_matrix, row_offset, col_offset);
} else {
using T = double;
return pixel_iterators_to_numpy(s->template begin<T>(), s->template end<T>(), num_rows,
num_cols, mirror_matrix, coord1().bin1.id(),
coord2().bin1.id());
num_cols, mirror_matrix, row_offset, col_offset);
}
},
selector);
Expand Down
18 changes: 9 additions & 9 deletions src/include/hictkpy/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,40 +292,40 @@ inline nb::object pixel_iterators_to_coo_df(PixelIt first_pixel, PixelIt last_pi

template <typename PixelIt>
inline nb::object pixel_iterators_to_numpy(PixelIt first_pixel, PixelIt last_pixel,
std::size_t num_rows, std::size_t num_cols,
std::int64_t num_rows, std::int64_t num_cols,
bool mirror_below_diagonal = true,
std::size_t row_offset = 0, std::size_t col_offset = 0) {
using N = decltype(first_pixel->count);

const auto np = nb::module_::import_("numpy");
const auto dtype = np.attr("dtype")(map_type_to_dtype<N>());
auto buffer = np.attr("zeros")(std::vector<std::size_t>{num_rows, num_cols}, "dtype"_a = dtype);
auto buffer = np.attr("zeros")(std::vector<std::int64_t>{num_rows, num_cols}, "dtype"_a = dtype);

using Shape = nb::shape<nb::any, nb::any>;
using MatrixT = nb::ndarray<nb::numpy, N, Shape>;

auto matrix = nb::cast<MatrixT>(buffer);
auto m = matrix.view();

DISABLE_WARNING_PUSH
DISABLE_WARNING_SIGN_COMPARE
std::for_each(first_pixel, last_pixel, [&](const hictk::ThinPixel<N> &tp) {
const auto i1 = static_cast<std::int64_t>(tp.bin1_id - row_offset);
const auto i2 = static_cast<std::int64_t>(tp.bin2_id - col_offset);
m(i1, i2) = tp.count;

if (mirror_below_diagonal) {
// Mirror matrix below diagonal
if (i2 - i1 < num_rows && i1 < num_cols && i2 < num_rows) {
const auto delta = i2 - i1;
if (delta >= 0 && delta < num_rows && i1 < num_cols && i2 < num_rows) {
m(i2, i1) = tp.count;
} else if (i2 - i1 > num_cols && i1 < num_cols && i2 < num_rows) {
} else if ((delta < 0 || delta > num_cols) && i1 < num_cols && i2 < num_rows) {
const auto i3 = static_cast<std::int64_t>(tp.bin2_id - row_offset);
const auto i4 = static_cast<std::int64_t>(tp.bin1_id - col_offset);
m(i3, i4) = tp.count;

if (i3 >= 0 && i3 < num_rows && i4 >= 0 && i4 < num_cols) {
m(i3, i4) = tp.count;
}
}
}
});
DISABLE_WARNING_POP
return nb::cast(matrix);
}

Expand Down
12 changes: 12 additions & 0 deletions test/test_fetch_dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@ def test_cis(self, file, resolution):
m = f.fetch("chr2R\t10000000\t15000000", query_type="BED").to_numpy()
assert m.shape == (50, 50)

m = f.fetch("chr2L:0-10,000,000", "chr2L:5,000,000-20,000,000").to_numpy()
assert m.shape == (100, 150)
assert m.sum() == 6_287_451

m = f.fetch("chr2L:0-10,000,000", "chr2L:10,000,000-20,000,000").to_numpy()
assert m.shape == (100, 100)
assert m.sum() == 761_223

m = f.fetch("chr2L:0-10,000,000", "chr2L:0-15,000,000").to_numpy()
assert m.shape == (100, 150)
assert m.sum() == 12_607_205

def test_trans(self, file, resolution):
f = hictkpy.File(file, resolution)
m = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000").to_numpy()
Expand Down
8 changes: 8 additions & 0 deletions test/test_fetch_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ def test_cis(self, file, resolution):
df = f.fetch("chr2R\t10000000\t15000000", query_type="BED").to_df()
assert len(df) == 1275

df = f.fetch("chr2L:0-10,000,000", "chr2L:10,000,000-20,000,000").to_df()
assert df["count"].sum() == 761_223
assert len(df) == 9_999

df = f.fetch("chr2L:0-10,000,000", "chr2L:0-15,000,000").to_df()
assert df["count"].sum() == 9_270_385
assert len(df) == 10_050

def test_trans(self, file, resolution):
f = hictkpy.File(file, resolution)

Expand Down
8 changes: 8 additions & 0 deletions test/test_fetch_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ def test_cis(self, file, resolution):
m = f.fetch("chr2R\t10000000\t15000000", query_type="BED").to_coo()
assert m.shape == (50, 50)

m = f.fetch("chr2L:0-10,000,000", "chr2L:10,000,000-20,000,000").to_coo()
assert m.shape == (100, 100)
assert m.sum() == 761_223

m = f.fetch("chr2L:0-10,000,000", "chr2L:0-15,000,000").to_coo()
assert m.shape == (100, 150)
assert m.sum() == 9_270_385

def test_trans(self, file, resolution):
f = hictkpy.File(file, resolution)

Expand Down