Skip to content

Commit

Permalink
Merge pull request #118 from paulsengroup/perf/avoid-unnecessary-copies
Browse files Browse the repository at this point in the history
Annotate functions with nb::rv_policy to avoid unnecessary copies
  • Loading branch information
robomics authored Nov 10, 2024
2 parents d1bfb66 + 21bfb2e commit 6cc6b29
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 49 deletions.
29 changes: 18 additions & 11 deletions src/bin_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,50 +462,55 @@ void BinTable::bind(nb::module_& m) {
"\"end\"].",
nb::sig("def __init__(self, bins: pandas.DataFrame) -> None"));

bt.def("__repr__", &BinTable::repr);
bt.def("__repr__", &BinTable::repr, nb::rv_policy::move);

bt.def("chromosomes", &get_chromosomes_from_object<hictkpy::BinTable>,
nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.");
"Get chromosomes sizes as a dictionary mapping names to sizes.",
nb::rv_policy::take_ownership);

bt.def("resolution", &BinTable::resolution,
"Get the bin size for the bin table. "
"Return 0 in case the bin table has a variable bin size.");

bt.def("type", &BinTable::type,
"Get the type of table underlying the BinTable object (i.e. fixed or variable).");
"Get the type of table underlying the BinTable object (i.e. fixed or variable).",
nb::rv_policy::move);

bt.def("__len__", &BinTable::size, "Get the number of bins in the bin table.");

bt.def("__iter__", &BinTable::make_iterable, nb::keep_alive<0, 1>(),
nb::sig("def __iter__(self) -> hictkpy.BinTableIterator"),
"Return an iterator over the bins in the table.");
"Return an iterator over the bins in the table.", nb::rv_policy::take_ownership);

bt.def("get", &BinTable::bin_id_to_coord, nb::arg("bin_id"),
"Get the genomic coordinate given a bin ID.",
nb::sig("def get(self, bin_id: int) -> hictkpy.Bin"));
nb::sig("def get(self, bin_id: int) -> hictkpy.Bin"), nb::rv_policy::move);
bt.def("get", &BinTable::bin_ids_to_coords, nb::arg("bin_ids"),
"Get the genomic coordinates given a sequence of bin IDs. "
"Genomic coordinates are returned as a pandas.DataFrame with columns [\"chrom\", "
"\"start\", \"end\"].",
nb::sig("def get(self, bin_ids: collections.abc.Sequence[int]) -> pandas.DataFrame"));
nb::sig("def get(self, bin_ids: collections.abc.Sequence[int]) -> pandas.DataFrame"),
nb::rv_policy::take_ownership);

bt.def("get", &BinTable::coord_to_bin, nb::arg("chrom"), nb::arg("pos"),
"Get the bin overlapping the given genomic coordinate.",
nb::sig("def get(self, chrom: str, pos: int) -> hictkpy.Bin"));
nb::sig("def get(self, chrom: str, pos: int) -> hictkpy.Bin"), nb::rv_policy::move);
bt.def("get", &BinTable::coords_to_bins, nb::arg("chroms"), nb::arg("pos"),
"Get the bins overlapping the given genomic coordinates. "
"Bins are returned as a pandas.DataFrame with columns [\"chrom\", "
"\"start\", \"end\"].",
nb::sig("def get(self, chroms: collections.abc.Sequence[str], pos: "
"collections.abc.Sequence[int]) -> pandas.DataFrame"));
"collections.abc.Sequence[int]) -> pandas.DataFrame"),
nb::rv_policy::take_ownership);

bt.def("get_id", &BinTable::coord_to_bin_id, nb::arg("chrom"), nb::arg("pos"),
"Get the ID of the bin overlapping the given genomic coordinate.");
bt.def("get_ids", &BinTable::coords_to_bin_ids, nb::arg("chroms"), nb::arg("pos"),
"Get the IDs of the bins overlapping the given genomic coordinates.",
nb::sig("def get_ids(self, chroms: collections.abc.Sequence[str], pos: "
"collections.abc.Sequence[int]) -> numpy.ndarray[dtype=int64]"));
"collections.abc.Sequence[int]) -> numpy.ndarray[dtype=int64]"),
nb::rv_policy::take_ownership);

bt.def("merge", &BinTable::merge_coords, nb::arg("df"),
"Merge genomic coordinates corresponding to the given bin identifiers. "
Expand All @@ -514,13 +519,15 @@ void BinTable::bind(nb::module_& m) {
"Genomic coordinates are returned as a pandas.DataFrame containing the same data as the "
"DataFrame given as input, plus columns [\"chrom1\", \"start1\", \"end1\", \"chrom2\", "
"\"start2\", \"end2\"].",
nb::sig("def merge(self, df: pandas.DataFrame) -> pandas.DataFrame"));
nb::sig("def merge(self, df: pandas.DataFrame) -> pandas.DataFrame"),
nb::rv_policy::take_ownership);

bt.def("to_df", &BinTable::to_df, nb::arg("range") = nb::none(), nb::arg("query_type") = "UCSC",
"Return the bins in the BinTable as a pandas.DataFrame. The optional \"range\" parameter "
"can be used to only fetch a subset of the bins in the BinTable.",
nb::sig("def to_df(self, range: str | None = None, query_type: str = 'UCSC') -> "
"pandas.DataFrame"));
"pandas.DataFrame"),
nb::rv_policy::take_ownership);
}

} // namespace hictkpy
7 changes: 4 additions & 3 deletions src/cooler_file_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,13 +185,14 @@ void CoolerFileWriter::bind(nb::module_ &m) {
"Open a .cool file for writing given a table of bins.");
// NOLINTEND(*-avoid-magic-numbers)

writer.def("__repr__", &hictkpy::CoolerFileWriter::repr);
writer.def("__repr__", &hictkpy::CoolerFileWriter::repr, nb::rv_policy::move);

writer.def("path", &hictkpy::CoolerFileWriter::path, "Get the file path.");
writer.def("path", &hictkpy::CoolerFileWriter::path, "Get the file path.", nb::rv_policy::copy);
writer.def("resolution", &hictkpy::CoolerFileWriter::resolution, "Get the resolution in bp.");
writer.def("chromosomes", &get_chromosomes_from_object<hictkpy::CoolerFileWriter>,
nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.");
"Get chromosomes sizes as a dictionary mapping names to sizes.",
nb::rv_policy::take_ownership);

writer.def("add_pixels", &hictkpy::CoolerFileWriter::add_pixels,
nb::sig("def add_pixels(self, pixels: pandas.DataFrame)"), nb::arg("pixels"),
Expand Down
18 changes: 10 additions & 8 deletions src/file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,32 +285,34 @@ void declare_file_class(nb::module_ &m) {
"resolution.\n"
"Resolution is ignored when opening single-resolution Cooler files.");

file.def("__repr__", &file::repr);
file.def("__repr__", &file::repr, nb::rv_policy::move);

file.def("uri", &hictk::File::uri, "Return the file URI.");
file.def("path", &file::get_path, "Return the file path.");
file.def("uri", &hictk::File::uri, "Return the file URI.", nb::rv_policy::move);
file.def("path", &file::get_path, "Return the file path.", nb::rv_policy::move);

file.def("is_hic", &hictk::File::is_hic, "Test whether file is in .hic format.");
file.def("is_cooler", &hictk::File::is_cooler, "Test whether file is in .cool format.");

file.def("chromosomes", &get_chromosomes_from_object<hictk::File>, nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.");
"Get chromosomes sizes as a dictionary mapping names to sizes.",
nb::rv_policy::take_ownership);
file.def("bins", &get_bins_from_object<hictk::File>, "Get table of bins.",
nb::sig("def bins(self) -> hictkpy.BinTable"));
nb::sig("def bins(self) -> hictkpy.BinTable"), nb::rv_policy::move);

file.def("resolution", &hictk::File::resolution, "Get the bin size in bp.");
file.def("nbins", &hictk::File::nbins, "Get the total number of bins.");
file.def("nchroms", &hictk::File::nchroms, "Get the total number of chromosomes.");

file.def("attributes", &file::attributes, "Get file attributes as a dictionary.");
file.def("attributes", &file::attributes, "Get file attributes as a dictionary.",
nb::rv_policy::take_ownership);

file.def("fetch", &file::fetch, nb::keep_alive<0, 1>(), nb::arg("range1") = "",
nb::arg("range2") = "", nb::arg("normalization") = "NONE", nb::arg("count_type") = "int",
nb::arg("join") = false, nb::arg("query_type") = "UCSC",
"Fetch interactions overlapping a region of interest.");
"Fetch interactions overlapping a region of interest.", nb::rv_policy::move);

file.def("avail_normalizations", &file::avail_normalizations,
"Get the list of available normalizations.");
"Get the list of available normalizations.", nb::rv_policy::move);
file.def("has_normalization", &hictk::File::has_normalization, nb::arg("normalization"),
"Check whether a given normalization is available.");
file.def("weights", &file::weights, nb::arg("name"), nb::arg("divisive") = true,
Expand Down
9 changes: 5 additions & 4 deletions src/hic_file_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,15 @@ void HiCFileWriter::bind(nb::module_ &m) {
"supported.");
// NOLINTEND(*-avoid-magic-numbers)

writer.def("__repr__", &hictkpy::HiCFileWriter::repr);
writer.def("__repr__", &hictkpy::HiCFileWriter::repr, nb::rv_policy::move);

writer.def("path", &hictkpy::HiCFileWriter::path, "Get the file path.");
writer.def("path", &hictkpy::HiCFileWriter::path, "Get the file path.", nb::rv_policy::move);
writer.def("resolutions", &hictkpy::HiCFileWriter::resolutions,
"Get the list of resolutions in bp.");
"Get the list of resolutions in bp.", nb::rv_policy::move);
writer.def("chromosomes", &get_chromosomes_from_object<hictkpy::HiCFileWriter>,
nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.");
"Get chromosomes sizes as a dictionary mapping names to sizes.",
nb::rv_policy::take_ownership);

writer.def("add_pixels", &hictkpy::HiCFileWriter::add_pixels,
nb::sig("def add_pixels(self, pixels: pd.DataFrame) -> None"), nb::arg("pixels"),
Expand Down
12 changes: 7 additions & 5 deletions src/multires_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,18 @@ void declare_multires_file_class(nb::module_& m) {
mres_file.def("__init__", &multires_file::ctor, nb::arg("path"),
"Open a multi-resolution Cooler file (.mcool) or .hic file.");

mres_file.def("__repr__", &multires_file::repr);
mres_file.def("__repr__", &multires_file::repr, nb::rv_policy::move);

mres_file.def("path", &multires_file::get_path, "Get the file path.");
mres_file.def("path", &multires_file::get_path, "Get the file path.", nb::rv_policy::move);
mres_file.def("chromosomes", &get_chromosomes_from_object<hictk::MultiResFile>,
nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.");
"Get chromosomes sizes as a dictionary mapping names to sizes.",
nb::rv_policy::take_ownership);
mres_file.def("resolutions", &hictk::MultiResFile::resolutions,
"Get the list of available resolutions.");
"Get the list of available resolutions.", nb::rv_policy::copy);
mres_file.def("__getitem__", &hictk::MultiResFile::open,
"Open the Cooler or .hic file corresponding to the resolution given as input.");
"Open the Cooler or .hic file corresponding to the resolution given as input.",
nb::rv_policy::move);
}

} // namespace hictkpy::multires_file
22 changes: 12 additions & 10 deletions src/pixel_selector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,35 +383,37 @@ void PixelSelector::bind(nb::module_& m) {
sel.def(nb::init<std::shared_ptr<const hictk::hic::PixelSelectorAll>, std::string_view, bool>(),
nb::arg("selector"), nb::arg("type"), nb::arg("join"));

sel.def("__repr__", &PixelSelector::repr);
sel.def("__repr__", &PixelSelector::repr, nb::rv_policy::move);

sel.def("coord1", &PixelSelector::get_coord1, "Get query coordinates for the first dimension.");
sel.def("coord2", &PixelSelector::get_coord2, "Get query coordinates for the second dimension.");
sel.def("coord1", &PixelSelector::get_coord1, "Get query coordinates for the first dimension.",
nb::rv_policy::move);
sel.def("coord2", &PixelSelector::get_coord2, "Get query coordinates for the second dimension.",
nb::rv_policy::move);

sel.def("__iter__", &PixelSelector::make_iterable, nb::keep_alive<0, 1>(),
nb::sig("def __iter__(self) -> hictkpy.PixelIterator"),
"Return an iterator over the selected pixels.");
"Return an iterator over the selected pixels.", nb::rv_policy::take_ownership);

sel.def("to_arrow", &PixelSelector::to_arrow, nb::arg("query_span") = "upper_triangle",
nb::sig("def to_arrow(self, query_span: str = \"upper_triangle\") -> pyarrow.Table"),
"Retrieve interactions as a pyarrow.Table.");
"Retrieve interactions as a pyarrow.Table.", nb::rv_policy::take_ownership);
sel.def("to_pandas", &PixelSelector::to_pandas, nb::arg("query_span") = "upper_triangle",
nb::sig("def to_pandas(self, query_span: str = \"upper_triangle\") -> pandas.DataFrame"),
"Retrieve interactions as a pandas DataFrame.");
"Retrieve interactions as a pandas DataFrame.", nb::rv_policy::take_ownership);
sel.def("to_df", &PixelSelector::to_df, nb::arg("query_span") = "upper_triangle",
nb::sig("def to_df(self, query_span: str = \"upper_triangle\") -> pandas.DataFrame"),
"Alias to to_pandas().");
"Alias to to_pandas().", nb::rv_policy::take_ownership);
sel.def("to_numpy", &PixelSelector::to_numpy, nb::arg("query_span") = "full",
nb::sig("def to_numpy(self, query_span: str = \"full\") -> numpy.ndarray"),
"Retrieve interactions as a numpy 2D matrix.");
"Retrieve interactions as a numpy 2D matrix.", nb::rv_policy::move);
sel.def(
"to_coo", &PixelSelector::to_coo, nb::arg("query_span") = "upper_triangle",
nb::sig("def to_coo(self, query_span: str = \"upper_triangle\") -> scipy.sparse.coo_matrix"),
"Retrieve interactions as a SciPy COO matrix.");
"Retrieve interactions as a SciPy COO matrix.", nb::rv_policy::take_ownership);
sel.def(
"to_csr", &PixelSelector::to_csr, nb::arg("query_span") = "upper_triangle",
nb::sig("def to_csr(self, query_span: str = \"upper_triangle\") -> scipy.sparse.csr_matrix"),
"Retrieve interactions as a SciPy CSR matrix.");
"Retrieve interactions as a SciPy CSR matrix.", nb::rv_policy::move);

sel.def("nnz", &PixelSelector::nnz,
"Get the number of non-zero entries for the current pixel selection.");
Expand Down
19 changes: 12 additions & 7 deletions src/singlecell_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,20 +106,25 @@ void declare_singlecell_file_class(nb::module_& m) {
scell_file.def("__init__", &singlecell_file::ctor, nb::arg("path"),
"Open a single-cell Cooler file (.scool).");

scell_file.def("__repr__", &singlecell_file::repr);
scell_file.def("__repr__", &singlecell_file::repr, nb::rv_policy::move);

scell_file.def("path", &singlecell_file::get_path, "Get the file path.");
scell_file.def("path", &singlecell_file::get_path, "Get the file path.", nb::rv_policy::move);
scell_file.def("resolution", &hictk::cooler::SingleCellFile::resolution,
"Get the bin size in bp.");
scell_file.def("chromosomes", &get_chromosomes_from_object<hictk::cooler::SingleCellFile>,
nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.");
"Get chromosomes sizes as a dictionary mapping names to sizes.",
nb::rv_policy::take_ownership);
scell_file.def("bins", &get_bins_from_object<hictk::cooler::SingleCellFile>,
nb::sig("def bins(self) -> hictkpy.BinTable"), "Get table of bins.");
scell_file.def("attributes", &singlecell_file::get_attrs, "Get file attributes as a dictionary.");
scell_file.def("cells", &singlecell_file::get_cells, "Get the list of available cells.");
nb::sig("def bins(self) -> hictkpy.BinTable"), "Get table of bins.",
nb::rv_policy::move);
scell_file.def("attributes", &singlecell_file::get_attrs, "Get file attributes as a dictionary.",
nb::rv_policy::take_ownership);
scell_file.def("cells", &singlecell_file::get_cells, "Get the list of available cells.",
nb::rv_policy::move);
scell_file.def("__getitem__", &singlecell_file::getitem,
"Open the Cooler file corresponding to the cell ID given as input.");
"Open the Cooler file corresponding to the cell ID given as input.",
nb::rv_policy::move);
}

} // namespace hictkpy::singlecell_file
3 changes: 2 additions & 1 deletion src/to_pyarrow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ nb::object export_pyarrow_table(std::shared_ptr<arrow::Table> arrow_table) {
columns_py[i] = pa.attr("chunked_array")(export_arrow_array_stream(std::move(columns[i])));
}

return pa.attr("Table").attr("from_arrays")(columns_py, nb::arg("schema") = schema);
return nb::cast(pa.attr("Table").attr("from_arrays")(columns_py, nb::arg("schema") = schema),
nb::rv_policy::take_ownership);
}
// NOLINTEND(*-owning-memory,*-no-malloc)

Expand Down

0 comments on commit 6cc6b29

Please sign in to comment.