diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2b1fe8aa0..9b2ae0ce0 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -379,18 +379,16 @@ add_library( src/neighbors/detail/refine_host_int8_t_float.cpp src/neighbors/detail/refine_host_uint8_t_float.cpp src/neighbors/ivf_flat_index.cpp - src/neighbors/ivf_flat/ivf_flat_build_float_int64_t.cpp - src/neighbors/ivf_flat/ivf_flat_build_int8_t_int64_t.cpp - src/neighbors/ivf_flat/ivf_flat_build_uint8_t_int64_t.cpp - src/neighbors/ivf_flat/ivf_flat_extend_float_int64_t.cpp - src/neighbors/ivf_flat/ivf_flat_extend_int8_t_int64_t.cpp - src/neighbors/ivf_flat/ivf_flat_extend_uint8_t_int64_t.cpp - src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cpp - src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cpp - src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cpp - src/neighbors/ivf_flat/ivf_flat_serialize_float_int64_t.cpp - src/neighbors/ivf_flat/ivf_flat_serialize_int8_t_int64_t.cpp - src/neighbors/ivf_flat/ivf_flat_serialize_uint8_t_int64_t.cpp + src/neighbors/ivf_flat/ivf_flat_build_extend_float_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_build_extend_int8_t_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_build_extend_uint8_t_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_helpers.cu + src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_serialize_float_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_serialize_int8_t_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_serialize_uint8_t_int64_t.cu src/neighbors/ivf_pq_index.cpp src/neighbors/ivf_pq/ivf_pq_build_common.cu src/neighbors/ivf_pq/ivf_pq_serialize.cu diff --git a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index efb32e024..33daedc78 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -17,13 +17,19 @@ #pragma once #include "ann_types.hpp" -#include +#include "ivf_list.hpp" +#include +#include namespace cuvs::neighbors::ivf_flat { /** * @defgroup ivf_flat_cpp_index_params IVF-Flat index build parameters * @{ */ + +/** Size of the interleaved group (see `index::data` description). */ +constexpr static uint32_t kIndexGroupSize = 32; + struct index_params : ann::index_params { /** The number of inverted lists (clusters) */ uint32_t n_lists = 1024; @@ -54,22 +60,6 @@ struct index_params : ann::index_params { * flag to `true` if you prefer to use as little GPU memory for the database as possible. */ bool conservative_memory_allocation = false; - - /** Build a raft IVF_FLAT index params from an existing cuvs IVF_FLAT index params. */ - operator raft::neighbors::ivf_flat::index_params() const - { - return raft::neighbors::ivf_flat::index_params{ - { - .metric = static_cast((int)this->metric), - .metric_arg = this->metric_arg, - .add_data_on_build = this->add_data_on_build, - }, - .n_lists = n_lists, - .kmeans_n_iters = kmeans_n_iters, - .kmeans_trainset_fraction = kmeans_trainset_fraction, - .adaptive_centers = adaptive_centers, - .conservative_memory_allocation = conservative_memory_allocation}; - } }; /** * @} @@ -82,14 +72,45 @@ struct index_params : ann::index_params { struct search_params : ann::search_params { /** The number of clusters to search. 
*/ uint32_t n_probes = 20; +}; + +static_assert(std::is_aggregate_v); +static_assert(std::is_aggregate_v); + +template +struct list_spec { + using value_type = ValueT; + using list_extents = raft::matrix_extent; + using index_type = IdxT; + + SizeT align_max; + SizeT align_min; + uint32_t dim; + + constexpr list_spec(uint32_t dim, bool conservative_memory_allocation) + : dim(dim), + align_min(kIndexGroupSize), + align_max(conservative_memory_allocation ? kIndexGroupSize : 1024) + { + } + + // Allow casting between different size-types (for safer size and offset calculations) + template + constexpr explicit list_spec(const list_spec& other_spec) + : dim{other_spec.dim}, align_min{other_spec.align_min}, align_max{other_spec.align_max} + { + } - /** Build a raft IVF_FLAT search params from an existing cuvs IVF_FLAT search params. */ - operator raft::neighbors::ivf_flat::search_params() const + /** Determine the extents of an array enough to hold a given amount of data. */ + constexpr auto make_list_extents(SizeT n_rows) const -> list_extents { - raft::neighbors::ivf_flat::search_params result = {{}, n_probes}; - return result; + return raft::make_extents(n_rows, dim); } }; + +template +using list_data = ivf::list; + /** * @} */ @@ -125,7 +146,6 @@ struct index : ann::index { bool adaptive_centers, bool conservative_memory_allocation, uint32_t dim); - index(raft::neighbors::ivf_flat::index&& raft_idx); /** * Vectorized load/store size in elements, determines the size of interleaved data chunks. @@ -181,6 +201,19 @@ struct index : ann::index { std::optional> center_norms() noexcept; std::optional> center_norms() const noexcept; + /** + * Accumulated list sizes, sorted in descending order [n_lists + 1]. + * The last value contains the total length of the index. + * The value at index zero is always zero. + * + * That is, the content of this span is as if the `list_sizes` was sorted and then accumulated. + * + * This span is used during search to estimate the maximum size of the workspace. + */ + auto accum_sorted_sizes() noexcept -> raft::host_vector_view; + [[nodiscard]] auto accum_sorted_sizes() const noexcept + -> raft::host_vector_view; + /** Total length of the index. */ IdxT size() const noexcept; @@ -202,23 +235,44 @@ struct index : ann::index { */ bool conservative_memory_allocation() const noexcept; + void allocate_center_norms(raft::resources const& res); + /** Lists' data and indices. 
*/ - std::vector>>& lists() noexcept; - const std::vector>>& lists() - const noexcept; + std::vector>>& lists() noexcept; + const std::vector>>& lists() const noexcept; - // Get pointer to underlying RAFT index, not meant to be used outside of cuVS - inline raft::neighbors::ivf_flat::index* get_raft_index() noexcept - { - return raft_index_.get(); - } - inline const raft::neighbors::ivf_flat::index* get_raft_index() const noexcept - { - return raft_index_.get(); - } + void check_consistency(); private: - std::unique_ptr> raft_index_; + /** + * TODO: in theory, we can lift this to the template parameter and keep it at hardware maximum + * possible value by padding the `dim` of the data https://github.com/rapidsai/raft/issues/711 + */ + uint32_t veclen_; + cuvs::distance::DistanceType metric_; + bool adaptive_centers_; + bool conservative_memory_allocation_; + std::vector>> lists_; + raft::device_vector list_sizes_; + raft::device_matrix centers_; + std::optional> center_norms_; + + // Computed members + raft::device_vector data_ptrs_; + raft::device_vector inds_ptrs_; + raft::host_vector accum_sorted_sizes_; + + static auto calculate_veclen(uint32_t dim) -> uint32_t + { + // TODO: consider padding the dimensions and fixing veclen to its maximum possible value as a + // template parameter (https://github.com/rapidsai/raft/issues/711) + + // NOTE: keep this consistent with the select_interleaved_scan_kernel logic + // in detail/ivf_flat_interleaved_scan-inl.cuh. + uint32_t veclen = std::max(1, 16 / sizeof(T)); + if (dim % veclen != 0) { veclen = 1; } + return veclen; + } }; /** * @} @@ -368,6 +422,147 @@ void build(raft::resources const& handle, const cuvs::neighbors::ivf_flat::index_params& index_params, raft::device_matrix_view dataset, cuvs::neighbors::ivf_flat::index& idx); + +/** + * @brief Build the index from the dataset for efficient search. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_flat::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_flat::build(handle, dataset, index_params); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] dataset raft::host_matrix_view to a row-major matrix [n_rows, dim] + * + * @return the constructed ivf-flat index + */ +auto build(raft::resources const& handle, + const cuvs::neighbors::ivf_flat::index_params& index_params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::ivf_flat::index; + +/** + * @brief Build the index from the dataset for efficient search. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_flat::index_params index_params; + * // create and fill the index from a [N, D] dataset + * ivf_flat::index index; + * ivf_flat::build(handle, dataset, index_params, index); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] dataset raft::host_matrix_view to a row-major matrix [n_rows, dim] + * @param[out] idx reference to ivf_flat::index + * + */ +void build(raft::resources const& handle, + const cuvs::neighbors::ivf_flat::index_params& index_params, + raft::host_matrix_view dataset, + cuvs::neighbors::ivf_flat::index& idx); + +/** + * @brief Build the index from the dataset for efficient search. 
+ * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_flat::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_flat::build(handle, dataset, index_params); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] dataset a host pointer to a row-major matrix [n_rows, dim] + * + * @return the constructed ivf-flat index + */ +auto build(raft::resources const& handle, + const cuvs::neighbors::ivf_flat::index_params& index_params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::ivf_flat::index; + +/** + * @brief Build the index from the dataset for efficient search. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_flat::index_params index_params; + * // create and fill the index from a [N, D] dataset + * ivf_flat::index index; + * ivf_flat::build(handle, dataset, index_params, index); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] dataset raft::host_matrix_view to a row-major matrix [n_rows, dim] + * @param[out] idx reference to ivf_flat::index + * + */ +void build(raft::resources const& handle, + const cuvs::neighbors::ivf_flat::index_params& index_params, + raft::host_matrix_view dataset, + cuvs::neighbors::ivf_flat::index& idx); + +/** + * @brief Build the index from the dataset for efficient search. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_flat::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_flat::build(handle, dataset, index_params); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] dataset a host pointer to a row-major matrix [n_rows, dim] + * + * @return the constructed ivf-flat index + */ +auto build(raft::resources const& handle, + const cuvs::neighbors::ivf_flat::index_params& index_params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::ivf_flat::index; + +/** + * @brief Build the index from the dataset for efficient search. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_flat::index_params index_params; + * // create and fill the index from a [N, D] dataset + * ivf_flat::index index; + * ivf_flat::build(handle, dataset, index_params, index); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] dataset raft::host_matrix_view to a row-major matrix [n_rows, dim] + * @param[out] idx reference to ivf_flat::index + * + */ +void build(raft::resources const& handle, + const cuvs::neighbors::ivf_flat::index_params& index_params, + raft::host_matrix_view dataset, + cuvs::neighbors::ivf_flat::index& idx); /** * @} */ @@ -568,6 +763,198 @@ void extend(raft::resources const& handle, raft::device_matrix_view new_vectors, std::optional> new_indices, cuvs::neighbors::ivf_flat::index* idx); + +/** + * @brief Build a new index containing the data of the original plus new extra vectors. + * + * Implementation note: + * The new data is clustered according to existing kmeans clusters, then the cluster + * centers are adjusted to match the newly labeled data. 
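+ * (Per `ivf_flat_build.cuh` in this change, the centers are only adjusted when the index was
+ * built with `adaptive_centers = true`; otherwise only the list sizes are updated.)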
+ * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_flat::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_flat::build(handle, index_params, dataset); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * auto index = ivf_flat::extend(handle, new_vectors, no_op, index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors raft::host_matrix_view to a row-major matrix [n_rows, index.dim()] + * @param[in] new_indices optional raft::host_vector_view to a vector of indices [n_rows]. + * If the original index is empty (`orig_index.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[in] idx original index + * + * @return the constructed extended ivf-flat index + */ +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_flat::index& idx) + -> cuvs::neighbors::ivf_flat::index; + +/** + * @brief Extend the index in-place with the new data. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_flat::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_flat::build(handle, index_params, dataset); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * ivf_flat::extend(handle, dataset, no_opt, &index_empty); + * @endcode + * + * + * @param[in] handle + * @param[in] new_vectors raft::host_matrix_view to a row-major matrix [n_rows, index.dim()] + * @param[in] new_indices optional raft::host_vector_view to a vector of indices [n_rows]. + * If the original index is empty (`orig_index.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx pointer to index, to be overwritten in-place + */ +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_flat::index* idx); + +/** + * @brief Build a new index containing the data of the original plus new extra vectors. + * + * Implementation note: + * The new data is clustered according to existing kmeans clusters, then the cluster + * centers are adjusted to match the newly labeled data. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_flat::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_flat::build(handle, dataset, index_params, dataset); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * auto index = ivf_flat::extend(handle, new_vectors, no_op, index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors raft::host_matrix_view to a row-major matrix [n_rows, index.dim()] + * @param[in] new_indices optional raft::host_vector_view to a vector of indices [n_rows]. 
+ * If the original index is empty (`orig_index.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[in] idx original index + * + * @return the constructed extended ivf-flat index + */ +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_flat::index& idx) + -> cuvs::neighbors::ivf_flat::index; + +/** + * @brief Extend the index in-place with the new data. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_flat::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_flat::build(handle, index_params, dataset); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * ivf_flat::extend(handle, dataset, no_opt, &index_empty); + * @endcode + * + * + * @param[in] handle + * @param[in] new_vectors raft::host_matrix_view to a row-major matrix [n_rows, index.dim()] + * @param[in] new_indices optional raft::host_vector_view to a vector of indices [n_rows]. + * If the original index is empty (`orig_index.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx pointer to index, to be overwritten in-place + */ +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_flat::index* idx); + +/** + * @brief Build a new index containing the data of the original plus new extra vectors. + * + * Implementation note: + * The new data is clustered according to existing kmeans clusters, then the cluster + * centers are adjusted to match the newly labeled data. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_flat::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_flat::build(handle, dataset, index_params, dataset); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * auto index = ivf_flat::extend(handle, new_vectors, no_op, index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors raft::host_matrix_view to a row-major matrix [n_rows, index.dim()] + * @param[in] new_indices optional raft::host_vector_view to a vector of indices [n_rows]. + * If the original index is empty (`orig_index.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[in] idx original index + * + * @return the constructed extended ivf-flat index + */ +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_flat::index& idx) + -> cuvs::neighbors::ivf_flat::index; + +/** + * @brief Extend the index in-place with the new data. 
+ * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_flat::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_flat::build(handle, index_params, dataset); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * ivf_flat::extend(handle, dataset, no_opt, &index_empty); + * @endcode + * + * + * @param[in] handle + * @param[in] new_vectors raft::host_matrix_view to a row-major matrix [n_rows, index.dim()] + * @param[in] new_indices optional raft::host_vector_view to a vector of indices [n_rows]. + * If the original index is empty (`orig_index.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx pointer to index, to be overwritten in-place + */ +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_flat::index* idx); /** * @} */ @@ -687,6 +1074,105 @@ void search(raft::resources const& handle, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances); + +/** + * @brief Search ANN using the constructed index with the given filter. + * + * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example. + * + * Note, this function requires a temporary buffer to store intermediate results between cuda kernel + * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can + * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or + * eliminate entirely allocations happening within `search`. + * The exact size of the temporary buffer depends on multiple factors and is an implementation + * detail. However, you can safely specify a small initial size for the memory pool, so that only a + * few allocations happen to grow it during the first invocations of the `search`. + * + * @param[in] handle + * @param[in] params configure the search + * @param[in] idx ivf-flat constructed index + * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] + * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, + * k] + * @param[in] sample_filter a device bitset filter function that greenlights samples for a given + * query. + */ +void search_with_filtering( + raft::resources const& handle, + const search_params& params, + index& idx, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + cuvs::neighbors::filtering::bitset_filter sample_filter); + +/** + * @brief Search ANN using the constructed index with the given filter. + * + * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example. + * + * Note, this function requires a temporary buffer to store intermediate results between cuda kernel + * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can + * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or + * eliminate entirely allocations happening within `search`. 
+ * The exact size of the temporary buffer depends on multiple factors and is an implementation + * detail. However, you can safely specify a small initial size for the memory pool, so that only a + * few allocations happen to grow it during the first invocations of the `search`. + * + * @param[in] handle + * @param[in] params configure the search + * @param[in] idx ivf-flat constructed index + * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] + * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, + * k] + * @param[in] sample_filter a device bitset filter function that greenlights samples for a given + * query. + */ +void search_with_filtering( + raft::resources const& handle, + const search_params& params, + index& idx, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + cuvs::neighbors::filtering::bitset_filter sample_filter); + +/** + * @brief Search ANN using the constructed index with the given filter. + * + * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example. + * + * Note, this function requires a temporary buffer to store intermediate results between cuda kernel + * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can + * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or + * eliminate entirely allocations happening within `search`. + * The exact size of the temporary buffer depends on multiple factors and is an implementation + * detail. However, you can safely specify a small initial size for the memory pool, so that only a + * few allocations happen to grow it during the first invocations of the `search`. + * + * @param[in] handle + * @param[in] params configure the search + * @param[in] idx ivf-flat constructed index + * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] + * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, + * k] + * @param[in] sample_filter a device bitset filter function that greenlights samples for a given + * query. 
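+ *
+ * Usage sketch, assuming `index`, `queries`, `neighbors` and `distances` are prepared as in the
+ * plain `search` examples, and `sample_filter` is a `filtering::bitset_filter<uint32_t, int64_t>`
+ * whose bitset has one bit per dataset row marking the rows that may be returned:
+ * @code{.cpp}
+ *   ivf_flat::search_params params;
+ *   params.n_probes = 32;
+ *   ivf_flat::search_with_filtering(
+ *     handle, params, index, queries, neighbors, distances, sample_filter);
+ * @endcode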
+ */ +void search_with_filtering( + raft::resources const& handle, + const search_params& params, + index& idx, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + cuvs::neighbors::filtering::bitset_filter sample_filter); /** * @} */ @@ -737,7 +1223,7 @@ void serialize_file(raft::resources const& handle, * std::string filename("/path/to/index"); * using T = float; // data element type * using IdxT = int64_t; // type of the index - * // create an empty index with `ivf_pq::index index(handle, index_params, dim);` + * // create an empty index with `ivf_flat::index index(handle, index_params, dim);` * cuvs::deserialize_file(handle, filename, &index); * @endcode * @@ -791,7 +1277,7 @@ void serialize(raft::resources const& handle, * std::string str; * using T = float; // data element type * using IdxT = int64_t; // type of the index - * // create an empty index with `ivf_pq::index index(handle, index_params, dim);` + * // create an empty index with `ivf_flat::index index(handle, index_params, dim);` * auto index = cuvs::deserialize(handle, str, &index); * @endcode * @@ -845,7 +1331,7 @@ void serialize_file(raft::resources const& handle, * std::string filename("/path/to/index"); * using T = float; // data element type * using IdxT = int64_t; // type of the index - * // create an empty index with `ivf_pq::index index(handle, index_params, dim);` + * // create an empty index with `ivf_flat::index index(handle, index_params, dim);` * cuvs::deserialize_file(handle, filename, &index); * @endcode * @@ -899,7 +1385,7 @@ void serialize(raft::resources const& handle, * std::string str; * using T = float; // data element type * using IdxT = int64_t; // type of the index - * // create an empty index with `ivf_pq::index index(handle, index_params, dim);` + * // create an empty index with `ivf_flat::index index(handle, index_params, dim);` * auto index = cuvs::deserialize(handle, str, &index); * @endcode * @@ -953,7 +1439,7 @@ void serialize_file(raft::resources const& handle, * std::string filename("/path/to/index"); * using T = float; // data element type * using IdxT = int64_t; // type of the index - * // create an empty index with `ivf_pq::index index(handle, index_params, dim);` + * // create an empty index with ivf_flat::index index(handle, index_params, dim);` * cuvs::deserialize_file(handle, filename, &index); * @endcode * @@ -1007,7 +1493,7 @@ void serialize(raft::resources const& handle, * std::string str; * using T = float; // data element type * using IdxT = int64_t; // type of the index - * // create an empty index with `ivf_pq::index index(handle, index_params, dim);` + * // create an empty index with `ivf_flat::index index(handle, index_params, dim);` * auto index = cuvs::deserialize(handle, str, &index); * @endcode * @@ -1023,4 +1509,360 @@ void deserialize(raft::resources const& handle, /** * @} */ + +namespace helpers { + +/** + * @defgroup ivf_flat_helpers Helper functions for IVF Flat + * @{ + */ + +namespace codepacker { + +/** + * Write flat codes into an existing list by the given offset. + * + * NB: no memory allocation happens here; the list must fit the data (offset + n_vec). + * + * Usage example: + * @code{.cpp} + * auto list_data = index.lists()[label]->data.view(); + * // allocate the buffer for the input codes + * auto codes = raft::make_device_matrix(res, n_vec, index.dim()); + * ... prepare n_vecs to pack into the list in codes ... 
+ * // write codes into the list starting from the 42nd position + * ivf_flat::helpers::codepacker::pack( + * res, make_const_mdspan(codes.view()), index.veclen(), 42, list_data); + * @endcode + * + * @param[in] res + * @param[in] codes flat codes [n_vec, dim] + * @param[in] veclen size of interleaved data chunks + * @param[in] offset how many records to skip before writing the data into the list + * @param[inout] list_data block to write into + */ +void pack(raft::resources const& res, + raft::device_matrix_view codes, + uint32_t veclen, + uint32_t offset, + raft::device_mdspan::list_extents, + raft::row_major> list_data); + +/** + * Write flat codes into an existing list by the given offset. + * + * NB: no memory allocation happens here; the list must fit the data (offset + n_vec). + * + * Usage example: + * @code{.cpp} + * auto list_data = index.lists()[label]->data.view(); + * // allocate the buffer for the input codes + * auto codes = raft::make_device_matrix(res, n_vec, index.dim()); + * ... prepare n_vecs to pack into the list in codes ... + * // write codes into the list starting from the 42nd position + * ivf_flat::helpers::codepacker::pack( + * res, make_const_mdspan(codes.view()), index.veclen(), 42, list_data); + * @endcode + * + * @param[in] res + * @param[in] codes flat codes [n_vec, dim] + * @param[in] veclen size of interleaved data chunks + * @param[in] offset how many records to skip before writing the data into the list + * @param[inout] list_data block to write into + */ +void pack(raft::resources const& res, + raft::device_matrix_view codes, + uint32_t veclen, + uint32_t offset, + raft::device_mdspan::list_extents, + raft::row_major> list_data); + +/** + * Write flat codes into an existing list by the given offset. + * + * NB: no memory allocation happens here; the list must fit the data (offset + n_vec). + * + * Usage example: + * @code{.cpp} + * auto list_data = index.lists()[label]->data.view(); + * // allocate the buffer for the input codes + * auto codes = raft::make_device_matrix(res, n_vec, index.dim()); + * ... prepare n_vecs to pack into the list in codes ... + * // write codes into the list starting from the 42nd position + * ivf_flat::helpers::codepacker::pack( + * res, make_const_mdspan(codes.view()), index.veclen(), 42, list_data); + * @endcode + * + * @param[in] res + * @param[in] codes flat codes [n_vec, dim] + * @param[in] veclen size of interleaved data chunks + * @param[in] offset how many records to skip before writing the data into the list + * @param[inout] list_data block to write into + */ +void pack(raft::resources const& res, + raft::device_matrix_view codes, + uint32_t veclen, + uint32_t offset, + raft::device_mdspan::list_extents, + raft::row_major> list_data); + +/** + * @brief Unpack `n_take` consecutive records of a single list (cluster) in the compressed index + * starting at given `offset`. + * + * Usage example: + * @code{.cpp} + * auto list_data = index.lists()[label]->data.view(); + * // allocate the buffer for the output + * uint32_t n_take = 4; + * auto codes = raft::make_device_matrix(res, n_take, index.dim()); + * uint32_t offset = 0; + * // unpack n_take elements from the list + * ivf_fat::helpers::codepacker::unpack(res, list_data, index.veclen(), offset, codes.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] list_data block to read from + * @param[in] veclen size of interleaved data chunks + * @param[in] offset + * How many records in the list to skip. 
+ * @param[inout] codes + * the destination buffer [n_take, index.dim()]. + * The length `n_take` defines how many records to unpack, + * it must be <= the list size. + */ +void unpack(raft::resources const& res, + raft::device_mdspan::list_extents, + raft::row_major> list_data, + uint32_t veclen, + uint32_t offset, + raft::device_matrix_view codes); + +/** + * @brief Unpack `n_take` consecutive records of a single list (cluster) in the compressed index + * starting at given `offset`. + * + * Usage example: + * @code{.cpp} + * auto list_data = index.lists()[label]->data.view(); + * // allocate the buffer for the output + * uint32_t n_take = 4; + * auto codes = raft::make_device_matrix(res, n_take, index.dim()); + * uint32_t offset = 0; + * // unpack n_take elements from the list + * ivf_fat::helpers::codepacker::unpack(res, list_data, index.veclen(), offset, codes.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] list_data block to read from + * @param[in] veclen size of interleaved data chunks + * @param[in] offset + * How many records in the list to skip. + * @param[inout] codes + * the destination buffer [n_take, index.dim()]. + * The length `n_take` defines how many records to unpack, + * it must be <= the list size. + */ +void unpack(raft::resources const& res, + raft::device_mdspan::list_extents, + raft::row_major> list_data, + uint32_t veclen, + uint32_t offset, + raft::device_matrix_view codes); + +/** + * @brief Unpack `n_take` consecutive records of a single list (cluster) in the compressed index + * starting at given `offset`. + * + * Usage example: + * @code{.cpp} + * auto list_data = index.lists()[label]->data.view(); + * // allocate the buffer for the output + * uint32_t n_take = 4; + * auto codes = raft::make_device_matrix(res, n_take, index.dim()); + * uint32_t offset = 0; + * // unpack n_take elements from the list + * ivf_fat::helpers::codepacker::unpack(res, list_data, index.veclen(), offset, codes.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] list_data block to read from + * @param[in] veclen size of interleaved data chunks + * @param[in] offset + * How many records in the list to skip. + * @param[inout] codes + * the destination buffer [n_take, index.dim()]. + * The length `n_take` defines how many records to unpack, + * it must be <= the list size. + */ +void unpack(raft::resources const& res, + raft::device_mdspan::list_extents, + raft::row_major> list_data, + uint32_t veclen, + uint32_t offset, + raft::device_matrix_view codes); + +/** + * Write one flat code into a block by the given offset. The offset indicates the id of the record + * in the list. This function interleaves the code and is intended to later copy the interleaved + * codes over to the IVF list on device. NB: no memory allocation happens here; the block must fit + * the record (offset + 1). + * + * @param[in] flat_code input flat code + * @param[out] block block of memory to write interleaved codes to + * @param[in] dim dimension of the flat code + * @param[in] veclen size of interleaved data chunks + * @param[in] offset how many records to skip before writing the data into the list + */ +void pack_1(const float* flat_code, float* block, uint32_t dim, uint32_t veclen, uint32_t offset); + +/** + * Write one flat code into a block by the given offset. The offset indicates the id of the record + * in the list. This function interleaves the code and is intended to later copy the interleaved + * codes over to the IVF list on device. 
NB: no memory allocation happens here; the block must fit + * the record (offset + 1). + * + * @param[in] flat_code input flat code + * @param[out] block block of memory to write interleaved codes to + * @param[in] dim dimension of the flat code + * @param[in] veclen size of interleaved data chunks + * @param[in] offset how many records to skip before writing the data into the list + */ +void pack_1(const int8_t* flat_code, int8_t* block, uint32_t dim, uint32_t veclen, uint32_t offset); + +/** + * Write one flat code into a block by the given offset. The offset indicates the id of the record + * in the list. This function interleaves the code and is intended to later copy the interleaved + * codes over to the IVF list on device. NB: no memory allocation happens here; the block must fit + * the record (offset + 1). + * + * @param[in] flat_code input flat code + * @param[out] block block of memory to write interleaved codes to + * @param[in] dim dimension of the flat code + * @param[in] veclen size of interleaved data chunks + * @param[in] offset how many records to skip before writing the data into the list + */ +void pack_1( + const uint8_t* flat_code, uint8_t* block, uint32_t dim, uint32_t veclen, uint32_t offset); + +/** + * Unpack 1 record of a single list (cluster) in the index to fetch the flat code. The offset + * indicates the id of the record. This function fetches one flat code from an interleaved code. + * + * @param[in] block interleaved block. The block can be thought of as the whole inverted list in + * interleaved format. + * @param[out] flat_code output flat code + * @param[in] dim dimension of the flat code + * @param[in] veclen size of interleaved data chunks + * @param[in] offset fetch the flat code by the given offset + */ +void unpack_1(const float* block, float* flat_code, uint32_t dim, uint32_t veclen, uint32_t offset); + +/** + * Unpack 1 record of a single list (cluster) in the index to fetch the flat code. The offset + * indicates the id of the record. This function fetches one flat code from an interleaved code. + * + * @param[in] block interleaved block. The block can be thought of as the whole inverted list in + * interleaved format. + * @param[out] flat_code output flat code + * @param[in] dim dimension of the flat code + * @param[in] veclen size of interleaved data chunks + * @param[in] offset fetch the flat code by the given offset + */ +void unpack_1( + const int8_t* block, int8_t* flat_code, uint32_t dim, uint32_t veclen, uint32_t offset); + +/** + * Unpack 1 record of a single list (cluster) in the index to fetch the flat code. The offset + * indicates the id of the record. This function fetches one flat code from an interleaved code. + * + * @param[in] block interleaved block. The block can be thought of as the whole inverted list in + * interleaved format. + * @param[out] flat_code output flat code + * @param[in] dim dimension of the flat code + * @param[in] veclen size of interleaved data chunks + * @param[in] offset fetch the flat code by the given offset + */ +void unpack_1( + const uint8_t* block, uint8_t* flat_code, uint32_t dim, uint32_t veclen, uint32_t offset); + +} // namespace codepacker + +/** + * @brief Public helper API to reset the data and indices ptrs, and the list sizes. Useful for + * externally modifying the index without going through the build stage. The data and indices of the + * IVF lists will be lost. 
+ * + * Usage example: + * @code{.cpp} + * raft::resources res; + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_flat::index_params index_params; + * // initialize an empty index + * ivf_flat::index index(res, index_params, D); + * // reset the index's state and list sizes + * ivf_flat::helpers::reset_index(res, &index); + * @endcode + * + * @param[in] res raft resource + * @param[inout] index pointer to IVF-Flat index + */ +void reset_index(const raft::resources& res, index* index); + +/** + * @brief Public helper API to reset the data and indices ptrs, and the list sizes. Useful for + * externally modifying the index without going through the build stage. The data and indices of the + * IVF lists will be lost. + * + * Usage example: + * @code{.cpp} + * raft::resources res; + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_flat::index_params index_params; + * // initialize an empty index + * ivf_flat::index index(res, index_params, D); + * // reset the index's state and list sizes + * ivf_flat::helpers::reset_index(res, &index); + * @endcode + * + * @param[in] res raft resource + * @param[inout] index pointer to IVF-Flat index + */ +void reset_index(const raft::resources& res, index* index); + +/** + * @brief Public helper API to reset the data and indices ptrs, and the list sizes. Useful for + * externally modifying the index without going through the build stage. The data and indices of the + * IVF lists will be lost. + * + * Usage example: + * @code{.cpp} + * raft::resources res; + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_flat::index_params index_params; + * // initialize an empty index + * ivf_flat::index index(res, index_params, D); + * // reset the index's state and list sizes + * ivf_flat::helpers::reset_index(res, &index); + * @endcode + * + * @param[in] res raft resource + * @param[inout] index pointer to IVF-Flat index + */ +void reset_index(const raft::resources& res, index* index); + +/** + * @} + */ + +} // namespace helpers } // namespace cuvs::neighbors::ivf_flat \ No newline at end of file diff --git a/cpp/src/core/nvtx.hpp b/cpp/src/core/nvtx.hpp index 79382bbd0..2580b4964 100644 --- a/cpp/src/core/nvtx.hpp +++ b/cpp/src/core/nvtx.hpp @@ -16,9 +16,90 @@ #pragma once -namespace cuvs::common::nvtx::domain { +#include + +namespace cuvs::common::nvtx { +namespace domain { +/** @brief The default NVTX domain. */ +struct app { + static constexpr char const* name{"application"}; +}; + /** @brief This NVTX domain is supposed to be used within cuvs. */ struct cuvs { static constexpr const char* name = "cuvs"; }; -}; // namespace cuvs::common::nvtx::domain +} // namespace domain + +/** + * @brief Push a named NVTX range. + * + * @tparam Domain optional struct that defines the NVTX domain message; + * You can create a new domain with a custom message as follows: + * \code{.cpp} + * struct custom_domain { static constexpr char const* name{"custom message"}; } + * \endcode + * NB: make sure to use the same domain for `push_range` and `pop_range`. + * @param format range name format (accepts printf-style arguments) + * @param args the arguments for the printf-style formatting + */ +template +inline void push_range(const char* format, Args... args) +{ + raft::common::nvtx::detail::push_range(format, args...); +} + +/** + * @brief Pop the latest range. 
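+ *
+ * A minimal sketch pairing `push_range`/`pop_range` in the `cuvs` domain (`k` is assumed to be
+ * an in-scope integer; the scoped `range` class below is usually the safer choice):
+ * \code{.cpp}
+ * cuvs::common::nvtx::push_range<cuvs::common::nvtx::domain::cuvs>("ivf_flat::search(k = %u)", k);
+ * // ... the annotated work ...
+ * cuvs::common::nvtx::pop_range<cuvs::common::nvtx::domain::cuvs>();
+ * \endcode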
+ * + * @tparam Domain optional struct that defines the NVTX domain message; + * You can create a new domain with a custom message as follows: + * \code{.cpp} + * struct custom_domain { static constexpr char const* name{"custom message"}; } + * \endcode + * NB: make sure to use the same domain for `push_range` and `pop_range`. + */ +template +inline void pop_range() +{ + raft::common::nvtx::detail::pop_range(); +} + +/** + * @brief Push a named NVTX range that would be popped at the end of the object lifetime. + * + * Refer to \ref Usage for the usage examples. + * + * @tparam Domain optional struct that defines the NVTX domain message; + * You can create a new domain with a custom message as follows: + * \code{.cpp} + * struct custom_domain { static constexpr char const* name{"custom message"}; } + * \endcode + */ +template +class range { + public: + /** + * Push a named NVTX range. + * At the end of the object lifetime, pop the range back. + * + * @param format range name format (accepts printf-style arguments) + * @param args the arguments for the printf-style formatting + */ + template + explicit range(const char* format, Args... args) + { + push_range(format, args...); + } + + ~range() { pop_range(); } + + /* This object is not meant to be touched. */ + range(const range&) = delete; + range(range&&) = delete; + auto operator=(const range&) -> range& = delete; + auto operator=(range&&) -> range& = delete; + static auto operator new(std::size_t) -> void* = delete; + static auto operator new[](std::size_t) -> void* = delete; +}; +}; // namespace cuvs::common::nvtx diff --git a/cpp/src/neighbors/detail/div_utils.cuh b/cpp/src/neighbors/detail/div_utils.cuh new file mode 100644 index 000000000..b54b8400e --- /dev/null +++ b/cpp/src/neighbors/detail/div_utils.cuh @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +/** + * @brief A simple wrapper for raft::Pow2 which uses Pow2 utils only when available and regular + * integer division otherwise. This is done to allow a common interface for division arithmetic for + * non CUDA headers. + * + * @tparam Value_ a compile-time value representable as a power-of-two. 
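+ *
+ * A minimal sketch, assuming the interleaved group size of 32 used by the IVF-Flat lists:
+ * @code{.cpp}
+ *   using interleaved_group = cuvs::neighbors::detail::div_utils<32>;
+ *   uint32_t group_offset = interleaved_group::roundDown(37u);  // -> 32
+ *   uint32_t ingroup_id   = interleaved_group::mod(37u);        // -> 5
+ * @endcode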
+ */ +namespace cuvs::neighbors::detail { +template +struct div_utils { + typedef decltype(Value_) Type; + static constexpr Type Value = Value_; + + template + static constexpr _RAFT_HOST_DEVICE inline auto roundDown(T x) + { + return raft::Pow2::roundDown(x); + } + + template + static constexpr _RAFT_HOST_DEVICE inline auto mod(T x) + { + return raft::Pow2::mod(x); + } + + template + static constexpr _RAFT_HOST_DEVICE inline auto div(T x) + { + return raft::Pow2::div(x); + } +}; +} // namespace cuvs::neighbors::detail diff --git a/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py b/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py index bf0cad6d4..1733ca8b2 100644 --- a/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py +++ b/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py @@ -37,9 +37,21 @@ * */ -#include #include +""" + +build_include_macro = """ +#include "ivf_flat_build.cuh" +""" +search_include_macro = """ +#include "ivf_flat_search.cuh" +""" + +serialize_include_macro = """ +#include "ivf_flat_serialize.cuh" +""" +namespace_macro = """ namespace cuvs::neighbors::ivf_flat { """ @@ -54,14 +66,14 @@ ) build_macro = """ -#define CUVS_INST_IVF_FLAT_BUILD(T, IdxT) \\ +#define CUVS_INST_IVF_FLAT_BUILD_EXTEND(T, IdxT) \\ auto build(raft::resources const& handle, \\ const cuvs::neighbors::ivf_flat::index_params& params, \\ raft::device_matrix_view dataset) \\ ->cuvs::neighbors::ivf_flat::index \\ { \\ return cuvs::neighbors::ivf_flat::index( \\ - std::move(raft::runtime::neighbors::ivf_flat::build(handle, params, dataset))); \\ + std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \\ } \\ \\ void build(raft::resources const& handle, \\ @@ -69,12 +81,24 @@ raft::device_matrix_view dataset, \\ cuvs::neighbors::ivf_flat::index& idx) \\ { \\ - raft::runtime::neighbors::ivf_flat::build(handle, params, dataset, *idx.get_raft_index()); \\ - } -""" - -extend_macro = """ -#define CUVS_INST_IVF_FLAT_EXTEND(T, IdxT) \\ + cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \\ + } \\ + auto build(raft::resources const& handle, \\ + const cuvs::neighbors::ivf_flat::index_params& params, \\ + raft::host_matrix_view dataset) \\ + ->cuvs::neighbors::ivf_flat::index \\ + { \\ + return cuvs::neighbors::ivf_flat::index( \\ + std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \\ + } \\ + \\ + void build(raft::resources const& handle, \\ + const cuvs::neighbors::ivf_flat::index_params& params, \\ + raft::host_matrix_view dataset, \\ + cuvs::neighbors::ivf_flat::index& idx) \\ + { \\ + cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \\ + } \\ auto extend(raft::resources const& handle, \\ raft::device_matrix_view new_vectors, \\ std::optional> new_indices, \\ @@ -82,8 +106,8 @@ ->cuvs::neighbors::ivf_flat::index \\ { \\ return cuvs::neighbors::ivf_flat::index( \\ - std::move(raft::runtime::neighbors::ivf_flat::extend( \\ - handle, new_vectors, new_indices, *orig_index.get_raft_index()))); \\ + std::move(cuvs::neighbors::ivf_flat::detail::extend( \\ + handle, new_vectors, new_indices, orig_index))); \\ } \\ \\ void extend(raft::resources const& handle, \\ @@ -91,9 +115,28 @@ std::optional> new_indices, \\ cuvs::neighbors::ivf_flat::index* idx) \\ { \\ - raft::runtime::neighbors::ivf_flat::extend( \\ - handle, new_vectors, new_indices, idx->get_raft_index()); \\ - } + cuvs::neighbors::ivf_flat::detail::extend( \\ + handle, new_vectors, new_indices, idx); \\ + } \\ + auto extend(raft::resources const& handle, \\ + 
raft::host_matrix_view new_vectors, \\ + std::optional> new_indices, \\ + const cuvs::neighbors::ivf_flat::index& orig_index) \\ + ->cuvs::neighbors::ivf_flat::index \\ + { \\ + return cuvs::neighbors::ivf_flat::index( \\ + std::move(cuvs::neighbors::ivf_flat::detail::extend( \\ + handle, new_vectors, new_indices, orig_index))); \\ + } \\ + \\ + void extend(raft::resources const& handle, \\ + raft::host_matrix_view new_vectors, \\ + std::optional> new_indices, \\ + cuvs::neighbors::ivf_flat::index* idx) \\ + { \\ + cuvs::neighbors::ivf_flat::detail::extend( \\ + handle, new_vectors, new_indices, idx); \\ + } """ search_macro = """ @@ -105,8 +148,20 @@ raft::device_matrix_view neighbors, \\ raft::device_matrix_view distances) \\ { \\ - raft::runtime::neighbors::ivf_flat::search( \\ - handle, params, *index.get_raft_index(), queries, neighbors, distances); \\ + cuvs::neighbors::ivf_flat::detail::search( \\ + handle, params, index, queries, neighbors, distances); \\ + } \\ + void search_with_filtering( \\ + raft::resources const& handle, \\ + const search_params& params, \\ + index& idx, \\ + raft::device_matrix_view queries, \\ + raft::device_matrix_view neighbors, \\ + raft::device_matrix_view distances, \\ + cuvs::neighbors::filtering::bitset_filter sample_filter) \\ + { \\ + cuvs::neighbors::ivf_flat::detail::search_with_filtering( \\ + handle, params, idx, queries, neighbors, distances, sample_filter); \\ } """ @@ -116,46 +171,48 @@ const std::string& filename, \\ const cuvs::neighbors::ivf_flat::index& index) \\ { \\ - raft::runtime::neighbors::ivf_flat::serialize_file(handle, filename, *index.get_raft_index()); \\ - } \\ - \\ - void deserialize_file(raft::resources const& handle, \\ - const std::string& filename, \\ - cuvs::neighbors::ivf_flat::index* index) \\ - { \\ - raft::runtime::neighbors::ivf_flat::deserialize_file( \\ - handle, filename, index->get_raft_index()); \\ + cuvs::neighbors::ivf_flat::detail::serialize(handle, filename, index); \\ } \\ \\ void serialize(raft::resources const& handle, \\ std::string& str, \\ const cuvs::neighbors::ivf_flat::index& index) \\ { \\ - raft::runtime::neighbors::ivf_flat::serialize(handle, str, *index.get_raft_index()); \\ + std::ostringstream os; \\ + cuvs::neighbors::ivf_flat::detail::serialize(handle, os, index); \\ + str = os.str(); \\ } \\ \\ - void deserialize(raft::resources const& handle, \\ - const std::string& str, \\ - cuvs::neighbors::ivf_flat::index* index) \\ + void deserialize_file(raft::resources const& handle, \\ + const std::string& filename, \\ + cuvs::neighbors::ivf_flat::index* index) \\ { \\ - raft::runtime::neighbors::ivf_flat::deserialize(handle, str, index->get_raft_index()); \\ - } + * index = cuvs::neighbors::ivf_flat::detail::deserialize( \\ + handle, filename); \\ + } \\ + void deserialize(raft::resources const& handle, \\ + const std::string& str, \\ + cuvs::neighbors::ivf_flat::index* index) \\ + { \\ + std::istringstream is(str); \\ + * index = cuvs::neighbors::ivf_flat::detail::deserialize( \\ + handle, is); \\ + } """ macros = dict( - build=dict( + build_extend=dict( + include=build_include_macro, definition=build_macro, - name="CUVS_INST_IVF_FLAT_BUILD", - ), - extend=dict( - definition=extend_macro, - name="CUVS_INST_IVF_FLAT_EXTEND", + name="CUVS_INST_IVF_FLAT_BUILD_EXTEND", ), search=dict( + include=search_include_macro, definition=search_macro, name="CUVS_INST_IVF_FLAT_SEARCH", ), serialize=dict( + include=serialize_include_macro, definition=serialize_macro, name="CUVS_INST_IVF_FLAT_SERIALIZE", ), @@ 
-163,9 +220,11 @@ for type_path, (T, IdxT) in types.items(): for macro_path, macro in macros.items(): - path = f"ivf_flat_{macro_path}_{type_path}.cpp" + path = f"ivf_flat_{macro_path}_{type_path}.cu" with open(path, "w") as f: f.write(header) + f.write(macro['include']) + f.write(namespace_macro) f.write(macro["definition"]) f.write(f"{macro['name']}({T}, {IdxT});\n\n") f.write(f"#undef {macro['name']}\n") diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh new file mode 100644 index 000000000..89cd5ec57 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh @@ -0,0 +1,601 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../../core/nvtx.hpp" +#include "../ivf_common.cuh" +#include "../ivf_list.cuh" + +#include +#include + +#include "../detail/ann_utils.cuh" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace cuvs::neighbors::ivf_flat { +using namespace cuvs::spatial::knn::detail; // NOLINT + +namespace detail { + +template +auto clone(const raft::resources& res, const index& source) -> index +{ + auto stream = raft::resource::get_cuda_stream(res); + + // Allocate the new index + index target(res, + source.metric(), + source.n_lists(), + source.adaptive_centers(), + source.conservative_memory_allocation(), + source.dim()); + + // Copy the independent parts + raft::copy(target.list_sizes().data_handle(), + source.list_sizes().data_handle(), + source.list_sizes().size(), + stream); + raft::copy(target.centers().data_handle(), + source.centers().data_handle(), + source.centers().size(), + stream); + if (source.center_norms().has_value()) { + target.allocate_center_norms(res); + raft::copy(target.center_norms()->data_handle(), + source.center_norms()->data_handle(), + source.center_norms()->size(), + stream); + } + // Copy shared pointers + target.lists() = source.lists(); + + // Make sure the device pointers point to the new lists + ivf::detail::recompute_internal_state(res, target); + + return target; +} + +/** + * @brief Record the dataset into the index, one source row at a time. + * + * The index consists of the dataset rows, grouped by their labels (into clusters/lists). + * Within each cluster (list), the data is grouped into blocks of `WarpSize` interleaved + * vectors. Note, the total index length is slightly larger than the dataset length, because + * each cluster is padded by `WarpSize` elements + * + * CUDA launch grid: + * X dimension must cover the dataset (n_rows), YZ are not used; + * there are no dependencies between threads, hence no constraints on the block size. + * + * @tparam T element type. 
+ * @tparam IdxT type of the indices in the source source_vecs + * @tparam LabelT label type + * @tparam gather_src if false, then we build the index from vectors source_vecs[i,:], otherwise + * we use source_vecs[source_ixs[i],:]. In both cases i=0..n_rows-1. + * + * @param[in] labels device pointer to the cluster ids for each row [n_rows] + * @param[in] source_vecs device pointer to the input data [n_rows, dim] + * @param[in] source_ixs device pointer to the input indices [n_rows] + * @param[out] list_data_ptrs device pointer to the index data of size [n_lists][index_size, dim] + * @param[out] list_index_ptrs device pointer to the source ids corr. to the output [n_lists] + * [index_size] + * @param[out] list_sizes_ptr device pointer to the cluster sizes [n_lists]; + * it's used as an atomic counter, and must be initialized with zeros. + * @param n_rows source length + * @param dim the dimensionality of the data + * @param veclen size of vectorized loads/stores; must satisfy `dim % veclen == 0`. + * + */ +template +RAFT_KERNEL build_index_kernel(const LabelT* labels, + const T* source_vecs, + const IdxT* source_ixs, + T** list_data_ptrs, + IdxT** list_index_ptrs, + uint32_t* list_sizes_ptr, + IdxT n_rows, + uint32_t dim, + uint32_t veclen, + IdxT batch_offset = 0) +{ + const IdxT i = IdxT(blockDim.x) * IdxT(blockIdx.x) + threadIdx.x; + if (i >= n_rows) { return; } + + auto list_id = labels[i]; + auto inlist_id = atomicAdd(list_sizes_ptr + list_id, 1); + auto* list_index = list_index_ptrs[list_id]; + auto* list_data = list_data_ptrs[list_id]; + + // Record the source vector id in the index + list_index[inlist_id] = source_ixs == nullptr ? i + batch_offset : source_ixs[i]; + + // The data is written in interleaved groups of `index::kGroupSize` vectors + using interleaved_group = raft::Pow2; + auto group_offset = interleaved_group::roundDown(inlist_id); + auto ingroup_id = interleaved_group::mod(inlist_id) * veclen; + + // Point to the location of the interleaved group of vectors + list_data += group_offset * dim; + + // Point to the source vector + if constexpr (gather_src) { + source_vecs += source_ixs[i] * dim; + } else { + source_vecs += i * dim; + } + // Interleave dimensions of the source vector while recording it. 
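+  // Layout within one group of `kIndexGroupSize` vectors: the chunk [l, l + veclen) of every
+  // vector in the group is stored contiguously, so this vector's chunk begins at
+  // `l * kIndexGroupSize + ingroup_id`.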
+ // NB: such `veclen` is selected, that `dim % veclen == 0` + for (uint32_t l = 0; l < dim; l += veclen) { + for (uint32_t j = 0; j < veclen; j++) { + list_data[l * kIndexGroupSize + ingroup_id + j] = source_vecs[l + j]; + } + } +} + +/** See raft::neighbors::ivf_flat::extend docs */ +template +void extend(raft::resources const& handle, + index* index, + const T* new_vectors, + const IdxT* new_indices, + IdxT n_rows) +{ + using LabelT = uint32_t; + RAFT_EXPECTS(index != nullptr, "index cannot be empty."); + + auto stream = raft::resource::get_cuda_stream(handle); + auto n_lists = index->n_lists(); + auto dim = index->dim(); + list_spec list_device_spec{index->dim(), + index->conservative_memory_allocation()}; + cuvs::common::nvtx::range fun_scope( + "ivf_flat::extend(%zu, %u)", size_t(n_rows), dim); + + RAFT_EXPECTS(new_indices != nullptr || index->size() == 0, + "You must pass data indices when the index is non-empty."); + + auto new_labels = raft::make_device_vector(handle, n_rows); + raft::cluster::kmeans_balanced_params kmeans_params; + kmeans_params.metric = static_cast(index->metric()); + auto orig_centroids_view = + raft::make_device_matrix_view(index->centers().data_handle(), n_lists, dim); + // Calculate the batch size for the input data if it's not accessible directly from the device + constexpr size_t kReasonableMaxBatchSize = 65536; + size_t max_batch_size = std::min(n_rows, kReasonableMaxBatchSize); + + // Predict the cluster labels for the new data, in batches if necessary + utils::batch_load_iterator vec_batches(new_vectors, + n_rows, + index->dim(), + max_batch_size, + stream, + raft::resource::get_workspace_resource(handle)); + + for (const auto& batch : vec_batches) { + auto batch_data_view = + raft::make_device_matrix_view(batch.data(), batch.size(), index->dim()); + auto batch_labels_view = raft::make_device_vector_view( + new_labels.data_handle() + batch.offset(), batch.size()); + raft::cluster::kmeans_balanced::predict(handle, + kmeans_params, + batch_data_view, + orig_centroids_view, + batch_labels_view, + utils::mapping{}); + } + + auto* list_sizes_ptr = index->list_sizes().data_handle(); + auto old_list_sizes_dev = raft::make_device_vector(handle, n_lists); + raft::copy(old_list_sizes_dev.data_handle(), list_sizes_ptr, n_lists, stream); + + // Calculate the centers and sizes on the new data, starting from the original values + if (index->adaptive_centers()) { + auto centroids_view = raft::make_device_matrix_view( + index->centers().data_handle(), index->centers().extent(0), index->centers().extent(1)); + auto list_sizes_view = + raft::make_device_vector_view, IdxT>( + list_sizes_ptr, n_lists); + for (const auto& batch : vec_batches) { + auto batch_data_view = + raft::make_device_matrix_view(batch.data(), batch.size(), index->dim()); + auto batch_labels_view = raft::make_device_vector_view( + new_labels.data_handle() + batch.offset(), batch.size()); + raft::cluster::kmeans_balanced::helpers::calc_centers_and_sizes(handle, + batch_data_view, + batch_labels_view, + centroids_view, + list_sizes_view, + false, + utils::mapping{}); + } + } else { + raft::stats::histogram(raft::stats::HistTypeAuto, + reinterpret_cast(list_sizes_ptr), + IdxT(n_lists), + new_labels.data_handle(), + n_rows, + 1, + stream); + raft::linalg::add( + list_sizes_ptr, list_sizes_ptr, old_list_sizes_dev.data_handle(), n_lists, stream); + } + + // Calculate and allocate new list data + std::vector new_list_sizes(n_lists); + std::vector old_list_sizes(n_lists); + { + raft::copy(old_list_sizes.data(), 
old_list_sizes_dev.data_handle(), n_lists, stream); + raft::copy(new_list_sizes.data(), list_sizes_ptr, n_lists, stream); + raft::resource::sync_stream(handle); + auto& lists = index->lists(); + for (uint32_t label = 0; label < n_lists; label++) { + ivf::resize_list(handle, + lists[label], + list_device_spec, + new_list_sizes[label], + raft::Pow2::roundUp(old_list_sizes[label])); + } + } + // Update the pointers and the sizes + ivf::detail::recompute_internal_state(handle, *index); + // Copy the old sizes, so we can start from the current state of the index; + // we'll rebuild the `list_sizes_ptr` in the following kernel, using it as an atomic counter. + raft::copy(list_sizes_ptr, old_list_sizes_dev.data_handle(), n_lists, stream); + + utils::batch_load_iterator vec_indices( + new_indices, n_rows, 1, max_batch_size, stream, raft::resource::get_workspace_resource(handle)); + utils::batch_load_iterator idx_batch = vec_indices.begin(); + size_t next_report_offset = 0; + size_t d_report_offset = n_rows * 5 / 100; + for (const auto& batch : vec_batches) { + auto batch_data_view = + raft::make_device_matrix_view(batch.data(), batch.size(), index->dim()); + // Kernel to insert the new vectors + const dim3 block_dim(256); + const dim3 grid_dim(raft::ceildiv(batch.size(), block_dim.x)); + build_index_kernel + <<>>(new_labels.data_handle() + batch.offset(), + batch_data_view.data_handle(), + idx_batch->data(), + index->data_ptrs().data_handle(), + index->inds_ptrs().data_handle(), + list_sizes_ptr, + batch.size(), + dim, + index->veclen(), + batch.offset()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + if (batch.offset() > next_report_offset) { + float progress = batch.offset() * 100.0f / n_rows; + RAFT_LOG_DEBUG("ivf_flat::extend added vectors %zu, %6.1f%% complete", + static_cast(batch.offset()), + progress); + next_report_offset += d_report_offset; + } + ++idx_batch; + } + // Precompute the centers vector norms for L2Expanded distance + if (!index->center_norms().has_value()) { + index->allocate_center_norms(handle); + if (index->center_norms().has_value()) { + raft::linalg::rowNorm(index->center_norms()->data_handle(), + index->centers().data_handle(), + dim, + n_lists, + raft::linalg::L2Norm, + true, + stream); + RAFT_LOG_TRACE_VEC(index->center_norms()->data_handle(), std::min(dim, 20)); + } + } else if (index->center_norms().has_value() && index->adaptive_centers()) { + raft::linalg::rowNorm(index->center_norms()->data_handle(), + index->centers().data_handle(), + dim, + n_lists, + raft::linalg::L2Norm, + true, + stream); + RAFT_LOG_TRACE_VEC(index->center_norms()->data_handle(), std::min(dim, 20)); + } +} + +/** See raft::neighbors::ivf_flat::extend docs */ +template +auto extend(raft::resources const& handle, + const index& orig_index, + const T* new_vectors, + const IdxT* new_indices, + IdxT n_rows) -> index +{ + auto ext_index = clone(handle, orig_index); + detail::extend(handle, &ext_index, new_vectors, new_indices, n_rows); + return ext_index; +} + +/** See raft::neighbors::ivf_flat::build docs */ +template +inline auto build(raft::resources const& handle, + const index_params& params, + const T* dataset, + IdxT n_rows, + uint32_t dim) -> index +{ + auto stream = raft::resource::get_cuda_stream(handle); + cuvs::common::nvtx::range fun_scope( + "ivf_flat::build(%zu, %u)", size_t(n_rows), dim); + static_assert(std::is_same_v || std::is_same_v || std::is_same_v, + "unsupported data type"); + RAFT_EXPECTS(n_rows > 0 && dim > 0, "empty dataset"); + RAFT_EXPECTS(n_rows >= params.n_lists, 
"number of rows can't be less than n_lists"); + + index index(handle, params, dim); + utils::memzero( + index.accum_sorted_sizes().data_handle(), index.accum_sorted_sizes().size(), stream); + utils::memzero(index.list_sizes().data_handle(), index.list_sizes().size(), stream); + utils::memzero(index.data_ptrs().data_handle(), index.data_ptrs().size(), stream); + utils::memzero(index.inds_ptrs().data_handle(), index.inds_ptrs().size(), stream); + + // Train the kmeans clustering + { + auto trainset_ratio = std::max( + 1, n_rows / std::max(params.kmeans_trainset_fraction * n_rows, index.n_lists())); + auto n_rows_train = n_rows / trainset_ratio; + rmm::device_uvector trainset(n_rows_train * index.dim(), stream); + // TODO: a proper sampling + RAFT_CUDA_TRY(cudaMemcpy2DAsync(trainset.data(), + sizeof(T) * index.dim(), + dataset, + sizeof(T) * index.dim() * trainset_ratio, + sizeof(T) * index.dim(), + n_rows_train, + cudaMemcpyDefault, + stream)); + auto trainset_const_view = + raft::make_device_matrix_view(trainset.data(), n_rows_train, index.dim()); + auto centers_view = raft::make_device_matrix_view( + index.centers().data_handle(), index.n_lists(), index.dim()); + raft::cluster::kmeans_balanced_params kmeans_params; + kmeans_params.n_iters = params.kmeans_n_iters; + kmeans_params.metric = static_cast(index.metric()); + raft::cluster::kmeans_balanced::fit( + handle, kmeans_params, trainset_const_view, centers_view, utils::mapping{}); + } + + // add the data if necessary + if (params.add_data_on_build) { + detail::extend(handle, &index, dataset, nullptr, n_rows); + } + return index; +} + +/** + * Build an index that can be used in refinement operation. + * + * See raft::neighbors::refine for details on the refinement operation. + * + * The returned index cannot be used for a regular ivf_flat::search. The index misses information + * about coarse clusters. Instead, the neighbor candidates are assumed to form clusters, one for + * each query. The candidate vectors are gathered into the index dataset, that can be later used + * in ivfflat_interleaved_scan. + * + * @param[in] handle the raft handle + * @param[inout] refinement_index + * @param[in] dataset device pointer to dataset vectors, size [n_rows, dim]. Note that n_rows is + * not known to this function, but each candidate_idx has to be smaller than n_rows. + * @param[in] candidate_idx device pointer to neighbor candidates, size [n_queries, n_candidates] + * @param[in] n_candidates of neighbor_candidates + */ +template +inline void fill_refinement_index(raft::resources const& handle, + index* refinement_index, + const T* dataset, + const IdxT* candidate_idx, + IdxT n_queries, + uint32_t n_candidates) +{ + using LabelT = uint32_t; + + auto stream = raft::resource::get_cuda_stream(handle); + uint32_t n_lists = n_queries; + common::nvtx::range fun_scope( + "ivf_flat::fill_refinement_index(%zu, %u)", size_t(n_queries)); + + rmm::device_uvector new_labels(n_queries * n_candidates, stream); + auto new_labels_view = + raft::make_device_vector_view(new_labels.data(), n_queries * n_candidates); + raft::linalg::map_offset( + handle, + new_labels_view, + raft::compose_op(raft::cast_op(), raft::div_const_op(n_candidates))); + + auto list_sizes_ptr = refinement_index->list_sizes().data_handle(); + // We do not fill centers and center norms, since we will not run coarse search. 
+ + // Allocate new memory + auto& lists = refinement_index->lists(); + list_spec list_device_spec{refinement_index->dim(), false}; + for (uint32_t label = 0; label < n_lists; label++) { + ivf::resize_list(handle, lists[label], list_device_spec, n_candidates, uint32_t(0)); + } + // Update the pointers and the sizes + ivf::detail::recompute_internal_state(handle, *refinement_index); + + RAFT_CUDA_TRY(cudaMemsetAsync(list_sizes_ptr, 0, n_lists * sizeof(uint32_t), stream)); + + const dim3 block_dim(256); + const dim3 grid_dim(raft::ceildiv(n_queries * n_candidates, block_dim.x)); + build_index_kernel + <<>>(new_labels.data(), + dataset, + candidate_idx, + refinement_index->data_ptrs().data_handle(), + refinement_index->inds_ptrs().data_handle(), + list_sizes_ptr, + n_queries * n_candidates, + refinement_index->dim(), + refinement_index->veclen()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +template +auto build(raft::resources const& handle, + const index_params& params, + raft::device_matrix_view dataset) -> index +{ + IdxT n_rows = dataset.extent(0); + IdxT dim = dataset.extent(1); + return build(handle, params, dataset.data_handle(), n_rows, dim); +} + +template +auto build(raft::resources const& handle, + const index_params& params, + raft::host_matrix_view dataset) -> index +{ + IdxT n_rows = dataset.extent(0); + IdxT dim = dataset.extent(1); + return build(handle, params, dataset.data_handle(), n_rows, dim); +} + +template +void build(raft::resources const& handle, + const index_params& params, + raft::device_matrix_view dataset, + index& index) +{ + IdxT n_rows = dataset.extent(0); + IdxT dim = dataset.extent(1); + index = build(handle, params, dataset.data_handle(), n_rows, dim); +} + +template +void build(raft::resources const& handle, + const index_params& params, + raft::host_matrix_view dataset, + index& index) +{ + IdxT n_rows = dataset.extent(0); + IdxT dim = dataset.extent(1); + index = build(handle, params, dataset.data_handle(), n_rows, dim); +} + +template +auto extend(raft::resources const& handle, + raft::device_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_flat::index& orig_index) -> index +{ + ASSERT(new_vectors.extent(1) == orig_index.dim(), + "new_vectors should have the same dimension as the index"); + + IdxT n_rows = new_vectors.extent(0); + if (new_indices.has_value()) { + ASSERT(n_rows == new_indices.value().extent(0), + "new_vectors and new_indices have different number of rows"); + } + + return extend(handle, + orig_index, + new_vectors.data_handle(), + new_indices.has_value() ? new_indices.value().data_handle() : nullptr, + n_rows); +} + +template +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_flat::index& orig_index) -> index +{ + ASSERT(new_vectors.extent(1) == orig_index.dim(), + "new_vectors should have the same dimension as the index"); + + IdxT n_rows = new_vectors.extent(0); + if (new_indices.has_value()) { + ASSERT(n_rows == new_indices.value().extent(0), + "new_vectors and new_indices have different number of rows"); + } + + return extend(handle, + orig_index, + new_vectors.data_handle(), + new_indices.has_value() ? 
new_indices.value().data_handle() : nullptr, + n_rows); +} + +template +void extend(raft::resources const& handle, + raft::device_matrix_view new_vectors, + std::optional> new_indices, + index* index) +{ + ASSERT(new_vectors.extent(1) == index->dim(), + "new_vectors should have the same dimension as the index"); + + IdxT n_rows = new_vectors.extent(0); + if (new_indices.has_value()) { + ASSERT(n_rows == new_indices.value().extent(0), + "new_vectors and new_indices have different number of rows"); + } + + *index = extend(handle, + *index, + new_vectors.data_handle(), + new_indices.has_value() ? new_indices.value().data_handle() : nullptr, + n_rows); +} + +template +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + index* index) +{ + ASSERT(new_vectors.extent(1) == index->dim(), + "new_vectors should have the same dimension as the index"); + + IdxT n_rows = new_vectors.extent(0); + if (new_indices.has_value()) { + ASSERT(n_rows == new_indices.value().extent(0), + "new_vectors and new_indices have different number of rows"); + } + + *index = extend(handle, + *index, + new_vectors.data_handle(), + new_indices.has_value() ? new_indices.value().data_handle() : nullptr, + n_rows); +} + +} // namespace detail +} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_float_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_float_int64_t.cu new file mode 100644 index 000000000..52026172c --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_float_int64_t.cu @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_ivf_flat.py + * + * Make changes there and run in this directory: + * + * > python generate_ivf_flat.py + * + */ + +#include + +#include "ivf_flat_build.cuh" + +namespace cuvs::neighbors::ivf_flat { + +#define CUVS_INST_IVF_FLAT_BUILD_EXTEND(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::device_matrix_view dataset) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index( \ + std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::device_matrix_view dataset, \ + cuvs::neighbors::ivf_flat::index& idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ + } \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::host_matrix_view dataset) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index( \ + std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::host_matrix_view dataset, \ + cuvs::neighbors::ivf_flat::index& idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ + } \ + auto extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_flat::index& orig_index) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index(std::move( \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_flat::index* idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ + } \ + auto extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_flat::index& orig_index) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index(std::move( \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_flat::index* idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ + } +CUVS_INST_IVF_FLAT_BUILD_EXTEND(float, int64_t); + +#undef CUVS_INST_IVF_FLAT_BUILD_EXTEND + +} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_int8_t_int64_t.cu new file mode 100644 index 000000000..ef326f241 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_int8_t_int64_t.cu @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_ivf_flat.py + * + * Make changes there and run in this directory: + * + * > python generate_ivf_flat.py + * + */ + +#include + +#include "ivf_flat_build.cuh" + +namespace cuvs::neighbors::ivf_flat { + +#define CUVS_INST_IVF_FLAT_BUILD_EXTEND(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::device_matrix_view dataset) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index( \ + std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::device_matrix_view dataset, \ + cuvs::neighbors::ivf_flat::index& idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ + } \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::host_matrix_view dataset) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index( \ + std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::host_matrix_view dataset, \ + cuvs::neighbors::ivf_flat::index& idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ + } \ + auto extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_flat::index& orig_index) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index(std::move( \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_flat::index* idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ + } \ + auto extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_flat::index& orig_index) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index(std::move( \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_flat::index* idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ + } +CUVS_INST_IVF_FLAT_BUILD_EXTEND(int8_t, int64_t); + +#undef CUVS_INST_IVF_FLAT_BUILD_EXTEND + +} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_uint8_t_int64_t.cu new file mode 100644 index 000000000..8a1f3e42f --- 
/dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_uint8_t_int64_t.cu @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_ivf_flat.py + * + * Make changes there and run in this directory: + * + * > python generate_ivf_flat.py + * + */ + +#include + +#include "ivf_flat_build.cuh" + +namespace cuvs::neighbors::ivf_flat { + +#define CUVS_INST_IVF_FLAT_BUILD_EXTEND(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::device_matrix_view dataset) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index( \ + std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::device_matrix_view dataset, \ + cuvs::neighbors::ivf_flat::index& idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ + } \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::host_matrix_view dataset) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index( \ + std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::host_matrix_view dataset, \ + cuvs::neighbors::ivf_flat::index& idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ + } \ + auto extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_flat::index& orig_index) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index(std::move( \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_flat::index* idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ + } \ + auto extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_flat::index& orig_index) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index(std::move( \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_flat::index* idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ + } +CUVS_INST_IVF_FLAT_BUILD_EXTEND(uint8_t, int64_t); 
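+// A typical call pattern for the API instantiated above (sketch only; `res` is a caller-owned
+// raft::resources handle, `dataset_view` and `more_vecs_view` are
+// raft::device_matrix_view<const uint8_t, int64_t>, and `more_ids_view` is a
+// raft::device_vector_view<const int64_t, int64_t>):
+//
+//   cuvs::neighbors::ivf_flat::index_params params;
+//   params.n_lists = 1024;
+//   auto idx = cuvs::neighbors::ivf_flat::build(res, params, dataset_view);
+//   // passing indices is required here because `idx` is non-empty after build
+//   cuvs::neighbors::ivf_flat::extend(res, more_vecs_view, std::make_optional(more_ids_view), &idx);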
+ +#undef CUVS_INST_IVF_FLAT_BUILD_EXTEND + +} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_float_int64_t.cpp b/cpp/src/neighbors/ivf_flat/ivf_flat_build_float_int64_t.cpp deleted file mode 100644 index 177aaac11..000000000 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build_float_int64_t.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by generate_ivf_flat.py - * - * Make changes there and run in this directory: - * - * > python generate_ivf_flat.py - * - */ - -#include -#include - -namespace cuvs::neighbors::ivf_flat { - -#define CUVS_INST_IVF_FLAT_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index( \ - std::move(raft::runtime::neighbors::ivf_flat::build(handle, params, dataset))); \ - } \ - \ - void build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::device_matrix_view dataset, \ - cuvs::neighbors::ivf_flat::index& idx) \ - { \ - raft::runtime::neighbors::ivf_flat::build(handle, params, dataset, *idx.get_raft_index()); \ - } -CUVS_INST_IVF_FLAT_BUILD(float, int64_t); - -#undef CUVS_INST_IVF_FLAT_BUILD - -} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_float_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_build_float_int64_t.cu new file mode 100644 index 000000000..56bb71094 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build_float_int64_t.cu @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_ivf_flat.py + * + * Make changes there and run in this directory: + * + * > python generate_ivf_flat.py + * + */ + +#include + +#include "ivf_flat_build.cuh" + +namespace cuvs::neighbors::ivf_flat { + +#define CUVS_INST_IVF_FLAT_BUILD(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::device_matrix_view dataset) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index( \ + std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::device_matrix_view dataset, \ + cuvs::neighbors::ivf_flat::index& idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ + } \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::host_matrix_view dataset) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index( \ + std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::host_matrix_view dataset, \ + cuvs::neighbors::ivf_flat::index& idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ + } +CUVS_INST_IVF_FLAT_BUILD(float, int64_t); + +#undef CUVS_INST_IVF_FLAT_BUILD + +} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_int8_t_int64_t.cpp b/cpp/src/neighbors/ivf_flat/ivf_flat_build_int8_t_int64_t.cpp deleted file mode 100644 index 6fe6e2b8d..000000000 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build_int8_t_int64_t.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * NOTE: this file is generated by generate_ivf_flat.py - * - * Make changes there and run in this directory: - * - * > python generate_ivf_flat.py - * - */ - -#include -#include - -namespace cuvs::neighbors::ivf_flat { - -#define CUVS_INST_IVF_FLAT_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index( \ - std::move(raft::runtime::neighbors::ivf_flat::build(handle, params, dataset))); \ - } \ - \ - void build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::device_matrix_view dataset, \ - cuvs::neighbors::ivf_flat::index& idx) \ - { \ - raft::runtime::neighbors::ivf_flat::build(handle, params, dataset, *idx.get_raft_index()); \ - } -CUVS_INST_IVF_FLAT_BUILD(int8_t, int64_t); - -#undef CUVS_INST_IVF_FLAT_BUILD - -} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_build_int8_t_int64_t.cu new file mode 100644 index 000000000..4803868c0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build_int8_t_int64_t.cu @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_ivf_flat.py + * + * Make changes there and run in this directory: + * + * > python generate_ivf_flat.py + * + */ + +#include + +#include "ivf_flat_build.cuh" + +namespace cuvs::neighbors::ivf_flat { + +#define CUVS_INST_IVF_FLAT_BUILD(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::device_matrix_view dataset) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index( \ + std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::device_matrix_view dataset, \ + cuvs::neighbors::ivf_flat::index& idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ + } \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::host_matrix_view dataset) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index( \ + std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::host_matrix_view dataset, \ + cuvs::neighbors::ivf_flat::index& idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ + } +CUVS_INST_IVF_FLAT_BUILD(int8_t, int64_t); + +#undef CUVS_INST_IVF_FLAT_BUILD + +} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_uint8_t_int64_t.cpp b/cpp/src/neighbors/ivf_flat/ivf_flat_build_uint8_t_int64_t.cpp deleted file mode 100644 index 01098ed45..000000000 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build_uint8_t_int64_t.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * NOTE: this file is generated by generate_ivf_flat.py - * - * Make changes there and run in this directory: - * - * > python generate_ivf_flat.py - * - */ - -#include -#include - -namespace cuvs::neighbors::ivf_flat { - -#define CUVS_INST_IVF_FLAT_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index( \ - std::move(raft::runtime::neighbors::ivf_flat::build(handle, params, dataset))); \ - } \ - \ - void build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_flat::index_params& params, \ - raft::device_matrix_view dataset, \ - cuvs::neighbors::ivf_flat::index& idx) \ - { \ - raft::runtime::neighbors::ivf_flat::build(handle, params, dataset, *idx.get_raft_index()); \ - } -CUVS_INST_IVF_FLAT_BUILD(uint8_t, int64_t); - -#undef CUVS_INST_IVF_FLAT_BUILD - -} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_build_uint8_t_int64_t.cu new file mode 100644 index 000000000..e087f94c4 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build_uint8_t_int64_t.cu @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_ivf_flat.py + * + * Make changes there and run in this directory: + * + * > python generate_ivf_flat.py + * + */ + +#include + +#include "ivf_flat_build.cuh" + +namespace cuvs::neighbors::ivf_flat { + +#define CUVS_INST_IVF_FLAT_BUILD(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::device_matrix_view dataset) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index( \ + std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::device_matrix_view dataset, \ + cuvs::neighbors::ivf_flat::index& idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ + } \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::host_matrix_view dataset) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index( \ + std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_flat::index_params& params, \ + raft::host_matrix_view dataset, \ + cuvs::neighbors::ivf_flat::index& idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset, idx); \ + } +CUVS_INST_IVF_FLAT_BUILD(uint8_t, int64_t); + +#undef CUVS_INST_IVF_FLAT_BUILD + +} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_float_int64_t.cpp b/cpp/src/neighbors/ivf_flat/ivf_flat_extend_float_int64_t.cpp deleted file mode 100644 index 04ca3a50f..000000000 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_float_int64_t.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * NOTE: this file is generated by generate_ivf_flat.py - * - * Make changes there and run in this directory: - * - * > python generate_ivf_flat.py - * - */ - -#include -#include - -namespace cuvs::neighbors::ivf_flat { - -#define CUVS_INST_IVF_FLAT_EXTEND(T, IdxT) \ - auto extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index( \ - std::move(raft::runtime::neighbors::ivf_flat::extend( \ - handle, new_vectors, new_indices, *orig_index.get_raft_index()))); \ - } \ - \ - void extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - cuvs::neighbors::ivf_flat::index* idx) \ - { \ - raft::runtime::neighbors::ivf_flat::extend( \ - handle, new_vectors, new_indices, idx->get_raft_index()); \ - } -CUVS_INST_IVF_FLAT_EXTEND(float, int64_t); - -#undef CUVS_INST_IVF_FLAT_EXTEND - -} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_float_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_extend_float_int64_t.cu new file mode 100644 index 000000000..2636067bf --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_extend_float_int64_t.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_ivf_flat.py + * + * Make changes there and run in this directory: + * + * > python generate_ivf_flat.py + * + */ + +#include + +#include "ivf_flat_build.cuh" + +namespace cuvs::neighbors::ivf_flat { + +#define CUVS_INST_IVF_FLAT_EXTEND(T, IdxT) \ + auto extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_flat::index& orig_index) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index(std::move( \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_flat::index* idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ + } \ + auto extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_flat::index& orig_index) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index(std::move( \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_flat::index* idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ + } +CUVS_INST_IVF_FLAT_EXTEND(float, int64_t); + +#undef CUVS_INST_IVF_FLAT_EXTEND + +} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_int8_t_int64_t.cpp b/cpp/src/neighbors/ivf_flat/ivf_flat_extend_int8_t_int64_t.cpp deleted file mode 100644 index accc53e04..000000000 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_int8_t_int64_t.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * NOTE: this file is generated by generate_ivf_flat.py - * - * Make changes there and run in this directory: - * - * > python generate_ivf_flat.py - * - */ - -#include -#include - -namespace cuvs::neighbors::ivf_flat { - -#define CUVS_INST_IVF_FLAT_EXTEND(T, IdxT) \ - auto extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index( \ - std::move(raft::runtime::neighbors::ivf_flat::extend( \ - handle, new_vectors, new_indices, *orig_index.get_raft_index()))); \ - } \ - \ - void extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - cuvs::neighbors::ivf_flat::index* idx) \ - { \ - raft::runtime::neighbors::ivf_flat::extend( \ - handle, new_vectors, new_indices, idx->get_raft_index()); \ - } -CUVS_INST_IVF_FLAT_EXTEND(int8_t, int64_t); - -#undef CUVS_INST_IVF_FLAT_EXTEND - -} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_extend_int8_t_int64_t.cu new file mode 100644 index 000000000..191cb9f39 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_extend_int8_t_int64_t.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_ivf_flat.py + * + * Make changes there and run in this directory: + * + * > python generate_ivf_flat.py + * + */ + +#include + +#include "ivf_flat_build.cuh" + +namespace cuvs::neighbors::ivf_flat { + +#define CUVS_INST_IVF_FLAT_EXTEND(T, IdxT) \ + auto extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_flat::index& orig_index) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index(std::move( \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_flat::index* idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ + } \ + auto extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_flat::index& orig_index) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index(std::move( \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_flat::index* idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ + } +CUVS_INST_IVF_FLAT_EXTEND(int8_t, int64_t); + +#undef CUVS_INST_IVF_FLAT_EXTEND + +} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_uint8_t_int64_t.cpp b/cpp/src/neighbors/ivf_flat/ivf_flat_extend_uint8_t_int64_t.cpp deleted file mode 100644 index e44ae51b1..000000000 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_uint8_t_int64_t.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * NOTE: this file is generated by generate_ivf_flat.py - * - * Make changes there and run in this directory: - * - * > python generate_ivf_flat.py - * - */ - -#include -#include - -namespace cuvs::neighbors::ivf_flat { - -#define CUVS_INST_IVF_FLAT_EXTEND(T, IdxT) \ - auto extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ - { \ - return cuvs::neighbors::ivf_flat::index( \ - std::move(raft::runtime::neighbors::ivf_flat::extend( \ - handle, new_vectors, new_indices, *orig_index.get_raft_index()))); \ - } \ - \ - void extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - cuvs::neighbors::ivf_flat::index* idx) \ - { \ - raft::runtime::neighbors::ivf_flat::extend( \ - handle, new_vectors, new_indices, idx->get_raft_index()); \ - } -CUVS_INST_IVF_FLAT_EXTEND(uint8_t, int64_t); - -#undef CUVS_INST_IVF_FLAT_EXTEND - -} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_extend_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_extend_uint8_t_int64_t.cu new file mode 100644 index 000000000..29b7e7b69 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_extend_uint8_t_int64_t.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_ivf_flat.py + * + * Make changes there and run in this directory: + * + * > python generate_ivf_flat.py + * + */ + +#include + +#include "ivf_flat_build.cuh" + +namespace cuvs::neighbors::ivf_flat { + +#define CUVS_INST_IVF_FLAT_EXTEND(T, IdxT) \ + auto extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_flat::index& orig_index) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index(std::move( \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_flat::index* idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ + } \ + auto extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_flat::index& orig_index) \ + ->cuvs::neighbors::ivf_flat::index \ + { \ + return cuvs::neighbors::ivf_flat::index(std::move( \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_flat::index* idx) \ + { \ + cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, idx); \ + } +CUVS_INST_IVF_FLAT_EXTEND(uint8_t, int64_t); + +#undef CUVS_INST_IVF_FLAT_EXTEND + +} // namespace cuvs::neighbors::ivf_flat diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_helpers.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_helpers.cu new file mode 100644 index 000000000..be1eb87a0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_helpers.cu @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "ivf_flat_helpers.cuh" +#include + +namespace cuvs::neighbors::ivf_flat::helpers { +namespace codepacker { +namespace detail { +template +void pack( + raft::resources const& res, + raft::device_matrix_view codes, + uint32_t veclen, + uint32_t offset, + raft::device_mdspan::list_extents, raft::row_major> + list_data) +{ + pack_list_data(res, codes, veclen, offset, list_data); +} + +template +void unpack( + raft::resources const& res, + raft::device_mdspan::list_extents, raft::row_major> + list_data, + uint32_t veclen, + uint32_t offset, + raft::device_matrix_view codes) +{ + unpack_list_data(res, list_data, veclen, offset, codes); +} + +template +void pack_1(const T* flat_code, T* block, uint32_t dim, uint32_t veclen, uint32_t offset) +{ + // The data is written in interleaved groups of `index::kGroupSize` vectors + using interleaved_group = cuvs::neighbors::detail::div_utils; + + // Interleave dimensions of the source vector while recording it. 
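+  // For example, pack_1(flat_code, block, dim, veclen, offset) writes the `dim` values of one flat
+  // vector into the interleaved group of `block` that holds list position `offset`; unpack_1 below
+  // performs the exact inverse and recovers the flat vector from the same location.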
+ // NB: such `veclen` is selected, that `dim % veclen == 0` + auto group_offset = interleaved_group::roundDown(offset); + auto ingroup_id = interleaved_group::mod(offset) * veclen; + + for (uint32_t l = 0; l < dim; l += veclen) { + for (uint32_t j = 0; j < veclen; j++) { + block[group_offset * dim + l * kIndexGroupSize + ingroup_id + j] = flat_code[l + j]; + } + } +} + +template +void unpack_1(const T* block, T* flat_code, uint32_t dim, uint32_t veclen, uint32_t offset) +{ + // The data is written in interleaved groups of `index::kGroupSize` vectors + using interleaved_group = cuvs::neighbors::detail::div_utils; + + // NB: such `veclen` is selected, that `dim % veclen == 0` + auto group_offset = interleaved_group::roundDown(offset); + auto ingroup_id = interleaved_group::mod(offset) * veclen; + + for (uint32_t l = 0; l < dim; l += veclen) { + for (uint32_t j = 0; j < veclen; j++) { + flat_code[l + j] = block[group_offset * dim + l * kIndexGroupSize + ingroup_id + j]; + } + } +} +} // namespace detail + +void pack(raft::resources const& res, + raft::device_matrix_view codes, + uint32_t veclen, + uint32_t offset, + raft::device_mdspan::list_extents, + raft::row_major> list_data) +{ + detail::pack(res, codes, veclen, offset, list_data); +} + +void pack(raft::resources const& res, + raft::device_matrix_view codes, + uint32_t veclen, + uint32_t offset, + raft::device_mdspan::list_extents, + raft::row_major> list_data) +{ + detail::pack(res, codes, veclen, offset, list_data); +} + +void pack(raft::resources const& res, + raft::device_matrix_view codes, + uint32_t veclen, + uint32_t offset, + raft::device_mdspan::list_extents, + raft::row_major> list_data) +{ + detail::pack(res, codes, veclen, offset, list_data); +} + +void unpack(raft::resources const& res, + raft::device_mdspan::list_extents, + raft::row_major> list_data, + uint32_t veclen, + uint32_t offset, + raft::device_matrix_view codes) +{ + detail::unpack(res, list_data, veclen, offset, codes); +} + +void unpack(raft::resources const& res, + raft::device_mdspan::list_extents, + raft::row_major> list_data, + uint32_t veclen, + uint32_t offset, + raft::device_matrix_view codes) +{ + detail::unpack(res, list_data, veclen, offset, codes); +} + +void unpack(raft::resources const& res, + raft::device_mdspan::list_extents, + raft::row_major> list_data, + uint32_t veclen, + uint32_t offset, + raft::device_matrix_view codes) +{ + detail::unpack(res, list_data, veclen, offset, codes); +} + +void pack_1(const float* flat_code, float* block, uint32_t dim, uint32_t veclen, uint32_t offset) +{ + detail::pack_1(flat_code, block, dim, veclen, offset); +} + +void pack_1(const int8_t* flat_code, int8_t* block, uint32_t dim, uint32_t veclen, uint32_t offset) +{ + detail::pack_1(flat_code, block, dim, veclen, offset); +} + +void pack_1( + const uint8_t* flat_code, uint8_t* block, uint32_t dim, uint32_t veclen, uint32_t offset) +{ + detail::pack_1(flat_code, block, dim, veclen, offset); +} + +void unpack_1(const float* block, float* flat_code, uint32_t dim, uint32_t veclen, uint32_t offset) +{ + detail::unpack_1(block, flat_code, dim, veclen, offset); +} + +void unpack_1( + const int8_t* block, int8_t* flat_code, uint32_t dim, uint32_t veclen, uint32_t offset) +{ + detail::unpack_1(block, flat_code, dim, veclen, offset); +} + +void unpack_1( + const uint8_t* block, uint8_t* flat_code, uint32_t dim, uint32_t veclen, uint32_t offset) +{ + detail::unpack_1(block, flat_code, dim, veclen, offset); +} + +} // namespace codepacker + +namespace detail { + +template 
+void reset_index(const raft::resources& res, index* idx) +{ + auto stream = raft::resource::get_cuda_stream(res); + + cuvs::spatial::knn::detail::utils::memzero( + idx->accum_sorted_sizes().data_handle(), idx->accum_sorted_sizes().size(), stream); + cuvs::spatial::knn::detail::utils::memzero( + idx->list_sizes().data_handle(), idx->list_sizes().size(), stream); + cuvs::spatial::knn::detail::utils::memzero( + idx->data_ptrs().data_handle(), idx->data_ptrs().size(), stream); + cuvs::spatial::knn::detail::utils::memzero( + idx->inds_ptrs().data_handle(), idx->inds_ptrs().size(), stream); +} + +} // namespace detail + +void reset_index(const raft::resources& res, index* index) +{ + detail::reset_index(res, index); +} + +/** + * @brief Public helper API to reset the data and indices ptrs, and the list sizes. Useful for + * externally modifying the index without going through the build stage. The data and indices of the + * IVF lists will be lost. + * + * Usage example: + * @code{.cpp} + * raft::resources res; + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_flat::index_params index_params; + * // initialize an empty index + * ivf_flat::index index(res, index_params, D); + * // reset the index's state and list sizes + * ivf_flat::helpers::reset_index(res, &index); + * @endcode + * + * @param[in] res raft resource + * @param[inout] index pointer to IVF-Flat index + */ +void reset_index(const raft::resources& res, index* index) +{ + detail::reset_index(res, index); +} + +/** + * @brief Public helper API to reset the data and indices ptrs, and the list sizes. Useful for + * externally modifying the index without going through the build stage. The data and indices of the + * IVF lists will be lost. + * + * Usage example: + * @code{.cpp} + * raft::resources res; + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_flat::index_params index_params; + * // initialize an empty index + * ivf_flat::index index(res, index_params, D); + * // reset the index's state and list sizes + * ivf_flat::helpers::reset_index(res, &index); + * @endcode + * + * @param[in] res raft resource + * @param[inout] index pointer to IVF-Flat index + */ +void reset_index(const raft::resources& res, index* index) +{ + detail::reset_index(res, index); +} + +} // namespace cuvs::neighbors::ivf_flat::helpers diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_helpers.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_helpers.cuh new file mode 100644 index 000000000..f76b5e95f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_helpers.cuh @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "../detail/ann_utils.cuh" + +#include "../detail/div_utils.cuh" +#include +#include +#include +#include + +namespace cuvs::neighbors::ivf_flat::helpers::codepacker { + +namespace { + +template +__device__ void pack_1(const T* flat_code, T* block, uint32_t dim, uint32_t veclen, uint32_t offset) +{ + // The data is written in interleaved groups of `index::kGroupSize` vectors + using interleaved_group = cuvs::neighbors::detail::div_utils; + + // Interleave dimensions of the source vector while recording it. + // NB: such `veclen` is selected, that `dim % veclen == 0` + auto group_offset = interleaved_group::roundDown(offset); + auto ingroup_id = interleaved_group::mod(offset) * veclen; + + for (uint32_t l = 0; l < dim; l += veclen) { + for (uint32_t j = 0; j < veclen; j++) { + block[group_offset * dim + l * kIndexGroupSize + ingroup_id + j] = flat_code[l + j]; + } + } +} + +template +__device__ void unpack_1( + const T* block, T* flat_code, uint32_t dim, uint32_t veclen, uint32_t offset) +{ + // The data is written in interleaved groups of `index::kGroupSize` vectors + using interleaved_group = cuvs::neighbors::detail::div_utils; + + // NB: such `veclen` is selected, that `dim % veclen == 0` + auto group_offset = interleaved_group::roundDown(offset); + auto ingroup_id = interleaved_group::mod(offset) * veclen; + + for (uint32_t l = 0; l < dim; l += veclen) { + for (uint32_t j = 0; j < veclen; j++) { + flat_code[l + j] = block[group_offset * dim + l * kIndexGroupSize + ingroup_id + j]; + } + } +} + +template +RAFT_KERNEL pack_interleaved_list_kernel(const T* codes, + T* list_data, + uint32_t n_rows, + uint32_t dim, + uint32_t veclen, + std::variant offset_or_indices) +{ + uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; + const uint32_t dst_ix = std::holds_alternative(offset_or_indices) + ? std::get(offset_or_indices) + tid + : std::get(offset_or_indices)[tid]; + if (tid < n_rows) { pack_1(codes + tid * dim, list_data, dim, veclen, dst_ix); } +} + +template +RAFT_KERNEL unpack_interleaved_list_kernel( + const T* list_data, + T* codes, + uint32_t n_rows, + uint32_t dim, + uint32_t veclen, + std::variant offset_or_indices) +{ + uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x; + const uint32_t src_ix = std::holds_alternative(offset_or_indices) + ? 
std::get(offset_or_indices) + tid + : std::get(offset_or_indices)[tid]; + if (tid < n_rows) { unpack_1(list_data, codes + tid * dim, dim, veclen, src_ix); } +} + +template +void pack_list_data( + raft::resources const& res, + raft::device_matrix_view codes, + uint32_t veclen, + std::variant offset_or_indices, + raft::device_mdspan::list_extents, raft::row_major> + list_data) +{ + uint32_t n_rows = codes.extent(0); + uint32_t dim = codes.extent(1); + if (n_rows == 0 || dim == 0) return; + static constexpr uint32_t kBlockSize = 256; + dim3 blocks(raft::div_rounding_up_safe(n_rows, kBlockSize), 1, 1); + dim3 threads(kBlockSize, 1, 1); + auto stream = raft::resource::get_cuda_stream(res); + pack_interleaved_list_kernel<<>>( + codes.data_handle(), list_data.data_handle(), n_rows, dim, veclen, offset_or_indices); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +template +void unpack_list_data( + raft::resources const& res, + raft::device_mdspan::list_extents, raft::row_major> + list_data, + uint32_t veclen, + std::variant offset_or_indices, + raft::device_matrix_view codes) +{ + uint32_t n_rows = codes.extent(0); + uint32_t dim = codes.extent(1); + if (n_rows == 0 || dim == 0) return; + static constexpr uint32_t kBlockSize = 256; + dim3 blocks(raft::div_rounding_up_safe(n_rows, kBlockSize), 1, 1); + dim3 threads(kBlockSize, 1, 1); + auto stream = raft::resource::get_cuda_stream(res); + unpack_interleaved_list_kernel<<>>( + list_data.data_handle(), codes.data_handle(), n_rows, dim, veclen, offset_or_indices); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +} // namespace +} // namespace cuvs::neighbors::ivf_flat::helpers::codepacker diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh new file mode 100644 index 000000000..d63059457 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh @@ -0,0 +1,1201 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../ivf_common.cuh" +#include "../sample_filter.cuh" +#include +#include + +#include // RAFT_LOG_TRACE +#include +#include +#include +#include +#include // RAFT_CUDA_TRY +#include +#include +#include +#include + +#include + +namespace cuvs::neighbors::ivf_flat::detail { + +using namespace raft::spatial::knn::detail; // NOLINT + +constexpr int kThreadsPerBlock = 128; + +auto RAFT_WEAK_FUNCTION is_local_topk_feasible(uint32_t k) -> bool +{ + return k <= raft::matrix::detail::select::warpsort::kMaxCapacity; +} + +/** + * @brief Copy `n` elements per block from one place to another. 
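+ *
+ * Implementation note: when `out` and `in` do not share the same alignment offset, the routine
+ * retries with half the vector width (down to scalar copies); otherwise the unaligned head and
+ * tail are handled separately and the aligned middle part is moved with `VecBytes`-wide accesses.
+ * As a rough illustration, with `T = float` and `VecBytes = 16` the bulk of the copy proceeds in
+ * 4-element (float4-sized) chunks per thread per iteration.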
+ * + * @param[out] out target pointer (unique per block) + * @param[in] in source pointer + * @param n number of elements to copy + */ +template +__device__ inline void copy_vectorized(T* out, const T* in, uint32_t n) +{ + constexpr int VecElems = VecBytes / sizeof(T); // NOLINT + using align_bytes = raft::Pow2<(size_t)VecBytes>; + if constexpr (VecElems > 1) { + using align_elems = raft::Pow2; + if (!align_bytes::areSameAlignOffsets(out, in)) { + return copy_vectorized<(VecBytes >> 1), T>(out, in, n); + } + { // process unaligned head + uint32_t head = align_bytes::roundUp(in) - in; + if (head > 0) { + copy_vectorized(out, in, head); + n -= head; + in += head; + out += head; + } + } + { // process main part vectorized + using vec_t = typename raft::IOType::Type; + copy_vectorized( + reinterpret_cast(out), reinterpret_cast(in), align_elems::div(n)); + } + { // process unaligned tail + uint32_t tail = align_elems::mod(n); + if (tail > 0) { + n -= tail; + copy_vectorized(out + n, in + n, tail); + } + } + } + if constexpr (VecElems <= 1) { + for (int i = threadIdx.x; i < n; i += blockDim.x) { + out[i] = in[i]; + } + } +} + +/** + * @brief Load a part of a vector from the index and from query, compute the (part of the) distance + * between them, and aggregate it using the provided Lambda; one structure per thread, per query, + * and per index item. + * + * @tparam kUnroll elements per loop (normally, kUnroll = WarpSize / Veclen) + * @tparam Lambda computing the part of the distance for one dimension and aggregating it: + * void (AccT& acc, AccT x, AccT y) + * @tparam Veclen size of the vectorized load + * @tparam T type of the data in the query and the index + * @tparam AccT type of the accumulated value (an optimization for 8bit values to be loaded as 32bit + * values) + */ +template +struct loadAndComputeDist { + Lambda compute_dist; + AccT& dist; + + __device__ __forceinline__ loadAndComputeDist(AccT& dist, Lambda op) + : dist(dist), compute_dist(op) + { + } + + /** + * Load parts of vectors from the index and query and accumulates the partial distance. + * This version assumes the query is stored in shared memory. + * Every thread here processes exactly kUnroll * Veclen elements independently of others. + */ + template + __device__ __forceinline__ void runLoadShmemCompute(const T* const& data, + const T* query_shared, + IdxT loadIndex, + IdxT shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + T encV[Veclen]; + raft::ldg(encV, data + (loadIndex + j * kIndexGroupSize) * Veclen); + T queryRegs[Veclen]; + raft::lds(queryRegs, &query_shared[shmemIndex + j * Veclen]); +#pragma unroll + for (int k = 0; k < Veclen; ++k) { + compute_dist(dist, queryRegs[k], encV[k]); + } + } + } + + /** + * Load parts of vectors from the index and query and accumulates the partial distance. + * This version assumes the query is stored in the global memory and is different for every + * thread. One warp loads exactly WarpSize query elements at once and then reshuffles them into + * corresponding threads (`WarpSize / (kUnroll * Veclen)` elements per thread at once). 
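+ *
+ * A minimal numeric sketch (assuming WarpSize = 32): with Veclen = 4 and kUnroll = 8, the stride
+ * is kUnroll * Veclen = 32, so totalIter = 1; each lane loads a single query element, the warp
+ * covers dimensions [baseLoadIndex, baseLoadIndex + 32) via shuffles, and `data` advances by
+ * stride * kIndexGroupSize = 32 * 32 = 1024 elements per outer iteration.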
+ */ + template + __device__ __forceinline__ void runLoadShflAndCompute(const T*& data, + const T* query, + IdxT baseLoadIndex, + const int lane_id) + { + T queryReg = query[baseLoadIndex + lane_id]; + constexpr int stride = kUnroll * Veclen; + constexpr int totalIter = raft::WarpSize / stride; + constexpr int gmemStride = stride * kIndexGroupSize; +#pragma unroll + for (int i = 0; i < totalIter; ++i, data += gmemStride) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + T encV[Veclen]; + raft::ldg(encV, data + (lane_id + j * kIndexGroupSize) * Veclen); + const int d = (i * kUnroll + j) * Veclen; +#pragma unroll + for (int k = 0; k < Veclen; ++k) { + compute_dist(dist, raft::shfl(queryReg, d + k, raft::WarpSize), encV[k]); + } + } + } + } + + /** + * Load parts of vectors from the index and query and accumulates the partial distance. + * This version augments `runLoadShflAndCompute` when `dim` is not a multiple of `WarpSize`. + */ + __device__ __forceinline__ void runLoadShflAndComputeRemainder( + const T*& data, const T* query, const int lane_id, const int dim, const int dimBlocks) + { + const int loadDim = dimBlocks + lane_id; + T queryReg = loadDim < dim ? query[loadDim] : T{0}; + const int loadDataIdx = lane_id * Veclen; + for (int d = 0; d < dim - dimBlocks; d += Veclen, data += kIndexGroupSize * Veclen) { + T enc[Veclen]; + raft::ldg(enc, data + loadDataIdx); +#pragma unroll + for (int k = 0; k < Veclen; k++) { + compute_dist(dist, raft::shfl(queryReg, d + k, raft::WarpSize), enc[k]); + } + } + } +}; + +// This handles uint8_t 8, 16 Veclens +template +struct loadAndComputeDist { + Lambda compute_dist; + uint32_t& dist; + + __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op) + : dist(dist), compute_dist(op) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, + const uint8_t* query_shared, + int loadIndex, + int shmemIndex) + { + constexpr int veclen_int = uint8_veclen / 4; // converting uint8_t veclens to int + loadIndex = loadIndex * veclen_int; +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV[veclen_int]; + raft::ldg( + encV, + reinterpret_cast(data) + loadIndex + j * kIndexGroupSize * veclen_int); + uint32_t queryRegs[veclen_int]; + raft::lds(queryRegs, + reinterpret_cast(query_shared + shmemIndex) + j * veclen_int); +#pragma unroll + for (int k = 0; k < veclen_int; k++) { + compute_dist(dist, queryRegs[k], encV[k]); + } + } + } + __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, + const uint8_t* query, + int baseLoadIndex, + const int lane_id) + { + constexpr int veclen_int = uint8_veclen / 4; // converting uint8_t veclens to int + uint32_t queryReg = + (lane_id < 8) ? 
reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int stride = kUnroll * uint8_veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV[veclen_int]; + raft::ldg( + encV, + reinterpret_cast(data) + (lane_id + j * kIndexGroupSize) * veclen_int); + const int d = (i * kUnroll + j) * veclen_int; +#pragma unroll + for (int k = 0; k < veclen_int; ++k) { + compute_dist(dist, raft::shfl(queryReg, d + k, raft::WarpSize), encV[k]); + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data, + const uint8_t* query, + const int lane_id, + const int dim, + const int dimBlocks) + { + constexpr int veclen_int = uint8_veclen / 4; + const int loadDim = dimBlocks + lane_id * 4; // Here 4 is for 1 - int + uint32_t queryReg = loadDim < dim ? reinterpret_cast(query + loadDim)[0] : 0; + for (int d = 0; d < dim - dimBlocks; + d += uint8_veclen, data += kIndexGroupSize * uint8_veclen) { + uint32_t enc[veclen_int]; + raft::ldg(enc, reinterpret_cast(data) + lane_id * veclen_int); +#pragma unroll + for (int k = 0; k < veclen_int; k++) { + uint32_t q = raft::shfl(queryReg, (d / 4) + k, raft::WarpSize); + compute_dist(dist, q, enc[k]); + } + } + } +}; + +// Keep this specialized uint8 Veclen = 4, because compiler is generating suboptimal code while +// using above common template of int2/int4 +template +struct loadAndComputeDist { + Lambda compute_dist; + uint32_t& dist; + + __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op) + : dist(dist), compute_dist(op) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, + const uint8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; + uint32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; + compute_dist(dist, queryRegs, encV); + } + } + __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, + const uint8_t* query, + int baseLoadIndex, + const int lane_id) + { + uint32_t queryReg = + (lane_id < 8) ? reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int veclen = 4; + constexpr int stride = kUnroll * veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; + uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist(dist, q, encV); + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data, + const uint8_t* query, + const int lane_id, + const int dim, + const int dimBlocks) + { + constexpr int veclen = 4; + const int loadDim = dimBlocks + lane_id; + uint32_t queryReg = loadDim < dim ? 
reinterpret_cast(query)[loadDim] : 0; + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + uint32_t enc = reinterpret_cast(data)[lane_id]; + uint32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); + compute_dist(dist, q, enc); + } + } +}; + +template +struct loadAndComputeDist { + Lambda compute_dist; + uint32_t& dist; + + __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op) + : dist(dist), compute_dist(op) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, + const uint8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; + uint32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; + compute_dist(dist, queryRegs, encV); + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, + const uint8_t* query, + int baseLoadIndex, + const int lane_id) + { + uint32_t queryReg = + (lane_id < 16) ? reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int veclen = 2; + constexpr int stride = kUnroll * veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; + uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist(dist, q, encV); + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data, + const uint8_t* query, + const int lane_id, + const int dim, + const int dimBlocks) + { + constexpr int veclen = 2; + int loadDim = dimBlocks + lane_id * veclen; + uint32_t queryReg = loadDim < dim ? 
reinterpret_cast(query + loadDim)[0] : 0; + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + uint32_t enc = reinterpret_cast(data)[lane_id]; + uint32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); + compute_dist(dist, q, enc); + } + } +}; + +template +struct loadAndComputeDist { + Lambda compute_dist; + uint32_t& dist; + + __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op) + : dist(dist), compute_dist(op) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data, + const uint8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = data[loadIndex + j * kIndexGroupSize]; + uint32_t queryRegs = query_shared[shmemIndex + j]; + compute_dist(dist, queryRegs, encV); + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data, + const uint8_t* query, + int baseLoadIndex, + const int lane_id) + { + uint32_t queryReg = query[baseLoadIndex + lane_id]; + constexpr int veclen = 1; + constexpr int stride = kUnroll * veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + uint32_t encV = data[lane_id + j * kIndexGroupSize]; + uint32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist(dist, q, encV); + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data, + const uint8_t* query, + const int lane_id, + const int dim, + const int dimBlocks) + { + constexpr int veclen = 1; + int loadDim = dimBlocks + lane_id; + uint32_t queryReg = loadDim < dim ? query[loadDim] : 0; + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + uint32_t enc = data[lane_id]; + uint32_t q = raft::shfl(queryReg, d, raft::WarpSize); + compute_dist(dist, q, enc); + } + } +}; + +// This device function is for int8 veclens 4, 8 and 16 +template +struct loadAndComputeDist { + Lambda compute_dist; + int32_t& dist; + + __device__ __forceinline__ loadAndComputeDist(int32_t& dist, Lambda op) + : dist(dist), compute_dist(op) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data, + const int8_t* query_shared, + int loadIndex, + int shmemIndex) + { + constexpr int veclen_int = int8_veclen / 4; // converting int8_t veclens to int + +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + int32_t encV[veclen_int]; + raft::ldg( + encV, + reinterpret_cast(data) + (loadIndex + j * kIndexGroupSize) * veclen_int); + int32_t queryRegs[veclen_int]; + raft::lds(queryRegs, + reinterpret_cast(query_shared + shmemIndex) + j * veclen_int); +#pragma unroll + for (int k = 0; k < veclen_int; k++) { + compute_dist(dist, queryRegs[k], encV[k]); + } + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data, + const int8_t* query, + int baseLoadIndex, + const int lane_id) + { + constexpr int veclen_int = int8_veclen / 4; // converting int8_t veclens to int + + int32_t queryReg = + (lane_id < 8) ? 
reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int stride = kUnroll * int8_veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + int32_t encV[veclen_int]; + raft::ldg( + encV, + reinterpret_cast(data) + (lane_id + j * kIndexGroupSize) * veclen_int); + const int d = (i * kUnroll + j) * veclen_int; +#pragma unroll + for (int k = 0; k < veclen_int; ++k) { + int32_t q = raft::shfl(queryReg, d + k, raft::WarpSize); + compute_dist(dist, q, encV[k]); + } + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder( + const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks) + { + constexpr int veclen_int = int8_veclen / 4; + const int loadDim = dimBlocks + lane_id * 4; // Here 4 is for 1 - int; + int32_t queryReg = loadDim < dim ? reinterpret_cast(query + loadDim)[0] : 0; + for (int d = 0; d < dim - dimBlocks; d += int8_veclen, data += kIndexGroupSize * int8_veclen) { + int32_t enc[veclen_int]; + raft::ldg(enc, reinterpret_cast(data) + lane_id * veclen_int); +#pragma unroll + for (int k = 0; k < veclen_int; k++) { + int32_t q = raft::shfl(queryReg, (d / 4) + k, raft::WarpSize); // Here 4 is for 1 - int; + compute_dist(dist, q, enc[k]); + } + } + } +}; + +template +struct loadAndComputeDist { + Lambda compute_dist; + int32_t& dist; + __device__ __forceinline__ loadAndComputeDist(int32_t& dist, Lambda op) + : dist(dist), compute_dist(op) + { + } + __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data, + const int8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + int32_t encV = reinterpret_cast(data)[loadIndex + j * kIndexGroupSize]; + int32_t queryRegs = reinterpret_cast(query_shared + shmemIndex)[j]; + compute_dist(dist, queryRegs, encV); + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data, + const int8_t* query, + int baseLoadIndex, + const int lane_id) + { + int32_t queryReg = + (lane_id < 16) ? reinterpret_cast(query + baseLoadIndex)[lane_id] : 0; + constexpr int veclen = 2; + constexpr int stride = kUnroll * veclen; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + int32_t encV = reinterpret_cast(data)[lane_id + j * kIndexGroupSize]; + int32_t q = raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize); + compute_dist(dist, q, encV); + } + } + } + + __device__ __forceinline__ void runLoadShflAndComputeRemainder( + const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks) + { + constexpr int veclen = 2; + int loadDim = dimBlocks + lane_id * veclen; + int32_t queryReg = loadDim < dim ? 
reinterpret_cast(query + loadDim)[0] : 0; + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + int32_t enc = reinterpret_cast(data + lane_id * veclen)[0]; + int32_t q = raft::shfl(queryReg, d / veclen, raft::WarpSize); + compute_dist(dist, q, enc); + } + } +}; + +template +struct loadAndComputeDist { + Lambda compute_dist; + int32_t& dist; + __device__ __forceinline__ loadAndComputeDist(int32_t& dist, Lambda op) + : dist(dist), compute_dist(op) + { + } + + __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data, + const int8_t* query_shared, + int loadIndex, + int shmemIndex) + { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + compute_dist(dist, query_shared[shmemIndex + j], data[loadIndex + j * kIndexGroupSize]); + } + } + + __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data, + const int8_t* query, + int baseLoadIndex, + const int lane_id) + { + constexpr int veclen = 1; + constexpr int stride = kUnroll * veclen; + int32_t queryReg = query[baseLoadIndex + lane_id]; + +#pragma unroll + for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) { +#pragma unroll + for (int j = 0; j < kUnroll; ++j) { + compute_dist(dist, + raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize), + data[lane_id + j * kIndexGroupSize]); + } + } + } + __device__ __forceinline__ void runLoadShflAndComputeRemainder( + const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks) + { + constexpr int veclen = 1; + const int loadDim = dimBlocks + lane_id; + int32_t queryReg = loadDim < dim ? query[loadDim] : 0; + for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) { + compute_dist(dist, raft::shfl(queryReg, d, raft::WarpSize), data[lane_id]); + } + } +}; + +// switch to dummy blocksort when Capacity is 0 this explicit dummy is chosen +// to support access to warpsort constants like ::queue_t::kDummy +template +struct flat_block_sort { + using type = raft::matrix::detail::select::warpsort::block_sort< + raft::matrix::detail::select::warpsort::warp_sort_filtered, + Capacity, + Ascending, + T, + IdxT>; +}; + +template +struct flat_block_sort<0, Ascending, T, IdxT> + : ivf::detail::dummy_block_sort_t { + using type = ivf::detail::dummy_block_sort_t; +}; + +template +using block_sort_t = typename flat_block_sort::type; + +/** + * Scan clusters for nearest neighbors of the query vectors. + * See `ivfflat_interleaved_scan` for more information. + * + * The clusters are stored in the interleaved index format described in ivf_flat_types.hpp. + * For each query vector, a set of clusters is probed: the distance to each vector in the cluster is + * calculated, and the top-k nearest neighbors are selected. + * + * @param compute_dist distance function + * @param query_smem_elems number of dimensions of the query vector to fit in a shared memory of a + * block; this number must be a multiple of `WarpSize * Veclen`. 
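+ *   (the host-side launcher caps this at 16 KiB worth of elements, e.g. at most 4096 dimensions
+ *   for float queries; dimensions beyond that are read through the warp-shuffle path instead)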
+ * @param[in] query a pointer to all queries in a row-major contiguous format [gridDim.y, dim] + * @param[in] coarse_index a pointer to the cluster indices to search through [n_probes] + * @param[in] list_indices index.indices + * @param[in] list_data index.data + * @param[in] list_sizes index.list_sizes + * @param[in] list_offsets index.list_offsets + * @param n_probes + * @param k + * @param dim + * @param sample_filter + * @param[out] neighbors + * @param[out] distances + */ +template +RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) + interleaved_scan_kernel(Lambda compute_dist, + PostLambda post_process, + const uint32_t query_smem_elems, + const T* query, + const uint32_t* coarse_index, + const T* const* list_data_ptrs, + const uint32_t* list_sizes, + const uint32_t queries_offset, + const uint32_t n_probes, + const uint32_t k, + const uint32_t max_samples, + const uint32_t* chunk_indices, + const uint32_t dim, + IvfSampleFilterT sample_filter, + uint32_t* neighbors, + float* distances) +{ + extern __shared__ __align__(256) uint8_t interleaved_scan_kernel_smem[]; + constexpr bool kManageLocalTopK = Capacity > 0; + // Using shared memory for the (part of the) query; + // This allows to save on global memory bandwidth when reading index and query + // data at the same time. + // Its size is `query_smem_elems`. + T* query_shared = reinterpret_cast(interleaved_scan_kernel_smem); + // Make the query input and output point to this block's shared query + { + const int query_id = blockIdx.y; + query += query_id * dim; + if constexpr (kManageLocalTopK) { + neighbors += query_id * k * gridDim.x + blockIdx.x * k; + distances += query_id * k * gridDim.x + blockIdx.x * k; + } else { + distances += query_id * uint64_t(max_samples); + } + chunk_indices += (n_probes * query_id); + coarse_index += query_id * n_probes; + } + + // Copy a part of the query into shared memory for faster processing + copy_vectorized(query_shared, query, std::min(dim, query_smem_elems)); + __syncthreads(); + + using local_topk_t = block_sort_t; + local_topk_t queue(k); + { + using align_warp = raft::Pow2; + const int lane_id = align_warp::mod(threadIdx.x); + + // How many full warps needed to compute the distance (without remainder) + const uint32_t full_warps_along_dim = align_warp::roundDown(dim); + + const uint32_t shm_assisted_dim = + (dim > query_smem_elems) ? query_smem_elems : full_warps_along_dim; + + // Every CUDA block scans one cluster at a time. + for (int probe_id = blockIdx.x; probe_id < n_probes; probe_id += gridDim.x) { + const uint32_t list_id = coarse_index[probe_id]; // The id of cluster(list) + + // The number of vectors in each cluster(list); [nlist] + const uint32_t list_length = list_sizes[list_id]; + + // The number of interleaved groups to be processed + const uint32_t num_groups = + align_warp::div(list_length + align_warp::Mask); // ceildiv by power of 2 + + uint32_t sample_offset = 0; + if (probe_id > 0) { sample_offset = chunk_indices[probe_id - 1]; } + assert(list_length == chunk_indices[probe_id] - sample_offset); + assert(sample_offset + list_length <= max_samples); + + constexpr int kUnroll = raft::WarpSize / Veclen; + constexpr uint32_t kNumWarps = kThreadsPerBlock / raft::WarpSize; + // Every warp reads WarpSize vectors and computes the distances to them. + // Then, the distances and corresponding ids are distributed among the threads, + // and each thread adds one (id, dist) pair to the filtering queue. 
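+      // Worked example of the addressing below (kIndexGroupSize = 32, WarpSize = 32): interleaved
+      // group `g` of this list starts at element offset g * 32 * dim, lane `l` of the warp scores
+      // vector number g * 32 + l, and a list of length n spans num_groups = ceil(n / 32) groups.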
+ for (uint32_t group_id = align_warp::div(threadIdx.x); group_id < num_groups; + group_id += kNumWarps) { + AccT dist = 0; + // This is where this warp begins reading data (start position of an interleaved group) + const T* data = list_data_ptrs[list_id] + (group_id * kIndexGroupSize) * dim; + + // This is the vector a given lane/thread handles + const uint32_t vec_id = group_id * raft::WarpSize + lane_id; + const bool valid = + vec_id < list_length && sample_filter(queries_offset + blockIdx.y, list_id, vec_id); + + // Process first shm_assisted_dim dimensions (always using shared memory) + if (valid) { + loadAndComputeDist lc(dist, + compute_dist); + for (int pos = 0; pos < shm_assisted_dim; + pos += raft::WarpSize, data += kIndexGroupSize * raft::WarpSize) { + lc.runLoadShmemCompute(data, query_shared, lane_id, pos); + } + } + + if (dim > query_smem_elems) { + // The default path - using shfl ops - for dimensions beyond query_smem_elems + loadAndComputeDist lc(dist, + compute_dist); + for (int pos = shm_assisted_dim; pos < full_warps_along_dim; pos += raft::WarpSize) { + lc.runLoadShflAndCompute(data, query, pos, lane_id); + } + lc.runLoadShflAndComputeRemainder(data, query, lane_id, dim, full_warps_along_dim); + } else { + // when shm_assisted_dim == full_warps_along_dim < dim + if (valid) { + loadAndComputeDist<1, decltype(compute_dist), Veclen, T, AccT> lc(dist, compute_dist); + for (int pos = full_warps_along_dim; pos < dim; + pos += Veclen, data += kIndexGroupSize * Veclen) { + lc.runLoadShmemCompute(data, query_shared, lane_id, pos); + } + } + } + + // Enqueue one element per thread + const float val = valid ? static_cast(dist) : local_topk_t::queue_t::kDummy; + if constexpr (kManageLocalTopK) { + queue.add(val, sample_offset + vec_id); + } else { + if (vec_id < list_length) distances[sample_offset + vec_id] = val; + } + } + + // fill up unused slots for current query + if constexpr (!kManageLocalTopK) { + if (probe_id + 1 == n_probes) { + for (uint32_t i = threadIdx.x + sample_offset + list_length; i < max_samples; + i += blockDim.x) { + distances[i] = local_topk_t::queue_t::kDummy; + } + } + } + } + } + + // finalize and store selected neighbours + if constexpr (kManageLocalTopK) { + __syncthreads(); + queue.done(interleaved_scan_kernel_smem); + queue.store(distances, neighbors, post_process); + } +} + +/** + * Configure the gridDim.x to maximize GPU occupancy, but reduce the output size + */ +template +uint32_t configure_launch_x(uint32_t numQueries, uint32_t n_probes, int32_t sMemSize, T func) +{ + int dev_id; + RAFT_CUDA_TRY(cudaGetDevice(&dev_id)); + int num_sms; + RAFT_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id)); + int num_blocks_per_sm = 0; + RAFT_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks_per_sm, func, kThreadsPerBlock, sMemSize)); + + size_t min_grid_size = num_sms * num_blocks_per_sm; + size_t min_grid_x = raft::ceildiv(min_grid_size, numQueries); + return min_grid_x > n_probes ? 
n_probes : static_cast(min_grid_x); +} + +template +void launch_kernel(Lambda lambda, + PostLambda post_process, + const index& index, + const T* queries, + const uint32_t* coarse_index, + const uint32_t num_queries, + const uint32_t queries_offset, + const uint32_t n_probes, + const uint32_t k, + const uint32_t max_samples, + const uint32_t* chunk_indices, + IvfSampleFilterT sample_filter, + uint32_t* neighbors, + float* distances, + uint32_t& grid_dim_x, + rmm::cuda_stream_view stream) +{ + RAFT_EXPECTS(Veclen == index.veclen(), + "Configured Veclen does not match the index interleaving pattern."); + constexpr auto kKernel = interleaved_scan_kernel; + const int max_query_smem = 16384; + int query_smem_elems = std::min(max_query_smem / sizeof(T), + raft::Pow2::roundUp(index.dim())); + int smem_size = query_smem_elems * sizeof(T); + + if constexpr (Capacity > 0) { + constexpr int kSubwarpSize = std::min(Capacity, raft::WarpSize); + auto block_merge_mem = + raft::matrix::detail::select::warpsort::calc_smem_size_for_block_wide( + kThreadsPerBlock / kSubwarpSize, k); + smem_size += std::max(smem_size, block_merge_mem); + } + + // power-of-two less than cuda limit (for better addr alignment) + constexpr uint32_t kMaxGridY = 32768; + + if (grid_dim_x == 0) { + grid_dim_x = configure_launch_x(std::min(kMaxGridY, num_queries), n_probes, smem_size, kKernel); + return; + } + + for (uint32_t query_offset = 0; query_offset < num_queries; query_offset += kMaxGridY) { + uint32_t grid_dim_y = std::min(kMaxGridY, num_queries - query_offset); + dim3 grid_dim(grid_dim_x, grid_dim_y, 1); + dim3 block_dim(kThreadsPerBlock); + RAFT_LOG_TRACE( + "Launching the ivf-flat interleaved_scan_kernel (%d, %d, 1) x (%d, 1, 1), n_probes = %d, " + "smem_size = %d", + grid_dim.x, + grid_dim.y, + block_dim.x, + n_probes, + smem_size); + kKernel<<>>(lambda, + post_process, + query_smem_elems, + queries, + coarse_index, + index.data_ptrs().data_handle(), + index.list_sizes().data_handle(), + queries_offset + query_offset, + n_probes, + k, + max_samples, + chunk_indices, + index.dim(), + sample_filter, + neighbors, + distances); + queries += grid_dim_y * index.dim(); + if constexpr (Capacity > 0) { + neighbors += grid_dim_y * grid_dim_x * k; + distances += grid_dim_y * grid_dim_x * k; + } else { + distances += grid_dim_y * max_samples; + } + chunk_indices += grid_dim_y * n_probes; + coarse_index += grid_dim_y * n_probes; + } +} + +template +struct euclidean_dist { + __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y) + { + const auto diff = x - y; + acc += diff * diff; + } +}; + +template +struct euclidean_dist { + __device__ __forceinline__ void operator()(uint32_t& acc, uint32_t x, uint32_t y) + { + if constexpr (Veclen > 1) { + const auto diff = __vabsdiffu4(x, y); + acc = raft::dp4a(diff, diff, acc); + } else { + const auto diff = __usad(x, y, 0u); + acc += diff * diff; + } + } +}; + +template +struct euclidean_dist { + __device__ __forceinline__ void operator()(int32_t& acc, int32_t x, int32_t y) + { + if constexpr (Veclen > 1) { + // Note that we enforce here that the unsigned version of dp4a is used, because the difference + // between two int8 numbers can be greater than 127 and therefore represented as a negative + // number in int8. Casting from int8 to int32 would yield incorrect results, while casting + // from uint8 to uint32 is correct. 
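+      // For a concrete example of the comment above: bytes 127 and -128 differ by 255 (0xFF);
+      // treating 0xFF as signed would contribute (-1)^2 = 1 to the sum instead of the correct
+      // 255^2 = 65025, hence the unsigned dp4a on the absolute differences.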
+ const auto diff = __vabsdiffs4(x, y); + acc = raft::dp4a(diff, diff, static_cast(acc)); + } else { + const auto diff = x - y; + acc += diff * diff; + } + } +}; + +template +struct inner_prod_dist { + __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y) + { + if constexpr (Veclen > 1 && (std::is_same_v || std::is_same_v)) { + acc = raft::dp4a(x, y, acc); + } else { + acc += x * y; + } + } +}; + +/** Select the distance computation function and forward the rest of the arguments. */ +template +void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... args) +{ + switch (metric) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2Unexpanded: + return launch_kernel, + raft::identity_op>({}, {}, std::forward(args)...); + case cuvs::distance::DistanceType::L2SqrtExpanded: + case cuvs::distance::DistanceType::L2SqrtUnexpanded: + return launch_kernel, + raft::sqrt_op>({}, {}, std::forward(args)...); + case cuvs::distance::DistanceType::InnerProduct: + return launch_kernel, + raft::identity_op>({}, {}, std::forward(args)...); + // NB: update the description of `knn::ivf_flat::build` when adding here a new metric. + default: RAFT_FAIL("The chosen distance metric is not supported (%d)", int(metric)); + } +} + +/** + * Lift the `capacity` and `veclen` parameters to the template level, + * forward the rest of the arguments unmodified to `launch_interleaved_scan_kernel`. + */ +template (1, 16 / sizeof(T))> +struct select_interleaved_scan_kernel { + /** + * Recursively reduce the `Capacity` and `Veclen` parameters until they match the + * corresponding runtime arguments. + * By default, this recursive process starts with maximum possible values of the + * two parameters and ends with both values equal to 1. + */ + template + static inline void run(int k_max, int veclen, bool select_min, Args&&... args) + { + if constexpr (Capacity > 0) { + if (k_max == 0 || k_max > Capacity) { + return select_interleaved_scan_kernel::run( + k_max, veclen, select_min, std::forward(args)...); + } + } + if constexpr (Capacity > 1) { + if (k_max * 2 <= Capacity) { + return select_interleaved_scan_kernel::run(k_max, + veclen, + select_min, + std::forward(args)...); + } + } + if constexpr (Veclen > 1) { + if (veclen % Veclen != 0) { + return select_interleaved_scan_kernel::run( + k_max, 1, select_min, std::forward(args)...); + } + } + // NB: this is the limitation of the warpsort structures that use a huge number of + // registers (used in the main kernel here). + RAFT_EXPECTS(Capacity == 0 || k_max == Capacity, + "Capacity must be either 0 or a power-of-two not bigger than the maximum " + "allowed size matrix::detail::select::warpsort::kMaxCapacity (%d).", + raft::matrix::detail::select::warpsort::kMaxCapacity); + RAFT_EXPECTS( + veclen == Veclen, + "Veclen must be power-of-two not bigger than the maximum allowed size for this data type."); + if (select_min) { + launch_with_fixed_consts( + std::forward(args)...); + } else { + launch_with_fixed_consts( + std::forward(args)...); + } + } +}; + +/** + * @brief Configure and launch an appropriate template instance of the interleaved scan kernel. 
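+ *
+ * Note: when this is invoked with `grid_dim_x == 0`, the launch path (`launch_kernel`) only
+ * computes an occupancy-based grid width, writes it back through `grid_dim_x`, and returns
+ * without launching the kernel; the search implementation uses this first pass to size its
+ * per-probe output buffers before running the actual scan.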
+ * + * @tparam T value type + * @tparam AccT accumulated type + * @tparam IdxT type of the indices + * + * @param index previously built ivf-flat index + * @param[in] queries device pointer to the query vectors [batch_size, dim] + * @param[in] coarse_query_results device pointer to the cluster (list) ids [batch_size, n_probes] + * @param n_queries batch size + * @param[in] queries_offset + * An offset of the current query batch. It is used for feeding sample_filter with the + * correct query index. + * @param metric type of the measured distance + * @param n_probes number of nearest clusters to query + * @param k number of nearest neighbors. + * NB: the maximum value of `k` is limited statically by `kMaxCapacity`. + * @param select_min whether to select nearest (true) or furthest (false) points w.r.t. the given + * metric. + * @param[out] neighbors device pointer to the result indices for each query and cluster + * [batch_size, grid_dim_x, k] + * @param[out] distances device pointer to the result distances for each query and cluster + * [batch_size, grid_dim_x, k] + * @param[inout] grid_dim_x number of blocks launched across all n_probes clusters; + * (one block processes one or more probes, hence: 1 <= grid_dim_x <= n_probes) + * @param stream + * @param sample_filter + * A filter that selects samples for a given query. Use an instance of none_ivf_sample_filter to + * provide a green light for every sample. + */ +template +void ivfflat_interleaved_scan(const index& index, + const T* queries, + const uint32_t* coarse_query_results, + const uint32_t n_queries, + const uint32_t queries_offset, + const cuvs::distance::DistanceType metric, + const uint32_t n_probes, + const uint32_t k, + const uint32_t max_samples, + const uint32_t* chunk_indices, + const bool select_min, + IvfSampleFilterT sample_filter, + uint32_t* neighbors, + float* distances, + uint32_t& grid_dim_x, + rmm::cuda_stream_view stream) +{ + const int capacity = raft::bound_by_power_of_two(k); + + auto filter_adapter = cuvs::neighbors::filtering::ivf_to_sample_filter( + index.inds_ptrs().data_handle(), sample_filter); + select_interleaved_scan_kernel::run(capacity, + index.veclen(), + select_min, + metric, + index, + queries, + coarse_query_results, + n_queries, + queries_offset, + n_probes, + k, + max_samples, + chunk_indices, + filter_adapter, + neighbors, + distances, + grid_dim_x, + stream); +} + +} // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh new file mode 100644 index 000000000..bb106ba2f --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "../../core/nvtx.hpp" +#include "../detail/ann_utils.cuh" +#include "../ivf_common.cuh" // cuvs::neighbors::detail::ivf +#include "ivf_flat_interleaved_scan.cuh" // interleaved_scan +#include // raft::neighbors::ivf_flat::index +#include // none_ivf_sample_filter + +#include // is_min_close, DistanceType +#include // RAFT_LOG_TRACE +#include +#include // raft::resources +#include // raft::linalg::gemm +#include // raft::linalg::norm +#include // raft::linalg::unary_op +#include // matrix::detail::select_k +#include // utils::mapping + +#include + +namespace cuvs::neighbors::ivf_flat::detail { + +using namespace cuvs::spatial::knn::detail; // NOLINT + +template +void search_impl(raft::resources const& handle, + const cuvs::neighbors::ivf_flat::index& index, + const T* queries, + uint32_t n_queries, + uint32_t queries_offset, + uint32_t k, + uint32_t n_probes, + uint32_t max_samples, + bool select_min, + IdxT* neighbors, + AccT* distances, + rmm::device_async_resource_ref search_mr, + IvfSampleFilterT sample_filter) +{ + auto stream = raft::resource::get_cuda_stream(handle); + + std::size_t n_queries_probes = std::size_t(n_queries) * std::size_t(n_probes); + + // The norm of query + rmm::device_uvector query_norm_dev(n_queries, stream, search_mr); + // The distance value of cluster(list) and queries + rmm::device_uvector distance_buffer_dev(n_queries * index.n_lists(), stream, search_mr); + // The topk distance value of cluster(list) and queries + rmm::device_uvector coarse_distances_dev(n_queries_probes, stream, search_mr); + // The topk index of cluster(list) and queries + rmm::device_uvector coarse_indices_dev(n_queries_probes, stream, search_mr); + + // Optional structures if postprocessing is required + // The topk distance value of candidate vectors from each cluster(list) + rmm::device_uvector distances_tmp_dev(0, stream, search_mr); + // Number of samples for each query + rmm::device_uvector num_samples(0, stream, search_mr); + // Offsets per probe for each query + rmm::device_uvector chunk_index(0, stream, search_mr); + + // The topk index of candidate vectors from each cluster(list), local index offset + // also we might need additional storage for select_k + rmm::device_uvector indices_tmp_dev(0, stream, search_mr); + rmm::device_uvector neighbors_uint32_buf(0, stream, search_mr); + + size_t float_query_size; + if constexpr (std::is_integral_v) { + float_query_size = n_queries * index.dim(); + } else { + float_query_size = 0; + } + rmm::device_uvector converted_queries_dev(float_query_size, stream, search_mr); + float* converted_queries_ptr = converted_queries_dev.data(); + + if constexpr (std::is_same_v) { + converted_queries_ptr = const_cast(queries); + } else { + raft::linalg::unaryOp( + converted_queries_ptr, queries, n_queries * index.dim(), utils::mapping{}, stream); + } + + float alpha = 1.0f; + float beta = 0.0f; + + // todo(lsugy): raft distance? 
(if performance is similar/better than gemm) + switch (index.metric()) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2SqrtExpanded: { + alpha = -2.0f; + beta = 1.0f; + raft::linalg::rowNorm(query_norm_dev.data(), + converted_queries_ptr, + static_cast(index.dim()), + static_cast(n_queries), + raft::linalg::L2Norm, + true, + stream); + utils::outer_add(query_norm_dev.data(), + (IdxT)n_queries, + index.center_norms()->data_handle(), + (IdxT)index.n_lists(), + distance_buffer_dev.data(), + stream); + RAFT_LOG_TRACE_VEC(index.center_norms()->data_handle(), std::min(20, index.dim())); + RAFT_LOG_TRACE_VEC(distance_buffer_dev.data(), std::min(20, index.n_lists())); + break; + } + default: { + alpha = 1.0f; + beta = 0.0f; + } + } + + raft::linalg::gemm(handle, + true, + false, + index.n_lists(), + n_queries, + index.dim(), + &alpha, + index.centers().data_handle(), + index.dim(), + converted_queries_ptr, + index.dim(), + &beta, + distance_buffer_dev.data(), + index.n_lists(), + stream); + + RAFT_LOG_TRACE_VEC(distance_buffer_dev.data(), std::min(20, index.n_lists())); + raft::matrix::detail::select_k(handle, + distance_buffer_dev.data(), + nullptr, + n_queries, + index.n_lists(), + n_probes, + coarse_distances_dev.data(), + coarse_indices_dev.data(), + select_min); + RAFT_LOG_TRACE_VEC(coarse_indices_dev.data(), n_probes); + RAFT_LOG_TRACE_VEC(coarse_distances_dev.data(), n_probes); + + uint32_t grid_dim_x = 0; + if (n_probes > 1) { + // query the gridDimX size to store probes topK output + ivfflat_interleaved_scan::value_t, IdxT, IvfSampleFilterT>( + index, + nullptr, + nullptr, + n_queries, + queries_offset, + index.metric(), + n_probes, + k, + 0, + nullptr, + select_min, + sample_filter, + nullptr, + nullptr, + grid_dim_x, + stream); + } else { + grid_dim_x = 1; + } + + num_samples.resize(n_queries, stream); + chunk_index.resize(n_queries_probes, stream); + + ivf::detail::calc_chunk_indices::configure(n_probes, n_queries)(index.list_sizes().data_handle(), + coarse_indices_dev.data(), + chunk_index.data(), + num_samples.data(), + stream); + + auto distances_dev_ptr = distances; + + uint32_t* neighbors_uint32 = nullptr; + if constexpr (sizeof(IdxT) == sizeof(uint32_t)) { + neighbors_uint32 = reinterpret_cast(neighbors); + } else { + neighbors_uint32_buf.resize(std::size_t(n_queries) * std::size_t(k), stream); + neighbors_uint32 = neighbors_uint32_buf.data(); + } + + uint32_t* indices_dev_ptr = nullptr; + + bool manage_local_topk = is_local_topk_feasible(k); + if (!manage_local_topk || grid_dim_x > 1) { + auto target_size = std::size_t(n_queries) * (manage_local_topk ? 
grid_dim_x * k : max_samples); + + distances_tmp_dev.resize(target_size, stream); + if (manage_local_topk) indices_tmp_dev.resize(target_size, stream); + + distances_dev_ptr = distances_tmp_dev.data(); + indices_dev_ptr = indices_tmp_dev.data(); + } else { + indices_dev_ptr = neighbors_uint32; + } + + ivfflat_interleaved_scan::value_t, IdxT, IvfSampleFilterT>( + index, + queries, + coarse_indices_dev.data(), + n_queries, + queries_offset, + index.metric(), + n_probes, + k, + max_samples, + chunk_index.data(), + select_min, + sample_filter, + indices_dev_ptr, + distances_dev_ptr, + grid_dim_x, + stream); + + RAFT_LOG_TRACE_VEC(distances_dev_ptr, 2 * k); + if (indices_dev_ptr != nullptr) { RAFT_LOG_TRACE_VEC(indices_dev_ptr, 2 * k); } + + // Merge topk values from different blocks + if (!manage_local_topk || grid_dim_x > 1) { + raft::matrix::detail::select_k( + handle, + distances_tmp_dev.data(), + indices_tmp_dev.data(), + n_queries, + manage_local_topk ? (k * grid_dim_x) : max_samples, + k, + distances, + neighbors_uint32, + select_min, + false, + raft::matrix::SelectAlgo::kAuto, + manage_local_topk ? nullptr : num_samples.data()); + } + if (!manage_local_topk) { + // post process distances && neighbor IDs + ivf::detail::postprocess_distances( + distances, distances, index.metric(), n_queries, k, 1.0, false, stream); + } + ivf::detail::postprocess_neighbors(neighbors, + neighbors_uint32, + index.inds_ptrs().data_handle(), + coarse_indices_dev.data(), + chunk_index.data(), + n_queries, + n_probes, + k, + stream); +} + +/** See raft::neighbors::ivf_flat::search docs */ +template +inline void search_with_filtering( + raft::resources const& handle, + const search_params& params, + const index& index, + const T* queries, + uint32_t n_queries, + uint32_t k, + IdxT* neighbors, + float* distances, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource(), + IvfSampleFilterT sample_filter = IvfSampleFilterT()) +{ + common::nvtx::range fun_scope( + "ivf_flat::search(k = %u, n_queries = %u, dim = %zu)", k, n_queries, index.dim()); + + RAFT_EXPECTS(params.n_probes > 0, + "n_probes (number of clusters to probe in the search) must be positive."); + auto n_probes = std::min(params.n_probes, index.n_lists()); + bool manage_local_topk = is_local_topk_feasible(k); + + uint32_t max_samples = 0; + if (!manage_local_topk) { + IdxT ms = raft::Pow2<128 / sizeof(float)>::roundUp( + std::max(index.accum_sorted_sizes()(n_probes), k)); + RAFT_EXPECTS(ms <= IdxT(std::numeric_limits::max()), + "The maximum sample size is too big."); + max_samples = ms; + } + + // a batch size heuristic: try to keep the workspace within the specified size + constexpr uint64_t kExpectedWsSize = 1024 * 1024 * 1024; + uint64_t max_ws_size = + std::min(raft::resource::get_workspace_free_bytes(handle), kExpectedWsSize); + + uint64_t ws_size_per_query = 4ull * (2 * n_probes + index.n_lists() + index.dim() + 1) + + (manage_local_topk ? 
((sizeof(IdxT) + 4) * n_probes * k) + : (4ull * (max_samples + n_probes + 1))); + + const uint32_t max_queries = + std::min(n_queries, raft::div_rounding_up_safe(max_ws_size, ws_size_per_query)); + + for (uint32_t offset_q = 0; offset_q < n_queries; offset_q += max_queries) { + uint32_t queries_batch = raft::min(max_queries, n_queries - offset_q); + + search_impl(handle, + index, + queries + offset_q * index.dim(), + queries_batch, + offset_q, + k, + n_probes, + max_samples, + cuvs::distance::is_min_close(index.metric()), + neighbors + offset_q * k, + distances + offset_q * k, + mr, + sample_filter); + } +} + +template +void search_with_filtering(raft::resources const& handle, + const search_params& params, + const index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + IvfSampleFilterT sample_filter = IvfSampleFilterT()) +{ + RAFT_EXPECTS( + queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0), + "Number of rows in output neighbors and distances matrices must equal the number of queries."); + + RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1), + "Number of columns in output neighbors and distances matrices must be equal"); + + RAFT_EXPECTS(queries.extent(1) == index.dim(), + "Number of query dimensions should equal number of dimensions in the index."); + + search_with_filtering(handle, + params, + index, + queries.data_handle(), + static_cast(queries.extent(0)), + static_cast(neighbors.extent(1)), + neighbors.data_handle(), + distances.data_handle(), + raft::resource::get_workspace_resource(handle), + sample_filter); +} + +template +void search(raft::resources const& handle, + const search_params& params, + const index& idx, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances) +{ + search_with_filtering(handle, + params, + idx, + queries, + neighbors, + distances, + cuvs::neighbors::filtering::none_ivf_sample_filter()); +} + +} // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cpp b/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu similarity index 58% rename from cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cpp rename to cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu index 48a584e9e..93e46cbef 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cpp +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu @@ -24,7 +24,8 @@ */ #include -#include + +#include "ivf_flat_search.cuh" namespace cuvs::neighbors::ivf_flat { @@ -36,8 +37,20 @@ namespace cuvs::neighbors::ivf_flat { raft::device_matrix_view neighbors, \ raft::device_matrix_view distances) \ { \ - raft::runtime::neighbors::ivf_flat::search( \ - handle, params, *index.get_raft_index(), queries, neighbors, distances); \ + cuvs::neighbors::ivf_flat::detail::search( \ + handle, params, index, queries, neighbors, distances); \ + } \ + void search_with_filtering( \ + raft::resources const& handle, \ + const search_params& params, \ + index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + cuvs::neighbors::filtering::bitset_filter sample_filter) \ + { \ + cuvs::neighbors::ivf_flat::detail::search_with_filtering( \ + handle, params, idx, queries, neighbors, distances, sample_filter); \ } CUVS_INST_IVF_FLAT_SEARCH(float, int64_t); diff --git 
a/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cpp b/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu similarity index 58% rename from cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cpp rename to cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu index 5645c18e0..5f75d3d48 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cpp +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu @@ -24,7 +24,8 @@ */ #include -#include + +#include "ivf_flat_search.cuh" namespace cuvs::neighbors::ivf_flat { @@ -36,8 +37,20 @@ namespace cuvs::neighbors::ivf_flat { raft::device_matrix_view neighbors, \ raft::device_matrix_view distances) \ { \ - raft::runtime::neighbors::ivf_flat::search( \ - handle, params, *index.get_raft_index(), queries, neighbors, distances); \ + cuvs::neighbors::ivf_flat::detail::search( \ + handle, params, index, queries, neighbors, distances); \ + } \ + void search_with_filtering( \ + raft::resources const& handle, \ + const search_params& params, \ + index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + cuvs::neighbors::filtering::bitset_filter sample_filter) \ + { \ + cuvs::neighbors::ivf_flat::detail::search_with_filtering( \ + handle, params, idx, queries, neighbors, distances, sample_filter); \ } CUVS_INST_IVF_FLAT_SEARCH(int8_t, int64_t); diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cpp b/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu similarity index 58% rename from cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cpp rename to cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu index ab1bee8b6..a2696dc84 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cpp +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu @@ -24,7 +24,8 @@ */ #include -#include + +#include "ivf_flat_search.cuh" namespace cuvs::neighbors::ivf_flat { @@ -36,8 +37,20 @@ namespace cuvs::neighbors::ivf_flat { raft::device_matrix_view neighbors, \ raft::device_matrix_view distances) \ { \ - raft::runtime::neighbors::ivf_flat::search( \ - handle, params, *index.get_raft_index(), queries, neighbors, distances); \ + cuvs::neighbors::ivf_flat::detail::search( \ + handle, params, index, queries, neighbors, distances); \ + } \ + void search_with_filtering( \ + raft::resources const& handle, \ + const search_params& params, \ + index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + cuvs::neighbors::filtering::bitset_filter sample_filter) \ + { \ + cuvs::neighbors::ivf_flat::detail::search_with_filtering( \ + handle, params, idx, queries, neighbors, distances, sample_filter); \ } CUVS_INST_IVF_FLAT_SEARCH(uint8_t, int64_t); diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_serialize.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_serialize.cuh new file mode 100644 index 000000000..3d3ff49d0 --- /dev/null +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_serialize.cuh @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../ivf_common.cuh" +#include "../ivf_list.cuh" +#include +#include + +#include +#include +#include +#include +#include + +#include + +namespace cuvs::neighbors::ivf_flat::detail { + +// Serialization version +// No backward compatibility yet; that is, can't add additional fields without breaking +// backward compatibility. +// TODO(hcho3) Implement next-gen serializer for IVF that allows for expansion in a backward +// compatible fashion. +constexpr int serialization_version = 4; + +/** + * Save the index to file. + * + * Experimental, both the API and the serialization format are subject to change. + * + * @param[in] handle the raft handle + * @param[in] filename the file name for saving the index + * @param[in] index_ IVF-Flat index + * + */ +template +void serialize(raft::resources const& handle, std::ostream& os, const index& index_) +{ + RAFT_LOG_DEBUG( + "Saving IVF-Flat index, size %zu, dim %u", static_cast(index_.size()), index_.dim()); + + std::string dtype_string = raft::detail::numpy_serializer::get_numpy_dtype().to_string(); + dtype_string.resize(4); + os << dtype_string; + + serialize_scalar(handle, os, serialization_version); + serialize_scalar(handle, os, index_.size()); + serialize_scalar(handle, os, index_.dim()); + serialize_scalar(handle, os, index_.n_lists()); + serialize_scalar(handle, os, index_.metric()); + serialize_scalar(handle, os, index_.adaptive_centers()); + serialize_scalar(handle, os, index_.conservative_memory_allocation()); + serialize_mdspan(handle, os, index_.centers()); + if (index_.center_norms()) { + bool has_norms = true; + serialize_scalar(handle, os, has_norms); + serialize_mdspan(handle, os, *index_.center_norms()); + } else { + bool has_norms = false; + serialize_scalar(handle, os, has_norms); + } + auto sizes_host = raft::make_host_vector(index_.list_sizes().extent(0)); + raft::copy(sizes_host.data_handle(), + index_.list_sizes().data_handle(), + sizes_host.size(), + raft::resource::get_cuda_stream(handle)); + raft::resource::sync_stream(handle); + serialize_mdspan(handle, os, sizes_host.view()); + + list_spec list_store_spec{index_.dim(), true}; + for (uint32_t label = 0; label < index_.n_lists(); label++) { + ivf::serialize_list(handle, + os, + index_.lists()[label], + list_store_spec, + raft::Pow2::roundUp(sizes_host(label))); + } + raft::resource::sync_stream(handle); +} + +template +void serialize(raft::resources const& handle, + const std::string& filename, + const index& index_) +{ + std::ofstream of(filename, std::ios::out | std::ios::binary); + if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + + detail::serialize(handle, of, index_); + + of.close(); + if (!of) { RAFT_FAIL("Error writing output %s", filename.c_str()); } +} + +/** Load an index from file. + * + * Experimental, both the API and the serialization format are subject to change. 
+ * + * @param[in] handle the raft handle + * @param[in] filename the name of the file that stores the index + * @param[in] index_ IVF-Flat index + * + */ +template +auto deserialize(raft::resources const& handle, std::istream& is) -> index +{ + char dtype_string[4]; + is.read(dtype_string, 4); + + auto ver = raft::deserialize_scalar(handle, is); + if (ver != serialization_version) { + RAFT_FAIL("serialization version mismatch, expected %d, got %d ", serialization_version, ver); + } + auto n_rows = raft::deserialize_scalar(handle, is); + auto dim = raft::deserialize_scalar(handle, is); + auto n_lists = raft::deserialize_scalar(handle, is); + auto metric = raft::deserialize_scalar(handle, is); + bool adaptive_centers = raft::deserialize_scalar(handle, is); + bool cma = raft::deserialize_scalar(handle, is); + + index index_ = index(handle, metric, n_lists, adaptive_centers, cma, dim); + + deserialize_mdspan(handle, is, index_.centers()); + bool has_norms = raft::deserialize_scalar(handle, is); + if (has_norms) { + index_.allocate_center_norms(handle); + if (!index_.center_norms()) { + RAFT_FAIL("Error inconsistent center norms"); + } else { + auto center_norms = index_.center_norms().value(); + deserialize_mdspan(handle, is, center_norms); + } + } + deserialize_mdspan(handle, is, index_.list_sizes()); + + list_spec list_device_spec{index_.dim(), cma}; + list_spec list_store_spec{index_.dim(), true}; + for (uint32_t label = 0; label < index_.n_lists(); label++) { + ivf::deserialize_list(handle, is, index_.lists()[label], list_store_spec, list_device_spec); + } + raft::resource::sync_stream(handle); + + ivf::detail::recompute_internal_state(handle, index_); + + return index_; +} + +template +auto deserialize(raft::resources const& handle, const std::string& filename) -> index +{ + std::ifstream is(filename, std::ios::in | std::ios::binary); + + if (!is) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + + auto index = detail::deserialize(handle, is); + + is.close(); + + return index; +} +} // namespace cuvs::neighbors::ivf_flat::detail diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_serialize_float_int64_t.cpp b/cpp/src/neighbors/ivf_flat/ivf_flat_serialize_float_int64_t.cu similarity index 61% rename from cpp/src/neighbors/ivf_flat/ivf_flat_serialize_float_int64_t.cpp rename to cpp/src/neighbors/ivf_flat/ivf_flat_serialize_float_int64_t.cu index 19a3d72d9..9ab00623a 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_serialize_float_int64_t.cpp +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_serialize_float_int64_t.cu @@ -24,38 +24,40 @@ */ #include -#include + +#include "ivf_flat_serialize.cuh" namespace cuvs::neighbors::ivf_flat { -#define CUVS_INST_IVF_FLAT_SERIALIZE(T, IdxT) \ - void serialize_file(raft::resources const& handle, \ - const std::string& filename, \ - const cuvs::neighbors::ivf_flat::index& index) \ - { \ - raft::runtime::neighbors::ivf_flat::serialize_file(handle, filename, *index.get_raft_index()); \ - } \ - \ - void deserialize_file(raft::resources const& handle, \ - const std::string& filename, \ - cuvs::neighbors::ivf_flat::index* index) \ - { \ - raft::runtime::neighbors::ivf_flat::deserialize_file( \ - handle, filename, index->get_raft_index()); \ - } \ - \ - void serialize(raft::resources const& handle, \ - std::string& str, \ - const cuvs::neighbors::ivf_flat::index& index) \ - { \ - raft::runtime::neighbors::ivf_flat::serialize(handle, str, *index.get_raft_index()); \ - } \ - \ - void deserialize(raft::resources const& handle, \ - const std::string& str, \ - 
cuvs::neighbors::ivf_flat::index* index) \ - { \ - raft::runtime::neighbors::ivf_flat::deserialize(handle, str, index->get_raft_index()); \ +#define CUVS_INST_IVF_FLAT_SERIALIZE(T, IdxT) \ + void serialize_file(raft::resources const& handle, \ + const std::string& filename, \ + const cuvs::neighbors::ivf_flat::index& index) \ + { \ + cuvs::neighbors::ivf_flat::detail::serialize(handle, filename, index); \ + } \ + \ + void serialize(raft::resources const& handle, \ + std::string& str, \ + const cuvs::neighbors::ivf_flat::index& index) \ + { \ + std::ostringstream os; \ + cuvs::neighbors::ivf_flat::detail::serialize(handle, os, index); \ + str = os.str(); \ + } \ + \ + void deserialize_file(raft::resources const& handle, \ + const std::string& filename, \ + cuvs::neighbors::ivf_flat::index* index) \ + { \ + *index = cuvs::neighbors::ivf_flat::detail::deserialize(handle, filename); \ + } \ + void deserialize(raft::resources const& handle, \ + const std::string& str, \ + cuvs::neighbors::ivf_flat::index* index) \ + { \ + std::istringstream is(str); \ + *index = cuvs::neighbors::ivf_flat::detail::deserialize(handle, is); \ } CUVS_INST_IVF_FLAT_SERIALIZE(float, int64_t); diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_serialize_int8_t_int64_t.cpp b/cpp/src/neighbors/ivf_flat/ivf_flat_serialize_int8_t_int64_t.cu similarity index 61% rename from cpp/src/neighbors/ivf_flat/ivf_flat_serialize_int8_t_int64_t.cpp rename to cpp/src/neighbors/ivf_flat/ivf_flat_serialize_int8_t_int64_t.cu index f65fe221d..18d6a0287 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_serialize_int8_t_int64_t.cpp +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_serialize_int8_t_int64_t.cu @@ -24,38 +24,40 @@ */ #include -#include + +#include "ivf_flat_serialize.cuh" namespace cuvs::neighbors::ivf_flat { -#define CUVS_INST_IVF_FLAT_SERIALIZE(T, IdxT) \ - void serialize_file(raft::resources const& handle, \ - const std::string& filename, \ - const cuvs::neighbors::ivf_flat::index& index) \ - { \ - raft::runtime::neighbors::ivf_flat::serialize_file(handle, filename, *index.get_raft_index()); \ - } \ - \ - void deserialize_file(raft::resources const& handle, \ - const std::string& filename, \ - cuvs::neighbors::ivf_flat::index* index) \ - { \ - raft::runtime::neighbors::ivf_flat::deserialize_file( \ - handle, filename, index->get_raft_index()); \ - } \ - \ - void serialize(raft::resources const& handle, \ - std::string& str, \ - const cuvs::neighbors::ivf_flat::index& index) \ - { \ - raft::runtime::neighbors::ivf_flat::serialize(handle, str, *index.get_raft_index()); \ - } \ - \ - void deserialize(raft::resources const& handle, \ - const std::string& str, \ - cuvs::neighbors::ivf_flat::index* index) \ - { \ - raft::runtime::neighbors::ivf_flat::deserialize(handle, str, index->get_raft_index()); \ +#define CUVS_INST_IVF_FLAT_SERIALIZE(T, IdxT) \ + void serialize_file(raft::resources const& handle, \ + const std::string& filename, \ + const cuvs::neighbors::ivf_flat::index& index) \ + { \ + cuvs::neighbors::ivf_flat::detail::serialize(handle, filename, index); \ + } \ + \ + void serialize(raft::resources const& handle, \ + std::string& str, \ + const cuvs::neighbors::ivf_flat::index& index) \ + { \ + std::ostringstream os; \ + cuvs::neighbors::ivf_flat::detail::serialize(handle, os, index); \ + str = os.str(); \ + } \ + \ + void deserialize_file(raft::resources const& handle, \ + const std::string& filename, \ + cuvs::neighbors::ivf_flat::index* index) \ + { \ + *index = cuvs::neighbors::ivf_flat::detail::deserialize(handle, filename); 
\ + } \ + void deserialize(raft::resources const& handle, \ + const std::string& str, \ + cuvs::neighbors::ivf_flat::index* index) \ + { \ + std::istringstream is(str); \ + *index = cuvs::neighbors::ivf_flat::detail::deserialize(handle, is); \ } CUVS_INST_IVF_FLAT_SERIALIZE(int8_t, int64_t); diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_serialize_uint8_t_int64_t.cpp b/cpp/src/neighbors/ivf_flat/ivf_flat_serialize_uint8_t_int64_t.cu similarity index 61% rename from cpp/src/neighbors/ivf_flat/ivf_flat_serialize_uint8_t_int64_t.cpp rename to cpp/src/neighbors/ivf_flat/ivf_flat_serialize_uint8_t_int64_t.cu index 5f312dc0a..c5ab7d5c1 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_serialize_uint8_t_int64_t.cpp +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_serialize_uint8_t_int64_t.cu @@ -24,38 +24,40 @@ */ #include -#include + +#include "ivf_flat_serialize.cuh" namespace cuvs::neighbors::ivf_flat { -#define CUVS_INST_IVF_FLAT_SERIALIZE(T, IdxT) \ - void serialize_file(raft::resources const& handle, \ - const std::string& filename, \ - const cuvs::neighbors::ivf_flat::index& index) \ - { \ - raft::runtime::neighbors::ivf_flat::serialize_file(handle, filename, *index.get_raft_index()); \ - } \ - \ - void deserialize_file(raft::resources const& handle, \ - const std::string& filename, \ - cuvs::neighbors::ivf_flat::index* index) \ - { \ - raft::runtime::neighbors::ivf_flat::deserialize_file( \ - handle, filename, index->get_raft_index()); \ - } \ - \ - void serialize(raft::resources const& handle, \ - std::string& str, \ - const cuvs::neighbors::ivf_flat::index& index) \ - { \ - raft::runtime::neighbors::ivf_flat::serialize(handle, str, *index.get_raft_index()); \ - } \ - \ - void deserialize(raft::resources const& handle, \ - const std::string& str, \ - cuvs::neighbors::ivf_flat::index* index) \ - { \ - raft::runtime::neighbors::ivf_flat::deserialize(handle, str, index->get_raft_index()); \ +#define CUVS_INST_IVF_FLAT_SERIALIZE(T, IdxT) \ + void serialize_file(raft::resources const& handle, \ + const std::string& filename, \ + const cuvs::neighbors::ivf_flat::index& index) \ + { \ + cuvs::neighbors::ivf_flat::detail::serialize(handle, filename, index); \ + } \ + \ + void serialize(raft::resources const& handle, \ + std::string& str, \ + const cuvs::neighbors::ivf_flat::index& index) \ + { \ + std::ostringstream os; \ + cuvs::neighbors::ivf_flat::detail::serialize(handle, os, index); \ + str = os.str(); \ + } \ + \ + void deserialize_file(raft::resources const& handle, \ + const std::string& filename, \ + cuvs::neighbors::ivf_flat::index* index) \ + { \ + *index = cuvs::neighbors::ivf_flat::detail::deserialize(handle, filename); \ + } \ + void deserialize(raft::resources const& handle, \ + const std::string& str, \ + cuvs::neighbors::ivf_flat::index* index) \ + { \ + std::istringstream is(str); \ + *index = cuvs::neighbors::ivf_flat::detail::deserialize(handle, is); \ } CUVS_INST_IVF_FLAT_SERIALIZE(uint8_t, int64_t); diff --git a/cpp/src/neighbors/ivf_flat_index.cpp b/cpp/src/neighbors/ivf_flat_index.cpp index 678bec32a..b2fbbfc12 100644 --- a/cpp/src/neighbors/ivf_flat_index.cpp +++ b/cpp/src/neighbors/ivf_flat_index.cpp @@ -20,140 +20,203 @@ namespace cuvs::neighbors::ivf_flat { template index::index(raft::resources const& res, const index_params& params, uint32_t dim) - : ann::index(), - raft_index_(std::make_unique>( - res, - static_cast((int)params.metric), - params.n_lists, - params.adaptive_centers, - params.conservative_memory_allocation, - dim)) + : index(res, + params.metric, + 
params.n_lists, + params.adaptive_centers, + params.conservative_memory_allocation, + dim) { } template -index::index(raft::neighbors::ivf_flat::index&& raft_idx) +index::index(raft::resources const& res, + cuvs::distance::DistanceType metric, + uint32_t n_lists, + bool adaptive_centers, + bool conservative_memory_allocation, + uint32_t dim) : ann::index(), - raft_index_(std::make_unique>(std::move(raft_idx))) + veclen_(calculate_veclen(dim)), + metric_(metric), + adaptive_centers_(adaptive_centers), + conservative_memory_allocation_{conservative_memory_allocation}, + centers_(raft::make_device_matrix(res, n_lists, dim)), + center_norms_(std::nullopt), + lists_{n_lists}, + list_sizes_{raft::make_device_vector(res, n_lists)}, + data_ptrs_{raft::make_device_vector(res, n_lists)}, + inds_ptrs_{raft::make_device_vector(res, n_lists)}, + accum_sorted_sizes_{raft::make_host_vector(n_lists + 1)} { + check_consistency(); + accum_sorted_sizes_(n_lists) = 0; } template uint32_t index::veclen() const noexcept { - return raft_index_->veclen(); + return veclen_; } template cuvs::distance::DistanceType index::metric() const noexcept { - return static_cast((int)raft_index_->metric()); + return metric_; } template bool index::adaptive_centers() const noexcept { - return raft_index_->adaptive_centers(); + return adaptive_centers_; } template raft::device_vector_view index::list_sizes() noexcept { - return raft_index_->list_sizes(); + return list_sizes_.view(); } template raft::device_vector_view index::list_sizes() const noexcept { - return raft_index_->list_sizes(); + return list_sizes_.view(); } template raft::device_matrix_view index::centers() noexcept { - return raft_index_->centers(); + return centers_.view(); } template raft::device_matrix_view index::centers() const noexcept { - return raft_index_->centers(); + return centers_.view(); } template std::optional> index::center_norms() noexcept { - return raft_index_->center_norms(); + if (center_norms_.has_value()) { + return std::make_optional>(center_norms_->view()); + } else { + return std::nullopt; + } } template std::optional> index::center_norms() const noexcept { - return raft_index_->center_norms(); + if (center_norms_.has_value()) { + return std::make_optional>( + center_norms_->view()); + } else { + return std::nullopt; + } +} + +template +auto index::accum_sorted_sizes() noexcept -> raft::host_vector_view +{ + return accum_sorted_sizes_.view(); +} + +template +[[nodiscard]] auto index::accum_sorted_sizes() const noexcept + -> raft::host_vector_view +{ + return accum_sorted_sizes_.view(); } template IdxT index::size() const noexcept { - return raft_index_->size(); + return accum_sorted_sizes()(n_lists()); } template uint32_t index::dim() const noexcept { - return raft_index_->dim(); + return centers_.extent(1); } template uint32_t index::n_lists() const noexcept { - return raft_index_->n_lists(); + return lists_.size(); } template raft::device_vector_view index::data_ptrs() noexcept { - return raft_index_->data_ptrs(); + return data_ptrs_.view(); } template raft::device_vector_view index::data_ptrs() const noexcept { - return raft_index_->data_ptrs(); + return data_ptrs_.view(); } template raft::device_vector_view index::inds_ptrs() noexcept { - return raft_index_->inds_ptrs(); + return inds_ptrs_.view(); } template raft::device_vector_view index::inds_ptrs() const noexcept { - return raft_index_->inds_ptrs(); + return inds_ptrs_.view(); } template bool index::conservative_memory_allocation() const noexcept { - return 
raft_index_->conservative_memory_allocation(); + return conservative_memory_allocation_; +} + +template +void index::allocate_center_norms(raft::resources const& res) +{ + switch (metric_) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2SqrtExpanded: + case cuvs::distance::DistanceType::L2Unexpanded: + case cuvs::distance::DistanceType::L2SqrtUnexpanded: + center_norms_ = raft::make_device_vector(res, n_lists()); + break; + default: center_norms_ = std::nullopt; + } +} + +template +std::vector>>& index::lists() noexcept +{ + return lists_; } template -std::vector>>& -index::lists() noexcept +const std::vector>>& index::lists() const noexcept { - return raft_index_->lists(); + return lists_; } template -const std::vector>>& -index::lists() const noexcept +void index::check_consistency() { - return raft_index_->lists(); + auto n_lists = lists_.size(); + RAFT_EXPECTS(dim() % veclen_ == 0, "dimensionality is not a multiple of the veclen"); + RAFT_EXPECTS(list_sizes_.extent(0) == n_lists, "inconsistent list size"); + RAFT_EXPECTS(data_ptrs_.extent(0) == n_lists, "inconsistent list size"); + RAFT_EXPECTS(inds_ptrs_.extent(0) == n_lists, "inconsistent list size"); + RAFT_EXPECTS( // + (centers_.extent(0) == list_sizes_.extent(0)) && // + (!center_norms_.has_value() || centers_.extent(0) == center_norms_->extent(0)), + "inconsistent number of lists (clusters)"); } template struct index; diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build_common.cu b/cpp/src/neighbors/ivf_pq/ivf_pq_build_common.cu index 15c965ec9..b08ebb17e 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_build_common.cu +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build_common.cu @@ -164,4 +164,4 @@ void erase_list(raft::resources const& res, index* index, uint32_t labe detail::erase_list(res, index, label); } -} // namespace cuvs::neighbors::ivf_pq::helpers \ No newline at end of file +} // namespace cuvs::neighbors::ivf_pq::helpers diff --git a/cpp/test/neighbors/ann_ivf_flat.cuh b/cpp/test/neighbors/ann_ivf_flat.cuh index 9b70ee250..dadd450b7 100644 --- a/cpp/test/neighbors/ann_ivf_flat.cuh +++ b/cpp/test/neighbors/ann_ivf_flat.cuh @@ -19,11 +19,14 @@ #include "ann_utils.cuh" #include "naive_knn.cuh" +#include #include #include #include -#include +#include +#include +#include namespace cuvs::neighbors::ivf_flat { @@ -41,7 +44,7 @@ struct AnnIvfFlatInputs { IdxT nlist; cuvs::distance::DistanceType metric; bool adaptive_centers; - // bool host_dataset; + bool host_dataset; }; template @@ -98,47 +101,6 @@ class AnnIVFFlatTest : public ::testing::TestWithParam> { rmm::device_uvector distances_ivfflat_dev(queries_size, stream_); rmm::device_uvector indices_ivfflat_dev(queries_size, stream_); - { - // legacy interface - raft::spatial::knn::IVFFlatParam ivfParams; - ivfParams.nprobe = ps.nprobe; - ivfParams.nlist = ps.nlist; - raft::spatial::knn::knnIndex index; - - raft::spatial::knn::approx_knn_build_index( - handle_, - &index, - dynamic_cast(&ivfParams), - static_cast((int)ps.metric), - (IdxT)0, - database.data(), - ps.num_db_vecs, - ps.dim); - - raft::resource::sync_stream(handle_); - raft::spatial::knn::approx_knn_search(handle_, - distances_ivfflat_dev.data(), - indices_ivfflat_dev.data(), - &index, - ps.k, - search_queries.data(), - ps.num_queries); - - raft::update_host( - distances_ivfflat.data(), distances_ivfflat_dev.data(), queries_size, stream_); - raft::update_host( - indices_ivfflat.data(), indices_ivfflat_dev.data(), queries_size, stream_); - raft::resource::sync_stream(handle_); - } - - 
ASSERT_TRUE(eval_neighbours(indices_naive, - indices_ivfflat, - distances_naive, - distances_ivfflat, - ps.num_queries, - ps.k, - 0.001, - min_recall)); { cuvs::neighbors::ivf_flat::index_params index_params; cuvs::neighbors::ivf_flat::search_params search_params; @@ -154,45 +116,42 @@ class AnnIVFFlatTest : public ::testing::TestWithParam> { cuvs::neighbors::ivf_flat::index idx(handle_, index_params, ps.dim); cuvs::neighbors::ivf_flat::index index_2(handle_, index_params, ps.dim); - // if (!ps.host_dataset) { - - auto database_view = raft::make_device_matrix_view( - (const DataT*)database.data(), ps.num_db_vecs, ps.dim); - idx = cuvs::neighbors::ivf_flat::build(handle_, index_params, database_view); - rmm::device_uvector vector_indices(ps.num_db_vecs, stream_); - thrust::sequence(raft::resource::get_thrust_policy(handle_), - thrust::device_pointer_cast(vector_indices.data()), - thrust::device_pointer_cast(vector_indices.data() + ps.num_db_vecs)); - raft::resource::sync_stream(handle_); - - IdxT half_of_data = ps.num_db_vecs / 2; + if (!ps.host_dataset) { + auto database_view = raft::make_device_matrix_view( + (const DataT*)database.data(), ps.num_db_vecs, ps.dim); + idx = cuvs::neighbors::ivf_flat::build(handle_, index_params, database_view); + rmm::device_uvector vector_indices(ps.num_db_vecs, stream_); + thrust::sequence(raft::resource::get_thrust_policy(handle_), + thrust::device_pointer_cast(vector_indices.data()), + thrust::device_pointer_cast(vector_indices.data() + ps.num_db_vecs)); + raft::resource::sync_stream(handle_); - auto half_of_data_view = raft::make_device_matrix_view( - (const DataT*)database.data(), half_of_data, ps.dim); + IdxT half_of_data = ps.num_db_vecs / 2; - const std::optional> no_opt = std::nullopt; - index_2 = cuvs::neighbors::ivf_flat::extend(handle_, half_of_data_view, no_opt, idx); + auto half_of_data_view = raft::make_device_matrix_view( + (const DataT*)database.data(), half_of_data, ps.dim); - auto new_half_of_data_view = raft::make_device_matrix_view( - database.data() + half_of_data * ps.dim, IdxT(ps.num_db_vecs) - half_of_data, ps.dim); + const std::optional> no_opt = std::nullopt; + index_2 = cuvs::neighbors::ivf_flat::extend(handle_, half_of_data_view, no_opt, idx); - auto new_half_of_data_indices_view = raft::make_device_vector_view( - vector_indices.data() + half_of_data, IdxT(ps.num_db_vecs) - half_of_data); + auto new_half_of_data_view = raft::make_device_matrix_view( + database.data() + half_of_data * ps.dim, IdxT(ps.num_db_vecs) - half_of_data, ps.dim); - cuvs::neighbors::ivf_flat::extend( - handle_, - new_half_of_data_view, - std::make_optional>( - new_half_of_data_indices_view), - &index_2); + auto new_half_of_data_indices_view = raft::make_device_vector_view( + vector_indices.data() + half_of_data, IdxT(ps.num_db_vecs) - half_of_data); - /* + cuvs::neighbors::ivf_flat::extend( + handle_, + new_half_of_data_view, + std::make_optional>( + new_half_of_data_indices_view), + &index_2); } else { auto host_database = raft::make_host_matrix(ps.num_db_vecs, ps.dim); raft::copy( host_database.data_handle(), database.data(), ps.num_db_vecs * ps.dim, stream_); - idx = ivf_flat::build(handle_, index_params, - raft::make_const_mdspan(host_database.view())); + idx = + ivf_flat::build(handle_, index_params, raft::make_const_mdspan(host_database.view())); auto vector_indices = raft::make_host_vector(handle_, ps.num_db_vecs); std::iota(vector_indices.data_handle(), vector_indices.data_handle() + ps.num_db_vecs, 0); @@ -217,7 +176,6 @@ class AnnIVFFlatTest : 
public ::testing::TestWithParam> { new_half_of_data_indices_view), &index_2); } - */ auto search_queries_view = raft::make_device_matrix_view( search_queries.data(), ps.num_queries, ps.dim); @@ -294,137 +252,137 @@ class AnnIVFFlatTest : public ::testing::TestWithParam> { } } - /* - void testPacker() - { - ivf_flat::index_params index_params; - ivf_flat::search_params search_params; - index_params.n_lists = ps.nlist; - index_params.metric = ps.metric; - index_params.adaptive_centers = false; - search_params.n_probes = ps.nprobe; - - index_params.add_data_on_build = false; - index_params.kmeans_trainset_fraction = 1.0; - index_params.metric_arg = 0; - - auto database_view = raft::make_device_matrix_view( - (const DataT*)database.data(), ps.num_db_vecs, ps.dim); - - auto idx = ivf_flat::build(handle_, index_params, database_view); - - const std::optional> no_opt = std::nullopt; - index extend_index = ivf_flat::extend(handle_, database_view, no_opt, idx); - - auto list_sizes = raft::make_host_vector(idx.n_lists()); - raft::update_host(list_sizes.data_handle(), - extend_index.list_sizes().data_handle(), - extend_index.n_lists(), - stream_); - raft::resource::sync_stream(handle_); + void testPacker() + { + ivf_flat::index_params index_params; + ivf_flat::search_params search_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.adaptive_centers = false; + search_params.n_probes = ps.nprobe; + + index_params.add_data_on_build = false; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + + auto database_view = raft::make_device_matrix_view( + (const DataT*)database.data(), ps.num_db_vecs, ps.dim); + + auto idx = ivf_flat::build(handle_, index_params, database_view); + + const std::optional> no_opt = std::nullopt; + index extend_index = ivf_flat::extend(handle_, database_view, no_opt, idx); + + auto list_sizes = raft::make_host_vector(idx.n_lists()); + raft::update_host(list_sizes.data_handle(), + extend_index.list_sizes().data_handle(), + extend_index.n_lists(), + stream_); + raft::resource::sync_stream(handle_); - auto& lists = idx.lists(); + auto& lists = idx.lists(); - // conservative memory allocation for codepacking - auto list_device_spec = list_spec{idx.dim(), false}; + // conservative memory allocation for codepacking + auto list_device_spec = list_spec{idx.dim(), false}; - for (uint32_t label = 0; label < idx.n_lists(); label++) { - uint32_t list_size = list_sizes.data_handle()[label]; + for (uint32_t label = 0; label < idx.n_lists(); label++) { + uint32_t list_size = list_sizes.data_handle()[label]; - ivf::resize_list(handle_, lists[label], list_device_spec, list_size, 0); - } + ivf::resize_list(handle_, lists[label], list_device_spec, list_size, 0); + } - idx.recompute_internal_state(handle_); + idx.recompute_internal_state(handle_); - using interleaved_group = Pow2; + using interleaved_group = raft::Pow2; - for (uint32_t label = 0; label < idx.n_lists(); label++) { - uint32_t list_size = list_sizes.data_handle()[label]; + for (uint32_t label = 0; label < idx.n_lists(); label++) { + uint32_t list_size = list_sizes.data_handle()[label]; - if (list_size > 0) { - uint32_t padded_list_size = interleaved_group::roundUp(list_size); - uint32_t n_elems = padded_list_size * idx.dim(); - auto list_data = lists[label]->data; - auto list_inds = extend_index.lists()[label]->indices; + if (list_size > 0) { + uint32_t padded_list_size = interleaved_group::roundUp(list_size); + uint32_t n_elems = padded_list_size * idx.dim(); + auto 
list_data = lists[label]->data; + auto list_inds = extend_index.lists()[label]->indices; - // fetch the flat codes - auto flat_codes = make_device_matrix(handle_, list_size, idx.dim()); + // fetch the flat codes + auto flat_codes = raft::make_device_matrix(handle_, list_size, idx.dim()); - matrix::gather( - handle_, - make_device_matrix_view( - (const DataT*)database.data(), static_cast(ps.num_db_vecs), idx.dim()), - make_device_vector_view((const IdxT*)list_inds.data_handle(), - list_size), - flat_codes.view()); - - helpers::codepacker::pack( - handle_, make_const_mdspan(flat_codes.view()), idx.veclen(), 0, list_data.view()); - - { - auto mask = make_device_vector(handle_, n_elems); - - linalg::map_offset(handle_, - mask.view(), - [dim = idx.dim(), - list_size, - padded_list_size, - chunk_size = util::FastIntDiv(idx.veclen())] __device__(auto i) { - uint32_t max_group_offset = - interleaved_group::roundDown(list_size); if (i < max_group_offset * dim) { return true; } - uint32_t surplus = (i - max_group_offset * dim); - uint32_t ingroup_id = interleaved_group::mod(surplus / chunk_size); - return ingroup_id < (list_size - max_group_offset); - }); - - // ensure that the correct number of indices are masked out - ASSERT_TRUE(thrust::reduce(raft::resource::get_thrust_policy(handle_), - mask.data_handle(), - mask.data_handle() + n_elems, - 0) == list_size * ps.dim); - - auto packed_list_data = make_device_vector(handle_, n_elems); - - linalg::map_offset(handle_, - packed_list_data.view(), - [mask = mask.data_handle(), - list_data = list_data.data_handle()] __device__(uint32_t i) { - if (mask[i]) return list_data[i]; - return DataT{0}; - }); - - auto extend_data = extend_index.lists()[label]->data; - auto extend_data_filtered = make_device_vector(handle_, n_elems); - linalg::map_offset(handle_, - extend_data_filtered.view(), - [mask = mask.data_handle(), - extend_data = extend_data.data_handle()] __device__(uint32_t i) { - if (mask[i]) return extend_data[i]; - return DataT{0}; - }); - - ASSERT_TRUE(cuvs::devArrMatch(packed_list_data.data_handle(), - extend_data_filtered.data_handle(), - n_elems, - cuvs::Compare(), - stream_)); - } + raft::matrix::gather( + handle_, + raft::make_device_matrix_view( + (const DataT*)database.data(), static_cast(ps.num_db_vecs), idx.dim()), + raft::make_device_vector_view((const IdxT*)list_inds.data_handle(), + list_size), + flat_codes.view()); - auto unpacked_flat_codes = - make_device_matrix(handle_, list_size, idx.dim()); + helpers::codepacker::pack( + handle_, make_const_mdspan(flat_codes.view()), idx.veclen(), 0, list_data.view()); - helpers::codepacker::unpack( - handle_, list_data.view(), idx.veclen(), 0, unpacked_flat_codes.view()); + { + auto mask = raft::make_device_vector(handle_, n_elems); - ASSERT_TRUE(cuvs::devArrMatch(flat_codes.data_handle(), - unpacked_flat_codes.data_handle(), - list_size * ps.dim, + raft::linalg::map_offset( + handle_, + mask.view(), + [dim = idx.dim(), + list_size, + padded_list_size, + chunk_size = raft::util::FastIntDiv(idx.veclen())] __device__(auto i) { + uint32_t max_group_offset = interleaved_group::roundDown(list_size); + if (i < max_group_offset * dim) { return true; } + uint32_t surplus = (i - max_group_offset * dim); + uint32_t ingroup_id = interleaved_group::mod(surplus / chunk_size); + return ingroup_id < (list_size - max_group_offset); + }); + + // ensure that the correct number of indices are masked out + ASSERT_TRUE(thrust::reduce(raft::resource::get_thrust_policy(handle_), + mask.data_handle(), + 
mask.data_handle() + n_elems, + 0) == list_size * ps.dim); + + auto packed_list_data = raft::make_device_vector(handle_, n_elems); + + raft::linalg::map_offset(handle_, + packed_list_data.view(), + [mask = mask.data_handle(), + list_data = list_data.data_handle()] __device__(uint32_t i) { + if (mask[i]) return list_data[i]; + return DataT{0}; + }); + + auto extend_data = extend_index.lists()[label]->data; + auto extend_data_filtered = raft::make_device_vector(handle_, n_elems); + raft::linalg::map_offset( + handle_, + extend_data_filtered.view(), + [mask = mask.data_handle(), + extend_data = extend_data.data_handle()] __device__(uint32_t i) { + if (mask[i]) return extend_data[i]; + return DataT{0}; + }); + + ASSERT_TRUE(cuvs::devArrMatch(packed_list_data.data_handle(), + extend_data_filtered.data_handle(), + n_elems, cuvs::Compare(), stream_)); } + + auto unpacked_flat_codes = + raft::make_device_matrix(handle_, list_size, idx.dim()); + + helpers::codepacker::unpack( + handle_, list_data.view(), idx.veclen(), 0, unpacked_flat_codes.view()); + + ASSERT_TRUE(cuvs::devArrMatch(flat_codes.data_handle(), + unpacked_flat_codes.data_handle(), + list_size * ps.dim, + cuvs::Compare(), + stream_)); } } - */ + } void testFilter() { @@ -458,7 +416,6 @@ class AnnIVFFlatTest : public ::testing::TestWithParam> { raft::resource::sync_stream(handle_); } - /* { // unless something is really wrong with clustering, this could serve as a lower bound on // recall @@ -494,7 +451,7 @@ class AnnIVFFlatTest : public ::testing::TestWithParam> { test_ivf_sample_filter::offset)); raft::resource::sync_stream(handle_); - raft::core::bitset removed_indices_bitset( + cuvs::core::bitset removed_indices_bitset( handle_, removed_indices.view(), ps.num_db_vecs); // Search with the filter @@ -507,7 +464,7 @@ class AnnIVFFlatTest : public ::testing::TestWithParam> { search_queries_view, indices_ivfflat_dev.view(), distances_ivfflat_dev.view(), - cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset.view())); + cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset.view())); raft::update_host( distances_ivfflat.data(), distances_ivfflat_dev.data_handle(), queries_size, stream_); @@ -524,7 +481,6 @@ class AnnIVFFlatTest : public ::testing::TestWithParam> { 0.001, min_recall)); } - */ } void SetUp() override @@ -591,7 +547,6 @@ const std::vector> inputs = { {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, true}, {10000, 131072, 8, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, false}, - /* // host input data {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false, true}, {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::L2Expanded, false, true}, @@ -600,7 +555,6 @@ const std::vector> inputs = { {20, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, false, true}, {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, false, true}, {10000, 131072, 8, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, false, true}, - */ {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, true}, {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::InnerProduct, true}, diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index 5ff7bef61..15b12b118 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -37,6 +37,6 @@ include(../cmake/thirdparty/get_cuvs.cmake) # -------------- compile tasks ----------------- # add_executable(CAGRA_EXAMPLE 
src/cagra_example.cu)
-# `$<TARGET_NAME_IF_EXISTS:conda_env>` is a generator expression that ensures that targets
-# are installed in a conda environment, if one exists
+# `$<TARGET_NAME_IF_EXISTS:conda_env>` is a generator expression that ensures that targets are
+# installed in a conda environment, if one exists
 target_link_libraries(CAGRA_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env>)
diff --git a/rapids_config.cmake b/rapids_config.cmake
index 19906dbc6..4ceefb991 100644
--- a/rapids_config.cmake
+++ b/rapids_config.cmake
@@ -22,13 +22,15 @@ else()
   string(REPLACE "\n" "\n  " _rapids_version_formatted "  ${_rapids_version}")
   message(
     FATAL_ERROR
-      "Could not determine RAPIDS version. Contents of VERSION file:\n${_rapids_version_formatted}")
+      "Could not determine RAPIDS version. Contents of VERSION file:\n${_rapids_version_formatted}"
+  )
 endif()

 if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake")
   file(
     DOWNLOAD
     "https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION_MAJOR_MINOR}/RAPIDS.cmake"
-    "${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake")
+    "${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake"
+  )
 endif()
 include("${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake")
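
The sketch below is illustrative only and is not part of the patch. It exercises the public entry points that this change reroutes from the raft runtime to the cuvs-native implementations: build, search, the string-based serialize/deserialize pair, and the newly instantiated search_with_filtering. Handle creation and data loading are omitted, and the <cuvs/core/bitset.hpp> include path plus the bitset/bitset_filter template parameters are assumptions modelled on the updated test in cpp/test/neighbors/ann_ivf_flat.cuh rather than declarations introduced in this diff.

// Illustrative usage sketch (see the assumptions above); C++ host code.
#include <cuvs/core/bitset.hpp>  // assumed header for cuvs::core::bitset
#include <cuvs/neighbors/ivf_flat.hpp>

#include <raft/core/device_mdarray.hpp>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/resources.hpp>

#include <cstdint>
#include <string>

// Build an index, run a plain search, then round-trip the index through the
// string-based serialize/deserialize entry points instantiated in this patch.
inline void ivf_flat_round_trip(raft::resources const& handle,
                                raft::device_matrix_view<const float, int64_t> dataset,
                                raft::device_matrix_view<const float, int64_t> queries)
{
  namespace ivf_flat = cuvs::neighbors::ivf_flat;

  ivf_flat::index_params build_params;
  build_params.n_lists = 1024;  // number of inverted lists (clusters)
  build_params.metric  = cuvs::distance::DistanceType::L2Expanded;
  auto idx = ivf_flat::build(handle, build_params, dataset);

  int64_t k = 10;
  ivf_flat::search_params search_params;
  search_params.n_probes = 50;
  auto neighbors = raft::make_device_matrix<int64_t, int64_t>(handle, queries.extent(0), k);
  auto distances = raft::make_device_matrix<float, int64_t>(handle, queries.extent(0), k);
  ivf_flat::search(handle, search_params, idx, queries, neighbors.view(), distances.view());

  // serialize(handle, str, ...) and serialize_file(handle, filename, ...) share
  // the same detail::serialize stream writer, so the byte formats are identical.
  std::string blob;
  ivf_flat::serialize(handle, blob, idx);

  ivf_flat::index<float, int64_t> restored(
    handle, build_params, static_cast<uint32_t>(dataset.extent(1)));
  ivf_flat::deserialize(handle, blob, &restored);
}

// Filtered search: dataset rows flagged in the bitset are skipped, mirroring
// the filter test in cpp/test/neighbors/ann_ivf_flat.cuh.
inline void ivf_flat_filtered_search(
  raft::resources const& handle,
  cuvs::neighbors::ivf_flat::index<float, int64_t>& idx,
  raft::device_matrix_view<const float, int64_t> queries,
  raft::device_vector_view<const int64_t, int64_t> removed_row_ids,
  int64_t n_dataset_rows)
{
  namespace ivf_flat = cuvs::neighbors::ivf_flat;

  // One bit per dataset row; the listed rows are excluded from the results.
  cuvs::core::bitset<std::uint32_t, int64_t> removed(handle, removed_row_ids, n_dataset_rows);

  int64_t k = 10;
  ivf_flat::search_params search_params;
  search_params.n_probes = 50;
  auto neighbors = raft::make_device_matrix<int64_t, int64_t>(handle, queries.extent(0), k);
  auto distances = raft::make_device_matrix<float, int64_t>(handle, queries.extent(0), k);

  ivf_flat::search_with_filtering(
    handle,
    search_params,
    idx,
    queries,
    neighbors.view(),
    distances.view(),
    cuvs::neighbors::filtering::bitset_filter<std::uint32_t, int64_t>(removed.view()));
}

Both entry points dispatch to detail::search_with_filtering, which batches the queries so that the per-batch workspace stays within roughly 1 GiB (kExpectedWsSize in ivf_flat_search.cuh); callers therefore do not need to split large query sets themselves.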