Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updating example notebooks #294

Merged
merged 6 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ bench/ann/data
temporary_*.json
rust/target/
rust/Cargo.lock
rmm_log.txt

## example notebooks
notebooks/simplewiki-2020-11-01-nq-distilbert-base-v1.pt
notebooks/data/

## scikit-build
_skbuild
Expand Down
10 changes: 8 additions & 2 deletions cpp/src/neighbors/cagra_optimize.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,19 @@ void optimize(raft::resources const& handle,
raft::device_matrix_view<uint32_t, int64_t, raft::row_major> knn_graph,
raft::host_matrix_view<uint32_t, int64_t, raft::row_major> new_graph)
{
- cuvs::neighbors::cagra::optimize(handle, knn_graph, new_graph);
+ cuvs::neighbors::cagra::optimize<
+ uint32_t,
+ raft::host_device_accessor<std::experimental::default_accessor<uint32_t>,
+ raft::memory_type::device>>(handle, knn_graph, new_graph);
}
void optimize(raft::resources const& handle,
raft::host_matrix_view<uint32_t, int64_t, raft::row_major> knn_graph,
raft::host_matrix_view<uint32_t, int64_t, raft::row_major> new_graph)
{
- cuvs::neighbors::cagra::optimize(handle, knn_graph, new_graph);
+ cuvs::neighbors::cagra::optimize<
+ uint32_t,
+ raft::host_device_accessor<std::experimental::default_accessor<uint32_t>,
+ raft::memory_type::host>>(handle, knn_graph, new_graph);
}

} // namespace cuvs::neighbors::cagra
2 changes: 1 addition & 1 deletion notebooks/VectorSearch_QuestionRetrieval.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
26 changes: 25 additions & 1 deletion notebooks/ivf_flat_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,30 @@
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "23010fbc-8f5a-4403-a112-33f190a85498",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "774848e8-fa45-4223-bd2a-e8585650531e",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6309b8a7-f4eb-4976-a824-cd4499a0000d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -538,7 +562,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
44 changes: 23 additions & 21 deletions notebooks/tutorial_ivf_pq.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
"outputs": [],
"source": [
"DATASET_URL = \"http://ann-benchmarks.com/sift-128-euclidean.hdf5\"\n",
"DATASET_NAME = \"SIFT-128\"\n",
"f = load_dataset(DATASET_URL)"
]
},
Expand Down Expand Up @@ -206,7 +207,7 @@
"# This function takes a row-major either numpy or cupy (GPU) array.\n",
"# Generally, it's a bit faster with GPU inputs, but the CPU version may come in handy\n",
"# if the whole dataset cannot fit into GPU memory.\n",
"index = ivf_pq.build(index_params, dataset, handle=resources)\n",
"index = ivf_pq.build(index_params, dataset, resources=resources)\n",
"# This function is asynchronous so we need to explicitly synchronize the GPU before we can measure the execution time\n",
"resources.sync()\n",
"index"
Expand Down Expand Up @@ -262,7 +263,7 @@
"outputs": [],
"source": [
"%%time\n",
"distances, neighbors = ivf_pq.search(search_params, index, queries, k, handle=resources)\n",
"distances, neighbors = ivf_pq.search(search_params, index, queries, k, resources=resources)\n",
"# Sync the GPU to make sure we've got the timing right\n",
"resources.sync()"
]
Expand Down Expand Up @@ -303,8 +304,8 @@
"source": [
"%%time\n",
"\n",
"candidates = ivf_pq.search(search_params, index, queries, k * 2, handle=resources)[1]\n",
"distances, neighbors = refine(dataset, queries, candidates, k, handle=resources)\n",
"candidates = ivf_pq.search(search_params, index, queries, k * 2, resources=resources)[1]\n",
"distances, neighbors = refine(dataset, queries, candidates, k, resources=resources)\n",
"resources.sync()"
]
},
Expand Down Expand Up @@ -349,7 +350,7 @@
"bench_avg = np.zeros_like(bench_k, dtype=np.float32)\n",
"bench_std = np.zeros_like(bench_k, dtype=np.float32)\n",
"for i, k in enumerate(bench_k):\n",
" r = %timeit -o ivf_pq.search(search_params, index, queries, k, handle=resources); resources.sync()\n",
" r = %timeit -o ivf_pq.search(search_params, index, queries, k, resources=resources); resources.sync()\n",
" bench_avg[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n",
" bench_std[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).std()\n",
"\n",
Expand Down Expand Up @@ -387,9 +388,9 @@
"k = 100\n",
"for i, n_probes in enumerate(bench_probes):\n",
" sp = ivf_pq.SearchParams(n_probes=n_probes)\n",
" r = %timeit -o ivf_pq.search(sp, index, queries, k, handle=resources); resources.sync()\n",
" r = %timeit -o ivf_pq.search(sp, index, queries, k, resources=resources); resources.sync()\n",
" bench_qps[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n",
" bench_recall[i] = calc_recall(ivf_pq.search(sp, index, queries, k, handle=resources)[1], gt_neighbors)\n",
" bench_recall[i] = calc_recall(ivf_pq.search(sp, index, queries, k, resources=resources)[1], gt_neighbors)\n",
" "
]
},
Expand Down Expand Up @@ -492,9 +493,9 @@
"bench_names = ['32/32', '32/16', '32/8', '16/16', '16/8']\n",
"\n",
"for i, sp in enumerate(search_ps):\n",
" r = %timeit -o ivf_pq.search(sp, index, queries, k, handle=resources); resources.sync()\n",
" r = %timeit -o ivf_pq.search(sp, index, queries, k, resources=resources); resources.sync()\n",
" bench_qps_s1[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n",
" bench_recall_s1[i] = calc_recall(ivf_pq.search(sp, index, queries, k, handle=resources)[1], gt_neighbors)"
" bench_recall_s1[i] = calc_recall(ivf_pq.search(sp, index, queries, k, resources=resources)[1], gt_neighbors)"
]
},
{
Expand All @@ -505,7 +506,7 @@
"source": [
"fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1/2))\n",
"fig.suptitle(\n",
" f'Effects of search parameters on QPS/recall trade-off ({DATASET_FILENAME})\\n' + \\\n",
" f'Effects of search parameters on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n",
" f'k = {k}, n_probes = {n_probes}, pq_dim = {pq_dim}')\n",
"ax.plot(bench_recall_s1, bench_qps_s1, 'o')\n",
"ax.set_xlabel('recall')\n",
Expand Down Expand Up @@ -553,8 +554,8 @@
"source": [
"def search_refine(ps, ratio):\n",
" k_search = k * ratio\n",
" candidates = ivf_pq.search(ps, index, queries, k_search, handle=resources)[1]\n",
" return candidates if ratio == 1 else refine(dataset, queries, candidates, k, handle=resources)[1]\n",
" candidates = ivf_pq.search(ps, index, queries, k_search, resources=resources)[1]\n",
" return candidates if ratio == 1 else refine(dataset, queries, candidates, k, resources=resources)[1]\n",
"\n",
"ratios = [1, 2, 4]\n",
"bench_qps_sr = np.zeros((len(ratios), len(search_ps)), dtype=np.float32)\n",
Expand All @@ -575,7 +576,7 @@
"source": [
"fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1/2))\n",
"fig.suptitle(\n",
" f'Effects of search parameters on QPS/recall trade-off ({DATASET_FILENAME})\\n' + \\\n",
" f'Effects of search parameters on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n",
" f'k = {k}, n_probes = {n_probes}, pq_dim = {pq_dim}')\n",
"labels = []\n",
"for j, ratio in enumerate(ratios):\n",
Expand Down Expand Up @@ -629,8 +630,8 @@
" n_probes=n_probes,\n",
" internal_distance_dtype=internal_distance_dtype,\n",
" lut_dtype=lut_dtype)\n",
" candidates = ivf_pq.search(ps, index, queries, k_search, handle=resources)[1]\n",
" return candidates if ratio == 1 else refine(dataset, queries, candidates, k, handle=resources)[1]\n",
" candidates = ivf_pq.search(ps, index, queries, k_search, resources=resources)[1]\n",
" return candidates if ratio == 1 else refine(dataset, queries, candidates, k, resources=resources)[1]\n",
"\n",
"search_configs = [\n",
" lambda n_probes: search_refine(np.float16, np.float16, 1, n_probes),\n",
Expand Down Expand Up @@ -703,12 +704,13 @@
"\n",
"for i, n_lists in enumerate(n_list_variants):\n",
" index_params = ivf_pq.IndexParams(n_lists=n_lists, metric=metric, pq_dim=pq_dim)\n",
" index = ivf_pq.build(index_params, dataset, handle=resources)\n",
" index = ivf_pq.build(index_params, dataset, resources=resources)\n",
" for j, pl_ratio in enumerate(pl_ratio_variants):\n",
" n_probes = max(1, n_lists // pl_ratio)\n",
" r = %timeit -o search_fun(n_probes); resources.sync()\n",
" bench_qps_nl[i, j] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n",
" bench_recall_nl[i, j] = calc_recall(search_fun(n_probes), gt_neighbors)"
" bench_recall_nl[i, j] = calc_recall(search_fun(n_probes), gt_neighbors)\n",
" del index"
]
},
{
Expand All @@ -719,7 +721,7 @@
"source": [
"fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1/2))\n",
"fig.suptitle(\n",
" f'Effects of n_list on QPS/recall trade-off ({DATASET_FILENAME})\\n' + \\\n",
" f'Effects of n_list on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n",
" f'k = {k}, pq_dim = {pq_dim}, search = {search_label}')\n",
"labels = []\n",
"for i, n_lists in enumerate(n_list_variants):\n",
Expand Down Expand Up @@ -875,7 +877,7 @@
"bench_recall_ip = np.zeros_like(bench_qps_ip, dtype=np.float32)\n",
"\n",
"for i, index_params in enumerate(build_configs.values()):\n",
" index = ivf_pq.build(index_params, dataset, handle=resources)\n",
" index = ivf_pq.build(index_params, dataset, resources=resources)\n",
" for l, search_fun in enumerate(search_configs):\n",
" for j, n_probes in enumerate(n_probes_variants):\n",
" r = %timeit -o search_fun(n_probes); resources.sync()\n",
Expand All @@ -891,7 +893,7 @@
"source": [
"fig, ax = plt.subplots(len(search_config_names), 1, figsize=(16, len(search_config_names)*8))\n",
"fig.suptitle(\n",
" f'Effects of index parameters on QPS/recall trade-off ({DATASET_FILENAME})\\n' + \\\n",
" f'Effects of index parameters on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n",
" f'k = {k}, n_lists = {n_lists}')\n",
"\n",
"for j, search_label in enumerate(search_config_names):\n",
Expand Down Expand Up @@ -932,7 +934,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.11.9"
},
"vscode": {
"interpreter": {
Expand Down
Loading