rapidsai · rapids-bot · Sep 6, 2024 · Aug 15, 2024 · Aug 16, 2024 · Aug 29, 2024
diff --git a/.gitignore b/.gitignore
@@ -28,6 +28,11 @@ bench/ann/data
 temporary_*.json
 rust/target/
 rust/Cargo.lock
+rmm_log.txt
+
+## example notebooks
+notebooks/simplewiki-2020-11-01-nq-distilbert-base-v1.pt
+notebooks/data/
 
 ## scikit-build
 _skbuild

@@ -23,13 +23,19 @@ void optimize(raft::resources const& handle,
               raft::device_matrix_view<uint32_t, int64_t, raft::row_major> knn_graph,
               raft::host_matrix_view<uint32_t, int64_t, raft::row_major> new_graph)
 {
-  cuvs::neighbors::cagra::optimize(handle, knn_graph, new_graph);
+  cuvs::neighbors::cagra::optimize<
+    uint32_t,
+    raft::host_device_accessor<std::experimental::default_accessor<uint32_t>,
+                               raft::memory_type::device>>(handle, knn_graph, new_graph);
 }
 void optimize(raft::resources const& handle,
               raft::host_matrix_view<uint32_t, int64_t, raft::row_major> knn_graph,
               raft::host_matrix_view<uint32_t, int64_t, raft::row_major> new_graph)
 {
-  cuvs::neighbors::cagra::optimize(handle, knn_graph, new_graph);
+  cuvs::neighbors::cagra::optimize<
+    uint32_t,
+    raft::host_device_accessor<std::experimental::default_accessor<uint32_t>,
+                               raft::memory_type::host>>(handle, knn_graph, new_graph);
 }
 
 }  // namespace cuvs::neighbors::cagra
diff --git a/notebooks/VectorSearch_QuestionRetrieval.ipynb b/notebooks/VectorSearch_QuestionRetrieval.ipynb
@@ -344,7 +344,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.13"
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,

diff --git a/notebooks/ivf_flat_example.ipynb b/notebooks/ivf_flat_example.ipynb
@@ -520,6 +520,30 @@
    "metadata": {},
    "outputs": [],
    "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23010fbc-8f5a-4403-a112-33f190a85498",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "774848e8-fa45-4223-bd2a-e8585650531e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6309b8a7-f4eb-4976-a824-cd4499a0000d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -538,7 +562,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.13"
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,

diff --git a/notebooks/tutorial_ivf_pq.ipynb b/notebooks/tutorial_ivf_pq.ipynb
@@ -124,6 +124,7 @@
    "outputs": [],
    "source": [
     "DATASET_URL = \"http://ann-benchmarks.com/sift-128-euclidean.hdf5\"\n",
+    "DATASET_NAME = \"SIFT-128\"\n",
     "f = load_dataset(DATASET_URL)"
    ]
   },
@@ -206,7 +207,7 @@
     "# This function takes a row-major either numpy or cupy (GPU) array.\n",
     "# Generally, it's a bit faster with GPU inputs, but the CPU version may come in handy\n",
     "# if the whole dataset cannot fit into GPU memory.\n",
-    "index = ivf_pq.build(index_params, dataset, handle=resources)\n",
+    "index = ivf_pq.build(index_params, dataset, resources=resources)\n",
     "# This function is asynchronous so we need to explicitly synchronize the GPU before we can measure the execution time\n",
     "resources.sync()\n",
     "index"
@@ -262,7 +263,7 @@
    "outputs": [],
    "source": [
     "%%time\n",
-    "distances, neighbors = ivf_pq.search(search_params, index, queries, k, handle=resources)\n",
+    "distances, neighbors = ivf_pq.search(search_params, index, queries, k, resources=resources)\n",
     "# Sync the GPU to make sure we've got the timing right\n",
     "resources.sync()"
    ]
@@ -303,8 +304,8 @@
    "source": [
     "%%time\n",
     "\n",
-    "candidates = ivf_pq.search(search_params, index, queries, k * 2, handle=resources)[1]\n",
-    "distances, neighbors = refine(dataset, queries, candidates, k, handle=resources)\n",
+    "candidates = ivf_pq.search(search_params, index, queries, k * 2, resources=resources)[1]\n",
+    "distances, neighbors = refine(dataset, queries, candidates, k, resources=resources)\n",
     "resources.sync()"
    ]
   },
@@ -349,7 +350,7 @@
     "bench_avg = np.zeros_like(bench_k, dtype=np.float32)\n",
     "bench_std = np.zeros_like(bench_k, dtype=np.float32)\n",
     "for i, k in enumerate(bench_k):\n",
-    "    r = %timeit -o ivf_pq.search(search_params, index, queries, k, handle=resources); resources.sync()\n",
+    "    r = %timeit -o ivf_pq.search(search_params, index, queries, k, resources=resources); resources.sync()\n",
     "    bench_avg[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n",
     "    bench_std[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).std()\n",
     "\n",
@@ -387,9 +388,9 @@
     "k = 100\n",
     "for i, n_probes in enumerate(bench_probes):\n",
     "    sp = ivf_pq.SearchParams(n_probes=n_probes)\n",
-    "    r = %timeit -o ivf_pq.search(sp, index, queries, k, handle=resources); resources.sync()\n",
+    "    r = %timeit -o ivf_pq.search(sp, index, queries, k, resources=resources); resources.sync()\n",
     "    bench_qps[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n",
-    "    bench_recall[i] = calc_recall(ivf_pq.search(sp, index, queries, k, handle=resources)[1], gt_neighbors)\n",
+    "    bench_recall[i] = calc_recall(ivf_pq.search(sp, index, queries, k, resources=resources)[1], gt_neighbors)\n",
     "    "
    ]
   },
@@ -492,9 +493,9 @@
     "bench_names = ['32/32', '32/16', '32/8', '16/16', '16/8']\n",
     "\n",
     "for i, sp in enumerate(search_ps):\n",
-    "    r = %timeit -o ivf_pq.search(sp, index, queries, k, handle=resources); resources.sync()\n",
+    "    r = %timeit -o ivf_pq.search(sp, index, queries, k, resources=resources); resources.sync()\n",
     "    bench_qps_s1[i] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n",
-    "    bench_recall_s1[i] = calc_recall(ivf_pq.search(sp, index, queries, k, handle=resources)[1], gt_neighbors)"
+    "    bench_recall_s1[i] = calc_recall(ivf_pq.search(sp, index, queries, k, resources=resources)[1], gt_neighbors)"
    ]
   },
   {
@@ -505,7 +506,7 @@
    "source": [
     "fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1/2))\n",
     "fig.suptitle(\n",
-    "    f'Effects of search parameters on QPS/recall trade-off ({DATASET_FILENAME})\\n' + \\\n",
+    "    f'Effects of search parameters on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n",
     "    f'k = {k}, n_probes = {n_probes}, pq_dim = {pq_dim}')\n",
     "ax.plot(bench_recall_s1, bench_qps_s1, 'o')\n",
     "ax.set_xlabel('recall')\n",
@@ -553,8 +554,8 @@
    "source": [
     "def search_refine(ps, ratio):\n",
     "    k_search = k * ratio\n",
-    "    candidates = ivf_pq.search(ps, index, queries, k_search, handle=resources)[1]\n",
-    "    return candidates if ratio == 1 else refine(dataset, queries, candidates, k, handle=resources)[1]\n",
+    "    candidates = ivf_pq.search(ps, index, queries, k_search, resources=resources)[1]\n",
+    "    return candidates if ratio == 1 else refine(dataset, queries, candidates, k, resources=resources)[1]\n",
     "\n",
     "ratios = [1, 2, 4]\n",
     "bench_qps_sr = np.zeros((len(ratios), len(search_ps)), dtype=np.float32)\n",
@@ -575,7 +576,7 @@
    "source": [
     "fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1/2))\n",
     "fig.suptitle(\n",
-    "    f'Effects of search parameters on QPS/recall trade-off ({DATASET_FILENAME})\\n' + \\\n",
+    "    f'Effects of search parameters on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n",
     "    f'k = {k}, n_probes = {n_probes}, pq_dim = {pq_dim}')\n",
     "labels = []\n",
     "for j, ratio in enumerate(ratios):\n",
@@ -629,8 +630,8 @@
     "        n_probes=n_probes,\n",
     "        internal_distance_dtype=internal_distance_dtype,\n",
     "        lut_dtype=lut_dtype)\n",
-    "    candidates = ivf_pq.search(ps, index, queries, k_search, handle=resources)[1]\n",
-    "    return candidates if ratio == 1 else refine(dataset, queries, candidates, k, handle=resources)[1]\n",
+    "    candidates = ivf_pq.search(ps, index, queries, k_search, resources=resources)[1]\n",
+    "    return candidates if ratio == 1 else refine(dataset, queries, candidates, k, resources=resources)[1]\n",
     "\n",
     "search_configs = [\n",
     "    lambda n_probes: search_refine(np.float16, np.float16, 1, n_probes),\n",
@@ -703,12 +704,13 @@
     "\n",
     "for i, n_lists in enumerate(n_list_variants):\n",
     "    index_params = ivf_pq.IndexParams(n_lists=n_lists, metric=metric, pq_dim=pq_dim)\n",
-    "    index = ivf_pq.build(index_params, dataset, handle=resources)\n",
+    "    index = ivf_pq.build(index_params, dataset, resources=resources)\n",
     "    for j, pl_ratio in enumerate(pl_ratio_variants):\n",
     "        n_probes = max(1, n_lists // pl_ratio)\n",
     "        r = %timeit -o search_fun(n_probes);  resources.sync()\n",
     "        bench_qps_nl[i, j] = (queries.shape[0] * r.loops / np.array(r.all_runs)).mean()\n",
-    "        bench_recall_nl[i, j] = calc_recall(search_fun(n_probes), gt_neighbors)"
+    "        bench_recall_nl[i, j] = calc_recall(search_fun(n_probes), gt_neighbors)\n",
+    "    del index"
    ]
   },
   {
@@ -719,7 +721,7 @@
    "source": [
     "fig, ax = plt.subplots(1, 1, figsize=plt.figaspect(1/2))\n",
     "fig.suptitle(\n",
-    "    f'Effects of n_list on QPS/recall trade-off ({DATASET_FILENAME})\\n' + \\\n",
+    "    f'Effects of n_list on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n",
     "    f'k = {k}, pq_dim = {pq_dim}, search = {search_label}')\n",
     "labels = []\n",
     "for i, n_lists in enumerate(n_list_variants):\n",
@@ -875,7 +877,7 @@
     "bench_recall_ip = np.zeros_like(bench_qps_ip, dtype=np.float32)\n",
     "\n",
     "for i, index_params in enumerate(build_configs.values()):\n",
-    "    index = ivf_pq.build(index_params, dataset, handle=resources)\n",
+    "    index = ivf_pq.build(index_params, dataset, resources=resources)\n",
     "    for l, search_fun in enumerate(search_configs):\n",
     "        for j, n_probes in enumerate(n_probes_variants):\n",
     "            r = %timeit -o search_fun(n_probes);  resources.sync()\n",
@@ -891,7 +893,7 @@
    "source": [
     "fig, ax = plt.subplots(len(search_config_names), 1, figsize=(16, len(search_config_names)*8))\n",
     "fig.suptitle(\n",
-    "    f'Effects of index parameters on QPS/recall trade-off ({DATASET_FILENAME})\\n' + \\\n",
+    "    f'Effects of index parameters on QPS/recall trade-off ({DATASET_NAME})\\n' + \\\n",
     "    f'k = {k}, n_lists = {n_lists}')\n",
     "\n",
     "for j, search_label in enumerate(search_config_names):\n",
@@ -932,7 +934,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.13"
+   "version": "3.11.9"
   },
   "vscode": {
    "interpreter": {