Add procedures to split indexing, search and merge

For large datasets, users might prefer to run indexing, search, and force merge as separate test executions. To support this use case, this change adds three additional procedures: 1) index-only, 2) force-merge, and 3) search-only. These can be used in nightly runs to execute the search workload every day without re-indexing every time. Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com>
vpehkone · Mar 18, 2024 · dfc1853 · dfc1853
1 parent 7caa5d3
commit dfc1853
Showing 1 changed file with 90 additions and 0 deletions.
diff --git a/vectorsearch/test_procedures/default.json b/vectorsearch/test_procedures/default.json
@@ -55,6 +55,96 @@
                 "name": "prod-queries",
                 "operation-type": "vector-search",
                 "index": "{{ target_index_name | default('target_index') }}",
+                "detailed-results": true,
+                "k": {{ query_k  | default(100) }},
+                "field" : "{{ target_field_name | default('target_field') }}",
+                "data_set_format" : "{{ query_data_set_format | default('hdf5') }}",
+                "data_set_path" : "{{ query_data_set_path }}",
+                "data_set_corpus" : "{{ query_data_set_corpus }}",
+                "neighbors_data_set_path" : "{{ neighbors_data_set_path }}",
+                "neighbors_data_set_corpus" : "{{ neighbors_data_set_corpus }}",
+                "neighbors_data_set_format" : "{{ neighbors_data_set_format | default('hdf5') }}",
+                "num_vectors" : {{ query_count | default(-1) }},
+                "id-field-name": "{{ id_field_name }}",
+                "body": {{ query_body | default ({}) | tojson }}    
+            },
+            "clients": {{ search_clients | default(1)}}
+        }
+    ]
+},
+{
+    "name": "no-train-test-only-index",
+    "description": "Perform only indexing operation for vector search",
+    "schedule": [
+       {
+            "operation": {
+                "name": "delete-target-index",
+                "operation-type": "delete-index",
+                "only-if-exists": true,
+                "index": "{{ target_index_name | default('target_index') }}"
+            }
+        },
+        {
+            "operation": {
+                "name": "create-target-index",
+                "operation-type": "create-index",
+                "index": "{{ target_index_name | default('target_index') }}"
+            }
+        },
+        {
+            "operation": {
+                "name": "custom-vector-bulk",
+                "operation-type": "bulk-vector-data-set",
+                "index": "{{ target_index_name | default('target_index') }}",
+                "field": "{{ target_field_name | default('target_field') }}",
+                "bulk_size": {{ target_index_bulk_size | default(500)}},
+                "data_set_format": "{{ target_index_bulk_index_data_set_format | default('hdf5') }}",
+                "data_set_path": "{{ target_index_bulk_index_data_set_path  }}",
+                "data_set_corpus": "{{ target_index_bulk_index_data_set_corpus  }}",
+                "num_vectors": {{ target_index_num_vectors | default(-1) }},
+                "id-field-name": "{{ id_field_name }}"
+            },
+            "clients": {{ target_index_bulk_indexing_clients | default(1)}}
+        },
+        {
+            "name" : "refresh-target-index-before-force-merge",
+            "operation" : "refresh-target-index"
+        }
+    ]
+},
+{
+    "name": "force-merge-index",
+    "description": "Force merge vector search index",
+    "schedule": [
+        {
+            "name" : "refresh-target-index-before-force-merge",
+            "operation" : "refresh-target-index"
+        },
+        {
+            "name" : "force-merge-segments",
+            "operation" : "force-merge"
+        },
+        {
+            "name" : "refresh-target-index-after-force-merge",
+            "operation" : "refresh-target-index"
+        }
+    ]
+},
+{
+    "name": "no-train-test-only-search",
+    "description": "Perform only vector search on a previously indexed cluster.",
+    "schedule": [
+        {
+            "name" : "warmup-indices",
+            "operation" : "warmup-indices",
+            "index": "{{ target_index_name | default('target_index') }}"
+        },
+        {
+            "operation": {
+                "name": "prod-queries",
+                "operation-type": "vector-search",
+                "detailed-results": true,
+                "index": "{{ target_index_name | default('target_index') }}",
                 "k": {{ query_k  | default(100) }},
                 "field" : "{{ target_field_name | default('target_field') }}",
                 "data_set_format" : "{{ query_data_set_format | default('hdf5') }}",