Skip to content

Commit

Permalink
Add procedures to split indexing, search and merge
Browse files Browse the repository at this point in the history
For large datasets, users might prefer to run indexing,
search, and force merge as separate test executions. To support
this use case, this commit adds three additional procedures:
1) index-only, 2) force-merge, and 3) search-only. These can be used
in nightly runs to execute the search workload every day without
re-indexing every time.

Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com>
  • Loading branch information
VijayanB authored and gkamat committed Mar 18, 2024
1 parent 7caa5d3 commit dfc1853
Showing 1 changed file with 90 additions and 0 deletions.
90 changes: 90 additions & 0 deletions vectorsearch/test_procedures/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,96 @@
"name": "prod-queries",
"operation-type": "vector-search",
"index": "{{ target_index_name | default('target_index') }}",
"detailed-results": true,
"k": {{ query_k | default(100) }},
"field" : "{{ target_field_name | default('target_field') }}",
"data_set_format" : "{{ query_data_set_format | default('hdf5') }}",
"data_set_path" : "{{ query_data_set_path }}",
"data_set_corpus" : "{{ query_data_set_corpus }}",
"neighbors_data_set_path" : "{{ neighbors_data_set_path }}",
"neighbors_data_set_corpus" : "{{ neighbors_data_set_corpus }}",
"neighbors_data_set_format" : "{{ neighbors_data_set_format | default('hdf5') }}",
"num_vectors" : {{ query_count | default(-1) }},
"id-field-name": "{{ id_field_name }}",
"body": {{ query_body | default ({}) | tojson }}
},
"clients": {{ search_clients | default(1)}}
}
]
},
{
"name": "no-train-test-only-index",
"description": "Perform only indexing operation for vector search",
"schedule": [
{
"operation": {
"name": "delete-target-index",
"operation-type": "delete-index",
"only-if-exists": true,
"index": "{{ target_index_name | default('target_index') }}"
}
},
{
"operation": {
"name": "create-target-index",
"operation-type": "create-index",
"index": "{{ target_index_name | default('target_index') }}"
}
},
{
"operation": {
"name": "custom-vector-bulk",
"operation-type": "bulk-vector-data-set",
"index": "{{ target_index_name | default('target_index') }}",
"field": "{{ target_field_name | default('target_field') }}",
"bulk_size": {{ target_index_bulk_size | default(500)}},
"data_set_format": "{{ target_index_bulk_index_data_set_format | default('hdf5') }}",
"data_set_path": "{{ target_index_bulk_index_data_set_path }}",
"data_set_corpus": "{{ target_index_bulk_index_data_set_corpus }}",
"num_vectors": {{ target_index_num_vectors | default(-1) }},
"id-field-name": "{{ id_field_name }}"
},
"clients": {{ target_index_bulk_indexing_clients | default(1)}}
},
{
"name" : "refresh-target-index-before-force-merge",
"operation" : "refresh-target-index"
}
]
},
{
"name": "force-merge-index",
"description": "Force merge vector search index",
"schedule": [
{
"name" : "refresh-target-index-before-force-merge",
"operation" : "refresh-target-index"
},
{
"name" : "force-merge-segments",
"operation" : "force-merge"
},
{
"name" : "refresh-target-index-after-force-merge",
"operation" : "refresh-target-index"
}
]
},
{
"name": "no-train-test-only-search",
"description": "Perform only vector search on previously indexed cluster.",
"schedule": [
{
"name" : "warmup-indices",
"operation" : "warmup-indices",
"index": "{{ target_index_name | default('target_index') }}"
},
{
"operation": {
"name": "prod-queries",
"operation-type": "vector-search",
"detailed-results": true,
"index": "{{ target_index_name | default('target_index') }}",
"k": {{ query_k | default(100) }},
"field" : "{{ target_field_name | default('target_field') }}",
"data_set_format" : "{{ query_data_set_format | default('hdf5') }}",
Expand Down

0 comments on commit dfc1853

Please sign in to comment.