Skip to content

Commit

Permalink
Add query optimization to prod-queries (opensearch-project#202)
Browse files Browse the repository at this point in the history
* Add query optimization to prod-queries

Use docvalue_fields to fetch id-field-name from the doc values store
instead of from _source. This performs better than the current
setup.

Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com>

* Add refresh after force merge

Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com>

---------

Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com>
  • Loading branch information
VijayanB authored Feb 28, 2024
1 parent e8cab46 commit 63abe4d
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 2 deletions.
5 changes: 5 additions & 0 deletions vectorsearch/params/faiss-sift-128-l2.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
"target_index_force_merge_timeout": 45.0,
"hnsw_ef_search": 100,
"hnsw_ef_construction": 100,

"query_k": 100,
"query_body": {
"docvalue_fields" : ["_id"],
"stored_fields" : "_none_"
},

"query_data_set_format": "hdf5",
"query_data_set_path":"/tmp/sift-128-euclidean.hdf5",
Expand Down
5 changes: 5 additions & 0 deletions vectorsearch/params/lucene-sift-128-l2.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
"target_index_force_merge_timeout": 45.0,
"hnsw_ef_search": 100,
"hnsw_ef_construction": 100,

"query_k": 100,
"query_body": {
"docvalue_fields" : ["_id"],
"stored_fields" : "_none_"
},

"query_data_set_format": "hdf5",
"query_data_set_path":"/tmp/sift-128-euclidean.hdf5",
Expand Down
5 changes: 5 additions & 0 deletions vectorsearch/params/nmslib-sift-128-l2.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
"target_index_force_merge_timeout": 45.0,
"hnsw_ef_search": 100,
"hnsw_ef_construction": 100,

"query_k": 100,
"query_body": {
"docvalue_fields" : ["_id"],
"stored_fields" : "_none_"
},

"query_data_set_format": "hdf5",
"query_data_set_path":"/tmp/sift-128-euclidean-test.hdf5",
Expand Down
9 changes: 7 additions & 2 deletions vectorsearch/test_procedures/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,17 @@
"clients": {{ target_index_bulk_indexing_clients | default(1)}}
},
{
"name" : "refresh-target-index",
"name" : "refresh-target-index-before-force-merge",
"operation" : "refresh-target-index"
},
{
"name" : "force-merge-segments",
"operation" : "force-merge"
},
{
"name" : "refresh-target-index-after-force-merge",
"operation" : "refresh-target-index"
},
{
"name" : "warmup-indices",
"operation" : "warmup-indices",
Expand All @@ -57,7 +61,8 @@
"neighbors_data_set_path" : "{{ neighbors_data_set_path | default('/tmp/vector-dataset.hdf5') }}",
"neighbors_data_set_format" : "{{ neighbors_data_set_format | default('hdf5') }}",
"num_vectors" : {{ query_count | default(-1) }},
"id-field-name": "{{ id_field_name }}"
"id-field-name": "{{ id_field_name }}",
"body": {{ query_body | default ({}) | tojson }}
},
"clients": {{ search_clients | default(1)}}
}
Expand Down

0 comments on commit 63abe4d

Please sign in to comment.