From 8db97421e90cd24fee2cc9926376742d6525e6aa Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 28 Feb 2024 20:02:02 +0000 Subject: [PATCH] move param files (#210) Signed-off-by: Vijayan Balasubramanian (cherry picked from commit ea6eba5d14cf383b2b926ad0e00c1ddf9918936e) Signed-off-by: github-actions[bot] --- .../corpus/10million/faiss-cohere-768-dp.json | 28 +++++++++++++++++ .../10million/lucene-cohere-768-dp.json | 28 +++++++++++++++++ .../10million/nmslib-cohere-768-dp.json | 30 +++++++++++++++++++ .../{ => 1million}/faiss-cohere-768-dp.json | 4 +-- .../{ => 1million}/lucene-cohere-768-dp.json | 6 ++-- .../{ => 1million}/nmslib-cohere-768-dp.json | 0 vectorsearch/workload.json | 8 ++--- 7 files changed, 95 insertions(+), 9 deletions(-) create mode 100644 vectorsearch/params/corpus/10million/faiss-cohere-768-dp.json create mode 100644 vectorsearch/params/corpus/10million/lucene-cohere-768-dp.json create mode 100644 vectorsearch/params/corpus/10million/nmslib-cohere-768-dp.json rename vectorsearch/params/corpus/{ => 1million}/faiss-cohere-768-dp.json (87%) rename vectorsearch/params/corpus/{ => 1million}/lucene-cohere-768-dp.json (84%) rename vectorsearch/params/corpus/{ => 1million}/nmslib-cohere-768-dp.json (100%) diff --git a/vectorsearch/params/corpus/10million/faiss-cohere-768-dp.json b/vectorsearch/params/corpus/10million/faiss-cohere-768-dp.json new file mode 100644 index 00000000..07f95de8 --- /dev/null +++ b/vectorsearch/params/corpus/10million/faiss-cohere-768-dp.json @@ -0,0 +1,28 @@ +{ + "target_index_name": "target_index", + "target_field_name": "target_field", + "target_index_body": "indices/faiss-index.json", + "target_index_primary_shards": 18, + "target_index_dimension": 768, + "target_index_space_type": "innerproduct", + + "target_index_bulk_size": 100, + "target_index_bulk_index_data_set_format": "hdf5", + "target_index_bulk_index_data_set_corpus": "cohere-10m", + "target_index_bulk_indexing_clients": 10, + + "target_index_max_num_segments": 10, + "target_index_force_merge_timeout": 100, + "hnsw_ef_search": 100, + "hnsw_ef_construction": 100, + + "query_k": 100, + "query_body": { + "docvalue_fields" : ["_id"], + "stored_fields" : "_none_" + }, + + "query_data_set_format": "hdf5", + "query_data_set_corpus": "cohere-10m", + "query_count": 10000 + } diff --git a/vectorsearch/params/corpus/10million/lucene-cohere-768-dp.json b/vectorsearch/params/corpus/10million/lucene-cohere-768-dp.json new file mode 100644 index 00000000..2f725aa0 --- /dev/null +++ b/vectorsearch/params/corpus/10million/lucene-cohere-768-dp.json @@ -0,0 +1,28 @@ +{ + "target_index_name": "target_index", + "target_field_name": "target_field", + "target_index_body": "indices/lucene-index.json", + "target_index_primary_shards": 18, + "target_index_dimension": 768, + "target_index_space_type": "innerproduct", + + "target_index_bulk_size": 100, + "target_index_bulk_index_data_set_format": "hdf5", + "target_index_bulk_index_data_set_corpus": "cohere-10m", + "target_index_bulk_indexing_clients": 10, + + "target_index_max_num_segments": 10, + "target_index_force_merge_timeout": 100, + "hnsw_ef_search": 100, + "hnsw_ef_construction": 100, + + "query_k": 100, + "query_body": { + "docvalue_fields" : ["_id"], + "stored_fields" : "_none_" + }, + + "query_data_set_format": "hdf5", + "query_data_set_corpus": "cohere-10m", + "query_count": 10000 + } diff --git a/vectorsearch/params/corpus/10million/nmslib-cohere-768-dp.json b/vectorsearch/params/corpus/10million/nmslib-cohere-768-dp.json new file mode 100644 index 00000000..6fdee69f --- /dev/null +++ b/vectorsearch/params/corpus/10million/nmslib-cohere-768-dp.json @@ -0,0 +1,30 @@ +{ + "target_index_name": "target_index", + "target_field_name": "target_field", + "target_index_body": "indices/nmslib-index.json", + "target_index_primary_shards": 18, + "target_index_dimension": 768, + "target_index_space_type": "innerproduct", + + "target_index_bulk_size": 100, + "target_index_bulk_index_data_set_format": "hdf5", + "target_index_bulk_index_data_set_corpus": "cohere-10m", + "target_index_bulk_indexing_clients": 10, + + "target_index_max_num_segments": 10, + "target_index_force_merge_timeout": 100, + "hnsw_ef_search": 100, + "hnsw_ef_construction": 100, + + "query_k": 100, + "query_body": { + "docvalue_fields" : ["_id"], + "stored_fields" : "_none_" + }, + + "query_data_set_format": "hdf5", + "query_data_set_corpus": "cohere-1m", + "neighbors_data_set_corpus": "cohere-10m", + "neighbors_data_set_format": "hdf5", + "query_count": 10000 + } diff --git a/vectorsearch/params/corpus/faiss-cohere-768-dp.json b/vectorsearch/params/corpus/1million/faiss-cohere-768-dp.json similarity index 87% rename from vectorsearch/params/corpus/faiss-cohere-768-dp.json rename to vectorsearch/params/corpus/1million/faiss-cohere-768-dp.json index 1625addb..3266edd4 100644 --- a/vectorsearch/params/corpus/faiss-cohere-768-dp.json +++ b/vectorsearch/params/corpus/1million/faiss-cohere-768-dp.json @@ -8,7 +8,7 @@ "target_index_bulk_size": 100, "target_index_bulk_index_data_set_format": "hdf5", - "target_index_bulk_index_data_set_corpus": "cohere-100k", + "target_index_bulk_index_data_set_corpus": "cohere-1m", "target_index_bulk_indexing_clients": 10, "target_index_max_num_segments": 10, @@ -23,6 +23,6 @@ }, "query_data_set_format": "hdf5", - "query_data_set_corpus":"cohere-100k", + "query_data_set_corpus": "cohere-1m", "query_count": 10000 } diff --git a/vectorsearch/params/corpus/lucene-cohere-768-dp.json b/vectorsearch/params/corpus/1million/lucene-cohere-768-dp.json similarity index 84% rename from vectorsearch/params/corpus/lucene-cohere-768-dp.json rename to vectorsearch/params/corpus/1million/lucene-cohere-768-dp.json index 4d2c05c7..5b5fc2cc 100644 --- a/vectorsearch/params/corpus/lucene-cohere-768-dp.json +++ b/vectorsearch/params/corpus/1million/lucene-cohere-768-dp.json @@ -8,7 +8,7 @@ "target_index_bulk_size": 100, "target_index_bulk_index_data_set_format": "hdf5", - "target_index_bulk_index_data_set_corpus": "cohere", + "target_index_bulk_index_data_set_corpus": "cohere-1m", "target_index_bulk_indexing_clients": 10, "target_index_max_num_segments": 10, @@ -23,6 +23,6 @@ }, "query_data_set_format": "hdf5", - "query_data_set_corpus": "cohere", - "query_count": 1000 + "query_data_set_corpus": "cohere-1m", + "query_count": 10000 } diff --git a/vectorsearch/params/corpus/nmslib-cohere-768-dp.json b/vectorsearch/params/corpus/1million/nmslib-cohere-768-dp.json similarity index 100% rename from vectorsearch/params/corpus/nmslib-cohere-768-dp.json rename to vectorsearch/params/corpus/1million/nmslib-cohere-768-dp.json diff --git a/vectorsearch/workload.json b/vectorsearch/workload.json index fb2b498f..157ce142 100644 --- a/vectorsearch/workload.json +++ b/vectorsearch/workload.json @@ -16,7 +16,7 @@ { "source-file": "documents-1k.hdf5.bz2", "source-format": "hdf5", - "document-count": 1000, + "document-count": 1000 } ] }, @@ -27,7 +27,7 @@ { "source-file": "documents-100k.hdf5.bz2", "source-format": "hdf5", - "document-count": 100000, + "document-count": 100000 } ] }, @@ -38,7 +38,7 @@ { "source-file": "documents-1m.hdf5.bz2", "source-format": "hdf5", - "document-count": 1000000, + "document-count": 1000000 } ] }, @@ -49,7 +49,7 @@ { "source-file": "documents-10m.hdf5.bz2", "source-format": "hdf5", - "document-count": 10000000, + "document-count": 10000000 } ] }