Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lindorm vdb bug-fix #11790

Merged
merged 9 commits into from
Dec 18, 2024
17 changes: 11 additions & 6 deletions api/core/rag/datasource/vdb/lindorm/lindorm_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ def to_opensearch_params(self) -> dict[str, Any]:


class LindormVectorStore(BaseVector):
def __init__(self, collection_name: str, config: LindormVectorStoreConfig, **kwargs):
def __init__(self, collection_name: str, config: LindormVectorStoreConfig, using_ugc: bool, **kwargs):
self._routing = None
self._routing_field = None
if config.using_ugc:
if using_ugc:
routing_value: str = kwargs.get("routing_value")
if routing_value is None:
raise ValueError("UGC index should init vector with valid 'routing_value' parameter value")
Expand All @@ -64,7 +64,7 @@ def __init__(self, collection_name: str, config: LindormVectorStoreConfig, **kwa
super().__init__(collection_name.lower())
self._client_config = config
self._client = OpenSearch(**config.to_opensearch_params())
self._using_ugc = config.using_ugc
self._using_ugc = using_ugc
self.kwargs = kwargs

def get_type(self) -> str:
Expand Down Expand Up @@ -467,12 +467,16 @@ def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings
using_ugc = dify_config.USING_UGC_INDEX
routing_value = None
if dataset.index_struct:
if using_ugc:
# if an existed record's index_struct_dict doesn't contain using_ugc field,
# it actually stores in the normal index format
stored_in_ugc = dataset.index_struct_dict.get("using_ugc", False)
using_ugc = stored_in_ugc
if stored_in_ugc:
dimension = dataset.index_struct_dict["dimension"]
index_type = dataset.index_struct_dict["index_type"]
distance_type = dataset.index_struct_dict["distance_type"]
index_name = f"{UGC_INDEX_PREFIX}_{dimension}_{index_type}_{distance_type}"
routing_value = dataset.index_struct_dict["vector_store"]["class_prefix"]
index_name = f"{UGC_INDEX_PREFIX}_{dimension}_{index_type}_{distance_type}"
else:
index_name = dataset.index_struct_dict["vector_store"]["class_prefix"]
else:
Expand All @@ -487,11 +491,12 @@ def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings
"index_type": index_type,
"dimension": dimension,
"distance_type": distance_type,
"using_ugc": using_ugc,
}
dataset.index_struct = json.dumps(index_struct_dict)
if using_ugc:
index_name = f"{UGC_INDEX_PREFIX}_{dimension}_{index_type}_{distance_type}"
routing_value = class_prefix
else:
index_name = class_prefix
return LindormVectorStore(index_name, lindorm_config, routing_value=routing_value)
return LindormVectorStore(index_name, lindorm_config, routing_value=routing_value, using_ugc=using_ugc)
Loading