Merge pull request #2754 from activeloopai/testing_tests #1485

GitHub Actions / JUnit Test Report failed Jan 31, 2024 in 0s

20965 tests run, 10598 passed, 10362 skipped, 5 failed.

Annotations

Check failure on line 104 in deeplake/api/tests/test_access_method.py

github-actions / JUnit Test Report

test_access_method.test_access_method_with_creds

deeplake.util.exceptions.DatasetHandlerError: A Deep Lake dataset does not exist at the given path (hub://testingacc2/tmpf320_test_access_method_test_access_method_with_creds__local-managed-entry__). Check the path provided or in case you want to create a new dataset, use deeplake.empty().

Raw output


            hub_cloud_ds_generator = <function hub_cloud_ds_generator.<locals>.generate_hub_cloud_ds at 0x7f7e9c4160e0>
hub_cloud_dev_managed_creds_key = 'aws_creds'

    @pytest.mark.slow
    def test_access_method_with_creds(
        hub_cloud_ds_generator, hub_cloud_dev_managed_creds_key
    ):
        with hub_cloud_ds_generator() as ds:
            ds.create_tensor("abc")
            ds.create_tensor("images", htype="link[image]", sample_compression="jpg")
    
            ds.add_creds_key(hub_cloud_dev_managed_creds_key, managed=True)
    
            ds.abc.extend([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
            ds.images.extend(
                [
                    deeplake.link(
                        "https://picsum.photos/20/30",
                        creds_key=hub_cloud_dev_managed_creds_key,
                    )
                    for _ in range(10)
                ]
            )
    
>       ds = hub_cloud_ds_generator(access_method="download:2")

deeplake/api/tests/test_access_method.py:104: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/tests/dataset_fixtures.py:153: in generate_hub_cloud_ds
    return deeplake.dataset(hub_cloud_path, token=hub_cloud_dev_token, **kwargs)
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:316: in init
    raise e
deeplake/api/dataset.py:293: in init
    return dataset._load(
deeplake/api/dataset.py:767: in _load
    ret = get_local_dataset(**dataset_kwargs)
deeplake/util/access_method.py:196: in get_local_dataset
    ds = deeplake.load(
deeplake/util/spinner.py:153: in inner
    return func(*args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

path = 'hub://testingacc2/tmpf320_test_access_method_test_access_method_with_creds__local-managed-entry__'
read_only = None, memory_cache_size = 2000, local_cache_size = 0, creds = {}
token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
org_id = None, verbose = False, access_method = 'stream', unlink = False
reset = False, check_integrity = None, lock_timeout = 0, lock_enabled = True
index_params = None

    @staticmethod
    @spinner
    def load(
        path: Union[str, pathlib.Path],
        read_only: Optional[bool] = None,
        memory_cache_size: int = DEFAULT_MEMORY_CACHE_SIZE,
        local_cache_size: int = DEFAULT_LOCAL_CACHE_SIZE,
        creds: Optional[Union[dict, str]] = None,
        token: Optional[str] = None,
        org_id: Optional[str] = None,
        verbose: bool = True,
        access_method: str = "stream",
        unlink: bool = False,
        reset: bool = False,
        check_integrity: Optional[bool] = None,
        lock_timeout: Optional[int] = 0,
        lock_enabled: Optional[bool] = True,
        index_params: Optional[Dict[str, Union[int, str]]] = None,
    ) -> Dataset:
        """Loads an existing dataset
    
        Examples:
    
            >>> ds = deeplake.load("hub://username/dataset")
            >>> ds = deeplake.load("s3://mybucket/my_dataset")
            >>> ds = deeplake.load("./datasets/my_dataset", overwrite=True)
    
            Loading to a specific version:
    
            >>> ds = deeplake.load("hub://username/dataset@new_branch")
            >>> ds = deeplake.load("hub://username/dataset@3e49cded62b6b335c74ff07e97f8451a37aca7b2")
    
            >>> my_commit_id = "3e49cded62b6b335c74ff07e97f8451a37aca7b2"
            >>> ds = deeplake.load(f"hub://username/dataset@{my_commit_id}")
    
        Args:
            path (str, pathlib.Path): - The full path to the dataset. Can be:
                - a Deep Lake cloud path of the form ``hub://username/datasetname``. To write to Deep Lake cloud datasets, ensure that you are logged in to Deep Lake (use 'activeloop login' from command line)
                - an s3 path of the form ``s3://bucketname/path/to/dataset``. Credentials are required in either the environment or passed to the creds argument.
                - a local file system path of the form ``./path/to/dataset`` or ``~/path/to/dataset`` or ``path/to/dataset``.
                - a memory path of the form ``mem://path/to/dataset`` which doesn't save the dataset but keeps it in memory instead. Should be used only for testing as it does not persist.
                - Loading to a specific version:
    
                        - You can also specify a ``commit_id`` or ``branch`` to load the dataset to that version directly by using the ``@`` symbol.
                        - The path will then be of the form ``hub://username/dataset@{branch}`` or ``hub://username/dataset@{commit_id}``.
                        - See examples above.
            read_only (bool, optional): Opens dataset in read only mode if this is passed as ``True``. Defaults to ``False``.
                Datasets stored on Deep Lake cloud that your account does not have write access to will automatically open in read mode.
            memory_cache_size (int): The size of the memory cache to be used in MB.
            local_cache_size (int): The size of the local filesystem cache to be used in MB.
            creds (dict, str, optional): The string ``ENV`` or a dictionary containing credentials used to access the dataset at the path.
                - If 'aws_access_key_id', 'aws_secret_access_key', 'aws_session_token' are present, these take precedence over credentials present in the environment or in credentials file. Currently only works with s3 paths.
                - It supports 'aws_access_key_id', 'aws_secret_access_key', 'aws_session_token', 'endpoint_url', 'aws_region', 'profile_name' as keys.
                - If 'ENV' is passed, credentials are fetched from the environment variables. This is also the case when creds is not passed for cloud datasets. For datasets connected to hub cloud, specifying 'ENV' will override the credentials fetched from Activeloop and use local ones.
            token (str, optional): Activeloop token, used for fetching credentials to the dataset at path if it is a Deep Lake dataset. This is optional, tokens are normally autogenerated.
            org_id (str, Optional): Organization id to be used for enabling high-performance features. Only applicable for local datasets.
            verbose (bool): If ``True``, logs will be printed. Defaults to ``True``.
            access_method (str): The access method to use for the dataset. Can be:
    
                    - 'stream'
    
                        - Streams the data from the dataset i.e. only fetches data when required. This is the default value.
    
                    - 'download'
    
                        - Downloads the data to the local filesystem to the path specified in environment variable ``DEEPLAKE_DOWNLOAD_PATH``.
                          This will overwrite ``DEEPLAKE_DOWNLOAD_PATH``.
                        - Raises an exception if ``DEEPLAKE_DOWNLOAD_PATH`` environment variable is not set or if the dataset does not exist.
                        - The 'download' access method can be modified to specify num_workers and/or scheduler.
                          For example: 'download:2:processed' will use 2 workers and use processed scheduler, while 'download:3' will use 3 workers and
                          default scheduler (threaded), and 'download:processed' will use a single worker and use processed scheduler.
    
                    - 'local'
    
                        - Downloads the dataset if it doesn't already exist, otherwise loads from local storage.
                        - Raises an exception if ``DEEPLAKE_DOWNLOAD_PATH`` environment variable is not set.
                        - The 'local' access method can be modified to specify num_workers and/or scheduler to be used in case dataset needs to be downloaded.
                          If dataset needs to be downloaded, 'local:2:processed' will use 2 workers and use processed scheduler, while 'local:3' will use 3 workers
                          and default scheduler (threaded), and 'local:processed' will use a single worker and use processed scheduler.
            unlink (bool): Downloads linked samples if set to ``True``. Only applicable if ``access_method`` is ``download`` or ``local``. Defaults to ``False``.
            reset (bool): If the specified dataset cannot be loaded due to a corrupted HEAD state of the branch being loaded,
                          setting ``reset=True`` will reset HEAD changes and load the previous version.
            check_integrity (bool, Optional): Performs an integrity check by default (None) if the dataset has 20 or fewer tensors.
                                              Set to ``True`` to force integrity check, ``False`` to skip integrity check.
    
        ..
            # noqa: DAR101
    
        Returns:
            Dataset: Dataset loaded using the arguments provided.
    
        Raises:
            DatasetHandlerError: If a Dataset does not exist at the given path.
            AgreementError: When agreement is rejected
            UserNotLoggedInException: When user is not logged in
            InvalidTokenException: If the specified token is invalid
            TokenPermissionError: When there are permission or other errors related to token
            CheckoutError: If version address specified in the path cannot be found
            DatasetCorruptError: If loading the dataset failed due to corruption and ``reset`` is not ``True``
            ReadOnlyModeError: If reset is attempted in read-only mode
            LockedException: When attempting to open a dataset for writing when it is locked by another machine
            ValueError: If ``org_id`` is specified for a non-local dataset
            Exception: Re-raises caught exception if reset cannot fix the issue
            ValueError: If the org id is provided but the dataset is not local
    
        Warning:
            Setting ``access_method`` to download will overwrite the local copy of the dataset if it was previously downloaded.
    
        Note:
            Any changes made to the dataset in download / local mode will only be made to the local copy and will not be reflected in the original dataset.
        """
        access_method, num_workers, scheduler = parse_access_method(access_method)
        check_access_method(access_method, overwrite=False, unlink=unlink)
    
        path, address = process_dataset_path(path)
    
        if creds is None:
            creds = {}
    
        if org_id is not None and get_path_type(path) != "local":
            raise ValueError("org_id parameter can only be used with local datasets")
    
        try:
            storage, cache_chain = get_storage_and_cache_chain(
                path=path,
                read_only=read_only,
                creds=creds,
                token=token,
                memory_cache_size=memory_cache_size,
                local_cache_size=local_cache_size,
            )
            feature_report_path(
                path,
                "load",
                {
                    "lock_enabled": lock_enabled,
                    "lock_timeout": lock_timeout,
                    "index_params": index_params,
                },
                token=token,
            )
        except Exception as e:
            if isinstance(e, UserNotLoggedInException):
                raise UserNotLoggedInException from None
            raise
        if not dataset_exists(cache_chain):
>           raise DatasetHandlerError(
                f"A Deep Lake dataset does not exist at the given path ({path}). Check the path provided or in case you want to create a new dataset, use deeplake.empty()."
            )
E           deeplake.util.exceptions.DatasetHandlerError: A Deep Lake dataset does not exist at the given path (hub://testingacc2/tmpf320_test_access_method_test_access_method_with_creds__local-managed-entry__). Check the path provided or in case you want to create a new dataset, use deeplake.empty().

deeplake/api/dataset.py:635: DatasetHandlerError

Check failure on line 2924 in deeplake/core/vectorstore/test_deeplake_vectorstore.py

github-actions / JUnit Test Report

test_deeplake_vectorstore.test_returning_tql_for_exec_option_compute_engine_should_return_correct_tql

NotImplementedError: return_tql is not supported for exec_option=python

Raw output


            local_path = './hub_pytest/test_deeplake_vectorstore/test_returning_tql_for_exec_option_compute_engine_should_return_correct_tql'
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'

    def test_returning_tql_for_exec_option_compute_engine_should_return_correct_tql(
        local_path,
        hub_cloud_dev_token,
    ):
        db = VectorStore(
            path=local_path,
            token=hub_cloud_dev_token,
        )
    
        texts, embeddings, ids, metadatas, _ = utils.create_data(
            number_of_data=10, embedding_dim=3
        )
    
        db.add(text=texts, embedding=embeddings, id=ids, metadata=metadatas)
    
        query_embedding = np.zeros(3, dtype=np.float32)
>       output = db.search(embedding=query_embedding, return_tql=True)

deeplake\core\vectorstore\test_deeplake_vectorstore.py:2924: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake\core\vectorstore\deeplake_vectorstore.py:312: in search
    return self.dataset_handler.search(
deeplake\core\vectorstore\deep_memory\deep_memory.py:56: in wrapper
    return func(self, *args, **kwargs)
deeplake\core\vectorstore\dataset_handlers\client_side_dataset_handler.py:235: in search
    return vector_search.search(
deeplake\core\vectorstore\vector_search\vector_search.py:55: in search
    return EXEC_OPTION_TO_SEARCH_TYPE[exec_option](
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

query = None, query_emb = array([0., 0., 0.], dtype=float32)
exec_option = 'python'
dataset = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_returning_tql_for_exec_option_compute_engine_should_return_correct_tql', tensors=['text', 'metadata', 'embedding', 'id'])
logger = <Logger deeplake.core.vectorstore.deeplake_vectorstore (INFO)>
filter = None, embedding_tensor = 'embedding', distance_metric = 'COS', k = 4
return_tensors = ['text', 'metadata', 'id'], return_view = False
token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
org_id = None, return_tql = True

    def vector_search(
        query,
        query_emb,
        exec_option,
        dataset,
        logger,
        filter,
        embedding_tensor,
        distance_metric,
        k,
        return_tensors,
        return_view,
        token,
        org_id,
        return_tql,
    ) -> Union[Dict, DeepLakeDataset]:
        if query is not None:
            raise NotImplementedError(
                f"User-specified TQL queries are not supported for exec_option={exec_option} "
            )
    
        if return_tql:
>           raise NotImplementedError(
                f"return_tql is not supported for exec_option={exec_option}"
            )
E           NotImplementedError: return_tql is not supported for exec_option=python

deeplake\core\vectorstore\vector_search\python\vector_search.py:31: NotImplementedError

Check failure on line 1066 in deeplake/core/vectorstore/test_deeplake_vectorstore.py

github-actions / JUnit Test Report

test_deeplake_vectorstore.test_update_embedding[None-local_auth_ds-None-vector_store_row_ids-None-None-None-hub_cloud_dev_token]

indra.api.api.not_enterprise_organization_error: High-Performance Features are not available for this dataset based on the usage plan of its parent organization.
    Please upgrade the organization to a plan that offers this feature.

Raw output


            ds = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_update_embedding-None-local_auth_ds-None-vector_store_row_ids-None-None-None-hub_cloud_dev_token-', tensors=[])
vector_store_hash_ids = None, vector_store_row_ids = [0, 1, 2, 3, 4]
vector_store_filters = None, vector_store_filter_udf = None
vector_store_query = None, init_embedding_function = None
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'

    @requires_libdeeplake
    @pytest.mark.parametrize(
        "ds, vector_store_hash_ids, vector_store_row_ids, vector_store_filters, vector_store_filter_udf, vector_store_query, hub_cloud_dev_token",
        [
            (
                "local_auth_ds",
                "vector_store_hash_ids",
                None,
                None,
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                "vector_store_row_ids",
                None,
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                None,
                None,
                "vector_store_filter_udf",
                None,
                "hub_cloud_dev_token",
            ),
            (
                "local_auth_ds",
                None,
                None,
                "vector_store_filters",
                None,
                None,
                "hub_cloud_dev_token",
            ),
            (
                "hub_cloud_ds",
                None,
                None,
                None,
                None,
                "vector_store_query",
                "hub_cloud_dev_token",
            ),
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("init_embedding_function", [embedding_fn3, None])
    @pytest.mark.slow
    @requires_libdeeplake
    def test_update_embedding(
        ds,
        vector_store_hash_ids,
        vector_store_row_ids,
        vector_store_filters,
        vector_store_filter_udf,
        vector_store_query,
        init_embedding_function,
        hub_cloud_dev_token,
    ):
        vector_store_filters = vector_store_filters or vector_store_filter_udf
    
        exec_option = "compute_engine"
        if vector_store_filter_udf:
            exec_option = "python"
    
        embedding_tensor = "embedding"
        embedding_source_tensor = "text"
        # dataset has a single embedding_tensor:
    
        path = ds.path
        vector_store = DeepLakeVectorStore(
            path=path,
            overwrite=True,
            verbose=False,
            exec_option=exec_option,
            embedding_function=init_embedding_function,
            index_params={"threshold": 10},
            token=hub_cloud_dev_token,
        )
    
        # add data to the dataset:
        metadatas[1:6] = [{"a": 1} for _ in range(5)]
        vector_store.add(id=ids, embedding=embeddings, text=texts, metadata=metadatas)
    
        # case 1: single embedding_source_tensor, single embedding_tensor, single embedding_function
        new_embedding_value = 100
        embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
        vector_store.update_embedding(
            ids=vector_store_hash_ids,
            row_ids=vector_store_row_ids,
            filter=vector_store_filters,
            query=vector_store_query,
            embedding_function=embedding_fn,
            embedding_source_tensor=embedding_source_tensor,
            embedding_tensor=embedding_tensor,
        )
        assert_updated_vector_store(
            new_embedding_value,
            vector_store,
            vector_store_hash_ids,
            vector_store_row_ids,
            vector_store_filters,
            vector_store_query,
            embedding_fn,
            embedding_source_tensor,
            embedding_tensor,
            exec_option,
            num_changed_samples=5,
        )
    
        # case 2: single embedding_source_tensor, single embedding_tensor not specified, single embedding_function
        new_embedding_value = 100
        embedding_fn = get_embedding_function(embedding_value=new_embedding_value)
        vector_store.update_embedding(
            ids=vector_store_hash_ids,
            row_ids=vector_store_row_ids,
            filter=vector_store_filters,
            query=vector_store_query,
            embedding_function=embedding_fn,
            embedding_source_tensor=embedding_source_tensor,
        )
        assert_updated_vector_store(
            new_embedding_value,
            vector_store,
            vector_store_hash_ids,
            vector_store_row_ids,
            vector_store_filters,
            vector_store_query,
            embedding_fn,
            embedding_source_tensor,
            embedding_tensor,
            exec_option,
            num_changed_samples=5,
        )
    
        # case 3-4: single embedding_source_tensor, single embedding_tensor, single init_embedding_function
        if init_embedding_function is None:
            # case 3: errors out when init_embedding_function is not specified
            with pytest.raises(ValueError):
                vector_store.update_embedding(
                    ids=vector_store_hash_ids,
                    row_ids=vector_store_row_ids,
                    filter=vector_store_filters,
                    query=vector_store_query,
                    embedding_source_tensor=embedding_source_tensor,
                )
        else:
            # case 4
            vector_store.update_embedding(
                ids=vector_store_hash_ids,
                row_ids=vector_store_row_ids,
                filter=vector_store_filters,
                query=vector_store_query,
                embedding_source_tensor=embedding_source_tensor,
            )
            assert_updated_vector_store(
                0,
                vector_store,
                vector_store_hash_ids,
                vector_store_row_ids,
                vector_store_filters,
                vector_store_query,
                init_embedding_function,
                embedding_source_tensor,
                embedding_tensor,
                exec_option,
                num_changed_samples=5,
            )
    
>       vector_store.delete_by_path(path, token=ds.token)

deeplake/core/vectorstore/test_deeplake_vectorstore.py:1066: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/core/vectorstore/deeplake_vectorstore.py:483: in delete_by_path
    deeplake.delete(path, large_ok=True, token=token, force=force, creds=creds)
deeplake/util/spinner.py:151: in inner
    return func(*args, **kwargs)
deeplake/api/dataset.py:870: in delete
    ds.delete(large_ok=large_ok)
deeplake/util/invalid_view_op.py:22: in inner
    return callable(x, *args, **kwargs)
deeplake/core/dataset/dataset.py:2615: in delete
    tensor.delete_vdb_index(id)
deeplake/core/tensor.py:1651: in delete_vdb_index
    self.unload_vdb_index_cache()
deeplake/core/tensor.py:1721: in unload_vdb_index_cache
    ds = dataset_to_libdeeplake(self.dataset)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

hub2_dataset = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_update_embedding-None-local_auth_ds-None-vector_store_row_ids-None-None-None-hub_cloud_dev_token-', tensors=['embedding', 'id', 'metadata', 'text'])

    def dataset_to_libdeeplake(hub2_dataset: Dataset):
        """Convert a hub 2.x dataset object to a libdeeplake dataset object."""
        try_flushing(hub2_dataset)
        api = import_indra_api()
        path: str = hub2_dataset.path
    
        token = (
            hub2_dataset.client.get_token()
            if (hub2_dataset.token is None or hub2_dataset._token == "")
            and hub2_dataset.client
            else hub2_dataset.token
        )
        if token is None or token == "":
            raise EmptyTokenException
        if hub2_dataset.libdeeplake_dataset is None:
            libdeeplake_dataset = None
            if path.startswith("gdrive://"):
                raise ValueError("Gdrive datasets are not supported for libdeeplake")
            elif path.startswith("mem://"):
                raise ValueError("In memory datasets are not supported for libdeeplake")
            elif path.startswith("hub://"):
                provider = hub2_dataset.storage.next_storage
                if isinstance(provider, S3Provider):
                    libdeeplake_dataset = _get_indra_ds_from_s3_provider(
                        path=path, token=token, provider=provider
                    )
    
                elif isinstance(provider, GCSProvider):
                    libdeeplake_dataset = _get_indra_ds_from_gcp_provider(
                        path=path, token=token, provider=provider
                    )
    
                elif isinstance(provider, AzureProvider):
                    libdeeplake_dataset = _get_indra_ds_from_azure_provider(
                        path=path, token=token, provider=provider
                    )
                else:
                    raise ValueError("Unknown storage provider for hub:// dataset")
    
            elif path.startswith("s3://"):
                libdeeplake_dataset = _get_indra_ds_from_s3_provider(
                    path=path, token=token, provider=hub2_dataset.storage.next_storage
                )
    
            elif path.startswith(("gcs://", "gs://", "gcp://")):
                provider = get_base_storage(hub2_dataset.storage)
    
                libdeeplake_dataset = _get_indra_ds_from_gcp_provider(
                    path=path, token=token, provider=provider
                )
    
            elif path.startswith(("az://", "azure://")):
                az_provider = get_base_storage(hub2_dataset.storage)
                libdeeplake_dataset = _get_indra_ds_from_azure_provider(
                    path=path, token=token, provider=az_provider
                )
    
            else:
                org_id = hub2_dataset.org_id
                org_id = (
                    org_id or jwt.decode(token, options={"verify_signature": False})["id"]
                )
>               libdeeplake_dataset = api.dataset(path, token=token, org_id=org_id)
E               indra.api.api.not_enterprise_organization_error: High-Performance Features are not available for this dataset based on the usage plan of its parent organization.
E                   Please upgrade the organization to a plan that offers this feature.

deeplake/enterprise/convert_to_libdeeplake.py:211: not_enterprise_organization_error

Check failure on line 2924 in deeplake/core/vectorstore/test_deeplake_vectorstore.py

github-actions / JUnit Test Report

test_deeplake_vectorstore.test_returning_tql_for_exec_option_compute_engine_should_return_correct_tql

NotImplementedError: return_tql is not supported for exec_option=python

Raw output


            local_path = './hub_pytest/test_deeplake_vectorstore/test_returning_tql_for_exec_option_compute_engine_should_return_correct_tql'
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'

    def test_returning_tql_for_exec_option_compute_engine_should_return_correct_tql(
        local_path,
        hub_cloud_dev_token,
    ):
        db = VectorStore(
            path=local_path,
            token=hub_cloud_dev_token,
        )
    
        texts, embeddings, ids, metadatas, _ = utils.create_data(
            number_of_data=10, embedding_dim=3
        )
    
        db.add(text=texts, embedding=embeddings, id=ids, metadata=metadatas)
    
        query_embedding = np.zeros(3, dtype=np.float32)
>       output = db.search(embedding=query_embedding, return_tql=True)

deeplake\core\vectorstore\test_deeplake_vectorstore.py:2924: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake\core\vectorstore\deeplake_vectorstore.py:312: in search
    return self.dataset_handler.search(
deeplake\core\vectorstore\deep_memory\deep_memory.py:56: in wrapper
    return func(self, *args, **kwargs)
deeplake\core\vectorstore\dataset_handlers\client_side_dataset_handler.py:235: in search
    return vector_search.search(
deeplake\core\vectorstore\vector_search\vector_search.py:55: in search
    return EXEC_OPTION_TO_SEARCH_TYPE[exec_option](
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

query = None, query_emb = array([0., 0., 0.], dtype=float32)
exec_option = 'python'
dataset = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_returning_tql_for_exec_option_compute_engine_should_return_correct_tql', tensors=['text', 'metadata', 'embedding', 'id'])
logger = <Logger deeplake.core.vectorstore.deeplake_vectorstore (INFO)>
filter = None, embedding_tensor = 'embedding', distance_metric = 'COS', k = 4
return_tensors = ['text', 'metadata', 'id'], return_view = False
token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
org_id = None, return_tql = True

    def vector_search(
        query,
        query_emb,
        exec_option,
        dataset,
        logger,
        filter,
        embedding_tensor,
        distance_metric,
        k,
        return_tensors,
        return_view,
        token,
        org_id,
        return_tql,
    ) -> Union[Dict, DeepLakeDataset]:
        if query is not None:
            raise NotImplementedError(
                f"User-specified TQL queries are not supported for exec_option={exec_option} "
            )
    
        if return_tql:
>           raise NotImplementedError(
                f"return_tql is not supported for exec_option={exec_option}"
            )
E           NotImplementedError: return_tql is not supported for exec_option=python

deeplake\core\vectorstore\vector_search\python\vector_search.py:31: NotImplementedError

Check failure on line 2924 in deeplake/core/vectorstore/test_deeplake_vectorstore.py

github-actions / JUnit Test Report

test_deeplake_vectorstore.test_returning_tql_for_exec_option_compute_engine_should_return_correct_tql

NotImplementedError: return_tql is not supported for exec_option=python

Raw output


            local_path = './hub_pytest/test_deeplake_vectorstore/test_returning_tql_for_exec_option_compute_engine_should_return_correct_tql'
hub_cloud_dev_token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'

    def test_returning_tql_for_exec_option_compute_engine_should_return_correct_tql(
        local_path,
        hub_cloud_dev_token,
    ):
        db = VectorStore(
            path=local_path,
            token=hub_cloud_dev_token,
        )
    
        texts, embeddings, ids, metadatas, _ = utils.create_data(
            number_of_data=10, embedding_dim=3
        )
    
        db.add(text=texts, embedding=embeddings, id=ids, metadata=metadatas)
    
        query_embedding = np.zeros(3, dtype=np.float32)
>       output = db.search(embedding=query_embedding, return_tql=True)

deeplake\core\vectorstore\test_deeplake_vectorstore.py:2924: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake\core\vectorstore\deeplake_vectorstore.py:312: in search
    return self.dataset_handler.search(
deeplake\core\vectorstore\deep_memory\deep_memory.py:56: in wrapper
    return func(self, *args, **kwargs)
deeplake\core\vectorstore\dataset_handlers\client_side_dataset_handler.py:235: in search
    return vector_search.search(
deeplake\core\vectorstore\vector_search\vector_search.py:55: in search
    return EXEC_OPTION_TO_SEARCH_TYPE[exec_option](
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

query = None, query_emb = array([0., 0., 0.], dtype=float32)
exec_option = 'python'
dataset = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_returning_tql_for_exec_option_compute_engine_should_return_correct_tql', tensors=['text', 'metadata', 'embedding', 'id'])
logger = <Logger deeplake.core.vectorstore.deeplake_vectorstore (INFO)>
filter = None, embedding_tensor = 'embedding', distance_metric = 'COS', k = 4
return_tensors = ['text', 'metadata', 'id'], return_view = False
token = 'eyJhbGciOiJub25lIiwidHlwIjoiSldUIn0.eyJpZCI6InRlc3RpbmdhY2MyIiwiYXBpX2tleSI6IjU4Y0tLb1p6UE1BbThPU2RpbTRiZ2tBekhWekt1VUE3MFJpNTNyZUpKRTJuaiJ9.'
org_id = None, return_tql = True

    def vector_search(
        query,
        query_emb,
        exec_option,
        dataset,
        logger,
        filter,
        embedding_tensor,
        distance_metric,
        k,
        return_tensors,
        return_view,
        token,
        org_id,
        return_tql,
    ) -> Union[Dict, DeepLakeDataset]:
        if query is not None:
            raise NotImplementedError(
                f"User-specified TQL queries are not supported for exec_option={exec_option} "
            )
    
        if return_tql:
>           raise NotImplementedError(
                f"return_tql is not supported for exec_option={exec_option}"
            )
E           NotImplementedError: return_tql is not supported for exec_option=python

deeplake\core\vectorstore\vector_search\python\vector_search.py:31: NotImplementedError

View more details on GitHub Actions

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Merge pull request #2754 from activeloopai/testing_tests #1485

JUnit Test Report