diff --git a/python/python/bystro/proteomics/annotation_interface.py b/python/python/bystro/proteomics/annotation_interface.py index 1b41fe65b..b2de7231a 100644 --- a/python/python/bystro/proteomics/annotation_interface.py +++ b/python/python/bystro/proteomics/annotation_interface.py @@ -951,8 +951,6 @@ async def async_run_annotation_query( query (dict[str, Any]): The OpenSearch query. index_name (str): The name of the index. fields (list[str] | None): Additional fields to include in the DataFrame, defaults to None. - cluster_opensearch_config (dict[str, Any] | None): - Configuration for the OpenSearch cluster,defaults to None. bystro_api_auth (CachedAuth | None): Bystro API authentication, defaults to None. additional_client_args (dict[str, Any] | None): Additional arguments for the OpenSearch client, defaults to None. @@ -1087,8 +1085,8 @@ async def async_get_annotation_result_from_query( Args: query_string (str): The query string to use for the search. index_name (str): The name of the index to search. + bystro_api_auth (CachedAuth): The authentication for the Bystro API. fields (list[str] | None): The fields to include in the results, defaults to None. - bystro_api_auth (CachedAuth | None): The authentication for the Bystro API, defaults to None. additional_client_args (dict[str, Any] | None): Additional arguments for the OpenSearch client, defaults to None. structs_of_arrays (bool): Whether to return structs of arrays, defaults to True. @@ -1189,7 +1187,6 @@ def get_annotation_result_from_query( query_string, index_name, fields=fields, - cluster_opensearch_config=cluster_opensearch_config, bystro_api_auth=bystro_api_auth, additional_client_args=additional_client_args, structs_of_arrays=structs_of_arrays, diff --git a/python/python/bystro/proteomics/tests/test_annotation_interface.py b/python/python/bystro/proteomics/tests/test_annotation_interface.py index be9d8034f..2f7125bfd 100644 --- a/python/python/bystro/proteomics/tests/test_annotation_interface.py +++ b/python/python/bystro/proteomics/tests/test_annotation_interface.py @@ -5,6 +5,8 @@ import msgspec import pytest +from bystro.api.auth import CachedAuth + from bystro.proteomics.annotation_interface import ( DOSAGE_GENERATED_COLUMN, process_query_response, @@ -104,6 +106,45 @@ async def close(self) -> None: return +@pytest.mark.asyncio +async def test_legacy_get_annotation_results_from_query(mocker): + mocker.patch( + "bystro.proteomics.annotation_interface.AsyncOpenSearch", + return_value=MockAsyncOpenSearchLegacy(), + ) + # inputRef doesn't exist in the legacy datasets, pre Q1-2024 + mocker.patch("bystro.proteomics.annotation_interface.INPUT_REF_FIELD", "ref") + mocker.patch( + "bystro.proteomics.annotation_interface.ALWAYS_INCLUDED_FIELDS", + [ + "chrom", + "pos", + "vcfPos", + "ref", + "alt", + "type", + "id", + ], + ) + + query_string = "exonic (gnomad.genomes.af:<0.1 || gnomad.exomes.af:<0.1)" + index_name = "mock_index_name" + + samples_and_genes_df = await async_get_annotation_result_from_query( + query_string, + index_name, + cluster_opensearch_config={ + "connection": { + "nodes": ["http://localhost:9200"], + "request_timeout": 1200, + "use_ssl": False, + "verify_certs": False, + }, + }, + ) + assert (1405, 52) == samples_and_genes_df.shape + + @pytest.mark.asyncio async def test_get_annotation_results_from_query_with_samples(mocker): mocker.patch( @@ -289,60 +330,68 @@ def test_process_response(): ) +import asyncio + @pytest.mark.asyncio async def test_join_annotation_result_to_fragpipe_dataset(mocker): + # Create a mock OpenSearch client + mock_opensearch_client = mocker.Mock() + + # Wrap return values of async functions in asyncio.Future + mock_opensearch_client.create_point_in_time.return_value = asyncio.Future() + mock_opensearch_client.create_point_in_time.return_value.set_result({"pit_id": "mock_pit_id"}) + + mock_opensearch_client.close.return_value = asyncio.Future() + mock_opensearch_client.close.return_value.set_result(None) + + mock_opensearch_client.delete_point_in_time.return_value = asyncio.Future() + mock_opensearch_client.delete_point_in_time.return_value.set_result(None) + + # Wrap the search method's return value in a future to simulate async behavior + mock_opensearch_client.search.return_value = asyncio.Future() + mock_opensearch_client.search.return_value.set_result({"hits": {"hits": TEST_RESPONSES_WITH_SAMPLES}}) + + # Ensure indices.exists returns a future + mock_opensearch_client.indices.exists.return_value = asyncio.Future() + mock_opensearch_client.indices.exists.return_value.set_result(True) + + # Patch the creation of the AsyncOpenSearch client to use the mock instead + mocker.patch("bystro.proteomics.annotation_interface.AsyncOpenSearch", return_value=mock_opensearch_client) + + # Create and patch a mock CachedAuth instance + mock_bystro_api_auth = CachedAuth(email="test_user@gmail.com", access_token="123456", url="http://mockserver:9200") + mocker.patch("bystro.proteomics.annotation_interface.CachedAuth", return_value=mock_bystro_api_auth) + + # Patch the function that creates the client to ensure it returns the mock client mocker.patch( - "bystro.proteomics.annotation_interface.AsyncOpenSearch", - return_value=MockAsyncOpenSearch(TEST_RESPONSES_WITH_SAMPLES), + "bystro.proteomics.annotation_interface.get_async_proxied_opensearch_client", + return_value=mock_opensearch_client ) + # Mock the async_get_num_slices function to return a fixed number of slices + mocker.patch("bystro.proteomics.annotation_interface.async_get_num_slices", return_value=asyncio.Future()) + mocker.patch("bystro.proteomics.annotation_interface.async_get_num_slices").return_value.set_result((1, None)) + query_string = "exonic (gnomad.genomes.af:<0.1 || gnomad.exomes.af:<0.1)" index_name = "foo" + # Run the test with the mocked client and auth query_result_df = await async_get_annotation_result_from_query( query_string, index_name, - cluster_opensearch_config={ - "connection": { - "nodes": ["http://localhost:9200"], - "request_timeout": 1200, - "use_ssl": False, - "verify_certs": False, - }, - }, + bystro_api_auth=mock_bystro_api_auth, explode_field="refSeq.name2", ) + # Assert the shape of the returned dataframe assert (582, 12) == query_result_df.shape - sample_ids = query_result_df[SAMPLE_GENERATED_COLUMN].unique() - - abundance_file = str(Path(__file__).parent / "example_abundance_gene_MD.tsv") - experiment_file = str(Path(__file__).parent / "example_experiment_annotation_file.tsv") - tmt_dataset = load_tandem_mass_tag_dataset(abundance_file, experiment_file) - - sample_names = list(tmt_dataset.annotation_df.index)[0 : sample_ids.shape[0]] - - # replace the sample ids with the sample names - replacements = {sample_id: sample_name for sample_id, sample_name in zip(sample_ids, sample_names)} - query_result_df[SAMPLE_GENERATED_COLUMN] = query_result_df[SAMPLE_GENERATED_COLUMN].replace( - replacements - ) - - joined_df = join_annotation_result_to_proteomic_dataset( - query_result_df, tmt_dataset, proteomic_join_column=FRAGPIPE_GENE_GENE_NAME_COLUMN_RENAMED - ) - - assert (90, 17) == joined_df.shape - - retained_fragpipe_columns = [] - for name in FRAGPIPE_RENAMED_COLUMNS: - if name in [FRAGPIPE_SAMPLE_COLUMN, FRAGPIPE_GENE_GENE_NAME_COLUMN_RENAMED]: - continue - retained_fragpipe_columns.append(name) + # Further assertions to verify that the mock client was used and no real connections were made + mock_opensearch_client.search.assert_called_once() + mock_opensearch_client.create_point_in_time.assert_called_once() + mock_opensearch_client.delete_point_in_time.assert_called_once() + mock_opensearch_client.close.assert_called_once() - retained_fragpipe_columns.append(FRAGPIPE_SAMPLE_INTENSITY_COLUMN) - assert list(joined_df.columns) == list(query_result_df.columns) + retained_fragpipe_columns @pytest.mark.asyncio