From 5f849385d849550d8515d6adbc895020a8a86cd2 Mon Sep 17 00:00:00 2001 From: William Gorge Date: Tue, 23 Jan 2024 17:05:08 +0100 Subject: [PATCH] fix(elasticsearch): disable verification (#1469) * fix(elasticsearch): disable verification * add doc comment Signed-off-by: Luka Peschke * fix tests Signed-off-by: Luka Peschke * update changelog Signed-off-by: Luka Peschke * trigger CI Signed-off-by: Luka Peschke --------- Signed-off-by: Luka Peschke Co-authored-by: Luka Peschke --- CHANGELOG.md | 1 + tests/elasticsearch/test_elasticsearch.py | 5 +---- .../elasticsearch/elasticsearch_connector.py | 8 ++++++++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fdbd72c78..583bdd197 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### Changed - Google Big Query: A simple status check that validates the private key's format has been implemented +- Elasticsearch: Host verification has been disabled to tolerate strict network configurations ## [3.23.25] 2024-01-17 diff --git a/tests/elasticsearch/test_elasticsearch.py b/tests/elasticsearch/test_elasticsearch.py index a7beb5148..1fe9fa8c0 100644 --- a/tests/elasticsearch/test_elasticsearch.py +++ b/tests/elasticsearch/test_elasticsearch.py @@ -39,13 +39,10 @@ def pytest_generate_tests(metafunc): def test_connector(mocker): - class ElasticsearchMock: - def search(self, index, body): - return {'hits': {'hits': [{'_source': {'yo': 'la'}}]}} module = 'toucan_connectors.elasticsearch.elasticsearch_connector' mock_es = mocker.patch(f'{module}.Elasticsearch') - mock_es.return_value = ElasticsearchMock() + mock_es.return_value.search.return_value = {'hits': {'hits': [{'_source': {'yo': 'la'}}]}} con = ElasticsearchConnector( name='test', diff --git a/toucan_connectors/elasticsearch/elasticsearch_connector.py b/toucan_connectors/elasticsearch/elasticsearch_connector.py index 288b9521b..4dea15044 100644 --- a/toucan_connectors/elasticsearch/elasticsearch_connector.py +++ 
b/toucan_connectors/elasticsearch/elasticsearch_connector.py @@ -145,6 +145,14 @@ def _retrieve_data(self, data_source: ElasticsearchDataSource) -> pd.DataFrame: connection_params.append(h) esclient = Elasticsearch(connection_params) + # We need to set this flag as some customers force auth and refuse the connection if no auth + # header is present. Elasticsearch-py accepts 401/403s + # (https://github.com/elastic/elasticsearch-py/blob/v7.17.6/elasticsearch/transport.py#L586), + # but not connection errors. In consequence, we set the flag to True, which means that we + # couldn't figure out whether we are talking to Elasticsearch or not due to an auth error: + # https://github.com/elastic/elasticsearch-py/blob/v7.17.6/elasticsearch/transport.py#L216. + # If we are indeed not talking to Elasticsearch, the query will fail later on. + esclient.transport._verified_elasticsearch = True response = getattr(esclient, data_source.search_method)( index=data_source.index, body=data_source.body )