From 4074f252c57aa07b2b8b391221cf367ed3b0076a Mon Sep 17 00:00:00 2001 From: Avihai Ezaguy <32809676+AvihaiSam@users.noreply.github.com> Date: Mon, 7 Oct 2024 12:14:55 +0300 Subject: [PATCH] feat: modify_refresh_interval flag in opensearch index_documents (#2980) * feature/opensearch: modify_refresh_interval flag * CR change: rename modify_refresh_interval to enable_refresh_interval --------- Co-authored-by: jaidisido --- awswrangler/opensearch/_write.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 459f56c18..87955d1c2 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -504,6 +504,7 @@ def index_documents( initial_backoff: int | None = None, max_backoff: int | None = None, use_threads: bool | int = False, + enable_refresh_interval: bool = True, **kwargs: Any, ) -> dict[str, Any]: """ @@ -559,6 +560,8 @@ def index_documents( True to enable concurrent requests, False to disable multiple threads. If enabled os.cpu_count() will be used as the max number of threads. If integer is provided, specified number is used. 
+ enable_refresh_interval + True (default) to temporarily set the index ``refresh_interval`` to ``-1`` (disabled) while indexing documents and restore the previous value afterwards; False to leave ``refresh_interval`` untouched **kwargs KEYWORD arguments forwarded to bulk operation elasticsearch >= 7.10.2 / opensearch: \ @@ -614,7 +617,7 @@ def index_documents( widgets=widgets, max_value=total_documents, prefix="Indexing: " ).start() for i, bulk_chunk_documents in enumerate(actions): - if i == 1: # second bulk iteration, in case the index didn't exist before + if i == 1 and enable_refresh_interval: # second bulk iteration, in case the index didn't exist before refresh_interval = _get_refresh_interval(client, index) _disable_refresh_interval(client, index) _logger.debug("running bulk index of %s documents", len(bulk_chunk_documents)) @@ -655,6 +658,7 @@ def index_documents( raise e finally: - _set_refresh_interval(client, index, refresh_interval) + if enable_refresh_interval: + _set_refresh_interval(client, index, refresh_interval) return {"success": success, "errors": errors}