From 9eae45e7b852a395b2f20dc23adf1dc1fa82f3fd Mon Sep 17 00:00:00 2001 From: romsharon98 Date: Fri, 26 Jan 2024 13:16:18 +0200 Subject: [PATCH 01/10] change hive proxy user --- airflow/providers/apache/hive/hooks/hive.py | 12 ++---------- airflow/providers/apache/hive/operators/hive.py | 6 ------ tests/providers/apache/hive/hooks/test_hive.py | 8 -------- 3 files changed, 2 insertions(+), 24 deletions(-) diff --git a/airflow/providers/apache/hive/hooks/hive.py b/airflow/providers/apache/hive/hooks/hive.py index 5bdaf8084cb86..789feb490f827 100644 --- a/airflow/providers/apache/hive/hooks/hive.py +++ b/airflow/providers/apache/hive/hooks/hive.py @@ -91,7 +91,6 @@ class HiveCliHook(BaseHook): def __init__( self, hive_cli_conn_id: str = default_conn_name, - run_as: str | None = None, mapred_queue: str | None = None, mapred_queue_priority: str | None = None, mapred_job_name: str | None = None, @@ -105,7 +104,6 @@ def __init__( self.use_beeline: bool = conn.extra_dejson.get("use_beeline", False) self.auth = auth self.conn = conn - self.run_as = run_as self.sub_process: Any = None if mapred_queue_priority: mapred_queue_priority = mapred_queue_priority.upper() @@ -124,15 +122,9 @@ def _get_proxy_user(self) -> str: conn = self.conn proxy_user_value: str = conn.extra_dejson.get("proxy_user", "") - if proxy_user_value == "login" and conn.login: - return f"hive.server2.proxy.user={conn.login}" - if proxy_user_value == "owner" and self.run_as: - return f"hive.server2.proxy.user={self.run_as}" - if proxy_user_value == "as_param" and self.proxy_user: - return f"hive.server2.proxy.user={self.proxy_user}" - if proxy_user_value != "": # There is a custom proxy user + if proxy_user_value != "": return f"hive.server2.proxy.user={proxy_user_value}" - return proxy_user_value # The default proxy user (undefined) + return f"hive.server2.proxy.user={self.proxy_user}" def _prepare_cli_cmd(self) -> list[Any]: """Create the command list from available information.""" diff --git a/airflow/providers/apache/hive/operators/hive.py b/airflow/providers/apache/hive/operators/hive.py index 8c7a7cf1e1d2b..398cadce0d829 100644 --- a/airflow/providers/apache/hive/operators/hive.py +++ b/airflow/providers/apache/hive/operators/hive.py @@ -54,7 +54,6 @@ class HiveOperator(BaseOperator): object documentation for more details. :param script_begin_tag: If defined, the operator will get rid of the part of the script before the first occurrence of `script_begin_tag` - :param run_as_owner: Run HQL code as a DAG's owner. :param mapred_queue: queue used by the Hadoop CapacityScheduler. (templated) :param mapred_queue_priority: priority within CapacityScheduler queue. Possible settings include: VERY_HIGH, HIGH, NORMAL, LOW, VERY_LOW @@ -91,7 +90,6 @@ def __init__( hiveconfs: dict[Any, Any] | None = None, hiveconf_jinja_translate: bool = False, script_begin_tag: str | None = None, - run_as_owner: bool = False, mapred_queue: str | None = None, mapred_queue_priority: str | None = None, mapred_job_name: str | None = None, @@ -107,9 +105,6 @@ def __init__( self.hiveconfs = hiveconfs or {} self.hiveconf_jinja_translate = hiveconf_jinja_translate self.script_begin_tag = script_begin_tag - self.run_as = None - if run_as_owner: - self.run_as = self.dag.owner self.mapred_queue = mapred_queue self.mapred_queue_priority = mapred_queue_priority self.mapred_job_name = mapred_job_name @@ -128,7 +123,6 @@ def hook(self) -> HiveCliHook: """Get Hive cli hook.""" return HiveCliHook( hive_cli_conn_id=self.hive_cli_conn_id, - run_as=self.run_as, mapred_queue=self.mapred_queue, mapred_queue_priority=self.mapred_queue_priority, mapred_job_name=self.mapred_job_name, diff --git a/tests/providers/apache/hive/hooks/test_hive.py b/tests/providers/apache/hive/hooks/test_hive.py index 461b101641aa8..ca53024b82d5e 100644 --- a/tests/providers/apache/hive/hooks/test_hive.py +++ b/tests/providers/apache/hive/hooks/test_hive.py @@ -883,14 +883,6 @@ def setup_method(self): "extra_dejson, correct_proxy_user, run_as, proxy_user", [ ({"proxy_user": "a_user_proxy"}, "hive.server2.proxy.user=a_user_proxy", None, None), - ({"proxy_user": "owner"}, "hive.server2.proxy.user=dummy_dag_owner", "dummy_dag_owner", None), - ({"proxy_user": "login"}, "hive.server2.proxy.user=admin", None, None), - ( - {"proxy_user": "as_param"}, - "hive.server2.proxy.user=param_proxy_user", - None, - "param_proxy_user", - ), ], ) def test_get_proxy_user_value(self, extra_dejson, correct_proxy_user, run_as, proxy_user): From 8986cffdc81f5832f76d7701febd6f204ecf6d50 Mon Sep 17 00:00:00 2001 From: romsharon98 Date: Sat, 27 Jan 2024 13:08:45 +0200 Subject: [PATCH 02/10] remove extra from connection --- airflow/providers/apache/hive/hooks/hive.py | 23 +++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/airflow/providers/apache/hive/hooks/hive.py b/airflow/providers/apache/hive/hooks/hive.py index 789feb490f827..ec006969427ad 100644 --- a/airflow/providers/apache/hive/hooks/hive.py +++ b/airflow/providers/apache/hive/hooks/hive.py @@ -117,6 +117,29 @@ def __init__( self.mapred_job_name = mapred_job_name self.proxy_user = proxy_user + @classmethod + def get_connection_form_widgets(cls) -> dict[str, Any]: + """Returns connection widgets to add to connection form.""" + from flask_appbuilder.fieldwidgets import BS3TextFieldWidget + from flask_babel import lazy_gettext + from wtforms import BooleanField, StringField + + return { + "use_beeline": BooleanField(lazy_gettext("Use Beeline"), default=False), + "proxy_user": StringField(lazy_gettext("Proxy User"), widget=BS3TextFieldWidget(), default=""), + "principal": StringField( + lazy_gettext("Principal"), widget=BS3TextFieldWidget(), default="hive/_HOST@EXAMPLE.COM" + ), + } + + @classmethod + def get_ui_field_behaviour(cls) -> dict[str, Any]: + """Returns custom field behaviour.""" + return { + "hidden_fields": ["extra"], + "relabeling": {}, + } + def _get_proxy_user(self) -> str: """Set the proper proxy_user value in case the user overwrite the default.""" conn = self.conn From 8dbe1eef7e848e255cc284714bb6dc039adc645a Mon Sep 17 00:00:00 2001 From: romsharon98 Date: Sat, 27 Jan 2024 13:23:02 +0200 Subject: [PATCH 03/10] change documentation --- .../connections/hive_cli.rst | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst b/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst index 2e23ec63d5ad0..2e7f416328b6f 100644 --- a/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst +++ b/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst @@ -64,19 +64,14 @@ Schema (optional) Specify your JDBC Hive database that you want to connect to with Beeline or specify a schema for an HQL statement to run with the Hive CLI. -Extra (optional) - Specify the extra parameters (as json dictionary) that can be used in Hive CLI connection. - The following parameters are all optional: - - * ``use_beeline`` - Specify as ``True`` if using the Beeline CLI. Default is ``False``. - * ``proxy_user`` - Specify a proxy user as an ``owner`` or ``login`` or ``as_param`` keep blank if using a - custom proxy user. - When using ``owner`` you will want to pass the operator ``run_as_owner=True`` if you don't you will run the hql as user="owner" - When using ``as_param`` you will want to pass the operator ``proxy_user=`` if you don't you will run the hql as user="as_param" - * ``principal`` - Specify the JDBC Hive principal to be used with Hive Beeline. +User Beelin (optional) + Specify as ``True`` if using the Beeline CLI. Default is ``False``. + +Proxy User (optional) + Specify a proxy user to run HQL code as this user + +Principal (optional) + Specify the JDBC Hive principal to be used with Hive Beeline. When specifying the connection in environment variable you should specify From 2d5ed957f4026d4218d829eb1a151e550b355af0 Mon Sep 17 00:00:00 2001 From: romsharon98 Date: Sat, 27 Jan 2024 16:38:12 +0200 Subject: [PATCH 04/10] add breaking change and change version --- airflow/providers/apache/hive/CHANGELOG.rst | 12 ++++++++++++ airflow/providers/apache/hive/provider.yaml | 1 + 2 files changed, 13 insertions(+) diff --git a/airflow/providers/apache/hive/CHANGELOG.rst b/airflow/providers/apache/hive/CHANGELOG.rst index 3ab9928de0f26..c3a2bcb21c9bf 100644 --- a/airflow/providers/apache/hive/CHANGELOG.rst +++ b/airflow/providers/apache/hive/CHANGELOG.rst @@ -27,6 +27,18 @@ Changelog --------- +7.0.0 +..... + + +Breaking changes +~~~~~~~~~~~~~~~~ + +* The ``use_beeline``, ``proxy_user``, ``principal`` option is moved from the extra field to the ``Use Beeline``, ``Proxy User``, ``Principal`` parameter in the Hook. +* Remove the ability of specify a proxy user as an ``owner`` or ``login`` or ``as_param`` in the connection. Now, setting the user in ``Proxy User`` connection parameter or passing ``proxy_user`` to HiveHook will do the job. + +* ``Feature make hive client connection simpler (#37043)`` + 6.4.2 ..... diff --git a/airflow/providers/apache/hive/provider.yaml b/airflow/providers/apache/hive/provider.yaml index 12dcf4f3edecd..ea24c2b37027c 100644 --- a/airflow/providers/apache/hive/provider.yaml +++ b/airflow/providers/apache/hive/provider.yaml @@ -24,6 +24,7 @@ description: | state: ready source-date-epoch: 1705911912 versions: + - 7.0.0 - 6.4.2 - 6.4.1 - 6.4.0 From 007184eddde3f2dc5fadcc06372eeef7a597977b Mon Sep 17 00:00:00 2001 From: romsharon98 Date: Sun, 28 Jan 2024 13:00:26 +0200 Subject: [PATCH 05/10] fix spell check --- .../connections/hive_cli.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst b/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst index 2e7f416328b6f..836e36ad1762f 100644 --- a/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst +++ b/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst @@ -64,7 +64,7 @@ Schema (optional) Specify your JDBC Hive database that you want to connect to with Beeline or specify a schema for an HQL statement to run with the Hive CLI. -User Beelin (optional) +User Beeline (optional) Specify as ``True`` if using the Beeline CLI. Default is ``False``. Proxy User (optional) From ee37666b181fddb453af88bdf627c35f2fbb0ea6 Mon Sep 17 00:00:00 2001 From: romsharon98 Date: Sun, 28 Jan 2024 18:10:59 +0200 Subject: [PATCH 06/10] fix docs --- airflow/providers/apache/hive/CHANGELOG.rst | 4 ++-- .../connections/hive_cli.rst | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airflow/providers/apache/hive/CHANGELOG.rst b/airflow/providers/apache/hive/CHANGELOG.rst index c3a2bcb21c9bf..4f2596b29877b 100644 --- a/airflow/providers/apache/hive/CHANGELOG.rst +++ b/airflow/providers/apache/hive/CHANGELOG.rst @@ -34,10 +34,10 @@ Changelog Breaking changes ~~~~~~~~~~~~~~~~ -* The ``use_beeline``, ``proxy_user``, ``principal`` option is moved from the extra field to the ``Use Beeline``, ``Proxy User``, ``Principal`` parameter in the Hook. +* ``use_beeline``, ``proxy_user``, ``principal`` in hive client connection options is moved from the extra field to ``Use Beeline``, ``Proxy User``, ``Principal`` parameter in the Hook. * Remove the ability of specify a proxy user as an ``owner`` or ``login`` or ``as_param`` in the connection. Now, setting the user in ``Proxy User`` connection parameter or passing ``proxy_user`` to HiveHook will do the job. -* ``Feature make hive client connection simpler (#37043)`` +* ``Simplify hive client connection #37043`` 6.4.2 ..... diff --git a/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst b/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst index 836e36ad1762f..405b27a98353d 100644 --- a/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst +++ b/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst @@ -68,7 +68,7 @@ User Beeline (optional) Specify as ``True`` if using the Beeline CLI. Default is ``False``. Proxy User (optional) - Specify a proxy user to run HQL code as this user + Specify a proxy user to run HQL code as this user. Principal (optional) Specify the JDBC Hive principal to be used with Hive Beeline. From d69a5cb9f5c94235ddf7cc110e741f905a52ad89 Mon Sep 17 00:00:00 2001 From: Elad Kalif <45845474+eladkal@users.noreply.github.com> Date: Sun, 28 Jan 2024 18:23:53 +0100 Subject: [PATCH 07/10] Update airflow/providers/apache/hive/CHANGELOG.rst --- airflow/providers/apache/hive/CHANGELOG.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/airflow/providers/apache/hive/CHANGELOG.rst b/airflow/providers/apache/hive/CHANGELOG.rst index 4f2596b29877b..a0849c8468e6d 100644 --- a/airflow/providers/apache/hive/CHANGELOG.rst +++ b/airflow/providers/apache/hive/CHANGELOG.rst @@ -37,7 +37,6 @@ Breaking changes * ``use_beeline``, ``proxy_user``, ``principal`` in hive client connection options is moved from the extra field to ``Use Beeline``, ``Proxy User``, ``Principal`` parameter in the Hook. * Remove the ability of specify a proxy user as an ``owner`` or ``login`` or ``as_param`` in the connection. Now, setting the user in ``Proxy User`` connection parameter or passing ``proxy_user`` to HiveHook will do the job. -* ``Simplify hive client connection #37043`` 6.4.2 ..... From 0761c2a0aba1e40e19d2b723070d926752332e1d Mon Sep 17 00:00:00 2001 From: romsharon98 Date: Mon, 29 Jan 2024 13:58:26 +0200 Subject: [PATCH 08/10] fix typo --- .../connections/hive_cli.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst b/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst index 405b27a98353d..cc52f1db92be2 100644 --- a/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst +++ b/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst @@ -64,7 +64,7 @@ Schema (optional) Specify your JDBC Hive database that you want to connect to with Beeline or specify a schema for an HQL statement to run with the Hive CLI. -User Beeline (optional) +Use Beeline (optional) Specify as ``True`` if using the Beeline CLI. Default is ``False``. Proxy User (optional) From 649d13deb7eb80b8a1a710121f4d0b16b30ddf74 Mon Sep 17 00:00:00 2001 From: romsharon98 Date: Mon, 29 Jan 2024 17:26:21 +0200 Subject: [PATCH 09/10] proxy user sent in operator will override proxy user in connection --- airflow/providers/apache/hive/hooks/hive.py | 5 +++-- tests/providers/apache/hive/hooks/test_hive.py | 7 +++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/airflow/providers/apache/hive/hooks/hive.py b/airflow/providers/apache/hive/hooks/hive.py index ec006969427ad..d0dfa10c62258 100644 --- a/airflow/providers/apache/hive/hooks/hive.py +++ b/airflow/providers/apache/hive/hooks/hive.py @@ -143,11 +143,12 @@ def get_ui_field_behaviour(cls) -> dict[str, Any]: def _get_proxy_user(self) -> str: """Set the proper proxy_user value in case the user overwrite the default.""" conn = self.conn - + if self.proxy_user is not None: + return f"hive.server2.proxy.user={self.proxy_user}" proxy_user_value: str = conn.extra_dejson.get("proxy_user", "") if proxy_user_value != "": return f"hive.server2.proxy.user={proxy_user_value}" - return f"hive.server2.proxy.user={self.proxy_user}" + return "" def _prepare_cli_cmd(self) -> list[Any]: """Create the command list from available information.""" diff --git a/tests/providers/apache/hive/hooks/test_hive.py b/tests/providers/apache/hive/hooks/test_hive.py index ca53024b82d5e..a137364767ffa 100644 --- a/tests/providers/apache/hive/hooks/test_hive.py +++ b/tests/providers/apache/hive/hooks/test_hive.py @@ -880,12 +880,12 @@ def setup_method(self): self.nondefault_schema = "nondefault" @pytest.mark.parametrize( - "extra_dejson, correct_proxy_user, run_as, proxy_user", + "extra_dejson, correct_proxy_user, proxy_user", [ - ({"proxy_user": "a_user_proxy"}, "hive.server2.proxy.user=a_user_proxy", None, None), + ({"proxy_user": "a_user_proxy"}, "hive.server2.proxy.user=a_user_proxy", None), ], ) - def test_get_proxy_user_value(self, extra_dejson, correct_proxy_user, run_as, proxy_user): + def test_get_proxy_user_value(self, extra_dejson, correct_proxy_user, proxy_user): hook = MockHiveCliHook() returner = mock.MagicMock() returner.extra_dejson = extra_dejson @@ -893,7 +893,6 @@ def test_get_proxy_user_value(self, extra_dejson, correct_proxy_user, run_as, pr hook.use_beeline = True hook.conn = returner hook.proxy_user = proxy_user - hook.run_as = run_as # Run result = hook._prepare_cli_cmd() From 54dbf5b36cb1addfb3f9485931a712f5ca5b9457 Mon Sep 17 00:00:00 2001 From: romsharon98 Date: Wed, 31 Jan 2024 16:03:52 +0200 Subject: [PATCH 10/10] fix change log --- airflow/providers/apache/hive/CHANGELOG.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/airflow/providers/apache/hive/CHANGELOG.rst b/airflow/providers/apache/hive/CHANGELOG.rst index a0849c8468e6d..66836e68876cd 100644 --- a/airflow/providers/apache/hive/CHANGELOG.rst +++ b/airflow/providers/apache/hive/CHANGELOG.rst @@ -34,7 +34,6 @@ Changelog Breaking changes ~~~~~~~~~~~~~~~~ -* ``use_beeline``, ``proxy_user``, ``principal`` in hive client connection options is moved from the extra field to ``Use Beeline``, ``Proxy User``, ``Principal`` parameter in the Hook. * Remove the ability of specify a proxy user as an ``owner`` or ``login`` or ``as_param`` in the connection. Now, setting the user in ``Proxy User`` connection parameter or passing ``proxy_user`` to HiveHook will do the job.