From 485e8bc44b4b688030aba5128f72aa7fd66e080d Mon Sep 17 00:00:00 2001 From: Robert Dillitz Date: Fri, 7 Jul 2023 11:57:35 +0900 Subject: [PATCH] [SPARK-44312][CONNECT][PYTHON] Allow to set a user agent with an environment variable ### What changes were proposed in this pull request? Use the `SPARK_CONNECT_USER_AGENT` environment variable as a fallback for the prepended user agent string when the user agent is not set in `ChannelBuilder`. ### Why are the changes needed? Currently, a custom user agent string can only be specified through `ChannelBuilder`. It would be useful to be able to set this string with an environment variable as well. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual testing + existing tests. Closes #41866 from dillitz/SPARK-44312-user-agent-environment. Authored-by: Robert Dillitz Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/connect/client/core.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py index f8d304e9cccaa..537ab0a614046 100644 --- a/python/pyspark/sql/connect/client/core.py +++ b/python/pyspark/sql/connect/client/core.py @@ -296,7 +296,10 @@ def userAgent(self) -> str: or "_SPARK_CONNECT_PYTHON" when not specified. The returned value will be percent encoded. """ - user_agent = self.params.get(ChannelBuilder.PARAM_USER_AGENT, "_SPARK_CONNECT_PYTHON") + user_agent = self.params.get( + ChannelBuilder.PARAM_USER_AGENT, + os.getenv("SPARK_CONNECT_USER_AGENT", "_SPARK_CONNECT_PYTHON"), + ) ua_len = len(urllib.parse.quote(user_agent)) if ua_len > 2048: raise SparkConnectException(