From b678086a6b90da4904ed5b1f84e3a410369641d1 Mon Sep 17 00:00:00 2001 From: Caden <132690522+marofke@users.noreply.github.com> Date: Sat, 23 Mar 2024 21:30:52 -0500 Subject: [PATCH] fix: Use boto SSL for telemetry requests, add opt out settings in UI (#230) Signed-off-by: Caden Marofke --- src/deadline/client/api/_telemetry.py | 97 +++++++++++----- .../ui/dialogs/deadline_config_dialog.py | 4 + .../deadline_client/api/test_api_telemetry.py | 109 ++++++++++++------ 3 files changed, 144 insertions(+), 66 deletions(-) diff --git a/src/deadline/client/api/_telemetry.py b/src/deadline/client/api/_telemetry.py index 05ac714f..6acda8f2 100644 --- a/src/deadline/client/api/_telemetry.py +++ b/src/deadline/client/api/_telemetry.py @@ -7,8 +7,10 @@ import platform import uuid import random +import ssl import time +from botocore.httpsession import get_cert_path from configparser import ConfigParser from dataclasses import asdict, dataclass, field from datetime import datetime @@ -88,7 +90,33 @@ def __init__( package_ver: str, config: Optional[ConfigParser] = None, ): - # Environment variable supersedes config file setting. + self._initialized: bool = False + self.package_name = package_name + self.package_ver = ".".join(package_ver.split(".")[:3]) + + # Need to set up a valid SSL context so requests can make it through + self._urllib3_context = ssl.SSLContext(ssl.PROTOCOL_TLS) + self._urllib3_context.verify_mode = ssl.CERT_REQUIRED + self._urllib3_context.check_hostname = True + self._urllib3_context.load_default_certs() + self._urllib3_context.load_verify_locations(cafile=get_cert_path(True)) + + # IDs for this session + self.session_id: str = str(uuid.uuid4()) + self.telemetry_id: str = self._get_telemetry_identifier(config=config) + # If a different base package is provided, include info from this library as supplementary info + if package_name != "deadline-cloud-library": + self._common_details["deadline-cloud-version"] = version + self._system_metadata = self._get_system_metadata(config=config) + self.set_opt_out(config=config) + self.initialize(config=config) + + def set_opt_out(self, config: Optional[ConfigParser] = None) -> None: + """ + Checks whether telemetry has been opted out by checking the DEADLINE_CLOUD_TELEMETRY_OPT_OUT + environment variable and the 'telemetry.opt_out' config file setting. + Note the environment variable supersedes the config file setting. + """ env_var_value = os.environ.get("DEADLINE_CLOUD_TELEMETRY_OPT_OUT") if env_var_value: self.telemetry_opted_out = env_var_value in config_file._TRUE_VALUES @@ -97,29 +125,43 @@ def __init__( config_file.get_setting("telemetry.opt_out", config=config) ) logger.info( - "Deadline Cloud telemetry is " + "not enabled." - if self.telemetry_opted_out - else "enabled." + "Deadline Cloud telemetry is " + + ("not enabled." if self.telemetry_opted_out else "enabled.") ) + + def initialize(self, config: Optional[ConfigParser] = None) -> None: + """ + Starts up the telemetry background thread after getting settings from the boto3 client. + Note that if this is called before boto3 is successfully configured / initialized, + an error can be raised. In that case we silently fail and don't mark the client as + initialized. + """ if self.telemetry_opted_out: return - self.package_name = package_name - self.package_ver = ".".join(package_ver.split(".")[:3]) - self.endpoint: str = self._get_prefixed_endpoint( - f"{get_deadline_endpoint_url(config=config)}/2023-10-12/telemetry", - TelemetryClient.ENDPOINT_PREFIX, - ) + try: + self.endpoint: str = self._get_prefixed_endpoint( + f"{get_deadline_endpoint_url(config=config)}/2023-10-12/telemetry", + TelemetryClient.ENDPOINT_PREFIX, + ) - # IDs for this session - self.session_id: str = str(uuid.uuid4()) - self.telemetry_id: str = self._get_telemetry_identifier(config=config) - # If a different base package is provided, include info from this library as supplementary info - if package_name != "deadline-cloud-library": - self._common_details["deadline-cloud-version"] = version - self._system_metadata = self._get_system_metadata(config=config) + user_id, _ = get_user_and_identity_store_id(config=config) + if user_id: + self._system_metadata["user_id"] = user_id + + monitor_id: Optional[str] = get_monitor_id(config=config) + if monitor_id: + self._system_metadata["monitor_id"] = monitor_id + + self._initialized = True + self._start_threads() + except Exception: + # Silently swallow any exceptions + return - self._start_threads() + @property + def is_initialized(self) -> bool: + return self._initialized def _get_prefixed_endpoint(self, endpoint: str, prefix: str) -> str: """Insert the prefix right after 'https://'""" @@ -163,14 +205,6 @@ def _get_system_metadata(self, config: Optional[ConfigParser]) -> Dict[str, Any] "osVersion": platform_info.release, } - user_id, _ = get_user_and_identity_store_id(config=config) - if user_id: - metadata["user_id"] = user_id - - monitor_id: Optional[str] = get_monitor_id(config=config) - if monitor_id: - metadata["monitor_id"] = monitor_id - return metadata def _exit_cleanly(self): @@ -182,7 +216,7 @@ def _send_request(self, req: request.Request) -> None: success = False while not success: try: - with request.urlopen(req): + with request.urlopen(req, context=self._urllib3_context): logger.debug("Successfully sent telemetry.") success = True except error.HTTPError as httpe: @@ -232,13 +266,14 @@ def _process_event_queue_thread(self): try: logger.debug("Sending telemetry data: %s", request_body) self._send_request(req) - except Exception: - # Silently swallow any kind of uncaught exception and stop sending telemetry + except Exception as exc: + # Swallow any kind of uncaught exception and stop sending telemetry + logger.debug(f"Error received from service. {str(exc)}") return self.event_queue.task_done() def _put_telemetry_record(self, event: TelemetryEvent) -> None: - if self.telemetry_opted_out: + if not self._initialized or self.telemetry_opted_out: return try: self.event_queue.put_nowait(event) @@ -303,6 +338,8 @@ def get_telemetry_client( package_ver=package_ver, config=config, ) + elif not __cached_telemetry_client.is_initialized: + __cached_telemetry_client.initialize(config=config) return __cached_telemetry_client diff --git a/src/deadline/client/ui/dialogs/deadline_config_dialog.py b/src/deadline/client/ui/dialogs/deadline_config_dialog.py index 901123f2..913c3e7b 100644 --- a/src/deadline/client/ui/dialogs/deadline_config_dialog.py +++ b/src/deadline/client/ui/dialogs/deadline_config_dialog.py @@ -282,6 +282,9 @@ def _build_general_settings_ui(self, group, layout): self.auto_accept = self._init_checkbox_setting( group, layout, "settings.auto_accept", "Auto Accept Confirmation Prompts" ) + self.telemetry_opt_out = self._init_checkbox_setting( + group, layout, "telemetry.opt_out", "Telemetry Opt Out" + ) self._conflict_resolution_options = [option.name for option in FileConflictResolution] self.conflict_resolution_box = self._init_combobox_setting( @@ -561,6 +564,7 @@ def apply(self) -> bool: for setting_name, value in self.changes.items(): config_file.set_setting(setting_name, value, self.config) root.setLevel(config_file.get_setting("settings.log_level")) + api.get_deadline_cloud_library_telemetry_client().set_opt_out(config=self.config) # Only update self.changes_were_applied if false. We don't want to invalidate that a change has # occurred if the user repeatedly hits "Apply" or hits "Apply" and then "Save". diff --git a/test/unit/deadline_client/api/test_api_telemetry.py b/test/unit/deadline_client/api/test_api_telemetry.py index d1cb44d4..788a7c0f 100644 --- a/test/unit/deadline_client/api/test_api_telemetry.py +++ b/test/unit/deadline_client/api/test_api_telemetry.py @@ -13,7 +13,7 @@ from deadline.job_attachments.progress_tracker import SummaryStatistics -@pytest.fixture(scope="function", name="telemetry_client") +@pytest.fixture(scope="function", name="mock_telemetry_client") def fixture_telemetry_client(fresh_deadline_config): config.set_setting("defaults.aws_profile_name", "SomeRandomProfileName") with patch.object(api.TelemetryClient, "_start_threads"), patch.object( @@ -25,9 +25,13 @@ def fixture_telemetry_client(fresh_deadline_config): ), patch.object( api._telemetry, "get_deadline_endpoint_url", side_effect=["https://fake-endpoint-url"] ): - return TelemetryClient( - "deadline-cloud-library", "0.1.2.1234", config=config.config_file.read_config() + client = TelemetryClient( + package_name="deadline-cloud-library", + package_ver="0.1.2.1234", + config=config.config_file.read_config(), ) + assert client.is_initialized + return client def test_opt_out_config(fresh_deadline_config): @@ -37,13 +41,11 @@ def test_opt_out_config(fresh_deadline_config): config.set_setting("telemetry.opt_out", "true") # WHEN client = TelemetryClient( - "test-library", "test-version", config=config.config_file.read_config() + "deadline-cloud-library", "test-version", config=config.config_file.read_config() ) # THEN + assert not client.is_initialized assert not hasattr(client, "endpoint") - assert not hasattr(client, "session_id") - assert not hasattr(client, "telemetry_id") - assert not hasattr(client, "system_metadata") assert not hasattr(client, "event_queue") assert not hasattr(client, "processing_thread") # Ensure nothing blows up if we try recording telemetry after we've opted out @@ -71,13 +73,11 @@ def test_opt_out_env_var(fresh_deadline_config, monkeypatch, env_var_value): ) # Ensure we ignore the config file if env var is set # WHEN client = TelemetryClient( - "test-library", "test-version", config=config.config_file.read_config() + "deadline-cloud-library", "test-version", config=config.config_file.read_config() ) # THEN + assert not client.is_initialized assert not hasattr(client, "endpoint") - assert not hasattr(client, "session_id") - assert not hasattr(client, "telemetry_id") - assert not hasattr(client, "system_metadata") assert not hasattr(client, "event_queue") assert not hasattr(client, "processing_thread") # Ensure nothing blows up if we try recording telemetry after we've opted out @@ -86,33 +86,68 @@ def test_opt_out_env_var(fresh_deadline_config, monkeypatch, env_var_value): client.record_error({}, str(type(Exception))) -def test_get_telemetry_identifier(telemetry_client): +def test_initialize_failure_then_success(fresh_deadline_config): + """ + Tests that a failure in initializing set keeps the property as false, but trying again + without an exception initializes everything successfully. + """ + config.set_setting("defaults.aws_profile_name", "SomeRandomProfileName") + with patch.object(api.TelemetryClient, "_start_threads"), patch.object( + api._telemetry, "get_monitor_id", side_effect=["monitor-id"] + ), patch.object( + api._telemetry, + "get_user_and_identity_store_id", + side_effect=[("user-id", "identity-store-id")], + ), patch.object( + api._telemetry, + "get_deadline_endpoint_url", + side_effect=[Exception("Boto3 blew up!"), "https://fake-endpoint-url"], + ): + client = TelemetryClient( + package_name="deadline-cloud-library", + package_ver="0.1.2.1234", + config=config.config_file.read_config(), + ) + + assert not client.is_initialized + assert not hasattr(client, "endpoint") + assert not hasattr(client, "event_queue") + assert not hasattr(client, "processing_thread") + + client.initialize(config=config.config_file.read_config()) + assert client.is_initialized + assert client.endpoint == "https://management.fake-endpoint-url/2023-10-12/telemetry" + assert client._system_metadata["user_id"] == "user-id" + assert client._system_metadata["monitor_id"] == "monitor-id" + + +def test_get_telemetry_identifier(mock_telemetry_client): """Ensures that getting the local-user-id handles empty/malformed strings""" # Confirm that we generate a new UUID if the setting doesn't exist, and write to config - uuid.UUID(telemetry_client.telemetry_id, version=4) # Should not raise ValueError - assert config.get_setting("telemetry.identifier") == telemetry_client.telemetry_id + uuid.UUID(mock_telemetry_client.telemetry_id, version=4) # Should not raise ValueError + assert config.get_setting("telemetry.identifier") == mock_telemetry_client.telemetry_id # Confirm we generate a new UUID if the local_user_id is not a valid UUID config.set_setting("telemetry.identifier", "bad-id") - telemetry_id = telemetry_client._get_telemetry_identifier() + telemetry_id = mock_telemetry_client._get_telemetry_identifier() assert telemetry_id != "bad-id" uuid.UUID(telemetry_id, version=4) # Should not raise ValueError # Confirm the new user id was saved and is retrieved properly assert config.get_setting("telemetry.identifier") == telemetry_id - assert telemetry_client._get_telemetry_identifier() == telemetry_id + assert mock_telemetry_client._get_telemetry_identifier() == telemetry_id @pytest.mark.timeout(5) # Timeout in case we don't exit the while loop -def test_process_event_queue_thread(telemetry_client): +def test_process_event_queue_thread(mock_telemetry_client): """Test that the queue processing thread function exits cleanly after getting None""" # GIVEN queue_mock = MagicMock() queue_mock.get.side_effect = [TelemetryEvent(), None] - telemetry_client.event_queue = queue_mock + mock_telemetry_client.event_queue = queue_mock # WHEN with patch.object(request, "urlopen") as urlopen_mock: - telemetry_client._process_event_queue_thread() + mock_telemetry_client._process_event_queue_thread() urlopen_mock.assert_called_once() # THEN assert queue_mock.get.call_count == 2 @@ -127,18 +162,20 @@ def test_process_event_queue_thread(telemetry_client): ], ) @pytest.mark.timeout(5) # Timeout in case we don't exit the while loop -def test_process_event_queue_thread_retries_and_exits(telemetry_client, http_code, attempt_count): +def test_process_event_queue_thread_retries_and_exits( + mock_telemetry_client, http_code, attempt_count +): """Test that the thread exits cleanly after getting an unexpected exception""" # GIVEN http_error = request.HTTPError("http://test.com", http_code, "Http Error", {}, None) # type: ignore queue_mock = MagicMock() queue_mock.get.side_effect = [TelemetryEvent(), None] - telemetry_client.event_queue = queue_mock + mock_telemetry_client.event_queue = queue_mock # WHEN with patch.object(request, "urlopen", side_effect=http_error) as urlopen_mock, patch.object( time, "sleep" ) as sleep_mock: - telemetry_client._process_event_queue_thread() + mock_telemetry_client._process_event_queue_thread() urlopen_mock.call_count = attempt_count sleep_mock.call_count = attempt_count # THEN @@ -146,21 +183,21 @@ def test_process_event_queue_thread_retries_and_exits(telemetry_client, http_cod @pytest.mark.timeout(5) # Timeout in case we don't exit the while loop -def test_process_event_queue_thread_handles_unexpected_error(telemetry_client): +def test_process_event_queue_thread_handles_unexpected_error(mock_telemetry_client): """Test that the thread exits cleanly after getting an unexpected exception""" # GIVEN queue_mock = MagicMock() queue_mock.get.side_effect = [TelemetryEvent(), None] - telemetry_client.event_queue = queue_mock + mock_telemetry_client.event_queue = queue_mock # WHEN with patch.object(request, "urlopen", side_effect=Exception("Some error")) as urlopen_mock: - telemetry_client._process_event_queue_thread() + mock_telemetry_client._process_event_queue_thread() urlopen_mock.assert_called_once() # THEN assert queue_mock.get.call_count == 1 -def test_record_hashing_summary(telemetry_client): +def test_record_hashing_summary(mock_telemetry_client): """Tests that recording a hashing summary sends the expected TelemetryEvent to the thread queue""" # GIVEN queue_mock = MagicMock() @@ -171,16 +208,16 @@ def test_record_hashing_summary(telemetry_client): event_type="com.amazon.rum.deadline.job_attachments.hashing_summary", event_details=expected_summary, ) - telemetry_client.event_queue = queue_mock + mock_telemetry_client.event_queue = queue_mock # WHEN - telemetry_client.record_hashing_summary(test_summary) + mock_telemetry_client.record_hashing_summary(test_summary) # THEN queue_mock.put_nowait.assert_called_once_with(expected_event) -def test_record_upload_summary(telemetry_client): +def test_record_upload_summary(mock_telemetry_client): """Tests that recording an upload summary sends the expected TelemetryEvent to the thread queue""" # GIVEN queue_mock = MagicMock() @@ -191,16 +228,16 @@ def test_record_upload_summary(telemetry_client): event_type="com.amazon.rum.deadline.job_attachments.upload_summary", event_details=expected_summary, ) - telemetry_client.event_queue = queue_mock + mock_telemetry_client.event_queue = queue_mock # WHEN - telemetry_client.record_upload_summary(test_summary, from_gui=True) + mock_telemetry_client.record_upload_summary(test_summary, from_gui=True) # THEN queue_mock.put_nowait.assert_called_once_with(expected_event) -def test_record_error(telemetry_client): +def test_record_error(mock_telemetry_client): """Test that recording an error sends the expected TelemetryEvent to the thread queue""" # GIVEN queue_mock = MagicMock() @@ -214,10 +251,10 @@ def test_record_error(telemetry_client): expected_event = TelemetryEvent( event_type="com.amazon.rum.deadline.error", event_details=expected_event_details ) - telemetry_client.event_queue = queue_mock + mock_telemetry_client.event_queue = queue_mock # WHEN - telemetry_client.record_error(test_error_details, str(type(test_exc))) + mock_telemetry_client.record_error(test_error_details, str(type(test_exc))) # THEN queue_mock.put_nowait.assert_called_once_with(expected_event) @@ -247,7 +284,7 @@ def test_record_error(telemetry_client): ], ) def test_get_prefixed_endpoint( - telemetry_client: TelemetryClient, endpoint: str, prefix: str, expected_result: str + mock_telemetry_client: TelemetryClient, endpoint: str, prefix: str, expected_result: str ): """Test that the _get_prefixed_endpoint function returns the expected prefixed endpoint""" - assert telemetry_client._get_prefixed_endpoint(endpoint, prefix) == expected_result + assert mock_telemetry_client._get_prefixed_endpoint(endpoint, prefix) == expected_result