From a210df561af9172198460f746d9df54ec90b9eb3 Mon Sep 17 00:00:00 2001 From: Arshia Moghimi Date: Thu, 26 Oct 2023 12:17:39 -0700 Subject: [PATCH] fix: fixed configuration parameters naming (#158) * fix: better naming for configuration parameters --- CHANGELOG.md | 2 +- cz.json | 2 +- src/configuration/README.md | 14 +++++++------- src/configuration/configuration.py | 18 +++++++++--------- src/configuration/defaults.py | 6 +++--- src/objective/parametric_function.py | 27 +++++++++++++-------------- src/parrotfish.py | 6 +++--- src/recommendation/recommender.py | 6 +++--- src/sampling/sampler.py | 2 +- 9 files changed, 41 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d0e31b..3c17447 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## Unreleased +## 0.3.0 (2023-10-24) ## 0.1.0 (2023-10-20) diff --git a/cz.json b/cz.json index eb3597e..61bd5f9 100644 --- a/cz.json +++ b/cz.json @@ -3,7 +3,7 @@ "name": "cz_conventional_commits", "tag_format": "$version", "version_scheme": "semver", - "version": "0.1.0", + "version": "0.3.0", "update_changelog_on_bump": true } } \ No newline at end of file diff --git a/src/configuration/README.md b/src/configuration/README.md index dfb061e..0d581e5 100644 --- a/src/configuration/README.md +++ b/src/configuration/README.md @@ -23,18 +23,18 @@ ... "memory_bounds": Array containing two memory values that represent the memory configuration bounds (Optional), "termination_threshold": When the knowledge value for the optimal memory configuration reaches this threshold the recommendation algorithm terminates. (Optional, Default is 3), - "max_sample_count": The maximum size of the sample. (Optional, Default is 20), - "min_invocations": The minimum number of invocations per iteration. (Optional, Default is 4), + "max_total_sample_count": The maximum size of the sample. (Optional, Default is 20), + "min_sample_per_config": The minimum number of invocations per iteration. 
(Optional, Default is 4, minimum is 2), "dynamic_sampling_params": { - "max_sample_count": The maximum number of samples we gather through dynamically (Default is 8), + "max_sample_per_config": The maximum number of samples we gather through dynamic sampling (Default is 8), "coefficient_of_variation_threshold": When sample dynamically until we find a consistant enough. Consistency is measured by the coefficient of variation, and when the calculated coefficient of variation reaches this threshold we terminate the dynamic sampling (Default is 0.05), } (Optional), "max_number_of_invocation_attempts": The maximum number of attempts per invocation when this number is reached an error is raised. (Optional, Default is 5) - "execution_time_threshold": The execution time threshold constraint. We leverages the execution time model to recommend a configuration - that minimizes cost while adhering to the specified execution time constraint. (Optional) - "cost_tolerance_window": The cost tolerance window (in percent). We leverage the cost model to recommend a configuration that maximizes performance while - increasing the cost by at most X%, where X is the cost tolerance window . (Optional) + "constraint_execution_time_threshold": The execution time threshold constraint. We leverage the execution time model to recommend a configuration + that minimizes cost while adhering to the specified execution time constraint. (Optional, Default is +infinity) + "constraint_cost_tolerance_percent": The cost tolerance window (in percent). We leverage the cost model to recommend a configuration that maximizes performance while + increasing the cost by at most X%, where X is the cost tolerance window. 
(Optional, Default is 0) } ``` diff --git a/src/configuration/configuration.py b/src/configuration/configuration.py index 7bf632f..eca4e70 100644 --- a/src/configuration/configuration.py +++ b/src/configuration/configuration.py @@ -14,11 +14,11 @@ def __init__(self, config_file: TextIO): # Setup default values self.dynamic_sampling_params = DYNAMIC_SAMPLING_PARAMS self.termination_threshold = TERMINATION_THRESHOLD - self.max_sample_count = MAX_SAMPLE_COUNT - self.min_invocations = MIN_INVOCATIONS + self.max_total_sample_count = MAX_TOTAL_SAMPLE_COUNT + self.min_sample_per_config = MIN_SAMPLE_PER_CONFIG self.max_number_of_invocation_attempts = MAX_NUMBER_OF_INVOCATION_ATTEMPTS - self.execution_time_threshold = None - self.cost_tolerance_window = None + self.constraint_execution_time_threshold = None + self.constraint_cost_tolerance_percent = None self.memory_bounds = None # Parse the configuration file @@ -56,12 +56,12 @@ def _load_config_schema(self): "maxItems": 2, }, "termination_threshold": {"type": "number", "minimum": 0}, - "max_sample_count": {"type": "integer", "minimum": 0}, - "min_invocations": {"type": "integer", "minimum": 0}, + "max_total_sample_count": {"type": "integer", "minimum": 0}, + "min_sample_per_config": {"type": "integer", "minimum": 2}, "dynamic_sampling_params": { "type": "object", "properties": { - "max_sample_count": {"type": "integer", "minimum": 0}, + "max_sample_per_config": {"type": "integer", "minimum": 0}, "coefficient_of_variation_threshold": { "type": "number", "minimum": 0, @@ -69,8 +69,8 @@ def _load_config_schema(self): }, }, "max_number_of_invocation_attempts": {"type": "integer", "minimum": 0}, - "execution_time_threshold": {"type": "integer", "minimum": 1}, - "cost_tolerance_window": {"type": "integer", "minimum": 1}, + "constraint_execution_time_threshold": {"type": "integer", "minimum": 1}, + "constraint_cost_tolerance_percent": {"type": "integer", "minimum": 1}, }, "required": ["function_name", "vendor", "region"], "if": 
{"not": {"required": ["payload"]}}, diff --git a/src/configuration/defaults.py b/src/configuration/defaults.py index f08293e..1d7a6b3 100644 --- a/src/configuration/defaults.py +++ b/src/configuration/defaults.py @@ -1,12 +1,12 @@ import logging DYNAMIC_SAMPLING_PARAMS = { - "max_sample_count": 8, + "max_sample_per_config": 8, "coefficient_of_variation_threshold": 0.05, } MAX_NUMBER_OF_INVOCATION_ATTEMPTS = 5 -MAX_SAMPLE_COUNT = 20 -MIN_INVOCATIONS = 4 +MAX_TOTAL_SAMPLE_COUNT = 20 +MIN_SAMPLE_PER_CONFIG = 4 TERMINATION_THRESHOLD = 3 LOG_LEVEL = logging.WARNING diff --git a/src/objective/parametric_function.py b/src/objective/parametric_function.py index dc88756..d6210ca 100644 --- a/src/objective/parametric_function.py +++ b/src/objective/parametric_function.py @@ -48,14 +48,15 @@ def fit(self, sample: Sample) -> None: )[0] def minimize( - self, memory_space: np.ndarray, execution_time_threshold: int = None, cost_tolerance_window: int = None + self, memory_space: np.ndarray, constraint_execution_time_threshold: int = None, + constraint_cost_tolerance_percent: int = None ) -> int: """Minimizes the cost function and returns the corresponding memory configuration. Args: memory_space (np.ndarray): The memory space specific to the cloud provider. - execution_time_threshold (int): The execution time threshold constraint. - cost_tolerance_window (int): The cost tolerance window constraint. + constraint_execution_time_threshold (int): The execution time threshold constraint. + constraint_cost_tolerance_percent (int): The cost tolerance window constraint. Returns: int: Memory configuration that minimizes the cost function. 
@@ -63,26 +64,26 @@ def minimize( costs = self.__call__(memory_space) * memory_space # Handling execution threshold constraint - if execution_time_threshold: + if constraint_execution_time_threshold: try: memory_space, costs = self._filter_execution_time_constraint( - memory_space, costs, execution_time_threshold + memory_space, costs, constraint_execution_time_threshold ) except UnfeasibleConstraintError as e: logger.warning(e) - if cost_tolerance_window: + if constraint_cost_tolerance_percent: execution_times = costs / memory_space - min_index = self._find_min_index_within_tolerance(costs, execution_times, cost_tolerance_window) + min_index = self._find_min_index_within_tolerance(costs, execution_times, constraint_cost_tolerance_percent) else: min_index = np.argmin(costs) return memory_space[min_index] @staticmethod def _find_min_index_within_tolerance(costs: np.ndarray, execution_times: np.ndarray, - cost_tolerance_window: int) -> int: + constraint_cost_tolerance_percent: int) -> int: min_cost = np.min(costs) - min_cost_tolerance_window = min_cost + cost_tolerance_window / 100 * min_cost + min_cost_tolerance_window = min_cost + constraint_cost_tolerance_percent / 100 * min_cost min_index = 0 min_execution_time = np.inf for i in range(len(execution_times)): @@ -96,20 +97,18 @@ def _find_min_index_within_tolerance(costs: np.ndarray, execution_times: np.ndar def _filter_execution_time_constraint( memory_space: np.ndarray, costs: np.ndarray, - execution_time_threshold: int = None, + constraint_execution_time_threshold: int = None, ) -> tuple: filtered_memories = np.array([]) filtered_costs = np.array([]) execution_times = costs / memory_space for i in range(len(execution_times)): - if execution_times[i] <= execution_time_threshold: + if execution_times[i] <= constraint_execution_time_threshold: filtered_memories = np.append(filtered_memories, memory_space[i]) filtered_costs = np.append(filtered_costs, costs[i]) if len(filtered_memories) == 0: - raise 
UnfeasibleConstraintError( - "The execution time threshold constraint cannot be satisfied" - ) + raise UnfeasibleConstraintError() return filtered_memories, filtered_costs diff --git a/src/parrotfish.py b/src/parrotfish.py index b460218..b0b6f4c 100644 --- a/src/parrotfish.py +++ b/src/parrotfish.py @@ -46,14 +46,14 @@ def __init__(self, config: any): self.sampler = Sampler( explorer=self.explorer, - explorations_count=config.min_invocations, + explorations_count=config.min_sample_per_config, dynamic_sampling_params=config.dynamic_sampling_params, ) self.recommender = Recommender( objective=self.objective, sampler=self.sampler, - max_sample_count=config.max_sample_count, + max_total_sample_count=config.max_total_sample_count, ) def optimize(self, apply: bool = None) -> None: @@ -90,7 +90,7 @@ def _optimize_one_payload(self, entry: dict, collective_costs: np.ndarray) -> in self.param_function(self.explorer.memory_space) * entry["weight"] ) minimum_memory = self.param_function.minimize( - self.explorer.memory_space, self.config.execution_time_threshold, self.config.cost_tolerance_window + self.explorer.memory_space, self.config.constraint_execution_time_threshold, self.config.constraint_cost_tolerance_percent ) self.objective.reset() return minimum_memory diff --git a/src/recommendation/recommender.py b/src/recommendation/recommender.py index 5dff5e0..baeb868 100644 --- a/src/recommendation/recommender.py +++ b/src/recommendation/recommender.py @@ -11,11 +11,11 @@ def __init__( self, objective: Objective, sampler: Sampler, - max_sample_count: int, + max_total_sample_count: int, ): self.objective = objective self.sampler = sampler - self._max_sample_count = max_sample_count + self._max_total_sample_count = max_total_sample_count @property def _is_termination_reached(self) -> bool: @@ -27,7 +27,7 @@ def _is_termination_reached(self) -> bool: sample_count = len(self.sampler.sample) termination_value = self.objective.termination_value return ( - sample_count > 
self._max_sample_count + sample_count > self._max_total_sample_count or termination_value > self.objective.termination_threshold ) diff --git a/src/sampling/sampler.py b/src/sampling/sampler.py index 039a2d6..de9ff5f 100644 --- a/src/sampling/sampler.py +++ b/src/sampling/sampler.py @@ -123,7 +123,7 @@ def _explore_dynamically(self, durations: list) -> list: min_cv = np.std(durations, ddof=1) / np.mean(durations) while ( - dynamic_sample_count < self._dynamic_sampling_params["max_sample_count"] + dynamic_sample_count < self._dynamic_sampling_params["max_sample_per_config"] and min_cv > self._dynamic_sampling_params["coefficient_of_variation_threshold"] ):