fix: fixed configuration parameters naming (#158)
* fix: better naming for configuration parameters
arshiamoghimi authored Oct 26, 2023
1 parent 1121f0c commit a210df5
Showing 9 changed files with 41 additions and 42 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -1,4 +1,4 @@
-## Unreleased
+## 0.3.0 (2023-10-24)

## 0.1.0 (2023-10-20)

2 changes: 1 addition & 1 deletion cz.json
@@ -3,7 +3,7 @@
"name": "cz_conventional_commits",
"tag_format": "$version",
"version_scheme": "semver",
-"version": "0.1.0",
+"version": "0.3.0",
"update_changelog_on_bump": true
}
}
14 changes: 7 additions & 7 deletions src/configuration/README.md
@@ -23,18 +23,18 @@
...
"memory_bounds": Array containing two memory values that represent the memory configuration bounds (Optional),
"termination_threshold": When the knowledge value for the optimal memory configuration reaches this threshold, the recommendation algorithm terminates. (Optional, Default is 3),
-"max_sample_count": The maximum size of the sample. (Optional, Default is 20),
-"min_invocations": The minimum number of invocations per iteration. (Optional, Default is 4),
+"max_total_sample_count": The maximum size of the sample. (Optional, Default is 20),
+"min_sample_per_config": The minimum number of invocations per iteration. (Optional, Default is 4, minimum is 2),
"dynamic_sampling_params": {
-"max_sample_count": The maximum number of samples we gather dynamically (Default is 8),
+"max_sample_per_config": The maximum number of samples we gather dynamically (Default is 8),
"coefficient_of_variation_threshold": We sample dynamically until the sample is consistent enough. Consistency is measured by the coefficient of variation,
and when the calculated coefficient of variation reaches this threshold we terminate the dynamic sampling (Default is 0.05),
} (Optional),
"max_number_of_invocation_attempts": The maximum number of attempts per invocation; when this number is reached, an error is raised. (Optional, Default is 5)
-"execution_time_threshold": The execution time threshold constraint. We leverage the execution time model to recommend a configuration
-that minimizes cost while adhering to the specified execution time constraint. (Optional)
-"cost_tolerance_window": The cost tolerance window (in percent). We leverage the cost model to recommend a configuration that maximizes performance while
-increasing the cost by at most X%, where X is the cost tolerance window. (Optional)
+"constraint_execution_time_threshold": The execution time threshold constraint. We leverage the execution time model to recommend a configuration
+that minimizes cost while adhering to the specified execution time constraint. (Optional, Default is +infinity)
+"constraint_cost_tolerance_percent": The cost tolerance window (in percent). We leverage the cost model to recommend a configuration that maximizes performance while
+increasing the cost by at most X%, where X is the cost tolerance window. (Optional, Default is 0)
}
```
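
For illustration, a complete configuration using the renamed parameters might look like the following sketch. The function name, vendor, region, payload, constraint values, and output filename are hypothetical placeholders, not values taken from this commit.

```python
import json

# Hypothetical example configuration using the renamed parameters described above.
sample_config = {
    "function_name": "example-function",          # placeholder
    "vendor": "AWS",                              # placeholder
    "region": "us-east-1",                        # placeholder
    "payload": "{}",                              # placeholder
    "termination_threshold": 3,
    "max_total_sample_count": 20,
    "min_sample_per_config": 4,
    "dynamic_sampling_params": {
        "max_sample_per_config": 8,
        "coefficient_of_variation_threshold": 0.05,
    },
    "max_number_of_invocation_attempts": 5,
    "constraint_execution_time_threshold": 1000,  # hypothetical constraint
    "constraint_cost_tolerance_percent": 10,      # hypothetical constraint
}

# Write the configuration to a JSON file for the tool to read.
with open("parrotfish.json", "w") as config_file:
    json.dump(sample_config, config_file, indent=2)
```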

18 changes: 9 additions & 9 deletions src/configuration/configuration.py
@@ -14,11 +14,11 @@ def __init__(self, config_file: TextIO):
# Setup default values
self.dynamic_sampling_params = DYNAMIC_SAMPLING_PARAMS
self.termination_threshold = TERMINATION_THRESHOLD
-self.max_sample_count = MAX_SAMPLE_COUNT
-self.min_invocations = MIN_INVOCATIONS
+self.max_total_sample_count = MAX_TOTAL_SAMPLE_COUNT
+self.min_sample_per_config = MIN_SAMPLE_PER_CONFIG
self.max_number_of_invocation_attempts = MAX_NUMBER_OF_INVOCATION_ATTEMPTS
-self.execution_time_threshold = None
-self.cost_tolerance_window = None
+self.constraint_execution_time_threshold = None
+self.constraint_cost_tolerance_percent = None
self.memory_bounds = None

# Parse the configuration file
@@ -56,21 +56,21 @@ def _load_config_schema(self):
"maxItems": 2,
},
"termination_threshold": {"type": "number", "minimum": 0},
-"max_sample_count": {"type": "integer", "minimum": 0},
-"min_invocations": {"type": "integer", "minimum": 0},
+"max_total_sample_count": {"type": "integer", "minimum": 0},
+"min_sample_per_config": {"type": "integer", "minimum": 2},
"dynamic_sampling_params": {
"type": "object",
"properties": {
-"max_sample_count": {"type": "integer", "minimum": 0},
+"max_sample_per_config": {"type": "integer", "minimum": 0},
"coefficient_of_variation_threshold": {
"type": "number",
"minimum": 0,
},
},
},
"max_number_of_invocation_attempts": {"type": "integer", "minimum": 0},
-"execution_time_threshold": {"type": "integer", "minimum": 1},
-"cost_tolerance_window": {"type": "integer", "minimum": 1},
+"constraint_execution_time_threshold": {"type": "integer", "minimum": 1},
+"constraint_cost_tolerance_percent": {"type": "integer", "minimum": 1},
},
"required": ["function_name", "vendor", "region"],
"if": {"not": {"required": ["payload"]}},
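
As a rough sketch of how a schema like the one above is typically enforced, the snippet below validates a small configuration against only the renamed fields. The use of the `jsonschema` package is an assumption suggested by the schema dictionary in this file, not something shown in the diff.

```python
from jsonschema import ValidationError, validate

# Schema fragment limited to the renamed parameters (assumed structure).
schema = {
    "type": "object",
    "properties": {
        "max_total_sample_count": {"type": "integer", "minimum": 0},
        "min_sample_per_config": {"type": "integer", "minimum": 2},
        "constraint_execution_time_threshold": {"type": "integer", "minimum": 1},
        "constraint_cost_tolerance_percent": {"type": "integer", "minimum": 1},
    },
}

config = {"max_total_sample_count": 20, "min_sample_per_config": 1}  # 1 violates the minimum of 2
try:
    validate(instance=config, schema=schema)
    print("configuration is valid")
except ValidationError as err:
    print(f"invalid configuration: {err.message}")
```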
6 changes: 3 additions & 3 deletions src/configuration/defaults.py
@@ -1,12 +1,12 @@
import logging

DYNAMIC_SAMPLING_PARAMS = {
-"max_sample_count": 8,
+"max_sample_per_config": 8,
"coefficient_of_variation_threshold": 0.05,
}
MAX_NUMBER_OF_INVOCATION_ATTEMPTS = 5
-MAX_SAMPLE_COUNT = 20
-MIN_INVOCATIONS = 4
+MAX_TOTAL_SAMPLE_COUNT = 20
+MIN_SAMPLE_PER_CONFIG = 4
TERMINATION_THRESHOLD = 3

LOG_LEVEL = logging.WARNING
27 changes: 13 additions & 14 deletions src/objective/parametric_function.py
@@ -48,41 +48,42 @@ def fit(self, sample: Sample) -> None:
)[0]

def minimize(
-self, memory_space: np.ndarray, execution_time_threshold: int = None, cost_tolerance_window: int = None
+self, memory_space: np.ndarray, constraint_execution_time_threshold: int = None,
+constraint_cost_tolerance_percent: int = None
) -> int:
"""Minimizes the cost function and returns the corresponding memory configuration.
Args:
memory_space (np.ndarray): The memory space specific to the cloud provider.
-execution_time_threshold (int): The execution time threshold constraint.
-cost_tolerance_window (int): The cost tolerance window constraint.
+constraint_execution_time_threshold (int): The execution time threshold constraint.
+constraint_cost_tolerance_percent (int): The cost tolerance window constraint.
Returns:
int: Memory configuration that minimizes the cost function.
"""
costs = self.__call__(memory_space) * memory_space

# Handling execution threshold constraint
-if execution_time_threshold:
+if constraint_execution_time_threshold:
try:
memory_space, costs = self._filter_execution_time_constraint(
-memory_space, costs, execution_time_threshold
+memory_space, costs, constraint_execution_time_threshold
)
except UnfeasibleConstraintError as e:
logger.warning(e)

-if cost_tolerance_window:
+if constraint_cost_tolerance_percent:
execution_times = costs / memory_space
-min_index = self._find_min_index_within_tolerance(costs, execution_times, cost_tolerance_window)
+min_index = self._find_min_index_within_tolerance(costs, execution_times, constraint_cost_tolerance_percent)
else:
min_index = np.argmin(costs)
return memory_space[min_index]

@staticmethod
def _find_min_index_within_tolerance(costs: np.ndarray, execution_times: np.ndarray,
-cost_tolerance_window: int) -> int:
+constraint_cost_tolerance_percent: int) -> int:
min_cost = np.min(costs)
-min_cost_tolerance_window = min_cost + cost_tolerance_window / 100 * min_cost
+min_cost_tolerance_window = min_cost + constraint_cost_tolerance_percent / 100 * min_cost
min_index = 0
min_execution_time = np.inf
for i in range(len(execution_times)):
@@ -96,20 +96,18 @@ def _find_min_index_within_tolerance(costs: np.ndarray, execution_times: np.ndar
def _filter_execution_time_constraint(
memory_space: np.ndarray,
costs: np.ndarray,
-execution_time_threshold: int = None,
+constraint_execution_time_threshold: int = None,
) -> tuple:
filtered_memories = np.array([])
filtered_costs = np.array([])
execution_times = costs / memory_space

for i in range(len(execution_times)):
-if execution_times[i] <= execution_time_threshold:
+if execution_times[i] <= constraint_execution_time_threshold:
filtered_memories = np.append(filtered_memories, memory_space[i])
filtered_costs = np.append(filtered_costs, costs[i])

if len(filtered_memories) == 0:
-raise UnfeasibleConstraintError(
-"The execution time threshold constraint cannot be satisfied"
-)
+raise UnfeasibleConstraintError()

return filtered_memories, filtered_costs
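
To make the renamed `constraint_cost_tolerance_percent` behaviour concrete, here is a standalone sketch of the same selection rule with made-up numbers: among memory configurations whose estimated cost stays within X% of the cheapest one, pick the fastest.

```python
import numpy as np

# Illustrative values only; not taken from the repository.
costs = np.array([4.0, 3.0, 3.2, 5.0])            # estimated cost per memory configuration
execution_times = np.array([2.0, 1.5, 1.1, 0.7])  # estimated duration per memory configuration
tolerance_percent = 10

max_allowed_cost = np.min(costs) * (1 + tolerance_percent / 100)  # 3.3
within_budget = costs <= max_allowed_cost                         # [False, True, True, False]
best = int(np.argmin(np.where(within_budget, execution_times, np.inf)))
print(best)  # 2: the fastest configuration whose cost stays within the 10% window
```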
6 changes: 3 additions & 3 deletions src/parrotfish.py
Expand Up @@ -46,14 +46,14 @@ def __init__(self, config: any):

self.sampler = Sampler(
explorer=self.explorer,
explorations_count=config.min_invocations,
explorations_count=config.min_sample_per_config,
dynamic_sampling_params=config.dynamic_sampling_params,
)

self.recommender = Recommender(
objective=self.objective,
sampler=self.sampler,
-max_sample_count=config.max_sample_count,
+max_total_sample_count=config.max_total_sample_count,
)

def optimize(self, apply: bool = None) -> None:
@@ -90,7 +90,7 @@ def _optimize_one_payload(self, entry: dict, collective_costs: np.ndarray) -> in
self.param_function(self.explorer.memory_space) * entry["weight"]
)
minimum_memory = self.param_function.minimize(
-self.explorer.memory_space, self.config.execution_time_threshold, self.config.cost_tolerance_window
+self.explorer.memory_space, self.config.constraint_execution_time_threshold, self.config.constraint_cost_tolerance_percent
)
self.objective.reset()
return minimum_memory
6 changes: 3 additions & 3 deletions src/recommendation/recommender.py
@@ -11,11 +11,11 @@ def __init__(
self,
objective: Objective,
sampler: Sampler,
-max_sample_count: int,
+max_total_sample_count: int,
):
self.objective = objective
self.sampler = sampler
-self._max_sample_count = max_sample_count
+self._max_total_sample_count = max_total_sample_count

@property
def _is_termination_reached(self) -> bool:
@@ -27,7 +27,7 @@ def _is_termination_reached(self) -> bool:
sample_count = len(self.sampler.sample)
termination_value = self.objective.termination_value
return (
-sample_count > self._max_sample_count
+sample_count > self._max_total_sample_count
or termination_value > self.objective.termination_threshold
)

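
A quick illustration of the termination rule above, with made-up numbers:

```python
# Hypothetical values for the renamed sample budget and the knowledge threshold.
max_total_sample_count = 20
termination_threshold = 3

sample_count = 14
termination_value = 3.4  # knowledge value for the current optimal configuration

terminate = sample_count > max_total_sample_count or termination_value > termination_threshold
print(terminate)  # True: the knowledge threshold is exceeded before the sample budget runs out
```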
2 changes: 1 addition & 1 deletion src/sampling/sampler.py
@@ -123,7 +123,7 @@ def _explore_dynamically(self, durations: list) -> list:
min_cv = np.std(durations, ddof=1) / np.mean(durations)

while (
-dynamic_sample_count < self._dynamic_sampling_params["max_sample_count"]
+dynamic_sample_count < self._dynamic_sampling_params["max_sample_per_config"]
and min_cv
> self._dynamic_sampling_params["coefficient_of_variation_threshold"]
):
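
A self-contained sketch of the stop condition governed by the renamed `max_sample_per_config` bound: sampling a configuration continues until the coefficient of variation (CV) of the measured durations drops below the threshold or the per-configuration sample limit is reached. The durations below are hypothetical.

```python
import numpy as np

durations = [842.0, 1011.0, 876.0, 869.0]  # hypothetical measured durations (ms)
cv = np.std(durations, ddof=1) / np.mean(durations)
threshold = 0.05

if cv <= threshold:
    print(f"CV {cv:.3f} is consistent enough; stop dynamic sampling")
else:
    print(f"CV {cv:.3f} exceeds {threshold}; gather another sample (up to max_sample_per_config)")
```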
