diff --git a/spot/Spot.py b/spot/Spot.py index 06762a4..64b3340 100644 --- a/spot/Spot.py +++ b/spot/Spot.py @@ -11,6 +11,7 @@ from spot.context import Context from spot.benchmark_config import BenchmarkConfig from spot.recommendation_engine.recommendation_engine import RecommendationEngine +from spot.recommendation_engine.utility import Utility from spot.constants import * @@ -54,8 +55,11 @@ def collect_data(self): self.last_log_timestamp = self.log_retriever.get_logs(self.last_log_timestamp) def invoke(self, memory_mb, count): + billed_duration = np.arange(count, dtype=np.double) for i in range(count): - self.recommendation_engine.invoke_once(memory_mb, is_warm=(i > 0)) + df = self.recommendation_engine.invoke_once(memory_mb, is_warm=(i > 0)) + billed_duration[i] = df["Billed Duration"][0] + print("Real cost:", Utility.calculate_cost(np.mean(billed_duration), memory_mb)) def teardown(self, optimization_s): # Just saving the Context for now. diff --git a/spot/recommendation_engine/objectives.py b/spot/recommendation_engine/objectives.py index 8e6597b..b66fd6c 100644 --- a/spot/recommendation_engine/objectives.py +++ b/spot/recommendation_engine/objectives.py @@ -121,3 +121,33 @@ def get_normal_value(self, x, mean, std): return ( self.ratio * stats.norm.pdf(x, mean, std) / stats.norm.pdf(mean, mean, std) ) + + +class FitToRealCostObjective(Objective): + def __init__(self, sampler, memory_range): + super().__init__(sampler, memory_range) + self.ratio = 1 + self.knowledge_values = { + x: 0 for x in range(self.memory_range[0], self.memory_range[1] + 1) + } + + def get_value(self, x): + duration = Utility.fn(x, **self.sampler.function_parameters) + real_cost = duration * x + knowledge = self._get_normalized_knowledge(x) + return real_cost * knowledge + + def update_knowledge(self, x): + for key in self.knowledge_values: + self.knowledge_values[key] += stats.norm.pdf(key, x, 100) / stats.norm.pdf( + x, x, 100 + ) + + def _get_normalized_knowledge(self, x): + if isinstance(x, np.ndarray): + knowledge = np.array([self.knowledge_values[xs] for xs in x]) + else: + knowledge = self.knowledge_values[x] + min_ = np.min(knowledge) + max_ = np.max(knowledge) + return 1.0 + 0.5 * (knowledge - min_) / (max_ - min_) diff --git a/spot/recommendation_engine/recommendation_engine.py b/spot/recommendation_engine/recommendation_engine.py index fac2d8e..228ef50 100644 --- a/spot/recommendation_engine/recommendation_engine.py +++ b/spot/recommendation_engine/recommendation_engine.py @@ -3,13 +3,7 @@ import numpy as np import pandas as pd -from spot.recommendation_engine.objectives import ( - NormalObjective, - SkewedNormalObjective, - DynamicNormalObjective, - DynamicSTDNormalObjective1, - DynamicSTDNormalObjective2, -) +from spot.recommendation_engine.objectives import * from spot.recommendation_engine.utility import Utility from spot.constants import * @@ -26,7 +20,7 @@ def __init__(self, invocator, payload_path, memory_range): self.payload_path = payload_path self.function_invocator = invocator self.sampled_datapoints = [] - self.sampled_points = 0 + self.sampled_point_count = 0 self.fitted_function = None self.function_parameters = {} self.function_degree = 2 @@ -41,6 +35,9 @@ def __init__(self, invocator, payload_path, memory_range): self.objective = DynamicSTDNormalObjective1(self, self.memory_range) elif OPTIMIZATION_OBJECTIVE == "dynamic_std2": self.objective = DynamicSTDNormalObjective2(self, self.memory_range) + elif OPTIMIZATION_OBJECTIVE == "fit_to_real_cost": + assert len(INITIAL_SAMPLE_MEMORIES) == 3 + self.objective = FitToRealCostObjective(self, self.memory_range) self.exploration_cost = 0 @@ -53,15 +50,15 @@ def sampled_memories_count(self): def run(self): self.initial_sample() - self.sampled_points = 2 + self.sampled_point_count = 2 while ( self.sampled_memories_count < TOTAL_SAMPLE_COUNT and self.objective.ratio > KNOWLEDGE_RATIO ): - x = self.choose_sample_point() + x = self._choose_sample_point() self.sample(x) - self.sampled_points += 1 - self.function_degree = self.sampled_points + self.sampled_point_count += 1 + self.function_degree = min(self.sampled_point_count, 4) self.fitted_function, self.function_parameters = Utility.fit_function( self.sampled_datapoints, degree=self.function_degree ) @@ -108,8 +105,8 @@ def sample(self, x): payload_filename=self.payload_path, save_to_ctx=False, ) - for value in result["Billed Duration"].tolist(): - self.exploration_cost += Utility.calculate_cost(value, x) + durations = result["Billed Duration"].to_numpy() + self.exploration_cost += np.sum(Utility.calculate_cost(durations, x)) result = self.function_invocator.invoke( invocation_count=DYNAMIC_SAMPLING_INITIAL_STEP, parallelism=DYNAMIC_SAMPLING_INITIAL_STEP, @@ -129,9 +126,18 @@ def sample(self, x): payload_filename=self.payload_path, ) values.append(result.iloc[0]["Billed Duration"]) - for value in values: + + if len(values) > 2: + values.sort() + selected_values = values[len(values) // 2 - 1 : len(values) // 2] + else: + selected_values = values + + self.exploration_cost += np.sum(Utility.calculate_cost(np.array(values), x)) + + for value in selected_values: self.sampled_datapoints.append(DataPoint(memory=x, billed_time=value)) - self.exploration_cost += Utility.calculate_cost(value, x) + print(f"finished sampling {x} with {len(values)} samples") self.objective.update_knowledge(x) @@ -153,15 +159,11 @@ def invoke_once(self, memory_mb, is_warm=True): ) return result - def choose_sample_point(self): - max_value = self.memory_range[0] - max_obj = np.inf - for value in self._remainder_memories(): - obj = self.objective.get_value(value) - if obj < max_obj: - max_value = value - max_obj = obj - return max_value + def _choose_sample_point(self): + mems = np.array(self._remainder_memories(), dtype=np.double) + values = self.objective.get_value(mems) + index = np.argmin(values) + return int(mems[index]) def _remainder_memories(self): memories = range(self.memory_range[0], self.memory_range[1] + 1) diff --git a/spot/recommendation_engine/utility.py b/spot/recommendation_engine/utility.py index 7b5e74f..a159678 100644 --- a/spot/recommendation_engine/utility.py +++ b/spot/recommendation_engine/utility.py @@ -12,14 +12,10 @@ def __init__(self, memory, billed_time): class Utility: @staticmethod def find_minimum_memory_cost(f, params, memory_range): - min_cost = np.inf - min_memory = 0 - for memory in range(memory_range[0], memory_range[1] + 1): - cost = Utility.calculate_cost(f(memory, **params), memory) - if cost < min_cost: - min_cost = cost - min_memory = memory - return min_memory, min_cost + mems = np.arange(memory_range[0], memory_range[1] + 1, dtype=np.double) + costs = Utility.calculate_cost(f(mems, **params), mems) + min_index = np.argmin(costs) + return mems[min_index], costs[min_index] @staticmethod def calculate_cost(duration, memory): @@ -36,12 +32,8 @@ def cv(l): @staticmethod def check_function_validity(f, params, memory_range): - if all(v >= 0 for v in params.values()): - return True - for x in range(memory_range[0], memory_range[1] + 1): - if f(x, **params) < 0: - return False - return True + mems = np.arange(memory_range[0], memory_range[1] + 1) + return np.all(f(mems, **params) >= 0) @staticmethod def fit_function(datapoints, degree):