From c1c53d849c91e78484be7b64cee9ed3c6eb6bb2c Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Thu, 15 Dec 2022 18:06:47 -0800 Subject: [PATCH 1/8] add FitToRealCostObjective --- spot/recommendation_engine/objectives.py | 16 +++++++++++ .../recommendation_engine.py | 27 +++++++------------ 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/spot/recommendation_engine/objectives.py b/spot/recommendation_engine/objectives.py index 8e6597b..4ea40aa 100644 --- a/spot/recommendation_engine/objectives.py +++ b/spot/recommendation_engine/objectives.py @@ -121,3 +121,19 @@ def get_normal_value(self, x, mean, std): return ( self.ratio * stats.norm.pdf(x, mean, std) / stats.norm.pdf(mean, mean, std) ) + + +class FitToRealCostObjective(Objective): + def __init__(self, sampler, memory_range): + super().__init__(sampler, memory_range) + self.ratio = 1 + + def get_value(self, x): + duration = Utility.fn(x, **self.sampler.function_parameters) + real_cost = duration * x + if isinstance(x, np.ndarray): + assert np.all(x > 0) + return real_cost + + def update_knowledge(self, x): + pass diff --git a/spot/recommendation_engine/recommendation_engine.py b/spot/recommendation_engine/recommendation_engine.py index fac2d8e..43684ed 100644 --- a/spot/recommendation_engine/recommendation_engine.py +++ b/spot/recommendation_engine/recommendation_engine.py @@ -3,13 +3,7 @@ import numpy as np import pandas as pd -from spot.recommendation_engine.objectives import ( - NormalObjective, - SkewedNormalObjective, - DynamicNormalObjective, - DynamicSTDNormalObjective1, - DynamicSTDNormalObjective2, -) +from spot.recommendation_engine.objectives import * from spot.recommendation_engine.utility import Utility from spot.constants import * @@ -41,6 +35,9 @@ def __init__(self, invocator, payload_path, memory_range): self.objective = DynamicSTDNormalObjective1(self, self.memory_range) elif OPTIMIZATION_OBJECTIVE == "dynamic_std2": self.objective = DynamicSTDNormalObjective2(self, self.memory_range) + elif OPTIMIZATION_OBJECTIVE == "fit_to_real_cost": + assert len(INITIAL_SAMPLE_MEMORIES) == 3 + self.objective = FitToRealCostObjective(self, self.memory_range) self.exploration_cost = 0 @@ -58,7 +55,7 @@ def run(self): self.sampled_memories_count < TOTAL_SAMPLE_COUNT and self.objective.ratio > KNOWLEDGE_RATIO ): - x = self.choose_sample_point() + x = self._choose_sample_point() self.sample(x) self.sampled_points += 1 self.function_degree = self.sampled_points @@ -153,15 +150,11 @@ def invoke_once(self, memory_mb, is_warm=True): ) return result - def choose_sample_point(self): - max_value = self.memory_range[0] - max_obj = np.inf - for value in self._remainder_memories(): - obj = self.objective.get_value(value) - if obj < max_obj: - max_value = value - max_obj = obj - return max_value + def _choose_sample_point(self): + mems = np.array(self._remainder_memories()) + values = self.objective.get_value(mems) + index = np.argmin(values) + return int(mems[index]) def _remainder_memories(self): memories = range(self.memory_range[0], self.memory_range[1] + 1) From 5d83be07012e614ac86c1e59c153d455c55eee18 Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Thu, 15 Dec 2022 18:22:49 -0800 Subject: [PATCH 2/8] add real cost logging --- spot/Spot.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/spot/Spot.py b/spot/Spot.py index 06762a4..ba732fe 100644 --- a/spot/Spot.py +++ b/spot/Spot.py @@ -11,6 +11,7 @@ from spot.context import Context from spot.benchmark_config import BenchmarkConfig from spot.recommendation_engine.recommendation_engine import RecommendationEngine +from spot.recommendation_engine.utility import Utility from spot.constants import * @@ -54,8 +55,11 @@ def collect_data(self): self.last_log_timestamp = self.log_retriever.get_logs(self.last_log_timestamp) def invoke(self, memory_mb, count): + billed_duration = np.arange(count, dtype=float) for i in range(count): - self.recommendation_engine.invoke_once(memory_mb, is_warm=(i > 0)) + df = self.recommendation_engine.invoke_once(memory_mb, is_warm=(i > 0)) + billed_duration[i] = df["Billed Duration"][0] + print("Real cost:", Utility.calculate_cost(np.mean(billed_duration), memory_mb)) def teardown(self, optimization_s): # Just saving the Context for now. From 2db4b099a85dd986643a81ffb208ce908a1be5ee Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Thu, 15 Dec 2022 20:17:35 -0800 Subject: [PATCH 3/8] penalize with knowledge_values --- spot/recommendation_engine/objectives.py | 18 ++++++++++++++++-- .../recommendation_engine.py | 13 +++++++++++-- spot/recommendation_engine/utility.py | 12 ++++-------- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/spot/recommendation_engine/objectives.py b/spot/recommendation_engine/objectives.py index 4ea40aa..30e23d9 100644 --- a/spot/recommendation_engine/objectives.py +++ b/spot/recommendation_engine/objectives.py @@ -127,13 +127,27 @@ class FitToRealCostObjective(Objective): def __init__(self, sampler, memory_range): super().__init__(sampler, memory_range) self.ratio = 1 + self.knowledge_values = { + x: 0 for x in range(self.memory_range[0], self.memory_range[1] + 1) + } def get_value(self, x): duration = Utility.fn(x, **self.sampler.function_parameters) real_cost = duration * x if isinstance(x, np.ndarray): assert np.all(x > 0) - return real_cost + knowledge = self._get_normalized_knowledge(x) + return real_cost * knowledge def update_knowledge(self, x): - pass + for key in self.knowledge_values: + self.knowledge_values[key] += stats.norm.pdf(key, x, 50) / stats.norm.pdf(x, x, 50) + + def _get_normalized_knowledge(self, x): + if isinstance(x, np.ndarray): + knowledge = np.array([self.knowledge_values[xs] for xs in x]) + else: + knowledge = self.knowledge[x] + min_ = np.min(knowledge) + max_ = np.max(knowledge) + return 1. + (knowledge - min_) / (max_ - min_) diff --git a/spot/recommendation_engine/recommendation_engine.py b/spot/recommendation_engine/recommendation_engine.py index 43684ed..c3c11a4 100644 --- a/spot/recommendation_engine/recommendation_engine.py +++ b/spot/recommendation_engine/recommendation_engine.py @@ -126,9 +126,18 @@ def sample(self, x): payload_filename=self.payload_path, ) values.append(result.iloc[0]["Billed Duration"]) - for value in values: + + if len(values) > 2: + values.sort() + selected_values = values[len(values)//2 - 1:len(values)//2] + else: + selected_values = values + + self.exploration_cost += np.sum(Utility.calculate_cost(np.array(values), x)) + + for value in selected_values: self.sampled_datapoints.append(DataPoint(memory=x, billed_time=value)) - self.exploration_cost += Utility.calculate_cost(value, x) + print(f"finished sampling {x} with {len(values)} samples") self.objective.update_knowledge(x) diff --git a/spot/recommendation_engine/utility.py b/spot/recommendation_engine/utility.py index 7b5e74f..5c18dee 100644 --- a/spot/recommendation_engine/utility.py +++ b/spot/recommendation_engine/utility.py @@ -12,14 +12,10 @@ def __init__(self, memory, billed_time): class Utility: @staticmethod def find_minimum_memory_cost(f, params, memory_range): - min_cost = np.inf - min_memory = 0 - for memory in range(memory_range[0], memory_range[1] + 1): - cost = Utility.calculate_cost(f(memory, **params), memory) - if cost < min_cost: - min_cost = cost - min_memory = memory - return min_memory, min_cost + mems = np.arange(memory_range[0], memory_range[1] + 1, dtype=float) + costs = Utility.calculate_cost(f(mems, **params), mems) + min_index = np.argmin(costs) + return mems[min_index], costs[min_index] @staticmethod def calculate_cost(duration, memory): From 05efdab29a4446094148ea4f029e5eef46a9c83d Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Thu, 15 Dec 2022 22:49:25 -0800 Subject: [PATCH 4/8] use numpy for speedup --- spot/recommendation_engine/objectives.py | 2 -- spot/recommendation_engine/recommendation_engine.py | 6 +++--- spot/recommendation_engine/utility.py | 8 ++------ 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/spot/recommendation_engine/objectives.py b/spot/recommendation_engine/objectives.py index 30e23d9..7b3d764 100644 --- a/spot/recommendation_engine/objectives.py +++ b/spot/recommendation_engine/objectives.py @@ -134,8 +134,6 @@ def __init__(self, sampler, memory_range): def get_value(self, x): duration = Utility.fn(x, **self.sampler.function_parameters) real_cost = duration * x - if isinstance(x, np.ndarray): - assert np.all(x > 0) knowledge = self._get_normalized_knowledge(x) return real_cost * knowledge diff --git a/spot/recommendation_engine/recommendation_engine.py b/spot/recommendation_engine/recommendation_engine.py index c3c11a4..4ac644f 100644 --- a/spot/recommendation_engine/recommendation_engine.py +++ b/spot/recommendation_engine/recommendation_engine.py @@ -105,8 +105,8 @@ def sample(self, x): payload_filename=self.payload_path, save_to_ctx=False, ) - for value in result["Billed Duration"].tolist(): - self.exploration_cost += Utility.calculate_cost(value, x) + durations = result["Billed Duration"].to_numpy() + self.exploration_cost += np.sum(Utility.calculate_cost(durations, x)) result = self.function_invocator.invoke( invocation_count=DYNAMIC_SAMPLING_INITIAL_STEP, parallelism=DYNAMIC_SAMPLING_INITIAL_STEP, @@ -160,7 +160,7 @@ def invoke_once(self, memory_mb, is_warm=True): return result def _choose_sample_point(self): - mems = np.array(self._remainder_memories()) + mems = np.array(self._remainder_memories(), dtype=float) values = self.objective.get_value(mems) index = np.argmin(values) return int(mems[index]) diff --git a/spot/recommendation_engine/utility.py b/spot/recommendation_engine/utility.py index 5c18dee..d59413d 100644 --- a/spot/recommendation_engine/utility.py +++ b/spot/recommendation_engine/utility.py @@ -32,12 +32,8 @@ def cv(l): @staticmethod def check_function_validity(f, params, memory_range): - if all(v >= 0 for v in params.values()): - return True - for x in range(memory_range[0], memory_range[1] + 1): - if f(x, **params) < 0: - return False - return True + mems = np.arange(memory_range[0], memory_range[1] + 1) + return np.all(f(mems, **params) >= 0) @staticmethod def fit_function(datapoints, degree): From 9ec300cdb45978acb8b7dc064b863be02906aecd Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Thu, 15 Dec 2022 23:07:21 -0800 Subject: [PATCH 5/8] wip --- spot/Spot.py | 2 +- spot/recommendation_engine/objectives.py | 4 ++-- spot/recommendation_engine/recommendation_engine.py | 10 +++++----- spot/recommendation_engine/utility.py | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/spot/Spot.py b/spot/Spot.py index ba732fe..64b3340 100644 --- a/spot/Spot.py +++ b/spot/Spot.py @@ -55,7 +55,7 @@ def collect_data(self): self.last_log_timestamp = self.log_retriever.get_logs(self.last_log_timestamp) def invoke(self, memory_mb, count): - billed_duration = np.arange(count, dtype=float) + billed_duration = np.arange(count, dtype=np.double) for i in range(count): df = self.recommendation_engine.invoke_once(memory_mb, is_warm=(i > 0)) billed_duration[i] = df["Billed Duration"][0] diff --git a/spot/recommendation_engine/objectives.py b/spot/recommendation_engine/objectives.py index 7b3d764..813de3a 100644 --- a/spot/recommendation_engine/objectives.py +++ b/spot/recommendation_engine/objectives.py @@ -139,7 +139,7 @@ def get_value(self, x): def update_knowledge(self, x): for key in self.knowledge_values: - self.knowledge_values[key] += stats.norm.pdf(key, x, 50) / stats.norm.pdf(x, x, 50) + self.knowledge_values[key] += stats.norm.pdf(key, x, 20) / stats.norm.pdf(x, x, 20) def _get_normalized_knowledge(self, x): if isinstance(x, np.ndarray): @@ -148,4 +148,4 @@ def _get_normalized_knowledge(self, x): knowledge = self.knowledge[x] min_ = np.min(knowledge) max_ = np.max(knowledge) - return 1. + (knowledge - min_) / (max_ - min_) + return 1. + 2. * (knowledge - min_) / (max_ - min_) diff --git a/spot/recommendation_engine/recommendation_engine.py b/spot/recommendation_engine/recommendation_engine.py index 4ac644f..ad738e2 100644 --- a/spot/recommendation_engine/recommendation_engine.py +++ b/spot/recommendation_engine/recommendation_engine.py @@ -20,7 +20,7 @@ def __init__(self, invocator, payload_path, memory_range): self.payload_path = payload_path self.function_invocator = invocator self.sampled_datapoints = [] - self.sampled_points = 0 + self.sampled_point_count = 0 self.fitted_function = None self.function_parameters = {} self.function_degree = 2 @@ -50,15 +50,15 @@ def sampled_memories_count(self): def run(self): self.initial_sample() - self.sampled_points = 2 + self.sampled_point_count = 2 while ( self.sampled_memories_count < TOTAL_SAMPLE_COUNT and self.objective.ratio > KNOWLEDGE_RATIO ): x = self._choose_sample_point() self.sample(x) - self.sampled_points += 1 - self.function_degree = self.sampled_points + self.sampled_point_count += 1 + self.function_degree = min(self.sampled_point_count, 4) self.fitted_function, self.function_parameters = Utility.fit_function( self.sampled_datapoints, degree=self.function_degree ) @@ -160,7 +160,7 @@ def invoke_once(self, memory_mb, is_warm=True): return result def _choose_sample_point(self): - mems = np.array(self._remainder_memories(), dtype=float) + mems = np.array(self._remainder_memories(), dtype=np.double) values = self.objective.get_value(mems) index = np.argmin(values) return int(mems[index]) diff --git a/spot/recommendation_engine/utility.py b/spot/recommendation_engine/utility.py index d59413d..a159678 100644 --- a/spot/recommendation_engine/utility.py +++ b/spot/recommendation_engine/utility.py @@ -12,7 +12,7 @@ def __init__(self, memory, billed_time): class Utility: @staticmethod def find_minimum_memory_cost(f, params, memory_range): - mems = np.arange(memory_range[0], memory_range[1] + 1, dtype=float) + mems = np.arange(memory_range[0], memory_range[1] + 1, dtype=np.double) costs = Utility.calculate_cost(f(mems, **params), mems) min_index = np.argmin(costs) return mems[min_index], costs[min_index] From ec3a3cdbc13978f652d63a4fb61b9e0d80ef8d57 Mon Sep 17 00:00:00 2001 From: Github Actions Bot <> Date: Fri, 16 Dec 2022 08:08:17 +0000 Subject: [PATCH 6/8] Format with Black --- spot/recommendation_engine/objectives.py | 6 ++++-- spot/recommendation_engine/recommendation_engine.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/spot/recommendation_engine/objectives.py b/spot/recommendation_engine/objectives.py index 813de3a..f5f638c 100644 --- a/spot/recommendation_engine/objectives.py +++ b/spot/recommendation_engine/objectives.py @@ -139,7 +139,9 @@ def get_value(self, x): def update_knowledge(self, x): for key in self.knowledge_values: - self.knowledge_values[key] += stats.norm.pdf(key, x, 20) / stats.norm.pdf(x, x, 20) + self.knowledge_values[key] += stats.norm.pdf(key, x, 20) / stats.norm.pdf( + x, x, 20 + ) def _get_normalized_knowledge(self, x): if isinstance(x, np.ndarray): @@ -148,4 +150,4 @@ def _get_normalized_knowledge(self, x): knowledge = self.knowledge[x] min_ = np.min(knowledge) max_ = np.max(knowledge) - return 1. + 2. * (knowledge - min_) / (max_ - min_) + return 1.0 + 2.0 * (knowledge - min_) / (max_ - min_) diff --git a/spot/recommendation_engine/recommendation_engine.py b/spot/recommendation_engine/recommendation_engine.py index ad738e2..228ef50 100644 --- a/spot/recommendation_engine/recommendation_engine.py +++ b/spot/recommendation_engine/recommendation_engine.py @@ -129,7 +129,7 @@ def sample(self, x): if len(values) > 2: values.sort() - selected_values = values[len(values)//2 - 1:len(values)//2] + selected_values = values[len(values) // 2 - 1 : len(values) // 2] else: selected_values = values From 7c3092063ad5f7098cbd75fcfc8e1b56b0e49d9c Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Fri, 16 Dec 2022 20:16:44 +0000 Subject: [PATCH 7/8] update parameters --- spot/recommendation_engine/objectives.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spot/recommendation_engine/objectives.py b/spot/recommendation_engine/objectives.py index 813de3a..11f5321 100644 --- a/spot/recommendation_engine/objectives.py +++ b/spot/recommendation_engine/objectives.py @@ -139,13 +139,13 @@ def get_value(self, x): def update_knowledge(self, x): for key in self.knowledge_values: - self.knowledge_values[key] += stats.norm.pdf(key, x, 20) / stats.norm.pdf(x, x, 20) + self.knowledge_values[key] += stats.norm.pdf(key, x, 100) / stats.norm.pdf(x, x, 100) def _get_normalized_knowledge(self, x): if isinstance(x, np.ndarray): knowledge = np.array([self.knowledge_values[xs] for xs in x]) else: - knowledge = self.knowledge[x] + knowledge = self.knowledge_values[x] min_ = np.min(knowledge) max_ = np.max(knowledge) - return 1. + 2. * (knowledge - min_) / (max_ - min_) + return 1. + .5 * (knowledge - min_) / (max_ - min_) From 5fecfa6f51878bdff8d6321f2f4bb84fd819f4b2 Mon Sep 17 00:00:00 2001 From: Github Actions Bot <> Date: Fri, 16 Dec 2022 20:19:03 +0000 Subject: [PATCH 8/8] Format with Black --- spot/recommendation_engine/objectives.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spot/recommendation_engine/objectives.py b/spot/recommendation_engine/objectives.py index 11f5321..b66fd6c 100644 --- a/spot/recommendation_engine/objectives.py +++ b/spot/recommendation_engine/objectives.py @@ -139,7 +139,9 @@ def get_value(self, x): def update_knowledge(self, x): for key in self.knowledge_values: - self.knowledge_values[key] += stats.norm.pdf(key, x, 100) / stats.norm.pdf(x, x, 100) + self.knowledge_values[key] += stats.norm.pdf(key, x, 100) / stats.norm.pdf( + x, x, 100 + ) def _get_normalized_knowledge(self, x): if isinstance(x, np.ndarray): @@ -148,4 +150,4 @@ def _get_normalized_knowledge(self, x): knowledge = self.knowledge_values[x] min_ = np.min(knowledge) max_ = np.max(knowledge) - return 1. + .5 * (knowledge - min_) / (max_ - min_) + return 1.0 + 0.5 * (knowledge - min_) / (max_ - min_)