ubc-cirrus-lab · arshiamoghimi · Dec 16, 2022 · Dec 16, 2022 · Dec 16, 2022 · Dec 16, 2022
diff --git a/spot/Spot.py b/spot/Spot.py
@@ -11,6 +11,7 @@
 from spot.context import Context
 from spot.benchmark_config import BenchmarkConfig
 from spot.recommendation_engine.recommendation_engine import RecommendationEngine
+from spot.recommendation_engine.utility import Utility
 from spot.constants import *
 
 
@@ -54,8 +55,21 @@ def collect_data(self):
         self.last_log_timestamp = self.log_retriever.get_logs(self.last_log_timestamp)
 
     def invoke(self, memory_mb, count):
+        """Invoke the function ``count`` times and print the resulting cost.
+
+        ``is_warm`` is False for the first invocation and True afterwards.
+        The printed cost is computed from the mean billed duration; nothing
+        is printed when ``count`` is 0, since there is no duration to average.
+        """
+        # Every slot is overwritten in the loop, so no initial values are needed.
+        billed_duration = np.empty(count, dtype=np.double)
         for i in range(count):
-            self.recommendation_engine.invoke_once(memory_mb, is_warm=(i > 0))
+            df = self.recommendation_engine.invoke_once(memory_mb, is_warm=(i > 0))
+            # Positional lookup: does not depend on the frame's index labels.
+            billed_duration[i] = df["Billed Duration"].iloc[0]
+        if count > 0:
+            mean_duration = np.mean(billed_duration)
+            print("Real cost:", Utility.calculate_cost(mean_duration, memory_mb))
 
     def teardown(self, optimization_s):
         # Just saving the Context for now.

diff --git a/spot/recommendation_engine/objectives.py b/spot/recommendation_engine/objectives.py
@@ -121,3 +121,52 @@ def get_normal_value(self, x, mean, std):
         return (
             self.ratio * stats.norm.pdf(x, mean, std) / stats.norm.pdf(mean, mean, std)
         )
+
+
+class FitToRealCostObjective(Objective):
+    """Objective that scales the predicted cost by accumulated knowledge.
+
+    The value at memory size ``x`` is the fitted duration times ``x``,
+    multiplied by a factor in ``[1.0, 1.5]`` that grows with the knowledge
+    accumulated at ``x`` through ``update_knowledge``.
+    """
+
+    def __init__(self, sampler, memory_range):
+        super().__init__(sampler, memory_range)
+        self.ratio = 1
+        # One knowledge accumulator per integer memory size in the inclusive range.
+        self.knowledge_values = {
+            x: 0 for x in range(self.memory_range[0], self.memory_range[1] + 1)
+        }
+
+    def get_value(self, x):
+        """Return the knowledge-weighted predicted cost at ``x`` (scalar or array)."""
+        duration = Utility.fn(x, **self.sampler.function_parameters)
+        real_cost = duration * x
+        knowledge = self._get_normalized_knowledge(x)
+        return real_cost * knowledge
+
+    def update_knowledge(self, x):
+        """Add a Gaussian bump (std 100, peak 1) centered on ``x`` to every key."""
+        # The peak density is loop-invariant; it rescales the bump to height 1.
+        peak = stats.norm.pdf(x, x, 100)
+        for key in self.knowledge_values:
+            self.knowledge_values[key] += stats.norm.pdf(key, x, 100) / peak
+
+    def _get_normalized_knowledge(self, x):
+        """Min-max scale the knowledge at ``x`` into ``[1.0, 1.5]``.
+
+        Scaling is relative to the queried points only.  When they all carry
+        the same knowledge (always true for a scalar ``x``) the neutral
+        factor 1.0 is returned instead of dividing by zero.
+        """
+        if isinstance(x, np.ndarray):
+            knowledge = np.array([self.knowledge_values[xs] for xs in x])
+        else:
+            knowledge = self.knowledge_values[x]
+        min_ = np.min(knowledge)
+        max_ = np.max(knowledge)
+        spread = max_ - min_
+        if spread == 0:
+            return np.ones_like(knowledge, dtype=np.double)
+        return 1.0 + 0.5 * (knowledge - min_) / spread
diff --git a/spot/recommendation_engine/recommendation_engine.py b/spot/recommendation_engine/recommendation_engine.py
@@ -3,13 +3,7 @@
 import numpy as np
 import pandas as pd
 
-from spot.recommendation_engine.objectives import (
-    NormalObjective,
-    SkewedNormalObjective,
-    DynamicNormalObjective,
-    DynamicSTDNormalObjective1,
-    DynamicSTDNormalObjective2,
-)
+from spot.recommendation_engine.objectives import *
 from spot.recommendation_engine.utility import Utility
 
 from spot.constants import *
@@ -26,7 +20,7 @@ def __init__(self, invocator, payload_path, memory_range):
         self.payload_path = payload_path
         self.function_invocator = invocator
         self.sampled_datapoints = []
-        self.sampled_points = 0
+        self.sampled_point_count = 0
         self.fitted_function = None
         self.function_parameters = {}
         self.function_degree = 2
@@ -41,6 +35,9 @@ def __init__(self, invocator, payload_path, memory_range):
             self.objective = DynamicSTDNormalObjective1(self, self.memory_range)
         elif OPTIMIZATION_OBJECTIVE == "dynamic_std2":
             self.objective = DynamicSTDNormalObjective2(self, self.memory_range)
+        elif OPTIMIZATION_OBJECTIVE == "fit_to_real_cost":
+            assert len(INITIAL_SAMPLE_MEMORIES) == 3
+            self.objective = FitToRealCostObjective(self, self.memory_range)
 
         self.exploration_cost = 0
 
@@ -53,15 +50,15 @@ def sampled_memories_count(self):
 
     def run(self):
         self.initial_sample()
-        self.sampled_points = 2
+        self.sampled_point_count = 2
         while (
             self.sampled_memories_count < TOTAL_SAMPLE_COUNT
             and self.objective.ratio > KNOWLEDGE_RATIO
         ):
-            x = self.choose_sample_point()
+            x = self._choose_sample_point()
             self.sample(x)
-            self.sampled_points += 1
-            self.function_degree = self.sampled_points
+            self.sampled_point_count += 1
+            self.function_degree = min(self.sampled_point_count, 4)
             self.fitted_function, self.function_parameters = Utility.fit_function(
                 self.sampled_datapoints, degree=self.function_degree
             )
@@ -108,8 +105,8 @@ def sample(self, x):
             payload_filename=self.payload_path,
             save_to_ctx=False,
         )
-        for value in result["Billed Duration"].tolist():
-            self.exploration_cost += Utility.calculate_cost(value, x)
+        durations = result["Billed Duration"].to_numpy()
+        self.exploration_cost += np.sum(Utility.calculate_cost(durations, x))
         result = self.function_invocator.invoke(
             invocation_count=DYNAMIC_SAMPLING_INITIAL_STEP,
             parallelism=DYNAMIC_SAMPLING_INITIAL_STEP,
@@ -129,9 +126,18 @@ def sample(self, x):
                     payload_filename=self.payload_path,
                 )
                 values.append(result.iloc[0]["Billed Duration"])
-        for value in values:
+
+        if len(values) > 2:
+            values.sort()
+            selected_values = values[len(values) // 2 - 1 : len(values) // 2]
+        else:
+            selected_values = values
+
+        self.exploration_cost += np.sum(Utility.calculate_cost(np.array(values), x))
+
+        for value in selected_values:
             self.sampled_datapoints.append(DataPoint(memory=x, billed_time=value))
-            self.exploration_cost += Utility.calculate_cost(value, x)
+
         print(f"finished sampling {x} with {len(values)} samples")
         self.objective.update_knowledge(x)
 
@@ -153,15 +159,19 @@ def invoke_once(self, memory_mb, is_warm=True):
         )
         return result
 
-    def choose_sample_point(self):
-        max_value = self.memory_range[0]
-        max_obj = np.inf
-        for value in self._remainder_memories():
-            obj = self.objective.get_value(value)
-            if obj < max_obj:
-                max_value = value
-                max_obj = obj
-        return max_value
+    def _choose_sample_point(self):
+        """Return the candidate memory size with the lowest objective value.
+
+        Candidates come from ``_remainder_memories()``.  When there are none,
+        the lower bound of the memory range is returned, as the previous
+        loop-based implementation did, instead of raising from ``np.argmin``.
+        """
+        mems = np.array(self._remainder_memories(), dtype=np.double)
+        if mems.size == 0:
+            return self.memory_range[0]
+        values = self.objective.get_value(mems)
+        index = np.argmin(values)
+        return int(mems[index])
 
     def _remainder_memories(self):
         memories = range(self.memory_range[0], self.memory_range[1] + 1)

diff --git a/spot/recommendation_engine/utility.py b/spot/recommendation_engine/utility.py
@@ -12,14 +12,16 @@ def __init__(self, memory, billed_time):
 class Utility:
     @staticmethod
     def find_minimum_memory_cost(f, params, memory_range):
-        min_cost = np.inf
-        min_memory = 0
-        for memory in range(memory_range[0], memory_range[1] + 1):
-            cost = Utility.calculate_cost(f(memory, **params), memory)
-            if cost < min_cost:
-                min_cost = cost
-                min_memory = memory
-        return min_memory, min_cost
+        """Return ``(memory, cost)`` for the lowest computed cost in the range.
+
+        ``f`` is evaluated on every integer memory size in the inclusive
+        ``memory_range`` with ``params`` passed as keyword arguments.
+        """
+        mems = np.arange(memory_range[0], memory_range[1] + 1, dtype=np.double)
+        costs = Utility.calculate_cost(f(mems, **params), mems)
+        min_index = np.argmin(costs)
+        # Memory sizes are whole numbers; return an int as the loop-based version did.
+        return int(mems[min_index]), costs[min_index]
 
     @staticmethod
     def calculate_cost(duration, memory):
@@ -36,12 +32,11 @@ def cv(l):
 
     @staticmethod
     def check_function_validity(f, params, memory_range):
-        if all(v >= 0 for v in params.values()):
-            return True
-        for x in range(memory_range[0], memory_range[1] + 1):
-            if f(x, **params) < 0:
-                return False
-        return True
+        """Return True if ``f`` is non-negative at every memory size in the range."""
+        # Float dtype matches find_minimum_memory_cost so ``f`` sees the same input type.
+        mems = np.arange(memory_range[0], memory_range[1] + 1, dtype=np.double)
+        # bool() keeps the plain-bool return type of the loop-based version.
+        return bool(np.all(f(mems, **params) >= 0))
 
     @staticmethod
     def fit_function(datapoints, degree):