Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/fit to real cost #126

Merged
merged 9 commits into from
Dec 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion spot/Spot.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from spot.context import Context
from spot.benchmark_config import BenchmarkConfig
from spot.recommendation_engine.recommendation_engine import RecommendationEngine
from spot.recommendation_engine.utility import Utility
from spot.constants import *


Expand Down Expand Up @@ -54,8 +55,11 @@ def collect_data(self):
self.last_log_timestamp = self.log_retriever.get_logs(self.last_log_timestamp)

def invoke(self, memory_mb, count):
billed_duration = np.arange(count, dtype=np.double)
for i in range(count):
self.recommendation_engine.invoke_once(memory_mb, is_warm=(i > 0))
df = self.recommendation_engine.invoke_once(memory_mb, is_warm=(i > 0))
billed_duration[i] = df["Billed Duration"][0]
print("Real cost:", Utility.calculate_cost(np.mean(billed_duration), memory_mb))

def teardown(self, optimization_s):
# Just saving the Context for now.
Expand Down
30 changes: 30 additions & 0 deletions spot/recommendation_engine/objectives.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,33 @@ def get_normal_value(self, x, mean, std):
return (
self.ratio * stats.norm.pdf(x, mean, std) / stats.norm.pdf(mean, mean, std)
)


class FitToRealCostObjective(Objective):
    """Objective that weights the modelled cost of a memory size by how much
    is already known about that region of the memory range.

    Every sampled memory size adds a Gaussian-shaped bump of "knowledge"
    around itself; ``get_value`` multiplies the modelled cost by a factor in
    ``[1.0, 1.5]`` derived from that knowledge, so well-explored memory sizes
    score higher than unexplored ones with the same modelled cost.
    """

    # Standard deviation of the knowledge bump added around each sampled
    # memory size by ``update_knowledge``.
    KNOWLEDGE_STD = 100

    def __init__(self, sampler, memory_range):
        """Create the objective with zero knowledge for every memory size.

        Args:
            sampler: Object exposing ``function_parameters``, the keyword
                arguments forwarded to ``Utility.fn``.
            memory_range: Pair ``(lowest, highest)`` of memory sizes; both
                ends are inclusive.
        """
        super().__init__(sampler, memory_range)
        self.ratio = 1
        # One accumulator per integer memory size in the inclusive range.
        self.knowledge_values = {
            x: 0 for x in range(self.memory_range[0], self.memory_range[1] + 1)
        }

    def get_value(self, x):
        """Return the knowledge-weighted cost for memory size(s) ``x``.

        Args:
            x: A single memory size or a NumPy array of memory sizes.

        Returns:
            ``Utility.fn(x, ...) * x`` scaled by the normalized knowledge
            factor; same shape as ``x``.
        """
        # NOTE(review): Utility.fn is presumably the fitted duration model;
        # it is defined outside this block -- confirm.
        duration = Utility.fn(x, **self.sampler.function_parameters)
        real_cost = duration * x
        knowledge = self._get_normalized_knowledge(x)
        return real_cost * knowledge

    def update_knowledge(self, x):
        """Record that memory size ``x`` was sampled.

        Adds a Gaussian bump centred on ``x`` (peak value 1, standard
        deviation ``KNOWLEDGE_STD``) to every entry of ``knowledge_values``.
        """
        if not self.knowledge_values:
            return
        keys = np.fromiter(self.knowledge_values, dtype=np.double)
        # The peak density does not depend on the key, so compute it once
        # and evaluate the pdf for all keys in a single vectorized call.
        peak = stats.norm.pdf(x, x, self.KNOWLEDGE_STD)
        bumps = stats.norm.pdf(keys, x, self.KNOWLEDGE_STD) / peak
        for key, bump in zip(self.knowledge_values, bumps):
            self.knowledge_values[key] += bump

    def _get_normalized_knowledge(self, x):
        """Map the knowledge at ``x`` to a factor in ``[1.0, 1.5]``.

        The knowledge values selected by ``x`` are min-max normalized among
        themselves. When they are all equal (always the case for a scalar
        ``x``, and for arrays before any knowledge has been recorded) the
        normalization is undefined, so the neutral factor ``1.0`` is
        returned instead of dividing by zero and producing NaN.
        """
        is_array = isinstance(x, np.ndarray)
        if is_array:
            knowledge = np.array(
                [self.knowledge_values[xs] for xs in x], dtype=np.double
            )
            if knowledge.size == 0:
                # Nothing to normalize; keep the (empty) shape.
                return knowledge
        else:
            knowledge = np.double(self.knowledge_values[x])

        lowest = np.min(knowledge)
        span = np.max(knowledge) - lowest
        if span == 0:
            return np.ones_like(knowledge) if is_array else 1.0
        return 1.0 + 0.5 * (knowledge - lowest) / span
52 changes: 27 additions & 25 deletions spot/recommendation_engine/recommendation_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,7 @@
import numpy as np
import pandas as pd

from spot.recommendation_engine.objectives import (
NormalObjective,
SkewedNormalObjective,
DynamicNormalObjective,
DynamicSTDNormalObjective1,
DynamicSTDNormalObjective2,
)
from spot.recommendation_engine.objectives import *
from spot.recommendation_engine.utility import Utility

from spot.constants import *
Expand All @@ -26,7 +20,7 @@ def __init__(self, invocator, payload_path, memory_range):
self.payload_path = payload_path
self.function_invocator = invocator
self.sampled_datapoints = []
self.sampled_points = 0
self.sampled_point_count = 0
self.fitted_function = None
self.function_parameters = {}
self.function_degree = 2
Expand All @@ -41,6 +35,9 @@ def __init__(self, invocator, payload_path, memory_range):
self.objective = DynamicSTDNormalObjective1(self, self.memory_range)
elif OPTIMIZATION_OBJECTIVE == "dynamic_std2":
self.objective = DynamicSTDNormalObjective2(self, self.memory_range)
elif OPTIMIZATION_OBJECTIVE == "fit_to_real_cost":
assert len(INITIAL_SAMPLE_MEMORIES) == 3
self.objective = FitToRealCostObjective(self, self.memory_range)

self.exploration_cost = 0

Expand All @@ -53,15 +50,15 @@ def sampled_memories_count(self):

def run(self):
self.initial_sample()
self.sampled_points = 2
self.sampled_point_count = 2
while (
self.sampled_memories_count < TOTAL_SAMPLE_COUNT
and self.objective.ratio > KNOWLEDGE_RATIO
):
x = self.choose_sample_point()
x = self._choose_sample_point()
self.sample(x)
self.sampled_points += 1
self.function_degree = self.sampled_points
self.sampled_point_count += 1
self.function_degree = min(self.sampled_point_count, 4)
arshiamoghimi marked this conversation as resolved.
Show resolved Hide resolved
self.fitted_function, self.function_parameters = Utility.fit_function(
self.sampled_datapoints, degree=self.function_degree
)
Expand Down Expand Up @@ -108,8 +105,8 @@ def sample(self, x):
payload_filename=self.payload_path,
save_to_ctx=False,
)
for value in result["Billed Duration"].tolist():
self.exploration_cost += Utility.calculate_cost(value, x)
durations = result["Billed Duration"].to_numpy()
self.exploration_cost += np.sum(Utility.calculate_cost(durations, x))
result = self.function_invocator.invoke(
invocation_count=DYNAMIC_SAMPLING_INITIAL_STEP,
parallelism=DYNAMIC_SAMPLING_INITIAL_STEP,
Expand All @@ -129,9 +126,18 @@ def sample(self, x):
payload_filename=self.payload_path,
)
values.append(result.iloc[0]["Billed Duration"])
for value in values:

if len(values) > 2:
values.sort()
selected_values = values[len(values) // 2 - 1 : len(values) // 2]
else:
selected_values = values

self.exploration_cost += np.sum(Utility.calculate_cost(np.array(values), x))

for value in selected_values:
self.sampled_datapoints.append(DataPoint(memory=x, billed_time=value))
self.exploration_cost += Utility.calculate_cost(value, x)

print(f"finished sampling {x} with {len(values)} samples")
self.objective.update_knowledge(x)

Expand All @@ -153,15 +159,11 @@ def invoke_once(self, memory_mb, is_warm=True):
)
return result

def choose_sample_point(self):
max_value = self.memory_range[0]
max_obj = np.inf
for value in self._remainder_memories():
obj = self.objective.get_value(value)
if obj < max_obj:
max_value = value
max_obj = obj
return max_value
def _choose_sample_point(self):
mems = np.array(self._remainder_memories(), dtype=np.double)
values = self.objective.get_value(mems)
index = np.argmin(values)
return int(mems[index])

def _remainder_memories(self):
memories = range(self.memory_range[0], self.memory_range[1] + 1)
Expand Down
20 changes: 6 additions & 14 deletions spot/recommendation_engine/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,10 @@ def __init__(self, memory, billed_time):
class Utility:
@staticmethod
def find_minimum_memory_cost(f, params, memory_range):
min_cost = np.inf
min_memory = 0
for memory in range(memory_range[0], memory_range[1] + 1):
cost = Utility.calculate_cost(f(memory, **params), memory)
if cost < min_cost:
min_cost = cost
min_memory = memory
return min_memory, min_cost
mems = np.arange(memory_range[0], memory_range[1] + 1, dtype=np.double)
costs = Utility.calculate_cost(f(mems, **params), mems)
min_index = np.argmin(costs)
return mems[min_index], costs[min_index]

@staticmethod
def calculate_cost(duration, memory):
Expand All @@ -36,12 +32,8 @@ def cv(l):

@staticmethod
def check_function_validity(f, params, memory_range):
if all(v >= 0 for v in params.values()):
return True
for x in range(memory_range[0], memory_range[1] + 1):
if f(x, **params) < 0:
return False
return True
mems = np.arange(memory_range[0], memory_range[1] + 1)
return np.all(f(mems, **params) >= 0)

@staticmethod
def fit_function(datapoints, degree):
Expand Down