Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add framework for success probability algorithms #4

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added bossman/algorithm/__init__.py
Empty file.
17 changes: 17 additions & 0 deletions bossman/algorithm/base_success_probability_algorithm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from abc import ABC, abstractmethod

import numpy as np


class BaseSuccessProbabilityAlgorithm(ABC):
    """Interface for algorithms that score a list of choices by expected success.

    Implementations receive per-choice statistics as parallel arrays and return
    one score per choice, in the same order as the inputs.
    """

    @abstractmethod
    def calc(self, success_perc: np.ndarray, chosen_count: np.ndarray) -> np.ndarray:
        """
        Calculates the probability of success for each choice.

        :param success_perc: the success percentage of each choice
        :param chosen_count: the number of times each choice was chosen
        :return: an array holding one score per choice, index-aligned with the
            inputs (an array, not a dict)
        """
26 changes: 26 additions & 0 deletions bossman/algorithm/ucb1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import math

import numpy as np

from bossman.algorithm.base_success_probability_algorithm import BaseSuccessProbabilityAlgorithm


class UCB1(BaseSuccessProbabilityAlgorithm):
    """
    Based on https://www.chessprogramming.org/UCT
    Upper confidence bound:
    UCB1 = win percentage + C * sqrt(ln(total_games + 1) / visits)

    Choices that have never been chosen get a very large exploration term
    (1e100 before scaling by C) so they outrank every visited choice.
    """

    def __init__(self, explore_constant=1.4):
        """
        :param explore_constant: the C factor applied to the exploration term
        """
        self.explore_constant = explore_constant

    def calc(self, success_perc: np.ndarray, chosen_count: np.ndarray) -> np.ndarray:
        """
        Calculates the UCB1 score of each choice.

        :param success_perc: the success percentage of each choice
        :param chosen_count: the number of times each choice was chosen
        :return: one score per choice; all ones when nothing has been chosen yet
        """
        total_games = np.sum(chosen_count)
        if total_games > 0:
            visited = chosen_count != 0
            # Pre-fill with the "never visited" placeholder; only visited slots
            # are overwritten below.
            exploration = np.full_like(chosen_count, 1e100, dtype=float)
            # Mask the division as well as the sqrt so zero counts are never
            # divided by (this used to emit a divide-by-zero RuntimeWarning).
            np.divide(
                math.log(total_games + 1),
                chosen_count,
                out=exploration,
                where=visited,
            )
            np.sqrt(exploration, out=exploration, where=visited)
            return success_perc + self.explore_constant * exploration
        return np.ones_like(chosen_count, dtype=float)
27 changes: 27 additions & 0 deletions bossman/algorithm/weighted_success_rate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import numpy as np
from scipy.special import expit

from bossman.algorithm.base_success_probability_algorithm import BaseSuccessProbabilityAlgorithm


class WeightedSuccessRate(BaseSuccessProbabilityAlgorithm):
    """
    Returns the choice success rate, modified to favour choices with a low sample size.
    """

    def __init__(self, mod: float = 1.0):
        """
        :param mod: The higher this value, the quicker the weight falls off as chosen_count climbs
        """
        self.mod = mod

    def calc(self, success_perc: np.ndarray, chosen_count: np.ndarray) -> np.ndarray:
        """
        Adds a sample-size bonus to each choice's success percentage.

        The bonus is 1 for a choice that was never chosen (expit(0) == 0.5) and
        shrinks towards 0 as chosen_count * self.mod grows.

        :param success_perc: the success percentage of each choice
        :param chosen_count: the number of times each choice was chosen
        :return: one weighted score per choice
        """
        # calculate a weight that will make low sample size choices more likely
        probability_weight = 1 - (expit(chosen_count * self.mod) - 0.5) * 2

        # Apply that weight to each choice's win percentage
        return success_perc + probability_weight
36 changes: 4 additions & 32 deletions bossman/bossman.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import math
from typing import Union

import numpy as np
from scipy.special import expit

from bossman.algorithm.weighted_success_rate import WeightedSuccessRate
from bossman.backend import BackendType, Backend, BackendFactory
from bossman.utl import (
fix_p,
Expand All @@ -20,18 +19,18 @@ def __init__(
rounding_precision: int = 4,
autosave=True,
legacy=False,
explore_constant=1.4,
random_distribution=True,
backend: Union[BackendType, Backend] = BackendType.JSON,
success_probability_algorithm=WeightedSuccessRate(),
):
self.match_decision_history: dict = {"decisions": []}
self.rounding_precision = rounding_precision
self.autosave = autosave
self.legacy = legacy
self.explore_constant = explore_constant
self.random_distribution = random_distribution
self.backend = BackendFactory.construct(backend)
self.decision_stats = self.backend.load_decision_stats()
self.success_probability_algorithm = success_probability_algorithm

def decide(self, decision_type, options, **context) -> (str, float):
"""
Expand Down Expand Up @@ -137,9 +136,8 @@ def _calc_choice_probabilities(self, chosen_count: np.array, won_count: np.array
"""
win_perc = self._calc_win_perc(chosen_count, won_count)

total_games = chosen_count.sum()
# Apply that weight to each choice's win percentage
weighted_probabilities = self._calc_weighted_probability(win_perc, chosen_count, total_games)
weighted_probabilities = self.success_probability_algorithm.calc(win_perc, chosen_count)

# Scale probabilities back down so they sum to 1.0 again.
prob_sum = np.sum(weighted_probabilities)
Expand Down Expand Up @@ -175,32 +173,6 @@ def _calc_win_perc(self, chosen_count, won_count):
where=won_count != 0,
)

def _calc_weighted_probability(self, win_perc, chosen_count, total_games):
    """Score each choice with either the legacy sample-size weighting or UCB.

    When ``self.legacy`` is falsy the work is delegated to ``self._calc_ucb``;
    otherwise a bonus that shrinks as ``chosen_count`` grows is added to
    ``win_perc``.
    """
    if not self.legacy:
        return self._calc_ucb(win_perc, chosen_count, total_games)

    # falloff: the higher this value, the quicker the weight falls off as
    # chosen_count climbs
    falloff = 1.0
    # Bonus that makes low sample size choices more likely.
    low_sample_bonus = 1 - (expit(chosen_count * falloff) - 0.5) * 2
    return win_perc + low_sample_bonus

# Based on https://www.chessprogramming.org/UCT
# Upper confidence bound:
# UCB1 = win percentage + C * sqrt(ln(total_games + 1) / visits)
def _calc_ucb(self, win_perc, chosen_count, total_games):
    """Return the UCB1 score of each choice.

    :param win_perc: the win percentage of each choice
    :param chosen_count: the number of times each choice was chosen
    :param total_games: the total number of games across all choices
    :return: one score per choice; all ones when total_games is not positive.
        Choices never chosen get an exploration term of 1e100 (before scaling
        by self.explore_constant) so they outrank every visited choice.
    """
    if total_games > 0:
        visited = chosen_count != 0
        # Pre-fill with the "never visited" placeholder; only visited slots
        # are overwritten below.
        exploration = np.full_like(chosen_count, 1e100, dtype=float)
        # Mask the division as well as the sqrt so zero counts are never
        # divided by (this used to emit a divide-by-zero RuntimeWarning).
        np.divide(
            math.log(total_games + 1),
            chosen_count,
            out=exploration,
            where=visited,
        )
        np.sqrt(exploration, out=exploration, where=visited)
        return win_perc + self.explore_constant * exploration
    return np.ones_like(chosen_count, dtype=float)

def _round_probabilities_sum(self, probabilities: np.array) -> np.array:
probabilities = floor(probabilities, self.rounding_precision)
round_amount = 1.0 - np.sum(probabilities)
Expand Down
7 changes: 4 additions & 3 deletions test/test_bossman.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
from bossman.algorithm.ucb1 import UCB1
from bossman.bossman import BossMan
import unittest


class TestBossman(unittest.TestCase):
def test_decide_explore_after_win(self):
    """After a reported win, the next decision should pick the other option."""
    # Build the instance once; a second construction right before this one
    # was discarded immediately and only repeated the setup work.
    boss_man = BossMan(random_distribution=False, success_probability_algorithm=UCB1())
    build1, _ = boss_man.decide("build", ["FourRax", "FiveRax"])
    boss_man.report_result(True, save_to_file=False)
    build2, _ = boss_man.decide("build", ["FourRax", "FiveRax"])
    self.assertNotEqual(build1, build2)

def test_decide_explore_after_loss(self):
    """After a reported loss, the next decision should pick the other option."""
    # Build the instance once; a second construction right before this one
    # was discarded immediately and only repeated the setup work.
    boss_man = BossMan(random_distribution=False, success_probability_algorithm=UCB1())
    build1, _ = boss_man.decide("build", ["FourRax", "FiveRax"])
    boss_man.report_result(False, save_to_file=False)
    build2, _ = boss_man.decide("build", ["FourRax", "FiveRax"])
    self.assertNotEqual(build1, build2)

def test_decide_consider_winrate(self):
boss_man = BossMan(random_distribution=False)
boss_man = BossMan(random_distribution=False, success_probability_algorithm=UCB1())
build1, _ = boss_man.decide("build", ["FourRax", "FiveRax"])
boss_man.report_result(True, save_to_file=False)
boss_man.decide("build", ["FourRax", "FiveRax"])
Expand Down
Loading