lladdy · lladdy · Sep 25, 2023 · Sep 25, 2023
diff --git a/bossman/algorithm/__init__.py b/bossman/algorithm/__init__.py
diff --git a/bossman/algorithm/base_success_probability_algorithm.py b/bossman/algorithm/base_success_probability_algorithm.py
@@ -0,0 +1,17 @@
+from abc import ABC, abstractmethod
+
+import numpy as np
+
+
+class BaseSuccessProbabilityAlgorithm(ABC):
+    """Abstract interface for algorithms that score a list of choices by their chance of success."""
+
+    @abstractmethod
+    def calc(self, success_perc: np.ndarray, chosen_count: np.ndarray) -> np.ndarray:
+        """
+        Calculates the probability of success for each choice.
+        :param success_perc: the success percentage of each choice
+        :param chosen_count: the number of times each choice was chosen
+        :return: an array holding one score per choice (BossMan rescales these to sum to 1.0)
+        """
+        pass
diff --git a/bossman/algorithm/ucb1.py b/bossman/algorithm/ucb1.py
@@ -0,0 +1,26 @@
+import math
+
+import numpy as np
+
+from bossman.algorithm.base_success_probability_algorithm import BaseSuccessProbabilityAlgorithm
+
+
+class UCB1(BaseSuccessProbabilityAlgorithm):
+    """
+    Based on https://www.chessprogramming.org/UCT
+    Upper confidence bound:
+    UCB1 = win percentage + C * sqrt(ln(total_games + 1) / visits)
+    """
+
+    def __init__(self, explore_constant: float = 1.4):
+        self.explore_constant = explore_constant  # C: weight of the exploration term
+
+    def calc(self, success_perc: np.ndarray, chosen_count: np.ndarray) -> np.ndarray:
+        """Return the UCB1 score of each choice; never-chosen ones get a huge (1e100) bonus."""
+        total_games = np.sum(chosen_count)
+        if total_games <= 0:
+            return np.ones_like(chosen_count, dtype=float)  # no history: all choices equal
+        tried = chosen_count != 0
+        bonus = np.full(np.shape(chosen_count), 1e100)  # placeholder for untried choices
+        bonus[tried] = np.sqrt(math.log(total_games + 1) / chosen_count[tried])  # no x/0
+        return success_perc + self.explore_constant * bonus
diff --git a/bossman/algorithm/weighted_success_rate.py b/bossman/algorithm/weighted_success_rate.py
@@ -0,0 +1,27 @@
+import numpy as np
+from scipy.special import expit
+
+from bossman.algorithm.base_success_probability_algorithm import BaseSuccessProbabilityAlgorithm
+
+
+class WeightedSuccessRate(BaseSuccessProbabilityAlgorithm):
+    """
+    Returns the choice success rate, modified to favour choices with a low sample size.
+    """
+
+    def __init__(self, mod: float = 1.0):
+        """
+        Store the fall-off rate used by calc().
+        :param mod: The higher this value, the quicker the weight falls off as chosen_count climbs
+        """
+        self.mod = mod
+
+    def calc(self, success_perc: np.ndarray, chosen_count: np.ndarray) -> np.ndarray:
+        """
+        Adds a bonus to each success rate: 1 at chosen_count == 0, shrinking towards 0 as it grows.
+        """
+        # Weight is 1 for an unchosen option (expit(0) == 0.5) and decays as chosen_count * mod grows
+        probability_weight = 1 - (expit(chosen_count * self.mod) - 0.5) * 2
+
+        # Add that weight to each choice's win percentage
+        return success_perc + probability_weight
diff --git a/bossman/bossman.py b/bossman/bossman.py
@@ -1,9 +1,8 @@
-import math
 from typing import Union
 
 import numpy as np
-from scipy.special import expit
 
+from bossman.algorithm.weighted_success_rate import WeightedSuccessRate
 from bossman.backend import BackendType, Backend, BackendFactory
 from bossman.utl import (
     fix_p,
@@ -20,18 +19,18 @@ def __init__(
         rounding_precision: int = 4,
         autosave=True,
         legacy=False,
-        explore_constant=1.4,
         random_distribution=True,
         backend: Union[BackendType, Backend] = BackendType.JSON,
+        success_probability_algorithm=WeightedSuccessRate(),
     ):
         self.match_decision_history: dict = {"decisions": []}
         self.rounding_precision = rounding_precision
         self.autosave = autosave
         self.legacy = legacy
-        self.explore_constant = explore_constant
         self.random_distribution = random_distribution
         self.backend = BackendFactory.construct(backend)
         self.decision_stats = self.backend.load_decision_stats()
+        self.success_probability_algorithm = success_probability_algorithm
 
     def decide(self, decision_type, options, **context) -> (str, float):
         """
@@ -137,9 +136,8 @@ def _calc_choice_probabilities(self, chosen_count: np.array, won_count: np.array
         """
         win_perc = self._calc_win_perc(chosen_count, won_count)
 
-        total_games = chosen_count.sum()
         # Apply that weight to each choice's win percentage
-        weighted_probabilities = self._calc_weighted_probability(win_perc, chosen_count, total_games)
+        weighted_probabilities = self.success_probability_algorithm.calc(win_perc, chosen_count)
 
         # Scale probabilities back down so they sum to 1.0 again.
         prob_sum = np.sum(weighted_probabilities)
@@ -175,32 +173,6 @@ def _calc_win_perc(self, chosen_count, won_count):
             where=won_count != 0,
         )
 
-    def _calc_weighted_probability(self, win_perc, chosen_count, total_games):
-        if self.legacy:
-            """
-            mod: The higher this value, the quicker the weight fall off as chosen_count climbs
-            """
-            mod = 1.0
-            # calculate a weight that will make low sample size choices more likely
-            probability_weight = 1 - (expit(chosen_count * mod) - 0.5) * 2
-
-            # Apply that weight to each choice's win percentage
-            return win_perc + probability_weight
-        else:
-            return self._calc_ucb(win_perc, chosen_count, total_games)
-
-    # Based on https://www.chessprogramming.org/UCT
-    # Upper confidence bound:
-    # UCB1 = win percentage + C * sqrt(ln(total_games) / visits)
-    def _calc_ucb(self, win_perc, chosen_count, total_games):
-        if total_games > 0:
-            return win_perc + self.explore_constant * np.sqrt(
-                math.log(total_games + 1) / chosen_count,
-                out=np.ones_like(chosen_count, dtype=float) * 1e100,
-                where=chosen_count != 0,
-            )
-        return np.ones_like(chosen_count, dtype=float)
-
     def _round_probabilities_sum(self, probabilities: np.array) -> np.array:
         probabilities = floor(probabilities, self.rounding_precision)
         round_amount = 1.0 - np.sum(probabilities)

diff --git a/test/test_bossman.py b/test/test_bossman.py
@@ -1,24 +1,25 @@
+from bossman.algorithm.ucb1 import UCB1
 from bossman.bossman import BossMan
 import unittest
 
 
 class TestBossman(unittest.TestCase):
     def test_decide_explore_after_win(self):
-        boss_man = BossMan(random_distribution=False)
+        boss_man = BossMan(random_distribution=False, success_probability_algorithm=UCB1())
         build1, _ = boss_man.decide("build", ["FourRax", "FiveRax"])
         boss_man.report_result(True, save_to_file=False)
         build2, _ = boss_man.decide("build", ["FourRax", "FiveRax"])
         self.assertNotEqual(build1, build2)
 
     def test_decide_explore_after_loss(self):
-        boss_man = BossMan(random_distribution=False)
+        boss_man = BossMan(random_distribution=False, success_probability_algorithm=UCB1())
         build1, _ = boss_man.decide("build", ["FourRax", "FiveRax"])
         boss_man.report_result(False, save_to_file=False)
         build2, _ = boss_man.decide("build", ["FourRax", "FiveRax"])
         self.assertNotEqual(build1, build2)
 
     def test_decide_consider_winrate(self):
-        boss_man = BossMan(random_distribution=False)
+        boss_man = BossMan(random_distribution=False, success_probability_algorithm=UCB1())
         build1, _ = boss_man.decide("build", ["FourRax", "FiveRax"])
         boss_man.report_result(True, save_to_file=False)
         boss_man.decide("build", ["FourRax", "FiveRax"])