prisonersdilemma_2p #758
steveyuwono committed Dec 7, 2023
1 parent 2c4b035 commit 795cdbf
Showing 3 changed files with 246 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/mlpro/gt/pool/native/games/prisonersdilemma_2p.py
@@ -49,12 +49,12 @@ def _setup_payoff_matrix(self):

self._add_payoff_matrix(
p_idx=0,
- p_payoff_matrix=np.array([[2, 8], [1, 5]])
+ p_payoff_matrix=np.array([[5, 8], [1, 2]]) # ([[(0,0), (0,1)], [(1,0), (1,1)]])
)

self._add_payoff_matrix(
p_idx=1,
- p_payoff_matrix=np.array([[2, 1], [8, 5]])
+ p_payoff_matrix=np.array([[5, 1], [8, 2]])
)


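Note on the changed matrices: the inline comment added to the first matrix, ([[(0,0), (0,1)], [(1,0), (1,1)]]),
suggests that each entry is indexed by the joint strategy profile (decision of prisoner 1, decision of prisoner 2).
As a minimal standalone sketch (plain NumPy, no MLPro dependency; the variable names are illustrative only),
this is how the two updated matrices are read for one profile:

import numpy as np

payoff_p1 = np.array([[5, 8], [1, 2]])   # payoff of prisoner 1, indexed by (decision 1, decision 2)
payoff_p2 = np.array([[5, 1], [8, 2]])   # payoff of prisoner 2, indexed the same way

d1, d2 = 0, 1                            # example profile: prisoner 1 chooses 0, prisoner 2 chooses 1
print(payoff_p1[d1, d2], payoff_p2[d1, d2])   # -> 8 1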
153 changes: 153 additions & 0 deletions src/mlpro/gt/pool/native/games/prisonersdilemma_3p.py
@@ -0,0 +1,153 @@
## -------------------------------------------------------------------------------------------------
## -- Project : MLPro - A Synoptic Framework for Standardized Machine Learning Tasks
## -- Package : mlpro.gt.pool.native.games
## -- Module : prisonersdilemma_3p
## -------------------------------------------------------------------------------------------------
## -- History :
## -- yyyy-mm-dd Ver. Auth. Description
## -- 2023-12-07 0.0.0 SY Creation
## -- 2023-12-07 1.0.0 SY Release of first version
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.0.0 (2023-12-07)
This module provides a 3-player game of Prisoners' Dilemma with greedy and random solvers.
In the near future, we are going to add more solvers, and this module is going to be updated accordingly.
The game consists of three competitors, where each competitor represents a prisoner.
All of them have the goal to minimize their prison sentences, where the lengths of their sentences
depend on their decisions in front of the jury:
- If one prisoner pleads guilty while the other two plead not guilty, the guilty prisoner gets 10 years
  of imprisonment, while each of the not guilty prisoners gets 1 year.
- If two of them plead guilty, each of those two gets 5 years of imprisonment, while the not guilty
  prisoner gets 1 year.
- If all three of them plead not guilty, each of them obtains 5 years of imprisonment.
- If all three of them plead guilty, each of them obtains 2 years of imprisonment.
To be noted, the decision making of the prisoners takes place simultaneously, where:
- Decision "0" means confess (plead guilty)
- Decision "1" means not confess (plead not guilty)
"""

from mlpro.gt.native.basics import *
from mlpro.gt.pool.native.solvers.randomsolver import RandomSolver





## -------------------------------------------------------------------------------------------------
## -------------------------------------------------------------------------------------------------
class PayoffFunction_PD3P (GTFunction):


## -------------------------------------------------------------------------------------------------
def _setup_payoff_matrix(self):

self._add_payoff_matrix(
p_idx=0,
p_payoff_matrix=np.array([[2, 5, 5, 10], [1, 1, 1, 5]])
# ([[(0,0,0), (0,0,1), (0,1,0), (0,1,1)], [(1,0,0), (1,0,1), (1,1,0), (1,1,1)]])
)

self._add_payoff_matrix(
p_idx=1,
p_payoff_matrix=np.array([[2, 5, 1, 1], [5, 10, 1, 5]])
)

self._add_payoff_matrix(
p_idx=2,
p_payoff_matrix=np.array([[2, 1, 5, 1], [5, 1, 10, 5]])
)





## -------------------------------------------------------------------------------------------------
## -------------------------------------------------------------------------------------------------
class PrisonersDilemma3PGame (GTGame):

C_NAME = 'PrisonersDilemma3PGame'


## -------------------------------------------------------------------------------------------------
def _setup(self, p_mode, p_ada:bool, p_visualize:bool, p_logging) -> Model:

_strategy_space = MSpace()
_strategy_space.add_dim(Dimension('RStr','Z','Random Strategy','','','',[0,1]))

solver1 = RandomSolver(
p_strategy_space=_strategy_space,
p_id=1,
p_name="Random Solver",
p_visualize=p_visualize,
p_logging=p_logging
)


p1 = GTPlayer(
p_solver=solver1,
p_name="Player of Prisoner 1",
p_visualize=p_visualize,
p_logging=p_logging,
p_random_solver=False
)

coal1 = GTCoalition(
p_name="Coalition of Prisoner 1",
p_coalition_type=GTCoalition.C_COALITION_SUM
)
coal1.add_player(p1)


solver2 = RandomSolver(
p_strategy_space=_strategy_space,
p_id=2,
p_visualize=p_visualize,
p_logging=p_logging
)

p2 = GTPlayer(
p_solver=solver2,
p_name="Player of Prisoner 2",
p_visualize=p_visualize,
p_logging=p_logging,
p_random_solver=False
)

coal2 = GTCoalition(
p_name="Coalition of Prisoner 2",
p_coalition_type=GTCoalition.C_COALITION_SUM
)
coal2.add_player(p2)


solver3 = RandomSolver(
p_strategy_space=_strategy_space,
p_id=3,
p_visualize=p_visualize,
p_logging=p_logging
)

p3 = GTPlayer(
p_solver=solver3,
p_name="Player of Prisoner 3",
p_visualize=p_visualize,
p_logging=p_logging,
p_random_solver=False
)

coal3 = GTCoalition(
p_name="Coalition of Prisoner 3",
p_coalition_type=GTCoalition.C_COALITION_SUM
)
coal3.add_player(p3)


competition = GTCompetition(
p_name="Prisoner's Dilemma Competition",
p_logging=p_logging
)
competition.add_coalition(coal1)
competition.add_coalition(coal2)
competition.add_coalition(coal3)

coal_ids = competition.get_coalitions_ids()

self._payoff = GTPayoffMatrix(
p_function=PayoffFunction_PD3P(
p_func_type=GTFunction.C_FUNC_PAYOFF_MATRIX,
p_dim_elems=[2,2]
),
p_player_ids=coal_ids
)

return competition
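
As a cross-check of the three payoff matrices defined above, the following standalone sketch (plain NumPy,
no MLPro classes; all names are illustrative) enumerates every joint decision profile and prints the
resulting prison sentences. It assumes, based on the inline comment attached to the matrix for p_idx=0,
that all three matrices share the same indexing: the row is the decision of prisoner 1 and the column
encodes the decisions of prisoners 2 and 3 as 2*d2 + d3.

import numpy as np
from itertools import product

# Sentences (in years) per prisoner, copied from PayoffFunction_PD3P above
payoffs = [
    np.array([[2, 5, 5, 10], [1, 1, 1, 5]]),   # prisoner 1
    np.array([[2, 5, 1, 1], [5, 10, 1, 5]]),   # prisoner 2
    np.array([[2, 1, 5, 1], [5, 1, 10, 5]]),   # prisoner 3
]

for d1, d2, d3 in product([0, 1], repeat=3):
    col = 2 * d2 + d3
    sentences = [int(m[d1, col]) for m in payoffs]
    print((d1, d2, d3), sentences)

The output reproduces the rules in the module docstring, e.g. (0, 0, 0) -> [2, 2, 2] when all three
confess and (0, 1, 1) -> [10, 1, 1] when only prisoner 1 confesses.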




91 changes: 91 additions & 0 deletions src/mlpro/gt/pool/native/solvers/greedypolicy.py
@@ -0,0 +1,91 @@
## -------------------------------------------------------------------------------------------------
## -- Project : MLPro - A Synoptic Framework for Standardized Machine Learning Tasks
## -- Package : mlpro.gt.pool.native.solvers
## -- Module : greedypolicy
## -------------------------------------------------------------------------------------------------
## -- History :
## -- yyyy-mm-dd Ver. Auth. Description
## -- 2023-12-07 0.0.0 SY Creation
## -- 2023-12-07 1.0.0 SY Release of first version
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.0.0 (2023-12-07)
This module provides solvers with a greedy GT strategy. There are two variants: minimum greedy
and maximum greedy.
"""

from mlpro.gt.native.basics import *





## -------------------------------------------------------------------------------------------------
## -------------------------------------------------------------------------------------------------
class MaxGreedyPolicy (GTSolver):
"""
A solver that generates actions for each dimension of the underlying strategy space based on the
maximum greedy policy.
"""

C_NAME = 'MaxGreedyPolicy'


## -------------------------------------------------------------------------------------------------
def _compute_strategy(self, p_payoff:GTPayoffMatrix) -> GTStrategy:

if p_payoff._function is not None:
my_strategy_values = np.zeros(self._strategy_space.get_num_dim())

# map the solver's id to its index and its coalition id in the payoff structure
idx = self.get_id()-1
player_id = p_payoff._player_ids[idx]

# locate the best-response payoff in this player's payoff map and take the matching
# index along the axis given by the player index as the chosen strategy
best_payoff = p_payoff._function.best_response(player_id)
payoff_matrix = p_payoff._function._payoff_map[idx]
my_strategy_values[0] = np.where(payoff_matrix==best_payoff)[idx].item()

return GTStrategy(self._id, self._strategy_space, my_strategy_values)
else:
return self._call_compute_strategy(p_payoff)


## -------------------------------------------------------------------------------------------------
def _call_compute_strategy(self, p_payoff:GTPayoffMatrix) -> GTStrategy:

raise NotImplementedError





## -------------------------------------------------------------------------------------------------
## -------------------------------------------------------------------------------------------------
class MinGreedyPolicy (GTSolver):
"""
A solver that generates actions for each dimension of the underlying strategy space based on the
minimum greedy policy.
"""

C_NAME = 'MinGreedyPolicy'


## -------------------------------------------------------------------------------------------------
def _compute_strategy(self, p_payoff:GTPayoffMatrix) -> GTStrategy:

if p_payoff._function is not None:
my_strategy_values = np.zeros(self._strategy_space.get_num_dim())

# map the solver's id to its index in the payoff structure
idx = self.get_id()-1

# locate the smallest payoff in this player's payoff map and take the matching
# index along the axis given by the player index as the chosen strategy
payoff_matrix = p_payoff._function._payoff_map[idx]
least_payoff = np.min(payoff_matrix)
my_strategy_values[0] = np.where(payoff_matrix==least_payoff)[idx].item()

return GTStrategy(self._id, self._strategy_space, my_strategy_values)
else:
return self._call_compute_strategy(p_payoff)


## -------------------------------------------------------------------------------------------------
def _call_compute_strategy(self, p_payoff:GTPayoffMatrix) -> GTStrategy:

raise NotImplementedError
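
To illustrate the selection logic of the two solvers in isolation, the following standalone sketch (plain
NumPy, no MLPro classes; all names are illustrative, and the framework call best_response() is replaced by
a plain max() for the purpose of the example) applies the same np.where-based lookup to the payoff matrix
of prisoner 1 from prisonersdilemma_2p.py. Since the payoffs in these games are prison sentences (see the
3-player docstring above), the minimum greedy variant chases the smallest reachable entry, while the
maximum greedy variant chases the largest one:

import numpy as np

payoff_matrix = np.array([[5, 8], [1, 2]])   # rows: own decision, columns: opponent's decision
idx = 0                                      # axis that carries this player's own decision

best_payoff = payoff_matrix.max()            # stand-in for p_payoff._function.best_response(...)
least_payoff = payoff_matrix.min()

# .item() assumes the extreme value occurs exactly once in the matrix
max_greedy_strategy = np.where(payoff_matrix == best_payoff)[idx].item()    # -> 0 (the 8 sits in row 0)
min_greedy_strategy = np.where(payoff_matrix == least_payoff)[idx].item()   # -> 1 (the 1 sits in row 1)
print(max_greedy_strategy, min_greedy_strategy)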
