-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2c4b035
commit 795cdbf
Showing
3 changed files
with
246 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
## ------------------------------------------------------------------------------------------------- | ||
## -- Project : MLPro - A Synoptic Framework for Standardized Machine Learning Tasks | ||
## -- Package : mlpro.pool.native.games | ||
## -- Module : prisonersdilemma_3p | ||
## ------------------------------------------------------------------------------------------------- | ||
## -- History : | ||
## -- yyyy-mm-dd Ver. Auth. Description | ||
## -- 2023-12-07 0.0.0 SY Creation | ||
## -- 2023-12-07 1.0.0 SY Release of first version | ||
## ------------------------------------------------------------------------------------------------- | ||
|
||
""" | ||
Ver. 1.0.0 (2023-12-07) | ||
This module provides a 3-player game of Prisoners' Dilemma with greedy and random solvers. | ||
In the near future, we are going to add more solvers and this howto is going to be updated accordingly. | ||
The game consists of three competitors, where each competitor represents a prisoner. | ||
All of them have a goal to minimize their prison sentences, where their length of sentences depend | ||
on their decision in front of the jury. | ||
If one prisoner pleads guilty while the other two prisoners plead not guilty, the guilty prisoner gets | ||
10 years of imprisonment, while each not guilty prisoner gets 1 year of imprisonment. | ||
If two of them plead guilty, then each of them gets 5 years of imprisonment, while the not guilty prisoner | ||
gets 1 year. | ||
Meanwhile, if three of them plead not guilty, then each of them obtains 5 years of imprisonment. | ||
And if three of them plead guilty, then each of them obtains 2 years of imprisonment. | ||
Note that the decision making of the prisoners takes place simultaneously, where: | ||
- Decision "0" means confess | ||
- Decision "1" means not confess | ||
""" | ||
|
||
from mlpro.gt.native.basics import * | ||
from mlpro.gt.pool.native.solvers.randomsolver import RandomSolver | ||
|
||
|
||
|
||
|
||
|
||
## ------------------------------------------------------------------------------------------------- | ||
## ------------------------------------------------------------------------------------------------- | ||
class PayoffFunction_PD3P (GTFunction):
    """
    Payoff function of the 3-player Prisoners' Dilemma. It registers one payoff matrix (years of
    imprisonment) per prisoner.
    """


## -------------------------------------------------------------------------------------------------
    def _setup_payoff_matrix(self):
        """
        Registers the payoff matrices of the prisoners with indices 0, 1 and 2 by calling
        _add_payoff_matrix() once per prisoner.
        """

        # Cell layout, given as (decision of prisoner 0, prisoner 1, prisoner 2):
        #   [[(0,0,0), (0,0,1), (0,1,0), (0,1,1)],
        #    [(1,0,0), (1,0,1), (1,1,0), (1,1,1)]]
        # NOTE(review): the arrays are 2x4 although the cells are addressed by decision triples -
        # confirm that the consumers of these matrices expect this flattened shape.
        years_per_prisoner = (
            [[2, 5, 5, 10], [1, 1, 1, 5]],
            [[2, 5, 1, 1], [5, 10, 1, 5]],
            [[2, 1, 5, 1], [5, 1, 10, 5]],
        )

        for prisoner_idx, years in enumerate(years_per_prisoner):
            self._add_payoff_matrix(
                p_idx=prisoner_idx,
                p_payoff_matrix=np.array(years)
            )
|
||
|
||
|
||
|
||
|
||
## ------------------------------------------------------------------------------------------------- | ||
## ------------------------------------------------------------------------------------------------- | ||
class PrisonersDilemma2PGame (GTGame):
    """
    Three-player Prisoners' Dilemma in which every prisoner is a single-player coalition driven by
    a random solver.

    NOTE(review): the class name and C_NAME still carry the "2P" label although this module is the
    3-player variant. Both are kept unchanged here so that existing references keep working; rename
    them together with all callers.
    """

    C_NAME = 'PrisonersDilemma2PGame'


## -------------------------------------------------------------------------------------------------
    def _setup(self, p_mode, p_ada:bool, p_visualize:bool, p_logging) -> Model:
        """
        Builds the competition consisting of one coalition per prisoner and stores the related
        payoff matrix in self._payoff.

        Parameters
        ----------
        p_mode
            Not evaluated by this method.
        p_ada : bool
            Not evaluated by this method.
        p_visualize : bool
            Forwarded to every solver and player.
        p_logging
            Forwarded to every solver and player as well as to the competition.

        Returns
        -------
        Model
            The competition holding the coalitions of all three prisoners.
        """

        num_prisoners = 3

        # Every prisoner chooses between the two decisions 0 and 1
        _strategy_space = MSpace()
        _strategy_space.add_dim(Dimension('RStr','Z','Random Strategy','','','',[0,1]))

        # One solver, player and coalition per prisoner; solver ids start at 1
        coalitions = []
        for prisoner_no in range(1, num_prisoners + 1):
            solver = RandomSolver(
                p_strategy_space=_strategy_space,
                p_id=prisoner_no,
                p_name="Random Solver",
                p_visualize=p_visualize,
                p_logging=p_logging
            )

            player = GTPlayer(
                p_solver=solver,
                p_name="Player of Prisoner " + str(prisoner_no),
                p_visualize=p_visualize,
                p_logging=p_logging,
                p_random_solver=False
            )

            coalition = GTCoalition(
                p_name="Coalition of Prisoner " + str(prisoner_no),
                p_coalition_type=GTCoalition.C_COALITION_SUM
            )
            coalition.add_player(player)
            coalitions.append(coalition)

        competition = GTCompetition(
            p_name="Prisoner's Dilemma Competition",
            p_logging=p_logging
        )
        for coalition in coalitions:
            competition.add_coalition(coalition)

        coal_ids = competition.get_coalitions_ids()

        # The payoff function of this module is PayoffFunction_PD3P; every one of the three
        # prisoners has two possible decisions.
        # NOTE(review): confirm that p_dim_elems matches the array shape registered by
        # PayoffFunction_PD3P.
        self._payoff = GTPayoffMatrix(
            p_function=PayoffFunction_PD3P(
                p_func_type=GTFunction.C_FUNC_PAYOFF_MATRIX,
                p_dim_elems=[2] * num_prisoners
            ),
            p_player_ids=coal_ids
        )

        return competition
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
## ------------------------------------------------------------------------------------------------- | ||
## -- Project : MLPro - A Synoptic Framework for Standardized Machine Learning Tasks | ||
## -- Package : mlpro.pool.native.solvers | ||
## -- Module : greedypolicy | ||
## ------------------------------------------------------------------------------------------------- | ||
## -- History : | ||
## -- yyyy-mm-dd Ver. Auth. Description | ||
## -- 2023-12-07 0.0.0 SY Creation | ||
## -- 2023-12-07 1.0.0 SY Release of first version | ||
## ------------------------------------------------------------------------------------------------- | ||
|
||
""" | ||
Ver. 1.0.0 (2023-12-07) | ||
This module provides solvers with greedy GT strategies. There are two variants: minimum greedy | ||
and maximum greedy. | ||
""" | ||
|
||
from mlpro.gt.native.basics import * | ||
|
||
|
||
|
||
|
||
|
||
## ------------------------------------------------------------------------------------------------- | ||
## ------------------------------------------------------------------------------------------------- | ||
class MaxGreedyPolicy (GTSolver):
    """
    A solver that generates actions for each dimension of the underlying strategy space based on the
    maximum greedy policy.
    """

    C_NAME = 'MaxGreedyPolicy'


## -------------------------------------------------------------------------------------------------
    def _compute_strategy(self, p_payoff:GTPayoffMatrix) -> GTStrategy:
        """
        Selects the strategy whose cell in the solver's own payoff matrix equals the value returned
        by the payoff function's best_response().

        Parameters
        ----------
        p_payoff : GTPayoffMatrix
            Payoff matrix object providing the payoff function and the player ids.

        Returns
        -------
        GTStrategy
            Strategy whose first value is the selected index; all further values stay 0.

        Raises
        ------
        ValueError
            If no cell of the payoff matrix equals the best payoff.
        """

        if p_payoff._function is None:
            # No payoff function available: delegate to the custom hook
            return self._call_compute_strategy(p_payoff)

        my_strategy_values = np.zeros(self._strategy_space.get_num_dim())

        # Solver ids start at 1, positions in the payoff data at 0
        idx = self.get_id()-1
        player_id = p_payoff._player_ids[idx]
        best_payoff = p_payoff._function.best_response(player_id)
        payoff_matrix = p_payoff._function._payoff_map[idx]

        # np.where() returns one index array per matrix axis; entry idx holds the matching
        # positions along the axis with this solver's index.
        own_axis_hits = np.where(payoff_matrix==best_payoff)[idx]
        if own_axis_hits.size == 0:
            raise ValueError('Best payoff not found in the payoff matrix')

        # The best payoff may occur in several cells; the first match is taken
        my_strategy_values[0] = own_axis_hits[0]
        return GTStrategy(self._id, self._strategy_space, my_strategy_values)


## -------------------------------------------------------------------------------------------------
    def _call_compute_strategy(self, p_payoff:GTPayoffMatrix) -> GTStrategy:
        """
        Hook used when the payoff object carries no payoff function. To be redefined in child
        classes.

        Parameters
        ----------
        p_payoff : GTPayoffMatrix
            Payoff matrix object handed over by _compute_strategy().

        Raises
        ------
        NotImplementedError
            Always, unless redefined.
        """

        raise NotImplementedError
|
||
|
||
|
||
|
||
|
||
## ------------------------------------------------------------------------------------------------- | ||
## ------------------------------------------------------------------------------------------------- | ||
class MinGreedyPolicy (GTSolver):
    """
    A solver that generates actions for each dimension of the underlying strategy space based on the
    minimum greedy policy.
    """

    C_NAME = 'MinGreedyPolicy'


## -------------------------------------------------------------------------------------------------
    def _compute_strategy(self, p_payoff:GTPayoffMatrix) -> GTStrategy:
        """
        Selects the strategy whose cell in the solver's own payoff matrix holds the smallest payoff.

        Parameters
        ----------
        p_payoff : GTPayoffMatrix
            Payoff matrix object providing the payoff function.

        Returns
        -------
        GTStrategy
            Strategy whose first value is the selected index; all further values stay 0.
        """

        if p_payoff._function is None:
            # No payoff function available: delegate to the custom hook
            return self._call_compute_strategy(p_payoff)

        my_strategy_values = np.zeros(self._strategy_space.get_num_dim())

        # Solver ids start at 1, positions in the payoff data at 0
        idx = self.get_id()-1
        payoff_matrix = p_payoff._function._payoff_map[idx]

        # The payoff data belongs to the payoff function, not to the solver itself
        least_payoff = np.min(payoff_matrix)

        # np.where() returns one index array per matrix axis; entry idx holds the matching
        # positions along the axis with this solver's index. The minimum may occur in several
        # cells, so the first match is taken.
        own_axis_hits = np.where(payoff_matrix==least_payoff)[idx]
        my_strategy_values[0] = own_axis_hits[0]
        return GTStrategy(self._id, self._strategy_space, my_strategy_values)


## -------------------------------------------------------------------------------------------------
    def _call_compute_strategy(self, p_payoff:GTPayoffMatrix) -> GTStrategy:
        """
        Hook used when the payoff object carries no payoff function. To be redefined in child
        classes.

        Parameters
        ----------
        p_payoff : GTPayoffMatrix
            Payoff matrix object handed over by _compute_strategy().

        Raises
        ------
        NotImplementedError
            Always, unless redefined.
        """

        raise NotImplementedError