-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblackjack_extended.py
67 lines (54 loc) · 2.22 KB
/
blackjack_extended.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import blackjack_base as bj
import gym.spaces as spaces
import numpy as np
from gym.utils import seeding
from math import inf
class BlackjackEnvExtend(bj.BlackjackEnvBase):
    """
    Blackjack environment layered on top of ``bj.BlackjackEnvBase``.

    Per the original author's description, this variant extends the OpenAI
    blackjack environment so that it is a proper stationary Markov decision
    process: the observation is widened so that the agent sees how many
    cards of each value it is holding in every state.

    The player's hand lives in ``self.player`` as a vector of counts with one
    slot per entry of ``self.deck_values``; slot 0 is the one treated as the
    ace.  ``self.deck_values``, ``draw_card``, ``draw_hand``,
    ``construct_deck``, ``dealer_show_cards`` and ``seed`` are not defined
    in this class (presumably inherited from the base class).
    """

    def __init__(self, decks=inf, seed=3232, natural=True):
        """
        Set up the spaces, seed the environment and start the first game.

        Args:
            decks: Number of decks, stored on ``self.decks``.  Defaults to
                ``inf`` (NOTE(review): presumably an unbounded shoe; confirm
                against ``construct_deck``).
            seed: Value forwarded to ``self.seed``.
            natural: Flag to pay out 1.5 on a "natural" blackjack win, like
                casino rules.
                Ref: http://www.bicyclecards.com/how-to-play/blackjack/
        """
        self.action_space = spaces.Discrete(2)

        # MultiDiscrete takes, for each element of the vector, the number of
        # values that element may assume (presumably one element per
        # card-value slot of the player's hand).
        hand_counts_space = spaces.MultiDiscrete(
            [22, 11, 8, 6, 5, 4, 4, 3, 3, 3])
        # Second observation component; _get_obs() fills it from
        # dealer_show_cards().
        dealer_space = spaces.Discrete(26)
        self.observation_space = spaces.Tuple(
            (hand_counts_space, dealer_space))

        self.seed(seed)

        self.natural = natural
        self.decks = decks  # number of decks

        # Start the first game
        self.reset()

    def is_natural(self):
        """Tell whether the player holds exactly 2 cards which total 21."""
        holds_two_cards = sum(self.player) == 2
        return holds_two_cards and self.sum_player_hand() == 21

    def is_player_bust(self):
        """Tell whether the player's hand total exceeds 21."""
        return self.sum_player_hand() > 21

    def score_player(self):
        """Return the player's hand total, or 0 when the player is bust."""
        if self.is_player_bust():
            return 0
        return self.sum_player_hand()

    def sum_player_hand(self):
        """Return the hand total, adding 10 when an ace is usable."""
        hard_total = np.dot(self.deck_values, self.player)
        ace_bonus = 10 * self.usable_player_ace()
        return hard_total + ace_bonus

    def usable_player_ace(self):
        """
        Tell whether slot 0 (the ace) is non-empty and counting one ace with
        10 extra points still keeps the hand at 21 or below.
        """
        holds_ace = self.player[0] > 0
        # The total is only computed when an ace is actually held.
        return holds_ace and \
            21 >= np.dot(self.deck_values, self.player) + 10

    def draw_player_card(self):
        """Draw one card and increment its count in the player's hand."""
        counts = self.player
        # draw_card() results are shifted by one to get the 0-based slot.
        counts[self.draw_card() - 1] += 1

    def draw_player_hand(self):
        """Draw two cards and return them as a fresh vector of counts."""
        counts = np.zeros(len(self.deck_values), dtype=int)
        for _ in range(2):
            counts[self.draw_card() - 1] += 1
        return counts

    def _get_obs(self):
        """Return the observation: (player counts tuple, dealer component)."""
        player_counts = tuple(self.player)
        dealer_part = self.dealer_show_cards()
        return (player_counts, dealer_part)

    def reset(self):
        """
        Start a new game and return its first observation.

        The deck is rebuilt, then the dealer's hand is drawn before the
        player's; this order is kept as-is because it fixes the sequence in
        which cards are drawn.
        """
        self.done = False
        self.construct_deck()
        self.dealer = self.draw_hand()
        self.player = self.draw_player_hand()
        return self._get_obs()