-
Notifications
You must be signed in to change notification settings - Fork 0
/
hanabi.py
352 lines (299 loc) · 11.6 KB
/
hanabi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
import csv
import random
import sys
class Object:
    """Base class whose instances print as their attribute dictionary."""

    def __repr__(self):
        # vars(self) is the instance __dict__, so every attribute is shown.
        return repr(vars(self))
# When True, Hanabi.Run prints the game state before applying each action.
DEBUG = False
# Names of the two card attributes. They are the keys of Card.attr and
# Card.knows, and the attribute part of a hint.
COLOR = "COLOR"
NUMBER = "NUMBER"
def OtherAttribute(x):
    """Return the card attribute that is not `x`.

    COLOR maps to NUMBER and NUMBER maps to COLOR. Any other value fails
    the assertion.
    """
    if x != COLOR:
        assert x == NUMBER
        return COLOR
    return NUMBER
class Card(Object):
    """One card: its true attributes plus what has been hinted about it."""

    def __init__(self, color, num):
        # Note: num is the printed card number minus one (1 -> 0, 5 -> 4).
        # True values of the card, keyed by attribute name.
        self.attr = {COLOR: color, NUMBER: num}
        # Per attribute: has a hint revealed it yet?
        self.knows = dict.fromkeys((COLOR, NUMBER), False)
        # Set to True by SignalSavePlayer when told to keep this card.
        self.save = False

    def color(self):
        """Return the card's color value."""
        return self.attr[COLOR]

    def num(self):
        """Return the card's zero-based number."""
        return self.attr[NUMBER]
# Actions a player's Play() can return; Hanabi.Run dispatches on them.
PLAY = "PLAY"
DISCARD = "DISCARD"
HINT = "HINT"
# Values of Hanabi.mode. Run() returns as soon as the mode is not PLAYING.
PLAYING = "PLAYING"
GAME_OVER = "GAME_OVER"
LOST = "LOST"
ILLEGAL_MOVE = "ILLEGAL_MOVE"
class Hanabi(Object):
    """State and rules engine for one game of Hanabi.

    Each player is an object with a ``Play(player_num, state)`` method that
    returns an ``(action, x)`` pair: ``(PLAY, card_num)``,
    ``(DISCARD, card_num)`` or ``(HINT, (other_player, attribute, value))``.
    """

    def __init__(self, players):
        """Build and shuffle the deck, deal every hand and reset the tokens.

        Args:
          players: sequence of player objects, in turn order.
        """
        self.players = players
        self.hand_size = 5
        self.num_colors = 5
        # Copies of each number within one color; index is the zero-based number.
        self.num_distrib = [5, 2, 2, 2, 1]
        # How many distinct values each attribute can take.
        self.num_attr = {COLOR: self.num_colors, NUMBER: len(self.num_distrib)}
        self.deck = [
            Card(color, num)
            for color in range(self.num_colors)
            for num, count in enumerate(self.num_distrib)
            for _ in range(count)
        ]
        random.shuffle(self.deck)
        # Next card playable for each color.
        self.next_card = [0] * self.num_colors
        # Remaining cards (not played nor discarded), indexed [color][num].
        self.remaining = [list(self.num_distrib) for _ in range(self.num_colors)]
        self.hands = [[self.Draw() for _ in range(self.hand_size)]
                      for _ in players]
        self.num_hint_tokens = 8  # ?
        self.num_error_tokens = 3  # ?
        self.mode = PLAYING
        self.message = None
        # Latest hint addressed to each player, as (attribute, value) or None.
        self.last_hint = [None] * len(self.players)

    def Check(self, val, message):
        """Flag the game as ILLEGAL_MOVE with `message` when `val` is false."""
        if not val:
            self.mode = ILLEGAL_MOVE
            self.message = "Illegal move: " + message

    def Draw(self):
        """Pop and return the top card of the deck.

        If the deck is already empty the game mode becomes GAME_OVER and
        None is returned.
        """
        if not self.deck:
            self.mode = GAME_OVER
            return None
        return self.deck.pop()

    def ReplaceCard(self, player_num, card_num):
        """Take card `card_num` out of a hand, draw a replacement, return it.

        The removed card is also subtracted from `remaining`.
        """
        hand = self.hands[player_num]
        card = hand[card_num]
        # Remove this card, and place new card at end.
        del hand[card_num]
        hand.append(self.Draw())
        self.remaining[card.color()][card.num()] -= 1
        assert self.remaining[card.color()][card.num()] >= 0
        return card

    def _PrintDebug(self, player_num, action, x):
        """Print the current state and the action about to be applied."""
        # Each call passes one pre-formatted string, so the output is the
        # same whether print is a statement (Python 2) or a function
        # (Python 3).
        print("")
        print("Hands: %s" % (self.hands,))
        print("Played: %s" % (self.next_card,))
        print("Remaining: %s" % (self.remaining,))
        print("Hints: %s Errors: %s"
              % (self.num_hint_tokens, self.num_error_tokens))
        print("")
        print("Action: %s %s %s" % (player_num, action, x))

    def Run(self):
        """Let the players take turns until the game leaves PLAYING mode.

        Returns:
          A tuple (mode, score, message, hint tokens left, error tokens
          left), where score is the number of cards successfully played.
        """
        while True:
            for player_num, player in enumerate(self.players):
                action, x = player.Play(player_num, self)
                # Clear so it doesn't get seen twice.
                self.last_hint[player_num] = None
                if DEBUG:
                    self._PrintDebug(player_num, action, x)
                if action == PLAY:
                    self.Play(player_num, x)
                elif action == DISCARD:
                    self.Discard(player_num, x)
                elif action == HINT:
                    other_player, attribute, value = x
                    self.Hint(other_player, attribute, value)
                if self.mode != PLAYING:
                    return (self.mode, sum(self.next_card), self.message,
                            self.num_hint_tokens, self.num_error_tokens)

    def IsPlayable(self, card):
        """Is `card` the next card needed on its color's stack?"""
        return card.num() == self.next_card[card.color()]

    def IsCritical(self, card):
        """Is this the last copy of a card we haven't played yet?

        Discarding such a card would reduce our max potential score.
        """
        still_needed = card.num() >= self.next_card[card.color()]
        last_copy = self.remaining[card.color()][card.num()] == 1
        return still_needed and last_copy

    def Play(self, player_num, card_num):
        """Play a card: advance its stack, or spend an error token."""
        card = self.ReplaceCard(player_num, card_num)
        if self.IsPlayable(card):
            # Success
            self.next_card[card.color()] += 1
        else:
            # Mistake
            self.num_error_tokens -= 1
            if self.num_error_tokens == 0:
                self.mode = LOST
                self.message = "Lost: Too many errors."

    def Discard(self, player_num, card_num):
        """Discard a card and gain one hint token."""
        self.ReplaceCard(player_num, card_num)
        # NOTE(review): no upper bound is applied here - confirm whether the
        # token count is meant to be capped at its starting value.
        self.num_hint_tokens += 1

    def Hint(self, player_num, attribute, value):
        """Spend a hint token telling `player_num` which cards have `value`.

        Every card in that hand whose `attribute` equals `value` is marked as
        known. The move is illegal when no token was left or when no card
        matches.
        """
        self.num_hint_tokens -= 1
        self.Check(self.num_hint_tokens >= 0, "No hints left")
        revealed_cards = False
        for card in self.hands[player_num]:
            if card.attr[attribute] == value:
                card.knows[attribute] = True
                revealed_cards = True
        self.Check(revealed_cards, "Hint revealed no cards.")
        # TODO: Add which cards it applies to.
        self.last_hint[player_num] = (attribute, value)
class Player:
    """Base class for the automated strategies."""

    def Play(self, player_num, state):
        """Choose a move for `player_num` given the game `state`.

        Hanabi.Run unpacks the result as an (action, x) pair. This base
        version chooses nothing and returns None.
        """
        return None
class HumanPlayer:
    """Interactive player: shows the table and reads the move from stdin."""

    # One-letter label for each color value.
    COLORS = ["R", "G", "B", "W", "P"]

    def Play(self, player_num, state):
        """Print what this player may see, then read and return a move.

        The reply is typed as a Python expression such as ``PLAY, 0`` or
        ``HINT, (1, COLOR, 2)`` and its value is returned unchanged.
        """
        # Each print passes one pre-formatted value, so the output is the
        # same whether print is a statement (Python 2) or a function
        # (Python 3).
        print("# Other hands")
        for other_num in range(len(state.players)):
            if other_num != player_num:
                print("%s %s" % (other_num,
                                 self.OtherHandStr(state.hands[other_num])))
        print("# Your hand")
        print(self.SelfHandStr(state.hands[player_num]))
        print("# Playable")
        print(state.next_card)
        print("# Remaining")
        print(state.remaining)
        print("# Hint tokens =  %s" % (state.num_hint_tokens,))
        print("# Error tokens =  %s" % (state.num_error_tokens,))
        # Python 2's input() evaluates what was typed; Python 3's returns the
        # raw text. Reading a line and evaluating it explicitly gives the
        # same result on both.
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3
        # SECURITY: eval() runs whatever the person at the console types.
        # Only use this player with a trusted, local operator.
        return eval(read_line("Move:"))

    def OtherCardStr(self, card):
        """Return number and color letter of another player's card."""
        return str(card.num()) + self.COLORS[card.color()]

    def OtherHandStr(self, hand):
        """Return the fully visible rendering of another player's hand."""
        return [self.OtherCardStr(card) for card in hand]

    def SelfCardStr(self, card):
        """Return an own card, with "?" for each attribute not yet hinted."""
        num = str(card.num()) if card.knows[NUMBER] else "?"
        color = self.COLORS[card.color()] if card.knows[COLOR] else "?"
        return num + color

    def SelfHandStr(self, hand):
        """Return the own hand, showing only hinted information."""
        return [self.SelfCardStr(card) for card in hand]
class SimplePlayer(Player):
    """Acts on fully known cards, otherwise gives the most informative hint."""

    def Play(self, player_num, state):
        """Return this player's move as an (action, x) pair."""
        # Play (or discard) a card if we know everything about it and it can
        # be played (or is no longer useful).
        for card_num, card in enumerate(state.hands[player_num]):
            if False in card.knows.values():
                continue
            if state.IsPlayable(card):
                return PLAY, card_num
            if card.num() < state.next_card[card.color()]:
                return DISCARD, card_num

        # Hint most info possible.
        if state.num_hint_tokens:
            candidates = [
                (other_num, attr, attr_val)
                for other_num in range(len(state.players))
                if other_num != player_num
                for attr in (COLOR, NUMBER)
                for attr_val in range(state.num_attr[attr])
            ]
            best_hint, best_score = None, 0
            for hint in candidates:
                score = self.HintValue(state, hint)
                # Strict comparison: the earliest best candidate wins, and a
                # hint that would reveal nothing is never chosen.
                if score > best_score:
                    best_hint, best_score = hint, score
            if best_hint:
                return HINT, best_hint

        # Discard a random card.
        return DISCARD, random.randrange(len(state.hands[player_num]))

    def HintValue(self, state, hint):
        """Count the pieces of new information `hint` would transmit.

        `hint` is (player_num, attr, attr_val). A card counts when it has
        that value and its holder does not know that attribute yet.
        """
        player_num, attr, attr_val = hint
        return sum(
            1
            for card in state.hands[player_num]
            if card.attr[attr] == attr_val and not card.knows[attr]
        )
class SignalPlayer(Player):
    """Uses the hinted *value* as the position of the card to play.

    The attribute named in the hint carries no meaning of its own; it is
    picked only so that the hint matches at least one card.
    """

    def __init__(self, discard_num=3):
        # Hand position discarded when there is nothing better to do.
        self.discard_num = discard_num

    def Play(self, player_num, state):
        """Return this player's move as an (action, x) pair."""
        # If partner hinted us, play the indicated card.
        received = state.last_hint[player_num]
        if received:
            _attribute, position = received
            return PLAY, position

        # Else see if we can hint partner.
        if state.num_hint_tokens:
            for other_num in range(len(state.players)):
                if other_num == player_num:
                    continue
                partner_hand = state.hands[other_num]
                # usable[v] names an attribute for which some card in the
                # partner's hand has value v, or None if there is none.
                usable = [None] * 5
                for card in partner_hand:
                    usable[card.color()] = COLOR
                    usable[card.num()] = NUMBER
                for card_num, card in enumerate(partner_hand):
                    if state.IsPlayable(card) and usable[card_num] is not None:
                        return HINT, (other_num, usable[card_num], card_num)

        # Else discard. Originally I discarded oldest card, but it turns out
        # that discarding card #3 is 1 point better, not sure why.
        return DISCARD, self.discard_num
class DimaPlayer(Player):
    """Hints real attributes; the partner plays the newest matching card."""

    def __init__(self, discard_num=4):
        # Hand position discarded when there is nothing better to do.
        self.discard_num = discard_num

    def Play(self, player_num, state):
        """Return this player's move as an (action, x) pair."""
        # If partner hinted us, play the indicated card: the matching card
        # closest to the end of the hand.
        received = state.last_hint[player_num]
        if received:
            attribute, value = received
            own_hand = state.hands[player_num]
            for card_num in range(len(own_hand) - 1, -1, -1):
                if own_hand[card_num].attr[attribute] == value:
                    return PLAY, card_num

        # Else see if we can hint partner.
        if state.num_hint_tokens:
            for other_num in range(len(state.players)):
                if other_num == player_num:
                    continue
                color_free = [True] * 5
                num_free = [True] * 5
                # Walk from the end of the hand backwards, mirroring the
                # order in which the partner searches for the hinted card.
                for card in reversed(state.hands[other_num]):
                    if state.IsPlayable(card):
                        # If we can unambiguously hint, do so.
                        if color_free[card.color()]:
                            return HINT, (other_num, COLOR, card.color())
                        if num_free[card.num()]:
                            return HINT, (other_num, NUMBER, card.num())
                    # Don't allow any further cards to use these attributes
                    # (or partner will play the wrong card).
                    color_free[card.color()] = False
                    num_free[card.num()] = False

        # Else discard. Originally I discarded oldest card, but it turns out
        # that discarding card #1 is 0.5 point better, not sure why.
        # NOTE(review): the remark above mentions card #1, while the default
        # discard_num is 4 - confirm which one is intended.
        return DISCARD, self.discard_num
class SignalSavePlayer(Player):
    """Position-signalling player with a second signal meaning "keep this".

    A hint on `play_attr` tells the partner to play the card at the hinted
    position. A hint on the other attribute marks that card as saved, so it
    is skipped when choosing a discard.
    """

    def __init__(self, discard_num=3, play_attr=NUMBER):
        # Fallback discard position when every card in hand is saved.
        self.discard_num = discard_num
        self.play_attr = play_attr
        self.save_attr = OtherAttribute(self.play_attr)

    def _FindSignal(self, player_num, state, attr, wanted):
        """Return a HINT move on `attr` for the first card that `wanted` accepts.

        The hinted value is the card's position, so it is only usable when
        some card in that hand really has that value for `attr`. Returns
        None when no such hint exists.
        """
        for other_num in range(len(state.players)):
            if other_num == player_num:
                continue
            hand = state.hands[other_num]
            present = [False] * 5
            for card in hand:
                present[card.attr[attr]] = True
            for card_num, card in enumerate(hand):
                if wanted(card) and present[card_num]:
                    return HINT, (other_num, attr, card_num)
        return None

    def Play(self, player_num, state):
        """Return this player's move as an (action, x) pair."""
        # If partner hinted us, play/save the indicated card.
        received = state.last_hint[player_num]
        if received:
            attribute, value = received
            if attribute == self.play_attr:
                return PLAY, value
            state.hands[player_num][value].save = True

        # Both kinds of hint cost a token, so they are only tried while
        # tokens remain.
        if state.num_hint_tokens:
            # Else see if we can hint partner to play a card.
            move = self._FindSignal(player_num, state, self.play_attr,
                                    state.IsPlayable)
            if move is None:
                # Hint partner to save a card.
                move = self._FindSignal(player_num, state, self.save_attr,
                                        state.IsCritical)
            if move is not None:
                return move

        # Else discard the first card that has not been marked as saved.
        for card_num, card in enumerate(state.hands[player_num]):
            if not card.save:
                return DISCARD, card_num
        return DISCARD, self.discard_num
def HistogramPlayers(players, iters):
    """Play `iters` games with `players` and tally the final scores.

    Returns:
      A list of 26 counts; entry s is the number of games that ended with
      score s.
    """
    counts = [0] * 26
    for _ in range(iters):
        outcome = Hanabi(players).Run()
        # The second element of Run()'s result is the score.
        counts[outcome[1]] += 1
    return counts
def MakeCsv(configs, iters, csv_filename):
    """Write one score-histogram row per configuration to a CSV file.

    Args:
      configs: iterable of (name, players) pairs.
      iters: number of games to play for each configuration.
      csv_filename: path of the file to create or overwrite.

    The first row is the header "Score", 0, 1, ..., 25. Every later row is
    the configuration name followed by its 26 histogram counts.
    """
    # The csv module wants a binary file on Python 2, but a text file opened
    # with newline="" on Python 3.
    if sys.version_info[0] >= 3:
        csv_file = open(csv_filename, "w", newline="")
    else:
        csv_file = open(csv_filename, "wb")
    with csv_file:
        writer = csv.writer(csv_file)
        # list(...) because a Python 3 range cannot be added to a list.
        writer.writerow(["Score"] + list(range(26)))
        for name, players in configs:
            writer.writerow([name] + HistogramPlayers(players, iters))
# Strategies to compare, as (CSV row label, players) pairs. Every entry is a
# two-player game in which both seats use the same strategy.
config = [("Simple", [SimplePlayer(), SimplePlayer()]),
("Signal", [SignalPlayer(), SignalPlayer()]),
("Dima", [DimaPlayer(), DimaPlayer()]),
("SignalSave", [SignalSavePlayer(), SignalSavePlayer()])]
# Command line: first argument is the number of games per strategy, second
# is the output CSV path. This runs whenever the module is executed.
MakeCsv(config, int(sys.argv[1]), sys.argv[2])