-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathexploit.py
More file actions
85 lines (73 loc) · 3.51 KB
/
exploit.py
File metadata and controls
85 lines (73 loc) · 3.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import sys
import os
sys.path.insert(0,os.path.realpath('../cfr/'))
from pokergames import *
from pokerstrategy import *
from environment import *
from bayesian_bootstrapping import *
from implicit_agent import *
from explicit_agent import *
from implicit_subpolicy import *
from subpolicy_bootstrapping import *
from tdcfr import *
import random
# Command-line interface:
#   exploit.py <outfile> <agent_type> <difficulty> <opponent_num>
#
# outfile      - CSV file receiving one row of exploitation metrics per game
# agent_type   - which adaptive agent implementation to evaluate (see dispatch below)
# difficulty   - 'nash' for the exact Nash opponent, otherwise the skew class of a
#                skewed-Nash opponent strategy file (e.g. 'simple' / 'complex')
# opponent_num - index selecting the specific skewed-Nash opponent strategy file
outfile = sys.argv[1]
agent_type = sys.argv[2]
difficulty = sys.argv[3]
opponent_num = int(sys.argv[4])

leduc = leduc_rules()
strat_dir = 'stationary_agents/results/'

# Portfolio of candidate opponent models: the Nash strategy plus four randomly
# sampled skewed-Nash strategies (random skew class and index per slot).
portfolio_files = ['nash1.strat']
for _ in range(4):
    portfolio_files.append('skewednash_{0}_{1}'.format(
        random.choice(['simple', 'complex']), random.randrange(0, 100)))
portfolio = [Strategy(1, strat_dir + name) for name in portfolio_files]

# The opponent occupies seat 1: either the exact Nash strategy or the requested
# skewed-Nash strategy.
if difficulty == 'nash':
    opponent = SavedAgent(leduc, 1, strat_dir + 'nash1.strat')
else:
    opponent = SavedAgent(
        leduc, 1,
        strat_dir + 'skewednash_{0}_{1}'.format(difficulty, opponent_num))

nash0 = Strategy(0, strat_dir + 'nash0.strat')
nash1 = Strategy(1, strat_dir + 'nash1.strat')

# Build the adaptive agent for seat 0.
if agent_type == 'bootstrapping':
    agent = BayesianBootstrappingAgent(leduc, 0, nash0, portfolio[0], portfolio, initial_prior_strength=5)
elif agent_type == 'explicit':
    agent = ExplicitModelingAgent(leduc, 0, nash0, portfolio[0], initial_prior_strength=5)
elif agent_type == 'implicit':
    agent = ImplicitModelingAgent(leduc, 0, nash0, portfolio[0], portfolio)
elif agent_type == 'subpolicy_bootstrapping':
    agent = SubpolicyBootstrappingAgent(leduc, 0, nash0, portfolio[0], portfolio, initial_prior_strength=5)
elif agent_type == 'subpolicy_implicit':
    agent = ImplicitSubpolicyModelingAgent(leduc, 0, nash0, portfolio[0], portfolio)
elif agent_type == 'tdcfr':
    agent = TDCFRAgent(leduc, 0, exploration=0.1, exploration_decay=0.99, learning_rate=0.05, learning_rate_decay=0.99999)
else:
    raise Exception('Unknown model type')

# Baseline numbers: how exploitable is this opponent relative to the Nash EV?
profile = StrategyProfile(leduc, [nash0, opponent.strategy])
br = profile.best_response()
profile = StrategyProfile(leduc, [nash0, nash1])
nash_ev = profile.expected_value()
opponent_exploitability = br[1][0] - nash_ev[0]
print("Player 1 exploitability: {0}".format(opponent_exploitability))

agents = [agent, opponent]
sim = GameSimulator(leduc, agents, verbose=False, showhands=True)
games = 200

# BUGFIX: open in text mode (rows are str, not bytes) and use a context manager
# so the CSV is always closed — the original 'wb' handle was never closed and
# buffered rows were silently lost if a game raised mid-run.
with open(outfile, 'w') as f:
    f.write('Hand,Max,Exploited,PctExploited,P0Vulnerability,Winnings\n')
    for game in range(games):
        sim.play()
        # Recompute exploitability metrics against the agents' current strategies.
        profile = StrategyProfile(leduc, [x.strategy for x in agents])
        br = profile.best_response()
        ev = profile.expected_value()
        print('Games played: {0}'.format(game + 1))
        print("Exploitability: P1={0:.9f} P2={1:.9f} EV: {2} P1 Winnings: {3}".format(
            br[1][1], br[1][0], ev, agents[0].winnings))
        exploited = ev[0] - nash_ev[0]          # how much of the opponent's weakness we captured
        vulnerability = br[1][1] - nash_ev[1]   # how exploitable our own agent has become
        exploitability = br[1][0] - nash_ev[0]  # how exploitable the opponent currently is
        # BUGFIX: guard the ratio — exploitability is ~0 when the opponent plays
        # Nash (difficulty == 'nash'), which crashed the original with a
        # ZeroDivisionError on the very first game.
        pct_exploited = exploited / exploitability if exploitability != 0 else 0.0
        f.write('{0},{1:.9f},{2:.9f},{3:.9f},{4:.9f},{5}\n'.format(
            game, exploitability, exploited, pct_exploited, vulnerability, agents[0].winnings))
        f.flush()  # persist partial results in case a later game crashes