11using Ju
22using .. MultiArmBandits
33using Statistics
4- using LaTeXStrings
54using Plots
65gr ()
76
8- figpath (f) = " docs/src/assets/figures/figure_ $f .png "
7+
98
109function collect_best_actions ()
1110 isbest = Vector {Bool} ()
2625
2726# #############################
2827
29- function fig_2_1 ()
30- env = MultiArmBanditsEnv ()
31- f = render (env)
32- savefig (f, figpath ( " 2_1 " ) )
33- f
34- end
28+ # function fig_2_1()
29+ # env = MultiArmBanditsEnv()
30+ # f = render(env)
31+ # savefig(f, "figure_2_1.png" )
32+ # f
33+ # end
3534
3635
"""
    fig_2_2()

Build a two-panel plot comparing epsilon-greedy learners for ϵ = 0.1, 0.01 and 0.0.

For each ϵ, `bandit_testbed` is run 2000 times on a freshly constructed learner.
The element-wise mean over runs of the first component of each result is drawn in
subplot 1 and the mean of the second component in subplot 2.
The plot is written to `figure_2_2.png` (relative path) and returned.
"""
function fig_2_2()
    # A new learner per run so that no state is shared between runs.
    learner(ϵ) = QLearner(TabularQ(1, 10), EpsilonGreedySelector(ϵ), 0., cached_inverse_decay())
    p = plot(layout=(2, 1), dpi=200)
    for ϵ in [0.1, 0.01, 0.0]
        stats = [bandit_testbed(learner(ϵ)) for _ in 1:2000]
        # NOTE(review): presumably x[1] is the reward trace and x[2] the
        # best-action indicator trace -- confirm against bandit_testbed.
        plot!(p, mean(x[1] for x in stats), subplot=1, legend=:bottomright, label="epsilon=$ϵ")
        plot!(p, mean(x[2] for x in stats), subplot=2, legend=:bottomright, label="epsilon=$ϵ")
    end
    savefig(p, "figure_2_2.png")
    p
end
4847
"""
    fig_2_3()

Plot two learners against each other on the bandit testbed:

- `TabularQ(1, 10, 5.)` with `EpsilonGreedySelector(0.0)` (labelled "Q_1=5, epsilon=0.")
- `TabularQ(1, 10)` with `EpsilonGreedySelector(0.1)` (labelled "Q_1=0, epsilon=0.1")

Each curve is the mean over 2000 runs of the second component returned by
`bandit_testbed`. The plot is written to `figure_2_3.png` (relative path) and returned.
"""
function fig_2_3()
    # Constructors are zero-argument closures so every run gets a fresh learner.
    learner1() = QLearner(TabularQ(1, 10, 5.), EpsilonGreedySelector(0.0), 0., 0.1)
    learner2() = QLearner(TabularQ(1, 10), EpsilonGreedySelector(0.1), 0., 0.1)
    p = plot(legend=:bottomright, dpi=200)
    plot!(p, mean(bandit_testbed(learner1())[2] for _ in 1:2000), label="Q_1=5, epsilon=0.")
    plot!(p, mean(bandit_testbed(learner2())[2] for _ in 1:2000), label="Q_1=0, epsilon=0.1")
    savefig(p, "figure_2_3.png")
    p
end
5857
"""
    fig_2_4()

Plot a learner using `UpperConfidenceBound(10)` against one using
`EpsilonGreedySelector(0.1)` on the bandit testbed.

Each curve is the mean over 2000 runs of the first component returned by
`bandit_testbed`. The plot is written to `figure_2_4.png` (relative path) and returned.
"""
function fig_2_4()
    # NOTE(review): the legend label below says "c=2" while the selector is
    # constructed as UpperConfidenceBound(10). Confirm what the constructor
    # argument means and whether the label or the argument should change.
    learner1() = QLearner(TabularQ(1, 10), UpperConfidenceBound(10), 0., 0.1)
    learner2() = QLearner(TabularQ(1, 10), EpsilonGreedySelector(0.1), 0., 0.1)
    p = plot(legend=:bottomright, dpi=200)
    plot!(p, mean(bandit_testbed(learner1())[1] for _ in 1:2000), label="UpperConfidenceBound, c=2")
    plot!(p, mean(bandit_testbed(learner2())[1] for _ in 1:2000), label="epsilon-greedy, epsilon=0.1")
    savefig(p, "figure_2_4.png")
    p
end
6867
"""
    fig_2_5()

Plot four `GradientBanditLearner` configurations on the bandit testbed:
step sizes 0.1 and 0.4, each with a `sample_avg()` baseline and with a constant
baseline of `0.`.

`bandit_testbed` is called with `truevalue = 4.0` as its second argument. Each
curve is the mean over 2000 runs of the second component of the result.
The plot is written to `figure_2_5.png` (relative path) and returned.
"""
function fig_2_5()
    learner(alpha, baseline) = GradientBanditLearner(TabularQ(1, 10), WeightedSample(), alpha, baseline)
    # NOTE(review): presumably the mean of the true action values used by the
    # testbed -- confirm against bandit_testbed's signature.
    truevalue = 4.0
    p = plot(legend=:bottomright, dpi=200)
    plot!(p, mean(bandit_testbed(learner(0.1, sample_avg()), truevalue)[2] for _ in 1:2000), label="alpha = 0.1, with baseline")
    plot!(p, mean(bandit_testbed(learner(0.4, sample_avg()), truevalue)[2] for _ in 1:2000), label="alpha = 0.4, with baseline")
    plot!(p, mean(bandit_testbed(learner(0.1, 0.), truevalue)[2] for _ in 1:2000), label="alpha = 0.1, without baseline")
    plot!(p, mean(bandit_testbed(learner(0.4, 0.), truevalue)[2] for _ in 1:2000), label="alpha = 0.4, without baseline")
    savefig(p, "figure_2_5.png")
    p
end
8079
@@ -89,6 +88,6 @@ function fig_2_6()
8988 plot! (p, - 5 : 1 , [mean (mean (bandit_testbed (gradient_learner (2.0 ^ i))[1 ] for _ in 1 : 2000 )) for i in - 5 : 1 ], label= " gradient" )
9089 plot! (p, - 4 : 2 , [mean (mean (bandit_testbed (UpperConfidenceBound_learner (2.0 ^ i))[1 ] for _ in 1 : 2000 )) for i in - 4 : 2 ], label= " UCB" )
9190 plot! (p, - 2 : 2 , [mean (mean (bandit_testbed (greedy_with_init_learner (2.0 ^ i))[1 ] for _ in 1 : 2000 )) for i in - 2 : 2 ], label= " greedy with initialization" )
92- savefig (p, figpath ( " 2_6 " ) )
91+ savefig (p, " figure_2_6.png " )
9392 p
9493end
0 commit comments