Skip to content

Instantly share code, notes, and snippets.

@cool-RR
Created October 9, 2022 19:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cool-RR/7c20b021038b341bbf90a5d04ff8ebea to your computer and use it in GitHub Desktop.
Save cool-RR/7c20b021038b341bbf90a5d04ff8ebea to your computer and use it in GitHub Desktop.
Sample game:
[2 *31]
[* 102]
[** 4]
[** 3 ]
[* 0*]
[* * ]
[*4 * ]
[ 0* 1]
[ 32*0]
[*3 *]
[* 024]
[ 3* *]
[ *3* ]
[ *2 *]
[*0 41]
[ *03]
[3 2*0]
[0*3 4]
[ *0* ]
[ 43*0]
algorithm.iteration=1
results['episode_reward_mean']=336.32499999999993
Sample game:
[ 32*0]
[*2 01]
[* * 4]
[3 * *]
[1*0 3]
[ * *]
[ 403*]
[42 *1]
[21 *3]
[*240 ]
[ 3* ]
[ 4*]
[ 4*1 ]
[*2 * ]
[13402]
[**0 ]
[ 3* *]
[0 *21]
[04* 2]
[341* ]
algorithm.iteration=2
results['episode_reward_mean']=336.51249999999993
Sample game:
[41* 3]
[20341]
[*4 3]
[ 42*1]
[ 2* ]
[*3* ]
[2* 0 ]
[* * 0]
[ **1 ]
[ * *]
[*3 24]
[3 *1]
[0* 3 ]
[*1 4]
[40* ]
[ ** ]
[2 14*]
[ 1* *]
[4 21*]
[* 30 ]
algorithm.iteration=3
results['episode_reward_mean']=336.82999999999987
Sample game:
[* 10]
[2* 43]
[0 *41]
[* 3 ]
[4 * 2]
[*2 14]
[**1 ]
[20143]
[ 2 1*]
[ * *]
[0 3* ]
[*14 0]
[3*41 ]
[*03 1]
[ 3** ]
[* *3]
[ 2* ]
[ 0** ]
[2 *0]
[*2 * ]
algorithm.iteration=4
results['episode_reward_mean']=336.62999999999994
Sample game:
[12340]
[* *]
[2* * ]
[2* 43]
[ *41]
[4* 0 ]
[1 ** ]
[02* 4]
[42 *]
[** 4 ]
[* 4 0]
[* 3 *]
[*4 *]
[*031 ]
[4 02*]
[*012 ]
[ * *]
[2 * *]
[402 *]
[ 21*0]
algorithm.iteration=5
results['episode_reward_mean']=335.59999999999985
Sample game:
[23 4*]
[ **0]
[ *130]
[ * *]
[ 32* ]
[ * *2]
[2 4*]
[ 324*]
[0* 31]
[1 2*0]
[1 * ]
[2 * ]
[ * 4*]
[2 4*]
[ ** ]
[1*03 ]
[ 1 *3]
[2*34 ]
[20413]
[ 0*31]
algorithm.iteration=6
results['episode_reward_mean']=336.4099999999999
Sample game:
[32*4 ]
[*40 1]
[30 * ]
[3 *10]
[2 *0]
[41* 0]
[ 0 4*]
[2 41*]
[** 4 ]
[*032 ]
[ * 0]
[2 41*]
[30 *1]
[* *0]
[ 2**]
[**2 ]
[0 **]
[320* ]
[ * *]
[ 4 **]
algorithm.iteration=7
results['episode_reward_mean']=337.83999999999986
Sample game:
[ 42*]
[2 4 *]
[03241]
[* *1]
[ 0*1 ]
[2 34*]
[20413]
[23041]
[2* * ]
[4 2*0]
[ 10*3]
[21430]
[ * 0]
[203* ]
[*4 01]
[*0 * ]
[*3* ]
[2 4 *]
[*01 ]
[2*04 ]
algorithm.iteration=8
results['episode_reward_mean']=338.8599999999999
Sample game:
[4 0*1]
[*0 41]
[31 *0]
[1 *0]
[ 3**]
[ **3]
[ 0*2]
[* 4 *]
[ 4*]
[ *1]
[* *0]
[1 0* ]
[20 1*]
[20143]
[2 **]
[ 13* ]
[* *1 ]
[ **2 ]
[0 *43]
[2 *10]
algorithm.iteration=9
results['episode_reward_mean']=339.9399999999998
Sample game:
[41 2*]
[ *321]
[ 1** ]
[ 24 *]
[ * *3]
[*0 * ]
[*1 43]
[2* 30]
[1 *43]
[24310]
[ **1]
[2*31 ]
[ 1*]
[ ** ]
[** 0]
[ *1]
[*431 ]
[*1 43]
[2 4*]
[21043]
algorithm.iteration=10
results['episode_reward_mean']=340.8899999999999
Sample game:
[21 4*]
[ 2*1 ]
[*1 4 ]
[1 * 2]
[2* 0]
[ 4**]
[20341]
[* 4*]
[23 4*]
[* 4 *]
[ 234*]
[* 4 *]
[3 * 1]
[ 4*31]
[2 4*0]
[21* ]
[2*0 3]
[*14 3]
[ 1* ]
[ **1 ]
algorithm.iteration=11
results['episode_reward_mean']=343.5099999999999
Sample game:
[0 42*]
[*4 * ]
[310* ]
[24*1 ]
[ 4* *]
[2 3* ]
[ 4* *]
[24 3*]
[*4* ]
[ * *]
[ 23*0]
[3*01 ]
[ ** 0]
[ 12*0]
[20 *3]
[21*3 ]
[*130 ]
[2 * *]
[* 4* ]
[*10 3]
algorithm.iteration=12
results['episode_reward_mean']=345.73999999999984
Sample game:
[14*3 ]
[20 * ]
[2 04*]
[ *230]
[*1 40]
[* *]
[ 2 **]
[21430]
[ 102*]
[*430 ]
[ * *]
[* 04 ]
[01* ]
[2** ]
[0 *3]
[2 3* ]
[2 34*]
[* * 0]
[2 *10]
[2 4 *]
algorithm.iteration=13
results['episode_reward_mean']=346.8299999999999
Sample game:
[4*3 0]
[2 *10]
[* 43]
[2 ** ]
[ **0]
[2 *1 ]
[** 4 ]
[*43 0]
[21*4 ]
[ 3* ]
[1*30 ]
[2 01*]
[324 *]
[04*2 ]
[* 341]
[* 321]
[ 1*2 ]
[ **3]
[31 *0]
[*104 ]
algorithm.iteration=14
results['episode_reward_mean']=347.6999999999999
Sample game:
[ 1* 0]
[ 12 *]
[21340]
[21340]
[23 4*]
[* 3* ]
[ 1** ]
[ *4*]
[ * *]
[2* 43]
[2 *14]
[32 * ]
[ 4*31]
[2 41*]
[**3 ]
[21*4 ]
[ * 23]
[*14 3]
[ **]
[ *341]
algorithm.iteration=15
results['episode_reward_mean']=348.32999999999987
Sample game:
[* 40]
[2 3*0]
[* 0* ]
[21430]
[*104 ]
[01243]
[**3 ]
[ 43 *]
[2 31*]
[*1* ]
[12*4 ]
[* 340]
[ *2*]
[2*04 ]
[21* 0]
[1 *4 ]
[** 4 ]
[2 4 *]
[ 23*0]
[*140 ]
algorithm.iteration=16
results['episode_reward_mean']=352.99999999999983
Sample game:
[* 430]
[210* ]
[2 *41]
[ 1* 0]
[21* 0]
[* *1 ]
[2 0*3]
[21 *3]
[12 4*]
[** 4 ]
[* * 0]
[ 13*0]
[ ** 3]
[ ** 0]
[ 1 *0]
[*4 3 ]
[21*4 ]
[* 4 0]
[* 03]
[ **0 ]
algorithm.iteration=17
results['episode_reward_mean']=357.66999999999985
Sample game:
[2 *1 ]
[2 ** ]
[213* ]
[2** ]
[ 2* ]
[01*4 ]
[*1 34]
[* 340]
[2 3*0]
[21 *0]
[ ** ]
[* 1*]
[21*4 ]
[ 13* ]
[2 *]
[ 13*0]
[*1 * ]
[03421]
[* *4 ]
[ 1 **]
algorithm.iteration=18
results['episode_reward_mean']=359.78999999999985
Sample game:
[*43 0]
[ 13* ]
[ 1** ]
[ 1 2*]
[230* ]
[213 *]
[12 4*]
[2 * *]
[2*34 ]
[ 1* *]
[*43 0]
[2 4*0]
[*1 40]
[ *3 *]
[2* 40]
[* 3 *]
[* *3 ]
[21*0 ]
[21340]
[21340]
algorithm.iteration=19
results['episode_reward_mean']=361.6899999999998
Sample game:
[213 *]
[21430]
[21430]
[21* 0]
[ *340]
[2* 40]
[1 *2 ]
[2 41*]
[ **0 ]
[20413]
[ 234*]
[ 1* *]
[ 1* 0]
[21340]
[213 *]
[21* 4]
[2*3 ]
[* * 0]
[21*0 ]
[2 *10]
algorithm.iteration=20
results['episode_reward_mean']=362.4499999999999
Sample game:
[31240]
[21340]
[ 3*0]
[21 *0]
[ 1*2 ]
[* *4 ]
[2 3*1]
[2*34 ]
[0 3*2]
[2* 0]
[ ** 4]
[* 403]
[21*4 ]
[213 *]
[2*4 0]
[* * ]
[ 1** ]
[ **0 ]
[3 01*]
[ 2*1 ]
algorithm.iteration=21
results['episode_reward_mean']=363.61999999999983
Sample game:
[ ** ]
[* *4 ]
[ *34 ]
[1 ** ]
[* 4 *]
[21* ]
[21* ]
[213* ]
[1 3*4]
[2*03 ]
[21* ]
[ 134*]
[* 4 3]
[ ** ]
[*13 0]
[21* 0]
[21* 4]
[21340]
[* 304]
[210 *]
algorithm.iteration=22
results['episode_reward_mean']=364.79999999999984
Sample game:
[ *3 *]
[21*0 ]
[21340]
[210 *]
[21*0 ]
[2 *13]
[2*30 ]
[*1* ]
[214 *]
[214* ]
[21* 4]
[21340]
[2*30 ]
[21*0 ]
[21 *0]
[213* ]
[*1* ]
[013* ]
[**3 ]
[2 **]
algorithm.iteration=23
results['episode_reward_mean']=367.92999999999984
Sample game:
[21* 4]
[2 34*]
[2*3 0]
[*1* ]
[2* 30]
[213 *]
[*431 ]
[21* 4]
[ ** ]
[2 3*0]
[2 3* ]
[21430]
[21* 3]
[2*04 ]
[21* ]
[21* 0]
[214 *]
[*13 4]
[21403]
[21340]
algorithm.iteration=24
results['episode_reward_mean']=370.9499999999999
Sample game:
[2 ** ]
[213* ]
[*4* ]
[21*3 ]
[21*4 ]
[2 3* ]
[ ** ]
[21 4*]
[21*4 ]
[24301]
[21 *0]
[2* 03]
[2*03 ]
[213* ]
[21* 4]
[213* ]
[ *4 3]
[2*30 ]
[2** ]
[213* ]
algorithm.iteration=25
results['episode_reward_mean']=374.29999999999984
Sample game:
[21* 3]
[2 **]
[2*0 3]
[21034]
[2* *]
[21* 3]
[014* ]
[21* 4]
[2* *]
[2 * *]
[210 *]
[24 *0]
[214 *]
[21430]
[21043]
[ 14*2]
[213 *]
[1 *24]
[2*04 ]
[2** ]
algorithm.iteration=26
results['episode_reward_mean']=376.03999999999985
Sample game:
[2* *]
[2 *41]
[21 *]
[21* 4]
[*1 34]
[210* ]
[21430]
[*1 * ]
[210* ]
[21 *0]
[2 ** ]
[2*0 3]
[2*4 3]
[214 *]
[21*3 ]
[2 4*1]
[2 40*]
[2*30 ]
[2 **]
[ ** 3]
algorithm.iteration=27
results['episode_reward_mean']=377.1299999999999
Sample game:
[21 0*]
[2 *0]
[ *423]
[ * 03]
[ 1** ]
[2*3 0]
[21430]
[21304]
[2** ]
[2** ]
[ 1** ]
[21* ]
[21340]
[213 *]
[21 0*]
[21* 0]
[2* 30]
[210* ]
[2 0*3]
[213* ]
algorithm.iteration=28
results['episode_reward_mean']=377.74999999999983
Sample game:
[2 * *]
[2 * *]
[2 **]
[214 *]
[2 * *]
[21 3*]
[21* 0]
[* 0*]
[210 *]
[2*30 ]
[214* ]
[21403]
[21340]
[21340]
[21 *4]
[24 0*]
[21043]
[21403]
[2*0 3]
[21 4*]
algorithm.iteration=29
results['episode_reward_mean']=378.67999999999984
Sample game:
[2*43 ]
[ *403]
[*1* ]
[2* 03]
[21* 3]
[2 3*]
[21 4*]
[21403]
[*0 43]
[214 *]
[**3 ]
[21043]
[2* 03]
[*13 4]
[2* * ]
[2*03 ]
[2* 30]
[ * 30]
[21*4 ]
[24 *3]
algorithm.iteration=30
results['episode_reward_mean']=379.40999999999985
Sample game:
[21403]
[ ** ]
[2* * ]
[21 4*]
[21403]
[210 *]
[2 * *]
[21* 3]
[214 *]
[2** ]
[2*0 3]
[21* 0]
[21 0*]
[21 3*]
[21* 0]
[2*30 ]
[2 *31]
[2* 30]
[2 3* ]
[210* ]
algorithm.iteration=31
results['episode_reward_mean']=380.86999999999983
Sample game:
[ * *0]
[21430]
[21*3 ]
[24 *]
[21* 3]
[2*40 ]
[2 0*1]
[*40 3]
[210* ]
[21* 4]
[21* 3]
[ *403]
[ 1* 3]
[21* 4]
[21*3 ]
[ *03 ]
[21403]
[2*0 3]
[21034]
[21430]
algorithm.iteration=32
results['episode_reward_mean']=384.64999999999986
Sample game:
[21304]
[2* 43]
[214 *]
[21043]
[2 *13]
[ **3 ]
[2*0 3]
[2*0 3]
[*1* ]
[21 0*]
[ * * ]
[ 3*12]
[214 *]
[21* 4]
[*134 ]
[2 4 *]
[21* ]
[2* *]
[0* *]
[210 *]
algorithm.iteration=33
results['episode_reward_mean']=388.61999999999995
Sample game:
[2* 03]
[210* ]
[2*03 ]
[21043]
[210* ]
[21043]
[21034]
[24 0*]
[*14 3]
[2*0 3]
[21 *3]
[2* 30]
[2* *]
[2* 03]
[21403]
[2* 03]
[2* 03]
[21403]
[21430]
[21* 3]
algorithm.iteration=34
results['episode_reward_mean']=390.70999999999987
Sample game:
[214 *]
[2* 03]
[21403]
[*40 3]
[2*0 3]
[210 *]
[214 *]
[21* ]
[21 *3]
[21403]
[21403]
[21403]
[21* 3]
[2*03 ]
[21* 3]
[2*43 ]
[210 *]
[21 4*]
[2** ]
[21043]
algorithm.iteration=35
results['episode_reward_mean']=392.21
Sample game:
[ * 3]
[21*4 ]
[21 *]
[21403]
[21340]
[2* 03]
[21*4 ]
[21043]
[*1 43]
[210 *]
[2*0 3]
[21* 3]
[*10 3]
[213* ]
[** 3]
[21* 3]
[21*4 ]
[21*4 ]
[*1 43]
[21 *3]
algorithm.iteration=36
results['episode_reward_mean']=392.11999999999983
Sample game:
[21* 3]
[2*03 ]
[2* *]
[2* 03]
[21 *3]
[21 4*]
[21403]
[*1 43]
[2** ]
[2* 03]
[21*3 ]
[ * * ]
[2* 03]
[** 3]
[21043]
[21 4*]
[21 *0]
[2* *]
[2* *]
[2* 03]
algorithm.iteration=37
results['episode_reward_mean']=393.73999999999984
Sample game:
[2** ]
[21 *3]
[21043]
[21043]
[ *2 3]
[2* 03]
[21 *3]
[21043]
[2* *]
[21 *3]
[2*43 ]
[21* 3]
[2*30 ]
[213* ]
[*40 3]
[21043]
[21 *3]
[210 *]
[21 *3]
[2*0 3]
algorithm.iteration=38
results['episode_reward_mean']=399.2199999999999
Sample game:
[2* 03]
[2*0 3]
[2* 03]
[21043]
[21 *3]
[2* 03]
[21403]
[2*0 3]
[2* *]
[2* 03]
[21403]
[21 0*]
[21403]
[*1 43]
[2*0 3]
[21 0*]
[2*0 3]
[21403]
[2 *3]
[21403]
algorithm.iteration=39
results['episode_reward_mean']=405.38
Sample game:
[21 *3]
[21 4*]
[214 *]
[2*0 3]
[21403]
[21043]
[21 4*]
[2* 03]
[** 3]
[2*4 3]
[21043]
[21* 3]
[21* 3]
[ * *3]
[21 *3]
[214* ]
[21* 3]
[213* ]
[2* 3]
[21 *3]
algorithm.iteration=40
results['episode_reward_mean']=409.36999999999995
Sample game:
[21 *3]
[21430]
[21043]
[21* 3]
[21403]
[2* 03]
[21043]
[2*0 3]
[2*0 3]
[2*0 3]
[210 *]
[21043]
[21* 3]
[21043]
[21403]
[2*0 3]
[21* 3]
[21* 3]
[214 *]
[2* 3 ]
algorithm.iteration=41
results['episode_reward_mean']=413.15
Sample game:
[21 *3]
[ ** 3]
[21 * ]
[214 *]
[21* 3]
[2*0 3]
[2* 03]
[2*4 3]
[214 *]
[ * 4*]
[21403]
[21043]
[2* 03]
[21*4 ]
[21043]
[21 4*]
[21043]
[21* 3]
[2* 43]
[21403]
algorithm.iteration=42
results['episode_reward_mean']=416.06999999999994
Sample game:
[21403]
[21*0 ]
[21* 3]
[21043]
[2* *]
[2*0 3]
[2 *3]
[2* 03]
[214* ]
[210 *]
[2*03 ]
[21 *3]
[2* *]
[21* 3]
[21* 3]
[210* ]
[214* ]
[*1 43]
[21043]
[21* 3]
algorithm.iteration=43
results['episode_reward_mean']=420.25
Sample game:
[21 *3]
[21*3 ]
[21 *3]
[21403]
[21* 3]
[21 *3]
[21* 3]
[21 *3]
[2 0*3]
[21043]
[2*0 3]
[21403]
[21* 3]
[21 4*]
[* * ]
[21 *3]
[21043]
[214 *]
[2 0*3]
[21* 3]
algorithm.iteration=44
results['episode_reward_mean']=424.56
Sample game:
[210 *]
[2*4 3]
[2*0 3]
[21403]
[21* 3]
[21043]
[21043]
[21* 3]
[210 *]
[21043]
[21 *3]
[21043]
[21043]
[21043]
[21 *3]
[21* 3]
[2*0 3]
[21043]
[2*0 3]
[21043]
algorithm.iteration=45
results['episode_reward_mean']=427.78
Sample game:
[21043]
[21403]
[21* 3]
[2*0 3]
[2*0 3]
[21043]
[2*0 3]
[21043]
[2*0 3]
[2* 43]
[21 *3]
[21043]
[21* 3]
[21043]
[21043]
[21* 3]
[21 *3]
[21043]
[21 *3]
[21403]
algorithm.iteration=46
results['episode_reward_mean']=432.58
Sample game:
[21403]
[21* 3]
[21* 3]
[21*4 ]
[21* 3]
[21 4*]
[2*4 3]
[2 *13]
[21043]
[21043]
[24013]
[2 0*3]
[21* 3]
[*1 03]
[21403]
[21 *3]
[21* 3]
[21 *3]
[21043]
[21* 3]
algorithm.iteration=47
results['episode_reward_mean']=435.24
Sample game:
[21* 3]
[21* 3]
[21 *0]
[2 04*]
[21 *3]
[21*0 ]
[2*0 3]
[21043]
[21043]
[21043]
[21 4*]
[210 *]
[21043]
[*140 ]
[ 14*3]
[21 *3]
[2 *3]
[21* 3]
[21043]
[21* 3]
algorithm.iteration=48
results['episode_reward_mean']=437.29
Sample game:
[21043]
[21403]
[21043]
[210 *]
[21 *3]
[210 *]
[21043]
[21043]
[21 *3]
[21* 3]
[21 *3]
[21043]
[21 *3]
[21043]
[21 *3]
[21 *3]
[21 *3]
[21043]
[21* 3]
[21 *3]
algorithm.iteration=49
results['episode_reward_mean']=440.24
Sample game:
[210 *]
[21 0*]
[ 10*3]
[21043]
[21043]
[21043]
[21043]
[21043]
[21043]
[21043]
[21043]
[21 *3]
[21043]
[21* 3]
[21043]
[21 *3]
[ 10*3]
[21 *3]
[21 *3]
[21043]
algorithm.iteration=50
results['episode_reward_mean']=443.94
Sample game:
[21043]
[21043]
[21 4*]
[21043]
[21 *4]
[21043]
[2 **]
[21403]
[21043]
[21043]
[21043]
[21 *3]
[21043]
[21043]
[21 *3]
[21*0 ]
[21 *3]
[21403]
[21043]
[21 *3]
algorithm.iteration=51
results['episode_reward_mean']=444.07
Sample game:
[21 4*]
[21043]
[2* 43]
[21403]
[21043]
[*1 43]
[2*0 3]
[21043]
[21 *3]
[21043]
[210 *]
[214* ]
[21* 3]
[21 *3]
[21043]
[21 *3]
[21043]
[21* 3]
[21 *3]
[21 *3]
algorithm.iteration=52
results['episode_reward_mean']=440.01
Sample game:
[21043]
[21 *3]
[21 *3]
[21 *3]
[21043]
[21* 3]
[21 *3]
[21043]
[21403]
[21 * ]
[21043]
[2* * ]
[21 4*]
[21403]
[21 *3]
[21 *3]
[21 *3]
[21403]
[21043]
[21403]
algorithm.iteration=53
results['episode_reward_mean']=436.64
Sample game:
[21* 3]
[21 *3]
[21043]
[21043]
[21 *3]
[210* ]
[21043]
[21 *3]
[21 *3]
[21043]
[21043]
[21* 3]
[21043]
[21 4*]
[21 *3]
[21403]
[21 0*]
[21 *3]
[21403]
[21043]
algorithm.iteration=54
results['episode_reward_mean']=436.33
Sample game:
[21043]
[21403]
[21403]
[210 *]
[21 *3]
[2*0 3]
[21043]
[21* 3]
[21403]
[21043]
[21403]
[21* 3]
[21 *3]
[21* 3]
[21043]
[21 *3]
[21043]
[21043]
[21* 3]
[21 *3]
algorithm.iteration=55
results['episode_reward_mean']=440.05
Sample game:
[214 *]
[21* 3]
[21 *3]
[21403]
[21403]
[21* 3]
[*1 43]
[21043]
[21 *3]
[21043]
[21043]
[21043]
[*1 * ]
[21* 3]
[21043]
[21 *3]
[21403]
[21 *3]
[21* 3]
[21403]
algorithm.iteration=56
results['episode_reward_mean']=442.58
Sample game:
[21* 3]
[21403]
[ 1* *]
[21043]
[21403]
[21043]
[21043]
[21* 3]
[21403]
[21043]
[21* 3]
[21*4 ]
[21* 3]
[21 *3]
[21043]
[21 *3]
[21403]
[21* 3]
[21 *3]
[21043]
algorithm.iteration=57
results['episode_reward_mean']=441.76
Sample game:
[21* 3]
[21403]
[21403]
[21403]
[21* 3]
[21* 3]
[21403]
[21043]
[21 *3]
[21 *3]
[*14 3]
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[21403]
[21403]
[21043]
algorithm.iteration=58
results['episode_reward_mean']=438.74
Sample game:
[*14 3]
[21403]
[2* 03]
[21403]
[21403]
[21 *3]
[21 *3]
[21403]
[21043]
[21* 3]
[21* 3]
[*1 43]
[21* 3]
[21403]
[21403]
[21403]
[21403]
[*14 3]
[21 *3]
[21* 3]
algorithm.iteration=59
results['episode_reward_mean']=438.77
Sample game:
[21043]
[21* 3]
[21043]
[21* 3]
[21* 3]
[*14 3]
[21043]
[21* 3]
[21* 3]
[21* 3]
[21043]
[2 * *]
[21* 3]
[21* 3]
[21403]
[21* 3]
[21403]
[21*3 ]
[2 *13]
[21043]
algorithm.iteration=60
results['episode_reward_mean']=436.94
Sample game:
[21403]
[21043]
[21* 3]
[2*0 3]
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[21403]
[21403]
[21* 3]
[21043]
[21* 3]
[21403]
[21* 3]
[21403]
[21* 3]
[21403]
[21* 3]
algorithm.iteration=61
results['episode_reward_mean']=434.97
Sample game:
[21403]
[21* 3]
[21043]
[*14 3]
[*1 43]
[21403]
[21* 3]
[21403]
[*14 3]
[21* 3]
[21* 3]
[21* 3]
[21043]
[21* 3]
[21403]
[21* 3]
[21*3 ]
[21* 3]
[21* 3]
[21* 3]
algorithm.iteration=62
results['episode_reward_mean']=432.89
Sample game:
[21043]
[21* 3]
[21043]
[21* 3]
[21* 3]
[21* 3]
[21403]
[21403]
[21* 3]
[21403]
[214 *]
[21* 3]
[21403]
[21043]
[210* ]
[21* 3]
[21403]
[21 *3]
[21403]
[21*3 ]
algorithm.iteration=63
results['episode_reward_mean']=433.08
Sample game:
[21403]
[21* 3]
[21* 3]
[21 *3]
[01* 3]
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[21403]
[21* 3]
[21043]
[21* 3]
[21043]
[21* 3]
[21* 3]
[21* 3]
[21403]
[21403]
[21* 3]
algorithm.iteration=64
results['episode_reward_mean']=435.18
Sample game:
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[21043]
[21 *3]
[21 *3]
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[21403]
[21403]
[21 *3]
[21* 3]
[21403]
[21* 3]
[2* 03]
[21* 3]
[21403]
algorithm.iteration=65
results['episode_reward_mean']=435.39
Sample game:
[21 *3]
[*1* ]
[21403]
[21403]
[21* 3]
[21 4*]
[21 *3]
[21* 3]
[21403]
[21* 3]
[21043]
[21* 3]
[21* 3]
[21* 3]
[214 *]
[21* 3]
[21043]
[21043]
[21 *3]
[214 *]
algorithm.iteration=66
results['episode_reward_mean']=434.64
Sample game:
[21 *3]
[21* 3]
[21* 3]
[21* 3]
[21 4*]
[21403]
[21* 3]
[21* 3]
[21*3 ]
[*1 43]
[21* 3]
[21 4*]
[21* 3]
[21403]
[21043]
[21403]
[21403]
[21* 3]
[21* 3]
[21* 3]
algorithm.iteration=67
results['episode_reward_mean']=435.41
Sample game:
[21 *3]
[* * 3]
[21* 3]
[21* 3]
[214 *]
[21403]
[21* 3]
[*14 3]
[21403]
[21* 3]
[21 4*]
[214 *]
[21403]
[21403]
[21403]
[*143 ]
[2 * *]
[21403]
[21* 3]
[21* 3]
algorithm.iteration=68
results['episode_reward_mean']=433.94
Sample game:
[21403]
[21 *3]
[2* 03]
[21* 3]
[21403]
[21403]
[21 *3]
[21 * ]
[21* 3]
[21 *3]
[21* 3]
[21* 3]
[21* 3]
[21403]
[21* 3]
[21* 3]
[21403]
[214 *]
[21403]
[21* 3]
algorithm.iteration=69
results['episode_reward_mean']=432.88
Sample game:
[21043]
[21403]
[21* 3]
[21* 3]
[21043]
[21043]
[21* 3]
[214 *]
[21403]
[21403]
[21* 3]
[21* 3]
[*14 3]
[21403]
[21* 3]
[21* 3]
[21* 3]
[*14 3]
[21* 3]
[21* 3]
algorithm.iteration=70
results['episode_reward_mean']=433.1
Sample game:
[21*3 ]
[21* 3]
[21403]
[21* 3]
[21403]
[21403]
[21* 3]
[21* 3]
[21* 3]
[214 *]
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[21403]
[21 *3]
[21* 3]
[21403]
[21403]
[214 *]
algorithm.iteration=71
results['episode_reward_mean']=434.37
Sample game:
[21* 3]
[21* 3]
[21* 3]
[21 *3]
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[21403]
[21403]
[21043]
[21043]
[21* 3]
[214 *]
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[21043]
[21* 3]
algorithm.iteration=72
results['episode_reward_mean']=433.8
Sample game:
[21* 3]
[21* 3]
[21403]
[21* 3]
[21403]
[*14 3]
[21403]
[21* 3]
[21403]
[21043]
[21* 3]
[21 *3]
[21* 3]
[21 *3]
[21* 3]
[21403]
[21043]
[21* 3]
[21* 3]
[21043]
algorithm.iteration=73
results['episode_reward_mean']=434.27
Sample game:
[214 *]
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[214 *]
[21* 3]
[21* 3]
[21403]
[21* 3]
[21* 3]
[21* 3]
[21* 3]
[21403]
[21403]
[21* 3]
[21403]
[21* 3]
algorithm.iteration=74
results['episode_reward_mean']=433.89
Sample game:
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21* 3]
[21 * ]
[21 *3]
[21403]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21* 3]
[21403]
algorithm.iteration=75
results['episode_reward_mean']=436.87
Sample game:
[21403]
[21* 3]
[21 *3]
[21* 3]
[21 *3]
[21* 3]
[21* 3]
[21* 3]
[21043]
[21* 3]
[21 *3]
[21* 3]
[214 *]
[21403]
[21* 3]
[21403]
[21043]
[21* 3]
[21403]
[21403]
algorithm.iteration=76
results['episode_reward_mean']=441.01
Sample game:
[21043]
[21* 3]
[21403]
[21* 3]
[21403]
[21 *3]
[21* 3]
[21403]
[2*40 ]
[*1 43]
[21043]
[21* 3]
[21403]
[21* 3]
[21403]
[21403]
[21 *3]
[21403]
[21* 3]
[21* 3]
algorithm.iteration=77
results['episode_reward_mean']=443.61
Sample game:
[21043]
[21403]
[21 *3]
[21* 3]
[21403]
[21 *3]
[21* 3]
[21* 3]
[21403]
[21043]
[21403]
[21* 3]
[21403]
[21 *3]
[21* 3]
[21403]
[21* 3]
[21 4*]
[21 *3]
[21403]
algorithm.iteration=78
results['episode_reward_mean']=446.27
Sample game:
[21* 3]
[21403]
[21403]
[21043]
[21403]
[21043]
[*1 43]
[21403]
[21043]
[21403]
[21403]
[21 *3]
[21403]
[21043]
[21 *3]
[21043]
[21* 3]
[21* 3]
[21043]
[21* 3]
algorithm.iteration=79
results['episode_reward_mean']=447.28
Sample game:
[21403]
[21* 3]
[21* 3]
[21* 3]
[*1 43]
[21403]
[21403]
[21 *3]
[21403]
[21* 3]
[21403]
[21 *3]
[21 *3]
[21* 3]
[21403]
[21 *3]
[21403]
[21403]
[21 *3]
[21* 3]
algorithm.iteration=80
results['episode_reward_mean']=450.11
Sample game:
[21403]
[21 *3]
[21403]
[21* 3]
[21403]
[21043]
[21403]
[21403]
[21 *3]
[21* 3]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21* 3]
[21* 3]
[21403]
algorithm.iteration=81
results['episode_reward_mean']=455.71
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21* 3]
algorithm.iteration=82
results['episode_reward_mean']=460.72
Sample game:
[21043]
[21403]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21043]
[21 *3]
[21403]
[21 *3]
[21 *3]
[21403]
[21 *3]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=83
results['episode_reward_mean']=462.57
Sample game:
[21043]
[214 *]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
[21 *3]
[21403]
[21403]
[21 *3]
[21403]
[21403]
algorithm.iteration=84
results['episode_reward_mean']=465.36
Sample game:
[21403]
[21* 3]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21 *3]
[21 *3]
[21403]
[21403]
[21403]
[21403]
[21 *3]
algorithm.iteration=85
results['episode_reward_mean']=466.89
Sample game:
[21403]
[21* 3]
[21* 3]
[21* 3]
[21 *3]
[21403]
[21 4*]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21043]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=86
results['episode_reward_mean']=468.65
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
[21403]
[21 *3]
[21 *3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
algorithm.iteration=87
results['episode_reward_mean']=471.14
Sample game:
[21403]
[21403]
[21403]
[21403]
[ *403]
[21403]
[21403]
[21403]
[ 1 4*]
[21403]
[21403]
[21403]
[21 *3]
[21* 3]
[21403]
[21403]
[21403]
[21403]
[*1 43]
[21403]
algorithm.iteration=88
results['episode_reward_mean']=474.55
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[ 140*]
[21403]
[21*0 ]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=89
results['episode_reward_mean']=479.62
Sample game:
[21403]
[21403]
[21403]
[*14 3]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=90
results['episode_reward_mean']=483.84
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=91
results['episode_reward_mean']=485.54
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[*14 3]
[21403]
[21403]
[21403]
[*14 3]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=92
results['episode_reward_mean']=486.44
Sample game:
[21403]
[21403]
[ *403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21*0 ]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=93
results['episode_reward_mean']=487.69
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=94
results['episode_reward_mean']=488.91
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=95
results['episode_reward_mean']=490.72
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[2 40*]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=96
results['episode_reward_mean']=491.7
Sample game:
[21* 3]
[21403]
[21403]
[21403]
[21403]
[21403]
[ *403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=97
results['episode_reward_mean']=490.9
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=98
results['episode_reward_mean']=490.63
Sample game:
[21* 3]
[21403]
[21403]
[ *403]
[21403]
[21403]
[21403]
[21403]
[ *403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[214 *]
[21403]
algorithm.iteration=99
results['episode_reward_mean']=490.55
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=100
results['episode_reward_mean']=491.05
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=101
results['episode_reward_mean']=491.38
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[ *403]
[21403]
[21403]
algorithm.iteration=102
results['episode_reward_mean']=490.61
Sample game:
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21* 3]
algorithm.iteration=103
results['episode_reward_mean']=490.8
Sample game:
[21403]
[214 *]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21* 3]
algorithm.iteration=104
results['episode_reward_mean']=492.24
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[2 4*3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=105
results['episode_reward_mean']=493.77
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=106
results['episode_reward_mean']=494.84
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=107
results['episode_reward_mean']=495.26
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=108
results['episode_reward_mean']=495.86
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=109
results['episode_reward_mean']=496.39
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=110
results['episode_reward_mean']=496.91
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=111
results['episode_reward_mean']=496.95
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=112
results['episode_reward_mean']=496.86
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=113
results['episode_reward_mean']=496.88
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[214 *]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=114
results['episode_reward_mean']=496.81
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=115
results['episode_reward_mean']=496.9
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=116
results['episode_reward_mean']=496.69
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=117
results['episode_reward_mean']=496.92
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=118
results['episode_reward_mean']=497.22
Sample game:
[21403]
[21403]
[21403]
[2 4*3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=119
results['episode_reward_mean']=497.86
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=120
results['episode_reward_mean']=498.07
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=121
results['episode_reward_mean']=498.35
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=122
results['episode_reward_mean']=498.17
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=123
results['episode_reward_mean']=497.8
Sample game:
[ 140*]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=124
results['episode_reward_mean']=498.05
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=125
results['episode_reward_mean']=498.06
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=126
results['episode_reward_mean']=498.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=127
results['episode_reward_mean']=498.0
Sample game:
[21403]
[ 140*]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[ 14*3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=128
results['episode_reward_mean']=497.93
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21* 3]
[21* 3]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=129
results['episode_reward_mean']=497.99
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=130
results['episode_reward_mean']=498.09
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=131
results['episode_reward_mean']=498.37
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[ *403]
algorithm.iteration=132
results['episode_reward_mean']=498.61
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=133
results['episode_reward_mean']=498.56
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=134
results['episode_reward_mean']=498.48
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=135
results['episode_reward_mean']=498.44
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=136
results['episode_reward_mean']=498.4
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=137
results['episode_reward_mean']=498.7
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=138
results['episode_reward_mean']=498.44
Sample game:
[21403]
[21403]
[ 140*]
[21403]
[21403]
[ 140*]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=139
results['episode_reward_mean']=498.34
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=140
results['episode_reward_mean']=498.28
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=141
results['episode_reward_mean']=498.52
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=142
results['episode_reward_mean']=498.77
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[ 140*]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=143
results['episode_reward_mean']=498.79
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=144
results['episode_reward_mean']=498.81
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[*14 3]
[21403]
algorithm.iteration=145
results['episode_reward_mean']=498.82
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=146
results['episode_reward_mean']=498.98
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=147
results['episode_reward_mean']=499.04
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=148
results['episode_reward_mean']=499.06
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=149
results['episode_reward_mean']=499.1
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=150
results['episode_reward_mean']=499.26
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=151
results['episode_reward_mean']=499.41
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=152
results['episode_reward_mean']=499.49
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=153
results['episode_reward_mean']=499.42
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=154
results['episode_reward_mean']=499.33
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=155
results['episode_reward_mean']=499.3
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[ *403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=156
results['episode_reward_mean']=499.28
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=157
results['episode_reward_mean']=499.33
Sample game:
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=158
results['episode_reward_mean']=499.45
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[ *403]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=159
results['episode_reward_mean']=499.4
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=160
results['episode_reward_mean']=499.36
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[ 140*]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=161
results['episode_reward_mean']=499.36
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=162
results['episode_reward_mean']=499.31
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=163
results['episode_reward_mean']=499.34
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=164
results['episode_reward_mean']=499.35
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=165
results['episode_reward_mean']=499.25
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=166
results['episode_reward_mean']=499.19
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
algorithm.iteration=167
results['episode_reward_mean']=499.2
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=168
results['episode_reward_mean']=499.32
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=169
results['episode_reward_mean']=499.3
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=170
results['episode_reward_mean']=499.36
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=171
results['episode_reward_mean']=499.28
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=172
results['episode_reward_mean']=499.24
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=173
results['episode_reward_mean']=499.19
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=174
results['episode_reward_mean']=499.25
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=175
results['episode_reward_mean']=499.42
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=176
results['episode_reward_mean']=499.45
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=177
results['episode_reward_mean']=499.46
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=178
results['episode_reward_mean']=499.32
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=179
results['episode_reward_mean']=499.26
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=180
results['episode_reward_mean']=499.38
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=181
results['episode_reward_mean']=499.46
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=182
results['episode_reward_mean']=499.39
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=183
results['episode_reward_mean']=499.21
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=184
results['episode_reward_mean']=499.19
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=185
results['episode_reward_mean']=499.16
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=186
results['episode_reward_mean']=499.22
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=187
results['episode_reward_mean']=499.39
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=188
results['episode_reward_mean']=499.54
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=189
results['episode_reward_mean']=499.41
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=190
results['episode_reward_mean']=499.14
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=191
results['episode_reward_mean']=499.04
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=192
results['episode_reward_mean']=499.1
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
algorithm.iteration=193
results['episode_reward_mean']=499.31
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=194
results['episode_reward_mean']=499.34
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=195
results['episode_reward_mean']=499.37
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=196
results['episode_reward_mean']=499.37
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=197
results['episode_reward_mean']=499.39
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=198
results['episode_reward_mean']=499.57
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=199
results['episode_reward_mean']=499.55
Sample game:
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=200
results['episode_reward_mean']=499.56
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=201
results['episode_reward_mean']=499.54
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=202
results['episode_reward_mean']=499.51
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=203
results['episode_reward_mean']=499.44
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=204
results['episode_reward_mean']=499.49
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=205
results['episode_reward_mean']=499.68
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=206
results['episode_reward_mean']=499.63
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=207
results['episode_reward_mean']=499.54
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=208
results['episode_reward_mean']=499.48
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=209
results['episode_reward_mean']=499.61
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=210
results['episode_reward_mean']=499.63
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=211
results['episode_reward_mean']=499.53
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=212
results['episode_reward_mean']=499.49
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=213
results['episode_reward_mean']=499.57
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=214
results['episode_reward_mean']=499.52
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=215
results['episode_reward_mean']=499.43
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[214* ]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=216
results['episode_reward_mean']=499.39
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=217
results['episode_reward_mean']=499.35
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=218
results['episode_reward_mean']=499.18
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
[21403]
[21403]
algorithm.iteration=219
results['episode_reward_mean']=498.98
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=220
results['episode_reward_mean']=498.87
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=221
results['episode_reward_mean']=499.07
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=222
results['episode_reward_mean']=499.29
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=223
results['episode_reward_mean']=499.46
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21* 3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=224
results['episode_reward_mean']=499.56
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=225
results['episode_reward_mean']=499.46
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=226
results['episode_reward_mean']=499.41
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=227
results['episode_reward_mean']=499.4
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=228
results['episode_reward_mean']=499.44
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=229
results['episode_reward_mean']=499.4
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=230
results['episode_reward_mean']=499.48
Sample game:
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=231
results['episode_reward_mean']=499.49
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=232
results['episode_reward_mean']=499.59
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=233
results['episode_reward_mean']=499.54
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=234
results['episode_reward_mean']=499.5
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=235
results['episode_reward_mean']=499.4
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=236
results['episode_reward_mean']=499.4
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=237
results['episode_reward_mean']=499.45
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=238
results['episode_reward_mean']=499.43
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=239
results['episode_reward_mean']=499.34
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=240
results['episode_reward_mean']=499.26
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=241
results['episode_reward_mean']=499.28
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=242
results['episode_reward_mean']=499.38
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=243
results['episode_reward_mean']=499.58
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=244
results['episode_reward_mean']=499.54
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=245
results['episode_reward_mean']=499.42
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=246
results['episode_reward_mean']=499.34
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=247
results['episode_reward_mean']=499.46
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=248
results['episode_reward_mean']=499.43
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=249
results['episode_reward_mean']=499.47
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=250
results['episode_reward_mean']=499.39
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=251
results['episode_reward_mean']=499.42
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=252
results['episode_reward_mean']=499.55
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=253
results['episode_reward_mean']=499.62
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=254
results['episode_reward_mean']=499.68
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=255
results['episode_reward_mean']=499.52
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=256
results['episode_reward_mean']=499.47
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[214* ]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=257
results['episode_reward_mean']=499.42
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=258
results['episode_reward_mean']=499.41
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=259
results['episode_reward_mean']=499.51
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=260
results['episode_reward_mean']=499.43
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=261
results['episode_reward_mean']=499.44
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=262
results['episode_reward_mean']=499.16
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=263
results['episode_reward_mean']=499.25
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=264
results['episode_reward_mean']=499.41
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=265
results['episode_reward_mean']=499.51
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=266
results['episode_reward_mean']=499.45
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=267
results['episode_reward_mean']=499.49
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=268
results['episode_reward_mean']=499.56
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=269
results['episode_reward_mean']=499.62
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=270
results['episode_reward_mean']=499.54
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=271
results['episode_reward_mean']=499.59
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=272
results['episode_reward_mean']=499.73
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=273
results['episode_reward_mean']=499.82
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=274
results['episode_reward_mean']=499.82
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=275
results['episode_reward_mean']=499.86
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=276
results['episode_reward_mean']=499.92
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=277
results['episode_reward_mean']=499.94
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=278
results['episode_reward_mean']=499.91
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=279
results['episode_reward_mean']=499.87
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=280
results['episode_reward_mean']=499.89
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=281
results['episode_reward_mean']=499.85
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=282
results['episode_reward_mean']=499.87
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=283
results['episode_reward_mean']=499.91
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=284
results['episode_reward_mean']=499.89
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=285
results['episode_reward_mean']=499.91
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=286
results['episode_reward_mean']=499.9
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=287
results['episode_reward_mean']=499.9
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=288
results['episode_reward_mean']=499.88
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=289
results['episode_reward_mean']=499.86
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=290
results['episode_reward_mean']=499.85
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[2*40 ]
[21403]
[21403]
[21403]
algorithm.iteration=291
results['episode_reward_mean']=499.87
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=292
results['episode_reward_mean']=499.91
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=293
results['episode_reward_mean']=499.89
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=294
results['episode_reward_mean']=499.89
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=295
results['episode_reward_mean']=499.87
Sample game:
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=296
results['episode_reward_mean']=499.9
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=297
results['episode_reward_mean']=499.83
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=298
results['episode_reward_mean']=499.77
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=299
results['episode_reward_mean']=499.66
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=300
results['episode_reward_mean']=499.53
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=301
results['episode_reward_mean']=499.54
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=302
results['episode_reward_mean']=499.73
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=303
results['episode_reward_mean']=499.77
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=304
results['episode_reward_mean']=499.75
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=305
results['episode_reward_mean']=499.66
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=306
results['episode_reward_mean']=499.67
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=307
results['episode_reward_mean']=499.74
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=308
results['episode_reward_mean']=499.73
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=309
results['episode_reward_mean']=499.7
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=310
results['episode_reward_mean']=499.67
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=311
results['episode_reward_mean']=499.55
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=312
results['episode_reward_mean']=499.52
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=313
results['episode_reward_mean']=499.58
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=314
results['episode_reward_mean']=499.65
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=315
results['episode_reward_mean']=499.53
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=316
results['episode_reward_mean']=499.38
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=317
results['episode_reward_mean']=499.36
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=318
results['episode_reward_mean']=499.49
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=319
results['episode_reward_mean']=499.67
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=320
results['episode_reward_mean']=499.6
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=321
results['episode_reward_mean']=499.65
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=322
results['episode_reward_mean']=499.76
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=323
results['episode_reward_mean']=499.73
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=324
results['episode_reward_mean']=499.69
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=325
results['episode_reward_mean']=499.67
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=326
results['episode_reward_mean']=499.73
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=327
results['episode_reward_mean']=499.78
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=328
results['episode_reward_mean']=499.79
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=329
results['episode_reward_mean']=499.85
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=330
results['episode_reward_mean']=499.84
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=331
results['episode_reward_mean']=499.86
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=332
results['episode_reward_mean']=499.88
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=333
results['episode_reward_mean']=499.85
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=334
results['episode_reward_mean']=499.9
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=335
results['episode_reward_mean']=499.88
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=336
results['episode_reward_mean']=499.87
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=337
results['episode_reward_mean']=499.85
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=338
results['episode_reward_mean']=499.84
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=339
results['episode_reward_mean']=499.85
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=340
results['episode_reward_mean']=499.85
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=341
results['episode_reward_mean']=499.82
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=342
results['episode_reward_mean']=499.84
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=343
results['episode_reward_mean']=499.87
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=344
results['episode_reward_mean']=499.91
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=345
results['episode_reward_mean']=499.95
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=346
results['episode_reward_mean']=499.96
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=347
results['episode_reward_mean']=499.97
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=348
results['episode_reward_mean']=499.97
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=349
results['episode_reward_mean']=499.97
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=350
results['episode_reward_mean']=499.96
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=351
results['episode_reward_mean']=499.77
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=352
results['episode_reward_mean']=499.66
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=353
results['episode_reward_mean']=499.58
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=354
results['episode_reward_mean']=499.59
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=355
results['episode_reward_mean']=499.65
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=356
results['episode_reward_mean']=499.76
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=357
results['episode_reward_mean']=499.77
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=358
results['episode_reward_mean']=499.74
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=359
results['episode_reward_mean']=499.67
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=360
results['episode_reward_mean']=499.76
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=361
results['episode_reward_mean']=499.74
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21 *3]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=362
results['episode_reward_mean']=499.85
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=363
results['episode_reward_mean']=499.87
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=364
results['episode_reward_mean']=499.87
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=365
results['episode_reward_mean']=499.89
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=366
results['episode_reward_mean']=499.87
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=367
results['episode_reward_mean']=499.91
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=368
results['episode_reward_mean']=499.94
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=369
results['episode_reward_mean']=499.97
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=370
results['episode_reward_mean']=499.96
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=371
results['episode_reward_mean']=499.94
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=372
results['episode_reward_mean']=499.92
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=373
results['episode_reward_mean']=499.93
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=374
results['episode_reward_mean']=499.98
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=375
results['episode_reward_mean']=499.98
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=376
results['episode_reward_mean']=499.99
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=377
results['episode_reward_mean']=499.98
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=378
results['episode_reward_mean']=499.94
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=379
results['episode_reward_mean']=499.94
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=380
results['episode_reward_mean']=499.93
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=381
results['episode_reward_mean']=499.95
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=382
results['episode_reward_mean']=499.95
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=383
results['episode_reward_mean']=499.93
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=384
results['episode_reward_mean']=499.95
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=385
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=386
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=387
results['episode_reward_mean']=499.98
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=388
results['episode_reward_mean']=499.98
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=389
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=390
results['episode_reward_mean']=499.99
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=391
results['episode_reward_mean']=499.99
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=392
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=393
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=394
results['episode_reward_mean']=499.99
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=395
results['episode_reward_mean']=499.98
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=396
results['episode_reward_mean']=499.98
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=397
results['episode_reward_mean']=499.99
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=398
results['episode_reward_mean']=499.97
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=399
results['episode_reward_mean']=499.97
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=400
results['episode_reward_mean']=499.98
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=401
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=402
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=403
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=404
results['episode_reward_mean']=499.99
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=405
results['episode_reward_mean']=499.99
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=406
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=407
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=408
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=409
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=410
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=411
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=412
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=413
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=414
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=415
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=416
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=417
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=418
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=419
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=420
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=421
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=422
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=423
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=424
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=425
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=426
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=427
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=428
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=429
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=430
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=431
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=432
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=433
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=434
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=435
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=436
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=437
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=438
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=439
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=440
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=441
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=442
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=443
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=444
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=445
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=446
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=447
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=448
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=449
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=450
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=451
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=452
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=453
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=454
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=455
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=456
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=457
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=458
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=459
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=460
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=461
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=462
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=463
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=464
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=465
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=466
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=467
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=468
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=469
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=470
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=471
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=472
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=473
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=474
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=475
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=476
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=477
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=478
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=479
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=480
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=481
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=482
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=483
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=484
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=485
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=486
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=487
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=488
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=489
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=490
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=491
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=492
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=493
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=494
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=495
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=496
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=497
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=498
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=499
results['episode_reward_mean']=500.0
Sample game:
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
[21403]
algorithm.iteration=500
results['episode_reward_mean']=500.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment