Configure

Each agent is controlled by a set of named configuration parameters. listAvailConf() lists all available parameters together with a short note:

options(width = 1000)  # widen the print width so the table prints without wrapping
listAvailConf()[, .(name, note)]
##                         name                                                               note
##  1:                   render                             Whether to show rendering video or not
##  2:                      log                      Whether to log important information on drive
##  3:                  console                     Whether to enable debug info output to console
##  4:              agent.gamma                      The discount factor in reinforcement learning
##  5:     agent.flag.reset.net                               Whether to reset the neural network 
##  6:           agent.lr.decay                 The decay factor of the learning rate at each step
##  7:                 agent.lr                                        learning rate for the agent
##  8:        agent.store.model                     whether to store the model of the agent or not
##  9: agent.update.target.freq                         How often should the target network be set
## 10:        agent.start.learn                     after how many transitions should replay begin
## 11:            agent.clip.td                                           whether to clip TD error
## 12:        policy.maxEpsilon                               The maximum epsilon exploration rate
## 13:        policy.minEpsilon                               The minimum epsilon exploration rate
## 14:        policy.decay.rate                                                     the decay rate
## 15:        policy.decay.type the way to decay epsilon, can be decay_geo, decay_exp, decay_linear
## 16:       policy.aneal.steps                 only valid when policy.decay.type = 'decay_linear'
## 17:   policy.softmax.magnify                                                               <NA>
## 18:         replay.batchsize              how many samples to take from replay memory each time
## 19:           replay.memname                                          The type of replay memory
## 20:          replay.mem.size                                      The size of the replay memory
## 21:            replay.epochs       How many gradient descent epochs to carry out for one replay
## 22:              replay.freq                            how many steps to wait until one replay
##                         name                                                               note
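
Since listAvailConf() returns a data.table (the .(name, note) selection above relies on that), the listing can be narrowed with ordinary data.table operations, for example:

# Show only the policy-related parameters by filtering the name column.
listAvailConf()[grepl("^policy\\.", name), .(name, note)]
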
getDefaultConf() returns the default configuration for the chosen agent class:

conf = getDefaultConf("AgentDQN")
conf
##                                      value
## render                               FALSE
## log                                  FALSE
## console                               TRUE
## agent.gamma                           0.99
## agent.flag.reset.net                  TRUE
## agent.lr.decay           0.999000499833375
## agent.lr                             0.001
## agent.store.model                    FALSE
## agent.update.target.freq              2000
## agent.start.learn                       64
## agent.clip.td                        FALSE
## policy.maxEpsilon                        1
## policy.minEpsilon                     0.01
## policy.decay.rate        0.999000499833375
## policy.decay.type                decay_geo
## policy.aneal.steps                   1e+06
## policy.softmax.magnify                   1
## replay.batchsize                        64
## replay.memname                     Uniform
## replay.mem.size                      20000
## replay.epochs                            1
## replay.freq                              1
## policy.name                  EpsilonGreedy
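
Any entry from the table can be overridden with conf$set() before the agent is created. A sketch (the parameter names come from listAvailConf() above; the values are arbitrary illustrations, not tuned recommendations):

# Illustration only, on a fresh copy so the run below is unaffected:
# a smaller learning rate and linear epsilon annealing.
conf2 = getDefaultConf("AgentDQN")
conf2$set(agent.lr = 5e-4,
          policy.decay.type = "decay_linear",
          policy.aneal.steps = 1e4)
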
For this run we only disable video rendering and console output, then create the environment and the agent and do a short training run:

conf$set(render = FALSE, console = FALSE)  # no render window, no debug output
env = makeGymEnv("CartPole-v0")            # wrap the OpenAI Gym CartPole environment
agent = initAgent("AgentDQN", env, conf)   # DQN agent using this configuration
agent$learn(2)                             # train for 2 episodes (a quick smoke test)
agent$plotPerf(FALSE)                      # plot the training performance
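
The defaults above use decay_geo with policy.decay.rate = 0.999000499833375, which equals exp(-1e-3). Assuming decay_geo multiplies epsilon by this rate at every step and floors it at policy.minEpsilon (a plausible reading of the parameter names, not verified against the package source), the resulting schedule can be previewed directly:

# Hypothetical sketch of the geometric epsilon schedule implied by the
# defaults; where exactly the package applies the decay may differ.
steps = 0:10000
eps = pmax(0.01, 1.0 * 0.999000499833375^steps)
plot(steps, eps, type = "l", xlab = "step", ylab = "epsilon")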