-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathppo.py
More file actions
47 lines (38 loc) · 917 Bytes
/
ppo.py
File metadata and controls
47 lines (38 loc) · 917 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from .defaults import default
from .registry import register
@register
def ppo():
    """Base PPO hyper-parameter preset.

    Starts from the shared defaults and layers on the PPO-specific
    settings (actor/critic model pair, clipped-surrogate gradient
    function, simple replay memory). Registered presets below derive
    from this one and override only the environment-specific fields.
    """
    hps = default()
    # PPO-specific overrides, applied in declaration order on top of
    # the defaults object.
    overrides = {
        "memory": "SimpleMemory",
        "models": ["PPOActor", "PPOCritic"],
        "agent": "PPO",
        "lr": {'actor_lr': 0.0001, 'critic_lr': 0.0002},
        "lr_decay": {'actor_lr': 'no_decay', 'critic_lr': 'no_decay'},
        "batch_size": 32,
        "num_steps": 128,
        "num_epochs": 10,
        "hidden_size": 100,
        "gamma": 0.9,
        "memory_size": 50000,
        "action_function": "uniform_random_action",
        "grad_function": "ppo",
        "normalize_reward": False,
        "clipping_coef": 0.2,  # PPO surrogate-objective clip range
    }
    for name, value in overrides.items():
        setattr(hps, name, value)
    return hps
@register
def ppo_cartpole():
    """PPO preset for the CartPole-v1 environment.

    Inherits everything from the base ``ppo`` preset, pinning the
    environment and raising the discount factor.
    """
    config = ppo()
    config.env = "CartPole-v1"
    config.gamma = 0.99  # longer effective horizon than the base 0.9
    return config
@register
def ppo_mountaincar():
    """PPO preset for the MountainCar-v0 environment.

    Derives from the base ``ppo`` preset and enables the default
    reward augmentation for this sparse-reward task.
    """
    config = ppo()
    config.env = "MountainCar-v0"
    config.reward_augmentation = "mountain_car_default"
    return config
@register
def ppo_pong():
    """PPO preset for the Pong-v0 environment.

    Uses the base ``ppo`` settings unchanged apart from the
    environment name.
    """
    config = ppo()
    config.env = "Pong-v0"
    return config