
Commit 379b72a

Feature/benchmarks (#317)

* update benchmarks files
* merge
* use humanoid instead of swimmer
* make logdir match file name
* update torch version to minimum version supporting global_step in add_hparams
* update torch version for add_hparams update
* adjust slurm usage
* clip sac log_std
* adjust ddpg hyperparameters
* lower python version for deployment
* revert benchmark code to include all agents/envs
* rename benchmarks
* change initial sac temperature
* change pybullet logdir to match
* run linter
* add new benchmark results
* update docs

1 parent: dec247d

16 files changed: 100 additions, 34 deletions

.github/workflows/python-publish.yml
Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v3
         with:
-          python-version: 3.12
+          python-version: 3.11
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip

README.md
Lines changed: 4 additions & 3 deletions

@@ -21,10 +21,11 @@ Additionally, we provide an [example project](https://github.com/cpnota/all-exam
 
 ## High-Quality Reference Implementations
 
-The `autonomous-learning-library` separates reinforcement learning agents into two modules: `all.agents`, which provides flexible, high-level implementations of many common algorithms which can be adapted to new problems and environments, and `all.presets` which provides specific instansiations of these agents tuned for particular sets of environments, including Atari games, classic control tasks, and PyBullet robotics simulations. Some benchmark results showing results on-par with published results can be found below:
+The `autonomous-learning-library` separates reinforcement learning agents into two modules: `all.agents`, which provides flexible, high-level implementations of many common algorithms which can be adapted to new problems and environments, and `all.presets` which provides specific instansiations of these agents tuned for particular sets of environments, including Atari games, classic control tasks, and MuJoCo/Pybullet robotics simulations. Some benchmark results showing results on-par with published results can be found below:
 
-![atari40](benchmarks/atari40.png)
-![pybullet](benchmarks/pybullet.png)
+![atari40](benchmarks/atari_40m.png)
+![atari40](benchmarks/mujoco_v4.png)
+![pybullet](benchmarks/pybullet_v0.png)
 
 As of today, `all` contains implementations of the following deep RL algorithms:

all/environments/pybullet.py
Lines changed: 1 addition & 1 deletion

@@ -5,8 +5,8 @@ class PybulletEnvironment(GymEnvironment):
     short_names = {
         "ant": "AntBulletEnv-v0",
         "cheetah": "HalfCheetahBulletEnv-v0",
-        "humanoid": "HumanoidBulletEnv-v0",
         "hopper": "HopperBulletEnv-v0",
+        "humanoid": "HumanoidBulletEnv-v0",
         "walker": "Walker2DBulletEnv-v0",
     }
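The only change here is alphabetical ordering of the table; the lookup behavior is unchanged. A minimal sketch of what the short-name resolution amounts to (the resolve helper below is illustrative, not part of the library):

# Short-name table as it appears after this change.
short_names = {
    "ant": "AntBulletEnv-v0",
    "cheetah": "HalfCheetahBulletEnv-v0",
    "hopper": "HopperBulletEnv-v0",
    "humanoid": "HumanoidBulletEnv-v0",
    "walker": "Walker2DBulletEnv-v0",
}

def resolve(name):
    # Fall back to the given string if it is not a known short name.
    return short_names.get(name, name)

assert resolve("humanoid") == "HumanoidBulletEnv-v0"
assert resolve("HopperBulletEnv-v0") == "HopperBulletEnv-v0"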

all/experiments/slurm.py
Lines changed: 4 additions & 2 deletions

@@ -89,10 +89,12 @@ def create_sbatch_script(self):
             "output": os.path.join(self.outdir, "all_%A_%a.out"),
             "error": os.path.join(self.outdir, "all_%A_%a.err"),
             "array": "0-" + str(num_experiments - 1),
-            "partition": "1080ti-short",
+            "partition": "gpu-long",
             "ntasks": 1,
+            "cpus-per-task": 4,
             "mem-per-cpu": 4000,
-            "gres": "gpu:1",
+            "gpus-per-node": 1,
+            "time": "7-0",
         }
         sbatch_args.update(self.sbatch_args)
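For readers unfamiliar with Slurm, each default above corresponds to an #SBATCH directive in the generated batch script. A rough sketch of that rendering, assuming a simple key=value translation (the render_sbatch_header helper is hypothetical, not the library's actual create_sbatch_script):

def render_sbatch_header(sbatch_args):
    # Turn each key/value pair into an "#SBATCH --key=value" directive.
    lines = ["#!/bin/sh"]
    for key, value in sbatch_args.items():
        lines.append("#SBATCH --{}={}".format(key, value))
    return "\n".join(lines)

defaults = {
    "partition": "gpu-long",
    "ntasks": 1,
    "cpus-per-task": 4,
    "mem-per-cpu": 4000,
    "gpus-per-node": 1,
    "time": "7-0",  # 7 days
}
print(render_sbatch_header(defaults))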

all/policies/soft_deterministic.py
Lines changed: 21 additions & 6 deletions

@@ -20,18 +20,32 @@ class SoftDeterministicPolicy(Approximation):
         kwargs (optional): Any other arguments accepted by all.approximation.Approximation
     """
 
-    def __init__(self, model, optimizer=None, space=None, name="policy", **kwargs):
-        model = SoftDeterministicPolicyNetwork(model, space)
+    def __init__(
+        self,
+        model,
+        optimizer=None,
+        space=None,
+        name="policy",
+        log_std_min=-20,
+        log_std_max=4,
+        **kwargs
+    ):
+        model = SoftDeterministicPolicyNetwork(
+            model, space, log_std_min=log_std_min, log_std_max=log_std_max
+        )
         self._inner_model = model
         super().__init__(model, optimizer, name=name, **kwargs)
 
 
 class SoftDeterministicPolicyNetwork(RLNetwork):
-    def __init__(self, model, space):
+    def __init__(self, model, space, log_std_min=-20, log_std_max=4, log_std_scale=0.5):
         super().__init__(model)
         self._action_dim = space.shape[0]
         self._tanh_scale = torch.tensor((space.high - space.low) / 2).to(self.device)
         self._tanh_mean = torch.tensor((space.high + space.low) / 2).to(self.device)
+        self._log_std_min = log_std_min
+        self._log_std_max = log_std_max
+        self._log_std_scale = log_std_scale
 
     def forward(self, state):
         outputs = super().forward(state)
@@ -41,9 +55,10 @@ def forward(self, state):
 
     def _normal(self, outputs):
         means = outputs[..., 0 : self._action_dim]
-        logvars = outputs[..., self._action_dim :]
-        std = logvars.mul(0.5).exp_()
-        return torch.distributions.normal.Normal(means, std)
+        log_stds = outputs[..., self._action_dim :] * self._log_std_scale
+        clipped_log_stds = torch.clamp(log_stds, self._log_std_min, self._log_std_max)
+        stds = clipped_log_stds.exp_()
+        return torch.distributions.normal.Normal(means, stds)
 
     def _sample(self, normal):
         raw = normal.rsample()
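The effect of the new clipping (the "clip sac log_std" item in the commit message) is easy to check numerically: with the defaults log_std_min=-20, log_std_max=4, and log_std_scale=0.5, the standard deviation stays in roughly [2e-9, 54.6] no matter how extreme the raw network output is. A quick standalone check:

import torch

raw = torch.tensor([-1000.0, 0.0, 1000.0])   # extreme raw network outputs
log_stds = raw * 0.5                          # log_std_scale
clipped = torch.clamp(log_stds, -20, 4)       # log_std_min, log_std_max
print(clipped.exp())                          # tensor([2.0612e-09, 1.0000e+00, 5.4598e+01])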

all/presets/continuous/ddpg.py
Lines changed: 2 additions & 2 deletions

@@ -16,8 +16,8 @@
     # Common settings
     "discount_factor": 0.99,
     # Adam optimizer settings
-    "lr_q": 3e-4,
-    "lr_pi": 3e-4,
+    "lr_q": 1e-3,
+    "lr_pi": 1e-3,
     # Training settings
     "minibatch_size": 256,
     "update_frequency": 1,

all/presets/continuous/sac.py
Lines changed: 2 additions & 2 deletions

@@ -17,7 +17,7 @@
     "discount_factor": 0.99,
     # Adam optimizer settings
     "lr_q": 1e-3,
-    "lr_pi": 3e-4,
+    "lr_pi": 1e-3,
     # Training settings
     "minibatch_size": 256,
     "update_frequency": 1,
@@ -26,7 +26,7 @@
     "replay_start_size": 5000,
     "replay_buffer_size": 1e6,
     # Exploration settings
-    "temperature_initial": 0.1,
+    "temperature_initial": 1.0,
     "lr_temperature_scaling": 3e-5,
     "entropy_backups": True,
     "entropy_target_scaling": 1.0,

benchmarks/atari40.png
Binary file removed (-925 KB); not shown.

benchmarks/atari_40m.png
Binary file added (242 KB).
Lines changed: 2 additions & 2 deletions

@@ -20,8 +20,8 @@ def main():
         agents,
         envs,
         10e6,
-        logdir="benchmarks/atari40",
-        sbatch_args={"partition": "gpu-long"},
+        logdir="benchmarks/atari_40m",
+        sbatch_args={"partition": "gypsum-1080ti"},
     )
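The sbatch_args passed here override the library defaults shown in the all/experiments/slurm.py hunk above, since the defaults are merged with sbatch_args.update(self.sbatch_args). A tiny illustration of that merge:

defaults = {"partition": "gpu-long", "ntasks": 1, "gpus-per-node": 1, "time": "7-0"}
overrides = {"partition": "gypsum-1080ti"}  # as passed by this benchmark script

sbatch_args = dict(defaults)
sbatch_args.update(overrides)    # the caller-supplied partition wins
print(sbatch_args["partition"])  # gypsum-1080ti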
