# Gym_GPU/rl_game/get_up/config/ppo_cfg.yaml
# rl_games PPO (a2c_continuous) training configuration for an Isaac Lab task.
params:
  seed: 42

  algo:
    name: a2c_continuous  # rl_games continuous-action A2C/PPO algorithm

  model:
    name: continuous_a2c_logstd  # Gaussian policy with learned log-std

  network:
    name: actor_critic
    separate: False  # actor and critic share the MLP trunk
    space:
      continuous:
        # "None" is read by rl_games as "no activation" on the heads.
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 1.2  # initial (log-)sigma constant
        fixed_sigma: False  # sigma is state-dependent, not a global parameter
    mlp:
      units: [512, 256, 128]
      activation: relu
      d2rl: False
      initializer:
        name: default

  config:
    name: T1_Walking
    env_name: rlgym  # Isaac Lab wrapper
    multi_gpu: False
    ppo: True
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    value_bootstrap: True  # bootstrap value on episode timeouts
    num_actors: 8192  # number of robots trained in parallel
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: True
    gamma: 0.98   # discount factor
    tau: 0.95     # GAE lambda
    # NOTE(review): PyYAML parses "5e-4" (no decimal point) as a string, not a
    # float; rl_games coerces it, but "5.0e-4" would be unambiguous YAML.
    learning_rate: 5e-4
    lr_schedule: adaptive        # LR adapted to keep KL near kl_threshold
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 500
    save_best_after: 50
    save_frequency: 100
    grad_norm: 0.5               # gradient clipping norm (with truncate_grads)
    entropy_coef: 0.008
    truncate_grads: True
    bounds_loss_coef: 0.001      # penalty for actions outside bounds
    e_clip: 0.2                  # PPO clip range
    horizon_length: 128          # steps collected per actor per epoch
    minibatch_size: 8192
    mini_epochs: 4
    critic_coef: 1
    clip_value: True