The example of train-T1
This commit is contained in:
60
rl_game/demo/config/ppo_cfg.yaml
Normal file
60
rl_game/demo/config/ppo_cfg.yaml
Normal file
@@ -0,0 +1,60 @@
|
||||
# PPO training configuration for the "T1_Walking" run.
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file and follows the usual rl_games layout
# (params -> algo / model / network / config) -- confirm against the
# consumer before relying on it.
params:
  seed: 42

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: false
    space:
      continuous:
        # Plain `None` is loaded by YAML as the string "None", not as null.
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: true
    mlp:
      units: [512, 256, 128]
      activation: elu
      d2rl: false
      initializer:
        name: default

  config:
    name: T1_Walking
    env_name: rlgym  # Isaac Lab wrapper
    multi_gpu: false
    ppo: true
    mixed_precision: true
    normalize_input: true
    normalize_value: true
    value_bootstrap: true
    num_actors: 16384  # number of robots trained simultaneously
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: true
    gamma: 0.99
    tau: 0.95
    # Written with a decimal point: YAML 1.1 loaders (e.g. PyYAML) resolve a
    # bare `3e-4` as the string "3e-4" rather than a float.
    learning_rate: 3.0e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 5000
    save_best_after: 50
    save_frequency: 100
    grad_norm: 1.0
    entropy_coef: 0.01
    truncate_grads: true
    bounds_loss_coef: 0.0
    e_clip: 0.2
    horizon_length: 128
    # NOTE(review): presumably must divide num_actors * horizon_length
    # (16384 * 128 = 2097152 = 64 * 32768) -- verify against the trainer.
    minibatch_size: 32768
    mini_epochs: 5
    critic_coef: 2
    clip_value: true
||||
Reference in New Issue
Block a user