The example of train-T1

2026-03-15 20:14:06 -04:00
parent 571b4283c7
commit f0a5f8f4b7
8 changed files with 357 additions and 0 deletions
--- a/rl_game/demo/config/ppo_cfg.yaml
+++ b/rl_game/demo/config/ppo_cfg.yaml
@@ -0,0 +1,60 @@
+params:  # PPO training configuration for the T1_Walking run
+  seed: 42
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: actor_critic
+    separate: false
+    space:
+      continuous:
+        mu_activation: 'None'  # the string "None", not YAML null
+        sigma_activation: 'None'  # the string "None", not YAML null
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: true
+    mlp:
+      units: [512, 256, 128]
+      activation: elu
+      d2rl: false
+      initializer:
+        name: default
+
+  config:
+    name: T1_Walking
+    env_name: rlgym  # Isaac Lab wrapper
+    multi_gpu: false
+    ppo: true
+    mixed_precision: true
+    normalize_input: true
+    normalize_value: true
+    value_bootstrap: true
+    num_actors: 16384  # number of robots trained simultaneously
+    reward_shaper:
+      scale_value: 1.0
+    normalize_advantage: true
+    gamma: 0.99  # presumably the reward discount factor; confirm
+    tau: 0.95  # presumably GAE lambda (rl_games naming); confirm
+    learning_rate: 3.0e-4  # decimal point keeps YAML 1.1 loaders from reading a string
+    lr_schedule: adaptive
+    kl_threshold: 0.008
+    score_to_win: 20000
+    max_epochs: 5000
+    save_best_after: 50
+    save_frequency: 100
+    grad_norm: 1.0
+    entropy_coef: 0.01
+    truncate_grads: true
+    bounds_loss_coef: 0.0
+    e_clip: 0.2
+    horizon_length: 128
+    # NOTE(review): 16384 * 128 = 2097152 = 64 * 32768; confirm divisibility is required
+    minibatch_size: 32768
+    mini_epochs: 5
+    critic_coef: 2
+    clip_value: true