stand_stable_final_version
This commit is contained in:
@@ -11,4 +11,4 @@ retrain(继续训练)
|
|||||||
GYM_CPU_MODE=train GYM_CPU_TRAIN_MODEL=scripts/gyms/logs/Walk_R0_005/best_model.zip bash train.sh
|
GYM_CPU_MODE=train GYM_CPU_TRAIN_MODEL=scripts/gyms/logs/Walk_R0_005/best_model.zip bash train.sh
|
||||||
|
|
||||||
retrain+改训练超参
|
retrain+改训练超参
|
||||||
GYM_CPU_MODE=train GYM_CPU_TRAIN_MODEL=scripts/gyms/logs/Walk_R0_004/best_model.zip GYM_CPU_TRAIN_LR=2e-4 GYM_CPU_TRAIN_BATCH_SIZE=256 GYM_CPU_TRAIN_EPOCHS=8 bash train.sh
|
GYM_CPU_MODE=train GYM_CPU_TRAIN_MODEL=scripts/gyms/logs/Walk_R0_004/best_model.zip GYM_CPU_TRAIN_LR=2e-4 GYM_CPU_TRAIN_CLIP_RANGE=0.13 GYM_CPU_TRAIN_BATCH_SIZE=256 YM_CPU_TRAIN_GAMMA=0.95 GYM_CPU_TRAIN_ENT_COEF=0.05 GYM_CPU_TRAIN_EPOCHS=8 bash train.sh
|
||||||
@@ -5,6 +5,7 @@ import time
|
|||||||
from time import sleep
|
from time import sleep
|
||||||
from random import random
|
from random import random
|
||||||
from random import uniform
|
from random import uniform
|
||||||
|
from itertools import count
|
||||||
|
|
||||||
from stable_baselines3 import PPO
|
from stable_baselines3 import PPO
|
||||||
from stable_baselines3.common.monitor import Monitor
|
from stable_baselines3.common.monitor import Monitor
|
||||||
@@ -148,11 +149,14 @@ class WalkEnv(gym.Env):
|
|||||||
self.scaling_factor = 0.3
|
self.scaling_factor = 0.3
|
||||||
# self.scaling_factor = 1
|
# self.scaling_factor = 1
|
||||||
|
|
||||||
|
# Encourage a minimum lateral stance so the policy avoids feet overlap.
|
||||||
|
self.min_stance_rad = 0.10
|
||||||
|
|
||||||
# Small reset perturbations for robustness training.
|
# Small reset perturbations for robustness training.
|
||||||
self.enable_reset_perturb = True
|
self.enable_reset_perturb = True
|
||||||
self.reset_beam_yaw_range_deg = 180 # randomize target direction fully to encourage learning a real walk instead of a fixed gait
|
self.reset_beam_yaw_range_deg = 180 # randomize target direction fully to encourage learning a real walk instead of a fixed gait
|
||||||
self.reset_joint_noise_rad = 0.015
|
self.reset_joint_noise_rad = 0.025
|
||||||
self.reset_perturb_steps = 3
|
self.reset_perturb_steps = 4
|
||||||
self.reset_recover_steps = 8
|
self.reset_recover_steps = 8
|
||||||
|
|
||||||
self.previous_action = np.zeros(len(self.Player.robot.ROBOT_MOTORS))
|
self.previous_action = np.zeros(len(self.Player.robot.ROBOT_MOTORS))
|
||||||
@@ -392,14 +396,15 @@ class WalkEnv(gym.Env):
|
|||||||
|
|
||||||
def compute_reward(self, previous_pos, current_pos, action):
|
def compute_reward(self, previous_pos, current_pos, action):
|
||||||
height = float(self.Player.world.global_position[2])
|
height = float(self.Player.world.global_position[2])
|
||||||
|
robot = self.Player.robot
|
||||||
|
|
||||||
orientation_quat_inv = R.from_quat(self.Player.robot._global_cheat_orientation).inv()
|
orientation_quat_inv = R.from_quat(robot._global_cheat_orientation).inv()
|
||||||
projected_gravity = orientation_quat_inv.apply(np.array([0.0, 0.0, -1.0]))
|
projected_gravity = orientation_quat_inv.apply(np.array([0.0, 0.0, -1.0]))
|
||||||
tilt_mag = float(np.linalg.norm(projected_gravity[:2]))
|
tilt_mag = float(np.linalg.norm(projected_gravity[:2]))
|
||||||
ang_vel = np.deg2rad(self.Player.robot.gyroscope)
|
ang_vel = np.deg2rad(robot.gyroscope)
|
||||||
ang_vel_mag = float(np.linalg.norm(ang_vel))
|
ang_vel_mag = float(np.linalg.norm(ang_vel))
|
||||||
|
|
||||||
is_fallen = height < 0.3
|
is_fallen = height < 0.55
|
||||||
if is_fallen:
|
if is_fallen:
|
||||||
# remain = max(0, 800 - self.step_counter)
|
# remain = max(0, 800 - self.step_counter)
|
||||||
# return -8.0 - 0.01 * remain
|
# return -8.0 - 0.01 * remain
|
||||||
@@ -429,6 +434,23 @@ class WalkEnv(gym.Env):
|
|||||||
posture_penalty = -0.3 * (tilt_mag)
|
posture_penalty = -0.3 * (tilt_mag)
|
||||||
ang_vel_penalty = -0.02 * ang_vel_mag
|
ang_vel_penalty = -0.02 * ang_vel_mag
|
||||||
|
|
||||||
|
# Use simulator joint readings in training frame to shape lateral stance.
|
||||||
|
joint_pos = np.deg2rad(
|
||||||
|
[robot.motor_positions[motor] for motor in robot.ROBOT_MOTORS]
|
||||||
|
) * self.train_sim_flip
|
||||||
|
left_hip_roll = float(joint_pos[12])
|
||||||
|
right_hip_roll = float(joint_pos[18])
|
||||||
|
left_ankle_roll = float(joint_pos[16])
|
||||||
|
right_ankle_roll = float(joint_pos[22])
|
||||||
|
|
||||||
|
hip_spread = left_hip_roll - right_hip_roll
|
||||||
|
ankle_spread = left_ankle_roll - right_ankle_roll
|
||||||
|
stance_metric = 0.6 * abs(hip_spread) + 0.4 * abs(ankle_spread)
|
||||||
|
|
||||||
|
# Penalize narrow stance (feet too close) and scissoring (cross-leg pattern).
|
||||||
|
stance_collapse_penalty = -4.0 * max(0.0, self.min_stance_rad - stance_metric)
|
||||||
|
cross_leg_penalty = -1.2 * max(0.0, -(hip_spread * ankle_spread))
|
||||||
|
|
||||||
target_height = self.initial_height
|
target_height = self.initial_height
|
||||||
height_error = height - target_height
|
height_error = height - target_height
|
||||||
height_penalty = -0.5 * abs(height_error) # 惩罚高度偏离,系数可调
|
height_penalty = -0.5 * abs(height_error) # 惩罚高度偏离,系数可调
|
||||||
@@ -464,10 +486,12 @@ class WalkEnv(gym.Env):
|
|||||||
posture_penalty
|
posture_penalty
|
||||||
+ ang_vel_penalty
|
+ ang_vel_penalty
|
||||||
+ height_penalty
|
+ height_penalty
|
||||||
|
+ stance_collapse_penalty
|
||||||
|
+ cross_leg_penalty
|
||||||
# + exploration_bonus
|
# + exploration_bonus
|
||||||
# + height_down_penalty
|
# + height_down_penalty
|
||||||
)
|
)
|
||||||
if time.time() - self.start_time >= 1200:
|
if time.time() - self.start_time >= 600:
|
||||||
self.start_time = time.time()
|
self.start_time = time.time()
|
||||||
print(
|
print(
|
||||||
# f"progress_reward:{progress_reward:.4f}",
|
# f"progress_reward:{progress_reward:.4f}",
|
||||||
@@ -476,6 +500,8 @@ class WalkEnv(gym.Env):
|
|||||||
f"height_penalty:{height_penalty:.4f}",
|
f"height_penalty:{height_penalty:.4f}",
|
||||||
f"smoothness_penalty:{smoothness_penalty:.4f},",
|
f"smoothness_penalty:{smoothness_penalty:.4f},",
|
||||||
f"posture_penalty:{posture_penalty:.4f}",
|
f"posture_penalty:{posture_penalty:.4f}",
|
||||||
|
f"stance_collapse_penalty:{stance_collapse_penalty:.4f}",
|
||||||
|
f"cross_leg_penalty:{cross_leg_penalty:.4f}",
|
||||||
# f"ang_vel_penalty:{ang_vel_penalty:.4f}",
|
# f"ang_vel_penalty:{ang_vel_penalty:.4f}",
|
||||||
# f"height_down_penalty:{height_down_penalty:.4f}",
|
# f"height_down_penalty:{height_down_penalty:.4f}",
|
||||||
# f"exploration_bonus:{exploration_bonus:.4f}"
|
# f"exploration_bonus:{exploration_bonus:.4f}"
|
||||||
@@ -519,7 +545,7 @@ class WalkEnv(gym.Env):
|
|||||||
self.last_action_for_reward = action.copy()
|
self.last_action_for_reward = action.copy()
|
||||||
|
|
||||||
# Fall detection and penalty
|
# Fall detection and penalty
|
||||||
is_fallen = self.Player.world.global_position[2] < 0.3
|
is_fallen = self.Player.world.global_position[2] < 0.55
|
||||||
|
|
||||||
# terminal state: the robot is falling or timeout
|
# terminal state: the robot is falling or timeout
|
||||||
terminated = is_fallen or self.step_counter > 800 or self.route_completed
|
terminated = is_fallen or self.step_counter > 800 or self.route_completed
|
||||||
@@ -601,13 +627,13 @@ class Train(Train_Base):
|
|||||||
clip_range=float(os.environ.get("GYM_CPU_TRAIN_CLIP_RANGE", "0.2")), # PPO clipping parameter
|
clip_range=float(os.environ.get("GYM_CPU_TRAIN_CLIP_RANGE", "0.2")), # PPO clipping parameter
|
||||||
gae_lambda=0.95, # GAE lambda
|
gae_lambda=0.95, # GAE lambda
|
||||||
gamma=float(os.environ.get("GYM_CPU_TRAIN_GAMMA", "0.95")), # Discount factor
|
gamma=float(os.environ.get("GYM_CPU_TRAIN_GAMMA", "0.95")), # Discount factor
|
||||||
target_kl=0.03,
|
# target_kl=0.03,
|
||||||
n_epochs=int(os.environ.get("GYM_CPU_TRAIN_EPOCHS", "5")),
|
n_epochs=int(os.environ.get("GYM_CPU_TRAIN_EPOCHS", "5")),
|
||||||
# tensorboard_log=f"./scripts/gyms/logs/{folder_name}/tensorboard/"
|
tensorboard_log=f"./scripts/gyms/logs/{folder_name}/tensorboard/"
|
||||||
)
|
)
|
||||||
|
|
||||||
model_path = self.learn_model(model, total_steps, model_path, eval_env=eval_env,
|
model_path = self.learn_model(model, total_steps, model_path, eval_env=eval_env,
|
||||||
eval_freq=n_steps_per_env * 20, save_freq=n_steps_per_env * 20,
|
eval_freq=n_steps_per_env * 20, save_freq=n_steps_per_env * 20, eval_eps=100,
|
||||||
backup_env_file=__file__)
|
backup_env_file=__file__)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
sleep(1) # wait for child processes
|
sleep(1) # wait for child processes
|
||||||
|
|||||||
BIN
scripts/gyms/logs/stand_stable_final.zip
Normal file
BIN
scripts/gyms/logs/stand_stable_final.zip
Normal file
Binary file not shown.
30
train.sh
30
train.sh
@@ -12,15 +12,16 @@ set -euo pipefail
|
|||||||
# CPU 核数基准(默认 20):
|
# CPU 核数基准(默认 20):
|
||||||
# 例如你的机器按 20 核预算来算,可保持默认。
|
# 例如你的机器按 20 核预算来算,可保持默认。
|
||||||
CORES="${CORES:-20}"
|
CORES="${CORES:-20}"
|
||||||
# CPU 占用百分比(默认 95):
|
# CPU 占用百分比(默认 100):
|
||||||
# 最终会与 CORES 相乘得到 CPUQuota。
|
# 最终会与 CORES 相乘得到 CPUQuota。
|
||||||
# 例:CORES=20, UTIL_PERCENT=95 -> CPUQuota=1900%(约 19 核等效)
|
# 例:CORES=20, UTIL_PERCENT=100 -> CPUQuota=2000%(约 20 核等效)
|
||||||
UTIL_PERCENT="${UTIL_PERCENT:-95}"
|
UTIL_PERCENT="${UTIL_PERCENT:-100}"
|
||||||
CPU_QUOTA="$((CORES * UTIL_PERCENT))%"
|
CPU_QUOTA="$((CORES * UTIL_PERCENT))%"
|
||||||
|
|
||||||
# 内存上限(默认 28G):
|
# 内存上限(默认关闭):
|
||||||
# 可改成 16G、24G 等,避免训练把系统内存吃满。
|
# 设为具体值(如 24G/28G)可限制训练最多占用内存;
|
||||||
MEMORY_MAX="${MEMORY_MAX:-28G}"
|
# 设为 0/none/off/infinity 表示不设置 cgroup 内存上限。
|
||||||
|
MEMORY_MAX="${MEMORY_MAX:-0}"
|
||||||
|
|
||||||
# ------------------------------
|
# ------------------------------
|
||||||
# 训练运行参数(由 scripts/gyms/Walk.py 读取)
|
# 训练运行参数(由 scripts/gyms/Walk.py 读取)
|
||||||
@@ -28,8 +29,8 @@ MEMORY_MAX="${MEMORY_MAX:-28G}"
|
|||||||
# 运行模式:train 或 test
|
# 运行模式:train 或 test
|
||||||
GYM_CPU_MODE="${GYM_CPU_MODE:-train}"
|
GYM_CPU_MODE="${GYM_CPU_MODE:-train}"
|
||||||
|
|
||||||
# 并行环境数量:越大通常吞吐越高,但也更容易触发服务器连接不稳定。
|
# 并行环境数量:越大通常吞吐越高,但也更容易触发 OOM 或连接不稳定。
|
||||||
# 建议从 8~12 起步,稳定后再升到 16/20。
|
# 默认使用更稳妥的 12,确认稳定后再升到 16/20。
|
||||||
GYM_CPU_N_ENVS="${GYM_CPU_N_ENVS:-20}"
|
GYM_CPU_N_ENVS="${GYM_CPU_N_ENVS:-20}"
|
||||||
# 服务器预热时间(秒):
|
# 服务器预热时间(秒):
|
||||||
# 在批量拉起 rcssserver 后等待一段时间,再创建 SubprocVecEnv,
|
# 在批量拉起 rcssserver 后等待一段时间,再创建 SubprocVecEnv,
|
||||||
@@ -98,14 +99,23 @@ if [[ -n "${CONDA_DEFAULT_ENV:-}" ]]; then
|
|||||||
echo "Detected conda env: ${CONDA_DEFAULT_ENV}"
|
echo "Detected conda env: ${CONDA_DEFAULT_ENV}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
SYSTEMD_PROPS=("-p" "CPUQuota=${CPU_QUOTA}")
|
||||||
|
case "${MEMORY_MAX,,}" in
|
||||||
|
0|none|off|infinity)
|
||||||
|
echo "MemoryMax is disabled for this run (no cgroup memory cap)."
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
SYSTEMD_PROPS+=("-p" "MemoryMax=${MEMORY_MAX}")
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
# 使用 systemd-run --user --scope 启动“受限资源”的训练进程:
|
# 使用 systemd-run --user --scope 启动“受限资源”的训练进程:
|
||||||
# - CPUQuota: 总 CPU 配额
|
# - CPUQuota: 总 CPU 配额
|
||||||
# - MemoryMax: 最大内存
|
# - MemoryMax: 最大内存
|
||||||
# - env ... : 显式传递训练参数到 Python 进程
|
# - env ... : 显式传递训练参数到 Python 进程
|
||||||
# - python -m scripts.gyms.Walk: 以模块方式启动训练入口
|
# - python -m scripts.gyms.Walk: 以模块方式启动训练入口
|
||||||
systemd-run --user --scope \
|
systemd-run --user --scope \
|
||||||
-p CPUQuota="${CPU_QUOTA}" \
|
"${SYSTEMD_PROPS[@]}" \
|
||||||
-p MemoryMax="${MEMORY_MAX}" \
|
|
||||||
env \
|
env \
|
||||||
GYM_CPU_MODE="${GYM_CPU_MODE}" \
|
GYM_CPU_MODE="${GYM_CPU_MODE}" \
|
||||||
GYM_CPU_N_ENVS="${GYM_CPU_N_ENVS}" \
|
GYM_CPU_N_ENVS="${GYM_CPU_N_ENVS}" \
|
||||||
|
|||||||
Reference in New Issue
Block a user