stand_stable_final_version

This commit is contained in:
xxh
2026-03-24 06:40:15 -04:00
parent a52cdff013
commit baaa0aa6ed
4 changed files with 57 additions and 21 deletions

View File

@@ -5,6 +5,7 @@ import time
from time import sleep
from random import random
from random import uniform
from itertools import count
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
@@ -148,11 +149,14 @@ class WalkEnv(gym.Env):
self.scaling_factor = 0.3
# self.scaling_factor = 1
# Encourage a minimum lateral stance so the policy avoids feet overlap.
self.min_stance_rad = 0.10
# Small reset perturbations for robustness training.
self.enable_reset_perturb = True
self.reset_beam_yaw_range_deg = 180 # randomize target direction fully to encourage learning a real walk instead of a fixed gait
self.reset_joint_noise_rad = 0.015
self.reset_perturb_steps = 3
self.reset_joint_noise_rad = 0.025
self.reset_perturb_steps = 4
self.reset_recover_steps = 8
self.previous_action = np.zeros(len(self.Player.robot.ROBOT_MOTORS))
@@ -392,14 +396,15 @@ class WalkEnv(gym.Env):
def compute_reward(self, previous_pos, current_pos, action):
height = float(self.Player.world.global_position[2])
robot = self.Player.robot
orientation_quat_inv = R.from_quat(self.Player.robot._global_cheat_orientation).inv()
orientation_quat_inv = R.from_quat(robot._global_cheat_orientation).inv()
projected_gravity = orientation_quat_inv.apply(np.array([0.0, 0.0, -1.0]))
tilt_mag = float(np.linalg.norm(projected_gravity[:2]))
ang_vel = np.deg2rad(self.Player.robot.gyroscope)
ang_vel = np.deg2rad(robot.gyroscope)
ang_vel_mag = float(np.linalg.norm(ang_vel))
is_fallen = height < 0.3
is_fallen = height < 0.55
if is_fallen:
# remain = max(0, 800 - self.step_counter)
# return -8.0 - 0.01 * remain
@@ -429,6 +434,23 @@ class WalkEnv(gym.Env):
posture_penalty = -0.3 * (tilt_mag)
ang_vel_penalty = -0.02 * ang_vel_mag
# Use simulator joint readings in training frame to shape lateral stance.
joint_pos = np.deg2rad(
[robot.motor_positions[motor] for motor in robot.ROBOT_MOTORS]
) * self.train_sim_flip
left_hip_roll = float(joint_pos[12])
right_hip_roll = float(joint_pos[18])
left_ankle_roll = float(joint_pos[16])
right_ankle_roll = float(joint_pos[22])
hip_spread = left_hip_roll - right_hip_roll
ankle_spread = left_ankle_roll - right_ankle_roll
stance_metric = 0.6 * abs(hip_spread) + 0.4 * abs(ankle_spread)
# Penalize narrow stance (feet too close) and scissoring (cross-leg pattern).
stance_collapse_penalty = -4.0 * max(0.0, self.min_stance_rad - stance_metric)
cross_leg_penalty = -1.2 * max(0.0, -(hip_spread * ankle_spread))
target_height = self.initial_height
height_error = height - target_height
height_penalty = -0.5 * abs(height_error) # 惩罚高度偏离,系数可调
@@ -464,10 +486,12 @@ class WalkEnv(gym.Env):
posture_penalty
+ ang_vel_penalty
+ height_penalty
+ stance_collapse_penalty
+ cross_leg_penalty
# + exploration_bonus
# + height_down_penalty
)
if time.time() - self.start_time >= 1200:
if time.time() - self.start_time >= 600:
self.start_time = time.time()
print(
# f"progress_reward:{progress_reward:.4f}",
@@ -476,6 +500,8 @@ class WalkEnv(gym.Env):
f"height_penalty:{height_penalty:.4f}",
f"smoothness_penalty:{smoothness_penalty:.4f},",
f"posture_penalty:{posture_penalty:.4f}",
f"stance_collapse_penalty:{stance_collapse_penalty:.4f}",
f"cross_leg_penalty:{cross_leg_penalty:.4f}",
# f"ang_vel_penalty:{ang_vel_penalty:.4f}",
# f"height_down_penalty:{height_down_penalty:.4f}",
# f"exploration_bonus:{exploration_bonus:.4f}"
@@ -519,7 +545,7 @@ class WalkEnv(gym.Env):
self.last_action_for_reward = action.copy()
# Fall detection and penalty
is_fallen = self.Player.world.global_position[2] < 0.3
is_fallen = self.Player.world.global_position[2] < 0.55
# terminal state: the robot is falling or timeout
terminated = is_fallen or self.step_counter > 800 or self.route_completed
@@ -601,13 +627,13 @@ class Train(Train_Base):
clip_range=float(os.environ.get("GYM_CPU_TRAIN_CLIP_RANGE", "0.2")), # PPO clipping parameter
gae_lambda=0.95, # GAE lambda
gamma=float(os.environ.get("GYM_CPU_TRAIN_GAMMA", "0.95")), # Discount factor
target_kl=0.03,
# target_kl=0.03,
n_epochs=int(os.environ.get("GYM_CPU_TRAIN_EPOCHS", "5")),
# tensorboard_log=f"./scripts/gyms/logs/{folder_name}/tensorboard/"
tensorboard_log=f"./scripts/gyms/logs/{folder_name}/tensorboard/"
)
model_path = self.learn_model(model, total_steps, model_path, eval_env=eval_env,
eval_freq=n_steps_per_env * 20, save_freq=n_steps_per_env * 20,
eval_freq=n_steps_per_env * 20, save_freq=n_steps_per_env * 20, eval_eps=100,
backup_env_file=__file__)
except KeyboardInterrupt:
sleep(1) # wait for child processes

Binary file not shown.