change model and policy

This commit is contained in:
xxh
2026-03-14 01:08:22 -04:00
parent 294fe0bd79
commit f99fae68f6
3 changed files with 16 additions and 15 deletions

View File

@@ -21,7 +21,7 @@ class Server():
port = first_server_p + i port = first_server_p + i
mport = first_monitor_p + i mport = first_monitor_p + i
server_cmd = f"{cmd} --aport {port} --mport {mport} --no-render --no-realtime" server_cmd = f"{cmd} --aport {port} --mport {mport} "
self.rcss_processes.append( self.rcss_processes.append(
subprocess.Popen( subprocess.Popen(

View File

@@ -171,8 +171,8 @@ class Train_Base():
ep_reward += reward ep_reward += reward
ep_length += 1 ep_length += 1
if enable_FPS_control: # control simulation speed (using non blocking user input) # if enable_FPS_control: # control simulation speed (using non blocking user input)
self.control_fps(select.select([sys.stdin], [], [], 0)[0]) # self.control_fps(select.select([sys.stdin], [], [], 0)[0])
if done: if done:
obs, _ = env.reset() obs, _ = env.reset()

View File

@@ -80,8 +80,8 @@ class WalkEnv(gym.Env):
action_dim = len(self.Player.robot.ROBOT_MOTORS) action_dim = len(self.Player.robot.ROBOT_MOTORS)
self.no_of_actions = action_dim self.no_of_actions = action_dim
self.action_space = spaces.Box( self.action_space = spaces.Box(
low=-1.0, low=-10.0,
high=1.0, high=10.0,
shape=(action_dim,), shape=(action_dim,),
dtype=np.float32 dtype=np.float32
) )
@@ -115,14 +115,14 @@ class WalkEnv(gym.Env):
] ]
) )
self.reference_joint_nominal_position = self.joint_nominal_position.copy() self.reference_joint_nominal_position = self.joint_nominal_position.copy()
self.joint_nominal_position = np.zeros(self.no_of_actions)
self.train_sim_flip = np.array( self.train_sim_flip = np.array(
[ [
1.0, # 0: Head_yaw (he1) 1.0, # 0: Head_yaw (he1)
-1.0, # 1: Head_pitch (he2) -1.0, # 1: Head_pitch (he2)
1.0, # 2: Left_Shoulder_Pitch (lae1) 1.0, # 2: Left_Shoulder_Pitch (lae1)
-1.0, # 3: Left_Shoulder_Roll (lae2) -1.0, # 3: Left_Shoulder_Roll (lae2)
1.0, # 4: Left_Elbow_Pitch (lae3) -1.0, # 4: Left_Elbow_Pitch (lae3)
1.0, # 5: Left_Elbow_Yaw (lae4) 1.0, # 5: Left_Elbow_Yaw (lae4)
-1.0, # 6: Right_Shoulder_Pitch (rae1) -1.0, # 6: Right_Shoulder_Pitch (rae1)
-1.0, # 7: Right_Shoulder_Roll (rae2) -1.0, # 7: Right_Shoulder_Roll (rae2)
@@ -145,6 +145,7 @@ class WalkEnv(gym.Env):
) )
self.scaling_factor = 0.5 self.scaling_factor = 0.5
# self.scaling_factor = 1
self.previous_action = np.zeros(len(self.Player.robot.ROBOT_MOTORS)) self.previous_action = np.zeros(len(self.Player.robot.ROBOT_MOTORS))
self.last_action_for_reward = np.zeros(len(self.Player.robot.ROBOT_MOTORS)) self.last_action_for_reward = np.zeros(len(self.Player.robot.ROBOT_MOTORS))
@@ -531,7 +532,7 @@ class Train(Train_Base):
n_steps_per_env = 512 # RolloutBuffer is of size (n_steps_per_env * n_envs) n_steps_per_env = 512 # RolloutBuffer is of size (n_steps_per_env * n_envs)
minibatch_size = 128 # should be a factor of (n_steps_per_env * n_envs) minibatch_size = 128 # should be a factor of (n_steps_per_env * n_envs)
total_steps = 30000000 total_steps = 30000000
learning_rate = 3e-4 learning_rate = 2e-4
folder_name = f'Walk_R{self.robot_type}' folder_name = f'Walk_R{self.robot_type}'
model_path = f'./scripts/gyms/logs/{folder_name}/' model_path = f'./scripts/gyms/logs/{folder_name}/'
@@ -560,10 +561,10 @@ class Train(Train_Base):
# Custom policy network architecture # Custom policy network architecture
policy_kwargs = dict( policy_kwargs = dict(
net_arch=dict( net_arch=dict(
pi=[256, 256, 128], # Policy network: 3 layers pi=[512, 256, 128], # Policy network: 3 layers
vf=[256, 256, 128] # Value network: 3 layers vf=[512, 256, 128] # Value network: 3 layers
), ),
activation_fn=__import__('torch.nn', fromlist=['ReLU']).ReLU, activation_fn=__import__('torch.nn', fromlist=['ELU']).ELU,
) )
if "model_file" in args: # retrain if "model_file" in args: # retrain
@@ -579,10 +580,10 @@ class Train(Train_Base):
learning_rate=learning_rate, learning_rate=learning_rate,
device="cpu", device="cpu",
policy_kwargs=policy_kwargs, policy_kwargs=policy_kwargs,
ent_coef=0.01, # Entropy coefficient for exploration # ent_coef=0.01, # Entropy coefficient for exploration
clip_range=0.2, # PPO clipping parameter # clip_range=0.2, # PPO clipping parameter
gae_lambda=0.95, # GAE lambda # gae_lambda=0.95, # GAE lambda
gamma=0.99 # Discount factor # gamma=0.99 # Discount factor
) )
model_path = self.learn_model(model, total_steps, model_path, eval_env=eval_env, model_path = self.learn_model(model, total_steps, model_path, eval_env=eval_env,