diff --git a/scripts/commons/Server.py b/scripts/commons/Server.py index f238432..cf21ea8 100644 --- a/scripts/commons/Server.py +++ b/scripts/commons/Server.py @@ -21,7 +21,7 @@ class Server(): port = first_server_p + i mport = first_monitor_p + i - server_cmd = f"{cmd} --aport {port} --mport {mport} --no-render --no-realtime" + server_cmd = f"{cmd} --aport {port} --mport {mport} " self.rcss_processes.append( subprocess.Popen( diff --git a/scripts/commons/Train_Base.py b/scripts/commons/Train_Base.py index 617f5b2..189eb15 100644 --- a/scripts/commons/Train_Base.py +++ b/scripts/commons/Train_Base.py @@ -171,8 +171,8 @@ class Train_Base(): ep_reward += reward ep_length += 1 - if enable_FPS_control: # control simulation speed (using non blocking user input) - self.control_fps(select.select([sys.stdin], [], [], 0)[0]) + # if enable_FPS_control: # control simulation speed (using non blocking user input) + # self.control_fps(select.select([sys.stdin], [], [], 0)[0]) if done: obs, _ = env.reset() diff --git a/scripts/gyms/Walk.py b/scripts/gyms/Walk.py index d30214f..4533b46 100644 --- a/scripts/gyms/Walk.py +++ b/scripts/gyms/Walk.py @@ -80,8 +80,8 @@ class WalkEnv(gym.Env): action_dim = len(self.Player.robot.ROBOT_MOTORS) self.no_of_actions = action_dim self.action_space = spaces.Box( - low=-1.0, - high=1.0, + low=-10.0, + high=10.0, shape=(action_dim,), dtype=np.float32 ) @@ -115,14 +115,14 @@ class WalkEnv(gym.Env): ] ) self.reference_joint_nominal_position = self.joint_nominal_position.copy() - + self.joint_nominal_position = np.zeros(self.no_of_actions) self.train_sim_flip = np.array( [ 1.0, # 0: Head_yaw (he1) -1.0, # 1: Head_pitch (he2) 1.0, # 2: Left_Shoulder_Pitch (lae1) -1.0, # 3: Left_Shoulder_Roll (lae2) - 1.0, # 4: Left_Elbow_Pitch (lae3) + -1.0, # 4: Left_Elbow_Pitch (lae3) 1.0, # 5: Left_Elbow_Yaw (lae4) -1.0, # 6: Right_Shoulder_Pitch (rae1) -1.0, # 7: Right_Shoulder_Roll (rae2) @@ -145,6 +145,7 @@ class WalkEnv(gym.Env): ) self.scaling_factor = 0.5 + # self.scaling_factor = 1 self.previous_action = np.zeros(len(self.Player.robot.ROBOT_MOTORS)) self.last_action_for_reward = np.zeros(len(self.Player.robot.ROBOT_MOTORS)) @@ -531,7 +532,7 @@ class Train(Train_Base): n_steps_per_env = 512 # RolloutBuffer is of size (n_steps_per_env * n_envs) minibatch_size = 128 # should be a factor of (n_steps_per_env * n_envs) total_steps = 30000000 - learning_rate = 3e-4 + learning_rate = 2e-4 folder_name = f'Walk_R{self.robot_type}' model_path = f'./scripts/gyms/logs/{folder_name}/' @@ -560,10 +561,10 @@ class Train(Train_Base): # Custom policy network architecture policy_kwargs = dict( net_arch=dict( - pi=[256, 256, 128], # Policy network: 3 layers - vf=[256, 256, 128] # Value network: 3 layers + pi=[512, 256, 128], # Policy network: 3 layers + vf=[512, 256, 128] # Value network: 3 layers ), - activation_fn=__import__('torch.nn', fromlist=['ReLU']).ReLU, + activation_fn=__import__('torch.nn', fromlist=['ELU']).ELU, ) if "model_file" in args: # retrain @@ -579,10 +580,10 @@ class Train(Train_Base): learning_rate=learning_rate, device="cpu", policy_kwargs=policy_kwargs, - ent_coef=0.01, # Entropy coefficient for exploration - clip_range=0.2, # PPO clipping parameter - gae_lambda=0.95, # GAE lambda - gamma=0.99 # Discount factor + # ent_coef=0.01, # Entropy coefficient for exploration + # clip_range=0.2, # PPO clipping parameter + # gae_lambda=0.95, # GAE lambda + # gamma=0.99 # Discount factor ) model_path = self.learn_model(model, total_steps, model_path, eval_env=eval_env,