change model and policy
This commit is contained in:
@@ -21,7 +21,7 @@ class Server():
|
|||||||
port = first_server_p + i
|
port = first_server_p + i
|
||||||
mport = first_monitor_p + i
|
mport = first_monitor_p + i
|
||||||
|
|
||||||
server_cmd = f"{cmd} --aport {port} --mport {mport} --no-render --no-realtime"
|
server_cmd = f"{cmd} --aport {port} --mport {mport} "
|
||||||
|
|
||||||
self.rcss_processes.append(
|
self.rcss_processes.append(
|
||||||
subprocess.Popen(
|
subprocess.Popen(
|
||||||
|
|||||||
@@ -171,8 +171,8 @@ class Train_Base():
|
|||||||
ep_reward += reward
|
ep_reward += reward
|
||||||
ep_length += 1
|
ep_length += 1
|
||||||
|
|
||||||
if enable_FPS_control: # control simulation speed (using non blocking user input)
|
# if enable_FPS_control: # control simulation speed (using non blocking user input)
|
||||||
self.control_fps(select.select([sys.stdin], [], [], 0)[0])
|
# self.control_fps(select.select([sys.stdin], [], [], 0)[0])
|
||||||
|
|
||||||
if done:
|
if done:
|
||||||
obs, _ = env.reset()
|
obs, _ = env.reset()
|
||||||
|
|||||||
@@ -80,8 +80,8 @@ class WalkEnv(gym.Env):
|
|||||||
action_dim = len(self.Player.robot.ROBOT_MOTORS)
|
action_dim = len(self.Player.robot.ROBOT_MOTORS)
|
||||||
self.no_of_actions = action_dim
|
self.no_of_actions = action_dim
|
||||||
self.action_space = spaces.Box(
|
self.action_space = spaces.Box(
|
||||||
low=-1.0,
|
low=-10.0,
|
||||||
high=1.0,
|
high=10.0,
|
||||||
shape=(action_dim,),
|
shape=(action_dim,),
|
||||||
dtype=np.float32
|
dtype=np.float32
|
||||||
)
|
)
|
||||||
@@ -115,14 +115,14 @@ class WalkEnv(gym.Env):
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
self.reference_joint_nominal_position = self.joint_nominal_position.copy()
|
self.reference_joint_nominal_position = self.joint_nominal_position.copy()
|
||||||
|
self.joint_nominal_position = np.zeros(self.no_of_actions)
|
||||||
self.train_sim_flip = np.array(
|
self.train_sim_flip = np.array(
|
||||||
[
|
[
|
||||||
1.0, # 0: Head_yaw (he1)
|
1.0, # 0: Head_yaw (he1)
|
||||||
-1.0, # 1: Head_pitch (he2)
|
-1.0, # 1: Head_pitch (he2)
|
||||||
1.0, # 2: Left_Shoulder_Pitch (lae1)
|
1.0, # 2: Left_Shoulder_Pitch (lae1)
|
||||||
-1.0, # 3: Left_Shoulder_Roll (lae2)
|
-1.0, # 3: Left_Shoulder_Roll (lae2)
|
||||||
1.0, # 4: Left_Elbow_Pitch (lae3)
|
-1.0, # 4: Left_Elbow_Pitch (lae3)
|
||||||
1.0, # 5: Left_Elbow_Yaw (lae4)
|
1.0, # 5: Left_Elbow_Yaw (lae4)
|
||||||
-1.0, # 6: Right_Shoulder_Pitch (rae1)
|
-1.0, # 6: Right_Shoulder_Pitch (rae1)
|
||||||
-1.0, # 7: Right_Shoulder_Roll (rae2)
|
-1.0, # 7: Right_Shoulder_Roll (rae2)
|
||||||
@@ -145,6 +145,7 @@ class WalkEnv(gym.Env):
|
|||||||
)
|
)
|
||||||
|
|
||||||
self.scaling_factor = 0.5
|
self.scaling_factor = 0.5
|
||||||
|
# self.scaling_factor = 1
|
||||||
|
|
||||||
self.previous_action = np.zeros(len(self.Player.robot.ROBOT_MOTORS))
|
self.previous_action = np.zeros(len(self.Player.robot.ROBOT_MOTORS))
|
||||||
self.last_action_for_reward = np.zeros(len(self.Player.robot.ROBOT_MOTORS))
|
self.last_action_for_reward = np.zeros(len(self.Player.robot.ROBOT_MOTORS))
|
||||||
@@ -531,7 +532,7 @@ class Train(Train_Base):
|
|||||||
n_steps_per_env = 512 # RolloutBuffer is of size (n_steps_per_env * n_envs)
|
n_steps_per_env = 512 # RolloutBuffer is of size (n_steps_per_env * n_envs)
|
||||||
minibatch_size = 128 # should be a factor of (n_steps_per_env * n_envs)
|
minibatch_size = 128 # should be a factor of (n_steps_per_env * n_envs)
|
||||||
total_steps = 30000000
|
total_steps = 30000000
|
||||||
learning_rate = 3e-4
|
learning_rate = 2e-4
|
||||||
folder_name = f'Walk_R{self.robot_type}'
|
folder_name = f'Walk_R{self.robot_type}'
|
||||||
model_path = f'./scripts/gyms/logs/{folder_name}/'
|
model_path = f'./scripts/gyms/logs/{folder_name}/'
|
||||||
|
|
||||||
@@ -560,10 +561,10 @@ class Train(Train_Base):
|
|||||||
# Custom policy network architecture
|
# Custom policy network architecture
|
||||||
policy_kwargs = dict(
|
policy_kwargs = dict(
|
||||||
net_arch=dict(
|
net_arch=dict(
|
||||||
pi=[256, 256, 128], # Policy network: 3 layers
|
pi=[512, 256, 128], # Policy network: 3 layers
|
||||||
vf=[256, 256, 128] # Value network: 3 layers
|
vf=[512, 256, 128] # Value network: 3 layers
|
||||||
),
|
),
|
||||||
activation_fn=__import__('torch.nn', fromlist=['ReLU']).ReLU,
|
activation_fn=__import__('torch.nn', fromlist=['ELU']).ELU,
|
||||||
)
|
)
|
||||||
|
|
||||||
if "model_file" in args: # retrain
|
if "model_file" in args: # retrain
|
||||||
@@ -579,10 +580,10 @@ class Train(Train_Base):
|
|||||||
learning_rate=learning_rate,
|
learning_rate=learning_rate,
|
||||||
device="cpu",
|
device="cpu",
|
||||||
policy_kwargs=policy_kwargs,
|
policy_kwargs=policy_kwargs,
|
||||||
ent_coef=0.01, # Entropy coefficient for exploration
|
# ent_coef=0.01, # Entropy coefficient for exploration
|
||||||
clip_range=0.2, # PPO clipping parameter
|
# clip_range=0.2, # PPO clipping parameter
|
||||||
gae_lambda=0.95, # GAE lambda
|
# gae_lambda=0.95, # GAE lambda
|
||||||
gamma=0.99 # Discount factor
|
# gamma=0.99 # Discount factor
|
||||||
)
|
)
|
||||||
|
|
||||||
model_path = self.learn_model(model, total_steps, model_path, eval_env=eval_env,
|
model_path = self.learn_model(model, total_steps, model_path, eval_env=eval_env,
|
||||||
|
|||||||
Reference in New Issue
Block a user