diff --git a/scripts/commons/Server.py b/scripts/commons/Server.py
index f238432..cf21ea8 100644
--- a/scripts/commons/Server.py
+++ b/scripts/commons/Server.py
@@ -21,7 +21,7 @@ class Server():
             port = first_server_p + i
             mport = first_monitor_p + i
 
-            server_cmd = f"{cmd} --aport {port} --mport {mport} --no-render  --no-realtime"
+            server_cmd = f"{cmd} --aport {port} --mport {mport} "
 
             self.rcss_processes.append(
                 subprocess.Popen(
diff --git a/scripts/commons/Train_Base.py b/scripts/commons/Train_Base.py
index 617f5b2..189eb15 100644
--- a/scripts/commons/Train_Base.py
+++ b/scripts/commons/Train_Base.py
@@ -171,8 +171,8 @@ class Train_Base():
             ep_reward += reward
             ep_length += 1
 
-            if enable_FPS_control:  # control simulation speed (using non blocking user input)
-                self.control_fps(select.select([sys.stdin], [], [], 0)[0])
+            # if enable_FPS_control:  # control simulation speed (using non blocking user input)
+            #     self.control_fps(select.select([sys.stdin], [], [], 0)[0])
 
             if done:
                 obs, _ = env.reset()
diff --git a/scripts/gyms/Walk.py b/scripts/gyms/Walk.py
index d30214f..4533b46 100644
--- a/scripts/gyms/Walk.py
+++ b/scripts/gyms/Walk.py
@@ -80,8 +80,8 @@ class WalkEnv(gym.Env):
         action_dim = len(self.Player.robot.ROBOT_MOTORS)
         self.no_of_actions = action_dim
         self.action_space = spaces.Box(
-            low=-1.0,
-            high=1.0,
+            low=-10.0,
+            high=10.0,
             shape=(action_dim,),
             dtype=np.float32
         )
@@ -115,14 +115,14 @@ class WalkEnv(gym.Env):
             ]
         )
         self.reference_joint_nominal_position = self.joint_nominal_position.copy()
-
+        self.joint_nominal_position = np.zeros(self.no_of_actions)
         self.train_sim_flip = np.array(
             [
                 1.0,  # 0: Head_yaw (he1)
                 -1.0,  # 1: Head_pitch (he2)
                 1.0,  # 2: Left_Shoulder_Pitch (lae1)
                 -1.0,  # 3: Left_Shoulder_Roll (lae2)
-                1.0,  # 4: Left_Elbow_Pitch (lae3)
+                -1.0,  # 4: Left_Elbow_Pitch (lae3)
                 1.0,  # 5: Left_Elbow_Yaw (lae4)
                 -1.0,  # 6: Right_Shoulder_Pitch (rae1)
                 -1.0,  # 7: Right_Shoulder_Roll (rae2)
@@ -145,6 +145,7 @@ class WalkEnv(gym.Env):
         )
 
         self.scaling_factor = 0.5
+        # self.scaling_factor = 1
 
         self.previous_action = np.zeros(len(self.Player.robot.ROBOT_MOTORS))
         self.last_action_for_reward = np.zeros(len(self.Player.robot.ROBOT_MOTORS))
@@ -531,7 +532,7 @@ class Train(Train_Base):
         n_steps_per_env = 512  # RolloutBuffer is of size (n_steps_per_env * n_envs)
         minibatch_size = 128  # should be a factor of (n_steps_per_env * n_envs)
         total_steps = 30000000
-        learning_rate = 3e-4
+        learning_rate = 2e-4
         folder_name = f'Walk_R{self.robot_type}'
         model_path = f'./scripts/gyms/logs/{folder_name}/'
 
@@ -560,10 +561,10 @@ class Train(Train_Base):
             # Custom policy network architecture
             policy_kwargs = dict(
                 net_arch=dict(
-                    pi=[256, 256, 128],  # Policy network: 3 layers
-                    vf=[256, 256, 128]  # Value network: 3 layers
+                    pi=[512, 256, 128],  # Policy network: 3 layers
+                    vf=[512, 256, 128]  # Value network: 3 layers
                 ),
-                activation_fn=__import__('torch.nn', fromlist=['ReLU']).ReLU,
+                activation_fn=__import__('torch.nn', fromlist=['ELU']).ELU,
             )
 
             if "model_file" in args:  # retrain
@@ -579,10 +580,10 @@ class Train(Train_Base):
                     learning_rate=learning_rate,
                     device="cpu",
                     policy_kwargs=policy_kwargs,
-                    ent_coef=0.01,  # Entropy coefficient for exploration
-                    clip_range=0.2,  # PPO clipping parameter
-                    gae_lambda=0.95,  # GAE lambda
-                    gamma=0.99  # Discount factor
+                    # ent_coef=0.01,  # Entropy coefficient for exploration
+                    # clip_range=0.2,  # PPO clipping parameter
+                    # gae_lambda=0.95,  # GAE lambda
+                    # gamma=0.99  # Discount factor
                 )
 
             model_path = self.learn_model(model, total_steps, model_path, eval_env=eval_env,