From 0e402c2b4cac46d128c79f9254936e4d66cc3a8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BE=90=E5=AD=A6=E9=A2=A2?= Date: Wed, 11 Mar 2026 09:54:29 +0800 Subject: [PATCH] train base and gitignore files --- .gitignore | 12 + scripts/commons/Server.py | 63 ++++ scripts/commons/Train_Base.py | 584 ++++++++++++++++++++++++++++++++++ scripts/commons/UI.py | 302 ++++++++++++++++++ scripts/gyms/Walk.py | 472 +++++++++++++++++++++++++++ 5 files changed, 1433 insertions(+) create mode 100644 .gitignore create mode 100644 scripts/commons/Server.py create mode 100644 scripts/commons/Train_Base.py create mode 100644 scripts/commons/UI.py create mode 100644 scripts/gyms/Walk.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..58548b2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +.venv/ +.vscode/ +**/__pycache__/ +poetry.lock +poetry.toml +**/*.log +**/*.txt +**/build/ +**/install/ +**/log/ +*.spec +dist/ diff --git a/scripts/commons/Server.py b/scripts/commons/Server.py new file mode 100644 index 0000000..17070e6 --- /dev/null +++ b/scripts/commons/Server.py @@ -0,0 +1,63 @@ +import subprocess +import os + + +class Server(): + def __init__(self, first_server_p, first_monitor_p, n_servers) -> None: + try: + import psutil + self.check_running_servers(psutil, first_server_p, first_monitor_p, n_servers) + except ModuleNotFoundError: + print("Info: Cannot check if the server is already running, because the psutil module was not found") + + self.first_server_p = first_server_p + self.n_servers = n_servers + self.rcss_processes = [] + first_monitor_p = first_monitor_p + 100 + + # makes it easier to kill test servers without affecting train servers + cmd = "rcssservermj" + for i in range(n_servers): + self.rcss_processes.append( + subprocess.Popen((f"{cmd} --aport {first_server_p+i} --mport {first_monitor_p+i}").split(), + stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, start_new_session=True) + ) + + def check_running_servers(self, psutil, first_server_p, first_monitor_p, n_servers): + ''' Check if any server is running on chosen ports ''' + found = False + p_list = [p for p in psutil.process_iter() if p.cmdline() and "rcssservermj" in " ".join(p.cmdline())] + range1 = (first_server_p, first_server_p + n_servers) + range2 = (first_monitor_p,first_monitor_p + n_servers) + bad_processes = [] + + for p in p_list: + # currently ignoring remaining default port when only one of the ports is specified (uncommon scenario) + ports = [int(arg) for arg in p.cmdline()[1:] if arg.isdigit()] + if len(ports) == 0: + ports = [60000,60100] # default server ports (changing this is unlikely) + + conflicts = [str(port) for port in ports if ( + (range1[0] <= port < range1[1]) or (range2[0] <= port < range2[1]) )] + + if len(conflicts)>0: + if not found: + print("\nThere are already servers running on the same port(s)!") + found = True + bad_processes.append(p) + print(f"Port(s) {','.join(conflicts)} already in use by \"{' '.join(p.cmdline())}\" (PID:{p.pid})") + + if found: + print() + while True: + inp = input("Enter 'kill' to kill these processes or ctrl+c to abort. ") + if inp == "kill": + for p in bad_processes: + p.kill() + return + + + def kill(self): + for p in self.rcss_processes: + p.kill() + print(f"Killed {self.n_servers} rcssservermj processes starting at {self.first_server_p}") \ No newline at end of file diff --git a/scripts/commons/Train_Base.py b/scripts/commons/Train_Base.py new file mode 100644 index 0000000..8f4203f --- /dev/null +++ b/scripts/commons/Train_Base.py @@ -0,0 +1,584 @@ +from datetime import datetime, timedelta +from itertools import count +from os import listdir +from os.path import isdir, join, isfile +from scripts.commons.UI import UI +from shutil import copy +from stable_baselines3 import PPO +from stable_baselines3.common.base_class import BaseAlgorithm +from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback, CallbackList, BaseCallback +from typing import Callable +# from world.world import World +from xml.dom import minidom +import numpy as np +import os, time, math, csv, select, sys +import pickle +import xml.etree.ElementTree as ET +import shutil + + +class Train_Base(): + def __init__(self, script) -> None: + ''' + When training with multiple environments (multiprocessing): + The server port is incremented as follows: + self.server_p, self.server_p+1, self.server_p+2, ... + We add +1000 to the initial monitor port, so than we can have more than 100 environments: + self.monitor_p+1000, self.monitor_p+1001, self.monitor_p+1002, ... + When testing we use self.server_p and self.monitor_p + ''' + + args = script.args + self.script = script + self.ip = args.i + self.server_p = args.p # (initial) server port + self.monitor_p = args.m + 100 # monitor port when testing + self.monitor_p_1000 = args.m + 1100 # initial monitor port when training + self.robot_type = args.r + self.team = args.t + self.uniform = args.u + self.cf_last_time = 0 + self.cf_delay = 0 + # self.cf_target_period = World.STEPTIME # target simulation speed while testing (default: real-time) + + + + @staticmethod + def prompt_user_for_model(self): + + gyms_logs_path = "./mujococodebase/scripts/gyms/logs/" + folders = [f for f in listdir(gyms_logs_path) if isdir(join(gyms_logs_path, f))] + folders.sort(key=lambda f: os.path.getmtime(join(gyms_logs_path, f)), reverse=True) # sort by modification date + + while True: + try: + folder_name = UI.print_list(folders,prompt="Choose folder (ctrl+c to return): ")[1] + except KeyboardInterrupt: + print() + return None # ctrl+c + + folder_dir = os.path.join(gyms_logs_path, folder_name) + models = [m[:-4] for m in listdir(folder_dir) if isfile(join(folder_dir, m)) and m.endswith(".zip")] + + if not models: + print("The chosen folder does not contain any .zip file!") + continue + + models.sort(key=lambda m: os.path.getmtime(join(folder_dir, m+".zip")), reverse=True) # sort by modification date + + try: + model_name = UI.print_list(models,prompt="Choose model (ctrl+c to return): ")[1] + break + except KeyboardInterrupt: + print() + + return {"folder_dir":folder_dir, "folder_name":folder_name, "model_file":os.path.join(folder_dir, model_name+".zip")} + + + # def control_fps(self, read_input = False): + # ''' Add delay to control simulation speed ''' + + # if read_input: + # speed = input() + # if speed == '': + # self.cf_target_period = 0 + # print(f"Changed simulation speed to MAX") + # else: + # if speed == '0': + # inp = input("Paused. Set new speed or '' to use previous speed:") + # if inp != '': + # speed = inp + + # try: + # speed = int(speed) + # assert speed >= 0 + # self.cf_target_period = World.STEPTIME * 100 / speed + # print(f"Changed simulation speed to {speed}%") + # except: + # print("""Train_Base.py: + # Error: To control the simulation speed, enter a non-negative integer. + # To disable this control module, use test_model(..., enable_FPS_control=False) in your gyms environment.""") + + # now = time.time() + # period = now - self.cf_last_time + # self.cf_last_time = now + # self.cf_delay += (self.cf_target_period - period)*0.9 + # if self.cf_delay > 0: + # time.sleep(self.cf_delay) + # else: + # self.cf_delay = 0 + + + def test_model(self, model:BaseAlgorithm, env, log_path:str=None, model_path:str=None, max_episodes=0, enable_FPS_control=True, verbose=1): + ''' + Test model and log results + + Parameters + ---------- + model : BaseAlgorithm + Trained model + env : Env + Gym-like environment + log_path : str + Folder where statistics file is saved, default is `None` (no file is saved) + model_path : str + Folder where it reads evaluations.npz to plot it and create evaluations.csv, default is `None` (no plot, no csv) + max_episodes : int + Run tests for this number of episodes + Default is 0 (run until user aborts) + verbose : int + 0 - no output (except if enable_FPS_control=True) + 1 - print episode statistics + ''' + + if model_path is not None: + assert os.path.isdir(model_path), f"{model_path} is not a valid path" + self.display_evaluations(model_path) + + if log_path is not None: + assert os.path.isdir(log_path), f"{log_path} is not a valid path" + + # If file already exists, don't overwrite + if os.path.isfile(log_path + "/test.csv"): + for i in range(1000): + p = f"{log_path}/test_{i:03}.csv" + if not os.path.isfile(p): + log_path = p + break + else: + log_path += "/test.csv" + + with open(log_path, 'w') as f: + f.write("reward,ep. length,rew. cumulative avg., ep. len. cumulative avg.\n") + print("Train statistics are saved to:", log_path) + + if enable_FPS_control: # control simulation speed (using non blocking user input) + print("\nThe simulation speed can be changed by sending a non-negative integer\n" + "(e.g. '50' sets speed to 50%, '0' pauses the simulation, '' sets speed to MAX)\n") + + ep_reward = 0 + ep_length = 0 + rewards_sum = 0 + reward_min = math.inf + reward_max = -math.inf + ep_lengths_sum = 0 + ep_no = 0 + + obs, _ = env.reset() + while True: + action, _states = model.predict(obs, deterministic=True) + obs, reward, terminated, truncated, info = env.step(action) + done = terminated or truncated + ep_reward += reward + ep_length += 1 + + if enable_FPS_control: # control simulation speed (using non blocking user input) + self.control_fps(select.select([sys.stdin], [], [], 0)[0]) + + if done: + obs, _ = env.reset() + rewards_sum += ep_reward + ep_lengths_sum += ep_length + reward_max = max(ep_reward, reward_max) + reward_min = min(ep_reward, reward_min) + ep_no += 1 + avg_ep_lengths = ep_lengths_sum/ep_no + avg_rewards = rewards_sum/ep_no + + if verbose > 0: + print( f"\rEpisode: {ep_no:<3} Ep.Length: {ep_length:<4.0f} Reward: {ep_reward:<6.2f} \n", + end=f"--AVERAGE-- Ep.Length: {avg_ep_lengths:<4.0f} Reward: {avg_rewards:<6.2f} (Min: {reward_min:<6.2f} Max: {reward_max:<6.2f})", flush=True) + + if log_path is not None: + with open(log_path, 'a') as f: + writer = csv.writer(f) + writer.writerow([ep_reward, ep_length, avg_rewards, avg_ep_lengths]) + + if ep_no == max_episodes: + return + + ep_reward = 0 + ep_length = 0 + + def learn_model(self, model:BaseAlgorithm, total_steps:int, path:str, eval_env=None, eval_freq=None, eval_eps=5, save_freq=None, backup_env_file=None, export_name=None): + ''' + Learn Model for a specific number of time steps + + Parameters + ---------- + model : BaseAlgorithm + Model to train + total_steps : int + The total number of samples (env steps) to train on + path : str + Path where the trained model is saved + If the path already exists, an incrementing number suffix is added + eval_env : Env + Environment to periodically test the model + Default is None (no periodical evaluation) + eval_freq : int + Evaluate the agent every X steps + Default is None (no periodical evaluation) + eval_eps : int + Evaluate the agent for X episodes (both eval_env and eval_freq must be defined) + Default is 5 + save_freq : int + Saves model at every X steps + Default is None (no periodical checkpoint) + backup_gym_file : str + Generates backup of environment file in model's folder + Default is None (no backup) + export_name : str + If export_name and save_freq are defined, a model is exported every X steps + Default is None (no export) + + Returns + ------- + model_path : str + Directory where model was actually saved (considering incremental suffix) + + Notes + ----- + If `eval_env` and `eval_freq` were specified: + - The policy will be evaluated in `eval_env` every `eval_freq` steps + - Evaluation results will be saved in `path` and shown at the end of training + - Every time the results improve, the model is saved + ''' + + start = time.time() + start_date = datetime.now().strftime("%d/%m/%Y %H:%M:%S") + + # If path already exists, add suffix to avoid overwriting + if os.path.isdir(path): + for i in count(): + p = path.rstrip("/")+f'_{i:03}/' + if not os.path.isdir(p): + path = p + break + os.makedirs(path) + + # Backup environment file + if backup_env_file is not None: + backup_file = os.path.join(path, os.path.basename(backup_env_file)) + copy(backup_env_file, backup_file) + + evaluate = bool(eval_env is not None and eval_freq is not None) + + # Create evaluation callback + eval_callback = None if not evaluate else EvalCallback(eval_env, n_eval_episodes=eval_eps, eval_freq=eval_freq, log_path=path, + best_model_save_path=path, deterministic=True, render=False) + + # Create custom callback to display evaluations + custom_callback = None if not evaluate else Cyclic_Callback(eval_freq, lambda:self.display_evaluations(path,True)) + + # Create checkpoint callback + checkpoint_callback = None if save_freq is None else CheckpointCallback(save_freq=save_freq, save_path=path, name_prefix="model", verbose=1) + + # Create custom callback to export checkpoint models + export_callback = None if save_freq is None or export_name is None else Export_Callback(save_freq, path, export_name) + + callbacks = CallbackList([c for c in [eval_callback, custom_callback, checkpoint_callback, export_callback] if c is not None]) + + model.learn( total_timesteps=total_steps, callback=callbacks ) + model.save( os.path.join(path, "last_model") ) + + # Display evaluations if they exist + if evaluate: + self.display_evaluations(path) + + # Display timestamps + Model path + end_date = datetime.now().strftime('%d/%m/%Y %H:%M:%S') + duration = timedelta(seconds=int(time.time()-start)) + print(f"Train start: {start_date}") + print(f"Train end: {end_date}") + print(f"Train duration: {duration}") + print(f"Model path: {path}") + + # Append timestamps to backup environment file + if backup_env_file is not None: + with open(backup_file, 'a') as f: + f.write(f"\n# Train start: {start_date}\n") + f.write( f"# Train end: {end_date}\n") + f.write( f"# Train duration: {duration}") + + return path + + def display_evaluations(self, path, save_csv=False): + + eval_npz = os.path.join(path, "evaluations.npz") + + if not os.path.isfile(eval_npz): + return + + console_width = 80 + console_height = 18 + symb_x = "\u2022" + symb_o = "\u007c" + symb_xo = "\u237f" + + with np.load(eval_npz) as data: + time_steps = data["timesteps"] + results_raw = np.mean(data["results"],axis=1) + ep_lengths_raw = np.mean(data["ep_lengths"],axis=1) + sample_no = len(results_raw) + + xvals = np.linspace(0, sample_no-1, 80) + results = np.interp(xvals, range(sample_no), results_raw) + ep_lengths = np.interp(xvals, range(sample_no), ep_lengths_raw) + + results_limits = np.min(results), np.max(results) + ep_lengths_limits = np.min(ep_lengths), np.max(ep_lengths) + + results_discrete = np.digitize(results, np.linspace(results_limits[0]-1e-5, results_limits[1]+1e-5, console_height+1))-1 + ep_lengths_discrete = np.digitize(ep_lengths, np.linspace(0, ep_lengths_limits[1]+1e-5, console_height+1))-1 + + matrix = np.zeros((console_height, console_width, 2), int) + matrix[results_discrete[0] ][0][0] = 1 # draw 1st column + matrix[ep_lengths_discrete[0]][0][1] = 1 # draw 1st column + rng = [[results_discrete[0], results_discrete[0]], [ep_lengths_discrete[0], ep_lengths_discrete[0]]] + + # Create continuous line for both plots + for k in range(2): + for i in range(1,console_width): + x = [results_discrete, ep_lengths_discrete][k][i] + if x > rng[k][1]: + rng[k] = [rng[k][1]+1, x] + elif x < rng[k][0]: + rng[k] = [x, rng[k][0]-1] + else: + rng[k] = [x,x] + for j in range(rng[k][0],rng[k][1]+1): + matrix[j][i][k] = 1 + + print(f'{"-"*console_width}') + for l in reversed(range(console_height)): + for c in range(console_width): + if np.all(matrix[l][c] == 0): print(end=" ") + elif np.all(matrix[l][c] == 1): print(end=symb_xo) + elif matrix[l][c][0] == 1: print(end=symb_x) + else: print(end=symb_o) + print() + print(f'{"-"*console_width}') + print(f"({symb_x})-reward min:{results_limits[0]:11.2f} max:{results_limits[1]:11.2f}") + print(f"({symb_o})-ep. length min:{ep_lengths_limits[0]:11.0f} max:{ep_lengths_limits[1]:11.0f} {time_steps[-1]/1000:15.0f}k steps") + print(f'{"-"*console_width}') + + # save CSV + if save_csv: + eval_csv = os.path.join(path, "evaluations.csv") + with open(eval_csv, 'a+') as f: + writer = csv.writer(f) + if sample_no == 1: + writer.writerow(["time_steps", "reward ep.", "length"]) + writer.writerow([time_steps[-1],results_raw[-1],ep_lengths_raw[-1]]) + + + # def generate_slot_behavior(self, path, slots, auto_head:bool, XML_name): + # ''' + # Function that generates the XML file for the optimized slot behavior, overwriting previous files + # ''' + + # file = os.path.join( path, XML_name ) + + # # create the file structure + # auto_head = '1' if auto_head else '0' + # EL_behavior = ET.Element('behavior',{'description':'Add description to XML file', "auto_head":auto_head}) + + # for i,s in enumerate(slots): + # EL_slot = ET.SubElement(EL_behavior, 'slot', {'name':str(i), 'delta':str(s[0]/1000)}) + # for j in s[1]: # go through all joint indices + # ET.SubElement(EL_slot, 'move', {'id':str(j), 'angle':str(s[2][j])}) + + # # create XML file + # xml_rough = ET.tostring( EL_behavior, 'utf-8' ) + # xml_pretty = minidom.parseString(xml_rough).toprettyxml(indent=" ") + # with open(file, "w") as x: + # x.write(xml_pretty) + + # print(file, "was created!") + + # @staticmethod + # def linear_schedule(initial_value: float) -> Callable[[float], float]: + # ''' + # Linear learning rate schedule + + # Parameters + # ---------- + # initial_value : float + # Initial learning rate + + # Returns + # ------- + # schedule : Callable[[float], float] + # schedule that computes current learning rate depending on remaining progress + # ''' + # def func(progress_remaining: float) -> float: + # ''' + # Compute learning rate according to current progress + + # Parameters + # ---------- + # progress_remaining : float + # Progress will decrease from 1 (beginning) to 0 + + # Returns + # ------- + # learning_rate : float + # Learning rate according to current progress + # ''' + # return progress_remaining * initial_value + + # return func + + @staticmethod + def export_model(input_file, output_file, add_sufix=True): + ''' + Export model weights to binary file + + Parameters + ---------- + input_file : str + Input file, compatible with algorithm + output_file : str + Output file, including directory + add_sufix : bool + If true, a suffix is appended to the file name: output_file + "_{index}.pkl" + ''' + + # If file already exists, don't overwrite + if add_sufix: + for i in count(): + f = f"{output_file}_{i:03}.pkl" + if not os.path.isfile(f): + output_file = f + break + + model = PPO.load(input_file) + weights = model.policy.state_dict() # dictionary containing network layers + + w = lambda name : weights[name].detach().cpu().numpy() # extract weights from policy + + var_list = [] + for i in count(0,2): # add hidden layers (step=2 because that's how SB3 works) + if f"mlp_extractor.policy_net.{i}.bias" not in weights: + break + var_list.append([w(f"mlp_extractor.policy_net.{i}.bias"), w(f"mlp_extractor.policy_net.{i}.weight"), "tanh"]) + + var_list.append( [w("action_net.bias"), w("action_net.weight"), "none"] ) # add final layer + + with open(output_file,"wb") as f: + pickle.dump(var_list, f, protocol=4) # protocol 4 is backward compatible with Python 3.4 + + + def print_list(data, numbering=True, prompt=None, divider=" | ", alignment="<", min_per_col=6): + ''' + Print list - prints list, using as many columns as possible + + Parameters + ---------- + data : `list` + list of items + numbering : `bool` + assigns number to each option + prompt : `str` + the prompt string, if given, is printed after the table before reading input + divider : `str` + string that divides columns + alignment : `str` + f-string style alignment ( '<', '>', '^' ) + min_per_col : int + avoid splitting columns with fewer items + + Returns + ------- + item : `int`, item + returns tuple with global index of selected item and the item object, + or `None` (if `numbering` or `prompt` are `None`) + + ''' + + WIDTH = shutil.get_terminal_size()[0] + + data_size = len(data) + items = [] + items_len = [] + + #--------------------------------------------- Add numbers, margins and divider + for i in range(data_size): + number = f"{i}-" if numbering else "" + items.append( f"{divider}{number}{data[i]}" ) + items_len.append( len(items[-1]) ) + + max_cols = np.clip((WIDTH+len(divider)) // min(items_len),1,math.ceil(data_size/max(min_per_col,1))) # width + len(divider) because it is not needed in last col + + #--------------------------------------------- Check maximum number of columns, considering content width (min:1) + for i in range(max_cols,0,-1): + cols_width = [] + cols_items = [] + table_width = 0 + a,b = divmod(data_size,i) + for col in range(i): + start = a*col + min(b,col) + end = start+a+(1 if col=min,= interval[0] and inp < interval[1]: + return inp, False + except: + pass + + print("Error: illegal input! Options:", str_options, f" or {dtype}" if dtype != str else "") + + @staticmethod + def read_int(prompt, min, max): + ''' + Read int from user in a given interval + :param prompt: prompt to show user before reading input + :param min: minimum input (inclusive) + :param max: maximum input (exclusive) + :return: choice + ''' + while True: + inp = input(prompt) + try: + inp = int(inp) + assert inp >= min and inp < max + return inp + except: + print(f"Error: illegal input! Choose number between {min} and {max-1}") + + @staticmethod + def print_table(data, titles=None, alignment=None, cols_width=None, cols_per_title=None, margins=None, numbering=None, prompt=None): + ''' + Print table + + Parameters + ---------- + data : `list` + list of columns, where each column is a list of items + titles : `list` + list of titles for each column, default is `None` (no titles) + alignment : `list` + list of alignments per column (excluding titles), default is `None` (left alignment for all cols) + cols_width : `list` + list of widths per column, default is `None` (fit to content) + Positive values indicate a fixed column width + Zero indicates that the column will fit its content + cols_per_title : `list` + maximum number of subcolumns per title, default is `None` (1 subcolumn per title) + margins : `list` + number of added leading and trailing spaces per column, default is `None` (margin=2 for all columns) + numbering : `list` + list of booleans per columns, indicating whether to assign numbers to each option + prompt : `str` + the prompt string, if given, is printed after the table before reading input + + Returns + ------- + index : `int` + returns global index of selected item (relative to table) + col_index : `int` + returns local index of selected item (relative to column) + column : `int` + returns number of column of selected item (starts at 0) + * if `numbering` or `prompt` are `None`, `None` is returned + + + Example + ------- + titles = ["Name","Age"] + data = [[John,Graciete], [30,50]] + alignment = ["<","^"] # 1st column is left-aligned, 2nd is centered + cols_width = [10,5] # 1st column's width=10, 2nd column's width=5 + margins = [3,3] + numbering = [True,False] # prints: [0-John,1-Graciete][30,50] + prompt = "Choose a person:" + ''' + + #--------------------------------------------- parameters + cols_no = len(data) + + if alignment is None: + alignment = ["<"]*cols_no + + if cols_width is None: + cols_width = [0]*cols_no + + if numbering is None: + numbering = [False]*cols_no + any_numbering = False + else: + any_numbering = True + + if margins is None: + margins = [2]*cols_no + + # Fit column to content + margin, if required + subcol = [] # subcolumn length and widths + for i in range(cols_no): + subcol.append([[],[]]) + if cols_width[i] == 0: + numbering_width = 4 if numbering[i] else 0 + if cols_per_title is None or cols_per_title[i] < 2: + cols_width[i] = max([len(str(item))+numbering_width for item in data[i]]) + margins[i]*2 + else: + subcol[i][0] = math.ceil(len(data[i])/cols_per_title[i]) # subcolumn maximum length + cols_per_title[i] = math.ceil(len(data[i])/subcol[i][0]) # reduce number of columns as needed + cols_width[i] = margins[i]*(1+cols_per_title[i]) - (1 if numbering[i] else 0) # remove one if numbering, same as when printing + for j in range(cols_per_title[i]): + subcol_data_width = max([len(str(item))+numbering_width for item in data[i][j*subcol[i][0]:j*subcol[i][0]+subcol[i][0]]]) + cols_width[i] += subcol_data_width # add subcolumn data width to column width + subcol[i][1].append(subcol_data_width) # save subcolumn data width + + if titles is not None: # expand to acomodate titles if needed + cols_width[i] = max(cols_width[i], len(titles[i]) + margins[i]*2 ) + + if any_numbering: + no_of_items=0 + cumulative_item_per_col=[0] # useful for getting the local index + for i in range(cols_no): + assert type(data[i]) == list, "In function 'print_table', 'data' must be a list of lists!" + + if numbering[i]: + data[i] = [f"{n+no_of_items:3}-{d}" for n,d in enumerate(data[i])] + no_of_items+=len(data[i]) + cumulative_item_per_col.append(no_of_items) + + table_width = sum(cols_width)+cols_no-1 + + #--------------------------------------------- col titles + print(f'{"="*table_width}') + if titles is not None: + for i in range(cols_no): + print(f'{titles[i]:^{cols_width[i]}}', end='|' if i < cols_no - 1 else '') + print() + for i in range(cols_no): + print(f'{"-"*cols_width[i]}', end='+' if i < cols_no - 1 else '') + print() + + #--------------------------------------------- merge subcolumns + if cols_per_title is not None: + for i,col in enumerate(data): + if cols_per_title[i] < 2: + continue + for k in range(subcol[i][0]): # create merged items + col[k] = (" "*margins[i]).join( f'{col[item]:{alignment[i]}{subcol[i][1][subcol_idx]}}' + for subcol_idx, item in enumerate(range(k,len(col),subcol[i][0])) ) + del col[subcol[i][0]:] # delete repeated items + + #--------------------------------------------- col items + for line in zip_longest(*data): + for i,item in enumerate(line): + l_margin = margins[i]-1 if numbering[i] else margins[i] # adjust margins when there are numbered options + item = "" if item is None else f'{" "*l_margin}{item}{" "*margins[i]}' # add margins + print(f'{item:{alignment[i]}{cols_width[i]}}', end='') + if i < cols_no - 1: + print(end='|') + print(end="\n") + print(f'{"="*table_width}') + + #--------------------------------------------- prompt + if prompt is None: + return None + + if not any_numbering: + print(prompt) + return None + + index = UI.read_int(prompt, 0, no_of_items) + + for i,n in enumerate(cumulative_item_per_col): + if index < n: + return index, index-cumulative_item_per_col[i-1], i-1 + + raise ValueError('Failed to catch illegal input') + + + @staticmethod + def print_list(data, numbering=True, prompt=None, divider=" | ", alignment="<", min_per_col=6): + ''' + Print list - prints list, using as many columns as possible + + Parameters + ---------- + data : `list` + list of items + numbering : `bool` + assigns number to each option + prompt : `str` + the prompt string, if given, is printed after the table before reading input + divider : `str` + string that divides columns + alignment : `str` + f-string style alignment ( '<', '>', '^' ) + min_per_col : int + avoid splitting columns with fewer items + + Returns + ------- + item : `int`, item + returns tuple with global index of selected item and the item object, + or `None` (if `numbering` or `prompt` are `None`) + + ''' + + WIDTH = shutil.get_terminal_size()[0] + + data_size = len(data) + items = [] + items_len = [] + + #--------------------------------------------- Add numbers, margins and divider + for i in range(data_size): + number = f"{i}-" if numbering else "" + items.append( f"{divider}{number}{data[i]}" ) + items_len.append( len(items[-1]) ) + + max_cols = np.clip((WIDTH+len(divider)) // min(items_len),1,math.ceil(data_size/max(min_per_col,1))) # width + len(divider) because it is not needed in last col + + #--------------------------------------------- Check maximum number of columns, considering content width (min:1) + for i in range(max_cols,0,-1): + cols_width = [] + cols_items = [] + table_width = 0 + a,b = divmod(data_size,i) + for col in range(i): + start = a*col + min(b,col) + end = start+a+(1 if col 0: + progress_reward = progress * 20.0 # Strong reward for closing distance + else: + progress_reward = progress * 30.0 # Even stronger penalty for going backward + + # 2. Absolute speed reward: reward any movement toward goal + movement_magnitude = np.linalg.norm(current_pos - previous_pos) + direction_to_target = self.target_position - current_pos + if np.linalg.norm(direction_to_target) > 0.01: + direction_to_target = direction_to_target / np.linalg.norm(direction_to_target) + movement_vector = current_pos - previous_pos + # Dot product: reward movement in target direction + directional_alignment = np.dot(movement_vector, direction_to_target) + speed_reward = max(0, directional_alignment) * 10.0 + else: + speed_reward = 0.0 + + # 3. Height maintenance: encourage upright posture + height = self.Player.world.global_position[2] + if height > 0.40: + height_reward = 0.5 + elif height > 0.30: + height_reward = 0.0 + else: + height_reward = -0.5 + + # 4. Waypoint bonuses + waypoint_bonus = 0.0 + if distance_after < 0.8: + waypoint_bonus = 20.0 + if self.waypoint_index < len(self.point_list) - 1: + self.waypoint_index += 1 + self.target_position = self.point_list[self.waypoint_index] + else: + waypoint_bonus = 50.0 # Final waypoint + + return progress_reward + speed_reward + height_reward + waypoint_bonus + + def step(self, action): + + r = self.Player.robot + + target_joint_positions = ( + self.joint_nominal_position + self.scaling_factor * action + ) + target_joint_positions *= self.train_sim_flip + + self.previous_action = action + + for idx, target in enumerate(target_joint_positions): + r.set_motor_target_position( + r.ROBOT_MOTORS[idx], target*180/math.pi, kp=25, kd=0.6 + ) + + + + + self.sync() # run simulation step + self.step_counter += 1 + + current_pos = np.array(self.Player.world.global_position[:2], dtype=np.float32) + + # Compute reward based on movement from previous step + reward = self.compute_reward(self.previous_pos, current_pos) + + # Penalty for standing still or minimal movement + movement = np.linalg.norm(current_pos - self.previous_pos) + if movement < 0.005: # Less than 5mm = basically standing + reward -= 2.0 + + # Small action penalty to encourage efficiency + action_magnitude = np.linalg.norm(action) + reward -= action_magnitude * 0.01 + + # Update previous position + self.previous_pos = current_pos.copy() + + # Fall detection and penalty + is_fallen = self.Player.world.global_position[2] < 0.25 + if is_fallen: + reward -= 15.0 + + # terminal state: the robot is falling or timeout + terminated = is_fallen or self.step_counter > 500 or self.waypoint_index >= len(self.point_list) + truncated = False + + return self.observe(), reward, terminated, truncated, {} + + + + + +class Train(Train_Base): + def __init__(self, script) -> None: + super().__init__(script) + + + def train(self, args): + + #--------------------------------------- Learning parameters + n_envs = 4 # Reduced from 8 to decrease CPU/network pressure during init + n_steps_per_env = 512 # RolloutBuffer is of size (n_steps_per_env * n_envs) + minibatch_size = 64 # should be a factor of (n_steps_per_env * n_envs) + total_steps = 30000000 + learning_rate = 3e-4 + folder_name = f'Walk_R{self.robot_type}' + model_path = f'./mujococodebase/scripts/gyms/logs/{folder_name}/' + + print(f"Model path: {model_path}") + print(f"Using {n_envs} parallel environments") + + #--------------------------------------- Run algorithm + def init_env(i_env): + def thunk(): + return WalkEnv( self.ip , self.server_p + i_env) + return thunk + + servers = Train_Server( self.server_p, self.monitor_p_1000, n_envs+1 ) #include 1 extra server for testing + + # Wait for servers to start + print(f"Starting {n_envs+1} rcssservermj servers...") + print("Servers started, creating environments...") + + env = SubprocVecEnv( [init_env(i) for i in range(n_envs)] ) + eval_env = SubprocVecEnv( [init_env(n_envs)] ) + + + try: + # Custom policy network architecture + policy_kwargs = dict( + net_arch=dict( + pi=[256, 256, 128], # Policy network: 3 layers + vf=[256, 256, 128] # Value network: 3 layers + ), + activation_fn=__import__('torch.nn', fromlist=['ReLU']).ReLU, + ) + + if "model_file" in args: # retrain + model = PPO.load( args["model_file"], env=env, device="cpu", n_envs=n_envs, n_steps=n_steps_per_env, batch_size=minibatch_size, learning_rate=learning_rate ) + else: # train new model + model = PPO( + "MlpPolicy", + env=env, + verbose=1, + n_steps=n_steps_per_env, + batch_size=minibatch_size, + learning_rate=learning_rate, + device="cpu", + policy_kwargs=policy_kwargs, + ent_coef=0.01, # Entropy coefficient for exploration + clip_range=0.2, # PPO clipping parameter + gae_lambda=0.95, # GAE lambda + gamma=0.99 # Discount factor + ) + + model_path = self.learn_model( model, total_steps, model_path, eval_env=eval_env, eval_freq=n_steps_per_env*20, save_freq=n_steps_per_env*20, backup_env_file=__file__ ) + except KeyboardInterrupt: + sleep(1) # wait for child processes + print("\nctrl+c pressed, aborting...\n") + servers.kill() + return + + env.close() + eval_env.close() + servers.kill() + + + def test(self, args): + + # Uses different server and monitor ports + server = Train_Server( self.server_p-1, self.monitor_p, 1 ) + env = WalkEnv( self.ip, self.server_p-1, self.monitor_p, self.robot_type, True ) + model = PPO.load( args["model_file"], env=env ) + + try: + self.export_model( args["model_file"], args["model_file"]+".pkl", False ) # Export to pkl to create custom behavior + self.test_model( model, env, log_path=args["folder_dir"], model_path=args["folder_dir"] ) + except KeyboardInterrupt: + print() + + env.close() + server.kill() + + +if __name__ == "__main__": + from types import SimpleNamespace + + # 创建默认参数 + script_args = SimpleNamespace( + args=SimpleNamespace( + i='127.0.0.1', # Server IP + p=3100, # Server port + m=3200, # Monitor port + r=0, # Robot type + t='Gym', # Team name + u=1 # Uniform number + ) + ) + + trainer = Train(script_args) + trainer.train({}) + +