796 lines
235 KiB
Plaintext
796 lines
235 KiB
Plaintext
|
|
{
|
||
|
|
"cells": [
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"id": "57963548",
|
||
|
|
"metadata": {},
|
||
|
|
"source": [
|
||
|
|
"# Improved PPO Training for Maglev Pod\n",
|
||
|
|
"\n",
|
||
|
|
"## Major Changes to Enable Learning:\n",
|
||
|
|
"\n",
|
||
|
|
"### 1. **Reward Function (lev_pod_env.py)**\n",
|
||
|
|
"**Problem**: Squared penalties created rewards of -7000 to -8200, making learning impossible\n",
|
||
|
|
"- `(gap_error * 100)²` could reach 10,000+ for small errors\n",
|
||
|
|
"- +1.0 survival bonus was meaningless compared to penalties\n",
|
||
|
|
"\n",
|
||
|
|
"**Solution**: Exponential reward shaping with reasonable scales\n",
|
||
|
|
"- Gap reward: `exp(-0.5 * (error/3mm)²)` → smooth 0 to 1.0 range\n",
|
||
|
|
"- Small linear penalties for orientation (~0.02/degree)\n",
|
||
|
|
"- Success bonus: +2.0 for excellent hovering (gap error < 1mm, angles < 2°)\n",
|
||
|
|
"- **New reward range: -10 to +3 per step** (was -8200 to +1 total)\n",
|
||
|
|
"\n",
|
||
|
|
"### 2. **Network Architecture**\n",
|
||
|
|
"**Changes**:\n",
|
||
|
|
"- Increased hidden units: 128 → 256\n",
|
||
|
|
"- Added LayerNorm for training stability\n",
|
||
|
|
"- Deeper shared layers (3 layers instead of 2)\n",
|
||
|
|
"- Better initialization for exploration\n",
|
||
|
|
"\n",
|
||
|
|
"### 3. **Training Hyperparameters**\n",
|
||
|
|
"**Changes**:\n",
|
||
|
|
"- Policy LR: 3e-4 → 5e-4 (faster learning)\n",
|
||
|
|
"- Value LR: 3e-4 → 1e-3 (even faster value updates)\n",
|
||
|
|
"- Entropy coefficient: 0.01 → 0.02 (more exploration)\n",
|
||
|
|
"- Added gradient clipping (max norm 0.5)\n",
|
||
|
|
"- GAE lambda: 0.97 → 0.95 (lower-variance, slightly more biased advantage estimates)\n",
|
||
|
|
"- Episodes: 1000 → 2000\n",
|
||
|
|
"\n",
|
||
|
|
"### 4. **Termination Conditions**\n",
|
||
|
|
"**Tightened for safety**:\n",
|
||
|
|
"- Gap bounds: 2-40mm → 3-35mm\n",
|
||
|
|
"- Angle tolerance: 20° → 15°\n",
|
||
|
|
"- Failure penalty: -50 → -10 (scaled with new rewards)\n",
|
||
|
|
"\n",
|
||
|
|
"## Expected Behavior:\n",
|
||
|
|
"- **Rewards should be positive or mildly negative** during good episodes\n",
|
||
|
|
"- **Gap should steadily converge** from the initial ~14mm toward the 16.49mm target (gap error decreasing toward 0)\n",
|
||
|
|
"- **Episodes that reach 500 steps** indicate successful hovering\n",
|
||
|
|
"- **Look for improvement over first 500 episodes**, then fine-tuning after"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 1,
|
||
|
|
"id": "f28b2866",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"import gymnasium as gym\n",
|
||
|
|
"import matplotlib.pyplot as plt\n",
|
||
|
|
"import numpy as np\n",
|
||
|
|
"import seaborn as sns\n",
|
||
|
|
"import torch\n",
|
||
|
|
"from torch import nn\n",
|
||
|
|
"from torch import optim\n",
|
||
|
|
"from torch.distributions import Normal\n",
|
||
|
|
"from lev_pod_env import LevPodEnv"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 2,
|
||
|
|
"id": "c49c95b6",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"Torch Version: 2.8.0\n",
|
||
|
|
"CUDA Available: True\n",
|
||
|
|
"Using device: cuda\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"print(\"Torch Version:\", torch.__version__)\n",
|
||
|
|
"print(\"CUDA Available:\", torch.cuda.is_available())\n",
|
||
|
|
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
||
|
|
"print(\"Using device:\", device)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 3,
|
||
|
|
"id": "70bb54d4",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"class ActorCriticNetwork(nn.Module):\n",
|
||
|
|
" def __init__(self, state_dim, action_dim, hidden_dim=256):\n",
|
||
|
|
" super().__init__()\n",
|
||
|
|
" # Larger network with layer normalization for better learning\n",
|
||
|
|
" self.shared_layers = nn.Sequential(\n",
|
||
|
|
" nn.Linear(state_dim, hidden_dim),\n",
|
||
|
|
" nn.LayerNorm(hidden_dim),\n",
|
||
|
|
" nn.ReLU(),\n",
|
||
|
|
" nn.Linear(hidden_dim, hidden_dim),\n",
|
||
|
|
" nn.LayerNorm(hidden_dim),\n",
|
||
|
|
" nn.ReLU(),\n",
|
||
|
|
" nn.Linear(hidden_dim, hidden_dim // 2),\n",
|
||
|
|
" nn.ReLU()\n",
|
||
|
|
" )\n",
|
||
|
|
" # Policy outputs mean and log_std for continuous actions\n",
|
||
|
|
" self.policy_mean = nn.Sequential(\n",
|
||
|
|
" nn.Linear(hidden_dim // 2, hidden_dim // 2),\n",
|
||
|
|
" nn.ReLU(),\n",
|
||
|
|
" nn.Linear(hidden_dim // 2, action_dim),\n",
|
||
|
|
" nn.Tanh() # Constrain to [-1, 1] range\n",
|
||
|
|
" )\n",
|
||
|
|
" # Initialize log_std to encourage exploration initially\n",
|
||
|
|
" self.policy_log_std = nn.Parameter(torch.ones(action_dim) * -0.5)\n",
|
||
|
|
" \n",
|
||
|
|
" self.value_layers = nn.Sequential(\n",
|
||
|
|
" nn.Linear(hidden_dim // 2, hidden_dim // 2),\n",
|
||
|
|
" nn.ReLU(),\n",
|
||
|
|
" nn.Linear(hidden_dim // 2, 1),\n",
|
||
|
|
" )\n",
|
||
|
|
"\n",
|
||
|
|
" def value(self, observation):\n",
|
||
|
|
" shared_output = self.shared_layers(observation)\n",
|
||
|
|
" state_value = self.value_layers(shared_output)\n",
|
||
|
|
" return state_value\n",
|
||
|
|
" \n",
|
||
|
|
" def policy(self, observation):\n",
|
||
|
|
" shared_output = self.shared_layers(observation)\n",
|
||
|
|
" mean = self.policy_mean(shared_output)\n",
|
||
|
|
" std = torch.exp(self.policy_log_std)\n",
|
||
|
|
" return mean, std\n",
|
||
|
|
" \n",
|
||
|
|
" def forward(self, state):\n",
|
||
|
|
" shared_output = self.shared_layers(state)\n",
|
||
|
|
" mean = self.policy_mean(shared_output)\n",
|
||
|
|
" std = torch.exp(self.policy_log_std)\n",
|
||
|
|
" state_value = self.value_layers(shared_output)\n",
|
||
|
|
" return mean, std, state_value"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 4,
|
||
|
|
"id": "2ed37deb",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"class PPOTrainer():\n",
|
||
|
|
" def __init__(self, actor_critic, ppo_clip_val=0.2, target_kl_div=0.02,\n",
|
||
|
|
" max_policy_train_iters=40, value_train_iters=40, policy_lr=5e-4, value_lr=1e-3, \n",
|
||
|
|
" entropy_coef=0.02):\n",
|
||
|
|
" self.ac = actor_critic\n",
|
||
|
|
" self.ppo_clip_val = ppo_clip_val\n",
|
||
|
|
" self.target_kl_div = target_kl_div\n",
|
||
|
|
" self.max_policy_train_iters = max_policy_train_iters\n",
|
||
|
|
" self.value_train_iters = value_train_iters\n",
|
||
|
|
" self.entropy_coef = entropy_coef\n",
|
||
|
|
"\n",
|
||
|
|
" policy_params = list(self.ac.shared_layers.parameters()) + \\\n",
|
||
|
|
" list(self.ac.policy_mean.parameters()) + \\\n",
|
||
|
|
" [self.ac.policy_log_std]\n",
|
||
|
|
" self.policy_optimizer = optim.Adam(policy_params, lr=policy_lr)\n",
|
||
|
|
"\n",
|
||
|
|
" value_params = list(self.ac.shared_layers.parameters()) + \\\n",
|
||
|
|
" list(self.ac.value_layers.parameters())\n",
|
||
|
|
" self.value_optimizer = optim.Adam(value_params, lr=value_lr)\n",
|
||
|
|
"\n",
|
||
|
|
" def train_policy(self, obs, acts, old_log_probs, gaes):\n",
|
||
|
|
" for _ in range(self.max_policy_train_iters):\n",
|
||
|
|
" self.policy_optimizer.zero_grad()\n",
|
||
|
|
"\n",
|
||
|
|
" new_mean, new_std = self.ac.policy(obs)\n",
|
||
|
|
" new_dist = Normal(new_mean, new_std)\n",
|
||
|
|
" new_log_probs = new_dist.log_prob(acts).sum(dim=-1)\n",
|
||
|
|
"\n",
|
||
|
|
" policy_ratio = torch.exp(new_log_probs - old_log_probs)\n",
|
||
|
|
" clipped_ratio = policy_ratio.clamp(1 - self.ppo_clip_val, 1 + self.ppo_clip_val)\n",
|
||
|
|
"\n",
|
||
|
|
" clipped_loss = clipped_ratio * gaes\n",
|
||
|
|
" unclipped_loss = policy_ratio * gaes\n",
|
||
|
|
"\n",
|
||
|
|
" policy_loss = -torch.min(clipped_loss, unclipped_loss).mean()\n",
|
||
|
|
" \n",
|
||
|
|
" # Increased entropy bonus to encourage more exploration\n",
|
||
|
|
" entropy = new_dist.entropy().mean()\n",
|
||
|
|
" policy_loss = policy_loss - self.entropy_coef * entropy\n",
|
||
|
|
"\n",
|
||
|
|
" policy_loss.backward()\n",
|
||
|
|
" # Gradient clipping for stability\n",
|
||
|
|
" torch.nn.utils.clip_grad_norm_(self.policy_optimizer.param_groups[0]['params'], 0.5)\n",
|
||
|
|
" self.policy_optimizer.step()\n",
|
||
|
|
"\n",
|
||
|
|
" kl_div = (old_log_probs - new_log_probs).mean()\n",
|
||
|
|
" if kl_div > self.target_kl_div:\n",
|
||
|
|
" break\n",
|
||
|
|
" \n",
|
||
|
|
" def train_value(self, obs, returns):\n",
|
||
|
|
" for _ in range(self.value_train_iters):\n",
|
||
|
|
" self.value_optimizer.zero_grad()\n",
|
||
|
|
"\n",
|
||
|
|
" values = self.ac.value(obs)\n",
|
||
|
|
" value_loss = (returns - values).pow(2).mean()\n",
|
||
|
|
" \n",
|
||
|
|
" value_loss.backward()\n",
|
||
|
|
" # Gradient clipping for stability\n",
|
||
|
|
" torch.nn.utils.clip_grad_norm_(self.value_optimizer.param_groups[0]['params'], 0.5)\n",
|
||
|
|
" self.value_optimizer.step()"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 5,
|
||
|
|
"id": "6f4f9f4b",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"def discount_rewards(rewards, gamma=0.99):\n",
|
||
|
|
" discounted = np.zeros_like(rewards, dtype=np.float32)\n",
|
||
|
|
" running_add = 0\n",
|
||
|
|
" for t in reversed(range(len(rewards))):\n",
|
||
|
|
" running_add = running_add * gamma + rewards[t]\n",
|
||
|
|
" discounted[t] = running_add\n",
|
||
|
|
" return discounted\n",
|
||
|
|
"\n",
|
||
|
|
"def calculate_gaes(rewards, values, gamma=0.99, lam=0.95):\n",
|
||
|
|
" # Add 0 for terminal state bootstrap value\n",
|
||
|
|
" next_values = np.concatenate([values[1:], [0]])\n",
|
||
|
|
" deltas = rewards + gamma * next_values - values\n",
|
||
|
|
" gaes = discount_rewards(deltas, gamma * lam)\n",
|
||
|
|
" return gaes"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 6,
|
||
|
|
"id": "d7b17705",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"def rollout(model, env, max_steps=500): \n",
|
||
|
|
" train_data = [[],[],[],[],[]] # obs, actions, rewards, values, log_probs\n",
|
||
|
|
" gap_heights = [] # Track gap heights during episode\n",
|
||
|
|
" obs, _ = env.reset() # Gymnasium returns (obs, info)\n",
|
||
|
|
" \n",
|
||
|
|
" ep_reward = 0\n",
|
||
|
|
" for _ in range(max_steps):\n",
|
||
|
|
" with torch.no_grad(): # No gradients needed during rollout\n",
|
||
|
|
" mean, std, val = model(torch.tensor([obs], dtype=torch.float32, device=device))\n",
|
||
|
|
"\n",
|
||
|
|
" # Sample continuous action from Normal distribution\n",
|
||
|
|
" act_distribution = Normal(mean, std)\n",
|
||
|
|
" act = act_distribution.sample()\n",
|
||
|
|
" act_log_prob = act_distribution.log_prob(act).sum(dim=-1)\n",
|
||
|
|
" \n",
|
||
|
|
" # Convert to numpy array for environment\n",
|
||
|
|
" act_np = act.squeeze(0).cpu().numpy()\n",
|
||
|
|
" next_obs, reward, terminated, truncated, _ = env.step(act_np)\n",
|
||
|
|
" \n",
|
||
|
|
" # Extract gap heights from observation (first 4 values are normalized gaps)\n",
|
||
|
|
" # Denormalize gaps: multiply by gap_scale (0.015m = 15mm)\n",
|
||
|
|
" gap_heights.append(obs[:4] * env.gap_scale * 1000) # Convert to mm\n",
|
||
|
|
"\n",
|
||
|
|
" # Store as Python scalars (moving to CPU only when necessary)\n",
|
||
|
|
" for i, item in enumerate([obs, act_np, reward, val.item(), act_log_prob.item()]):\n",
|
||
|
|
" train_data[i].append(item)\n",
|
||
|
|
" \n",
|
||
|
|
" obs = next_obs\n",
|
||
|
|
" ep_reward += reward\n",
|
||
|
|
" done = terminated or truncated\n",
|
||
|
|
"\n",
|
||
|
|
" if done:\n",
|
||
|
|
" break\n",
|
||
|
|
" \n",
|
||
|
|
" train_data = [np.array(x, dtype=np.float32) for x in train_data]\n",
|
||
|
|
" train_data[3] = calculate_gaes(rewards=train_data[2], values=train_data[3])\n",
|
||
|
|
" \n",
|
||
|
|
" # Calculate average gap height error\n",
|
||
|
|
" gap_heights = np.array(gap_heights) # Shape: (steps, 4 sensors)\n",
|
||
|
|
" avg_gap_per_step = gap_heights.mean(axis=1) # Average across 4 sensors\n",
|
||
|
|
" target_gap_mm = 16.491741\n",
|
||
|
|
" avg_gap_error = np.abs(avg_gap_per_step - target_gap_mm).mean()\n",
|
||
|
|
"\n",
|
||
|
|
" return train_data, ep_reward, avg_gap_error"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"id": "7ee8fb34",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"Loading maglev model from maglev_model.pkl...\n",
|
||
|
|
"Model loaded. Degree: 6\n",
|
||
|
|
"Force R2: 1.0000\n",
|
||
|
|
"Torque R2: 0.9999\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stderr",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"C:\\Users\\pulip\\AppData\\Local\\Temp\\ipykernel_32220\\3744901434.py:9: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at C:\\bld\\libtorch_1762089177580\\work\\torch\\csrc\\utils\\tensor_new.cpp:256.)\n",
|
||
|
|
" mean, std, val = model(torch.tensor([obs], dtype=torch.float32, device=device))\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"Reward: 1990.308654679309\n",
|
||
|
|
"Episode length: 500\n",
|
||
|
|
"Reward per step: 3.980617309358618\n",
|
||
|
|
"Average gap error: 19.005 mm\n",
|
||
|
|
"\n",
|
||
|
|
"Theoretical Maximum:\n",
|
||
|
|
" Best per step: 3.00\n",
|
||
|
|
" Realistic good per step: 0.80\n",
|
||
|
|
" Best total (500 steps): 1500\n",
|
||
|
|
" Current % of realistic: 497.6%\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# The following was generated by AI - see [19]\n",
|
||
|
|
"environ = LevPodEnv(use_gui=False, initial_gap_mm=14, max_steps=500) # Start below target\n",
|
||
|
|
"model = ActorCriticNetwork(environ.observation_space.shape[0], environ.action_space.shape[0]).to(device)\n",
|
||
|
|
"train_data, reward, gap_error = rollout(model, environ)\n",
|
||
|
|
"print(\"Reward:\", reward)\n",
|
||
|
|
"print(\"Episode length:\", len(train_data[0]))\n",
|
||
|
|
"print(\"Reward per step:\", reward / len(train_data[0]))\n",
|
||
|
|
"print(f\"Average gap error: {gap_error:.3f} mm\")\n",
|
||
|
|
"\n",
|
||
|
|
"# Calculate theoretical maximum reward for reference (NEW REWARD STRUCTURE)\n",
|
||
|
|
"max_steps = 500\n",
|
||
|
|
"# Best case: gap_reward=1.0, no penalties, success_bonus=2.0\n",
|
||
|
|
"theoretical_max_per_step = 3.0 # 1.0 (gap) + 2.0 (success bonus) + 0 (no penalties)\n",
|
||
|
|
"# Realistic good case: gap_reward~0.9, small penalties\n",
|
||
|
|
"realistic_good_per_step = 0.8\n",
|
||
|
|
"theoretical_max_total = theoretical_max_per_step * max_steps\n",
|
||
|
|
"realistic_good_total = realistic_good_per_step * max_steps\n",
|
||
|
|
"\n",
|
||
|
|
"print(f\"\\nTheoretical Maximum:\")\n",
|
||
|
|
"print(f\" Best per step: {theoretical_max_per_step:.2f}\")\n",
|
||
|
|
"print(f\" Realistic good per step: {realistic_good_per_step:.2f}\")\n",
|
||
|
|
"print(f\" Best total (500 steps): {theoretical_max_total:.0f}\")\n",
|
||
|
|
"print(f\" Current % of realistic: {(reward/realistic_good_total)*100:.1f}%\")"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"id": "1b8b1ca2",
|
||
|
|
"metadata": {},
|
||
|
|
"source": [
|
||
|
|
"## Key Improvements Made:\n",
|
||
|
|
"\n",
|
||
|
|
"1. **Better Reward Scaling**: Changed from squared penalties (up to 10,000+) to exponential rewards (~0 to 1) with smaller linear penalties\n",
|
||
|
|
"2. **Larger Network**: Increased from 128 to 256 hidden units with LayerNorm for better learning capacity\n",
|
||
|
|
"3. **More Exploration**: Increased entropy coefficient from 0.01 to 0.02, initialized log_std higher\n",
|
||
|
|
"4. **Gradient Clipping**: Added to prevent exploding gradients\n",
|
||
|
|
"5. **Higher Learning Rates**: Increased policy LR to 5e-4 and value LR to 1e-3\n",
|
||
|
|
"6. **Tighter Termination**: Reduced angle tolerance to 15° and gap bounds to 3-35mm\n",
|
||
|
|
"7. **Success Bonus**: Added +2.0 reward for excellent hovering (gap error < 1mm, angles < 2°)\n",
|
||
|
|
"\n",
|
||
|
|
"Expected improvements:\n",
|
||
|
|
"- Rewards should now be in range [-10, +3] instead of [-8000, +1]\n",
|
||
|
|
"- Model should learn meaningful distinctions between good and bad states\n",
|
||
|
|
"- Training should show steady improvement in gap error over episodes"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"id": "e6f27ed4",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA90AAAJOCAYAAACqS2TfAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAwm5JREFUeJzs3XdYFFfbBvB7C7B0kaYoUlQUu2KJFbtBYzQae+9GY2IvMbHkM9GoUaOxxMSoSTSJGjWJJXax9xILsYJYUBAUpMPufH/sy8jSZGGX2YX7d11zOTv1mXOWdZ89Z87IBEEQQEREREREREQGJ5c6ACIiIiIiIqLiikk3ERERERERkZEw6SYiIiIiIiIyEibdREREREREREbCpJuIiIiIiIjISJh0ExERERERERkJk24iIiIiIiIiI2HSTURERERERGQkTLqJiIiIiIiIjIRJNxFRCeHt7Q2ZTAaZTIY5c+ZIHY7JYjkVTxs2bBDrVSaTSR0OGVnLli3Fuh48eHChj5f5vbNhw4ZCH4+IShYm3UTF3NGjR3W+LCiVSoSEhOhsEx8fr7ONuSQamWPOmORyOWxtbVGlShUMGTIEly9fljpMs5Y1UcltMpf3TElJqKOjo/HJJ5+gdu3asLe3h6WlJdzd3VG9enW8//77+OKLLxAVFSV1mAYjVUKdObHLawoLCyuymEoCQyfUxjJnzpxs/z9ZWVnB2dkZ1apVQ7du3fD9998jISFB6lCJyMiUUgdAREVLrVbj008/xR9//CF1KEYhCAISExNx+/Zt3L59G5s2bcLOnTvRsWNHqUMjMzFz5kzExsYCAJo0aSJxNPoLCwtDs2bN8PjxY53lkZGRiIyMxM2bN/HHH38gMDAQrq6uEkVJZFwffPAB3nnnHQBAjRo1JI5GSxAEpKamIiYmBjExMQgJCcGOHTswY8YMrF+/Hp07d5Y6RCIyEibdRCXQ9u3bcf78eTRo0EDqUAymfv366NWrF5KTk3H69Gns2bMHAJCWloZPP/20WCfdycnJUCgUsLCwMPq5Ro8ejYoVK2Zbbo7JaW5GjBghdQiFMm3aNDHhtrS0RK9eveDn54fExEQ8ePAA58+fx507dySOsvhxcnLCJ598kuO60qVLF3E01KtXL6lDyOaTTz6Bo6MjoqKicPz4cZw9exaAtmdKly5d8Pvvv6NHjx4SR0lERiEQUbF25MgRAUC2qW3btuI2r1690lk3e/bsbMe5c+eOMGbMGKFKlSqCtbW1YG1tLdSoUUOYNWuW8PLlS51tu3TpIh5r5MiR4vIXL14IcrlcACAolUohISFBXNe/f39xnx49euTr2jLHPGjQIJ11jRo1EtdZWVnluL9U17Rlyxahb9++QvXq1QVXV1fBwsJCsLW1FapVqyZ8+OGHQmhoaLZYAwMDda710qVLQlBQkFCqVCkBgM4+a9euFWrUqCFYWVkJ5cqVEyZOnCjExcUJXl5eedZxTtavX69TzkeOHNFr+8xCQ0NzPdbs2bPF5V5eXsKLFy+E8ePHC+XLlxcsLS0FPz8/YdWqVTmeMzU1VVi7dq3Qpk0bwcXFRbCwsBBcXV2FJk2aCAsXLhQEQRAGDRqU499BTrG+qZzOnTsn9O/fX/Dy8hIsLS0FOzs7oVatWsKMGTOEyMjIbNtnPd7Zs2eFt99+W7C3txdsbW2Ftm3bClevXs22X+aYAwMD8yz3zJycnMT9Pv/88xy3uXr1qvDkyROdZVnfY2fPnhXatGkj2NraCm5ubsKYMWOEV69eCYIgCFu3bhXq1asnqFQqwcPDQ5g4caKQnJyc47m2bt0qBAUFCW5uboJSqRScnJyE5s2bCytXrhRSUlJy3Oe///4TRo0aJVSqVElQqVSCjY2NUKVKFWHcuHE67/Ws76mcpow6zPreTE1NFb788kuhUqVKgqWlpeDl5SXMnTtXUKvV+S7rzGXm5eX1xu1XrVolbm9hYSFcuXJFXHfz5k3Bys
pKXL948eIcr/HIkSPCTz/9JJa/q6urMGzYMOHZs2c5nrOo3q+CoN9nakHOlflzIrcp4/2R9f2c2RdffCG8++67QqVKlQQnJydBqVQKpUqVEho2bCh88cUXQnx8fLZYM59j/fr1OV5/VlnjzfrZ/tdffwkqlUpcb29vL0RFRWU7zo4dO4R33nlHKFOmjGBhYSE4OTkJbdu2Ff74449cz33jxg1h9OjRQpUqVQQbGxvB2tpaqFixotC/f3/h+vXr4nYHDhwQhgwZItSpU0dwd3cXLC0tBWtra6FSpUrCkCFDhH///VfcduPGjWKstra2QlxcnM45nz9/LiiVSnGbP//8M1/lRFQSMOkmKuayJt1lypQR5w8dOiQIwpuT7j/++EOwtrbO9UtOxYoVhQcPHojbf/PNN+I6f39/cfmuXbt09ss4vyDofvnKLbnKKq+ku1u3buI6Dw+PbPtKeU2dOnXK80ujg4ODzhcdQdD9Alm3bl3BxsYmxy9z06dPz/GY9evXF9zd3fNMJnMiRdLt7OwsVK1aNcfrWLt2rc4xo6KihICAgFzLMiMRMlTSvXTpUvFHlpwmd3d34dKlSzr7ZD5ew4YNdb6UZkylS5cWnj59qrNfQZNue3t7cb+ePXsKiYmJ+dov83usevXqOglgxtSyZUvh66+/zvHaBwwYoHO89PR0oWfPnnmWecOGDbMlY7///rtOIpJ1sre3F/bt2ycIQuGS7vbt2+e4/SeffJLvstY36RYE3R/w6tSpI6Smpgrp6elCw4YNxeXt2rUTNBpNjtfYunXrHOOuVKmS8Pz5c51zFeX7Vd/P1IKcy1BJt62tbZ7HqFmzpvgDU4bM6w2VdAuCkO3vaf78+eI6tVot9O3bN89YM/8InOG7774TLCwsct0nc/xjx47N8/iWlpbCgQMHBEEQhOTkZMHV1VVc99133+mcd+3ateI6Nzc3IS0tLV/lRFQSsHs5UQkzffp0TJkyBWlpafjkk09w5syZPLe/f/8++vXrh+TkZABArVq10LVrV6SmpuLnn3/G48ePce/ePfTp0wcnT54EALRq1UrcPyQkBM+fP4eLiwtOnDihc+zjx4+jdevWePjwIR48eCAuz7y/vpKTk3Hq1CkcOHBAXNazZ0+TuiYnJye8/fbbqFKlCpycnGBpaYlnz55h+/btePjwIeLi4jBt2jSxi3xWly9fhoWFBQYPHoyKFSvixo0bsLCwwPnz5/HVV1+J25UpUwYDBw5EfHw81q1bh5SUFH2LM5vff/8dFy5cyLZ85MiRcHBwKPTxAW1Xy5cvX2Lo0KFwdnbGypUrkZiYCABYvHixTvfvAQMG4OLFi+Lr6tWrIygoCEqlEhcuXMC9e/cAAL1790aNGjXw5Zdf4sWLFwCAdu3aoX379vmOKzg4GBMnToQgCAAAHx8f9O7dGzExMVi/fj1SU1Px7NkzvPfee7h16xasrKyyHePcuXPw8vJCnz59cOPGDfz9998AgJiYGPz444+YMWOGnqWVXZ06dXD8+HEAwJYtW7Bnzx689dZbqFevHpo0aYI2bdrAzs4uz2PcuHEDXl5e6NevH86ePYtDhw4B0A7MePToUdSsWRNdu3bF33//jStXrgAANm3ahAULFsDDwwMA8MUXX2DLli3iMZs2bYo2bdrgypUr+Ouvv8TyGDVqFH777TcAwJ07dzBw4EDxverq6opBgwYhPT0dP/74I+Li4vDq1Sv06NEDt2/fRunSpbFo0SJcuHABv//+u3iuRYsWifO53fqwf/9+9OjRA5UqVcK6desQGRkJAFixYgVmz54NS0vL/BX4/8TFxWHx4sXZlnt6eup0dV63bh3Onz+PJ0+e4MqVK/jiiy9gZWWFc+fOAQBcXFywcePGXAeEO3z4MFq1aoXmzZvj5MmTYt3cvXsX06ZNww8//ACgaN+vBflMLci52rdvDzs7O6xevRr3798H8PrWogz56cpfoUIF1KhRAxUqVICTkxMEQUBoaC
h+//13JCQk4Nq1a1i1ahWmTp36xmMV1pAhQzB58mSxng4fPozp06cDABYsWIDNmzcDAORyOXr06IEaNWrgzp072LR
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 1000x600 with 1 Axes>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "display_data"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"Reward examples at different gap errors:\n",
|
||
|
|
" 0mm error → 1.0000 reward\n",
|
||
|
|
" 1mm error → 0.9460 reward\n",
|
||
|
|
" 3mm error → 0.6065 reward\n",
|
||
|
|
" 5mm error → 0.2494 reward\n",
|
||
|
|
" 10mm error → 0.0039 reward\n",
|
||
|
|
" 15mm error → 0.0000 reward\n",
|
||
|
|
"\n",
|
||
|
|
"Compare to old reward: (error*100)² would be:\n",
|
||
|
|
" 1mm error → -10,000 penalty\n",
|
||
|
|
" 3mm error → -90,000 penalty\n",
|
||
|
|
" 5mm error → -250,000 penalty\n",
|
||
|
|
" 10mm error → -1,000,000 penalty\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# The following was generated by AI - see [20]\n",
|
||
|
|
"# Visualize the new reward function\n",
|
||
|
|
"gap_errors_mm = np.linspace(0, 20, 100)\n",
|
||
|
|
"gap_rewards = np.exp(-0.5 * (gap_errors_mm / 3.0)**2)\n",
|
||
|
|
"\n",
|
||
|
|
"plt.figure(figsize=(10, 6))\n",
|
||
|
|
"plt.plot(gap_errors_mm, gap_rewards, linewidth=2)\n",
|
||
|
|
"plt.axvline(x=1.0, color='g', linestyle='--', label='Success bonus threshold (1mm)')\n",
|
||
|
|
"plt.axvline(x=3.0, color='orange', linestyle='--', label='1 std dev (3mm)')\n",
|
||
|
|
"plt.xlabel('Gap Error (mm)', fontsize=12)\n",
|
||
|
|
"plt.ylabel('Gap Reward Component', fontsize=12)\n",
|
||
|
|
"plt.title('New Reward Function: Smooth Exponential Decay', fontsize=14, fontweight='bold')\n",
|
||
|
|
"plt.grid(True, alpha=0.3)\n",
|
||
|
|
"plt.legend()\n",
|
||
|
|
"plt.tight_layout()\n",
|
||
|
|
"plt.show()\n",
|
||
|
|
"\n",
|
||
|
|
"print(\"Reward examples at different gap errors:\")\n",
|
||
|
|
"for err in [0, 1, 3, 5, 10, 15]:\n",
|
||
|
|
" reward = np.exp(-0.5 * (err / 3.0)**2)\n",
|
||
|
|
" print(f\" {err:2d}mm error → {reward:.4f} reward\")\n",
|
||
|
|
"print(\"\\nCompare to old reward: (error*100)² would be:\")\n",
|
||
|
|
"for err in [1, 3, 5, 10]:\n",
|
||
|
|
" old_penalty = (err * 100)**2\n",
|
||
|
|
" print(f\" {err:2d}mm error → -{old_penalty:,} penalty\")"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"id": "fb554183",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"Logging to: RL_Trials/training_log_20251211_191801.txt\n",
|
||
|
|
"Plot will be saved to: RL_Trials/gap_error_plot_20251211_191801.png\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# The following was generated by AI - see [21]\n",
|
||
|
|
"import os\n",
|
||
|
|
"from datetime import datetime\n",
|
||
|
|
"\n",
|
||
|
|
"# Create RL_Trials folder if it doesn't exist\n",
|
||
|
|
"os.makedirs('RL_Trials', exist_ok=True)\n",
|
||
|
|
"\n",
|
||
|
|
"# Create timestamped log file\n",
|
||
|
|
"timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
|
||
|
|
"log_file_path = f'RL_Trials/training_log_{timestamp}.txt'\n",
|
||
|
|
"plot_file_path = f'RL_Trials/gap_error_plot_{timestamp}.png'\n",
|
||
|
|
"\n",
|
||
|
|
"# Define training params\n",
|
||
|
|
"num_episodes = 2000 # Increased for more learning time\n",
|
||
|
|
"print_freq = 20 # Print less frequently\n",
|
||
|
|
"gui_freq = 100 # Show GUI every 100 episodes\n",
|
||
|
|
"\n",
|
||
|
|
"# Create PPO trainer with improved hyperparameters\n",
|
||
|
|
"ppo = PPOTrainer(\n",
|
||
|
|
" model, \n",
|
||
|
|
" policy_lr=5e-4, # Higher learning rate\n",
|
||
|
|
" value_lr=1e-3, # Even higher for value function\n",
|
||
|
|
" target_kl_div=0.02, # Allow more policy updates\n",
|
||
|
|
" max_policy_train_iters=40,\n",
|
||
|
|
" value_train_iters=40,\n",
|
||
|
|
" entropy_coef=0.02 # More exploration\n",
|
||
|
|
")\n",
|
||
|
|
"\n",
|
||
|
|
"# Open log file\n",
|
||
|
|
"log_file = open(log_file_path, 'w')\n",
|
||
|
|
"log_file.write(f\"Training Started: {timestamp}\\n\")\n",
|
||
|
|
"log_file.write(f\"Number of Episodes: {num_episodes}\\n\")\n",
|
||
|
|
"log_file.write(f\"Print Frequency: {print_freq}\\n\")\n",
|
||
|
|
"log_file.write(f\"Target Gap Height: {16.491741} mm\\n\")\n",
|
||
|
|
"log_file.write(f\"Network: 256 hidden units with LayerNorm\\n\")\n",
|
||
|
|
"log_file.write(f\"Policy LR: 5e-4, Value LR: 1e-3, Entropy: 0.02\\n\")\n",
|
||
|
|
"log_file.write(\"=\"*70 + \"\\n\\n\")\n",
|
||
|
|
"log_file.flush()\n",
|
||
|
|
"\n",
|
||
|
|
"print(f\"Logging to: {log_file_path}\")\n",
|
||
|
|
"print(f\"Plot will be saved to: {plot_file_path}\")"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 10,
|
||
|
|
"id": "64994dcf",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"Loading maglev model from maglev_model.pkl...\n",
|
||
|
|
"Model loaded. Degree: 6\n",
|
||
|
|
"Force R2: 1.0000\n",
|
||
|
|
"Torque R2: 0.9999\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"environ = LevPodEnv(use_gui=True, initial_gap_mm=14, max_steps=500) # Start below target"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 11,
|
||
|
|
"id": "c3353cf5",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"Ep 20 | R: 755.5 | Len: 204 | R/s: 3.70 (462.3%) | Gap: 16.74mm (min:14.73) | Best: 14.73mm\n",
|
||
|
|
"Ep 40 | R: 407.2 | Len: 116 | R/s: 3.52 (439.9%) | Gap: 15.56mm (min:13.80) | Best: 13.80mm\n",
|
||
|
|
"Ep 60 | R: 157.4 | Len: 57 | R/s: 2.78 (347.5%) | Gap: 14.87mm (min:13.91) | Best: 13.80mm\n",
|
||
|
|
"Ep 80 | R: 182.7 | Len: 61 | R/s: 2.98 (372.0%) | Gap: 15.06mm (min:14.21) | Best: 13.80mm\n",
|
||
|
|
"Ep 100 | R: 487.2 | Len: 134 | R/s: 3.65 (455.9%) | Gap: 16.10mm (min:14.31) | Best: 13.80mm\n",
|
||
|
|
"Ep 120 | R: 1113.1 | Len: 297 | R/s: 3.75 (468.3%) | Gap: 17.64mm (min:15.95) | Best: 13.80mm\n",
|
||
|
|
"Ep 140 | R: 1434.7 | Len: 385 | R/s: 3.72 (465.6%) | Gap: 18.21mm (min:16.13) | Best: 13.80mm\n",
|
||
|
|
"Ep 160 | R: 641.4 | Len: 172 | R/s: 3.72 (464.8%) | Gap: 16.69mm (min:15.38) | Best: 13.80mm\n",
|
||
|
|
"Ep 180 | R: 1029.0 | Len: 274 | R/s: 3.76 (469.7%) | Gap: 17.43mm (min:14.60) | Best: 13.80mm\n",
|
||
|
|
"Ep 200 | R: 287.0 | Len: 85 | R/s: 3.39 (424.0%) | Gap: 15.61mm (min:14.18) | Best: 13.80mm\n",
|
||
|
|
"Ep 220 | R: 330.9 | Len: 94 | R/s: 3.52 (440.4%) | Gap: 15.85mm (min:14.93) | Best: 13.80mm\n",
|
||
|
|
"Ep 240 | R: 336.4 | Len: 103 | R/s: 3.28 (409.7%) | Gap: 15.15mm (min:13.83) | Best: 13.80mm\n",
|
||
|
|
"Ep 260 | R: 128.2 | Len: 50 | R/s: 2.58 (321.9%) | Gap: 14.51mm (min:13.90) | Best: 13.80mm\n",
|
||
|
|
"Ep 280 | R: 116.0 | Len: 46 | R/s: 2.51 (313.3%) | Gap: 14.30mm (min:13.49) | Best: 13.49mm\n",
|
||
|
|
"Ep 300 | R: 95.0 | Len: 39 | R/s: 2.45 (306.0%) | Gap: 13.85mm (min:13.19) | Best: 13.19mm\n",
|
||
|
|
"Ep 320 | R: 772.1 | Len: 200 | R/s: 3.86 (482.9%) | Gap: 16.77mm (min:15.05) | Best: 13.19mm\n",
|
||
|
|
"Ep 340 | R: 152.7 | Len: 54 | R/s: 2.84 (354.5%) | Gap: 14.81mm (min:13.96) | Best: 13.19mm\n",
|
||
|
|
"Ep 360 | R: 118.1 | Len: 47 | R/s: 2.52 (315.6%) | Gap: 14.36mm (min:13.50) | Best: 13.19mm\n",
|
||
|
|
"Ep 380 | R: 290.8 | Len: 81 | R/s: 3.60 (450.2%) | Gap: 15.56mm (min:14.06) | Best: 13.19mm\n",
|
||
|
|
"Ep 400 | R: 230.0 | Len: 69 | R/s: 3.35 (418.5%) | Gap: 15.38mm (min:14.74) | Best: 13.19mm\n",
|
||
|
|
"Ep 420 | R: 305.9 | Len: 85 | R/s: 3.58 (447.5%) | Gap: 15.82mm (min:15.08) | Best: 13.19mm\n",
|
||
|
|
"Ep 440 | R: 450.6 | Len: 116 | R/s: 3.90 (487.0%) | Gap: 16.25mm (min:14.81) | Best: 13.19mm\n",
|
||
|
|
"Ep 460 | R: 624.2 | Len: 161 | R/s: 3.89 (486.0%) | Gap: 16.65mm (min:15.01) | Best: 13.19mm\n",
|
||
|
|
"Ep 480 | R: 710.6 | Len: 192 | R/s: 3.70 (462.6%) | Gap: 16.62mm (min:14.71) | Best: 13.19mm\n",
|
||
|
|
"Ep 500 | R: 131.1 | Len: 49 | R/s: 2.65 (331.8%) | Gap: 14.44mm (min:13.45) | Best: 13.19mm\n",
|
||
|
|
"Ep 520 | R: 169.4 | Len: 58 | R/s: 2.90 (362.5%) | Gap: 14.97mm (min:14.22) | Best: 13.19mm\n",
|
||
|
|
"Ep 540 | R: 929.9 | Len: 263 | R/s: 3.53 (441.4%) | Gap: 16.99mm (min:14.49) | Best: 13.19mm\n",
|
||
|
|
"Ep 560 | R: 1760.6 | Len: 500 | R/s: 3.52 (440.1%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 580 | R: 1763.0 | Len: 500 | R/s: 3.53 (440.7%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 600 | R: 1775.4 | Len: 500 | R/s: 3.55 (443.8%) | Gap: 18.99mm (min:18.91) | Best: 13.19mm\n",
|
||
|
|
"Ep 620 | R: 1298.7 | Len: 355 | R/s: 3.66 (457.5%) | Gap: 17.94mm (min:14.49) | Best: 13.19mm\n",
|
||
|
|
"Ep 640 | R: 1576.3 | Len: 438 | R/s: 3.60 (450.3%) | Gap: 18.63mm (min:16.35) | Best: 13.19mm\n",
|
||
|
|
"Ep 660 | R: 1762.6 | Len: 500 | R/s: 3.53 (440.7%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 680 | R: 1761.3 | Len: 500 | R/s: 3.52 (440.3%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 700 | R: 1761.0 | Len: 500 | R/s: 3.52 (440.2%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 720 | R: 1754.8 | Len: 500 | R/s: 3.51 (438.7%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 740 | R: 1755.3 | Len: 500 | R/s: 3.51 (438.8%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 760 | R: 1756.6 | Len: 500 | R/s: 3.51 (439.2%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 780 | R: 1759.2 | Len: 500 | R/s: 3.52 (439.8%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 800 | R: 1756.9 | Len: 500 | R/s: 3.51 (439.2%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 820 | R: 1759.2 | Len: 500 | R/s: 3.52 (439.8%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 840 | R: 1593.2 | Len: 436 | R/s: 3.65 (456.7%) | Gap: 18.62mm (min:16.57) | Best: 13.19mm\n",
|
||
|
|
"Ep 860 | R: 1209.1 | Len: 334 | R/s: 3.62 (452.2%) | Gap: 17.92mm (min:15.21) | Best: 13.19mm\n",
|
||
|
|
"Ep 880 | R: 509.8 | Len: 149 | R/s: 3.43 (429.0%) | Gap: 16.16mm (min:14.21) | Best: 13.19mm\n",
|
||
|
|
"Ep 900 | R: 496.0 | Len: 148 | R/s: 3.36 (419.9%) | Gap: 15.86mm (min:14.56) | Best: 13.19mm\n",
|
||
|
|
"Ep 920 | R: 1770.0 | Len: 500 | R/s: 3.54 (442.5%) | Gap: 18.99mm (min:18.97) | Best: 13.19mm\n",
|
||
|
|
"Ep 940 | R: 1763.3 | Len: 500 | R/s: 3.53 (440.8%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 960 | R: 1753.9 | Len: 500 | R/s: 3.51 (438.5%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 980 | R: 1751.9 | Len: 500 | R/s: 3.50 (438.0%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 1000 | R: 1756.6 | Len: 500 | R/s: 3.51 (439.1%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 1020 | R: 1754.6 | Len: 500 | R/s: 3.51 (438.7%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 1040 | R: 1759.2 | Len: 500 | R/s: 3.52 (439.8%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1060 | R: 1756.7 | Len: 500 | R/s: 3.51 (439.2%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1080 | R: 1758.8 | Len: 500 | R/s: 3.52 (439.7%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 1100 | R: 1756.2 | Len: 500 | R/s: 3.51 (439.1%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 1120 | R: 1756.5 | Len: 500 | R/s: 3.51 (439.1%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 1140 | R: 1760.5 | Len: 500 | R/s: 3.52 (440.1%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 1160 | R: 1760.5 | Len: 500 | R/s: 3.52 (440.1%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1180 | R: 1756.5 | Len: 500 | R/s: 3.51 (439.1%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm\n",
|
||
|
|
"Ep 1200 | R: 1760.0 | Len: 500 | R/s: 3.52 (440.0%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1220 | R: 1758.7 | Len: 500 | R/s: 3.52 (439.7%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1240 | R: 1760.4 | Len: 500 | R/s: 3.52 (440.1%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1260 | R: 1753.5 | Len: 500 | R/s: 3.51 (438.4%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1280 | R: 1753.9 | Len: 500 | R/s: 3.51 (438.5%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1300 | R: 1758.0 | Len: 500 | R/s: 3.52 (439.5%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1320 | R: 1762.7 | Len: 500 | R/s: 3.53 (440.7%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1340 | R: 1693.0 | Len: 459 | R/s: 3.69 (460.9%) | Gap: 18.61mm (min:16.25) | Best: 13.19mm\n",
|
||
|
|
"Ep 1360 | R: 713.0 | Len: 181 | R/s: 3.94 (492.2%) | Gap: 16.38mm (min:15.05) | Best: 13.19mm\n",
|
||
|
|
"Ep 1380 | R: 2118.4 | Len: 486 | R/s: 4.36 (545.4%) | Gap: 18.38mm (min:17.32) | Best: 13.19mm\n",
|
||
|
|
"Ep 1400 | R: 2157.4 | Len: 495 | R/s: 4.36 (544.9%) | Gap: 18.50mm (min:18.01) | Best: 13.19mm\n",
|
||
|
|
"Ep 1420 | R: 1181.5 | Len: 262 | R/s: 4.50 (563.0%) | Gap: 16.90mm (min:15.79) | Best: 13.19mm\n",
|
||
|
|
"Ep 1440 | R: 1332.5 | Len: 298 | R/s: 4.46 (558.1%) | Gap: 17.08mm (min:15.65) | Best: 13.19mm\n",
|
||
|
|
"Ep 1460 | R: 1496.5 | Len: 332 | R/s: 4.51 (563.8%) | Gap: 17.27mm (min:15.62) | Best: 13.19mm\n",
|
||
|
|
"Ep 1480 | R: 1545.4 | Len: 339 | R/s: 4.56 (570.1%) | Gap: 17.26mm (min:15.87) | Best: 13.19mm\n",
|
||
|
|
"Ep 1500 | R: 862.8 | Len: 201 | R/s: 4.29 (536.3%) | Gap: 16.17mm (min:14.88) | Best: 13.19mm\n",
|
||
|
|
"Ep 1520 | R: 809.8 | Len: 193 | R/s: 4.20 (524.6%) | Gap: 16.03mm (min:14.74) | Best: 13.19mm\n",
|
||
|
|
"Ep 1540 | R: 861.1 | Len: 204 | R/s: 4.22 (527.7%) | Gap: 16.25mm (min:14.93) | Best: 13.19mm\n",
|
||
|
|
"Ep 1560 | R: 1445.2 | Len: 329 | R/s: 4.40 (549.4%) | Gap: 17.24mm (min:15.19) | Best: 13.19mm\n",
|
||
|
|
"Ep 1580 | R: 1993.4 | Len: 486 | R/s: 4.11 (513.2%) | Gap: 18.55mm (min:16.26) | Best: 13.19mm\n",
|
||
|
|
"Ep 1600 | R: 1985.4 | Len: 500 | R/s: 3.97 (496.4%) | Gap: 18.75mm (min:18.57) | Best: 13.19mm\n",
|
||
|
|
"Ep 1620 | R: 1776.8 | Len: 500 | R/s: 3.55 (444.2%) | Gap: 18.97mm (min:18.91) | Best: 13.19mm\n",
|
||
|
|
"Ep 1640 | R: 1755.2 | Len: 500 | R/s: 3.51 (438.8%) | Gap: 18.99mm (min:18.97) | Best: 13.19mm\n",
|
||
|
|
"Ep 1660 | R: 1751.1 | Len: 500 | R/s: 3.50 (437.8%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1680 | R: 1746.6 | Len: 500 | R/s: 3.49 (436.7%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1700 | R: 1746.2 | Len: 500 | R/s: 3.49 (436.5%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1720 | R: 1747.8 | Len: 500 | R/s: 3.50 (437.0%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1740 | R: 1743.0 | Len: 500 | R/s: 3.49 (435.8%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1760 | R: 1743.4 | Len: 500 | R/s: 3.49 (435.8%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1780 | R: 1744.3 | Len: 500 | R/s: 3.49 (436.1%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1800 | R: 1744.0 | Len: 500 | R/s: 3.49 (436.0%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1820 | R: 1739.4 | Len: 500 | R/s: 3.48 (434.8%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1840 | R: 1736.2 | Len: 500 | R/s: 3.47 (434.1%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1860 | R: 1732.7 | Len: 500 | R/s: 3.47 (433.2%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1880 | R: 1732.1 | Len: 500 | R/s: 3.46 (433.0%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1900 | R: 1732.2 | Len: 500 | R/s: 3.46 (433.0%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1920 | R: 1729.1 | Len: 500 | R/s: 3.46 (432.3%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1940 | R: 1728.6 | Len: 500 | R/s: 3.46 (432.1%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1960 | R: 1728.0 | Len: 500 | R/s: 3.46 (432.0%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 1980 | R: 1728.5 | Len: 500 | R/s: 3.46 (432.1%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n",
|
||
|
|
"Ep 2000 | R: 1726.8 | Len: 500 | R/s: 3.45 (431.7%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Training Loop with Better Monitoring\n",
|
||
|
|
"ep_rewards = []\n",
|
||
|
|
"ep_lengths = []\n",
|
||
|
|
"ep_gap_errors = []\n",
|
||
|
|
"best_gap_error = float('inf')\n",
|
||
|
|
"gui_env = None\n",
|
||
|
|
"\n",
|
||
|
|
"for episode_idx in range(num_episodes):\n",
|
||
|
|
" train_data, ep_reward, gap_error = rollout(model, environ)\n",
|
||
|
|
" \n",
|
||
|
|
" ep_length = len(train_data[0])\n",
|
||
|
|
" ep_rewards.append(ep_reward)\n",
|
||
|
|
" ep_lengths.append(ep_length)\n",
|
||
|
|
" ep_gap_errors.append(gap_error)\n",
|
||
|
|
" \n",
|
||
|
|
" # Track best performance\n",
|
||
|
|
" if gap_error < best_gap_error:\n",
|
||
|
|
" best_gap_error = gap_error\n",
|
||
|
|
"\n",
|
||
|
|
" # Data Formatting\n",
|
||
|
|
" permute_idxs = np.random.permutation(len(train_data[0]))\n",
|
||
|
|
" obs = torch.tensor(train_data[0][permute_idxs], dtype=torch.float32, device=device)\n",
|
||
|
|
" acts = torch.tensor(train_data[1][permute_idxs], dtype=torch.float32, device=device)\n",
|
||
|
|
" gaes = torch.tensor(train_data[3][permute_idxs], dtype=torch.float32, device=device)\n",
|
||
|
|
" act_log_probs = torch.tensor(train_data[4][permute_idxs], dtype=torch.float32, device=device)\n",
|
||
|
|
"\n",
|
||
|
|
" returns = discount_rewards(train_data[2])[permute_idxs]\n",
|
||
|
|
" returns = torch.tensor(returns, dtype=torch.float32, device=device)\n",
|
||
|
|
"\n",
|
||
|
|
" # Normalize GAEs for stable training\n",
|
||
|
|
" gaes = (gaes - gaes.mean()) / (gaes.std() + 1e-8)\n",
|
||
|
|
"\n",
|
||
|
|
" ppo.train_policy(obs, acts, act_log_probs, gaes)\n",
|
||
|
|
" ppo.train_value(obs, returns)\n",
|
||
|
|
"\n",
|
||
|
|
" if (episode_idx + 1) % print_freq == 0:\n",
|
||
|
|
" avg_reward = np.mean(ep_rewards[-print_freq:])\n",
|
||
|
|
" avg_length = np.mean(ep_lengths[-print_freq:])\n",
|
||
|
|
" avg_gap_error = np.mean(ep_gap_errors[-print_freq:])\n",
|
||
|
|
" min_gap_error = np.min(ep_gap_errors[-print_freq:])\n",
|
||
|
|
" avg_reward_per_step = avg_reward / avg_length if avg_length > 0 else 0\n",
|
||
|
|
" \n",
|
||
|
|
" # Updated for new reward scale (realistic good is ~0.8/step)\n",
|
||
|
|
" realistic_good_per_step = 0.8\n",
|
||
|
|
" percent_of_realistic = (avg_reward_per_step / realistic_good_per_step) * 100\n",
|
||
|
|
" \n",
|
||
|
|
" output_line = (f\"Ep {episode_idx + 1:4d} | R: {avg_reward:6.1f} | Len: {avg_length:3.0f} | \"\n",
|
||
|
|
" f\"R/s: {avg_reward_per_step:5.2f} ({percent_of_realistic:5.1f}%) | \"\n",
|
||
|
|
" f\"Gap: {avg_gap_error:5.2f}mm (min:{min_gap_error:5.2f}) | Best: {best_gap_error:5.2f}mm\")\n",
|
||
|
|
" \n",
|
||
|
|
" print(output_line)\n",
|
||
|
|
" log_file.write(output_line + \"\\n\")\n",
|
||
|
|
" log_file.flush()\n",
|
||
|
|
"\n",
|
||
|
|
"# Close GUI environment if created\n",
|
||
|
|
"if gui_env is not None:\n",
|
||
|
|
" gui_env.close()\n",
|
||
|
|
"\n",
|
||
|
|
"# Close log file\n",
|
||
|
|
"log_file.write(\"\\n\" + \"=\"*70 + \"\\n\")\n",
|
||
|
|
"log_file.write(f\"Training Completed: {datetime.now().strftime('%Y%m%d_%H%M%S')}\\n\")\n",
|
||
|
|
"log_file.write(f\"Best Gap Error Achieved: {best_gap_error:.3f} mm\\n\")\n",
|
||
|
|
"log_file.close()"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"id": "3678193c",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAJOCAYAAABm7rQwAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsvXe85VS5//9Jstvp/UyvDAwzwAxDVYpUQUQEUREvIigKygUUhJ8XvhYsiILitSt6UdQrIHq96AUbSJPmDAwDAwxMhWHa6XW3lPX7IzvJSnayd3bP3ud5v15nJmUlWdlJVvms53mWwBhjIAiCIAiCIAiCIAiCIIgqItY6AwRBEARBEARBEARBEMTMg0QpgiAIgiAIgiAIgiAIouqQKEUQBEEQBEEQBEEQBEFUHRKlCIIgCIIgCIIgCIIgiKpDohRBEARBEARBEARBEARRdUiUIgiCIAiCIAiCIAiCIKoOiVIEQRAEQRAEQRAEQRBE1SFRiiAIgiAIgiAIgiAIgqg6JEoRBEEQBEEQBEEQBEEQVYdEKYIgCILIIAiC+feLX/yi5PPdeOON5vkWL15c8vkIImiU+x3fsWOH7Tt85JFHSj4nQRAEQRDBhUQpgiCIBmV4eBjf/OY3cfrpp2Pu3LloampCe3s7DjjgAJx44om48cYb8a9//QuMsVpnNYtf/OIXeTumjzzySNlFpKDgvP9C4YWCXH+N9Js52bBhAy6//HIccsgh6OzsRCQSwaxZs3DyySfjm9/8JsbHx2udxaLx82ydfzt27Kh1tokaUGpZUu846wm3v2XLlrkeOzY2hs997nM4+OCD0dLSgvb2dhx++OG45ZZbkEwmPa/597//He9+97sxa9YsRKNRzJs3D+effz7WrVtXqdskCIKoa0K1zgBBEARRfv7rv/4Ln/70pzE1NWXbnkwmMTk5ic2bN+PRRx/Fl770JWzfvp2seDLceuut5vKRRx5Zw5wQxaIoCj7zmc/gu9/9bta+gYEBDAwM4OGHH8Y3vvEN/Pd//zdOO+20GuSycTjttNPQ2toKAOjo6Cj5fN3d3bbvcL/99iv5nARRKNu3b8dJJ52E119/3bb9ueeew3PPPYe7774bf//739HT02Pb/4UvfAFf+cpXbNt2796Ne+65B/feey9+8pOf4GMf+1jF808QBFFPkChFEATRYHz729/GNddcY64LgoCTTjoJRx99NNrb2zEyMoINGzbg8ccfRyKRqGFOg8e1115b6yxUhBtuuAFdXV1Z2wsR3iYnJ9HW1pYzTTKZhCRJCIfDBecxH37PfeWVV+LHP/6xuT5v3jycd9556OnpwcaNG3HvvfdCVVUMDQ3hrLPOwj/+8Q8ce+yxZc9vKeT7rXnRBgC2bt1qu+cPfOADOOKII2xpuru7i75eLo455hgcc8wxRR3rRnt7e8N+h0RtOeKII/CBD3wga7uzbNQ0Deeff74pSHV3d+PSSy9FMpnET37yEyQSCaxfvx6f+MQncO+995rH/d///Z9NkHrHO96B4447Dvfffz+eeuopaJqGT37ykzjyyCOxevXqCt0lQRBEHcIIgiCIhuGVV15hoVCIAWAAWG9vL3vqqadc005PT7Pbb7+dDQ4OmtsSiQS74YYb2Omnn86WLFnC2tvbWSgUYj09Pez4449n3/ve95gsy7bzbN++3bweAPbwww+zX/7yl+ywww5jsViM9fX1sUsuuYTt27fP9338/Oc/zzqnk4cfftiW5uc//3lWmueee45dfPHFbMmSJSwajbLW1lZ2xBFHsG9961sskUhkpc91vunpafYf//EfbMGCBSwajbIVK1aw733ve2zbtm2eef3iF79obl+0aBGbnJxk1113HVuwYAGLRCLsgAMOYD/84Q89f0u3vy9+8Yt5fz/+ugDY9u3b8x7j/M0nJyfZZz7zGbZw4UImSZJ53RNOOMFMc9FFF7HnnnuOnXHGGayzszPrWps2bWKXXX
YZW7ZsGYvFYqy5uZktX76cXXnlla55KuTcbjz55JO2ezjiiCPYxMSELc3DDz/MJEky0xx00EFMVVWmqipbuHChuf0rX/lK1vmvuuoqc//BBx9s27d582Z2+eWXs+XLl7OmpibW1NTEDj74YPaFL3yBjY2NZZ1r0aJFtmf64IMPsre97W2sra2NFdo8y/ctOPe/9tpr7Ctf+Qrbf//9WTgcZhdddBFjjLG1a9eyyy67jB155JFs7ty5LBaLsVgsxhYtWsQ+8IEPsMcffzzr2s53PNc9PvPMM+wd73gHa2trYy0tLezUU09lGzZssB3jVp54XWt0dJR9+tOfZvPnz3f9nnheeOEF9q53vYu1tbWxtrY2dtppp7F169blzH+xON/jTZs2sfe9732sq6uLxWIx9pa3vMW1TPPLfffdx04//XTW39/PQqEQa2trY0uXLmVnn302+9rXvsZUVS2qLHn44YfZ+9//fvP3bG9vZ8cddxz72c9+xlRVtaUtpdz3k/9ywb/7xnuejwceeMB2b3/729/Mfbfffrtt3yuvvGLuO+qoo8ztxx57rLk9lUqxJUuWmPs+8IEPlO3+CIIgGgESpQiCIBqIT3ziE7YG8+9///uCjh8cHMzbkTn11FOZoijmMc7Oycknn+x63LJly9jQ0JCvfJRDlPre975nEx+cf0ceeWSWWOB1vnQ6zY4//njX85x11lm+OtF9fX3s0EMPdT3H7bff7vpb1lKUOvbYY12vy3e416xZw5qbm12vdc8997BYLOZ5H21tbeyvf/2rLQ9+z+3FRRddZEv/j3/8wzXdBRdcYEv3yCOPMMYY+/znP29uW7lype0YVVXZnDlzzP233Xabue/3v/89a2pq8rzX/fbbj73++uu28/GCzVve8pasd7UQChWlnM/W6KzfeuutOd89QRCyzu1XlDrqqKNsgrnx193dzfbu3Wse41eU6unpYQceeGDO78lg7dq1rLW1NStdLBZjp556qmf+i4V/j1etWuV67UgkwjZu3FjwuZ3fqdtfIpEouCz57Gc/mzPtmWeeydLptJm+2HLfb/4LSc//nXDCCbbfi3/3u7u7WU9PDwuHw2zWrFnsrLPOYn/+85+zfmO+Hm1vb2eappn7hoeHbdf7xje+wRhjbO/evbbt3/rWt2znvPLKK819LS0tZRXeCIIg6h1y3yMIgmgg/vGPf5jLXV1deM973lPQ8UbQ16OPPhpz585FV1cXZFnGpk2bcO+990JRFDz44IP4/e9/j/POO88zDyeddBKOP/54PPHEE3jooYcAAFu2bMFnP/tZ/OxnPyv4vu65556sILFbt271TP/EE0/gqquuMoO4H3fccTj11FMxNjaGO++8E6Ojo1i7di0++clP4je/+U3e63/nO9/B448/bq6vWrUKZ599NjZs2IA//vGPvu5hcHAQIyMj+OhHP4qenh784Ac/QDweBwB885vfxMc//nEzns66detwzz33mMfy7lrFuEr99Kc/dXXfy+Um9cQTT+DYY4/FKaecgsnJScyfPz8rzfr16xEOh3HxxRdjv/32w0svvYRwOIzNmzfjwx/+MFKpFACgr68PF110ERRFwR133IGJiQlMTk7i/e9/P1577TXMmjXL97lzwT+j7u5unHTSSa7p3v/+9+O///u/bcedcMIJuPjii/HVr34VjDG8/PLL2LBhg+lm88gjj2DPnj0AgHA4jA996EMAgG3btuGCCy4wAx+vWrUK55xzDtLpNH71q19h165d2Lp1Kz74wQ/iiSeecM3P008/jba2NlxwwQWYO3duxQMiP/HEE1i1ahXOPPNMaJpmxoKKxWJ461vfikMPPRQ9PT1oaWnB+Pg4HnroIaxduxaMMXzmM5/BBz7wATQ1NRV0zX/9619YtGgRPvjBD+Kll17Cn/70JwDAyMgI7rjjDlx//fUFnW94eBhjY2M5vyeDj3zkI7b4eh/84AexdOlS/Pa3v8WDDz5Y0HUL5YUXXk
Bvby8+8YlPYN++ffjVr34FAEin0/jud7+Ln/zkJwWd70c/+pG5fOSRR+Jd73oXFEXBzp078cwzz+CVV14BgILKkt/
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 1200x600 with 1 Axes>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "display_data"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"\n",
|
||
|
|
"✓ Training log saved to: RL_Trials/training_log_20251211_191801.txt\n",
|
||
|
|
"✓ Gap error plot saved to: RL_Trials/gap_error_plot_20251211_191801.png\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Create and save gap error plot\n",
|
||
|
|
"plt.figure(figsize=(12, 6))\n",
|
||
|
|
"plt.plot(ep_gap_errors, alpha=0.3, label='Per Episode')\n",
|
||
|
|
"\n",
|
||
|
|
"# Calculate moving average (window size = print_freq)\n",
|
||
|
|
"window_size = print_freq\n",
|
||
|
|
"if len(ep_gap_errors) >= window_size:\n",
|
||
|
|
" moving_avg = np.convolve(ep_gap_errors, np.ones(window_size)/window_size, mode='valid')\n",
|
||
|
|
" plt.plot(range(window_size-1, len(ep_gap_errors)), moving_avg, linewidth=2, label=f'{window_size}-Episode Moving Average')\n",
|
||
|
|
"\n",
|
||
|
|
"plt.xlabel('Episode', fontsize=12)\n",
|
||
|
|
"plt.ylabel('Average Gap Height Error (mm)', fontsize=12)\n",
|
||
|
|
"plt.title('Gap Height Error Over Training, n_steps=500', fontsize=14, fontweight='bold')\n",
|
||
|
|
"plt.legend(fontsize=11)\n",
|
||
|
|
"plt.grid(True, alpha=0.3)\n",
|
||
|
|
"plt.tight_layout()\n",
|
||
|
|
"plt.savefig(plot_file_path, dpi=150, bbox_inches='tight')\n",
|
||
|
|
"plt.show()\n",
|
||
|
|
"\n",
|
||
|
|
"print(f\"\\n✓ Training log saved to: {log_file_path}\")\n",
|
||
|
|
"print(f\"✓ Gap error plot saved to: {plot_file_path}\")"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 13,
|
||
|
|
"id": "7c225451",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"environ.close()"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"id": "97e51696",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": []
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"kernelspec": {
|
||
|
|
"display_name": "LevSim",
|
||
|
|
"language": "python",
|
||
|
|
"name": "python3"
|
||
|
|
},
|
||
|
|
"language_info": {
|
||
|
|
"codemirror_mode": {
|
||
|
|
"name": "ipython",
|
||
|
|
"version": 3
|
||
|
|
},
|
||
|
|
"file_extension": ".py",
|
||
|
|
"mimetype": "text/x-python",
|
||
|
|
"name": "python",
|
||
|
|
"nbconvert_exporter": "python",
|
||
|
|
"pygments_lexer": "ipython3",
|
||
|
|
"version": "3.10.19"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"nbformat": 4,
|
||
|
|
"nbformat_minor": 5
|
||
|
|
}
|