commit

2025-12-12 15:25:24 -06:00
parent fc4f7c3bb8
commit b84d8a02df
26 changed files with 10758 additions and 0 deletions
--- a/12-9.csv
+++ b/12-9.csv
--- a/12-9.xlsx
+++ b/12-9.xlsx
--- a/Fitting.ipynb
+++ b/Fitting.ipynb
--- a/RL_Trials/gap_error_plot_20251211_084252.png
+++ b/RL_Trials/gap_error_plot_20251211_084252.png
--- a/RL_Trials/gap_error_plot_20251211_102102.png
+++ b/RL_Trials/gap_error_plot_20251211_102102.png
--- a/RL_Trials/gap_error_plot_20251211_110643.png
+++ b/RL_Trials/gap_error_plot_20251211_110643.png
--- a/RL_Trials/gap_error_plot_20251211_191801.png
+++ b/RL_Trials/gap_error_plot_20251211_191801.png
--- a/RL_Trials/info.txt
+++ b/RL_Trials/info.txt
@@ -0,0 +1,29 @@
+First trial used a plain gap-error minimization reward:
+
+    # --- FIX 2: Smoother Reward Function ---
+    # Reward function
+    reward = 1.0 # Survival bonus
+    
+    # Distance Penalty (Squared is smoother than linear for fine control)
+    reward -= (gap_error * 100)**2 
+    
+    # Orientation Penalties
+    reward -= (roll_angle * 10)**2
+    reward -= (pitch_angle * 10)**2
+
+Next, added the following contact penalty (penalize contact instead of terminating the episode):
+
+  contact_points = p.getContactPoints(bodyA=self.podId, bodyB=self.trackId)
+  has_contact = len(contact_points) > 0
+
+  # Don't terminate on contact.
+  # Instead, penalize it, but allow the episode to continue so it can try to fix it.
+  # if has_contact:
+      # 5.0 is painful, but surviving 100 steps of pain is better than immediate death (-50)
+  reward -= len(contact_points)
+
+# At this point, the pod still either sticks or falls; no hovering behavior has been learned.
+
+# Tried increasing the lambda value and always starting at optimal.
+
+# Tried reducing entropy and resetting all params, while allowing the full range of motion without bolts (around 7 pm).
--- a/RL_Trials/training_log_20251211_084252.txt
+++ b/RL_Trials/training_log_20251211_084252.txt
@@ -0,0 +1,109 @@
+Training Started: 20251211_084252
+Number of Episodes: 1000
+Print Frequency: 10
+Target Gap Height: 16.491741 mm
+======================================================================
+
+Ep   10 | Reward:   339.6 | Length: 2000 | R/step:  0.170 ( 17.0% of max) | Gap Error: 11.386 mm
+Ep   20 | Reward:   582.7 | Length: 2000 | R/step:  0.291 ( 29.1% of max) | Gap Error: 12.576 mm
+Ep   30 | Reward:  1572.2 | Length: 2000 | R/step:  0.786 ( 78.6% of max) | Gap Error: 17.500 mm
+Ep   40 | Reward:  1574.1 | Length: 2000 | R/step:  0.787 ( 78.7% of max) | Gap Error: 17.505 mm
+Ep   50 | Reward:  1272.2 | Length: 2000 | R/step:  0.636 ( 63.6% of max) | Gap Error: 16.009 mm
+Ep   60 | Reward:  1874.6 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.990 mm
+Ep   70 | Reward:  1570.4 | Length: 2000 | R/step:  0.785 ( 78.5% of max) | Gap Error: 17.495 mm
+Ep   80 | Reward:  1874.3 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.995 mm
+Ep   90 | Reward:  1874.3 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.996 mm
+Ep  100 | Reward:  1571.3 | Length: 2000 | R/step:  0.786 ( 78.6% of max) | Gap Error: 17.498 mm
+Ep  110 | Reward:  1874.2 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.995 mm
+Ep  120 | Reward:  1874.0 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 19.000 mm
+Ep  130 | Reward:  1874.1 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.998 mm
+Ep  140 | Reward:  1874.2 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.997 mm
+Ep  150 | Reward:  1874.1 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.998 mm
+Ep  160 | Reward:  1873.8 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 19.002 mm
+Ep  170 | Reward:  1874.4 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.990 mm
+Ep  180 | Reward:  1874.9 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.987 mm
+Ep  190 | Reward:  1875.5 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.927 mm
+Ep  200 | Reward:  1876.3 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.949 mm
+Ep  210 | Reward:  1273.8 | Length: 2000 | R/step:  0.637 ( 63.7% of max) | Gap Error: 16.011 mm
+Ep  220 | Reward:  1571.1 | Length: 2000 | R/step:  0.786 ( 78.6% of max) | Gap Error: 17.494 mm
+Ep  230 | Reward:  1874.6 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.990 mm
+Ep  240 | Reward:  1877.3 | Length: 2000 | R/step:  0.939 ( 93.9% of max) | Gap Error: 18.912 mm
+Ep  250 | Reward:  1584.6 | Length: 2000 | R/step:  0.792 ( 79.2% of max) | Gap Error: 17.495 mm
+Ep  260 | Reward:  1876.5 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.948 mm
+Ep  270 | Reward:  1876.6 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.955 mm
+Ep  280 | Reward:  1576.3 | Length: 2000 | R/step:  0.788 ( 78.8% of max) | Gap Error: 17.433 mm
+Ep  290 | Reward:  1878.4 | Length: 2000 | R/step:  0.939 ( 93.9% of max) | Gap Error: 18.920 mm
+Ep  300 | Reward:  1877.9 | Length: 2000 | R/step:  0.939 ( 93.9% of max) | Gap Error: 18.884 mm
+Ep  310 | Reward:  1878.5 | Length: 2000 | R/step:  0.939 ( 93.9% of max) | Gap Error: 18.880 mm
+Ep  320 | Reward:  1878.0 | Length: 2000 | R/step:  0.939 ( 93.9% of max) | Gap Error: 18.899 mm
+Ep  330 | Reward:  1575.8 | Length: 2000 | R/step:  0.788 ( 78.8% of max) | Gap Error: 17.475 mm
+Ep  340 | Reward:  1874.9 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.986 mm
+Ep  350 | Reward:  1873.8 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 19.001 mm
+Ep  360 | Reward:  1874.0 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.999 mm
+Ep  370 | Reward:  1874.3 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.995 mm
+Ep  380 | Reward:  1874.1 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.996 mm
+Ep  390 | Reward:  1874.2 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.996 mm
+Ep  400 | Reward:  1873.9 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.998 mm
+Ep  410 | Reward:  1873.9 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 19.002 mm
+Ep  420 | Reward:  1874.2 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.996 mm
+Ep  430 | Reward:  1874.2 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.998 mm
+Ep  440 | Reward:  1874.0 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.999 mm
+Ep  450 | Reward:  1874.0 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 19.000 mm
+Ep  460 | Reward:  1874.1 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.997 mm
+Ep  470 | Reward:  1874.2 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.995 mm
+Ep  480 | Reward:  1874.3 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.996 mm
+Ep  490 | Reward:  1874.5 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.992 mm
+Ep  500 | Reward:  1874.3 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.996 mm
+Ep  510 | Reward:  1874.7 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.991 mm
+Ep  520 | Reward:  1874.5 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.989 mm
+Ep  530 | Reward:  1874.7 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.990 mm
+Ep  540 | Reward:  1874.9 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.987 mm
+Ep  550 | Reward:  1875.1 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.985 mm
+Ep  560 | Reward:  1572.3 | Length: 2000 | R/step:  0.786 ( 78.6% of max) | Gap Error: 17.487 mm
+Ep  570 | Reward:  1874.4 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.995 mm
+Ep  580 | Reward:  1274.7 | Length: 2000 | R/step:  0.637 ( 63.7% of max) | Gap Error: 16.011 mm
+Ep  590 | Reward:  1875.5 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.979 mm
+Ep  600 | Reward:  1876.6 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.964 mm
+Ep  610 | Reward:  1875.6 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.977 mm
+Ep  620 | Reward:  1875.2 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.981 mm
+Ep  630 | Reward:  1875.4 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.976 mm
+Ep  640 | Reward:  1874.8 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.988 mm
+Ep  650 | Reward:  1574.9 | Length: 2000 | R/step:  0.787 ( 78.7% of max) | Gap Error: 17.495 mm
+Ep  660 | Reward:  1875.1 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.983 mm
+Ep  670 | Reward:  1875.4 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.981 mm
+Ep  680 | Reward:  1875.2 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.983 mm
+Ep  690 | Reward:  1574.9 | Length: 2000 | R/step:  0.787 ( 78.7% of max) | Gap Error: 17.504 mm
+Ep  700 | Reward:  1874.6 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.990 mm
+Ep  710 | Reward:  1874.4 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.995 mm
+Ep  720 | Reward:  1572.4 | Length: 2000 | R/step:  0.786 ( 78.6% of max) | Gap Error: 17.500 mm
+Ep  730 | Reward:  1874.2 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.996 mm
+Ep  740 | Reward:  1874.7 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.987 mm
+Ep  750 | Reward:  1874.7 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.988 mm
+Ep  760 | Reward:  1874.8 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.989 mm
+Ep  770 | Reward:  1874.6 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.993 mm
+Ep  780 | Reward:  1874.8 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.988 mm
+Ep  790 | Reward:  1574.2 | Length: 2000 | R/step:  0.787 ( 78.7% of max) | Gap Error: 17.502 mm
+Ep  800 | Reward:  1271.4 | Length: 2000 | R/step:  0.636 ( 63.6% of max) | Gap Error: 16.010 mm
+Ep  810 | Reward:  1874.5 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.993 mm
+Ep  820 | Reward:  1576.0 | Length: 2000 | R/step:  0.788 ( 78.8% of max) | Gap Error: 17.509 mm
+Ep  830 | Reward:  1573.7 | Length: 2000 | R/step:  0.787 ( 78.7% of max) | Gap Error: 17.502 mm
+Ep  840 | Reward:  1874.4 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.995 mm
+Ep  850 | Reward:  1875.2 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.981 mm
+Ep  860 | Reward:   971.6 | Length: 2000 | R/step:  0.486 ( 48.6% of max) | Gap Error: 14.520 mm
+Ep  870 | Reward:  1574.9 | Length: 2000 | R/step:  0.787 ( 78.7% of max) | Gap Error: 17.502 mm
+Ep  880 | Reward:  1874.4 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.992 mm
+Ep  890 | Reward:  1874.1 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.997 mm
+Ep  900 | Reward:  1873.9 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.999 mm
+Ep  910 | Reward:  1572.2 | Length: 2000 | R/step:  0.786 ( 78.6% of max) | Gap Error: 17.495 mm
+Ep  920 | Reward:  1874.3 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.996 mm
+Ep  930 | Reward:  1874.3 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.996 mm
+Ep  940 | Reward:  1874.6 | Length: 2000 | R/step:  0.937 ( 93.7% of max) | Gap Error: 18.990 mm
+Ep  950 | Reward:  1573.6 | Length: 2000 | R/step:  0.787 ( 78.7% of max) | Gap Error: 17.497 mm
+Ep  960 | Reward:  1571.9 | Length: 2000 | R/step:  0.786 ( 78.6% of max) | Gap Error: 17.484 mm
+Ep  970 | Reward:  1271.5 | Length: 2000 | R/step:  0.636 ( 63.6% of max) | Gap Error: 16.006 mm
+Ep  980 | Reward:  1875.0 | Length: 2000 | R/step:  0.938 ( 93.8% of max) | Gap Error: 18.985 mm
+Ep  990 | Reward:  1570.5 | Length: 2000 | R/step:  0.785 ( 78.5% of max) | Gap Error: 17.488 mm
+Ep 1000 | Reward:  1575.9 | Length: 2000 | R/step:  0.788 ( 78.8% of max) | Gap Error: 17.503 mm
+
+======================================================================
+Training Completed: 20251211_095328
--- a/RL_Trials/training_log_20251211_102102.txt
+++ b/RL_Trials/training_log_20251211_102102.txt
@@ -0,0 +1,109 @@
+Training Started: 20251211_102102
+Number of Episodes: 1000
+Print Frequency: 10
+Target Gap Height: 16.491741 mm
+======================================================================
+
+Ep   10 | Reward: -7172.7 | Length:  500 | R/step: -14.345 (-1434.5% of max) | Gap Error: 16.068 mm
+Ep   20 | Reward: -7031.0 | Length:  500 | R/step: -14.062 (-1406.2% of max) | Gap Error: 18.962 mm
+Ep   30 | Reward: -7101.5 | Length:  500 | R/step: -14.203 (-1420.3% of max) | Gap Error: 17.537 mm
+Ep   40 | Reward: -7031.1 | Length:  500 | R/step: -14.062 (-1406.2% of max) | Gap Error: 18.972 mm
+Ep   50 | Reward: -7030.8 | Length:  500 | R/step: -14.062 (-1406.2% of max) | Gap Error: 18.949 mm
+Ep   60 | Reward: -7031.1 | Length:  500 | R/step: -14.062 (-1406.2% of max) | Gap Error: 18.975 mm
+Ep   70 | Reward: -7031.1 | Length:  500 | R/step: -14.062 (-1406.2% of max) | Gap Error: 18.974 mm
+Ep   80 | Reward: -7031.2 | Length:  500 | R/step: -14.062 (-1406.2% of max) | Gap Error: 18.979 mm
+Ep   90 | Reward: -7031.1 | Length:  500 | R/step: -14.062 (-1406.2% of max) | Gap Error: 18.962 mm
+Ep  100 | Reward: -7031.1 | Length:  500 | R/step: -14.062 (-1406.2% of max) | Gap Error: 18.978 mm
+Ep  110 | Reward: -7248.5 | Length:  500 | R/step: -14.497 (-1449.7% of max) | Gap Error: 14.626 mm
+Ep  120 | Reward: -7164.7 | Length:  500 | R/step: -14.329 (-1432.9% of max) | Gap Error: 16.227 mm
+Ep  130 | Reward: -7239.3 | Length:  500 | R/step: -14.479 (-1447.9% of max) | Gap Error: 14.752 mm
+Ep  140 | Reward: -7611.2 | Length:  500 | R/step: -15.222 (-1522.2% of max) | Gap Error:  7.389 mm
+Ep  150 | Reward: -7316.7 | Length:  500 | R/step: -14.633 (-1463.3% of max) | Gap Error: 13.257 mm
+Ep  160 | Reward: -7657.2 | Length:  500 | R/step: -15.314 (-1531.4% of max) | Gap Error:  6.463 mm
+Ep  170 | Reward: -7564.9 | Length:  500 | R/step: -15.130 (-1513.0% of max) | Gap Error:  8.263 mm
+Ep  180 | Reward: -7320.2 | Length:  500 | R/step: -14.640 (-1464.0% of max) | Gap Error: 13.172 mm
+Ep  190 | Reward: -7601.5 | Length:  500 | R/step: -15.203 (-1520.3% of max) | Gap Error:  7.545 mm
+Ep  200 | Reward: -7311.8 | Length:  500 | R/step: -14.624 (-1462.4% of max) | Gap Error: 13.311 mm
+Ep  210 | Reward: -7100.9 | Length:  500 | R/step: -14.202 (-1420.2% of max) | Gap Error: 17.572 mm
+Ep  220 | Reward: -7170.8 | Length:  500 | R/step: -14.342 (-1434.2% of max) | Gap Error: 16.143 mm
+Ep  230 | Reward: -7241.1 | Length:  500 | R/step: -14.482 (-1448.2% of max) | Gap Error: 14.729 mm
+Ep  240 | Reward: -7310.3 | Length:  500 | R/step: -14.621 (-1462.1% of max) | Gap Error: 13.338 mm
+Ep  250 | Reward: -7244.1 | Length:  500 | R/step: -14.488 (-1448.8% of max) | Gap Error: 14.699 mm
+Ep  260 | Reward: -7172.7 | Length:  500 | R/step: -14.345 (-1434.5% of max) | Gap Error: 16.127 mm
+Ep  270 | Reward: -7031.2 | Length:  500 | R/step: -14.062 (-1406.2% of max) | Gap Error: 18.962 mm
+Ep  280 | Reward: -7031.2 | Length:  500 | R/step: -14.062 (-1406.2% of max) | Gap Error: 18.977 mm
+Ep  290 | Reward: -7031.3 | Length:  500 | R/step: -14.063 (-1406.3% of max) | Gap Error: 18.967 mm
+Ep  300 | Reward: -7031.4 | Length:  500 | R/step: -14.063 (-1406.3% of max) | Gap Error: 18.965 mm
+Ep  310 | Reward: -7031.3 | Length:  500 | R/step: -14.063 (-1406.3% of max) | Gap Error: 18.927 mm
+Ep  320 | Reward: -7031.3 | Length:  500 | R/step: -14.063 (-1406.3% of max) | Gap Error: 18.980 mm
+Ep  330 | Reward: -7031.4 | Length:  500 | R/step: -14.063 (-1406.3% of max) | Gap Error: 18.957 mm
+Ep  340 | Reward: -7101.4 | Length:  500 | R/step: -14.203 (-1420.3% of max) | Gap Error: 17.557 mm
+Ep  350 | Reward: -7102.1 | Length:  500 | R/step: -14.204 (-1420.4% of max) | Gap Error: 17.547 mm
+Ep  360 | Reward: -7312.6 | Length:  500 | R/step: -14.625 (-1462.5% of max) | Gap Error: 13.311 mm
+Ep  370 | Reward: -7315.5 | Length:  500 | R/step: -14.631 (-1463.1% of max) | Gap Error: 13.255 mm
+Ep  380 | Reward: -7380.4 | Length:  500 | R/step: -14.761 (-1476.1% of max) | Gap Error: 11.917 mm
+Ep  390 | Reward: -7171.6 | Length:  500 | R/step: -14.343 (-1434.3% of max) | Gap Error: 16.108 mm
+Ep  400 | Reward: -7244.8 | Length:  500 | R/step: -14.490 (-1449.0% of max) | Gap Error: 14.659 mm
+Ep  410 | Reward: -7030.6 | Length:  500 | R/step: -14.061 (-1406.1% of max) | Gap Error: 18.943 mm
+Ep  420 | Reward: -7169.8 | Length:  500 | R/step: -14.340 (-1434.0% of max) | Gap Error: 16.169 mm
+Ep  430 | Reward: -7381.3 | Length:  500 | R/step: -14.763 (-1476.3% of max) | Gap Error: 11.908 mm
+Ep  440 | Reward: -7306.4 | Length:  500 | R/step: -14.613 (-1461.3% of max) | Gap Error: 13.356 mm
+Ep  450 | Reward: -7099.3 | Length:  500 | R/step: -14.199 (-1419.9% of max) | Gap Error: 17.564 mm
+Ep  460 | Reward: -7031.0 | Length:  500 | R/step: -14.062 (-1406.2% of max) | Gap Error: 18.969 mm
+Ep  470 | Reward: -7030.5 | Length:  500 | R/step: -14.061 (-1406.1% of max) | Gap Error: 18.934 mm
+Ep  480 | Reward: -7100.7 | Length:  500 | R/step: -14.201 (-1420.1% of max) | Gap Error: 17.549 mm
+Ep  490 | Reward: -7169.3 | Length:  500 | R/step: -14.339 (-1433.9% of max) | Gap Error: 16.184 mm
+Ep  500 | Reward: -7101.3 | Length:  500 | R/step: -14.203 (-1420.3% of max) | Gap Error: 17.559 mm
+Ep  510 | Reward: -7168.6 | Length:  500 | R/step: -14.337 (-1433.7% of max) | Gap Error: 16.185 mm
+Ep  520 | Reward: -7385.4 | Length:  500 | R/step: -14.771 (-1477.1% of max) | Gap Error: 11.860 mm
+Ep  530 | Reward: -7386.8 | Length:  500 | R/step: -14.774 (-1477.4% of max) | Gap Error: 11.816 mm
+Ep  540 | Reward: -7173.0 | Length:  500 | R/step: -14.346 (-1434.6% of max) | Gap Error: 16.121 mm
+Ep  550 | Reward: -7244.3 | Length:  500 | R/step: -14.489 (-1448.9% of max) | Gap Error: 14.693 mm
+Ep  560 | Reward: -7242.9 | Length:  500 | R/step: -14.486 (-1448.6% of max) | Gap Error: 14.731 mm
+Ep  570 | Reward: -7315.1 | Length:  500 | R/step: -14.630 (-1463.0% of max) | Gap Error: 13.236 mm
+Ep  580 | Reward: -7243.6 | Length:  500 | R/step: -14.487 (-1448.7% of max) | Gap Error: 14.684 mm
+Ep  590 | Reward: -7031.1 | Length:  500 | R/step: -14.062 (-1406.2% of max) | Gap Error: 18.979 mm
+Ep  600 | Reward: -7168.0 | Length:  500 | R/step: -14.336 (-1433.6% of max) | Gap Error: 16.203 mm
+Ep  610 | Reward: -7169.4 | Length:  500 | R/step: -14.339 (-1433.9% of max) | Gap Error: 16.183 mm
+Ep  620 | Reward: -7102.4 | Length:  500 | R/step: -14.205 (-1420.5% of max) | Gap Error: 17.536 mm
+Ep  630 | Reward: -7166.3 | Length:  500 | R/step: -14.333 (-1433.3% of max) | Gap Error: 16.216 mm
+Ep  640 | Reward: -7030.7 | Length:  500 | R/step: -14.061 (-1406.1% of max) | Gap Error: 18.949 mm
+Ep  650 | Reward: -7031.0 | Length:  500 | R/step: -14.062 (-1406.2% of max) | Gap Error: 18.966 mm
+Ep  660 | Reward: -7098.9 | Length:  500 | R/step: -14.198 (-1419.8% of max) | Gap Error: 17.581 mm
+Ep  670 | Reward: -7236.5 | Length:  500 | R/step: -14.473 (-1447.3% of max) | Gap Error: 14.809 mm
+Ep  680 | Reward: -7174.4 | Length:  500 | R/step: -14.349 (-1434.9% of max) | Gap Error: 16.106 mm
+Ep  690 | Reward: -7314.2 | Length:  500 | R/step: -14.628 (-1462.8% of max) | Gap Error: 13.294 mm
+Ep  700 | Reward: -7102.0 | Length:  500 | R/step: -14.204 (-1420.4% of max) | Gap Error: 17.539 mm
+Ep  710 | Reward: -7314.9 | Length:  500 | R/step: -14.630 (-1463.0% of max) | Gap Error: 13.277 mm
+Ep  720 | Reward: -7310.9 | Length:  500 | R/step: -14.622 (-1462.2% of max) | Gap Error: 13.335 mm
+Ep  730 | Reward: -7315.2 | Length:  500 | R/step: -14.630 (-1463.0% of max) | Gap Error: 13.275 mm
+Ep  740 | Reward: -7386.0 | Length:  500 | R/step: -14.772 (-1477.2% of max) | Gap Error: 11.842 mm
+Ep  750 | Reward: -7300.9 | Length:  500 | R/step: -14.602 (-1460.2% of max) | Gap Error: 13.480 mm
+Ep  760 | Reward: -7311.7 | Length:  500 | R/step: -14.623 (-1462.3% of max) | Gap Error: 13.314 mm
+Ep  770 | Reward: -7382.8 | Length:  500 | R/step: -14.766 (-1476.6% of max) | Gap Error: 11.885 mm
+Ep  780 | Reward: -7529.6 | Length:  500 | R/step: -15.059 (-1505.9% of max) | Gap Error:  8.946 mm
+Ep  790 | Reward: -7523.1 | Length:  500 | R/step: -15.046 (-1504.6% of max) | Gap Error:  9.063 mm
+Ep  800 | Reward: -7391.4 | Length:  500 | R/step: -14.783 (-1478.3% of max) | Gap Error: 11.758 mm
+Ep  810 | Reward: -7531.9 | Length:  500 | R/step: -15.064 (-1506.4% of max) | Gap Error:  8.915 mm
+Ep  820 | Reward: -7462.2 | Length:  500 | R/step: -14.924 (-1492.4% of max) | Gap Error: 10.318 mm
+Ep  830 | Reward: -7214.0 | Length:  500 | R/step: -14.428 (-1442.8% of max) | Gap Error: 15.251 mm
+Ep  840 | Reward: -7426.1 | Length:  500 | R/step: -14.852 (-1485.2% of max) | Gap Error: 10.944 mm
+Ep  850 | Reward: -7460.9 | Length:  500 | R/step: -14.922 (-1492.2% of max) | Gap Error: 10.270 mm
+Ep  860 | Reward: -7663.9 | Length:  500 | R/step: -15.328 (-1532.8% of max) | Gap Error:  6.237 mm
+Ep  870 | Reward: -7437.2 | Length:  500 | R/step: -14.874 (-1487.4% of max) | Gap Error: 10.765 mm
+Ep  880 | Reward: -6530.8 | Length:  421 | R/step: -15.524 (-1552.4% of max) | Gap Error:  7.413 mm
+Ep  890 | Reward: -7696.3 | Length:  500 | R/step: -15.393 (-1539.3% of max) | Gap Error:  5.736 mm
+Ep  900 | Reward: -7732.4 | Length:  500 | R/step: -15.465 (-1546.5% of max) | Gap Error:  5.083 mm
+Ep  910 | Reward: -7746.3 | Length:  500 | R/step: -15.493 (-1549.3% of max) | Gap Error:  4.839 mm
+Ep  920 | Reward: -7745.4 | Length:  500 | R/step: -15.491 (-1549.1% of max) | Gap Error:  4.833 mm
+Ep  930 | Reward: -7749.5 | Length:  500 | R/step: -15.499 (-1549.9% of max) | Gap Error:  4.838 mm
+Ep  940 | Reward: -5944.1 | Length:  383 | R/step: -15.536 (-1553.6% of max) | Gap Error:  9.080 mm
+Ep  950 | Reward: -4103.1 | Length:  261 | R/step: -15.739 (-1573.9% of max) | Gap Error: 12.558 mm
+Ep  960 | Reward: -4697.1 | Length:  303 | R/step: -15.517 (-1551.7% of max) | Gap Error: 12.987 mm
+Ep  970 | Reward: -4455.6 | Length:  288 | R/step: -15.455 (-1545.5% of max) | Gap Error: 14.009 mm
+Ep  980 | Reward: -3854.4 | Length:  251 | R/step: -15.356 (-1535.6% of max) | Gap Error: 14.805 mm
+Ep  990 | Reward: -5979.5 | Length:  415 | R/step: -14.405 (-1440.5% of max) | Gap Error: 17.115 mm
+Ep 1000 | Reward: -7031.2 | Length:  500 | R/step: -14.062 (-1406.2% of max) | Gap Error: 18.909 mm
+
+======================================================================
+Training Completed: 20251211_103854
--- a/RL_Trials/training_log_20251211_110643.txt
+++ b/RL_Trials/training_log_20251211_110643.txt
@@ -0,0 +1,109 @@
+Training Started: 20251211_110643
+Number of Episodes: 1000
+Print Frequency: 10
+Target Gap Height: 16.491741 mm
+======================================================================
+
+Ep   10 | Reward: -7123.0 | Length:  457 | R/step: -15.580 (-1558.0% of max) | Gap Error: 14.704 mm
+Ep   20 | Reward: -6697.0 | Length:  418 | R/step: -16.025 (-1602.5% of max) | Gap Error: 11.138 mm
+Ep   30 | Reward: -6050.2 | Length:  377 | R/step: -16.031 (-1603.1% of max) | Gap Error: 12.985 mm
+Ep   40 | Reward: -7891.0 | Length:  500 | R/step: -15.782 (-1578.2% of max) | Gap Error: 11.726 mm
+Ep   50 | Reward: -8254.2 | Length:  500 | R/step: -16.508 (-1650.8% of max) | Gap Error:  4.602 mm
+Ep   60 | Reward: -8255.5 | Length:  500 | R/step: -16.511 (-1651.1% of max) | Gap Error:  4.537 mm
+Ep   70 | Reward: -8185.8 | Length:  500 | R/step: -16.372 (-1637.2% of max) | Gap Error:  5.910 mm
+Ep   80 | Reward: -7920.1 | Length:  500 | R/step: -15.840 (-1584.0% of max) | Gap Error: 11.089 mm
+Ep   90 | Reward: -8262.8 | Length:  500 | R/step: -16.526 (-1652.6% of max) | Gap Error:  4.395 mm
+Ep  100 | Reward: -8164.6 | Length:  500 | R/step: -16.329 (-1632.9% of max) | Gap Error:  6.340 mm
+Ep  110 | Reward: -5294.0 | Length:  332 | R/step: -15.960 (-1596.0% of max) | Gap Error: 14.124 mm
+Ep  120 | Reward: -6081.3 | Length:  375 | R/step: -16.230 (-1623.0% of max) | Gap Error: 11.482 mm
+Ep  130 | Reward: -7346.6 | Length:  458 | R/step: -16.058 (-1605.8% of max) | Gap Error: 10.391 mm
+Ep  140 | Reward: -7065.4 | Length:  458 | R/step: -15.427 (-1542.7% of max) | Gap Error: 15.974 mm
+Ep  150 | Reward: -6516.7 | Length:  415 | R/step: -15.695 (-1569.5% of max) | Gap Error: 13.902 mm
+Ep  160 | Reward: -6601.1 | Length:  417 | R/step: -15.822 (-1582.2% of max) | Gap Error: 13.542 mm
+Ep  170 | Reward: -6440.3 | Length:  416 | R/step: -15.463 (-1546.3% of max) | Gap Error: 16.043 mm
+Ep  180 | Reward: -5308.5 | Length:  336 | R/step: -15.799 (-1579.9% of max) | Gap Error: 15.176 mm
+Ep  190 | Reward: -5847.7 | Length:  378 | R/step: -15.458 (-1545.8% of max) | Gap Error: 16.860 mm
+Ep  200 | Reward: -5325.4 | Length:  333 | R/step: -16.007 (-1600.7% of max) | Gap Error: 14.364 mm
+Ep  210 | Reward: -5893.1 | Length:  376 | R/step: -15.652 (-1565.2% of max) | Gap Error: 15.352 mm
+Ep  220 | Reward: -7743.7 | Length:  500 | R/step: -15.487 (-1548.7% of max) | Gap Error: 14.608 mm
+Ep  230 | Reward: -7180.3 | Length:  460 | R/step: -15.596 (-1559.6% of max) | Gap Error: 14.406 mm
+Ep  240 | Reward: -7221.9 | Length:  459 | R/step: -15.741 (-1574.1% of max) | Gap Error: 13.200 mm
+Ep  250 | Reward: -5970.0 | Length:  377 | R/step: -15.852 (-1585.2% of max) | Gap Error: 14.499 mm
+Ep  260 | Reward: -7481.9 | Length:  462 | R/step: -16.212 (-1621.2% of max) | Gap Error:  8.666 mm
+Ep  270 | Reward: -7189.7 | Length:  457 | R/step: -15.732 (-1573.2% of max) | Gap Error: 13.117 mm
+Ep  280 | Reward: -6560.4 | Length:  418 | R/step: -15.710 (-1571.0% of max) | Gap Error: 14.139 mm
+Ep  290 | Reward: -7423.1 | Length:  459 | R/step: -16.186 (-1618.6% of max) | Gap Error:  9.175 mm
+Ep  300 | Reward: -7137.2 | Length:  458 | R/step: -15.594 (-1559.4% of max) | Gap Error: 14.544 mm
+Ep  310 | Reward: -6457.1 | Length:  416 | R/step: -15.522 (-1552.2% of max) | Gap Error: 15.529 mm
+Ep  320 | Reward: -7154.6 | Length:  459 | R/step: -15.598 (-1559.8% of max) | Gap Error: 14.440 mm
+Ep  330 | Reward: -7814.2 | Length:  500 | R/step: -15.628 (-1562.8% of max) | Gap Error: 13.236 mm
+Ep  340 | Reward: -7199.5 | Length:  457 | R/step: -15.744 (-1574.4% of max) | Gap Error: 13.286 mm
+Ep  350 | Reward: -7234.7 | Length:  460 | R/step: -15.738 (-1573.8% of max) | Gap Error: 13.107 mm
+Ep  360 | Reward: -7819.8 | Length:  500 | R/step: -15.640 (-1564.0% of max) | Gap Error: 13.160 mm
+Ep  370 | Reward: -7148.2 | Length:  459 | R/step: -15.580 (-1558.0% of max) | Gap Error: 14.240 mm
+Ep  380 | Reward: -7888.0 | Length:  500 | R/step: -15.776 (-1577.6% of max) | Gap Error: 11.788 mm
+Ep  390 | Reward: -6321.7 | Length:  416 | R/step: -15.186 (-1518.6% of max) | Gap Error: 18.744 mm
+Ep  400 | Reward: -7809.0 | Length:  500 | R/step: -15.618 (-1561.8% of max) | Gap Error: 13.359 mm
+Ep  410 | Reward: -7004.8 | Length:  459 | R/step: -15.274 (-1527.4% of max) | Gap Error: 17.361 mm
+Ep  420 | Reward: -7003.1 | Length:  455 | R/step: -15.391 (-1539.1% of max) | Gap Error: 15.613 mm
+Ep  430 | Reward: -7280.0 | Length:  458 | R/step: -15.906 (-1590.6% of max) | Gap Error: 11.408 mm
+Ep  440 | Reward: -7168.7 | Length:  460 | R/step: -15.594 (-1559.4% of max) | Gap Error: 13.981 mm
+Ep  450 | Reward: -7605.7 | Length:  500 | R/step: -15.211 (-1521.1% of max) | Gap Error: 17.346 mm
+Ep  460 | Reward: -6611.7 | Length:  417 | R/step: -15.871 (-1587.1% of max) | Gap Error: 12.581 mm
+Ep  470 | Reward: -7086.7 | Length:  459 | R/step: -15.429 (-1542.9% of max) | Gap Error: 15.629 mm
+Ep  480 | Reward: -7674.5 | Length:  500 | R/step: -15.349 (-1534.9% of max) | Gap Error: 16.008 mm
+Ep  490 | Reward: -6481.8 | Length:  417 | R/step: -15.529 (-1552.9% of max) | Gap Error: 15.462 mm
+Ep  500 | Reward: -7959.4 | Length:  500 | R/step: -15.919 (-1591.9% of max) | Gap Error: 10.456 mm
+Ep  510 | Reward: -7088.4 | Length:  459 | R/step: -15.436 (-1543.6% of max) | Gap Error: 15.514 mm
+Ep  520 | Reward: -7600.0 | Length:  500 | R/step: -15.200 (-1520.0% of max) | Gap Error: 17.460 mm
+Ep  530 | Reward: -5979.7 | Length:  378 | R/step: -15.819 (-1581.9% of max) | Gap Error: 14.469 mm
+Ep  540 | Reward: -6667.2 | Length:  416 | R/step: -16.038 (-1603.8% of max) | Gap Error: 11.886 mm
+Ep  550 | Reward: -7165.7 | Length:  460 | R/step: -15.595 (-1559.5% of max) | Gap Error: 14.504 mm
+Ep  560 | Reward: -7209.9 | Length:  458 | R/step: -15.739 (-1573.9% of max) | Gap Error: 12.949 mm
+Ep  570 | Reward: -7745.6 | Length:  500 | R/step: -15.491 (-1549.1% of max) | Gap Error: 14.593 mm
+Ep  580 | Reward: -7678.6 | Length:  500 | R/step: -15.357 (-1535.7% of max) | Gap Error: 15.895 mm
+Ep  590 | Reward: -7107.0 | Length:  456 | R/step: -15.579 (-1557.9% of max) | Gap Error: 14.736 mm
+Ep  600 | Reward: -7189.8 | Length:  458 | R/step: -15.712 (-1571.2% of max) | Gap Error: 13.386 mm
+Ep  610 | Reward: -6914.9 | Length:  458 | R/step: -15.115 (-1511.5% of max) | Gap Error: 18.915 mm
+Ep  620 | Reward: -7748.9 | Length:  500 | R/step: -15.498 (-1549.8% of max) | Gap Error: 14.607 mm
+Ep  630 | Reward: -7894.4 | Length:  500 | R/step: -15.789 (-1578.9% of max) | Gap Error: 11.719 mm
+Ep  640 | Reward: -6970.0 | Length:  457 | R/step: -15.255 (-1525.5% of max) | Gap Error: 17.182 mm
+Ep  650 | Reward: -7746.9 | Length:  500 | R/step: -15.494 (-1549.4% of max) | Gap Error: 14.609 mm
+Ep  660 | Reward: -7604.6 | Length:  500 | R/step: -15.209 (-1520.9% of max) | Gap Error: 17.373 mm
+Ep  670 | Reward: -7747.7 | Length:  500 | R/step: -15.495 (-1549.5% of max) | Gap Error: 14.560 mm
+Ep  680 | Reward: -6720.6 | Length:  419 | R/step: -16.043 (-1604.3% of max) | Gap Error: 11.076 mm
+Ep  690 | Reward: -6998.9 | Length:  458 | R/step: -15.275 (-1527.5% of max) | Gap Error: 17.079 mm
+Ep  700 | Reward: -7746.6 | Length:  500 | R/step: -15.493 (-1549.3% of max) | Gap Error: 14.625 mm
+Ep  710 | Reward: -7894.9 | Length:  500 | R/step: -15.790 (-1579.0% of max) | Gap Error: 11.707 mm
+Ep  720 | Reward: -7966.2 | Length:  500 | R/step: -15.932 (-1593.2% of max) | Gap Error: 10.268 mm
+Ep  730 | Reward: -7966.1 | Length:  500 | R/step: -15.932 (-1593.2% of max) | Gap Error: 10.285 mm
+Ep  740 | Reward: -7604.9 | Length:  500 | R/step: -15.210 (-1521.0% of max) | Gap Error: 17.376 mm
+Ep  750 | Reward: -7673.6 | Length:  500 | R/step: -15.347 (-1534.7% of max) | Gap Error: 16.016 mm
+Ep  760 | Reward: -7964.8 | Length:  500 | R/step: -15.930 (-1593.0% of max) | Gap Error: 10.311 mm
+Ep  770 | Reward: -7747.0 | Length:  500 | R/step: -15.494 (-1549.4% of max) | Gap Error: 14.583 mm
+Ep  780 | Reward: -7963.6 | Length:  500 | R/step: -15.927 (-1592.7% of max) | Gap Error: 10.346 mm
+Ep  790 | Reward: -7209.1 | Length:  458 | R/step: -15.747 (-1574.7% of max) | Gap Error: 13.169 mm
+Ep  800 | Reward: -7879.7 | Length:  500 | R/step: -15.759 (-1575.9% of max) | Gap Error: 11.951 mm
+Ep  810 | Reward: -7960.0 | Length:  500 | R/step: -15.920 (-1592.0% of max) | Gap Error: 10.372 mm
+Ep  820 | Reward: -7818.1 | Length:  500 | R/step: -15.636 (-1563.6% of max) | Gap Error: 13.177 mm
+Ep  830 | Reward: -7965.3 | Length:  500 | R/step: -15.931 (-1593.1% of max) | Gap Error: 10.322 mm
+Ep  840 | Reward: -7891.4 | Length:  500 | R/step: -15.783 (-1578.3% of max) | Gap Error: 11.716 mm
+Ep  850 | Reward: -7883.5 | Length:  500 | R/step: -15.767 (-1576.7% of max) | Gap Error: 11.861 mm
+Ep  860 | Reward: -7531.1 | Length:  500 | R/step: -15.062 (-1506.2% of max) | Gap Error: 18.853 mm
+Ep  870 | Reward: -7601.2 | Length:  500 | R/step: -15.202 (-1520.2% of max) | Gap Error: 17.472 mm
+Ep  880 | Reward: -7747.0 | Length:  500 | R/step: -15.494 (-1549.4% of max) | Gap Error: 14.594 mm
+Ep  890 | Reward: -7600.6 | Length:  500 | R/step: -15.201 (-1520.1% of max) | Gap Error: 17.470 mm
+Ep  900 | Reward: -7676.1 | Length:  500 | R/step: -15.352 (-1535.2% of max) | Gap Error: 15.984 mm
+Ep  910 | Reward: -7750.4 | Length:  500 | R/step: -15.501 (-1550.1% of max) | Gap Error: 14.519 mm
+Ep  920 | Reward: -7820.0 | Length:  500 | R/step: -15.640 (-1564.0% of max) | Gap Error: 13.170 mm
+Ep  930 | Reward: -7884.7 | Length:  500 | R/step: -15.769 (-1576.9% of max) | Gap Error: 11.879 mm
+Ep  940 | Reward: -7604.0 | Length:  500 | R/step: -15.208 (-1520.8% of max) | Gap Error: 17.410 mm
+Ep  950 | Reward: -7677.5 | Length:  500 | R/step: -15.355 (-1535.5% of max) | Gap Error: 15.963 mm
+Ep  960 | Reward: -7883.3 | Length:  500 | R/step: -15.767 (-1576.7% of max) | Gap Error: 11.895 mm
+Ep  970 | Reward: -7818.1 | Length:  500 | R/step: -15.636 (-1563.6% of max) | Gap Error: 13.216 mm
+Ep  980 | Reward: -7743.4 | Length:  500 | R/step: -15.487 (-1548.7% of max) | Gap Error: 14.655 mm
+Ep  990 | Reward: -7960.6 | Length:  500 | R/step: -15.921 (-1592.1% of max) | Gap Error: 10.375 mm
+Ep 1000 | Reward: -7601.5 | Length:  500 | R/step: -15.203 (-1520.3% of max) | Gap Error: 17.483 mm
+
+======================================================================
+Training Completed: 20251211_112339
--- a/RL_Trials/training_log_20251211_121404.txt
+++ b/RL_Trials/training_log_20251211_121404.txt
@@ -0,0 +1,23 @@
+Training Started: 20251211_121404
+Number of Episodes: 2000
+Print Frequency: 20
+Target Gap Height: 16.491741 mm
+Network: 256 hidden units with LayerNorm
+Policy LR: 5e-4, Value LR: 1e-3, Entropy: 0.02
+======================================================================
+
+Ep   20 | R: -16131.1 | Len: 500 | R/s: -32.26 (-4032.8%) | Gap:  4.41mm (min: 4.26) | Best:  4.26mm
+Ep   40 | R: -16140.1 | Len: 500 | R/s: -32.28 (-4035.0%) | Gap:  4.36mm (min: 4.25) | Best:  4.25mm
+Ep   60 | R: -16140.8 | Len: 500 | R/s: -32.28 (-4035.2%) | Gap:  4.35mm (min: 4.22) | Best:  4.22mm
+Ep   80 | R: -16143.7 | Len: 500 | R/s: -32.29 (-4035.9%) | Gap:  4.33mm (min: 4.22) | Best:  4.22mm
+Ep  100 | R: -16142.5 | Len: 500 | R/s: -32.29 (-4035.6%) | Gap:  4.35mm (min: 4.26) | Best:  4.22mm
+Ep  120 | R: -16142.2 | Len: 500 | R/s: -32.28 (-4035.5%) | Gap:  4.35mm (min: 4.26) | Best:  4.22mm
+Ep  140 | R: -16142.9 | Len: 500 | R/s: -32.29 (-4035.7%) | Gap:  4.33mm (min: 4.23) | Best:  4.22mm
+Ep  160 | R: -16144.1 | Len: 500 | R/s: -32.29 (-4036.0%) | Gap:  4.32mm (min: 4.21) | Best:  4.21mm
+Ep  180 | R: -16141.0 | Len: 500 | R/s: -32.28 (-4035.3%) | Gap:  4.36mm (min: 4.22) | Best:  4.21mm
+Ep  200 | R: -16143.8 | Len: 500 | R/s: -32.29 (-4035.9%) | Gap:  4.33mm (min: 4.21) | Best:  4.21mm
+Ep  220 | R: -16144.3 | Len: 500 | R/s: -32.29 (-4036.1%) | Gap:  4.33mm (min: 4.23) | Best:  4.21mm
+Ep  240 | R: -16145.9 | Len: 500 | R/s: -32.29 (-4036.5%) | Gap:  4.32mm (min: 4.21) | Best:  4.21mm
+Ep  260 | R: -16142.5 | Len: 500 | R/s: -32.28 (-4035.6%) | Gap:  4.34mm (min: 4.24) | Best:  4.21mm
+Ep  280 | R: -16146.9 | Len: 500 | R/s: -32.29 (-4036.7%) | Gap:  4.32mm (min: 4.21) | Best:  4.21mm
+Ep  300 | R: -16145.6 | Len: 500 | R/s: -32.29 (-4036.4%) | Gap:  4.34mm (min: 4.21) | Best:  4.21mm
--- a/RL_Trials/training_log_20251211_122333.txt
+++ b/RL_Trials/training_log_20251211_122333.txt
@@ -0,0 +1,19 @@
+Training Started: 20251211_122333
+Number of Episodes: 2000
+Print Frequency: 20
+Target Gap Height: 16.491741 mm
+Network: 256 hidden units with LayerNorm
+Policy LR: 5e-4, Value LR: 1e-3, Entropy: 0.02
+======================================================================
+
+Ep   20 | R: -16150.6 | Len: 500 | R/s: -32.30 (-4037.7%) | Gap:  4.18mm (min: 4.10) | Best:  4.10mm
+Ep   40 | R: -16154.2 | Len: 500 | R/s: -32.31 (-4038.5%) | Gap:  4.18mm (min: 4.13) | Best:  4.10mm
+Ep   60 | R: -16155.5 | Len: 500 | R/s: -32.31 (-4038.9%) | Gap:  4.16mm (min: 4.12) | Best:  4.10mm
+Ep   80 | R: -16151.9 | Len: 500 | R/s: -32.30 (-4038.0%) | Gap:  4.20mm (min: 4.13) | Best:  4.10mm
+Ep  100 | R: -16155.1 | Len: 500 | R/s: -32.31 (-4038.8%) | Gap:  4.18mm (min: 4.13) | Best:  4.10mm
+Ep  120 | R: -16155.3 | Len: 500 | R/s: -32.31 (-4038.8%) | Gap:  4.19mm (min: 4.14) | Best:  4.10mm
+Ep  140 | R: -16154.6 | Len: 500 | R/s: -32.31 (-4038.7%) | Gap:  4.18mm (min: 4.12) | Best:  4.10mm
+Ep  160 | R: -16154.8 | Len: 500 | R/s: -32.31 (-4038.7%) | Gap:  4.19mm (min: 4.12) | Best:  4.10mm
+Ep  180 | R: -16157.5 | Len: 500 | R/s: -32.32 (-4039.4%) | Gap:  4.19mm (min: 4.11) | Best:  4.10mm
+Ep  200 | R: -16157.7 | Len: 500 | R/s: -32.32 (-4039.4%) | Gap:  4.21mm (min: 4.17) | Best:  4.10mm
+Ep  220 | R: -16159.4 | Len: 500 | R/s: -32.32 (-4039.8%) | Gap:  4.19mm (min: 4.12) | Best:  4.10mm
--- a/RL_Trials/training_log_20251211_184547.txt
+++ b/RL_Trials/training_log_20251211_184547.txt
@@ -0,0 +1,9 @@
+Training Started: 20251211_184547
+Number of Episodes: 2000
+Print Frequency: 20
+Target Gap Height: 16.491741 mm
+Network: 256 hidden units with LayerNorm
+Policy LR: 5e-4, Value LR: 1e-3, Entropy: 0.02
+======================================================================
+
+Ep   20 | R: -14585.9 | Len: 453 | R/s: -32.19 (-4023.9%) | Gap: 21.69mm (min:10.80) | Best: 10.80mm
--- a/RL_Trials/training_log_20251211_185541.txt
+++ b/RL_Trials/training_log_20251211_185541.txt
@@ -0,0 +1,12 @@
+Training Started: 20251211_185541
+Number of Episodes: 2000
+Print Frequency: 20
+Target Gap Height: 16.491741 mm
+Network: 256 hidden units with LayerNorm
+Policy LR: 5e-4, Value LR: 1e-3, Entropy: 0.02
+======================================================================
+
+Ep   20 | R: -264.1 | Len:  28 | R/s: -9.47 (-1183.4%) | Gap: 20.27mm (min:11.48) | Best: 11.48mm
+Ep   40 | R: -222.6 | Len:  24 | R/s: -9.45 (-1181.5%) | Gap: 21.62mm (min:19.97) | Best: 11.48mm
+Ep   60 | R: -211.8 | Len:  23 | R/s: -9.37 (-1171.3%) | Gap: 21.47mm (min:19.53) | Best: 11.48mm
+Ep   80 | R: -144.7 | Len:  15 | R/s: -9.65 (-1206.2%) | Gap: 21.89mm (min:20.01) | Best: 11.48mm
--- a/RL_Trials/training_log_20251211_190447.txt
+++ b/RL_Trials/training_log_20251211_190447.txt
@@ -0,0 +1,40 @@
+Training Started: 20251211_190447
+Number of Episodes: 2000
+Print Frequency: 20
+Target Gap Height: 16.491741 mm
+Network: 256 hidden units with LayerNorm
+Policy LR: 5e-4, Value LR: 1e-3, Entropy: 0.02
+======================================================================
+
+Ep   20 | R: -2049.0 | Len:  66 | R/s: -31.16 (-3895.5%) | Gap: 13.46mm (min:10.84) | Best: 10.84mm
+Ep   40 | R: -1124.2 | Len:  36 | R/s: -30.89 (-3860.7%) | Gap: 12.94mm (min:10.62) | Best: 10.62mm
+Ep   60 | R: -900.8 | Len:  29 | R/s: -30.75 (-3843.2%) | Gap: 12.07mm (min:10.35) | Best: 10.35mm
+Ep   80 | R: -885.5 | Len:  29 | R/s: -30.75 (-3843.2%) | Gap: 11.88mm (min:10.26) | Best: 10.26mm
+Ep  100 | R: -1242.5 | Len:  40 | R/s: -30.99 (-3873.3%) | Gap: 13.41mm (min:10.71) | Best: 10.26mm
+Ep  120 | R: -972.3 | Len:  32 | R/s: -30.77 (-3846.0%) | Gap: 12.38mm (min:10.39) | Best: 10.26mm
+Ep  140 | R: -1182.0 | Len:  38 | R/s: -30.90 (-3862.8%) | Gap: 13.08mm (min:10.60) | Best: 10.26mm
+Ep  160 | R: -953.0 | Len:  31 | R/s: -30.74 (-3842.8%) | Gap: 12.06mm (min:10.68) | Best: 10.26mm
+Ep  180 | R: -1178.0 | Len:  38 | R/s: -30.92 (-3864.9%) | Gap: 12.61mm (min:10.55) | Best: 10.26mm
+Ep  200 | R: -3914.1 | Len: 125 | R/s: -31.34 (-3917.2%) | Gap: 13.81mm (min:10.84) | Best: 10.26mm
+Ep  220 | R: -13001.1 | Len: 412 | R/s: -31.59 (-3948.3%) | Gap: 17.74mm (min:11.72) | Best: 10.26mm
+Ep  240 | R: -15119.0 | Len: 478 | R/s: -31.60 (-3950.4%) | Gap: 18.71mm (min:13.85) | Best: 10.26mm
+Ep  260 | R: -12974.1 | Len: 411 | R/s: -31.59 (-3949.3%) | Gap: 17.73mm (min:12.36) | Best: 10.26mm
+Ep  280 | R: -13695.3 | Len: 433 | R/s: -31.60 (-3949.5%) | Gap: 18.07mm (min:12.24) | Best: 10.26mm
+Ep  300 | R: -15814.3 | Len: 500 | R/s: -31.63 (-3953.6%) | Gap: 18.97mm (min:18.92) | Best: 10.26mm
+Ep  320 | R: -15814.0 | Len: 500 | R/s: -31.63 (-3953.5%) | Gap: 18.97mm (min:18.93) | Best: 10.26mm
+Ep  340 | R: -15809.3 | Len: 500 | R/s: -31.62 (-3952.3%) | Gap: 18.96mm (min:18.89) | Best: 10.26mm
+Ep  360 | R: -15816.5 | Len: 500 | R/s: -31.63 (-3954.1%) | Gap: 18.97mm (min:18.91) | Best: 10.26mm
+Ep  380 | R: -15815.5 | Len: 500 | R/s: -31.63 (-3953.9%) | Gap: 18.97mm (min:18.92) | Best: 10.26mm
+Ep  400 | R: -15818.2 | Len: 500 | R/s: -31.64 (-3954.5%) | Gap: 18.98mm (min:18.93) | Best: 10.26mm
+Ep  420 | R: -15815.1 | Len: 500 | R/s: -31.63 (-3953.8%) | Gap: 18.97mm (min:18.89) | Best: 10.26mm
+Ep  440 | R: -15816.0 | Len: 500 | R/s: -31.63 (-3954.0%) | Gap: 18.97mm (min:18.94) | Best: 10.26mm
+Ep  460 | R: -15818.5 | Len: 500 | R/s: -31.64 (-3954.6%) | Gap: 18.98mm (min:18.93) | Best: 10.26mm
+Ep  480 | R: -15819.3 | Len: 500 | R/s: -31.64 (-3954.8%) | Gap: 18.98mm (min:18.94) | Best: 10.26mm
+Ep  500 | R: -15818.5 | Len: 500 | R/s: -31.64 (-3954.6%) | Gap: 18.97mm (min:18.94) | Best: 10.26mm
+Ep  520 | R: -15819.8 | Len: 500 | R/s: -31.64 (-3954.9%) | Gap: 18.98mm (min:18.94) | Best: 10.26mm
+Ep  540 | R: -15818.8 | Len: 500 | R/s: -31.64 (-3954.7%) | Gap: 18.97mm (min:18.92) | Best: 10.26mm
+Ep  560 | R: -15818.3 | Len: 500 | R/s: -31.64 (-3954.6%) | Gap: 18.97mm (min:18.92) | Best: 10.26mm
+Ep  580 | R: -15822.9 | Len: 500 | R/s: -31.65 (-3955.7%) | Gap: 18.98mm (min:18.93) | Best: 10.26mm
+Ep  600 | R: -15819.6 | Len: 500 | R/s: -31.64 (-3954.9%) | Gap: 18.97mm (min:18.92) | Best: 10.26mm
+Ep  620 | R: -15817.8 | Len: 500 | R/s: -31.64 (-3954.5%) | Gap: 18.96mm (min:18.91) | Best: 10.26mm
+Ep  640 | R: -15821.5 | Len: 500 | R/s: -31.64 (-3955.4%) | Gap: 18.98mm (min:18.92) | Best: 10.26mm
--- a/RL_Trials/training_log_20251211_191801.txt
+++ b/RL_Trials/training_log_20251211_191801.txt
@@ -0,0 +1,112 @@
+Training Started: 20251211_191801
+Number of Episodes: 2000
+Print Frequency: 20
+Target Gap Height: 16.491741 mm
+Network: 256 hidden units with LayerNorm
+Policy LR: 5e-4, Value LR: 1e-3, Entropy: 0.02
+======================================================================
+
+Ep   20 | R:  755.5 | Len: 204 | R/s:  3.70 (462.3%) | Gap: 16.74mm (min:14.73) | Best: 14.73mm
+Ep   40 | R:  407.2 | Len: 116 | R/s:  3.52 (439.9%) | Gap: 15.56mm (min:13.80) | Best: 13.80mm
+Ep   60 | R:  157.4 | Len:  57 | R/s:  2.78 (347.5%) | Gap: 14.87mm (min:13.91) | Best: 13.80mm
+Ep   80 | R:  182.7 | Len:  61 | R/s:  2.98 (372.0%) | Gap: 15.06mm (min:14.21) | Best: 13.80mm
+Ep  100 | R:  487.2 | Len: 134 | R/s:  3.65 (455.9%) | Gap: 16.10mm (min:14.31) | Best: 13.80mm
+Ep  120 | R: 1113.1 | Len: 297 | R/s:  3.75 (468.3%) | Gap: 17.64mm (min:15.95) | Best: 13.80mm
+Ep  140 | R: 1434.7 | Len: 385 | R/s:  3.72 (465.6%) | Gap: 18.21mm (min:16.13) | Best: 13.80mm
+Ep  160 | R:  641.4 | Len: 172 | R/s:  3.72 (464.8%) | Gap: 16.69mm (min:15.38) | Best: 13.80mm
+Ep  180 | R: 1029.0 | Len: 274 | R/s:  3.76 (469.7%) | Gap: 17.43mm (min:14.60) | Best: 13.80mm
+Ep  200 | R:  287.0 | Len:  85 | R/s:  3.39 (424.0%) | Gap: 15.61mm (min:14.18) | Best: 13.80mm
+Ep  220 | R:  330.9 | Len:  94 | R/s:  3.52 (440.4%) | Gap: 15.85mm (min:14.93) | Best: 13.80mm
+Ep  240 | R:  336.4 | Len: 103 | R/s:  3.28 (409.7%) | Gap: 15.15mm (min:13.83) | Best: 13.80mm
+Ep  260 | R:  128.2 | Len:  50 | R/s:  2.58 (321.9%) | Gap: 14.51mm (min:13.90) | Best: 13.80mm
+Ep  280 | R:  116.0 | Len:  46 | R/s:  2.51 (313.3%) | Gap: 14.30mm (min:13.49) | Best: 13.49mm
+Ep  300 | R:   95.0 | Len:  39 | R/s:  2.45 (306.0%) | Gap: 13.85mm (min:13.19) | Best: 13.19mm
+Ep  320 | R:  772.1 | Len: 200 | R/s:  3.86 (482.9%) | Gap: 16.77mm (min:15.05) | Best: 13.19mm
+Ep  340 | R:  152.7 | Len:  54 | R/s:  2.84 (354.5%) | Gap: 14.81mm (min:13.96) | Best: 13.19mm
+Ep  360 | R:  118.1 | Len:  47 | R/s:  2.52 (315.6%) | Gap: 14.36mm (min:13.50) | Best: 13.19mm
+Ep  380 | R:  290.8 | Len:  81 | R/s:  3.60 (450.2%) | Gap: 15.56mm (min:14.06) | Best: 13.19mm
+Ep  400 | R:  230.0 | Len:  69 | R/s:  3.35 (418.5%) | Gap: 15.38mm (min:14.74) | Best: 13.19mm
+Ep  420 | R:  305.9 | Len:  85 | R/s:  3.58 (447.5%) | Gap: 15.82mm (min:15.08) | Best: 13.19mm
+Ep  440 | R:  450.6 | Len: 116 | R/s:  3.90 (487.0%) | Gap: 16.25mm (min:14.81) | Best: 13.19mm
+Ep  460 | R:  624.2 | Len: 161 | R/s:  3.89 (486.0%) | Gap: 16.65mm (min:15.01) | Best: 13.19mm
+Ep  480 | R:  710.6 | Len: 192 | R/s:  3.70 (462.6%) | Gap: 16.62mm (min:14.71) | Best: 13.19mm
+Ep  500 | R:  131.1 | Len:  49 | R/s:  2.65 (331.8%) | Gap: 14.44mm (min:13.45) | Best: 13.19mm
+Ep  520 | R:  169.4 | Len:  58 | R/s:  2.90 (362.5%) | Gap: 14.97mm (min:14.22) | Best: 13.19mm
+Ep  540 | R:  929.9 | Len: 263 | R/s:  3.53 (441.4%) | Gap: 16.99mm (min:14.49) | Best: 13.19mm
+Ep  560 | R: 1760.6 | Len: 500 | R/s:  3.52 (440.1%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep  580 | R: 1763.0 | Len: 500 | R/s:  3.53 (440.7%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep  600 | R: 1775.4 | Len: 500 | R/s:  3.55 (443.8%) | Gap: 18.99mm (min:18.91) | Best: 13.19mm
+Ep  620 | R: 1298.7 | Len: 355 | R/s:  3.66 (457.5%) | Gap: 17.94mm (min:14.49) | Best: 13.19mm
+Ep  640 | R: 1576.3 | Len: 438 | R/s:  3.60 (450.3%) | Gap: 18.63mm (min:16.35) | Best: 13.19mm
+Ep  660 | R: 1762.6 | Len: 500 | R/s:  3.53 (440.7%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep  680 | R: 1761.3 | Len: 500 | R/s:  3.52 (440.3%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep  700 | R: 1761.0 | Len: 500 | R/s:  3.52 (440.2%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep  720 | R: 1754.8 | Len: 500 | R/s:  3.51 (438.7%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep  740 | R: 1755.3 | Len: 500 | R/s:  3.51 (438.8%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep  760 | R: 1756.6 | Len: 500 | R/s:  3.51 (439.2%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep  780 | R: 1759.2 | Len: 500 | R/s:  3.52 (439.8%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep  800 | R: 1756.9 | Len: 500 | R/s:  3.51 (439.2%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep  820 | R: 1759.2 | Len: 500 | R/s:  3.52 (439.8%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep  840 | R: 1593.2 | Len: 436 | R/s:  3.65 (456.7%) | Gap: 18.62mm (min:16.57) | Best: 13.19mm
+Ep  860 | R: 1209.1 | Len: 334 | R/s:  3.62 (452.2%) | Gap: 17.92mm (min:15.21) | Best: 13.19mm
+Ep  880 | R:  509.8 | Len: 149 | R/s:  3.43 (429.0%) | Gap: 16.16mm (min:14.21) | Best: 13.19mm
+Ep  900 | R:  496.0 | Len: 148 | R/s:  3.36 (419.9%) | Gap: 15.86mm (min:14.56) | Best: 13.19mm
+Ep  920 | R: 1770.0 | Len: 500 | R/s:  3.54 (442.5%) | Gap: 18.99mm (min:18.97) | Best: 13.19mm
+Ep  940 | R: 1763.3 | Len: 500 | R/s:  3.53 (440.8%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep  960 | R: 1753.9 | Len: 500 | R/s:  3.51 (438.5%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep  980 | R: 1751.9 | Len: 500 | R/s:  3.50 (438.0%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep 1000 | R: 1756.6 | Len: 500 | R/s:  3.51 (439.1%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep 1020 | R: 1754.6 | Len: 500 | R/s:  3.51 (438.7%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep 1040 | R: 1759.2 | Len: 500 | R/s:  3.52 (439.8%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1060 | R: 1756.7 | Len: 500 | R/s:  3.51 (439.2%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1080 | R: 1758.8 | Len: 500 | R/s:  3.52 (439.7%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep 1100 | R: 1756.2 | Len: 500 | R/s:  3.51 (439.1%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep 1120 | R: 1756.5 | Len: 500 | R/s:  3.51 (439.1%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep 1140 | R: 1760.5 | Len: 500 | R/s:  3.52 (440.1%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep 1160 | R: 1760.5 | Len: 500 | R/s:  3.52 (440.1%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1180 | R: 1756.5 | Len: 500 | R/s:  3.51 (439.1%) | Gap: 19.00mm (min:18.99) | Best: 13.19mm
+Ep 1200 | R: 1760.0 | Len: 500 | R/s:  3.52 (440.0%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1220 | R: 1758.7 | Len: 500 | R/s:  3.52 (439.7%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1240 | R: 1760.4 | Len: 500 | R/s:  3.52 (440.1%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1260 | R: 1753.5 | Len: 500 | R/s:  3.51 (438.4%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1280 | R: 1753.9 | Len: 500 | R/s:  3.51 (438.5%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1300 | R: 1758.0 | Len: 500 | R/s:  3.52 (439.5%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1320 | R: 1762.7 | Len: 500 | R/s:  3.53 (440.7%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1340 | R: 1693.0 | Len: 459 | R/s:  3.69 (460.9%) | Gap: 18.61mm (min:16.25) | Best: 13.19mm
+Ep 1360 | R:  713.0 | Len: 181 | R/s:  3.94 (492.2%) | Gap: 16.38mm (min:15.05) | Best: 13.19mm
+Ep 1380 | R: 2118.4 | Len: 486 | R/s:  4.36 (545.4%) | Gap: 18.38mm (min:17.32) | Best: 13.19mm
+Ep 1400 | R: 2157.4 | Len: 495 | R/s:  4.36 (544.9%) | Gap: 18.50mm (min:18.01) | Best: 13.19mm
+Ep 1420 | R: 1181.5 | Len: 262 | R/s:  4.50 (563.0%) | Gap: 16.90mm (min:15.79) | Best: 13.19mm
+Ep 1440 | R: 1332.5 | Len: 298 | R/s:  4.46 (558.1%) | Gap: 17.08mm (min:15.65) | Best: 13.19mm
+Ep 1460 | R: 1496.5 | Len: 332 | R/s:  4.51 (563.8%) | Gap: 17.27mm (min:15.62) | Best: 13.19mm
+Ep 1480 | R: 1545.4 | Len: 339 | R/s:  4.56 (570.1%) | Gap: 17.26mm (min:15.87) | Best: 13.19mm
+Ep 1500 | R:  862.8 | Len: 201 | R/s:  4.29 (536.3%) | Gap: 16.17mm (min:14.88) | Best: 13.19mm
+Ep 1520 | R:  809.8 | Len: 193 | R/s:  4.20 (524.6%) | Gap: 16.03mm (min:14.74) | Best: 13.19mm
+Ep 1540 | R:  861.1 | Len: 204 | R/s:  4.22 (527.7%) | Gap: 16.25mm (min:14.93) | Best: 13.19mm
+Ep 1560 | R: 1445.2 | Len: 329 | R/s:  4.40 (549.4%) | Gap: 17.24mm (min:15.19) | Best: 13.19mm
+Ep 1580 | R: 1993.4 | Len: 486 | R/s:  4.11 (513.2%) | Gap: 18.55mm (min:16.26) | Best: 13.19mm
+Ep 1600 | R: 1985.4 | Len: 500 | R/s:  3.97 (496.4%) | Gap: 18.75mm (min:18.57) | Best: 13.19mm
+Ep 1620 | R: 1776.8 | Len: 500 | R/s:  3.55 (444.2%) | Gap: 18.97mm (min:18.91) | Best: 13.19mm
+Ep 1640 | R: 1755.2 | Len: 500 | R/s:  3.51 (438.8%) | Gap: 18.99mm (min:18.97) | Best: 13.19mm
+Ep 1660 | R: 1751.1 | Len: 500 | R/s:  3.50 (437.8%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1680 | R: 1746.6 | Len: 500 | R/s:  3.49 (436.7%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1700 | R: 1746.2 | Len: 500 | R/s:  3.49 (436.5%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1720 | R: 1747.8 | Len: 500 | R/s:  3.50 (437.0%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1740 | R: 1743.0 | Len: 500 | R/s:  3.49 (435.8%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1760 | R: 1743.4 | Len: 500 | R/s:  3.49 (435.8%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1780 | R: 1744.3 | Len: 500 | R/s:  3.49 (436.1%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1800 | R: 1744.0 | Len: 500 | R/s:  3.49 (436.0%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1820 | R: 1739.4 | Len: 500 | R/s:  3.48 (434.8%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1840 | R: 1736.2 | Len: 500 | R/s:  3.47 (434.1%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1860 | R: 1732.7 | Len: 500 | R/s:  3.47 (433.2%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1880 | R: 1732.1 | Len: 500 | R/s:  3.46 (433.0%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1900 | R: 1732.2 | Len: 500 | R/s:  3.46 (433.0%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1920 | R: 1729.1 | Len: 500 | R/s:  3.46 (432.3%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1940 | R: 1728.6 | Len: 500 | R/s:  3.46 (432.1%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1960 | R: 1728.0 | Len: 500 | R/s:  3.46 (432.0%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 1980 | R: 1728.5 | Len: 500 | R/s:  3.46 (432.1%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+Ep 2000 | R: 1726.8 | Len: 500 | R/s:  3.45 (431.7%) | Gap: 19.00mm (min:19.00) | Best: 13.19mm
+
+======================================================================
+Training Completed: 20251211_195411
+Best Gap Error Achieved: 13.191 mm
--- a/AI.txt
+++ b/AI.txt
--- a/lev_PPO.ipynb
+++ b/lev_PPO.ipynb
--- a/lev_pod_env.py
+++ b/lev_pod_env.py
@@ -0,0 +1,486 @@
+import gymnasium as gym
+from gymnasium import spaces
+import pybullet as p
+import pybullet_data
+import numpy as np
+import os
+from mag_lev_coil import MagLevCoil
+from maglev_predictor import MaglevPredictor
+
+TARGET_GAP = 16.491741 / 1000 # Target gap height in meters (16.491741 mm), the value used for the 5.8 kg pod
+
+class LevPodEnv(gym.Env):
+    def __init__(self, use_gui=False, initial_gap_mm=10.0, max_steps=2000):
+        super(LevPodEnv, self).__init__()
+        
+        # Store initial gap height parameter
+        self.initial_gap_mm = initial_gap_mm
+        self.max_episode_steps = max_steps
+        self.current_step = 0
+        
+        # The following was coded by AI - see [1]
+        # --- 1. Define Action & Observation Spaces ---
+        # Action: 4 PWM duty cycles between -1 and 1 (4 independent coils)
+        # [front_left, front_right, back_left, back_right] corresponding to +Y and -Y ends of each U-yoke
+        self.action_space = spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float32)
+        
+        # Observation: 4 normalized noisy sensor gap heights + 4 normalized velocities
+        # Gaps normalized by 0.030m, velocities by 0.1 m/s
+        self.observation_space = spaces.Box(low=-5.0, high=5.0, shape=(8,), dtype=np.float32)
+        
+        # --- 2. Setup Physics & Actuators ---
+        self.dt = 1./240.  # PyBullet default timestep
+        self.coil_front_L = MagLevCoil(1.1, 0.0025, 12, 10.2)
+        self.coil_front_R = MagLevCoil(1.1, 0.0025, 12, 10.2)
+        self.coil_back_L = MagLevCoil(1.1, 0.0025, 12, 10.2)
+        self.coil_back_R = MagLevCoil(1.1, 0.0025, 12, 10.2)
+        
+        # Sensor noise parameters
+        self.sensor_noise_std = 0.0001  # 0.1mm standard deviation
+        
+        # Normalization constants for observations
+        self.gap_scale = 0.015  # Normalize gaps by +-15mm max expected deviation from middle
+        self.velocity_scale = 0.1  # Normalize velocities by 0.1 m/s max expected velocity
+        
+        # Maglev force/torque predictor
+        self.predictor = MaglevPredictor()
+        
+        # Connect to PyBullet (DIRECT is faster for training, GUI for debugging)
+        self.client = p.connect(p.GUI if use_gui else p.DIRECT) 
+        p.setAdditionalSearchPath(pybullet_data.getDataPath())
+        
+        # Store references
+        self.trackId = None
+        self.podId = None
+        self.collision_local_positions = []
+        self.yoke_indices = []  # For force application
+        self.yoke_labels = []
+        self.sensor_indices = []  # For gap height measurement
+        self.sensor_labels = []
+        
+        # For velocity calculation
+        self.prev_sensor_gaps = None
+        
+    def reset(self, seed=None, options=None):
+        # Reset PyBullet simulation
+        p.resetSimulation(physicsClientId=self.client)
+        p.setGravity(0, 0, -9.81, physicsClientId=self.client)
+        p.setTimeStep(self.dt, physicsClientId=self.client)
+        
+        # Create the maglev track (inverted system - track above, pod hangs below)
+        # Track bottom surface at Z=0
+        track_collision = p.createCollisionShape(
+            shapeType=p.GEOM_BOX,
+            halfExtents=[1.0, 0.2, 0.010],
+            physicsClientId=self.client
+        )
+        track_visual = p.createVisualShape(
+            shapeType=p.GEOM_BOX,
+            halfExtents=[1.0, 0.2, 0.010],
+            rgbaColor=[0.3, 0.3, 0.3, 0.8],
+            physicsClientId=self.client
+        )
+        self.trackId = p.createMultiBody(
+            baseMass=0,  # Static
+            baseCollisionShapeIndex=track_collision,
+            baseVisualShapeIndex=track_visual,
+            basePosition=[0, 0, 0.010],  # Track center at Z=10mm, bottom at Z=0
+            physicsClientId=self.client
+        )
+        p.changeDynamics(self.trackId, -1, 
+                         lateralFriction=0.3,
+                         restitution=0.1,
+                         physicsClientId=self.client)
+        
+        urdf_path = self._create_modified_urdf()
+        
+        # Determine start condition
+        if np.random.rand() > 0.5:
+            # Spawn exactly at target
+            spawn_gap_mm = TARGET_GAP * 1000.0
+            # # Add tiny noise
+            # spawn_gap_mm += np.random.uniform(-0.5, 0.5)
+        else:
+            spawn_gap_mm = self.initial_gap_mm
+            
+        start_z = -(0.09085 + spawn_gap_mm / 1000.0)
+        start_pos = [0, 0, start_z]
+        start_orientation = p.getQuaternionFromEuler([0, 0, 0])
+        self.podId = p.loadURDF(urdf_path, start_pos, start_orientation, physicsClientId=self.client)
+        
+        # The following was coded by AI - see [2]
+        # Parse collision shapes to identify yokes and sensors
+        collision_shapes = p.getCollisionShapeData(self.podId, -1, physicsClientId=self.client)
+        self.collision_local_positions = []
+        self.yoke_indices = []
+        self.yoke_labels = []
+        self.sensor_indices = []
+        self.sensor_labels = []
+        
+        # Expected heights for detection
+        expected_yoke_sensor_z = 0.08585  # Yokes and sensors always at this height
+        expected_bolt_z = 0.08585 + self.initial_gap_mm / 1000.0  # Bolts at gap-dependent height
+        
+        for i, shape in enumerate(collision_shapes):
+            shape_type = shape[2]
+            local_pos = shape[5]
+            self.collision_local_positions.append(local_pos)
+            
+            # Check if at sensor/yoke height (Z ≈ 0.08585m) - NOT bolts
+            if abs(local_pos[2] - expected_yoke_sensor_z) < 0.001:
+                if shape_type == p.GEOM_BOX:
+                    # Yokes are BOX type at the four corners (size 0.0254)
+                    self.yoke_indices.append(i)
+                    x_pos = "Front" if local_pos[0] > 0 else "Back"
+                    y_pos = "Left" if local_pos[1] > 0 else "Right"
+                    self.yoke_labels.append(f"{x_pos}_{y_pos}")
+                elif shape_type == p.GEOM_CYLINDER or shape_type == p.GEOM_MESH:
+                    # Sensors: distinguish by position pattern
+                    if abs(local_pos[0]) < 0.06 or abs(local_pos[1]) < 0.02:
+                        self.sensor_indices.append(i)
+                        if abs(local_pos[0]) < 0.001:  # Center sensors (X ≈ 0)
+                            label = "Center_Right" if local_pos[1] > 0 else "Center_Left"
+                        else:  # Front/back sensors (Y ≈ 0)
+                            label = "Front" if local_pos[0] > 0 else "Back"
+                        self.sensor_labels.append(label)
+        
+        self.coil_front_L.current = 0
+        self.coil_front_R.current = 0
+        self.coil_back_L.current = 0
+        self.coil_back_R.current = 0
+        
+        self.prev_sensor_gaps = None
+        obs = self._get_obs(initial_reset=True)
+        self.current_step = 0
+        
+        return obs, {}
+
+    # The following was generated by AI - see [14]
+    def step(self, action):
+        # Check if PyBullet connection is still active (GUI might be closed)
+        try:
+            p.getConnectionInfo(physicsClientId=self.client)
+        except p.error:
+            # Connection lost - GUI was closed
+            return self._get_obs(), -100.0, True, True, {'error': 'GUI closed'}
+        
+        # Update Coil Currents from PWM Actions
+        pwm_front_L = action[0]  # yoke +x,+y
+        pwm_front_R = action[1]  # yoke +x,-y
+        pwm_back_L = action[2]   # yoke -x,+y
+        pwm_back_R = action[3]   # yoke -x,-y
+        
+        curr_front_L = self.coil_front_L.update(pwm_front_L, self.dt)
+        curr_front_R = self.coil_front_R.update(pwm_front_R, self.dt)
+        curr_back_L = self.coil_back_L.update(pwm_back_L, self.dt)
+        curr_back_R = self.coil_back_R.update(pwm_back_R, self.dt)
+        
+        # --- 2. Get Current Pod State ---
+        pos, orn = p.getBasePositionAndOrientation(self.podId, physicsClientId=self.client)
+        lin_vel, ang_vel = p.getBaseVelocity(self.podId, physicsClientId=self.client)
+        
+        # Convert quaternion to rotation matrix
+        rot_matrix = np.array(p.getMatrixFromQuaternion(orn)).reshape(3, 3)
+        
+        # --- 3. Calculate Gap Heights at Yoke Positions (for force prediction) ---
+        # Calculate world positions of the 4 yokes (ends of U-yokes)
+        yoke_gap_heights_dict = {}  # Store by label for easy access
+        
+        for i, yoke_idx in enumerate(self.yoke_indices):
+            local_pos = self.collision_local_positions[yoke_idx]
+            local_vec = np.array(local_pos)
+            world_offset = rot_matrix @ local_vec
+            world_pos = np.array(pos) + world_offset
+            
+            # Top surface of yoke box (add half-height = 5mm)
+            yoke_top_z = world_pos[2] + 0.005
+            
+            # Gap height: track bottom (Z=0) to yoke top (negative Z)
+            gap_height = -yoke_top_z  # Convert to positive gap in meters
+            yoke_gap_heights_dict[self.yoke_labels[i]] = gap_height
+        
+        # Average gap heights for each U-shaped yoke (average left and right ends)
+        # Front yoke: average of Front_Left and Front_Right
+        # Back yoke: average of Back_Left and Back_Right
+        avg_gap_front = (yoke_gap_heights_dict.get('Front_Left', 0.010) + 
+                        yoke_gap_heights_dict.get('Front_Right', 0.010)) / 2
+        avg_gap_back = (yoke_gap_heights_dict.get('Back_Left', 0.010) + 
+                       yoke_gap_heights_dict.get('Back_Right', 0.010)) / 2
+        
+        front_left_gap = yoke_gap_heights_dict.get('Front_Left', 0.010)
+        front_right_gap = yoke_gap_heights_dict.get('Front_Right', 0.010)
+        back_left_gap = yoke_gap_heights_dict.get('Back_Left', 0.010)
+        back_right_gap = yoke_gap_heights_dict.get('Back_Right', 0.010)
+        
+        # hypotenuses
+        y_distance = 0.1016  # 2 * 0.0508m (left to right distance)
+        x_distance = 0.2518  # 2 * 0.1259m (front to back distance)
+        
+        # Roll angle
+        # When right side has larger gap, roll is negative
+        roll_angle_front = np.arcsin(-(front_right_gap - front_left_gap) / y_distance)
+        roll_angle_back = np.arcsin(-(back_right_gap - back_left_gap) / y_distance)
+        roll_angle = (roll_angle_front + roll_angle_back) / 2
+        
+        # When back has larger gap, pitch is positive
+        pitch_angle_left = np.arcsin((back_left_gap - front_left_gap) / x_distance)
+        pitch_angle_right = np.arcsin((back_right_gap - front_right_gap) / x_distance)
+        pitch_angle = (pitch_angle_left + pitch_angle_right) / 2
+        
+        # Predict Forces and Torques using Maglev Predictor
+        # Gap heights in mm
+        gap_front_mm = avg_gap_front * 1000
+        gap_back_mm = avg_gap_back * 1000
+        
+        # Roll angle in degrees
+        roll_deg = np.degrees(roll_angle)
+        
+        # Predict force and torque for each U-shaped yoke
+        # Front yoke
+        force_front, torque_front = self.predictor.predict(
+            curr_front_L, curr_front_R, roll_deg, gap_front_mm
+        )
+        
+        # Back yoke
+        force_back, torque_back = self.predictor.predict(
+            curr_back_L, curr_back_R, roll_deg, gap_back_mm
+        )
+        
+        # --- 5. Apply Forces and Torques to Pod ---
+        # Forces are applied at Y=0 (center of U-yoke) at each X position
+        # This is where the actual magnetic force acts on the U-shaped yoke
+        
+        # Apply force at front yoke center (X=+0.1259, Y=0)
+        front_yoke_center = [0.1259, 0, 0.08585]  # From pod.xml yoke positions
+        p.applyExternalForce(
+            self.podId, -1,
+            forceObj=[0, 0, force_front],
+            posObj=front_yoke_center,
+            flags=p.LINK_FRAME,
+            physicsClientId=self.client
+        )
+        
+        # Apply force at back yoke center (X=-0.1259, Y=0)
+        back_yoke_center = [-0.1259, 0, 0.08585]
+        p.applyExternalForce(
+            self.podId, -1,
+            forceObj=[0, 0, force_back],
+            posObj=back_yoke_center,
+            flags=p.LINK_FRAME,
+            physicsClientId=self.client
+        )
+
+        
+        
+        # Apply roll torques
+        # Each yoke produces its own torque about X axis
+        torque_front_Nm = torque_front / 1000  # Convert from mN·m to N·m
+        torque_back_Nm = torque_back / 1000
+        
+        # Apply torques at respective yoke positions
+        p.applyExternalTorque(
+            self.podId, -1,
+            torqueObj=[torque_front_Nm, 0, 0],
+            flags=p.LINK_FRAME,
+            physicsClientId=self.client
+        )
+        p.applyExternalTorque(
+            self.podId, -1,
+            torqueObj=[torque_back_Nm, 0, 0],
+            flags=p.LINK_FRAME,
+            physicsClientId=self.client
+        )
+        
+        # --- 6. Step Simulation ---
+        p.stepSimulation(physicsClientId=self.client)
+        self.current_step += 1
+        
+        # Check for physical contact with track (bolts touching)
+        contact_points = p.getContactPoints(bodyA=self.podId, bodyB=self.trackId, physicsClientId=self.client)
+        has_contact = len(contact_points) > 0
+        
+        # --- 7. Get New Observation ---
+        obs = self._get_obs()
+        
+        # --- 8. Calculate Reward ---
+        # Goal: Hover at target gap (16.5mm), minimize roll/pitch, minimize power
+        target_gap = TARGET_GAP  # 16.5mm in meters
+        avg_gap = (avg_gap_front + avg_gap_back) / 2
+        
+        gap_error = abs(avg_gap - target_gap)
+        
+        # Power dissipation (all 4 coils)
+        power = (curr_front_L**2 * self.coil_front_L.R + 
+                curr_front_R**2 * self.coil_front_R.R +
+                curr_back_L**2 * self.coil_back_L.R +
+                curr_back_R**2 * self.coil_back_R.R)
+        
+        # --- Improved Reward Function ---
+        # Use reward shaping with reasonable scales to enable learning
+        
+        # 1. Gap Error Reward (most important)
+        # Use exponential decay for smooth gradient near target
+        gap_error_mm = gap_error * 1000  # Convert to mm
+        gap_reward = 10.0 * np.exp(-0.5 * (gap_error_mm / 3.0)**2)  # Peak at 0mm error, 3mm std dev
+        
+        # 2. Orientation Penalties (smaller scale)
+        roll_penalty = abs(np.degrees(roll_angle)) * 0.02
+        pitch_penalty = abs(np.degrees(pitch_angle)) * 0.02
+        
+        # 3. Velocity Penalty (discourage rapid oscillations)
+        z_velocity = lin_vel[2]
+        velocity_penalty = abs(z_velocity) * 0.1
+        
+        # 4. Contact Penalty
+        contact_points = p.getContactPoints(bodyA=self.podId, bodyB=self.trackId)
+        contact_penalty = len(contact_points) * 0.2
+        
+        # 5. Power Penalty (encourage efficiency, but small weight)
+        power_penalty = power * 0.001
+        
+        # Combine rewards (scaled to ~[-5, +1] range per step)
+        reward = gap_reward - roll_penalty - pitch_penalty - velocity_penalty - contact_penalty - power_penalty
+        
+        # Check Termination (tighter bounds for safety)
+        terminated = False
+        truncated = False
+        
+        # Terminate if gap is too small (crash) or too large (lost)
+        if avg_gap < 0.003 or avg_gap > 0.035:
+            terminated = True
+            reward = -10.0  # Failure penalty (scaled down)
+            
+        # Terminate if orientation is too extreme
+        if abs(roll_angle) > np.radians(15) or abs(pitch_angle) > np.radians(15):
+            terminated = True
+            reward = -10.0
+            
+        # Success bonus for stable hovering near target
+        if gap_error_mm < 1.0 and abs(np.degrees(roll_angle)) < 2.0 and abs(np.degrees(pitch_angle)) < 2.0:
+            reward += 2.0  # Bonus for excellent control
+        
+        info = {
+            'curr_front_L': curr_front_L,
+            'curr_front_R': curr_front_R,
+            'curr_back_L': curr_back_L,
+            'curr_back_R': curr_back_R,
+            'gap_front_yoke': avg_gap_front,
+            'gap_back_yoke': avg_gap_back,
+            'roll': roll_angle,
+            'force_front': force_front,
+            'force_back': force_back,
+            'torque_front': torque_front,
+            'torque_back': torque_back
+        }
+        
+        return obs, reward, terminated, truncated, info
+
+    # The following was generated by AI - see [15]
+    def _get_obs(self, initial_reset=False):
+        """
+        Build the observation vector: [normalized gaps(4), normalized gap velocities(4)].
+
+        For every index in ``self.sensor_indices`` the sensor's local position is
+        rotated by the pod's current orientation, added to the pod base position,
+        and converted to a gap height measured from the track bottom (Z=0) down to
+        the sensor's top surface. Velocities are finite differences of consecutive
+        gap readings over ``self.dt``.
+
+        NOTE(review): measurement noise is sampled for every sensor but the value
+        stored in the observation is the noise-free gap (see the loop below), so
+        the observation is currently noiseless.
+
+        Args:
+            initial_reset: When True, velocities are reported as zero instead of
+                being differenced against the previously stored gaps.
+
+        Returns:
+            1-D array of 8 values: gaps ordered [Center_Right, Center_Left, Front,
+            Back] as ``(gap - TARGET_GAP) / self.gap_scale``, followed by the
+            matching velocities divided by ``self.velocity_scale``.
+
+        Side effects:
+            Overwrites ``self.prev_sensor_gaps`` with the un-normalized gaps from
+            this call, and advances NumPy's global random state once per sensor.
+        """
+        pos, orn = p.getBasePositionAndOrientation(self.podId, physicsClientId=self.client)
+        
+        # Convert quaternion to rotation matrix
+        rot_matrix = np.array(p.getMatrixFromQuaternion(orn)).reshape(3, 3)
+        
+        # Calculate sensor gap heights (keyed by sensor label)
+        sensor_gap_heights = {}
+        
+        for i, sensor_idx in enumerate(self.sensor_indices):
+            # Sensor position in the pod frame -> world frame
+            local_pos = self.collision_local_positions[sensor_idx]
+            local_vec = np.array(local_pos)
+            world_offset = rot_matrix @ local_vec
+            world_pos = np.array(pos) + world_offset
+            
+            # Top surface of sensor (add half-height = 5mm)
+            sensor_top_z = world_pos[2] + 0.005
+            
+            # Gap height: track bottom (Z=0) to sensor top.
+            # The sign flip makes the gap positive when the sensor top is below Z=0.
+            gap_height = -sensor_top_z
+            
+            # Measurement noise is drawn here but NOT used: the noisy assignment is
+            # commented out and the clean gap_height is stored instead. The draw
+            # still consumes one sample from the global RNG on every call.
+            noisy_gap = gap_height + np.random.normal(0, self.sensor_noise_std)
+            # sensor_gap_heights[self.sensor_labels[i]] = noisy_gap
+            sensor_gap_heights[self.sensor_labels[i]] = gap_height
+        
+        # Pack sensor measurements in consistent order
+        # [center_right, center_left, front, back]
+        # A label missing from the dict falls back to 0.010 rather than raising.
+        gaps = np.array([
+            sensor_gap_heights.get('Center_Right', 0.010),
+            sensor_gap_heights.get('Center_Left', 0.010),
+            sensor_gap_heights.get('Front', 0.010),
+            sensor_gap_heights.get('Back', 0.010)
+        ], dtype=np.float32)
+        
+        # Compute velocities (d_gap/dt)
+        if initial_reset or (self.prev_sensor_gaps is None):
+            # First observation - no velocity information yet
+            velocities = np.zeros(4, dtype=np.float32)
+        else:
+            # Compute velocity as finite difference
+            velocities = (gaps - self.prev_sensor_gaps) / self.dt
+        
+        # Store for next step (copy so later changes to `gaps` cannot alias it)
+        self.prev_sensor_gaps = gaps.copy()
+        
+        # Normalize observations: gaps are centred on the target gap before scaling
+        gaps_normalized = (gaps - TARGET_GAP) / self.gap_scale
+        velocities_normalized = velocities / self.velocity_scale
+        
+        # Concatenate: [normalized_gaps, normalized_velocities]
+        obs = np.concatenate([gaps_normalized, velocities_normalized])
+        
+        return obs
+    
+    # The following was generated by AI - see [16]
+    def _create_modified_urdf(self):
+        """
+        Create a modified URDF with bolt positions adjusted based on initial gap height.
+        Bolts are at Z = 0.08585 + gap_mm/1000 (relative to pod origin).
+        Yokes and sensors remain at Z = 0.08585 (relative to pod origin).
+        """
+        import tempfile
+        
+        # Calculate bolt Z position
+        bolt_z = 0.08585 + self.initial_gap_mm / 1000.0
+        
+        # Read the original URDF template
+        urdf_template_path = os.path.join(os.path.dirname(__file__), "pod.xml")
+        with open(urdf_template_path, 'r') as f:
+            urdf_content = f.read()
+        
+        # Replace the bolt Z positions (originally at 0.09585)
+        # There are 4 bolts at different X,Y positions but same Z
+        urdf_modified = urdf_content.replace(
+            'xyz="0.285 0.03 0.09585"',
+            f'xyz="0.285 0.03 {bolt_z:.6f}"'
+        ).replace(
+            'xyz="0.285 -0.03 0.09585"',
+            f'xyz="0.285 -0.03 {bolt_z:.6f}"'
+        ).replace(
+            'xyz="-0.285 0.03 0.09585"',
+            f'xyz="-0.285 0.03 {bolt_z:.6f}"'
+        ).replace(
+            'xyz="-0.285 -0.03 0.09585"',
+            f'xyz="-0.285 -0.03 {bolt_z:.6f}"'
+        )
+        
+        # Write to a temporary file
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.urdf', delete=False) as f:
+            f.write(urdf_modified)
+            temp_urdf_path = f.name
+        
+        return temp_urdf_path
+
+    def close(self):
+        try:
+            p.disconnect(physicsClientId=self.client)
+        except p.error:
+            pass  # Already disconnected
+    
+    def render(self):
+        """Rendering is handled by PyBullet GUI mode"""
+        pass
--- a/mag_lev_coil.py
+++ b/mag_lev_coil.py
@@ -0,0 +1,24 @@
+# The following was generated by AI - see [17]
+class MagLevCoil:
+    def __init__(self, r_resistance, l_inductance, source_voltage, maxCurrent):
+        self.R = r_resistance
+        self.L = l_inductance
+        self.current = 0.0
+        self.Vs = source_voltage
+        self.Imax = maxCurrent
+    
+    def update(self, pwm_duty_cycle, dt):
+        """
+        Simulates the coil circuit and force generation.
+        pwm_duty_cycle: -1.0 to 1.0
+        """
+        # Simple First-order RL circuit approximation
+        # V_in = Duty * V_source
+        voltage = pwm_duty_cycle * self.Vs
+        
+        # di/dt = (V - I*R) / L
+        di = (voltage - self.current * self.R) / self.L
+        self.current += di * dt
+        self.current = min(max(-self.Imax, self.current), self.Imax)
+        
+        return self.current
--- a/maglev_model.pkl
+++ b/maglev_model.pkl
--- a/maglev_predictor.py
+++ b/maglev_predictor.py
@@ -0,0 +1,116 @@
+"""
+Magnetic Levitation Force and Torque Predictor
+Optimized Inference using Pre-Trained Scikit-Learn Model
+
+This module loads a saved .pkl model (PolynomialFeatures + LinearRegression)
+and executes predictions using optimized NumPy vectorization for high-speed simulation.
+
+Usage:
+    predictor = MaglevPredictor("maglev_model.pkl")
+    force, torque = predictor.predict(currL=-15, currR=-15, roll=1.0, gap_height=10.0)
+"""
+
+import numpy as np
+import joblib
+import os
+
+class MaglevPredictor:
+    def __init__(self, model_path='maglev_model.pkl'):
+        """
+        Initialize predictor by loading the pickle file and extracting
+        raw matrices for fast inference.
+        """
+        if not os.path.exists(model_path):
+            raise FileNotFoundError(f"Model file '{model_path}' not found. Please train and save the model first.")
+
+        print(f"Loading maglev model from {model_path}...")
+        data = joblib.load(model_path)
+        
+        # 1. Extract Scikit-Learn Objects
+        poly_transformer = data['poly_features']
+        linear_model = data['model']
+        
+        # 2. Extract Raw Matrices for Speed (Bypasses sklearn overhead)
+        # powers_: Matrix of shape (n_output_features, n_input_features)
+        # Represents the exponents for each term. e.g. x1^2 * x2^1
+        self.powers = poly_transformer.powers_.T # Transpose for broadcasting
+        
+        # coef_: Shape (n_targets, n_output_features) -> (2, n_poly_terms)
+        self.coef = linear_model.coef_
+        
+        # intercept_: Shape (n_targets,) -> (2,)
+        self.intercept = linear_model.intercept_
+        
+        print(f"Model loaded. Degree: {data['degree']}")
+        print(f"Force R2: {data['performance']['force_r2']:.4f}")
+        print(f"Torque R2: {data['performance']['torque_r2']:.4f}")
+
+    def predict(self, currL, currR, roll, gap_height):
+        """
+        Fast single-sample prediction using pure NumPy.
+        
+        Args:
+            currL, currR: Currents [A]
+            roll: Roll angle [deg]
+            gap_height: Gap [mm]
+            
+        Returns:
+            (force [N], torque [mN·m])
+        """
+        # 1. Pre-process Input (Must match training order: currL, currR, roll, invGap)
+        # Clamp gap to avoid division by zero
+        safe_gap = max(gap_height, 1e-6)
+        invGap = 1.0 / safe_gap
+        
+        # Input Vector: shape (4,)
+        x = np.array([currL, currR, roll, invGap])
+        
+        # 2. Polynomial Expansion (Vectorized)
+        # Compute x^p for every term. 
+        # x is (4,), self.powers is (4, n_terms)
+        # Broadcasting: x[:, None] is (4,1). Result is (4, n_terms).
+        # Product along axis 0 reduces it to (n_terms,)
+        
+        # Note: This is equivalent to PolynomialFeatures.transform but 100x faster for single samples
+        poly_features = np.prod(x[:, None] ** self.powers, axis=0)
+        
+        # 3. Linear Prediction
+        # (2, n_terms) dot (n_terms,) -> (2,)
+        result = np.dot(self.coef, poly_features) + self.intercept
+        
+        return result[0], result[1]
+
+    def predict_batch(self, currL, currR, roll, gap_height):
+        """
+        Fast batch prediction for array inputs.
+        """
+        # 1. Pre-process Inputs
+        gap_height = np.asarray(gap_height)
+        invGap = 1.0 / np.maximum(gap_height, 1e-6)
+        
+        # Stack inputs: shape (N, 4)
+        X = np.column_stack((currL, currR, roll, invGap))
+        
+        # 2. Polynomial Expansion
+        # X is (N, 4). Powers is (4, n_terms).
+        # We want (N, n_terms).
+        # Method: X[:, :, None] -> (N, 4, 1)
+        # Powers[None, :, :] -> (1, 4, n_terms)
+        # Power: (N, 4, n_terms)
+        # Prod axis 1: (N, n_terms)
+        poly_features = np.prod(X[:, :, None] ** self.powers[None, :, :], axis=1)
+        
+        # 3. Linear Prediction
+        # (N, n_terms) @ (n_terms, 2) + (2,)
+        result = np.dot(poly_features, self.coef.T) + self.intercept
+        
+        return result[:, 0], result[:, 1]
+
+if __name__ == "__main__":
+    # Test
+    try:
+        p = MaglevPredictor()
+        f, t = p.predict(-15, -15, 1.0, 10.0)
+        print(f"Force: {f:.3f}, Torque: {t:.3f}")
+    except FileNotFoundError as e:
+        print(e)
--- a/pod.xml
+++ b/pod.xml
@@ -0,0 +1,86 @@
+<?xml version="1.0"?>
+<robot name="lev_pod">
+  <link name="base_link">
+    <inertial>
+      <mass value="5.8"/>
+      <inertia ixx="0.0192942414" ixy="0.0" ixz="0.0" iyy="0.130582305" iyz="0.0" izz="0.13760599326"/>
+    </inertial>
+    
+    <collision>
+      <origin rpy="0 0 0" xyz="0 0 -0.009525"/>
+      <geometry> <box size="0.6096 0.0862 0.01905"/> </geometry>
+    </collision>
+    
+    <!-- Bolts -->
+    <collision>
+      <origin rpy="0 0 0" xyz="0.285 0.03 0.09585"/>
+      <geometry><box size="0.01 0.01 0.01"/></geometry>
+    </collision>
+    <collision>
+      <origin rpy="0 0 0" xyz="0.285 -0.03 0.09585"/>
+      <geometry><box size="0.01 0.01 0.01"/></geometry>
+    </collision>
+    <collision>
+      <origin rpy="0 0 0" xyz="-0.285 0.03 0.09585"/>
+      <geometry><box size="0.01 0.01 0.01"/></geometry>
+    </collision>
+    <collision>
+      <origin rpy="0 0 0" xyz="-0.285 -0.03 0.09585"/>
+      <geometry><box size="0.01 0.01 0.01"/></geometry>
+    </collision>
+
+    <!-- BottomStops
+    <collision>
+      <origin rpy="0 0 0" xyz="0.285 0.03 0.14500"/>
+      <geometry><box size="0.01 0.01 0.01"/></geometry>
+    </collision>
+    <collision>
+      <origin rpy="0 0 0" xyz="0.285 -0.03 0.14500"/>
+      <geometry><box size="0.01 0.01 0.01"/></geometry>
+    </collision>
+    <collision>
+      <origin rpy="0 0 0" xyz="-0.285 0.03 0.14500"/>
+      <geometry><box size="0.01 0.01 0.01"/></geometry>
+    </collision>
+    <collision>
+      <origin rpy="0 0 0" xyz="-0.285 -0.03 0.14500"/>
+      <geometry><box size="0.01 0.01 0.01"/></geometry>
+    </collision> -->
+
+    <!-- Yoke Tops -->
+    <collision>
+      <origin rpy="0 0 0" xyz="0.1259 0.0508 0.08585"/>
+      <geometry><box size="0.0254 0.0254 0.01"/></geometry>
+    </collision>
+    <collision>
+      <origin rpy="0 0 0" xyz="0.1259 -0.0508 0.08585"/>
+      <geometry><box size="0.0254 0.0254 0.01"/></geometry>
+    </collision>
+    <collision>
+      <origin rpy="0 0 0" xyz="-0.1259 0.0508 0.08585"/>
+      <geometry><box size="0.0254 0.0254 0.01"/></geometry>
+    </collision>
+    <collision>
+      <origin rpy="0 0 0" xyz="-0.1259 -0.0508 0.08585"/>
+      <geometry><box size="0.0254 0.0254 0.01"/></geometry>
+    </collision>
+    
+    <!-- Sensor Tops -->
+    <collision>
+      <origin rpy="0 0 0" xyz="0 0.0508 0.08585"/>
+      <geometry><cylinder length="0.01" radius="0.015"/></geometry>
+    </collision>
+    <collision>
+      <origin rpy="0 0 0" xyz="0 -0.0508 0.08585"/>
+      <geometry><cylinder length="0.01" radius="0.015"/></geometry>
+    </collision>
+    <collision>
+      <origin rpy="0 0 0" xyz="0.2366 0 0.08585"/>
+      <geometry><cylinder length="0.01" radius="0.015"/></geometry>
+    </collision>
+    <collision>
+      <origin rpy="0 0 0" xyz="-0.2366 0 0.08585"/>
+      <geometry><cylinder length="0.01" radius="0.015"/></geometry>
+    </collision>
+  </link>
+</robot>
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,156 @@
+annotated-types==0.7.0
+anyio==4.10.0
+appnope==0.1.4
+argon2-cffi==25.1.0
+argon2-cffi-bindings==25.1.0
+arrow==1.3.0
+asttokens==3.0.0
+async-lru==2.0.5
+attrs==25.3.0
+babel==2.17.0
+beautifulsoup4==4.13.5
+bleach==6.2.0
+cachetools==6.2.1
+certifi==2025.8.3
+cffi==1.17.1
+charset-normalizer==3.4.3
+click==8.3.0
+comm==0.2.3
+contourpy==1.3.3
+cycler==0.12.1
+debugpy==1.8.16
+decorator==5.2.1
+defusedxml==0.7.1
+executing==2.2.1
+fastapi==0.115.0
+fastjsonschema==2.21.2
+fonttools==4.59.2
+fqdn==1.5.1
+future==1.0.0
+google-ai-generativelanguage==0.6.10
+google-api-core==2.26.0
+google-api-python-client==2.185.0
+google-auth==2.41.1
+google-auth-httplib2==0.2.0
+google-generativeai==0.8.3
+googleapis-common-protos==1.70.0
+grpcio==1.75.1
+grpcio-status==1.71.2
+h11==0.16.0
+httpcore==1.0.9
+httplib2==0.31.0
+httpx==0.27.2
+idna==3.10
+ipykernel==6.30.1
+ipython==9.5.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.7
+iso8601==2.1.0
+isoduration==20.11.0
+jedi==0.19.2
+Jinja2==3.1.6
+json5==0.12.1
+jsonpointer==3.0.0
+jsonschema==4.25.1
+jsonschema-specifications==2025.4.1
+jupyter==1.1.1
+jupyter-console==6.6.3
+jupyter-events==0.12.0
+jupyter-lsp==2.3.0
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyter_server==2.17.0
+jupyter_server_terminals==0.5.3
+jupyterlab==4.4.7
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.27.3
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.9
+lark==1.2.2
+lazy_loader==0.4
+MarkupSafe==3.0.2
+matplotlib==3.10.6
+matplotlib-inline==0.1.7
+mistune==3.1.4
+mne==1.10.2
+mpmath==1.3.0
+nbclient==0.10.2
+nbconvert==7.16.6
+nbformat==5.10.4
+nest-asyncio==1.6.0
+notebook==7.4.5
+notebook_shim==0.2.4
+numpy==2.3.2
+packaging==25.0
+pandas==2.3.3
+pandocfilters==1.5.1
+parso==0.8.5
+pexpect==4.9.0
+pillow==11.3.0
+platformdirs==4.4.0
+pooch==1.8.2
+prometheus_client==0.22.1
+prompt_toolkit==3.0.52
+proto-plus==1.26.1
+protobuf==5.29.5
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.22
+pydantic==2.9.2
+pydantic-settings==2.6.0
+pydantic_core==2.23.4
+Pygments==2.19.2
+pyparsing==3.2.3
+PyQt6==6.10.0
+PyQt6-Qt6==6.10.0
+PyQt6_sip==13.10.2
+pyqtgraph==0.13.7
+pyserial==3.5
+PySide6==6.10.0
+PySide6_Addons==6.10.0
+PySide6_Essentials==6.10.0
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-json-logger==3.3.0
+pytz==2025.2
+PyYAML==6.0.2
+pyzmq==27.0.2
+referencing==0.36.2
+requests==2.32.5
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rfc3987-syntax==1.1.0
+rpds-py==0.27.1
+rsa==4.9.1
+scipy==1.16.3
+Send2Trash==1.8.3
+serial==0.0.97
+setuptools==80.9.0
+shiboken6==6.10.0
+six==1.17.0
+sniffio==1.3.1
+soupsieve==2.8
+stack-data==0.6.3
+starlette==0.38.6
+sympy==1.14.0
+terminado==0.18.1
+tinycss2==1.4.0
+tornado==6.5.2
+tqdm==4.67.1
+traitlets==5.14.3
+types-python-dateutil==2.9.0.20250822
+typing_extensions==4.15.0
+tzdata==2025.2
+uri-template==1.3.0
+uritemplate==4.2.0
+urllib3==2.5.0
+uvicorn==0.32.0
+wcwidth==0.2.13
+webcolors==24.11.1
+webencodings==0.5.1
+websocket-client==1.8.0
+websockets==15.0.1
+widgetsnbextension==4.0.14
--- a/test_env.py
+++ b/test_env.py
@@ -0,0 +1,63 @@
+# The following was generated by AI - see [18]
+"""
+Test script for LevPodEnv
+Runs a simple episode with constant actions to verify the environment works
+"""
+
+from lev_pod_env import LevPodEnv
+import numpy as np
+import time
+
+# Create environment with GUI for visualization
+env = LevPodEnv(use_gui=True, initial_gap_mm=15)
+
+print("=" * 60)
+print("Testing LevPodEnv")
+print("=" * 60)
+print(f"Action space: {env.action_space}")
+print(f"  4 PWM duty cycles: [front_L, front_R, back_L, back_R]")
+print(f"Observation space: {env.observation_space}")
+print(f"  8 values: [gaps(4), velocities(4)]")
+print("=" * 60)
+
+# Reset environment
+obs, info = env.reset()
+print(f"\nInitial observation:")
+print(f"  Gaps: CR={obs[0]*1000:.2f}mm, CL={obs[1]*1000:.2f}mm, F={obs[2]*1000:.2f}mm, B={obs[3]*1000:.2f}mm")
+print(f"  Velocities: CR={obs[4]*1000:.2f}mm/s, CL={obs[5]*1000:.2f}mm/s, F={obs[6]*1000:.2f}mm/s, B={obs[7]*1000:.2f}mm/s")
+print(f"  Average gap: {np.mean(obs[:4])*1000:.2f} mm")
+
+# Run a few steps with constant action to test force application
+print("\nRunning test episode...")
+for step in range(500):
+    # Apply constant moderate PWM to all 4 coils
+    # 50% PWM should generate current that produces upward force
+    action = np.array([0,0,0,0], dtype=np.float32)
+    
+    obs, reward, terminated, truncated, info = env.step(action)
+    
+    if step % 5 == 0:
+        print(f"\nStep {step} (t={step/240:.2f}s):")
+        print(f"  Sensor gaps: CR={obs[0]*1000:.2f}mm, CL={obs[1]*1000:.2f}mm, " +
+              f"F={obs[2]*1000:.2f}mm, B={obs[3]*1000:.2f}mm")
+        print(f"  Velocities: CR={obs[4]*1000:.2f}mm/s, CL={obs[5]*1000:.2f}mm/s, " +
+              f"F={obs[6]*1000:.2f}mm/s, B={obs[7]*1000:.2f}mm/s")
+        print(f"  Yoke gaps: front={info['gap_front_yoke']*1000:.2f}mm, back={info['gap_back_yoke']*1000:.2f}mm")
+        print(f"  Roll: {np.degrees(info['roll']):.2f}°")
+        print(f"  Currents: FL={info['curr_front_L']:.2f}A, FR={info['curr_front_R']:.2f}A, " +
+              f"BL={info['curr_back_L']:.2f}A, BR={info['curr_back_R']:.2f}A")
+        print(f"  Forces: front={info['force_front']:.2f}N, back={info['force_back']:.2f}N")
+        print(f"  Torques: front={info['torque_front']:.2f}mN·m, back={info['torque_back']:.2f}mN·m")
+        print(f"  Reward: {reward:.2f}")
+    
+    if terminated or truncated:
+        print(f"\nEpisode terminated at step {step}")
+        break
+    
+    time.sleep(0.01)
+
+print("\n" + "=" * 60)
+print("Test complete!")
+print("=" * 60)
+
+env.close()