338 lines
14 KiB
Python
338 lines
14 KiB
Python
|
|
"""
|
|||
|
|
Optuna (TPE / Bayesian-style) optimization for the three-PID maglev controller.
|
|||
|
|
Tunes all nine gains (height, roll, pitch × Kp, Ki, Kd) jointly so coupling is respected.
|
|||
|
|
|
|||
|
|
Noisy optimization: set disturbance_force_std > 0 so the env applies random force/torque
|
|||
|
|
each step (system changes slightly each run). To keep Bayesian optimization effective,
|
|||
|
|
use n_objective_repeats > 1: each candidate is evaluated multiple times and the mean cost
|
|||
|
|
is returned, reducing variance so TPE can compare trials reliably.
|
|||
|
|
|
|||
|
|
Run from the command line or import and call run_optimization() from a notebook.
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
|
|||
|
|
import numpy as np
|
|||
|
|
import optuna
|
|||
|
|
from optuna.samplers import TPESampler
|
|||
|
|
|
|||
|
|
from lev_pod_env import TARGET_GAP
|
|||
|
|
from pid_controller import DEFAULT_GAINS
|
|||
|
|
from pid_simulation import run_pid_simulation, build_feedforward_lut
|
|||
|
|
|
|||
|
|
# Save pid_best_params.json next to this script so notebook and CLI find it regardless of cwd
|
|||
|
|
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|||
|
|
|
|||
|
|
# Default optimization config: long enough to see late instability (~8s+), not so long that optimizer goes ultra-conservative
|
|||
|
|
DEFAULT_MAX_STEPS = 1500
|
|||
|
|
DEFAULT_INITIAL_GAPS_MM = [12.0, 18.0] # Two conditions for robustness
|
|||
|
|
DEFAULT_N_TRIALS = 200
|
|||
|
|
DEFAULT_TIMEOUT_S = 3600
|
|||
|
|
TARGET_GAP_MM = TARGET_GAP * 1000
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _cost_from_data(
|
|||
|
|
data: dict,
|
|||
|
|
target_gap_mm: float,
|
|||
|
|
penalty_early: float = 500.0,
|
|||
|
|
weight_late_osc: float = 3.0,
|
|||
|
|
weight_steady_state: float = 2.0,
|
|||
|
|
) -> float:
|
|||
|
|
"""
|
|||
|
|
Single scalar cost from one run. Lower is better.
|
|||
|
|
|
|||
|
|
- ITAE for gap error and |roll|/|pitch| (tracking over time).
|
|||
|
|
- Late-oscillation penalty: std(roll) and std(pitch) over the *last 50%* of the run,
|
|||
|
|
plus max |roll|/|pitch| in that window, so gains that go unstable after ~half the run are penalized.
|
|||
|
|
- Steady-state term: mean absolute gap error and mean |roll|/|pitch| over the *last 20%*,
|
|||
|
|
so we reward settling at target with small angles.
|
|||
|
|
- Penalty if episode ended early (crash/instability), regardless of run length.
|
|||
|
|
- Small penalty on mean total current (efficiency).
|
|||
|
|
"""
|
|||
|
|
t = np.asarray(data["time"])
|
|||
|
|
dt = np.diff(t, prepend=0.0)
|
|||
|
|
gap_err_mm = np.abs(np.asarray(data["gap_avg"]) - target_gap_mm)
|
|||
|
|
itae_gap = np.sum(t * gap_err_mm * dt)
|
|||
|
|
|
|||
|
|
roll_deg = np.asarray(data["roll_deg"])
|
|||
|
|
pitch_deg = np.asarray(data["pitch_deg"])
|
|||
|
|
roll_abs = np.abs(roll_deg)
|
|||
|
|
pitch_abs = np.abs(pitch_deg)
|
|||
|
|
itae_roll = np.sum(t * roll_abs * dt)
|
|||
|
|
itae_pitch = np.sum(t * pitch_abs * dt)
|
|||
|
|
|
|||
|
|
n = len(t)
|
|||
|
|
early_penalty = penalty_early if data.get("terminated_early", False) else 0.0
|
|||
|
|
|
|||
|
|
# Late half: penalize oscillation (std) and large angles (max) so "good for 6s then violent roll" is expensive
|
|||
|
|
half = max(1, n // 2)
|
|||
|
|
roll_last = roll_deg[-half:]
|
|||
|
|
pitch_last = pitch_deg[-half:]
|
|||
|
|
late_osc_roll = np.std(roll_last) + np.max(np.abs(roll_last))
|
|||
|
|
late_osc_pitch = np.std(pitch_last) + np.max(np.abs(pitch_last))
|
|||
|
|
late_osc_penalty = weight_late_osc * (late_osc_roll + late_osc_pitch)
|
|||
|
|
|
|||
|
|
# Last 20%: steady-state error — want small gap error and small roll/pitch at end
|
|||
|
|
tail = max(1, n // 5)
|
|||
|
|
ss_gap = np.mean(np.abs(np.asarray(data["gap_avg"])[-tail:] - target_gap_mm))
|
|||
|
|
ss_roll = np.mean(roll_abs[-tail:])
|
|||
|
|
ss_pitch = np.mean(pitch_abs[-tail:])
|
|||
|
|
steady_state_penalty = weight_steady_state * (ss_gap + ss_roll + ss_pitch)
|
|||
|
|
|
|||
|
|
mean_current = np.mean(data["current_total"])
|
|||
|
|
current_penalty = 0.01 * mean_current
|
|||
|
|
|
|||
|
|
return (
|
|||
|
|
itae_gap
|
|||
|
|
+ 2.0 * (itae_roll + itae_pitch)
|
|||
|
|
+ early_penalty
|
|||
|
|
+ late_osc_penalty
|
|||
|
|
+ steady_state_penalty
|
|||
|
|
+ current_penalty
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def objective(
|
|||
|
|
trial: optuna.Trial,
|
|||
|
|
initial_gaps_mm: list,
|
|||
|
|
max_steps: int,
|
|||
|
|
use_feedforward: bool,
|
|||
|
|
penalty_early: float,
|
|||
|
|
disturbance_force_std: float = 0.0,
|
|||
|
|
n_objective_repeats: int = 1,
|
|||
|
|
) -> float:
|
|||
|
|
"""
|
|||
|
|
Optuna objective: suggest 9 gains, run simulation(s), return cost.
|
|||
|
|
|
|||
|
|
With disturbance_force_std > 0, the env applies random force/torque disturbances each step,
|
|||
|
|
so the same gains yield different costs across runs. For Bayesian optimization under noise,
|
|||
|
|
set n_objective_repeats > 1 to evaluate each candidate multiple times and return the mean cost,
|
|||
|
|
reducing variance so TPE can compare trials reliably.
|
|||
|
|
"""
|
|||
|
|
# All three loops tuned together (recommended: they interact)
|
|||
|
|
height_kp = trial.suggest_float("height_kp", 5.0, 200.0, log=True)
|
|||
|
|
height_ki = trial.suggest_float("height_ki", 0.5, 50.0, log=True)
|
|||
|
|
height_kd = trial.suggest_float("height_kd", 1.0, 50.0, log=True)
|
|||
|
|
roll_kp = trial.suggest_float("roll_kp", 0.1, 20.0, log=True)
|
|||
|
|
roll_ki = trial.suggest_float("roll_ki", 0.01, 5.0, log=True)
|
|||
|
|
roll_kd = trial.suggest_float("roll_kd", 0.01, 5.0, log=True)
|
|||
|
|
pitch_kp = trial.suggest_float("pitch_kp", 0.1, 20.0, log=True)
|
|||
|
|
pitch_ki = trial.suggest_float("pitch_ki", 0.01, 5.0, log=True)
|
|||
|
|
pitch_kd = trial.suggest_float("pitch_kd", 0.01, 5.0, log=True)
|
|||
|
|
|
|||
|
|
gains = {
|
|||
|
|
"height_kp": height_kp, "height_ki": height_ki, "height_kd": height_kd,
|
|||
|
|
"roll_kp": roll_kp, "roll_ki": roll_ki, "roll_kd": roll_kd,
|
|||
|
|
"pitch_kp": pitch_kp, "pitch_ki": pitch_ki, "pitch_kd": pitch_kd,
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
cost_sum = 0.0
|
|||
|
|
n_evals = 0
|
|||
|
|
for _ in range(n_objective_repeats):
|
|||
|
|
for initial_gap in initial_gaps_mm:
|
|||
|
|
data = run_pid_simulation(
|
|||
|
|
initial_gap_mm=initial_gap,
|
|||
|
|
max_steps=max_steps,
|
|||
|
|
use_gui=False,
|
|||
|
|
disturbance_force_std=disturbance_force_std,
|
|||
|
|
use_feedforward=use_feedforward,
|
|||
|
|
record_video=False,
|
|||
|
|
record_telemetry=False,
|
|||
|
|
gains=gains,
|
|||
|
|
verbose=False,
|
|||
|
|
)
|
|||
|
|
n = len(data["time"])
|
|||
|
|
data["terminated_early"] = n < max_steps - 10
|
|||
|
|
cost_sum += _cost_from_data(data, TARGET_GAP_MM, penalty_early=penalty_early)
|
|||
|
|
n_evals += 1
|
|||
|
|
return cost_sum / n_evals
|
|||
|
|
|
|||
|
|
|
|||
|
|
def run_optimization(
|
|||
|
|
n_trials: int = DEFAULT_N_TRIALS,
|
|||
|
|
timeout: int = DEFAULT_TIMEOUT_S,
|
|||
|
|
initial_gaps_mm: list = None,
|
|||
|
|
max_steps: int = DEFAULT_MAX_STEPS,
|
|||
|
|
use_feedforward: bool = True,
|
|||
|
|
penalty_early: float = 500.0,
|
|||
|
|
disturbance_force_std: float = 0.0,
|
|||
|
|
n_objective_repeats: int = 1,
|
|||
|
|
out_dir: str = None,
|
|||
|
|
study_name: str = "pid_maglev",
|
|||
|
|
) -> optuna.Study:
|
|||
|
|
"""
|
|||
|
|
Run Optuna study (TPE sampler) and save best params to JSON.
|
|||
|
|
|
|||
|
|
disturbance_force_std: passed to env so each step gets random force/torque noise (N).
|
|||
|
|
n_objective_repeats: when > 1, each trial is evaluated this many times (different noise)
|
|||
|
|
and the mean cost is returned, so Bayesian optimization sees a less noisy objective.
|
|||
|
|
|
|||
|
|
Returns:
|
|||
|
|
study: Optuna Study (study.best_params, study.best_value).
|
|||
|
|
"""
|
|||
|
|
if initial_gaps_mm is None:
|
|||
|
|
initial_gaps_mm = DEFAULT_INITIAL_GAPS_MM
|
|||
|
|
if out_dir is None:
|
|||
|
|
out_dir = _SCRIPT_DIR
|
|||
|
|
|
|||
|
|
# Build feedforward LUT once so first trial doesn't do it inside run
|
|||
|
|
build_feedforward_lut()
|
|||
|
|
|
|||
|
|
sampler = TPESampler(
|
|||
|
|
n_startup_trials=min(20, n_trials // 4),
|
|||
|
|
n_ei_candidates=24,
|
|||
|
|
seed=42,
|
|||
|
|
)
|
|||
|
|
study = optuna.create_study(
|
|||
|
|
direction="minimize",
|
|||
|
|
study_name=study_name,
|
|||
|
|
sampler=sampler,
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
study.optimize(
|
|||
|
|
lambda t: objective(
|
|||
|
|
t,
|
|||
|
|
initial_gaps_mm=initial_gaps_mm,
|
|||
|
|
max_steps=max_steps,
|
|||
|
|
use_feedforward=use_feedforward,
|
|||
|
|
penalty_early=penalty_early,
|
|||
|
|
disturbance_force_std=disturbance_force_std,
|
|||
|
|
n_objective_repeats=n_objective_repeats,
|
|||
|
|
),
|
|||
|
|
n_trials=n_trials,
|
|||
|
|
timeout=timeout,
|
|||
|
|
show_progress_bar=True,
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
out_path = os.path.join(out_dir, "pid_best_params.json")
|
|||
|
|
with open(out_path, "w") as f:
|
|||
|
|
json.dump(study.best_params, f, indent=2)
|
|||
|
|
print(f"Best cost: {study.best_value:.4f}")
|
|||
|
|
print(f"Best params saved to {out_path}")
|
|||
|
|
return study
|
|||
|
|
|
|||
|
|
|
|||
|
|
def run_staged_optimization(
|
|||
|
|
stage_steps: list = None,
|
|||
|
|
n_trials_per_stage: int = 50,
|
|||
|
|
timeout_per_stage: int = None,
|
|||
|
|
initial_gaps_mm: list = None,
|
|||
|
|
use_feedforward: bool = True,
|
|||
|
|
penalty_early: float = 500.0,
|
|||
|
|
disturbance_force_std: float = 0.0,
|
|||
|
|
n_objective_repeats: int = 1,
|
|||
|
|
out_dir: str = None,
|
|||
|
|
) -> list:
|
|||
|
|
"""
|
|||
|
|
Run optimization in stages with increasing max_steps, warm-starting each stage from the previous best.
|
|||
|
|
|
|||
|
|
disturbance_force_std: passed to env for stochastic force/torque noise (N).
|
|||
|
|
n_objective_repeats: mean over this many evaluations per trial for a less noisy objective.
|
|||
|
|
|
|||
|
|
Example: stage_steps=[1500, 3000, 6000]
|
|||
|
|
- Stage 1: optimize with 1500 steps (finds good gap/initial roll), save best.
|
|||
|
|
- Stage 2: optimize with 3000 steps; first trial is the 1500-step best (evaluated at 3000 steps), then TPE suggests improvements.
|
|||
|
|
- Stage 3: same with 6000 steps starting from stage 2's best.
|
|||
|
|
|
|||
|
|
This keeps good lift-off and gap control from the short-horizon run while refining for late-horizon roll stability.
|
|||
|
|
"""
|
|||
|
|
if stage_steps is None:
|
|||
|
|
stage_steps = [1500, 3000, 6000]
|
|||
|
|
if initial_gaps_mm is None:
|
|||
|
|
initial_gaps_mm = DEFAULT_INITIAL_GAPS_MM
|
|||
|
|
if out_dir is None:
|
|||
|
|
out_dir = _SCRIPT_DIR
|
|||
|
|
if timeout_per_stage is None:
|
|||
|
|
timeout_per_stage = DEFAULT_TIMEOUT_S
|
|||
|
|
|
|||
|
|
build_feedforward_lut()
|
|||
|
|
best_params_path = os.path.join(out_dir, "pid_best_params.json")
|
|||
|
|
studies = []
|
|||
|
|
|
|||
|
|
for stage_idx, max_steps in enumerate(stage_steps):
|
|||
|
|
print(f"\n--- Stage {stage_idx + 1}/{len(stage_steps)}: max_steps={max_steps} ---")
|
|||
|
|
study = optuna.create_study(
|
|||
|
|
direction="minimize",
|
|||
|
|
study_name=f"pid_maglev_stage_{max_steps}",
|
|||
|
|
sampler=TPESampler(
|
|||
|
|
n_startup_trials=min(20, n_trials_per_stage // 4),
|
|||
|
|
n_ei_candidates=24,
|
|||
|
|
seed=42 + stage_idx,
|
|||
|
|
),
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# Warm start: enqueue previous stage's best so we refine from it (stage 0 has no previous)
|
|||
|
|
if stage_idx > 0 and os.path.isfile(best_params_path):
|
|||
|
|
with open(best_params_path) as f:
|
|||
|
|
prev_best = json.load(f)
|
|||
|
|
study.enqueue_trial(prev_best)
|
|||
|
|
print(f"Enqueued previous best params (from stage {stage_steps[stage_idx - 1]} steps) as first trial.")
|
|||
|
|
|
|||
|
|
study.optimize(
|
|||
|
|
lambda t: objective(
|
|||
|
|
t,
|
|||
|
|
initial_gaps_mm=initial_gaps_mm,
|
|||
|
|
max_steps=max_steps,
|
|||
|
|
use_feedforward=use_feedforward,
|
|||
|
|
penalty_early=penalty_early,
|
|||
|
|
disturbance_force_std=disturbance_force_std,
|
|||
|
|
n_objective_repeats=n_objective_repeats,
|
|||
|
|
),
|
|||
|
|
n_trials=n_trials_per_stage,
|
|||
|
|
timeout=timeout_per_stage,
|
|||
|
|
show_progress_bar=True,
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
with open(best_params_path, "w") as f:
|
|||
|
|
json.dump(study.best_params, f, indent=2)
|
|||
|
|
stage_path = os.path.join(out_dir, f"pid_best_params_{max_steps}.json")
|
|||
|
|
with open(stage_path, "w") as f:
|
|||
|
|
json.dump(study.best_params, f, indent=2)
|
|||
|
|
print(f"Stage best cost: {study.best_value:.4f} -> saved to {best_params_path} and {stage_path}")
|
|||
|
|
studies.append(study)
|
|||
|
|
|
|||
|
|
print(f"\nStaged optimization done. Final best params in {best_params_path}")
|
|||
|
|
return studies
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
|
|||
|
|
import argparse
|
|||
|
|
parser = argparse.ArgumentParser(description="Optuna PID tuning for maglev three-loop controller")
|
|||
|
|
parser.add_argument("--n_trials", type=int, default=DEFAULT_N_TRIALS)
|
|||
|
|
parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT_S)
|
|||
|
|
parser.add_argument("--max_steps", type=int, default=DEFAULT_MAX_STEPS)
|
|||
|
|
parser.add_argument("--gaps", type=float, nargs="+", default=DEFAULT_INITIAL_GAPS_MM)
|
|||
|
|
parser.add_argument("--out_dir", type=str, default=_SCRIPT_DIR, help="Directory for pid_best_params.json (default: same as script)")
|
|||
|
|
parser.add_argument("--no_feedforward", action="store_true")
|
|||
|
|
parser.add_argument("--staged", action="store_true", help="Staged optimization: 1500 -> 3000 -> 6000 steps, each stage warm-starts from previous best")
|
|||
|
|
parser.add_argument("--stage_steps", type=int, nargs="+", default=[1500, 3000, 6000], help="Steps per stage when using --staged (default: 1500 3000 6000)")
|
|||
|
|
parser.add_argument("--n_trials_per_stage", type=int, default=DEFAULT_N_TRIALS, help="Trials per stage when using --staged")
|
|||
|
|
parser.add_argument("--disturbance_force_std", type=float, default=0.0, help="Env disturbance force std (N); roll/pitch torque scale with this. Use >0 for noisy optimization.")
|
|||
|
|
parser.add_argument("--n_objective_repeats", type=int, default=1, help="Evaluate each trial this many times and report mean cost (reduces noise for Bayesian optimization)")
|
|||
|
|
args = parser.parse_args()
|
|||
|
|
|
|||
|
|
if args.staged:
|
|||
|
|
run_staged_optimization(
|
|||
|
|
stage_steps=args.stage_steps,
|
|||
|
|
n_trials_per_stage=args.n_trials_per_stage,
|
|||
|
|
timeout_per_stage=args.timeout,
|
|||
|
|
initial_gaps_mm=args.gaps,
|
|||
|
|
use_feedforward=not args.no_feedforward,
|
|||
|
|
disturbance_force_std=args.disturbance_force_std,
|
|||
|
|
n_objective_repeats=args.n_objective_repeats,
|
|||
|
|
out_dir=args.out_dir,
|
|||
|
|
)
|
|||
|
|
else:
|
|||
|
|
run_optimization(
|
|||
|
|
n_trials=args.n_trials,
|
|||
|
|
timeout=args.timeout,
|
|||
|
|
initial_gaps_mm=args.gaps,
|
|||
|
|
max_steps=args.max_steps,
|
|||
|
|
use_feedforward=not args.no_feedforward,
|
|||
|
|
disturbance_force_std=args.disturbance_force_std,
|
|||
|
|
n_objective_repeats=args.n_objective_repeats,
|
|||
|
|
out_dir=args.out_dir,
|
|||
|
|
)
|