"""Main orchestrator loop — capture → analyze → notify → execute. Three concurrent tasks: 1. VLM loop: capture screenshots, analyze, push results 2. Notification handler: show cards when friction detected, wait for response 3. Input reader: read user's terminal input (1=accept, 0=dismiss) Swift portability notes: - The VLM loop becomes a Timer or DispatchSourceTimer - The notification handler becomes SwiftUI state updates - The input reader becomes button tap handlers - The three tasks map to Swift's structured concurrency (async let / TaskGroup) """ from __future__ import annotations import asyncio import json import logging import sys import time from argus.backend import send_analysis from argus.buffer import HistoryBuffer from argus.capture import capture_screenshot from argus.config import BUFFER_MAX_LEN, CAPTURE_INTERVAL_S, VLM_CALL_EVERY_N from argus.executor import execute from argus.notification import NotificationManager from argus.session import SessionManager from argus.vlm import TaskContext, VLMResult, analyze_screenshot log = logging.getLogger(__name__) # Track session actions already shown to avoid repeating _handled_session_actions: set[str] = set() def _session_action_key(sa) -> str: return f"{sa.type}:{sa.session_id}" def _show_session_card(title: str, body: str, options: list[str]) -> None: """Display a session-related card (not friction). Swift: native notification.""" print() print("┌" + "─" * 58 + "┐") print(f"│ {title:<56} │") print("│" + " " * 58 + "│") for line in body.split("\n"): print(f"│ {line:<56} │") print("│" + " " * 58 + "│") for i, opt in enumerate(options): print(f"│ [{i + 1}] {opt:<53} │") print(f"│ [0] Not now{' ' * 44} │") print("└" + "─" * 58 + "┘") print() async def _wait_for_input() -> int: """Wait for a single integer input from stdin. Returns the number or 0.""" loop = asyncio.get_event_loop() line = await loop.run_in_executor(None, sys.stdin.readline) try: return int(line.strip()) except (ValueError, EOFError): return 0 async def _handle_session_action(sa, result, sessions: SessionManager, notifier: NotificationManager) -> None: """Handle VLM session_action output — show card and act on approval. Swift portability: becomes SwiftUI state changes + button handlers. """ # Skip if we already showed this exact action key = _session_action_key(sa) if key in _handled_session_actions: return _handled_session_actions.add(key) if sa.type == "resume": session = next((s for s in sessions.sessions if s.session_id == sa.session_id), None) checkpoint = session.checkpoint_note if session else "" task_title = session.task_title if session else sa.reason _show_session_card( f"📂 Resume: {task_title}", f"You left off: {checkpoint}" if checkpoint else sa.reason, ["Resume session"], ) choice = await _wait_for_input() if choice == 1: if not sessions._mock: resume_card = await sessions.get_resume_card(sa.session_id) if resume_card: rc = resume_card.get("resume_card", {}) print(f" 💡 {rc.get('welcome_back', '')}") print(f" {rc.get('you_were_doing', '')}") print(f" {rc.get('next_step', '')}") print(f" {rc.get('motivation', '')}") else: print(f" ✓ Resumed \"{task_title}\"") if checkpoint: print(f" Last checkpoint: {checkpoint}") if session: session.status = "active" sessions._active_session = session else: print(" dismissed.") elif sa.type == "start_new": task = result.inferred_task _show_session_card( "🆕 New focus session", f"You're working on: {task}", ["Start focus session"], ) choice = await _wait_for_input() if choice == 1: if not sessions._mock: resp = await sessions.start_session(task) if resp: print(f" ✓ Session started: {resp.get('id', '?')}") else: print(f" ✓ (mock) Session started for \"{task}\"") else: print(" dismissed.") elif sa.type == "complete": session = next((s for s in sessions.sessions if s.session_id == sa.session_id), None) task_title = session.task_title if session else "session" _show_session_card( f"✅ Complete: {task_title}", sa.reason, ["Complete session"], ) choice = await _wait_for_input() if choice == 1 and sa.session_id: if not sessions._mock: ok = await sessions.end_session(sa.session_id) if ok: print(f" ✓ Session completed") else: print(f" ✓ (mock) Session \"{task_title}\" completed") sessions._sessions = [s for s in sessions._sessions if s.session_id != sa.session_id] if sessions._active_session and sessions._active_session.session_id == sa.session_id: sessions._active_session = None else: print(" dismissed.") elif sa.type == "switch": session = next((s for s in sessions.sessions if s.session_id == sa.session_id), None) task_title = session.task_title if session else "another task" _show_session_card( f"🔄 Switch to: {task_title}", sa.reason, [f"Switch to \"{task_title}\""], ) choice = await _wait_for_input() if choice == 1: print(f" ✓ Switched to \"{task_title}\"") else: print(" dismissed.") def _is_valid_session_id(session_id: str | None, sessions: SessionManager) -> bool: """Check if a session_id from VLM output actually exists in our sessions.""" if not session_id: return False return any(s.session_id == session_id for s in sessions.sessions) async def run_loop( ctx: TaskContext, *, api_key: str | None = None, vlm_backend: str | None = None, jwt: str | None = None, base_url: str | None = None, dry_run: bool = False, max_iterations: int | None = None, on_result: None | (callable) = None, auto_execute: bool = False, mock_sessions: list | None = None, ) -> None: """Run the Argus VLM loop with notification and execution support. Args: ctx: Task/session context. api_key: Gemini API key override. vlm_backend: "ollama" or "gemini". jwt: Backend JWT override. base_url: Backend base URL override. dry_run: If True, skip sending to backend. max_iterations: Stop after N iterations (None = forever). on_result: Optional callback(VLMResult) per analysis. auto_execute: If True, enable notification + executor flow. """ history = HistoryBuffer(image_maxlen=BUFFER_MAX_LEN) notifier = NotificationManager() sessions = SessionManager(jwt=jwt, base_url=base_url) iteration = 0 log.info( "Argus loop starting — interval=%ds, session=%s, task=%s, executor=%s", CAPTURE_INTERVAL_S, ctx.session_id, ctx.task_title, "on" if auto_execute else "off", ) # Load sessions — mock or from backend use_mock = mock_sessions is not None if use_mock: sessions._sessions = mock_sessions sessions._active_session = next((s for s in mock_sessions if s.status == "active"), None) sessions._mock = True # prevent refresh from overwriting log.info("Loaded %d mock sessions", len(mock_sessions)) else: sessions._mock = False try: await sessions.fetch_open_sessions() except Exception: log.exception("Startup: failed to fetch sessions — continuing without session context") # Log attachment status so it's visible in the console if sessions.active: s = sessions.active log.info( "ATTACHED to session %s — task=%r last_app=%s checkpoint=%r", s.session_id[:8], s.task_title, s.last_app or "(unknown)", s.checkpoint_note or "(none)", ) else: log.info("No active session found — running in monitoring-only mode (dry-run=%s)", dry_run) # Pending frames collected between VLM calls pending_frames: list[bytes] = [] capture_count = 0 try: while max_iterations is None or iteration < max_iterations: t0 = time.monotonic() try: # 0. Refresh sessions periodically await sessions.maybe_refresh() # 1. Capture screenshot every interval screenshot = capture_screenshot() pending_frames.append(screenshot) capture_count += 1 log.debug("Captured frame %d (%d pending)", capture_count, len(pending_frames)) # 2. Only call VLM every N captures if len(pending_frames) < VLM_CALL_EVERY_N: elapsed = time.monotonic() - t0 sleep_for = max(0.0, CAPTURE_INTERVAL_S - elapsed) if sleep_for > 0: await asyncio.sleep(sleep_for) continue # ── VLM call with all pending frames ── iteration += 1 # Push all pending frames into buffer (buffer keeps last BUFFER_MAX_LEN) for frame in pending_frames: history.push(frame, "") # summaries filled after VLM call pending_frames.clear() t_vlm = time.monotonic() result: VLMResult = await analyze_screenshot( screenshot, ctx, history, api_key=api_key, backend=vlm_backend, session_context=sessions.format_for_prompt(), ) t_vlm_done = time.monotonic() payload = result.to_backend_payload(ctx.session_id) log.info( "[%d] vlm=%.2fs frames=%d friction=%s summary=%s", iteration, t_vlm_done - t_vlm, VLM_CALL_EVERY_N, result.friction.type, result.vlm_summary, ) # Update last entry's summary now that we have it history.set_last_output(payload) # 4. Print / send to backend if dry_run: print(json.dumps(payload, indent=2)) else: resp = await send_analysis( result, ctx.session_id, jwt=jwt, base_url=base_url ) log.debug("Backend response: %s", resp) if auto_execute: sa = result.session_action # Validate session_action — check that VLM output relates to the session. # Match against filename, file stem (no extension), or task title. if sa.type in ("resume", "switch") and sa.session_id: session = next( (s for s in sessions.sessions if s.session_id == sa.session_id), None ) if session: context = (result.vlm_summary + " " + result.inferred_task + " " + sa.reason).lower() matches = [] if session.last_file: matches.append(session.last_file.lower()) # Also check stem without extension (e.g. "receipt" from "receipt.pdf") stem = session.last_file.rsplit(".", 1)[0].lower() if stem: matches.append(stem) if session.task_title: # Check key words from task title for word in session.task_title.lower().split(): if len(word) > 3: # skip short words matches.append(word) if matches and not any(m in context for m in matches): log.debug( "Suppressing session_action=%s — none of %s found in context", sa.type, matches, ) sa.type = "none" if not _is_valid_session_id(sa.session_id, sessions) and sa.type in ("resume", "switch"): sa.type = "none" # 5. Session actions take priority — but only if not already handled session_handled = False if sa.type != "none": key = _session_action_key(sa) if key not in _handled_session_actions: await _handle_session_action(sa, result, sessions, notifier) session_handled = True # 6. Friction notification + executor if not session_handled and notifier.should_notify(payload): card = notifier.create_card(payload) notifier.show_card_terminal(card) choice = await _wait_for_input() action_idx = choice - 1 if choice > 0: print(f"\n⚡ Executing action {action_idx + 1}...") summary = await execute( payload, action_idx, history=history, current_screenshot=screenshot, api_key=api_key, ) # Emit a parseable JSON block so Swift can surface the result # in the HUD and clear the executing spinner. print(json.dumps({"exec_summary": summary or ""}, indent=2), flush=True) if summary: history.set_last_execution(summary) else: print("dismissed.") # 7. Nudge — VLM decided a nudge is appropriate (only if nothing else fired) elif not session_handled and result.gentle_nudge: print(f"\n💛 {result.gentle_nudge}") # 8. Fallback: suggest new session if VLM didn't elif not session_handled and ( sa.type == "none" and result.on_task and not sessions.active and sessions.should_suggest_new_session(result.inferred_task) ): task = result.inferred_task print(f"\n🆕 You've been working on \"{task}\" — start a focus session?") print(f" [1] Start [0] Not now") card = notifier.create_card({ "friction": { "type": "none", "confidence": 1.0, "description": f"New task detected: {task}", "proposed_actions": [{"label": "Start focus session", "action_type": "other", "details": task}], } }) notifier.show_card_terminal(card) accepted, _ = await notifier.wait_for_response(timeout=60.0) if accepted: resp = await sessions.start_session(task) if resp: print(f" ✓ Session started: {resp.get('id', '?')}") # Clear execution context after 3 iterations if history.get_last_execution() and iteration % 3 == 0: history.clear_last_execution() # 9. Callback if on_result: on_result(result) except Exception: log.exception("Error in Argus loop iteration %d", iteration) # Sleep for remainder of capture interval elapsed = time.monotonic() - t0 sleep_for = max(0.0, CAPTURE_INTERVAL_S - elapsed) if sleep_for > 0: await asyncio.sleep(sleep_for) finally: pass