LockInBroMacOS/argus/loop.py

"""Main orchestrator loop — capture → analyze → notify → execute.

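Three cooperating tasks (this Python version runs them sequentially inside
`run_loop`, so screen capture pauses while a card is waiting for input):
  1. VLM loop: capture screenshots, analyze, push results
  2. Notification handler: show cards when friction detected, wait for response
  3. Input reader: read user's terminal input (1=accept, 0=dismiss)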

Swift portability notes:
  - The VLM loop becomes a Timer or DispatchSourceTimer
  - The notification handler becomes SwiftUI state updates
  - The input reader becomes button tap handlers
  - The three tasks map to Swift's structured concurrency (async let / TaskGroup)
"""

from __future__ import annotations

import asyncio
import json
import logging
import sys
import time
from collections.abc import Callable

from argus.backend import send_analysis
from argus.buffer import HistoryBuffer
from argus.capture import capture_screenshot
from argus.config import BUFFER_MAX_LEN, CAPTURE_INTERVAL_S, VLM_CALL_EVERY_N
from argus.executor import execute
from argus.notification import NotificationManager
from argus.session import SessionManager
from argus.vlm import TaskContext, VLMResult, analyze_screenshot

# Module-level logger, named after this module.
log = logging.getLogger(__name__)


# Keys of session actions that have already been shown, so the same card is not
# presented twice. Keys are produced by _session_action_key() and have the form
# "<type>:<session_id>". Nothing in this module removes entries, so a key stays
# suppressed for the lifetime of the process.
_handled_session_actions: set[str] = set()


def _session_action_key(sa) -> str:
    return f"{sa.type}:{sa.session_id}"


def _show_session_card(title: str, body: str, options: list[str]) -> None:
    """Display a session-related card (not friction). Swift: native notification."""
    print()
    print("┌" + "─" * 58 + "┐")
    print(f"│ {title:<56} │")
    print("│" + " " * 58 + "│")
    for line in body.split("\n"):
        print(f"│ {line:<56} │")
    print("│" + " " * 58 + "│")
    for i, opt in enumerate(options):
        print(f"│  [{i + 1}] {opt:<53} │")
    print(f"│  [0] Not now{' ' * 44} │")
    print("└" + "─" * 58 + "┘")
    print()


async def _wait_for_input() -> int:
    """Wait for a single integer input from stdin. Returns the number or 0."""
    loop = asyncio.get_event_loop()
    line = await loop.run_in_executor(None, sys.stdin.readline)
    try:
        return int(line.strip())
    except (ValueError, EOFError):
        return 0


async def _handle_session_action(sa, result, sessions: SessionManager, notifier: NotificationManager) -> None:
    """Handle VLM session_action output — show card and act on approval.

    Each distinct action (see ``_session_action_key``) is presented at most
    once per process: the key is recorded *before* the card is shown, so a
    dismissed card is not offered again either.

    Swift portability: becomes SwiftUI state changes + button handlers.

    Args:
        sa: The session action; ``type``, ``session_id`` and ``reason`` are read.
        result: The VLM result; only ``inferred_task`` is read (for "start_new").
        sessions: Session manager whose session list / active session may be
            updated on approval.
        notifier: Accepted for interface compatibility; not used here.
    """
    # Skip if we already showed this exact action
    key = _session_action_key(sa)
    if key in _handled_session_actions:
        return
    _handled_session_actions.add(key)

    if sa.type == "resume":
        await _resume_session_flow(sa, sessions)
    elif sa.type == "start_new":
        await _start_new_session_flow(result, sessions)
    elif sa.type == "complete":
        await _complete_session_flow(sa, sessions)
    elif sa.type == "switch":
        await _switch_session_flow(sa, sessions)
    # Any other type is recorded as handled but otherwise ignored.


def _find_session(sessions: SessionManager, session_id):
    """Return the entry of ``sessions.sessions`` with this id, or None."""
    return next((s for s in sessions.sessions if s.session_id == session_id), None)


async def _resume_session_flow(sa, sessions: SessionManager) -> None:
    """Offer to resume a session and mark it active when the user accepts."""
    session = _find_session(sessions, sa.session_id)
    checkpoint = session.checkpoint_note if session else ""
    task_title = session.task_title if session else sa.reason

    _show_session_card(
        f"📂 Resume: {task_title}",
        f"You left off: {checkpoint}" if checkpoint else sa.reason,
        ["Resume session"],
    )
    if await _wait_for_input() != 1:
        print("  dismissed.")
        return

    if sessions._mock:
        print(f"  ✓ Resumed \"{task_title}\"")
        if checkpoint:
            print(f"    Last checkpoint: {checkpoint}")
    else:
        resume_card = await sessions.get_resume_card(sa.session_id)
        if resume_card:
            rc = resume_card.get("resume_card", {})
            print(f"  💡 {rc.get('welcome_back', '')}")
            print(f"     {rc.get('you_were_doing', '')}")
            print(f"     {rc.get('next_step', '')}")
            print(f"     {rc.get('motivation', '')}")

    # The local state is updated whether or not a resume card came back.
    if session:
        session.status = "active"
        sessions._active_session = session


async def _start_new_session_flow(result, sessions: SessionManager) -> None:
    """Offer to start a focus session for the task the VLM inferred."""
    task = result.inferred_task
    _show_session_card(
        "🆕 New focus session",
        f"You're working on: {task}",
        ["Start focus session"],
    )
    if await _wait_for_input() != 1:
        print("  dismissed.")
        return

    if sessions._mock:
        print(f"  ✓ (mock) Session started for \"{task}\"")
        return

    resp = await sessions.start_session(task)
    if resp:
        print(f"  ✓ Session started: {resp.get('id', '?')}")


async def _complete_session_flow(sa, sessions: SessionManager) -> None:
    """Offer to complete a session and drop it from local state on approval."""
    session = _find_session(sessions, sa.session_id)
    task_title = session.task_title if session else "session"

    _show_session_card(
        f"✅ Complete: {task_title}",
        sa.reason,
        ["Complete session"],
    )
    choice = await _wait_for_input()
    # An approval without a session id cannot be acted on and is reported
    # the same way as a dismissal.
    if choice != 1 or not sa.session_id:
        print("  dismissed.")
        return

    if sessions._mock:
        print(f"  ✓ (mock) Session \"{task_title}\" completed")
    else:
        ok = await sessions.end_session(sa.session_id)
        if ok:
            print("  ✓ Session completed")

    # NOTE(review): the session is removed locally even when end_session()
    # reported failure, and nothing is printed in that case — confirm this
    # best-effort behaviour is intended.
    sessions._sessions = [s for s in sessions._sessions if s.session_id != sa.session_id]
    active = sessions._active_session
    if active and active.session_id == sa.session_id:
        sessions._active_session = None


async def _switch_session_flow(sa, sessions: SessionManager) -> None:
    """Offer to switch to another session; currently only prints the outcome."""
    session = _find_session(sessions, sa.session_id)
    task_title = session.task_title if session else "another task"

    _show_session_card(
        f"🔄 Switch to: {task_title}",
        sa.reason,
        [f"Switch to \"{task_title}\""],
    )
    # NOTE(review): accepting does not change sessions._active_session or any
    # session status — confirm whether the switch should be persisted.
    if await _wait_for_input() == 1:
        print(f"  ✓ Switched to \"{task_title}\"")
    else:
        print("  dismissed.")


def _is_valid_session_id(session_id: str | None, sessions: SessionManager) -> bool:
    """Check if a session_id from VLM output actually exists in our sessions."""
    if not session_id:
        return False
    return any(s.session_id == session_id for s in sessions.sessions)


async def run_loop(
    ctx: TaskContext,
    *,
    api_key: str | None = None,
    vlm_backend: str | None = None,
    jwt: str | None = None,
    base_url: str | None = None,
    dry_run: bool = False,
    max_iterations: int | None = None,
    on_result: Callable[[VLMResult], None] | None = None,
    auto_execute: bool = False,
    mock_sessions: list | None = None,
) -> None:
    """Run the Argus VLM loop with notification and execution support.

    A screenshot is captured every ``CAPTURE_INTERVAL_S`` seconds; once
    ``VLM_CALL_EVERY_N`` frames are pending they are pushed into the history
    buffer and the VLM is called. Exceptions raised inside one pass are logged
    and the loop carries on with the next capture.

    Args:
        ctx: Task/session context.
        api_key: Gemini API key override.
        vlm_backend: "ollama" or "gemini".
        jwt: Backend JWT override.
        base_url: Backend base URL override.
        dry_run: If True, print the payload instead of sending it to the backend.
        max_iterations: Stop after N VLM calls (None = forever). Captures that
            do not trigger a VLM call are not counted.
        on_result: Optional callback(VLMResult) per analysis.
        auto_execute: If True, enable notification + executor flow.
        mock_sessions: If not None, use these sessions instead of fetching
            from the backend, and mark the session manager as mock.
    """
    history = HistoryBuffer(image_maxlen=BUFFER_MAX_LEN)
    notifier = NotificationManager()
    sessions = SessionManager(jwt=jwt, base_url=base_url)
    iteration = 0

    log.info(
        "Argus loop starting — interval=%ds, session=%s, task=%s, executor=%s",
        CAPTURE_INTERVAL_S,
        ctx.session_id,
        ctx.task_title,
        "on" if auto_execute else "off",
    )

    # Load sessions — mock or from backend
    if mock_sessions is not None:
        sessions._sessions = mock_sessions
        sessions._active_session = next((s for s in mock_sessions if s.status == "active"), None)
        sessions._mock = True  # prevent refresh from overwriting
        log.info("Loaded %d mock sessions", len(mock_sessions))
    else:
        sessions._mock = False
        try:
            await sessions.fetch_open_sessions()
        except Exception:
            log.exception("Startup: failed to fetch sessions — continuing without session context")

    # Log attachment status so it's visible in the console
    if sessions.active:
        s = sessions.active
        log.info(
            "ATTACHED to session %s — task=%r  last_app=%s  checkpoint=%r",
            s.session_id[:8], s.task_title, s.last_app or "(unknown)", s.checkpoint_note or "(none)",
        )
    else:
        log.info("No active session found — running in monitoring-only mode (dry-run=%s)", dry_run)

    # Pending frames collected between VLM calls
    pending_frames: list[bytes] = []
    capture_count = 0

    # VLM iteration at which the current execution summary was first seen;
    # None while there is no execution context to expire.
    exec_recorded_at: int | None = None

    while max_iterations is None or iteration < max_iterations:
        t0 = time.monotonic()

        try:
            # 0. Refresh sessions periodically
            await sessions.maybe_refresh()

            # 1. Capture screenshot every interval
            screenshot = capture_screenshot()
            pending_frames.append(screenshot)
            capture_count += 1
            log.debug("Captured frame %d (%d pending)", capture_count, len(pending_frames))

            # 2. Only call VLM every N captures
            if len(pending_frames) < VLM_CALL_EVERY_N:
                await _sleep_until_next_capture(t0)
                continue

            # ── VLM call with all pending frames ──
            iteration += 1

            # Push all pending frames into buffer (buffer keeps last BUFFER_MAX_LEN)
            for frame in pending_frames:
                history.push(frame, "")  # summaries filled after VLM call
            pending_frames.clear()

            # 3. Analyze the most recent frame with the buffered history
            t_vlm = time.monotonic()
            result: VLMResult = await analyze_screenshot(
                screenshot, ctx, history,
                api_key=api_key,
                backend=vlm_backend,
                session_context=sessions.format_for_prompt(),
            )
            t_vlm_done = time.monotonic()
            payload = result.to_backend_payload(ctx.session_id)

            log.info(
                "[%d] vlm=%.2fs  frames=%d  friction=%s  summary=%s",
                iteration,
                t_vlm_done - t_vlm,
                VLM_CALL_EVERY_N,
                result.friction.type,
                result.vlm_summary,
            )

            # Update last entry's summary now that we have it
            history.set_last_output(payload)

            # 4. Print / send to backend
            if dry_run:
                print(json.dumps(payload, indent=2))
            else:
                resp = await send_analysis(
                    result, ctx.session_id, jwt=jwt, base_url=base_url
                )
                log.debug("Backend response: %s", resp)

            if auto_execute:
                sa = result.session_action

                # Drop resume/switch actions that name an unknown session or
                # that do not relate to what is on screen.
                _suppress_unrelated_session_action(sa, result, sessions)

                # 5. Session actions take priority — but only if not already handled
                session_handled = False
                if sa.type != "none":
                    key = _session_action_key(sa)
                    if key not in _handled_session_actions:
                        await _handle_session_action(sa, result, sessions, notifier)
                        session_handled = True

                if not session_handled:
                    # 6. Friction notification + executor
                    if notifier.should_notify(payload):
                        card = notifier.create_card(payload)
                        notifier.show_card_terminal(card)

                        choice = await _wait_for_input()
                        action_idx = choice - 1

                        if choice > 0:
                            # NOTE(review): action_idx is not checked against the
                            # number of proposed actions — presumably execute()
                            # copes with an out-of-range index; verify.
                            print(f"\n⚡ Executing action {action_idx + 1}...")
                            summary = await execute(
                                payload, action_idx,
                                history=history,
                                current_screenshot=screenshot,
                                api_key=api_key,
                            )
                            # Emit a parseable JSON block so Swift can surface the result
                            # in the HUD and clear the executing spinner.
                            print(json.dumps({"exec_summary": summary or ""}, indent=2), flush=True)
                            if summary:
                                history.set_last_execution(summary)
                                exec_recorded_at = iteration
                        else:
                            print("dismissed.")

                    # 7. Nudge — VLM decided a nudge is appropriate (only if nothing else fired)
                    elif result.gentle_nudge:
                        print(f"\n💛 {result.gentle_nudge}")

                    # 8. Fallback: suggest new session if VLM didn't
                    elif (
                        sa.type == "none"
                        and result.on_task
                        and not sessions.active
                        and sessions.should_suggest_new_session(result.inferred_task)
                    ):
                        task = result.inferred_task
                        print(f"\n🆕 You've been working on \"{task}\" — start a focus session?")
                        print("   [1] Start  [0] Not now")
                        card = notifier.create_card({
                            "friction": {
                                "type": "none",
                                "confidence": 1.0,
                                "description": f"New task detected: {task}",
                                "proposed_actions": [{"label": "Start focus session", "action_type": "other", "details": task}],
                            }
                        })
                        notifier.show_card_terminal(card)
                        accepted, _ = await notifier.wait_for_response(timeout=60.0)
                        if accepted:
                            if sessions._mock:
                                # Same guard as the "start_new" session action:
                                # mock runs must not call the backend.
                                print(f"  ✓ (mock) Session started for \"{task}\"")
                            else:
                                resp = await sessions.start_session(task)
                                if resp:
                                    print(f"  ✓ Session started: {resp.get('id', '?')}")

            # Expire the execution context three VLM iterations after it was
            # recorded. (Testing `iteration % 3 == 0` would wipe a summary
            # recorded on iteration 3, 6, … before the VLM ever saw it.)
            if history.get_last_execution():
                if exec_recorded_at is None:
                    # Set outside this loop body — start counting from now.
                    exec_recorded_at = iteration
                elif iteration - exec_recorded_at >= 3:
                    history.clear_last_execution()
                    exec_recorded_at = None
            else:
                exec_recorded_at = None

            # 9. Callback
            if on_result:
                on_result(result)

        except Exception:
            # Top-level boundary: one bad pass must not stop the loop.
            log.exception("Error in Argus loop iteration %d", iteration)

        # Sleep for remainder of capture interval
        await _sleep_until_next_capture(t0)


async def _sleep_until_next_capture(started_at: float) -> None:
    """Sleep for the part of CAPTURE_INTERVAL_S not yet used since ``started_at``."""
    remaining = CAPTURE_INTERVAL_S - (time.monotonic() - started_at)
    if remaining > 0:
        await asyncio.sleep(remaining)


def _suppress_unrelated_session_action(sa, result: VLMResult, sessions: SessionManager) -> None:
    """Set ``sa.type`` to "none" when a resume/switch action does not check out.

    An action is suppressed when its session id is missing or unknown, or when
    none of the session's identifying strings (last file name, its stem, or
    task-title words longer than three characters) occur in the VLM's summary,
    inferred task or stated reason. Other action types are left untouched.
    """
    if sa.type not in ("resume", "switch"):
        return

    if not _is_valid_session_id(sa.session_id, sessions):
        sa.type = "none"
        return

    session = next((s for s in sessions.sessions if s.session_id == sa.session_id), None)
    if not session:
        return

    # Missing fields count as empty text instead of raising on concatenation.
    context = " ".join(
        part or "" for part in (result.vlm_summary, result.inferred_task, sa.reason)
    ).lower()

    matches = []
    if session.last_file:
        matches.append(session.last_file.lower())
        # Also check stem without extension (e.g. "receipt" from "receipt.pdf")
        stem = session.last_file.rsplit(".", 1)[0].lower()
        if stem:
            matches.append(stem)
    if session.task_title:
        # Key words from the task title; short words are skipped as too generic.
        matches.extend(word for word in session.task_title.lower().split() if len(word) > 3)

    # With nothing to match against, the action is given the benefit of the doubt.
    if matches and not any(m in context for m in matches):
        log.debug(
            "Suppressing session_action=%s — none of %s found in context",
            sa.type, matches,
        )
        sa.type = "none"