416 lines
17 KiB
Python
416 lines
17 KiB
Python
"""Main orchestrator loop — capture → analyze → notify → execute.
|
|
|
|
Three concurrent tasks:
|
|
1. VLM loop: capture screenshots, analyze, push results
|
|
2. Notification handler: show cards when friction detected, wait for response
|
|
3. Input reader: read user's terminal input (1=accept, 0=dismiss)
|
|
|
|
Swift portability notes:
|
|
- The VLM loop becomes a Timer or DispatchSourceTimer
|
|
- The notification handler becomes SwiftUI state updates
|
|
- The input reader becomes button tap handlers
|
|
- The three tasks map to Swift's structured concurrency (async let / TaskGroup)
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import sys
|
|
import time
|
|
|
|
from argus.backend import send_analysis
|
|
from argus.buffer import HistoryBuffer
|
|
from argus.capture import capture_screenshot
|
|
from argus.config import BUFFER_MAX_LEN, CAPTURE_INTERVAL_S, VLM_CALL_EVERY_N
|
|
from argus.executor import execute
|
|
from argus.notification import NotificationManager
|
|
from argus.session import SessionManager
|
|
from argus.vlm import TaskContext, VLMResult, analyze_screenshot
|
|
|
|
# Module-level logger; its name is taken from this module's __name__.
log = logging.getLogger(__name__)


# Track session actions already shown to avoid repeating.
# Holds the "<type>:<session_id>" keys produced by _session_action_key().
# NOTE(review): nothing in the visible code ever removes a key, so an action
# appears to stay suppressed for the life of the process — confirm intended.
_handled_session_actions: set[str] = set()
|
|
|
|
|
|
def _session_action_key(sa) -> str:
    """Return the de-duplication key for a session action.

    The key has the form ``"<type>:<session_id>"`` and is built from the
    ``type`` and ``session_id`` attributes of *sa*.
    """
    action_type, target_id = sa.type, sa.session_id
    return "{}:{}".format(action_type, target_id)
|
|
|
|
|
|
def _show_session_card(title: str, body: str, options: list[str]) -> None:
    """Display a session-related card (not friction). Swift: native notification.

    Prints a box-drawn card to stdout: the title, the body (one row per
    line), one numbered row per option starting at ``[1]``, and a final
    ``[0] Not now`` row.

    Every row is produced by the same formatter so the right-hand border
    lines up; previously option rows were one column too wide and the
    "Not now" row one column too narrow. Body lines longer than the text
    area are wrapped onto extra rows instead of overflowing the box.

    Args:
        title: Heading shown on the first row. Not wrapped; a title longer
            than the text area still overflows.
        body: Card text; may contain newlines.
        options: Labels for the numbered choices. Not wrapped.
    """
    import textwrap  # local import keeps this block self-contained

    inner_width = 58               # columns between the two vertical borders
    text_width = inner_width - 2   # one space of padding on each side

    def row(text: str = "") -> str:
        # Left-justify and pad to the text area; never truncates.
        return f"│ {text:<{text_width}} │"

    print()
    print("┌" + "─" * inner_width + "┐")
    print(row(title))
    print(row())
    for line in body.split("\n"):
        if len(line) <= text_width:
            # Short lines are emitted untouched so their spacing is preserved.
            pieces = [line]
        else:
            # wrap() returns [] for whitespace-only input; keep one blank row.
            pieces = textwrap.wrap(line, width=text_width) or [""]
        for piece in pieces:
            print(row(piece))
    print(row())
    for number, opt in enumerate(options, start=1):
        print(row(f"[{number}] {opt}"))
    print(row("[0] Not now"))
    print("└" + "─" * inner_width + "┘")
    print()
|
|
|
|
|
|
async def _wait_for_input() -> int:
    """Wait for a single integer input from stdin. Returns the number or 0.

    The blocking ``sys.stdin.readline`` call runs in the loop's default
    executor so the event loop is not blocked while waiting.

    Returns:
        The integer typed by the user, or 0 when the line is not a valid
        integer. End of input also yields 0, because ``readline`` then
        returns an empty string.
    """
    # Inside a coroutine the running loop is the right one to use;
    # asyncio.get_event_loop() is the legacy spelling.
    loop = asyncio.get_running_loop()
    line = await loop.run_in_executor(None, sys.stdin.readline)
    try:
        return int(line.strip())
    except (ValueError, EOFError):
        return 0
|
|
|
|
|
|
async def _handle_session_action(sa, result, sessions: SessionManager, notifier: NotificationManager) -> None:
    """Handle VLM session_action output — show card and act on approval.

    Each action is shown at most once: its key (see _session_action_key) is
    recorded in the module-level _handled_session_actions set before the card
    is displayed, whether or not the user then accepts.

    Behaviour per ``sa.type`` (user accepts by entering 1):
      * "resume"    — shows the checkpoint or reason; on accept prints the
                      backend resume card (or a mock confirmation) and marks
                      the session active.
      * "start_new" — proposes a session for ``result.inferred_task``; on
                      accept calls ``sessions.start_session`` unless mocked.
      * "complete"  — on accept (and a non-empty ``sa.session_id``) calls
                      ``sessions.end_session`` unless mocked, then drops the
                      session from the local state.
      * "switch"    — only prints a confirmation; no session state is
                      changed in this function.
    Any other type falls through without showing anything.

    Args:
        sa: Session action; ``type``, ``session_id`` and ``reason`` are read.
        result: VLM result; only ``inferred_task`` is read here.
        sessions: Session manager whose sessions are looked up and mutated.
        notifier: Not used by this function.

    Swift portability: becomes SwiftUI state changes + button handlers.

    NOTE(review): this listing had lost its indentation; the nesting below is
    a reconstruction. The spots marked NOTE(review) could equally belong to
    the mock-only ``else`` branch — confirm against the original file.
    """
    # Skip if we already showed this exact action
    key = _session_action_key(sa)
    if key in _handled_session_actions:
        return
    _handled_session_actions.add(key)

    if sa.type == "resume":
        # The session may be missing locally; fall back to the VLM's reason text.
        session = next((s for s in sessions.sessions if s.session_id == sa.session_id), None)
        checkpoint = session.checkpoint_note if session else ""
        task_title = session.task_title if session else sa.reason

        _show_session_card(
            f"📂 Resume: {task_title}",
            f"You left off: {checkpoint}" if checkpoint else sa.reason,
            ["Resume session"],
        )
        choice = await _wait_for_input()
        if choice == 1:
            if not sessions._mock:
                # Real backend: fetch and print the resume card fields.
                resume_card = await sessions.get_resume_card(sa.session_id)
                if resume_card:
                    rc = resume_card.get("resume_card", {})
                    print(f" 💡 {rc.get('welcome_back', '')}")
                    print(f" {rc.get('you_were_doing', '')}")
                    print(f" {rc.get('next_step', '')}")
                    print(f" {rc.get('motivation', '')}")
            else:
                # Mock mode: no backend call, just confirm locally.
                print(f" ✓ Resumed \"{task_title}\"")
                if checkpoint:
                    print(f" Last checkpoint: {checkpoint}")
            # NOTE(review): placed at the accept level (both modes) — confirm.
            if session:
                session.status = "active"
                sessions._active_session = session
        else:
            print(" dismissed.")

    elif sa.type == "start_new":
        task = result.inferred_task
        _show_session_card(
            "🆕 New focus session",
            f"You're working on: {task}",
            ["Start focus session"],
        )
        choice = await _wait_for_input()
        if choice == 1:
            if not sessions._mock:
                resp = await sessions.start_session(task)
                if resp:
                    print(f" ✓ Session started: {resp.get('id', '?')}")
            else:
                print(f" ✓ (mock) Session started for \"{task}\"")
        else:
            print(" dismissed.")

    elif sa.type == "complete":
        session = next((s for s in sessions.sessions if s.session_id == sa.session_id), None)
        task_title = session.task_title if session else "session"

        _show_session_card(
            f"✅ Complete: {task_title}",
            sa.reason,
            ["Complete session"],
        )
        choice = await _wait_for_input()
        # Accepting without a session_id is treated the same as dismissing.
        if choice == 1 and sa.session_id:
            if not sessions._mock:
                ok = await sessions.end_session(sa.session_id)
                if ok:
                    print(f" ✓ Session completed")
            else:
                print(f" ✓ (mock) Session \"{task_title}\" completed")
            # NOTE(review): placed at the accept level (both modes) — confirm.
            # Drop the finished session from local state and clear it if active.
            sessions._sessions = [s for s in sessions._sessions if s.session_id != sa.session_id]
            if sessions._active_session and sessions._active_session.session_id == sa.session_id:
                sessions._active_session = None
        else:
            print(" dismissed.")

    elif sa.type == "switch":
        session = next((s for s in sessions.sessions if s.session_id == sa.session_id), None)
        task_title = session.task_title if session else "another task"
        _show_session_card(
            f"🔄 Switch to: {task_title}",
            sa.reason,
            [f"Switch to \"{task_title}\""],
        )
        choice = await _wait_for_input()
        if choice == 1:
            # Only a confirmation is printed; no session state is updated here.
            print(f" ✓ Switched to \"{task_title}\"")
        else:
            print(" dismissed.")
|
|
|
|
|
|
def _is_valid_session_id(session_id: str | None, sessions: SessionManager) -> bool:
    """Report whether a VLM-supplied session_id matches a known session.

    An empty or missing id is never valid; otherwise the id must equal the
    ``session_id`` of some entry in ``sessions.sessions``.
    """
    if session_id:
        for known in sessions.sessions:
            if known.session_id == session_id:
                return True
    return False
|
|
|
|
|
|
|
|
|
|
async def run_loop(
    ctx: TaskContext,
    *,
    api_key: str | None = None,
    vlm_backend: str | None = None,
    jwt: str | None = None,
    base_url: str | None = None,
    dry_run: bool = False,
    max_iterations: int | None = None,
    on_result: None | (callable) = None,
    auto_execute: bool = False,
    mock_sessions: list | None = None,
) -> None:
    """Run the Argus VLM loop with notification and execution support.

    Every pass captures one screenshot. Once VLM_CALL_EVERY_N frames are
    pending, they are pushed into the history buffer and the VLM is called
    with the latest screenshot. The result is printed (dry run) or sent to
    the backend, and, when ``auto_execute`` is set, drives session cards,
    friction notifications, nudges and the new-session fallback.

    An exception raised inside one pass is logged and the loop carries on
    with the next pass.

    Args:
        ctx: Task/session context.
        api_key: Gemini API key override.
        vlm_backend: "ollama" or "gemini".
        jwt: Backend JWT override.
        base_url: Backend base URL override.
        dry_run: If True, print the payload as JSON instead of sending it
            to the backend.
        max_iterations: Stop after N iterations (None = forever). An
            iteration is counted per VLM call, not per captured frame.
        on_result: Optional callback(VLMResult) per analysis.
        auto_execute: If True, enable notification + executor flow.
        mock_sessions: If not None, these sessions are installed directly
            into the session manager and the backend fetch is skipped.

    NOTE(review): this listing had lost its indentation; the nesting below is
    a reconstruction — confirm against the original file, in particular the
    placement of the "clear execution context" step and the callback.
    """
    history = HistoryBuffer(image_maxlen=BUFFER_MAX_LEN)
    notifier = NotificationManager()
    sessions = SessionManager(jwt=jwt, base_url=base_url)
    iteration = 0  # number of VLM calls made so far

    log.info(
        "Argus loop starting — interval=%ds, session=%s, task=%s, executor=%s",
        CAPTURE_INTERVAL_S,
        ctx.session_id,
        ctx.task_title,
        "on" if auto_execute else "off",
    )

    # Load sessions — mock or from backend
    use_mock = mock_sessions is not None
    if use_mock:
        sessions._sessions = mock_sessions
        sessions._active_session = next((s for s in mock_sessions if s.status == "active"), None)
        sessions._mock = True  # prevent refresh from overwriting
        log.info("Loaded %d mock sessions", len(mock_sessions))
    else:
        sessions._mock = False
        try:
            await sessions.fetch_open_sessions()
        except Exception:
            # Best effort: a failed fetch must not prevent the loop from starting.
            log.exception("Startup: failed to fetch sessions — continuing without session context")

    # Log attachment status so it's visible in the console
    if sessions.active:
        s = sessions.active
        log.info(
            "ATTACHED to session %s — task=%r last_app=%s checkpoint=%r",
            s.session_id[:8], s.task_title, s.last_app or "(unknown)", s.checkpoint_note or "(none)",
        )
    else:
        log.info("No active session found — running in monitoring-only mode (dry-run=%s)", dry_run)

    # Pending frames collected between VLM calls
    pending_frames: list[bytes] = []
    capture_count = 0

    try:
        while max_iterations is None or iteration < max_iterations:
            t0 = time.monotonic()

            try:
                # 0. Refresh sessions periodically
                await sessions.maybe_refresh()

                # 1. Capture screenshot every interval
                screenshot = capture_screenshot()
                pending_frames.append(screenshot)
                capture_count += 1
                log.debug("Captured frame %d (%d pending)", capture_count, len(pending_frames))

                # 2. Only call VLM every N captures
                if len(pending_frames) < VLM_CALL_EVERY_N:
                    # `continue` skips the sleep at the bottom of the loop,
                    # so the interval sleep is repeated here.
                    elapsed = time.monotonic() - t0
                    sleep_for = max(0.0, CAPTURE_INTERVAL_S - elapsed)
                    if sleep_for > 0:
                        await asyncio.sleep(sleep_for)
                    continue

                # ── VLM call with all pending frames ──
                iteration += 1

                # Push all pending frames into buffer (buffer keeps last BUFFER_MAX_LEN)
                for frame in pending_frames:
                    history.push(frame, "")  # summaries filled after VLM call
                pending_frames.clear()

                t_vlm = time.monotonic()
                result: VLMResult = await analyze_screenshot(
                    screenshot, ctx, history,
                    api_key=api_key,
                    backend=vlm_backend,
                    session_context=sessions.format_for_prompt(),
                )
                t_vlm_done = time.monotonic()
                payload = result.to_backend_payload(ctx.session_id)

                log.info(
                    "[%d] vlm=%.2fs frames=%d friction=%s summary=%s",
                    iteration,
                    t_vlm_done - t_vlm,
                    VLM_CALL_EVERY_N,
                    result.friction.type,
                    result.vlm_summary,
                )

                # Update last entry's summary now that we have it
                history.set_last_output(payload)

                # 4. Print / send to backend
                if dry_run:
                    print(json.dumps(payload, indent=2))
                else:
                    resp = await send_analysis(
                        result, ctx.session_id, jwt=jwt, base_url=base_url
                    )
                    log.debug("Backend response: %s", resp)

                if auto_execute:
                    sa = result.session_action

                    # Validate session_action — check that VLM output relates to the session.
                    # Match against filename, file stem (no extension), or task title.
                    # A failed check rewrites sa.type to "none" in place on the result object.
                    if sa.type in ("resume", "switch") and sa.session_id:
                        session = next(
                            (s for s in sessions.sessions if s.session_id == sa.session_id), None
                        )
                        if session:
                            context = (result.vlm_summary + " " + result.inferred_task + " " + sa.reason).lower()
                            matches = []
                            if session.last_file:
                                matches.append(session.last_file.lower())
                                # Also check stem without extension (e.g. "receipt" from "receipt.pdf")
                                stem = session.last_file.rsplit(".", 1)[0].lower()
                                if stem:
                                    matches.append(stem)
                            if session.task_title:
                                # Check key words from task title
                                for word in session.task_title.lower().split():
                                    if len(word) > 3:  # skip short words
                                        matches.append(word)
                            # Substring test: suppress only when there were candidates and none matched.
                            if matches and not any(m in context for m in matches):
                                log.debug(
                                    "Suppressing session_action=%s — none of %s found in context",
                                    sa.type, matches,
                                )
                                sa.type = "none"

                    # Resume/switch must point at a session we actually know about.
                    if not _is_valid_session_id(sa.session_id, sessions) and sa.type in ("resume", "switch"):
                        sa.type = "none"

                    # 5. Session actions take priority — but only if not already handled
                    session_handled = False
                    if sa.type != "none":
                        key = _session_action_key(sa)
                        if key not in _handled_session_actions:
                            await _handle_session_action(sa, result, sessions, notifier)
                            session_handled = True

                    # 6. Friction notification + executor
                    if not session_handled and notifier.should_notify(payload):
                        card = notifier.create_card(payload)
                        notifier.show_card_terminal(card)

                        choice = await _wait_for_input()
                        action_idx = choice - 1  # menu is 1-based, execute() gets a 0-based index

                        # NOTE(review): choice is not checked against the number of
                        # proposed actions before being passed to execute() — confirm
                        # execute() tolerates an out-of-range index.
                        if choice > 0:
                            print(f"\n⚡ Executing action {action_idx + 1}...")
                            summary = await execute(
                                payload, action_idx,
                                history=history,
                                current_screenshot=screenshot,
                                api_key=api_key,
                            )
                            # Emit a parseable JSON block so Swift can surface the result
                            # in the HUD and clear the executing spinner.
                            print(json.dumps({"exec_summary": summary or ""}, indent=2), flush=True)
                            if summary:
                                history.set_last_execution(summary)
                        else:
                            print("dismissed.")

                    # 7. Nudge — VLM decided a nudge is appropriate (only if nothing else fired)
                    elif not session_handled and result.gentle_nudge:
                        print(f"\n💛 {result.gentle_nudge}")

                    # 8. Fallback: suggest new session if VLM didn't
                    elif not session_handled and (
                        sa.type == "none"
                        and result.on_task
                        and not sessions.active
                        and sessions.should_suggest_new_session(result.inferred_task)
                    ):
                        task = result.inferred_task
                        print(f"\n🆕 You've been working on \"{task}\" — start a focus session?")
                        print(f" [1] Start [0] Not now")
                        card = notifier.create_card({
                            "friction": {
                                "type": "none",
                                "confidence": 1.0,
                                "description": f"New task detected: {task}",
                                "proposed_actions": [{"label": "Start focus session", "action_type": "other", "details": task}],
                            }
                        })
                        notifier.show_card_terminal(card)
                        # This path waits via the notifier rather than _wait_for_input().
                        accepted, _ = await notifier.wait_for_response(timeout=60.0)
                        if accepted:
                            # NOTE(review): unlike _handle_session_action, this call is
                            # not guarded by sessions._mock — confirm intended.
                            resp = await sessions.start_session(task)
                            if resp:
                                print(f" ✓ Session started: {resp.get('id', '?')}")

                # Clear execution context after 3 iterations
                # (checked on every VLM call; fires when the iteration count is a multiple of 3)
                if history.get_last_execution() and iteration % 3 == 0:
                    history.clear_last_execution()

                # 9. Callback
                if on_result:
                    on_result(result)

            except Exception:
                # Keep the loop alive: log the failure and move on to the next pass.
                log.exception("Error in Argus loop iteration %d", iteration)

            # Sleep for remainder of capture interval
            elapsed = time.monotonic() - t0
            sleep_for = max(0.0, CAPTURE_INTERVAL_S - elapsed)
            if sleep_for > 0:
                await asyncio.sleep(sleep_for)
    finally:
        # No cleanup is performed here at present.
        pass
|