include argus workflow

2026-03-29 06:29:18 -04:00
parent 275a53ab40
commit 56673078f5
23 changed files with 3098 additions and 307 deletions
--- a/argus/buffer.py
+++ b/argus/buffer.py
@@ -0,0 +1,123 @@
+"""Rolling history buffer for VLM screenshot analysis.
+
+Two tiers:
+  - Image buffer: deque(maxlen=4) of recent screenshots sent as images
+  - Text history: deque(maxlen=12) of older VLM summaries + previous outputs
+    for extended context (what happened 30-60s ago) and self-refinement
+"""
+
+from __future__ import annotations
+
+import time
+from collections import deque
+from dataclasses import dataclass, field
+
+
+@dataclass
+class BufferEntry:
+    """One entry of the image tier: a screenshot plus its VLM summary."""
+
+    # Raw image bytes of the screenshot (presumably JPEG-encoded, going by
+    # the field name -- nothing in this module validates the format).
+    jpeg: bytes
+    # Summary text associated with this screenshot.
+    vlm_summary: str
+    # Wall-clock time in seconds since the epoch; defaults to time.time()
+    # evaluated when the entry is constructed.
+    timestamp: float = field(default_factory=time.time)
+
+
+@dataclass
+class TextEntry:
+    """One entry of the text tier: a VLM summary without image data."""
+
+    # Summary text for one capture.
+    vlm_summary: str
+    # Wall-clock time in seconds since the epoch; has no default, so the
+    # caller must always supply it.
+    timestamp: float
+
+
+class HistoryBuffer:
+    def __init__(self, image_maxlen: int = 4, text_maxlen: int = 12):
+        self._images: deque[BufferEntry] = deque(maxlen=image_maxlen)
+        self._text_history: deque[TextEntry] = deque(maxlen=text_maxlen)
+        self._last_output: dict | None = None
+        self._last_execution: str | None = None
+
+    def push(self, jpeg: bytes, vlm_summary: str) -> None:
+        now = time.time()
+        # When an image entry gets evicted from the image buffer,
+        # it's already captured in text_history, so nothing extra needed.
+        self._images.append(BufferEntry(jpeg=jpeg, vlm_summary=vlm_summary, timestamp=now))
+        self._text_history.append(TextEntry(vlm_summary=vlm_summary, timestamp=now))
+
+    def set_last_output(self, output: dict) -> None:
+        """Store the previous VLM JSON output for self-refinement."""
+        self._last_output = output
+
+    def set_last_execution(self, summary: str | None) -> None:
+        """Store the result of the last executor action."""
+        self._last_execution = summary
+
+    def get_last_execution(self) -> str | None:
+        return self._last_execution
+
+    def clear_last_execution(self) -> None:
+        self._last_execution = None
+
+    def get_entries(self) -> list[BufferEntry]:
+        """Return image entries oldest-first."""
+        return list(self._images)
+
+    def format_for_prompt(self) -> str:
+        """Format the full timeline: text history + image labels."""
+        now = time.time()
+        lines: list[str] = []
+
+        # Text-only history (entries older than what's in the image buffer)
+        image_timestamps = {round(e.timestamp, 2) for e in self._images}
+        older = [
+            e for e in self._text_history
+            if round(e.timestamp, 2) not in image_timestamps
+        ]
+        older = [e for e in older if e.vlm_summary]  # skip empty summaries
+        if older:
+            lines.append("Older context (text only, no images):")
+            for entry in older:
+                ago = int(now - entry.timestamp)
+                lines.append(f"  - [{ago}s ago] {entry.vlm_summary}")
+            lines.append("")
+
+        # Image timeline
+        n = len(self._images)
+        if n > 0:
+            lines.append(f"Recent screenshots ({n} prior + 1 current = {n + 1} images):")
+            for i, entry in enumerate(self._images):
+                ago = int(now - entry.timestamp)
+                lines.append(f"  - Screenshot {i + 1}/{n + 1}: [{ago}s ago]")
+            lines.append(f"  - Screenshot {n + 1}/{n + 1}: [now] (current)")
+        else:
+            lines.append("Screenshots:")
+            lines.append("  - Screenshot 1/1: [now] (current, first capture)")
+
+        return "\n".join(lines)
+
+    def format_last_output(self) -> str:
+        """Format previous VLM output for self-refinement context."""
+        if not self._last_output:
+            return ""
+
+        import json
+        # Only include the key fields, not the full blob
+        prev = self._last_output
+        parts = [
+            f"  on_task: {prev.get('on_task')}",
+            f"  app: {prev.get('app_name')}",
+            f"  friction: {prev.get('friction', {}).get('type')}",
+            f"  summary: {prev.get('vlm_summary')}",
+        ]
+        note = prev.get("checkpoint_note_update")
+        if note:
+            parts.append(f"  checkpoint: {note}")
+        desc = prev.get("friction", {}).get("description")
+        if desc:
+            parts.append(f"  friction_desc: {desc}")
+        return "\n".join(parts)
+
+    def __len__(self) -> int:
+        return len(self._images)
+
+    def clear(self) -> None:
+        self._images.clear()
+        self._text_history.clear()
+        self._last_output = None
+        self._last_execution = None