// SessionManager.swift — Focus session state, screenshot engine, distraction detection
|
|
|
|
import AppKit
|
|
import SwiftUI
|
|
import UserNotifications
|
|
import ScreenCaptureKit
|
|
|
|
@Observable
|
|
@MainActor
|
|
final class SessionManager {
|
|
static let shared = SessionManager()
|
|
|
|
// MARK: - State
|
|
|
|
/// Backend session in progress; assigned by `startSession` and cleared by `endSession`.
var activeSession: FocusSession? = nil
/// Task the session was started for, if any.
var activeTask: AppTask? = nil
/// Steps of `activeTask`, ordered by `sortOrder` when loaded in `startSession`.
var activeSteps: [Step] = []
/// Position inside `activeSteps` of the step currently being worked on.
var currentStepIndex = 0
/// `true` between a successful `startSession` and the end of `endSession`.
var isSessionActive = false
/// Moment the current session started; basis for `sessionElapsed`.
var sessionStartDate: Date? = nil
/// Number of nudges issued during the current session.
var distractionCount = 0
/// Text of the most recent nudge, if any.
var lastNudge: String? = nil
/// Resume card returned by the backend in `fetchResumeCard`.
var resumeCard: ResumeCard? = nil
/// Set to `true` once a resume card has been fetched.
var showingResumeCard = false
/// Localized description of the last surfaced error.
var errorMessage: String? = nil
/// `true` while `startSession` is running.
var isLoading = false

// Proactive agent
/// Card currently offered to the user; at most one is shown at a time.
var proactiveCard: ProactiveCard? = nil
/// Set when the user approves a proposed action — shown as a confirmation toast
var approvedActionLabel: String? = nil

// Screenshot engine
/// `true` while either Argus or the fallback capture loop is running.
var isCapturing = false
/// Live pipeline status shown in FocusSessionView (updated at each stage)
var argusStatus = ""

/// Handle to the fallback capture loop created by `startCapture`.
private var captureTask: Task<Void, Never>? = nil
/// Seconds to wait between fallback captures.
private let captureInterval: TimeInterval = 5.0
|
|
|
|
// Rolling buffer of recent VLM summaries (`captureAndAnalyze` caps it at 4 entries).
// The summaries are sent along with the next capture so the VLM has temporal
// context and can detect patterns across captures.
private struct ScreenshotHistoryEntry {
    /// `vlm_summary` text produced by an earlier analysis.
    let summary: String
    /// When that summary was recorded.
    let timestamp: Date
}

/// Oldest-first history; emptied whenever a session starts or ends.
@ObservationIgnored private var screenshotHistory: [ScreenshotHistoryEntry] = []
|
|
|
|
// App switch tracking

/// Recent frontmost-app activations, oldest first (`handleAppSwitch` keeps at most 30).
@ObservationIgnored private var appSwitches: [(name: String, bundleId: String, time: Date)] = []
/// Token returned by the workspace notification center; `nil` when not observing.
@ObservationIgnored private var appSwitchObserver: (any NSObjectProtocol)?
/// App that was frontmost before the next switch arrives.
@ObservationIgnored private var lastApp: (name: String, bundleId: String) = ("", "")
/// When `lastApp` became frontmost; used to compute its dwell time.
@ObservationIgnored private var lastAppEnteredAt: Date = Date()
|
|
|
|
// Argus subprocess (device-side VLM)

/// Handle to the launched Argus process; `nil` when Argus is not running.
@ObservationIgnored private var argusProcess: Process?
/// Task that consumes Argus stdout line by line.
@ObservationIgnored private var argusReadTask: Task<Void, Never>?

/// Python interpreter used to launch Argus.
/// Can be overridden with the `lockInBro.argusPythonPath` user default so the app
/// is not tied to one developer's machine; the original path remains the fallback.
private let argusPythonPath: String =
    UserDefaults.standard.string(forKey: "lockInBro.argusPythonPath")
    ?? "/Users/joyzhuo/miniconda3/envs/gmr/bin/python3"

/// Working directory containing the `argus` Python module.
/// Can be overridden with the `lockInBro.argusRepoPath` user default; the original
/// path remains the fallback.
private let argusRepoPath: String =
    UserDefaults.standard.string(forKey: "lockInBro.argusRepoPath")
    ?? "/Users/joyzhuo/yhack/lockinbro-argus"

/// Private so the only instance is `shared`.
private init() {}
|
|
|
|
// MARK: - Computed
|
|
|
|
/// The step at `currentStepIndex`, or `nil` when that index is outside `activeSteps`.
///
/// Both bounds are checked: `currentStepIndex` is a settable `Int`, so a negative
/// value must yield `nil` rather than trap on the subscript.
var currentStep: Step? {
    guard activeSteps.indices.contains(currentStepIndex) else { return nil }
    return activeSteps[currentStepIndex]
}
|
|
|
|
/// Number of steps in `activeSteps` whose `isDone` flag is set.
var completedCount: Int {
    activeSteps.reduce(0) { tally, step in step.isDone ? tally + 1 : tally }
}

/// Total number of steps loaded for the active task.
var totalSteps: Int {
    activeSteps.count
}
|
|
|
|
/// Seconds since `sessionStartDate`, or 0 when no session has been started.
var sessionElapsed: TimeInterval {
    sessionStartDate.map { Date().timeIntervalSince($0) } ?? 0
}
|
|
|
|
// MARK: - Session Lifecycle
|
|
|
|
/// ID of the most recently started session, stored in `UserDefaults` so a stale
/// session can still be ended after the app has been restarted.
/// Assigning `nil` removes the stored value.
private var persistedSessionId: String? {
    get {
        UserDefaults.standard.string(forKey: "lockInBro.lastSessionId")
    }
    set {
        let defaults = UserDefaults.standard
        guard let sessionId = newValue else {
            defaults.removeObject(forKey: "lockInBro.lastSessionId")
            return
        }
        defaults.set(sessionId, forKey: "lockInBro.lastSessionId")
    }
}
|
|
|
|
/// Starts a focus session on the backend and brings up local monitoring.
///
/// Flow:
/// 1. Ask the backend for a new session. On HTTP 409 the locally known (or last
///    persisted) session is ended first, then the start is retried once.
/// 2. Tear down monitors left over from a previous local session.
/// 3. Reset the published session state and load the task's steps, if a task was given.
/// 4. Request notification permission, then start Argus and the app-switch observer.
///
/// Failures to create the session are reported through `errorMessage`. A failure
/// to load steps is also reported there, but the session continues without steps
/// instead of being left active with no monitoring.
///
/// - Parameter task: Task to focus on, or `nil` for a task-less session.
func startSession(task: AppTask?) async {
    isLoading = true
    errorMessage = nil
    defer { isLoading = false }

    do {
        let session: FocusSession
        do {
            session = try await APIClient.shared.startSession(taskId: task?.id)
        } catch NetworkError.httpError(409, _) {
            // End whichever session is active — prefer the locally known one,
            // fall back to the last persisted ID (survives app restarts)
            let staleId = activeSession?.id ?? persistedSessionId
            if let id = staleId {
                _ = try? await APIClient.shared.endSession(sessionId: id, status: "completed")
            }
            session = try await APIClient.shared.startSession(taskId: task?.id)
        }

        // A previous local session may still have monitors running. Stop them so
        // the old Argus process, capture loop and workspace observer are not
        // orphaned when the new ones are installed below.
        stopArgus()
        stopCapture()
        stopAppObserver()

        activeSession = session
        persistedSessionId = session.id
        activeTask = task
        activeSteps = []
        currentStepIndex = 0
        isSessionActive = true
        sessionStartDate = Date()
        distractionCount = 0
        lastNudge = nil

        if let task {
            do {
                let steps = try await APIClient.shared.getSteps(taskId: task.id)
                activeSteps = steps.sorted { $0.sortOrder < $1.sortOrder }
                // Pick first in-progress or first pending step
                currentStepIndex = activeSteps.firstIndex(where: { $0.isActive })
                    ?? activeSteps.firstIndex(where: { $0.status == "pending" })
                    ?? 0
            } catch {
                // The session already exists on the backend and is marked active
                // locally; surface the error but keep going so monitoring starts.
                errorMessage = error.localizedDescription
            }
        }

        screenshotHistory = []
        await requestNotificationPermission()
        startArgus(session: session, task: task)
        startAppObserver()
    } catch {
        errorMessage = error.localizedDescription
    }
}
|
|
|
|
/// Ends the current session: stops all local monitoring, tells the backend
/// (best effort — a failed request is ignored), then clears session state.
///
/// - Parameter status: Final status reported to the backend.
func endSession(status: String = "completed") async {
    // Monitoring is stopped before the network call.
    stopArgus()
    stopCapture()
    stopAppObserver()

    if let sessionId = activeSession?.id {
        _ = try? await APIClient.shared.endSession(sessionId: sessionId, status: status)
    }

    // Session identity and progress
    activeSession = nil
    activeTask = nil
    activeSteps = []
    isSessionActive = false
    sessionStartDate = nil

    // Anything currently surfaced to the user
    lastNudge = nil
    resumeCard = nil
    showingResumeCard = false
    proactiveCard = nil
    approvedActionLabel = nil

    // Internal buffers and persisted marker
    screenshotHistory = []
    persistedSessionId = nil
}
|
|
|
|
/// Fetches the resume card for the active session and flags it for display.
/// Does nothing when no session is active; errors are reported via `errorMessage`.
func fetchResumeCard() async {
    guard let sessionId = activeSession?.id else { return }

    do {
        let payload = try await APIClient.shared.resumeSession(sessionId: sessionId)
        resumeCard = payload.resumeCard
        showingResumeCard = true
    } catch {
        errorMessage = error.localizedDescription
    }
}
|
|
|
|
/// Marks the current step complete on the backend, stores the returned step in
/// `activeSteps`, and moves `currentStepIndex` to the first step still pending.
/// When no pending step remains, the index is left where it is.
func completeCurrentStep() async {
    guard let stepId = currentStep?.id else { return }

    let refreshed: Step
    do {
        refreshed = try await APIClient.shared.completeStep(stepId: stepId)
    } catch {
        errorMessage = error.localizedDescription
        return
    }

    if let slot = activeSteps.firstIndex(where: { $0.id == refreshed.id }) {
        activeSteps[slot] = refreshed
    }

    // Advance to next pending
    guard let upcoming = activeSteps.firstIndex(where: { $0.status == "pending" }) else { return }
    currentStepIndex = upcoming
}
|
|
|
|
// MARK: - App Switch Observer
|
|
|
|
/// Begins tracking frontmost-application changes.
///
/// Seeds `lastApp` with the application that is frontmost right now, clears the
/// switch history, and registers for `didActivateApplicationNotification`. Each
/// notification is forwarded to `handleAppSwitch(app:)` on the main actor.
private func startAppObserver() {
    // Registering again would overwrite the stored token without removing the
    // earlier observer, leaking it and delivering every switch twice.
    if let existing = appSwitchObserver {
        NSWorkspace.shared.notificationCenter.removeObserver(existing)
        appSwitchObserver = nil
    }

    let current = NSWorkspace.shared.frontmostApplication
    lastApp = (current?.localizedName ?? "", current?.bundleIdentifier ?? "")
    lastAppEnteredAt = Date()
    appSwitches = []

    appSwitchObserver = NSWorkspace.shared.notificationCenter.addObserver(
        forName: NSWorkspace.didActivateApplicationNotification,
        object: nil,
        queue: .main
    ) { [weak self] notification in
        guard let app = notification.userInfo?[NSWorkspace.applicationUserInfoKey] as? NSRunningApplication
        else { return }
        Task { @MainActor [weak self] in self?.handleAppSwitch(app: app) }
    }
}
|
|
|
|
/// Unregisters the workspace observer (if one is installed) and clears the
/// recorded switch history.
private func stopAppObserver() {
    // The history is cleared whether or not an observer was registered.
    defer { appSwitches = [] }

    guard let token = appSwitchObserver else { return }
    NSWorkspace.shared.notificationCenter.removeObserver(token)
    appSwitchObserver = nil
}
|
|
|
|
/// Handles a frontmost-application change.
///
/// Reports the previous app's dwell time to the backend (fire-and-forget), records
/// the switch in the rolling history (at most 30 entries), and raises a proactive
/// card when a back-and-forth loop is detected during an active session.
///
/// - Parameter app: The application that just became frontmost.
private func handleAppSwitch(app: NSRunningApplication) {
    let incoming = (name: app.localizedName ?? "Unknown", bundleId: app.bundleIdentifier ?? "")
    let switchedAt = Date()

    // An activation of the app that is already recorded as frontmost is not a switch.
    if incoming.name == lastApp.name { return }

    // Log previous app's dwell time to backend (fire-and-forget); at least 1 second is reported.
    let outgoing = lastApp
    let dwellSeconds = max(1, Int(switchedAt.timeIntervalSince(lastAppEnteredAt)))
    if !outgoing.name.isEmpty, let session = activeSession {
        Task {
            _ = try? await APIClient.shared.appActivity(
                sessionId: session.id,
                appBundleId: outgoing.bundleId,
                appName: outgoing.name,
                durationSeconds: dwellSeconds
            )
        }
    }

    lastApp = incoming
    lastAppEnteredAt = switchedAt

    appSwitches.append((name: incoming.name, bundleId: incoming.bundleId, time: switchedAt))
    if appSwitches.count > 30 {
        appSwitches.removeFirst()
    }

    // Only trigger card during active session and when none is already showing
    guard isSessionActive, proactiveCard == nil, let loop = detectRepetitiveLoop() else { return }
    proactiveCard = ProactiveCard(source: .appSwitchLoop(apps: loop.apps, switchCount: loop.count))
}
|
|
|
|
/// Looks for a back-and-forth pattern between exactly two apps.
///
/// Only switches from the last 5 minutes are considered, and the six most recent
/// of them must strictly alternate between two app names (three full cycles),
/// which keeps false positives down.
///
/// - Returns: The two app names in sorted order with a cycle count of 3, or
///   `nil` when no such loop is present.
private func detectRepetitiveLoop() -> (apps: [String], count: Int)? {
    let windowStart = Date().addingTimeInterval(-300)
    let recentNames = appSwitches
        .filter { $0.time > windowStart }
        .map(\.name)
    guard recentNames.count >= 6 else { return nil }

    let tail = Array(recentNames.suffix(6))
    let distinctApps = Set(tail)
    guard distinctApps.count == 2 else { return nil }

    // Strictly alternating — every name differs from the one right before it.
    let alternates = zip(tail, tail.dropFirst()).allSatisfy { earlier, later in earlier != later }
    guard alternates else { return nil }

    return (apps: distinctApps.sorted(), count: 3)
}
|
|
|
|
// MARK: - Argus Subprocess (device-side VLM)
|
|
|
|
/// Launch the argus Python daemon as a subprocess.
///
/// Argus captures screenshots itself, runs them through a local VLM (Ollama/Gemini),
/// posts results to the backend, and emits `STATUS:` and `RESULT:{json}` lines to
/// stdout for Swift to consume.
/// Falls back to the internal `startCapture()` loop if the interpreter or repo is
/// missing, or if the process cannot be launched.
///
/// - Parameters:
///   - session: Session whose ID is passed to Argus.
///   - task: Task providing the title and goal arguments; may be `nil`.
private func startArgus(session: FocusSession, task: AppTask?) {
    guard FileManager.default.fileExists(atPath: argusPythonPath),
          FileManager.default.fileExists(atPath: argusRepoPath) else {
        argusStatus = "⚠️ Argus not found — using fallback capture"
        startCapture()
        return
    }

    // Encode steps as JSON for --steps-json arg
    var stepsJSONString = "[]"
    if !activeSteps.isEmpty {
        let stepsArray: [[String: Any]] = activeSteps.map { step in
            var s: [String: Any] = [
                "id": step.id,
                "sort_order": step.sortOrder,
                "title": step.title,
                "status": step.status
            ]
            if let note = step.checkpointNote { s["checkpoint_note"] = note }
            return s
        }
        if let data = try? JSONSerialization.data(withJSONObject: stepsArray),
           let str = String(data: data, encoding: .utf8) {
            stepsJSONString = str
        }
    }

    let jwt = TokenStore.shared.token ?? ""
    let geminiKey = UserDefaults.standard.string(forKey: "geminiApiKey") ?? ""

    // NOTE(review): the JWT and the Gemini key are passed on the command line, where
    // other local processes can read them from the process list. Moving them to the
    // child's environment or stdin would require a matching change on the Argus side.
    var arguments = [
        "-m", "argus",
        "--session-id", session.id,
        "--task-title", task?.title ?? "(no task)",
        "--task-goal", task?.description ?? "",
        "--steps-json", stepsJSONString,
        "--window-title", NSWorkspace.shared.frontmostApplication?.localizedName ?? "",
        "--vlm", "gemini",
        "--jwt", jwt,
        "--backend-url", "https://wahwa.com/api/v1",
        "--swift-ipc"
    ]
    if !geminiKey.isEmpty {
        arguments += ["--gemini-key", geminiKey]
    }

    let process = Process()
    process.executableURL = URL(fileURLWithPath: argusPythonPath)
    process.currentDirectoryURL = URL(fileURLWithPath: argusRepoPath)
    process.arguments = arguments

    // Pipe stdout for STATUS:/RESULT: lines. stderr is discarded so argus logs don't
    // clutter the console; an unread Pipe would fill up and block the child.
    let stdoutPipe = Pipe()
    process.standardOutput = stdoutPipe
    process.standardError = FileHandle.nullDevice

    do {
        // `run()` throws on failure. The deprecated `launch()` raises an Objective-C
        // exception instead, which the `catch` below could never intercept.
        try process.run()
    } catch {
        argusStatus = "⚠️ Argus failed to launch — using fallback capture"
        startCapture()
        return
    }

    argusProcess = process
    isCapturing = true

    let taskLabel = task?.title ?? "session"
    argusStatus = "🚀 Argus started — waiting for first screenshot…"
    // Sent once per launch.
    sendDebugNotification(title: "🚀 Argus VLM Started", body: "Screen monitoring active for \(taskLabel)")

    // Read STATUS:/RESULT: lines from argus stdout in a background task
    let fileHandle = stdoutPipe.fileHandleForReading

    argusReadTask = Task { [weak self] in
        do {
            for try await line in fileHandle.bytes.lines {
                guard let self, !Task.isCancelled else { break }

                if line.hasPrefix("STATUS:") {
                    let event = String(line.dropFirst("STATUS:".count))
                    await MainActor.run { self.handleArgusStatus(event) }
                } else if line.hasPrefix("RESULT:") {
                    let jsonStr = String(line.dropFirst("RESULT:".count))
                    guard let data = jsonStr.data(using: .utf8) else { continue }
                    // Lines that do not decode are skipped.
                    if let result = try? JSONDecoder().decode(DistractionAnalysisResponse.self, from: data) {
                        await MainActor.run {
                            let summary = result.vlmSummary ?? "no summary"
                            self.argusStatus = "✅ \(summary)"
                            self.sendDebugNotification(title: "✅ VLM Result", body: summary)
                            self.applyDistractionResult(result)
                        }
                    }
                }
            }
        } catch {
            // Pipe closed — argus process ended
        }
    }
}
|
|
|
|
/// Cancels the stdout reader and, when an Argus process is running, terminates
/// it and clears `isCapturing`.
private func stopArgus() {
    argusReadTask?.cancel()
    argusReadTask = nil

    guard let running = argusProcess else { return }
    running.terminate()
    argusProcess = nil
    isCapturing = false
}
|
|
|
|
// MARK: - Screenshot Capture Loop (fallback when argus is unavailable)
|
|
|
|
/// Starts the fallback capture loop: one capture right away, then one every
/// `captureInterval` seconds until the task is cancelled or the session ends.
private func startCapture() {
    isCapturing = true
    captureTask = Task { [weak self] in
        guard let manager = self else { return }

        // Capture immediately on session start, then repeat on interval
        await manager.captureAndAnalyze()

        while true {
            if Task.isCancelled || !manager.isSessionActive { return }
            try? await Task.sleep(for: .seconds(manager.captureInterval))
            // Re-check after sleeping: the session may have ended in the meantime.
            if Task.isCancelled || !manager.isSessionActive { return }
            await manager.captureAndAnalyze()
        }
    }
}
|
|
|
|
/// Cancels the fallback capture loop, if one is running, and clears `isCapturing`.
private func stopCapture() {
    if let loop = captureTask {
        loop.cancel()
    }
    captureTask = nil
    isCapturing = false
}
|
|
|
|
/// Takes one screenshot, sends it to the backend for analysis together with the
/// task context and recent summaries, then applies the result.
///
/// Returns without doing anything when no session is active or the capture fails.
/// Analysis errors are swallowed on purpose so the user is never interrupted.
private func captureAndAnalyze() async {
    guard let session = activeSession,
          let imageData = await captureScreen() else { return }

    let frontmostName = NSWorkspace.shared.frontmostApplication?.localizedName ?? "Unknown"
    var taskContext = buildTaskContext()

    // Inject rolling history so the VLM has temporal context across captures.
    // Only summaries (text) are sent — not the raw images — to keep token cost low.
    if !screenshotHistory.isEmpty {
        let formatter = ISO8601DateFormatter()
        taskContext["screenshot_history"] = screenshotHistory.map {
            ["summary": $0.summary, "timestamp": formatter.string(from: $0.timestamp)]
        }
    }

    // Silent fail — don't interrupt the user
    guard let analysis = try? await APIClient.shared.analyzeScreenshot(
        imageData: imageData,
        windowTitle: frontmostName,
        sessionId: session.id,
        taskContext: taskContext
    ) else { return }

    // Append this result's summary to the rolling buffer (max 4 entries)
    if let summary = analysis.vlmSummary {
        screenshotHistory.append(ScreenshotHistoryEntry(summary: summary, timestamp: Date()))
        if screenshotHistory.count > 4 {
            screenshotHistory.removeFirst()
        }
    }

    applyDistractionResult(analysis)
}
|
|
|
|
/// Captures the first shareable display at 1280×720 and returns it as JPEG data.
///
/// - Returns: JPEG bytes, or `nil` when no display is available or any
///   ScreenCaptureKit call fails.
private func captureScreen() async -> Data? {
    guard let shareable = try? await SCShareableContent.current,
          let firstDisplay = shareable.displays.first else { return nil }

    let settings = SCStreamConfiguration()
    settings.width = 1280
    settings.height = 720

    let wholeDisplay = SCContentFilter(display: firstDisplay, excludingWindows: [])
    guard let frame = try? await SCScreenshotManager.captureImage(
        contentFilter: wholeDisplay,
        configuration: settings
    ) else { return nil }

    return cgImageToJPEG(frame)
}
|
|
|
|
/// Encodes a `CGImage` as JPEG with a compression factor of 0.5.
///
/// The bitmap representation is built straight from the `CGImage`, avoiding an
/// intermediate `NSImage` and a TIFF encode/decode round trip on every capture.
///
/// - Parameter image: Image to encode.
/// - Returns: JPEG data, or `nil` when the encoder cannot produce a representation.
private func cgImageToJPEG(_ image: CGImage) -> Data? {
    let bitmap = NSBitmapImageRep(cgImage: image)
    return bitmap.representation(using: .jpeg, properties: [.compressionFactor: 0.5])
}
|
|
|
|
/// Builds the task context dictionary sent with each screenshot.
///
/// - Returns: An empty dictionary when there is no active task; otherwise the keys
///   `task_title`, `task_goal` (the task description, falling back to its title)
///   and `steps` (one dictionary per entry in `activeSteps`).
private func buildTaskContext() -> [String: Any] {
    guard let task = activeTask else { return [:] }

    let stepPayloads = activeSteps.map { step -> [String: Any] in
        var payload: [String: Any] = [
            "id": step.id,
            "sort_order": step.sortOrder,
            "title": step.title,
            "status": step.status
        ]
        // The checkpoint note key is present only when a note exists.
        if let note = step.checkpointNote {
            payload["checkpoint_note"] = note
        }
        return payload
    }

    var context: [String: Any] = [:]
    context["task_title"] = task.title
    context["task_goal"] = task.description ?? task.title
    context["steps"] = stepPayloads
    return context
}
|
|
|
|
/// Applies one VLM analysis result to the session state.
///
/// Step bookkeeping always runs first. After that exactly one user-facing
/// reaction is chosen, in priority order: proactive friction help, context
/// resume, gentle nudge. A nudge is never sent when actionable friction was found.
///
/// - Parameter result: Decoded analysis response.
private func applyDistractionResult(_ result: DistractionAnalysisResponse) {
    // 1. Apply step side-effects (always)
    for finishedId in result.stepsCompleted {
        guard let position = activeSteps.firstIndex(where: { $0.id == finishedId }) else { continue }
        activeSteps[position].status = "done"
    }

    if let focusedId = result.currentStepId,
       let position = activeSteps.firstIndex(where: { $0.id == focusedId }) {
        if let note = result.checkpointNoteUpdate {
            activeSteps[position].checkpointNote = note
        }
        currentStepIndex = position
    }

    // 2. Notification priority (design spec §1.5):
    //    Proactive friction help → Context resume → Gentle nudge
    //    NEVER nudge when the system could help instead.
    if let friction = result.friction, friction.isActionable {
        if friction.isResumption {
            // Task resumption detected — auto-surface resume card without button press
            Task { await fetchResumeCard() }
            return
        }
        // A card that is already showing is left in place.
        guard proactiveCard == nil else { return }
        proactiveCard = ProactiveCard(source: .vlmFriction(
            frictionType: friction.type,
            description: friction.description,
            actions: friction.proposedActions
        ))
        return
    }

    // Only nudge if VLM found no actionable friction
    guard !result.onTask, result.confidence > 0.7, let nudge = result.gentleNudge else { return }
    distractionCount += 1
    lastNudge = nudge
    sendNudgeNotification(nudge)
}
|
|
|
|
// MARK: - Notifications
|
|
|
|
/// Translates an Argus `STATUS:` event into a status line and a debug notification.
/// Events other than the three known ones are ignored.
///
/// - Parameter event: Text that followed the `STATUS:` prefix.
private func handleArgusStatus(_ event: String) {
    let stages: [String: (status: String, title: String, body: String)] = [
        "screenshot_captured": (
            status: "📸 Screenshot captured — sending to VLM…",
            title: "📸 Screenshot Captured",
            body: "Sending to VLM for analysis…"
        ),
        "vlm_running": (
            status: "🤖 VLM analyzing screen…",
            title: "🤖 VLM Running",
            body: "Gemini is analyzing your screen…"
        ),
        "vlm_done": (
            status: "🧠 VLM done — applying result…",
            title: "🧠 VLM Done",
            body: "Analysis complete, processing result…"
        )
    ]

    guard let stage = stages[event] else { return }
    argusStatus = stage.status
    sendDebugNotification(title: stage.title, body: stage.body)
}
|
|
|
|
/// Posts an immediate, silent local notification describing a pipeline stage.
///
/// - Parameters:
///   - title: Notification title.
///   - body: Notification body text.
private func sendDebugNotification(title: String, body: String) {
    let payload = UNMutableNotificationContent()
    payload.title = title
    payload.body = body

    // A nil trigger delivers right away; the UUID keeps each identifier unique.
    let request = UNNotificationRequest(
        identifier: "debug-\(UUID().uuidString)",
        content: payload,
        trigger: nil
    )
    UNUserNotificationCenter.current().add(request)
}
|
|
|
|
/// Posts an immediate local notification, with the default sound, carrying a nudge.
///
/// - Parameter nudge: Text shown as the notification body.
private func sendNudgeNotification(_ nudge: String) {
    let payload = UNMutableNotificationContent()
    payload.title = "Hey, quick check-in!"
    payload.body = nudge
    payload.sound = .default

    // A nil trigger delivers right away.
    let request = UNNotificationRequest(
        identifier: UUID().uuidString,
        content: payload,
        trigger: nil
    )
    UNUserNotificationCenter.current().add(request)
}
|
|
|
|
/// Asks for permission to show alerts and play sounds.
/// The outcome, including any error, is ignored.
private func requestNotificationPermission() async {
    let center = UNUserNotificationCenter.current()
    _ = try? await center.requestAuthorization(options: [.alert, .sound])
}
|
|
}
|