// SessionManager.swift — Focus session state, native VLM screen analysis
// Screenshot capture → Gemini Vision API → apply results to UI + post to backend.
// No Python subprocess. No external process management.

import AppKit
import SwiftUI
import ScreenCaptureKit

/// Background monitoring vs focus session state.
enum MonitoringState {
    case monitoring     // VLM running, no focus session — Argus background mode
    case focusSession   // Linked to a specific task, tracking steps, distraction timer active
}

/// Main-actor singleton holding the focus-session state observed by the UI,
/// plus the private bookkeeping used by screen capture and VLM analysis.
@Observable
@MainActor
final class SessionManager {
    static let shared = SessionManager()

    // MARK: - State

    /// Whether we are only monitoring in the background or inside a focus session.
    var monitoringState: MonitoringState = .monitoring
    var activeSession: FocusSession?
    var activeTask: AppTask?
    var activeSteps: [Step] = []
    var currentStepIndex: Int = 0

    /// Computed for backward compatibility with UI bindings.
    var isSessionActive: Bool { monitoringState == .focusSession }
    var sessionStartDate: Date?
    var distractionCount: Int = 0
    var errorMessage: String?
    var isLoading: Bool = false

    // Resume card (shown in HUD, not system notification)
    var resumeCard: ResumeCard?
    var showingResumeCard: Bool = false

    // VLM / proactive agent
    var proactiveCard: ProactiveCard?
    var latestVlmSummary: String?
    var latestInferredTask: String?
    var latestAppName: String?
    var isExecuting: Bool = false
    var executorOutput: (title: String, content: String)?
    var monitoringError: String?

    // Nudge — shown in HUD as amber card, NOT system notification
    var nudgeMessage: String?
    /// Auto-dismiss timer for the nudge. `Task` is generic, so the success and
    /// failure types must be spelled out; the timer body returns nothing and
    /// swallows its sleep error, hence `<Void, Never>`.
    @ObservationIgnored private var nudgeTimer: Task<Void, Never>?

    // Screenshot engine
    var isCapturing: Bool = false
    // NOTE(review): the code that assigns captureTask is not visible in this part
    // of the file; `<Void, Never>` is assumed to match the other timers — confirm.
    @ObservationIgnored private var captureTask: Task<Void, Never>?
    private let captureInterval: TimeInterval = 5.0

    // Two-tier history buffer (replaces flat frameBuffer + screenshotHistory)
    @ObservationIgnored private var historyBuffer = HistoryBuffer(imageMaxLen: 4, textMaxLen: 12)
    @ObservationIgnored private var savedFramesForExecutor: [Data] = []
    private let framesPerVLMCall = 4

    // App switch tracking
    @ObservationIgnored private var appSwitches: [(name: String, bundleId: String, time: Date)] = []
    @ObservationIgnored private var appSwitchObserver: (any NSObjectProtocol)?
    @ObservationIgnored private var lastApp: (name: String, bundleId: String) = ("", "")
    @ObservationIgnored private var lastAppEnteredAt: Date = Date()

    // Proactive card auto-dismiss timer
    @ObservationIgnored private var proactiveCardTimer: Task<Void, Never>?

    // Open sessions cache (for VLM session context injection)
    @ObservationIgnored private var openSessions: [OpenSession] = []
    @ObservationIgnored private var lastOpenSessionsFetch: Date?
    /// Adaptive: 10s during focus session, 30s during monitoring
    private var sessionsFetchInterval: TimeInterval { monitoringState == .focusSession ? 10.0 : 30.0 }

    // Cross-device sync: track last known active session to detect remote changes
    @ObservationIgnored private var lastKnownActiveSessionId: String?

    // Incomplete tasks cache (tasks without active sessions — for VLM task matching)
    @ObservationIgnored private var incompleteTasks: [AppTask] = []

    // Task caching (original's sophisticated matching)
    @ObservationIgnored private var cachedTasks: [AppTask] = []
    @ObservationIgnored private var lastTasksFetch: Date?
// Inferred task stability tracking (for VLM-initiated task creation)
@ObservationIgnored private var inferredTaskHistory: [String] = []
private let stableTaskThreshold = 3

// Notification dedup — fingerprint-based, prevents spam
@ObservationIgnored private var lastFrictionFingerprint: String = ""

// Session action dedup — tracks session IDs we've already shown cards for.
// The element type must be explicit: an empty literal cannot infer it.
@ObservationIgnored private var handledSessionActions: Set<String> = []

// Distraction timer (active only during focus session)
@ObservationIgnored private var continuousOffTaskStart: Date?
@ObservationIgnored private var consecutiveNudgeCount: Int = 0
@ObservationIgnored private var lastNudgeSentAt: Date?
private let maxConsecutiveNudges = 5

/// Configurable distraction threshold (default 2 minutes).
/// Backed by `UserDefaults`; a stored value of 0 or less falls back to 120 seconds.
var distractionThresholdSeconds: TimeInterval {
    get {
        let stored = UserDefaults.standard.double(forKey: "lockInBro.distractionThreshold")
        return stored > 0 ? stored : 120
    }
    set { UserDefaults.standard.set(newValue, forKey: "lockInBro.distractionThreshold") }
}

private init() {}

// MARK: - Computed

/// The step at `currentStepIndex`, or `nil` when the index is outside `activeSteps`
/// (including a negative index).
var currentStep: Step? {
    guard activeSteps.indices.contains(currentStepIndex) else { return nil }
    return activeSteps[currentStepIndex]
}

/// Number of active steps whose `isDone` flag is set.
var completedCount: Int { activeSteps.filter(\.isDone).count }

/// Total number of active steps.
var totalSteps: Int { activeSteps.count }

/// Seconds elapsed since `sessionStartDate`, or 0 when no start date is set.
var sessionElapsed: TimeInterval {
    guard let start = sessionStartDate else { return 0 }
    return Date().timeIntervalSince(start)
}

// MARK: - Monitoring Lifecycle

/// Stops capture and the app observer, cancels both card timers and resets
/// every piece of session, cache and dedup state back to its initial value.
func stopMonitoring() {
    stopCapture()
    stopAppObserver()
    proactiveCardTimer?.cancel()
    proactiveCardTimer = nil
    nudgeTimer?.cancel()
    nudgeTimer = nil

    // Session + UI state
    activeSession = nil
    activeTask = nil
    activeSteps = []
    monitoringState = .monitoring
    sessionStartDate = nil
    resumeCard = nil
    showingResumeCard = false
    proactiveCard = nil
    nudgeMessage = nil
    latestVlmSummary = nil
    latestInferredTask = nil
    latestAppName = nil
    isExecuting = false
    executorOutput = nil
    monitoringError = nil

    // Buffers + caches
    historyBuffer.clear()
    savedFramesForExecutor = []
    persistedSessionId = nil
    openSessions = []
    incompleteTasks = []
    cachedTasks = []
    lastTasksFetch = nil
    lastOpenSessionsFetch = nil

    // Dedup + distraction tracking
    inferredTaskHistory = []
    lastFrictionFingerprint = ""
    handledSessionActions = []
    distractionCount = 0
    continuousOffTaskStart = nil
    consecutiveNudgeCount = 0
    lastNudgeSentAt = nil
    lastKnownActiveSessionId = nil
}

/// Called once after login. Starts VLM monitoring only — no focus session.
/// The VLM observes the screen and suggests resume/switch/start_new when appropriate.
///
/// Returns early when there is no auth token, when capture is already running, or
/// when Screen Recording permission is missing (in which case `monitoringError` is set).
func startMonitoring() async {
    guard TokenStore.shared.token != nil else { return }
    guard !isCapturing else { return }

    monitoringError = nil
    if !CGPreflightScreenCaptureAccess() {
        CGRequestScreenCaptureAccess()
        monitoringError = "Screen Recording permission required — enable in System Settings → Privacy & Security → Screen Recording, then tap Retry"
        return
    }

    // End any stale active session FIRST — app always starts in monitoring mode.
    // The interrupted session will appear in open sessions for the VLM to suggest resuming.
    if let stale = try? await APIClient.shared.getActiveSession() {
        print("[Startup] Found stale session \(stale.id.prefix(8)) — interrupting. VLM will decide what to do.")
        _ = try? await APIClient.shared.endSession(sessionId: stale.id, status: "interrupted")
    }

    // Start VLM capture + app observer — monitoring mode only, no session
    startCapture()
    startAppObserver()

    // Fetch tasks and open sessions so VLM has context for matching
    await fetchTasksIfNeeded()
    await fetchOpenSessions()
    await fetchIncompleteTasks()
}

/// Makes `session` the active focus session, loads its task and steps when it has a
/// task id, and writes a resume summary into `latestVlmSummary`.
///
/// Task and step state from a previously active session is cleared first, so a failed
/// or empty lookup can never leave the old task attached to the resumed session.
private func autoResumeSession(_ session: FocusSession) async {
    activeSession = session
    persistedSessionId = session.id
    monitoringState = .focusSession
    sessionStartDate = Date()
    distractionCount = 0
    nudgeMessage = nil
    historyBuffer.clear()
    continuousOffTaskStart = nil
    consecutiveNudgeCount = 0
    lastNudgeSentAt = nil

    // Drop whatever the previous session was tracking before looking up the new task.
    activeTask = nil
    activeSteps = []
    currentStepIndex = 0

    if let taskId = session.taskId {
        do {
            if cachedTasks.isEmpty { await fetchTasksIfNeeded() }
            activeTask = cachedTasks.first(where: { $0.id == taskId })
            if activeTask == nil {
                // Cache may be stale — force one refresh before giving up.
                cachedTasks = try await APIClient.shared.getTasks()
                lastTasksFetch = Date()
                activeTask = cachedTasks.first(where: { $0.id == taskId })
            }
            if let task = activeTask {
                let steps = try await APIClient.shared.getSteps(taskId: task.id)
                activeSteps = steps.sorted { $0.sortOrder < $1.sortOrder }
                currentStepIndex = activeSteps.firstIndex(where: { $0.isActive })
                    ?? activeSteps.firstIndex(where: { $0.status == "pending" })
                    ?? 0
            }
        } catch {
            // Best effort: the session still resumes, just without task/step detail.
            print("[Resume] Could not load task context for session \(session.id.prefix(8)): \(error)")
        }
    }

    // Build an informative resume summary for the HUD
    if let task = activeTask {
        let completed = activeSteps.filter(\.isDone).count
        let total = activeSteps.count
        var summary = "Resumed: \(task.title)"
        if total > 0 {
            summary += " (\(completed)/\(total) steps done)"
            if let step = currentStep?.title { summary += " — next: \(step)" }
        }
        latestVlmSummary = summary
        latestInferredTask = task.title
    } else {
        let checkpoint = session.checkpoint?.vlmSummary ?? session.checkpoint?.lastActionSummary
        if let info = checkpoint, !info.isEmpty {
            latestVlmSummary = "Resumed session — \(info)"
        } else {
            latestVlmSummary = "Resumed session — monitoring your screen…"
        }
    }

    if !isCapturing { startCapture() }
    if appSwitchObserver == nil { startAppObserver() }
}

// MARK: - Open Sessions + Task List + Context

/// Refreshes `openSessions` from the backend and then checks for cross-device changes.
/// Does nothing without an auth token; a failed request keeps the previous cache.
private func fetchOpenSessions() async {
    guard TokenStore.shared.token != nil else { return }
    do {
        openSessions = try await APIClient.shared.getOpenSessions()
        lastOpenSessionsFetch = Date()
        detectCrossDeviceChanges()
    } catch {
        print("[Sessions] Failed to fetch open sessions: \(error)")
    }
}

/// Fetch incomplete tasks that don't already have open sessions.
/// These are tasks the user created (brain dump, manual, etc.) but hasn't started working on.
private func fetchIncompleteTasks() async {
    guard TokenStore.shared.token != nil else { return }
    do {
        let allTasks = try await APIClient.shared.getTasks()
        let sessionTaskIds = Set(openSessions.compactMap(\.taskId))
        incompleteTasks = allTasks.filter { task in
            !task.isDone && !sessionTaskIds.contains(task.id)
        }
    } catch {
        print("[Sessions] Failed to fetch incomplete tasks: \(error)")
    }
}

/// Fetch tasks from the backend (cached for 30s).
private func fetchTasksIfNeeded() async {
    if let last = lastTasksFetch, Date().timeIntervalSince(last) < 30 { return }
    do {
        cachedTasks = try await APIClient.shared.getTasks()
        lastTasksFetch = Date()
    } catch {
        print("[Sessions] Failed to fetch tasks: \(error)")
    }
}

/// Refreshes open sessions and incomplete tasks when they have never been fetched
/// or the last fetch is older than `sessionsFetchInterval`.
private func maybeRefreshSessions() async {
    if let lastFetch = lastOpenSessionsFetch,
       Date().timeIntervalSince(lastFetch) <= sessionsFetchInterval {
        return
    }
    await fetchOpenSessions()
    await fetchIncompleteTasks()
}

// MARK: - Cross-Device Session Sync

/// Detect when another device ends a focus session we're tracking locally.
/// Does NOT auto-resume — the VLM decides whether to suggest resume/switch/start_new.
private func detectCrossDeviceChanges() {
    let serverActiveSession = openSessions.first(where: { $0.status == "active" })

    // Our local focus session was ended by another device → drop to monitoring
    if monitoringState == .focusSession,
       let localId = activeSession?.id,
       serverActiveSession?.id != localId {
        print("[CrossDevice] Session \(localId.prefix(8)) ended by another device")
        handleRemoteSessionEnd()
        return
    }

    // Track what's active on server (for context only — VLM decides what to do)
    lastKnownActiveSessionId = serverActiveSession?.id
}

/// Handle a session that was ended by another device.
/// Clears the local focus-session state and timers but leaves capture running.
private func handleRemoteSessionEnd() {
    activeSession = nil
    activeTask = nil
    activeSteps = []
    monitoringState = .monitoring
    sessionStartDate = nil
    distractionCount = 0
    nudgeMessage = nil
    resumeCard = nil
    showingResumeCard = false
    proactiveCard = nil
    proactiveCardTimer?.cancel()
    proactiveCardTimer = nil
    nudgeTimer?.cancel()
    nudgeTimer = nil
    continuousOffTaskStart = nil
    consecutiveNudgeCount = 0
    lastNudgeSentAt = nil
    persistedSessionId = nil
    inferredTaskHistory = []
    latestVlmSummary = "Focus session ended on another device"
    // VLM capture continues in monitoring mode
}

/// Renders the cached open sessions and up to ten unstarted tasks as the text block
/// injected into the VLM prompt. Returns a fixed hint string when both caches are empty.
private func formatSessionContext() -> String {
    var sections: [String] = []

    // Open sessions (active + interrupted)
    if !openSessions.isEmpty {
        let formatter = ISO8601DateFormatter()
        let lines: [String] = openSessions.map { s in
            let statusTag = "[\(s.status)]"
            let taskTitle = s.task?.title ?? "(no task)"
            var line = " session_id=\"\(s.id)\" \(statusTag) \"\(taskTitle)\""
            let app = s.checkpoint?.activeApp ?? ""
            if !app.isEmpty { line += " — last in \(app)" }
            let file = s.checkpoint?.activeFile ?? ""
            if !file.isEmpty { line += "/\(file)" }
            let note = s.checkpoint?.lastActionSummary ?? ""
            if !note.isEmpty { line += ", \"\(note)\"" }
            if let endedAt = s.endedAt, let date = formatter.date(from: endedAt) {
                let minutes = Int(Date().timeIntervalSince(date) / 60)
                if minutes > 0 { line += " (paused \(minutes)m ago)" }
            }
            return line
        }
        sections.append("Open sessions:\n" + lines.joined(separator: "\n"))
    }

    // Incomplete tasks without sessions (from brain dump, manual creation, etc.)
    if !incompleteTasks.isEmpty {
        let lines: [String] = incompleteTasks.prefix(10).map { t in
            var line = " task_id=\"\(t.id)\" [\(t.status)] \"\(t.title)\""
            if let desc = t.description, !desc.isEmpty {
                line += " — \(desc.prefix(80))"
            }
            return line
        }
        sections.append("Unstarted tasks (no session yet — use task_id in session_action if user is working on one):\n" + lines.joined(separator: "\n"))
    }

    if sections.isEmpty {
        return "(no open sessions or tasks — suggest start_new if user is actively working on something)"
    }
    return sections.joined(separator: "\n\n")
}

// MARK: - Notification Deduplication

/// Fingerprint of a friction event: its type plus the sorted labels of its proposed actions.
private func frictionFingerprint(_ friction: FrictionInfo) -> String {
    let labels = friction.proposedActions.map(\.label).sorted().joined(separator: "|")
    return "\(friction.type):\(labels)"
}

/// Returns `true` for actionable friction whose fingerprint differs from the last one
/// shown, and records the new fingerprint as a side effect.
private func shouldNotify(friction: FrictionInfo) -> Bool {
    guard friction.isActionable else { return false }
    let fingerprint = frictionFingerprint(friction)
    if fingerprint == lastFrictionFingerprint { return false }
    lastFrictionFingerprint = fingerprint
    return true
}

// MARK: - Task Matching

/// Find the best matching existing task by comparing inferred task + VLM summary against all non-done tasks.
/// Uses keyword overlap scoring against task title, description, and tags.
///
/// Scoring per task: 3 per shared title word, 1 per shared description word, 2 per tag
/// contained in the combined text, 10 when the whole title is contained in it.
/// Empty titles and tags are skipped so they can never count as a substring hit.
/// - Returns: The highest-scoring cached task with a score of at least 4, otherwise `nil`.
func findMatchingTask(for inferredTask: String, vlmSummary: String = "", appName: String = "") -> AppTask? {
    let stopWords: Set<String> = ["the", "a", "an", "in", "on", "to", "and", "or", "is", "for", "of", "with", "my", "this", "that", "user", "working", "screen", "currently", "appears", "be", "has", "are", "was", "been", "being", "it", "its", "at", "by", "from", "not", "but"]
    let combined = "\(inferredTask) \(vlmSummary) \(appName)".lowercased()
    let searchWords = Set(combined.split(separator: " ").map(String.init)).subtracting(stopWords)
    guard !searchWords.isEmpty else { return nil }

    var bestMatch: AppTask?
    var bestScore = 0

    for task in cachedTasks where task.status != "done" {
        var score = 0

        let title = task.title.lowercased()
        let titleWords = Set(title.split(separator: " ").map(String.init)).subtracting(stopWords)
        score += searchWords.intersection(titleWords).count * 3

        if let desc = task.description?.lowercased() {
            let descWords = Set(desc.split(separator: " ").map(String.init)).subtracting(stopWords)
            score += searchWords.intersection(descWords).count
        }

        for tag in task.tags {
            let needle = tag.lowercased()
            // An empty needle must not score: substring search for "" can report a match.
            if !needle.isEmpty, combined.contains(needle) { score += 2 }
        }

        if !title.isEmpty, combined.contains(title) { score += 10 }

        if score > bestScore {
            bestScore = score
            bestMatch = task
        }
    }

    return bestScore >= 4 ? bestMatch : nil
}

/// Try to match an inferred task string to an existing incomplete task by keyword overlap.
/// Simpler version for VLM prompt injection.
/// Requires at least two shared non-stop-words with the task title.
private func matchInferredTaskToExisting(_ inferredTask: String) -> AppTask? {
    let stopWords: Set<String> = ["the", "a", "an", "in", "on", "to", "and", "or", "is", "for", "of", "with"]
    let inferredWords = Set(inferredTask.lowercased().split(separator: " ").map(String.init))
        .subtracting(stopWords)
    guard !inferredWords.isEmpty else { return nil }

    var bestMatch: (task: AppTask, score: Int)?
    for task in incompleteTasks {
        let taskWords = Set(task.title.lowercased().split(separator: " ").map(String.init))
            .subtracting(stopWords)
        let overlap = inferredWords.intersection(taskWords).count
        if overlap >= 2, overlap > (bestMatch?.score ?? 0) {
            bestMatch = (task, overlap)
        }
    }
    return bestMatch?.task
}

// MARK: - Inferred Task Stability

/// Records `inferredTask` and reports whether the last `stableTaskThreshold` inferences
/// are similar enough to suggest a new session (each later one shares at least half of
/// the first one's non-stop-words). Resets the history when the input is empty or a
/// focus session is already active.
private func shouldSuggestNewSession(_ inferredTask: String) -> Bool {
    guard !inferredTask.isEmpty, !isSessionActive else {
        inferredTaskHistory = []
        return false
    }

    inferredTaskHistory.append(inferredTask)
    if inferredTaskHistory.count > stableTaskThreshold + 2 {
        inferredTaskHistory = Array(inferredTaskHistory.suffix(stableTaskThreshold + 2))
    }
    guard inferredTaskHistory.count >= stableTaskThreshold else { return false }

    let recent = Array(inferredTaskHistory.suffix(stableTaskThreshold))
    let stopWords: Set<String> = ["the", "a", "an", "in", "on", "to", "and", "or", "is", "for", "of", "with"]
    let firstWords = Set(recent[0].lowercased().split(separator: " ").map(String.init))
        .subtracting(stopWords)
    guard !firstWords.isEmpty else { return false }

    return recent.dropFirst().allSatisfy { task in
        let words = Set(task.lowercased().split(separator: " ").map(String.init))
        let overlap = firstWords.intersection(words)
        return Double(overlap.count) >= Double(firstWords.count) * 0.5
    }
}

// MARK: - Focus Session Activation Helpers

/// Installs `session` as the active focus session and resets every per-session counter.
/// Shared by all three session-start paths so none of them can leave stale step,
/// nudge or distraction state behind.
private func activateFocusSession(_ session: FocusSession, task: AppTask?, steps: [Step]) {
    activeSession = session
    persistedSessionId = session.id
    activeTask = task
    activeSteps = steps
    currentStepIndex = 0
    monitoringState = .focusSession
    sessionStartDate = Date()
    distractionCount = 0
    nudgeMessage = nil
    inferredTaskHistory = []
    historyBuffer.clear()
    continuousOffTaskStart = nil
    consecutiveNudgeCount = 0
    lastNudgeSentAt = nil
}

/// Loads the steps of `task` into `activeSteps` (sorted by `sortOrder`) and points
/// `currentStepIndex` at the first active step, else the first pending step, else 0.
/// A failure is reported through `errorMessage` and leaves the step list untouched.
private func loadSteps(for task: AppTask) async {
    do {
        let steps = try await APIClient.shared.getSteps(taskId: task.id)
        activeSteps = steps.sorted { $0.sortOrder < $1.sortOrder }
        currentStepIndex = activeSteps.firstIndex(where: { $0.isActive })
            ?? activeSteps.firstIndex(where: { $0.status == "pending" })
            ?? 0
    } catch {
        errorMessage = error.localizedDescription
    }
}

/// Builds the local `FocusSession` value used to resume an open session.
private func makeFocusSession(resuming openSession: OpenSession) -> FocusSession {
    FocusSession(
        id: openSession.id,
        userId: "",
        taskId: openSession.taskId,
        platform: openSession.platform,
        startedAt: openSession.startedAt,
        endedAt: nil,
        status: "active",
        checkpoint: nil
    )
}

// MARK: - VLM-Initiated Task Creation

/// Creates a task titled `inferredTask`, asks the backend to plan it (best effort),
/// and starts a focus session on it. Any currently active session is ended first.
/// Failures are surfaced through `errorMessage`.
func createVLMTaskAndSession(inferredTask: String) async {
    isLoading = true
    errorMessage = nil
    defer { isLoading = false }

    do {
        if let stale = activeSession {
            _ = try? await APIClient.shared.endSession(sessionId: stale.id, status: "completed")
        }
        let task = try await APIClient.shared.createVLMTask(title: inferredTask)
        let plan = try? await APIClient.shared.planTask(taskId: task.id)
        let steps = (plan?.steps ?? []).sorted { $0.sortOrder < $1.sortOrder }
        let session = try await APIClient.shared.startSession(taskId: task.id)

        activateFocusSession(session, task: task, steps: steps)
        await fetchOpenSessions()

        stopCapture()
        startCapture()
        if appSwitchObserver == nil { startAppObserver() }
    } catch {
        errorMessage = error.localizedDescription
    }
}

// MARK: - Session Lifecycle

/// Id of the last started session, persisted in `UserDefaults`; setting `nil` removes the key.
private var persistedSessionId: String? {
    get { UserDefaults.standard.string(forKey: "lockInBro.lastSessionId") }
    set {
        if let v = newValue {
            UserDefaults.standard.set(v, forKey: "lockInBro.lastSessionId")
        } else {
            UserDefaults.standard.removeObject(forKey: "lockInBro.lastSessionId")
        }
    }
}

/// Starts a focus session, optionally attached to `task`.
///
/// Ends any stale session first (local, persisted, or reported active by the backend).
/// Once the new session exists, a failed step fetch is reported via `errorMessage`
/// but no longer prevents the capture loop from being restarted.
func startSession(task: AppTask?) async {
    isLoading = true
    errorMessage = nil
    defer { isLoading = false }

    do {
        var staleId: String? = activeSession?.id ?? persistedSessionId
        if staleId == nil {
            staleId = (try? await APIClient.shared.getActiveSession())?.id
        }
        if let id = staleId {
            _ = try? await APIClient.shared.endSession(sessionId: id, status: "completed")
        }

        let session = try await APIClient.shared.startSession(taskId: task?.id)
        activateFocusSession(session, task: task, steps: [])
        if let task { await loadSteps(for: task) }

        stopCapture()
        startCapture()
        if appSwitchObserver == nil { startAppObserver() }
    } catch {
        errorMessage = error.localizedDescription
    }
}

/// Start a session attached to an existing task from the database.
/// Steps of any previous task are cleared before the new ones are loaded.
func startSessionWithExistingTask(_ task: AppTask) async {
    isLoading = true
    errorMessage = nil
    defer { isLoading = false }

    do {
        if let stale = activeSession {
            _ = try? await APIClient.shared.endSession(sessionId: stale.id, status: "completed")
        }
        let session = try await APIClient.shared.startSession(taskId: task.id)
        activateFocusSession(session, task: task, steps: [])
        await loadSteps(for: task)
        await fetchOpenSessions()

        stopCapture()
        startCapture()
        if appSwitchObserver == nil { startAppObserver() }
    } catch {
        errorMessage = error.localizedDescription
    }
}

// MARK: - Context Checkpointing

/// Save a context checkpoint before ending/interrupting a session.
/// Captures the latest VLM state so the resume card has rich context later.
private func saveCheckpoint(for sessionId: String) async {
    let stepId = currentStep?.id
    let summary = latestVlmSummary
    let inferred = latestInferredTask
    let app = latestAppName ?? NSWorkspace.shared.frontmostApplication?.localizedName

    // "<inferred task> — <summary>", using whichever parts are present.
    var actionSummary = ""
    if let inferred, !inferred.isEmpty { actionSummary = inferred }
    if let summary, !summary.isEmpty {
        actionSummary += actionSummary.isEmpty ? summary : " — \(summary)"
    }

    // Next step title: the current step, or the first pending one if the current is done.
    let nextUp: String? = {
        guard let step = currentStep else { return nil }
        if step.isDone {
            return activeSteps.first(where: { $0.status == "pending" })?.title
        }
        return step.title
    }()

    try? await APIClient.shared.checkpointSession(
        sessionId: sessionId,
        currentStepId: stepId,
        lastActionSummary: actionSummary.isEmpty ? nil : String(actionSummary.prefix(500)),
        nextUp: nextUp,
        goal: activeTask?.description ?? activeTask?.title,
        activeApp: app,
        lastScreenshotAnalysis: summary,
        distractionCount: distractionCount
    )
}

/// Checkpoints and ends the active session on the backend with `status`, clears the
/// local session state, then restarts capture if the user is still signed in.
func endSession(status: String = "completed") async {
    stopCapture()
    stopAppObserver()
    if let session = activeSession {
        await saveCheckpoint(for: session.id)
        _ = try? await APIClient.shared.endSession(sessionId: session.id, status: status)
    }

    activeSession = nil
    activeTask = nil
    activeSteps = []
    monitoringState = .monitoring
    sessionStartDate = nil
    resumeCard = nil
    showingResumeCard = false
    proactiveCard = nil
    nudgeMessage = nil
    latestVlmSummary = nil
    latestInferredTask = nil
    latestAppName = nil
    isExecuting = false
    executorOutput = nil
    proactiveCardTimer?.cancel()
    proactiveCardTimer = nil
    nudgeTimer?.cancel()
    nudgeTimer = nil
    historyBuffer.clear()
    savedFramesForExecutor = []
    persistedSessionId = nil
    inferredTaskHistory = []
    lastFrictionFingerprint = ""
    handledSessionActions = []
    continuousOffTaskStart = nil
    consecutiveNudgeCount = 0
    lastNudgeSentAt = nil
    lastKnownActiveSessionId = nil

    // Keep capture loop running for always-on monitoring
    if TokenStore.shared.token != nil {
        startCapture()
        startAppObserver()
    }
}

/// Requests the resume card for the active session and shows it; errors go to `errorMessage`.
func fetchResumeCard() async {
    guard let session = activeSession else { return }
    do {
        let response = try await APIClient.shared.resumeSession(sessionId: session.id)
        resumeCard = response.resumeCard
        showingResumeCard = true
    } catch {
        errorMessage = error.localizedDescription
    }
}

/// Marks the current step complete on the backend, stores the updated step and
/// advances `currentStepIndex` to the first pending step, if any.
func completeCurrentStep() async {
    guard let step = currentStep else { return }
    do {
        let updated = try await APIClient.shared.completeStep(stepId: step.id)
        if let idx = activeSteps.firstIndex(where: { $0.id == updated.id }) {
            activeSteps[idx] = updated
        }
        if let next = activeSteps.firstIndex(where: { $0.status == "pending" }) {
            currentStepIndex = next
        }
    } catch {
        errorMessage = error.localizedDescription
    }
}

// MARK: - Retry (HUD Retry button)

/// Clears the monitoring error and history buffer, then restarts capture.
func retryMonitoring() {
    monitoringError = nil
    historyBuffer.clear()
    stopCapture()
    startCapture()
    if appSwitchObserver == nil { startAppObserver() }
}

// MARK: - Nudge Lifecycle (shown in HUD)

/// Shows `message` as a nudge, counts it as a distraction and schedules auto-dismiss after 12s.
private func showNudge(_ message: String) {
    nudgeTimer?.cancel()
    distractionCount += 1
    withAnimation { nudgeMessage = message }
    nudgeTimer = Task { [weak self] in
        try? await Task.sleep(for: .seconds(12))
        guard !Task.isCancelled, let self else { return }
        await MainActor.run { withAnimation { self.nudgeMessage = nil } }
    }
}

/// Hides the nudge immediately and cancels its auto-dismiss timer.
func dismissNudge() {
    nudgeTimer?.cancel()
    nudgeTimer = nil
    withAnimation { nudgeMessage = nil }
}

// MARK: - Proactive Card Lifecycle

/// Remembers the session id of a session-action card so the same suggestion is not shown again.
private func markSessionActionHandled(_ card: ProactiveCard?) {
    if case .sessionAction(_, _, _, _, let sessionId, _) = card?.source, let sessionId {
        handledSessionActions.insert(sessionId)
    }
}

/// Shows `card`, replacing any pending auto-dismiss timer; with `autoDismiss` the card
/// is dismissed after 30 seconds.
private func showProactiveCard(_ card: ProactiveCard, autoDismiss: Bool = true) {
    proactiveCardTimer?.cancel()
    withAnimation { proactiveCard = card }

    // Session action cards persist — they require user input.
    // Only friction/nudge cards auto-dismiss.
    if autoDismiss {
        proactiveCardTimer = Task { [weak self] in
            try? await Task.sleep(for: .seconds(30))
            guard !Task.isCancelled, let self else { return }
            await MainActor.run { self.dismissProactiveCard() }
        }
    }
}

/// Dismisses the current proactive card and marks its session action as handled.
func dismissProactiveCard() {
    markSessionActionHandled(proactiveCard)
    proactiveCardTimer?.cancel()
    proactiveCardTimer = nil
    withAnimation { proactiveCard = nil }
}

/// Executes the action the user approved on the current proactive card.
///
/// The card is dismissed first; what happens next depends on its source:
/// friction and app-switch cards run the Gemini executor, session-action cards
/// start, resume, switch or complete a session.
/// - Parameter actionIndex: Index of the approved action; out-of-range values are ignored.
func approveProactiveCard(actionIndex: Int) {
    markSessionActionHandled(proactiveCard)
    proactiveCardTimer?.cancel()
    proactiveCardTimer = nil
    let card = proactiveCard
    withAnimation { proactiveCard = nil }

    switch card?.source {
    case .vlmFriction(_, _, let actions):
        guard actions.indices.contains(actionIndex) else { return }
        let action = actions[actionIndex]
        isExecuting = true
        Task {
            do {
                let geminiKey = UserDefaults.standard.string(forKey: "geminiApiKey") ?? ""
                guard !geminiKey.isEmpty else {
                    isExecuting = false
                    executorOutput = (title: action.label, content: action.details ?? "Action approved.")
                    return
                }
                let client = GeminiVLMClient(apiKey: geminiKey)
                // Send all buffered frames to the executor for vision context
                var frames = historyBuffer.frameData
                if let fresh = await captureScreen() { frames.append(fresh) }
                savedFramesForExecutor = frames
                let result = try await client.executeAction(
                    label: action.label,
                    details: action.details ?? "",
                    frames: frames,
                    onToolCall: { name, args in
                        print("[Executor] \(name): \(args)")
                    }
                )
                isExecuting = false
                executorOutput = (title: action.label, content: result)
                historyBuffer.setLastExecution("Completed: \(action.label). \(result.prefix(200))")
            } catch {
                isExecuting = false
                executorOutput = (title: action.label, content: action.details ?? "Couldn't complete automatically.")
            }
        }

    case .appSwitchLoop(let apps, _):
        isExecuting = true
        Task {
            do {
                let geminiKey = UserDefaults.standard.string(forKey: "geminiApiKey") ?? ""
                guard !geminiKey.isEmpty else {
                    isExecuting = false
                    executorOutput = (title: "App Switch Help", content: "No Gemini API key set.")
                    return
                }
                let client = GeminiVLMClient(apiKey: geminiKey)
                let frames = historyBuffer.frameData
                let label = "Help with \(apps.joined(separator: " ↔ ")) switching"
                let details = """
                    The user is repeatedly switching between \(apps.joined(separator: " and ")). \
                    Look at the screenshots to understand what they're trying to do across these apps. \
                    If they're copying data between them, extract and format the data. \
                    If they're looking something up, find and summarize the answer. \
                    If they're comparing content, create a consolidated view. \
                    Use output() to show the result.
                    """
                let result = try await client.executeAction(
                    label: label,
                    details: details,
                    frames: frames,
                    onToolCall: { name, args in
                        print("[Executor] \(name): \(args)")
                    }
                )
                isExecuting = false
                executorOutput = (title: label, content: result)
                historyBuffer.setLastExecution("Completed: \(label). \(result.prefix(200))")
            } catch {
                isExecuting = false
                executorOutput = (title: "App Switch Help", content: "Couldn't analyze — \(error.localizedDescription)")
            }
        }

    case .sessionAction(let type, let taskTitle, _, _, let sessionId, let taskId):
        let matchedTask = card?.matchedTask
        switch type {
        case "start_new":
            switch actionIndex {
            case 0:
                if let matchedTask {
                    Task { await startSessionWithExistingTask(matchedTask) }
                } else {
                    // Fall back to the incomplete task named by the card; nil starts a task-less session.
                    let existingTask = taskId.flatMap { id in
                        incompleteTasks.first(where: { $0.id == id })
                    }
                    Task { await startSession(task: existingTask) }
                }
            case 1:
                Task { await createVLMTaskAndSession(inferredTask: taskTitle) }
            case 2:
                Task { await startSession(task: nil) }
            default:
                break
            }

        case "resume":
            guard let sessionId else { break }
            Task {
                if let openSession = openSessions.first(where: { $0.id == sessionId }) {
                    await autoResumeSession(makeFocusSession(resuming: openSession))
                    await fetchResumeCard()
                }
            }

        case "switch":
            guard let sessionId else { break }
            Task {
                if let current = activeSession {
                    await saveCheckpoint(for: current.id)
                    _ = try? await APIClient.shared.endSession(sessionId: current.id, status: "interrupted")
                }
                if let openSession = openSessions.first(where: { $0.id == sessionId }) {
                    await autoResumeSession(makeFocusSession(resuming: openSession))
                }
            }

        case "complete":
            guard let sessionId else { break }
            Task {
                if activeSession?.id == sessionId {
                    // endSession(status:) already checkpoints and ends the session on the
                    // backend, so it must not be preceded by a second checkpoint/end pair.
                    await endSession(status: "completed")
                } else {
                    await saveCheckpoint(for: sessionId)
                    _ = try? await APIClient.shared.endSession(sessionId: sessionId, status: "completed")
                }
                await fetchOpenSessions()
            }

        default:
            break
        }

    default:
        break
    }
}

// MARK: - App Switch Observer

private func startAppObserver() {
    let current = NSWorkspace.shared.frontmostApplication
    lastApp = (current?.localizedName ?? "", current?.bundleIdentifier ??
"") lastAppEnteredAt = Date() appSwitches = [] appSwitchObserver = NSWorkspace.shared.notificationCenter.addObserver( forName: NSWorkspace.didActivateApplicationNotification, object: nil, queue: .main ) { [weak self] notification in guard let app = notification.userInfo?[NSWorkspace.applicationUserInfoKey] as? NSRunningApplication else { return } Task { @MainActor [weak self] in self?.handleAppSwitch(app: app) } } } private func stopAppObserver() { if let observer = appSwitchObserver { NSWorkspace.shared.notificationCenter.removeObserver(observer) appSwitchObserver = nil } appSwitches = [] } private func handleAppSwitch(app: NSRunningApplication) { let name = app.localizedName ?? "Unknown" let bundleId = app.bundleIdentifier ?? "" let now = Date() guard name != lastApp.name else { return } let duration = max(1, Int(now.timeIntervalSince(lastAppEnteredAt))) let prev = lastApp if let session = activeSession, !prev.name.isEmpty { Task { _ = try? await APIClient.shared.appActivity( sessionId: session.id, appBundleId: prev.bundleId, appName: prev.name, durationSeconds: duration ) } } lastApp = (name, bundleId) lastAppEnteredAt = now appSwitches.append((name: name, bundleId: bundleId, time: now)) if appSwitches.count > 30 { appSwitches.removeFirst() } guard isSessionActive, proactiveCard == nil else { return } if let loop = detectRepetitiveLoop() { showProactiveCard(ProactiveCard(source: .appSwitchLoop(apps: loop.apps, switchCount: loop.count))) } } private func detectRepetitiveLoop() -> (apps: [String], count: Int)? { let cutoff = Date().addingTimeInterval(-300) let recent = appSwitches.filter { $0.time > cutoff }.map(\.name) guard recent.count >= 6 else { return nil } let last6 = Array(recent.suffix(6)) guard Set(last6).count == 2 else { return nil } for i in 1..= framesPerVLMCall else { print("[TIMING] \(ts()) warming up buffer — skip VLM this cycle") return } guard !geminiKey.isEmpty else { print("[VLM] No Gemini API key set — skipping analysis") return } // 3. 
Snapshot the current rolling window for this VLM call let frames = historyBuffer.frameData let fileUris = historyBuffer.fileUris // 4. Refresh open sessions + build prompt context await maybeRefreshSessions() let tPrompt = Date() let client = GeminiVLMClient(apiKey: geminiKey) let windowTitle = NSWorkspace.shared.frontmostApplication?.localizedName ?? "" let historyCtx = historyBuffer.formatForPrompt() let sessionCtx = formatSessionContext() let lastOutputCtx = historyBuffer.formatLastOutput() let execCtx = historyBuffer.formatLastExecution() let totalBytes = frames.reduce(0) { $0 + $1.count } print("[TIMING] \(ts()) prompt context ready — \(frames.count) frames \(totalBytes / 1024)KB sessions=\(openSessions.count) (\(String(format: "%.3f", Date().timeIntervalSince(tPrompt)))s)") // 5. Gemini API call let tVlm = Date() print("[TIMING] \(ts()) → sending to Gemini…") do { let result = try await client.analyze( frames: frames, fileUris: fileUris, taskTitle: activeTask?.title ?? "", taskGoal: activeTask?.description ?? "", steps: activeSteps, windowTitle: windowTitle, historyContext: historyCtx, sessionContext: sessionCtx, lastOutputContext: lastOutputCtx, executionContext: execCtx ) print("[TIMING] \(ts()) ← Gemini response received (\(String(format: "%.2f", Date().timeIntervalSince(tVlm)))s)") print("[VLM] on_task=\(result.onTask) notification=\(result.notification?.type ?? "none") friction=\(result.friction?.type ?? "none") | task: \(result.inferredTask ?? "") | \(result.vlmSummary ?? "")") // 6. 
Update history buffer let tApply = Date() if let summary = result.vlmSummary, !summary.isEmpty { historyBuffer.updateLastSummary(summary) } // Store full VLM result for self-refinement (encode key fields as JSON) let selfRefinementJSON = buildSelfRefinementJSON(result) historyBuffer.setLastOutput(selfRefinementJSON) // Tick execution age (clears after 3 iterations) historyBuffer.tickExecutionAge() // Save frames for executor context savedFramesForExecutor = frames monitoringError = nil applyDistractionResult(result) // Auto-link: if session is active but has NO task, try to match against database tasks if isSessionActive, activeTask == nil, proactiveCard == nil, let inferredTask = result.inferredTask, !inferredTask.isEmpty { await fetchTasksIfNeeded() if let matched = findMatchingTask( for: inferredTask, vlmSummary: result.vlmSummary ?? "", appName: result.appName ?? "" ) { var card = ProactiveCard(source: .sessionAction( type: "start_new", taskTitle: matched.title, checkpoint: "", reason: "Want to start a focus session for \"\(matched.title)\"?", sessionId: nil, taskId: nil )) card.matchedTask = matched showProactiveCard(card, autoDismiss: false) } } // Fallback: if VLM didn't suggest a session but task is stable, suggest start_new if result.sessionAction?.type == "none" || result.sessionAction == nil, !isSessionActive, let inferredTask = result.inferredTask, !inferredTask.isEmpty, shouldSuggestNewSession(inferredTask), proactiveCard == nil { await fetchTasksIfNeeded() let inferredWork = inferredTask let matched = findMatchingTask( for: inferredWork, vlmSummary: result.vlmSummary ?? "", appName: result.appName ?? "" ) let matchedTaskFromIncomplete = matchInferredTaskToExisting(inferredWork) let reason: String if let matched { reason = "Want to start a focus session for \"\(matched.title)\"?" } else { reason = "You've been working on this — want to start a focus session?" 
} var card = ProactiveCard(source: .sessionAction( type: "start_new", taskTitle: matched?.title ?? inferredWork, checkpoint: "", reason: reason, sessionId: nil, taskId: matchedTaskFromIncomplete?.id )) card.matchedTask = matched showProactiveCard(card, autoDismiss: false) } print("[TIMING] \(ts()) applyDistractionResult done (\(String(format: "%.3f", Date().timeIntervalSince(tApply)))s)") // 7. Backend post + live checkpoint (fire-and-forget) if let session = activeSession { let stepId = currentStep?.id let app = latestAppName let summary = latestVlmSummary let dCount = distractionCount print("[TIMING] \(ts()) posting result + checkpoint to backend…") Task { let tBackend = Date() try? await APIClient.shared.postAnalysisResult(result, sessionId: session.id) // Keep checkpoint fresh so resume card always has latest context try? await APIClient.shared.checkpointSession( sessionId: session.id, currentStepId: stepId, activeApp: app, lastScreenshotAnalysis: summary, distractionCount: dCount ) print("[TIMING] backend POST + checkpoint done (\(String(format: "%.2f", Date().timeIntervalSince(tBackend)))s)") } } print("[TIMING] ── total cycle: \(String(format: "%.2f", Date().timeIntervalSince(t0)))s") } catch { print("[TIMING] \(ts()) ✗ Gemini error after \(String(format: "%.2f", Date().timeIntervalSince(tVlm)))s: \(error)") } } /// Build a concise JSON string of the VLM result for self-refinement context. 
///
/// Only fields that are present on `result` are emitted; keys are sorted so the
/// output is stable between calls.
///
/// - Parameter result: The VLM analysis to summarise.
/// - Returns: A JSON object string, or `result.vlmSummary` (empty string when that
///   is `nil`) if the fields cannot be serialised.
private func buildSelfRefinementJSON(_ result: DistractionAnalysisResponse) -> String {
    var dict: [String: Any] = [
        "on_task": result.onTask,
        "confidence": result.confidence
    ]
    if let task = result.inferredTask { dict["inferred_task"] = task }
    if let summary = result.vlmSummary { dict["vlm_summary"] = summary }
    if let friction = result.friction {
        dict["friction_type"] = friction.type
        if let desc = friction.description { dict["friction_description"] = desc }
    }
    if let sa = result.sessionAction { dict["session_action"] = sa.type }
    if let app = result.appName { dict["app_name"] = app }

    // JSONSerialization raises an Objective-C exception (which `try?` cannot catch)
    // for invalid objects such as NaN/infinite numbers, so validate first.
    guard JSONSerialization.isValidJSONObject(dict),
          let data = try? JSONSerialization.data(withJSONObject: dict, options: [.sortedKeys]),
          let str = String(data: data, encoding: .utf8) else {
        return result.vlmSummary ?? ""
    }
    return str
}

// MARK: - Screen Capture

/// Captures the first shareable display as a 1280×720 JPEG.
///
/// - Returns: JPEG data, or `nil` when screen-capture access has not been granted,
///   no display is reported, the capture throws, or JPEG encoding fails.
private func captureScreen() async -> Data? {
    guard CGPreflightScreenCaptureAccess() else { return nil }
    do {
        let t0 = Date()
        let content = try await SCShareableContent.current
        print("[TIMING] captureScreen: SCShareableContent \(String(format: "%.3f", Date().timeIntervalSince(t0)))s")
        guard let display = content.displays.first else { return nil }

        let config = SCStreamConfiguration()
        config.width = 1280
        config.height = 720
        let filter = SCContentFilter(display: display, excludingWindows: [])

        let t1 = Date()
        let image = try await SCScreenshotManager.captureImage(
            contentFilter: filter, configuration: config)
        print("[TIMING] captureScreen: captureImage \(String(format: "%.3f", Date().timeIntervalSince(t1)))s")

        let t2 = Date()
        let jpeg = cgImageToJPEG(image)
        print("[TIMING] captureScreen: JPEG encode \(String(format: "%.3f", Date().timeIntervalSince(t2)))s → \((jpeg?.count ?? 0) / 1024)KB")
        return jpeg
    } catch {
        print("[TIMING] captureScreen: error \(error)")
        return nil
    }
}

/// Encodes a `CGImage` as JPEG with a compression factor of 0.5.
///
/// The bitmap representation is created directly from the `CGImage`, avoiding an
/// intermediate `NSImage` → TIFF encode/decode round-trip on every captured frame.
///
/// - Parameter image: The image to encode.
/// - Returns: JPEG data, or `nil` if the encoder produces no output.
private func cgImageToJPEG(_ image: CGImage) -> Data? {
    let bitmap = NSBitmapImageRep(cgImage: image)
    return bitmap.representation(using: .jpeg, properties: [.compressionFactor: 0.5])
}

// MARK: - Apply VLM Result

/// Applies one VLM analysis result to session state and the HUD.
///
/// In order: caches the latest summary / inferred task / app name, applies step
/// completions and checkpoint notes, acts on the VLM's notification decision
/// (friction card, resume card, or nudge), surfaces session lifecycle suggestions
/// (`start_new`, `resume`, `switch`, `complete`) as proactive cards, and — during a
/// focus session only — advances the off-task distraction timer.
///
/// - Parameter result: The analysis response to apply.
private func applyDistractionResult(_ result: DistractionAnalysisResponse) {
    if let summary = result.vlmSummary { latestVlmSummary = summary }
    if let task = result.inferredTask, !task.isEmpty { latestInferredTask = task }
    if let app = result.appName, !app.isEmpty { latestAppName = app }

    // Apply step side-effects
    for completedId in result.stepsCompleted {
        if let idx = activeSteps.firstIndex(where: { $0.id == completedId }) {
            activeSteps[idx].status = "done"
        }
    }
    if let note = result.checkpointNoteUpdate,
       let stepId = result.currentStepId,
       let idx = activeSteps.firstIndex(where: { $0.id == stepId }) {
        activeSteps[idx].checkpointNote = note
    }
    if let stepId = result.currentStepId,
       let idx = activeSteps.firstIndex(where: { $0.id == stepId }) {
        currentStepIndex = idx
    }

    // Trust VLM's explicit notification decision
    switch result.notification?.type {
    case "friction":
        if let friction = result.friction {
            if friction.isResumption {
                // Task resumption → show resume card (skip if already handled)
                if let sa = result.sessionAction,
                   sa.type == "resume",
                   let sessionId = sa.sessionId,
                   !handledSessionActions.contains(sessionId) {
                    showProactiveCard(ProactiveCard(source: .sessionAction(
                        type: "resume",
                        taskTitle: result.inferredTask ?? "Previous task",
                        checkpoint: friction.description ?? "",
                        reason: sa.reason ?? "You appear to be returning to this task.",
                        sessionId: sessionId,
                        taskId: nil
                    )))
                } else {
                    Task { await fetchResumeCard() }
                }
            } else if shouldNotify(friction: friction), proactiveCard == nil {
                showProactiveCard(ProactiveCard(source: .vlmFriction(
                    frictionType: friction.type,
                    description: friction.description,
                    actions: friction.proposedActions
                )))
            }
        }
    case "nudge":
        // Only show nudges during active focus sessions — not in monitoring mode.
        // In monitoring mode the VLM should suggest start_new/resume, not nag.
        if isSessionActive, let nudge = result.notification?.message, !nudge.isEmpty {
            showNudge(nudge)
        }
    default:
        break
    }

    // Handle session lifecycle suggestions
    if let sa = result.sessionAction, sa.type != "none" {
        switch sa.type {
        case "start_new" where !isSessionActive && proactiveCard == nil:
            let inferredWork = result.inferredTask ?? "your current work"
            let matchedTaskId = sa.taskId ?? matchInferredTaskToExisting(inferredWork)?.id
            let matchedTitle = (matchedTaskId != nil
                ? incompleteTasks.first(where: { $0.id == matchedTaskId })?.title
                : nil) ?? inferredWork
            var card = ProactiveCard(source: .sessionAction(
                type: "start_new",
                taskTitle: matchedTitle,
                checkpoint: "",
                reason: sa.reason ?? "You appear to be actively working — want to start a focus session?",
                sessionId: nil,
                taskId: matchedTaskId
            ))
            // Also try the sophisticated matcher
            card.matchedTask = findMatchingTask(
                for: inferredWork,
                vlmSummary: result.vlmSummary ?? "",
                appName: result.appName ?? ""
            )
            showProactiveCard(card, autoDismiss: false)

        case "resume" where proactiveCard == nil && !handledSessionActions.contains(sa.sessionId ?? ""):
            // Look the open session up once; both title and checkpoint come from it.
            let openSession = openSessions.first(where: { $0.id == sa.sessionId })
            let taskTitle = openSession?.task?.title ?? result.inferredTask ?? "Previous task"
            let checkpoint = openSession?.checkpoint?.lastActionSummary ?? ""
            showProactiveCard(ProactiveCard(source: .sessionAction(
                type: "resume",
                taskTitle: taskTitle,
                checkpoint: checkpoint,
                reason: sa.reason ?? "You appear to be returning to this task.",
                sessionId: sa.sessionId,
                taskId: nil
            )))

        case "switch" where proactiveCard == nil && !handledSessionActions.contains(sa.sessionId ?? ""):
            let taskTitle = openSessions.first(where: { $0.id == sa.sessionId })?.task?.title
                ?? result.inferredTask
                ?? "Another task"
            showProactiveCard(ProactiveCard(source: .sessionAction(
                type: "switch",
                taskTitle: taskTitle,
                checkpoint: "",
                reason: sa.reason ?? "You seem to have moved to a different task.",
                sessionId: sa.sessionId,
                taskId: nil
            )), autoDismiss: false)

        case "complete" where proactiveCard == nil:
            let taskTitle = activeTask?.title ?? result.inferredTask ?? "Current task"
            showProactiveCard(ProactiveCard(source: .sessionAction(
                type: "complete",
                taskTitle: taskTitle,
                checkpoint: "",
                reason: sa.reason ?? "It looks like you've finished this task.",
                sessionId: sa.sessionId ?? activeSession?.id,
                taskId: nil
            )), autoDismiss: false)

        default:
            break
        }
    }

    // MARK: Distraction Timer (focus session only)
    if monitoringState == .focusSession {
        if result.onTask {
            if continuousOffTaskStart != nil {
                print("[Distraction] User back on task — timer reset (was \(consecutiveNudgeCount) nudges)")
            }
            continuousOffTaskStart = nil
            consecutiveNudgeCount = 0
        } else {
            // One timestamp for the whole evaluation keeps the comparisons consistent.
            let now = Date()

            // Start the off-task clock on the first off-task result.
            let offTaskStart: Date
            if let existingStart = continuousOffTaskStart {
                offTaskStart = existingStart
            } else {
                offTaskStart = now
                continuousOffTaskStart = now
            }
            let offTaskDuration = now.timeIntervalSince(offTaskStart)

            // Nudges are rate-limited to one per threshold interval.
            let canNudge: Bool
            if let lastNudge = lastNudgeSentAt {
                canNudge = now.timeIntervalSince(lastNudge) >= distractionThresholdSeconds
            } else {
                canNudge = true
            }

            if offTaskDuration >= distractionThresholdSeconds && canNudge {
                if consecutiveNudgeCount < maxConsecutiveNudges {
                    consecutiveNudgeCount += 1
                    lastNudgeSentAt = now
                    continuousOffTaskStart = now
                    let nudgeContent = buildNudgeContent()
                    showNudge(nudgeContent.body)

                    // Backend push to all other devices
                    if let sessionId = activeSession?.id {
                        // Snapshot the count so the request reports this nudge's number
                        // even if state changes before the task runs.
                        let nudgeNumber = consecutiveNudgeCount
                        Task {
                            _ = try? await APIClient.shared.sendNudge(
                                sessionId: sessionId,
                                title: nudgeContent.title,
                                body: nudgeContent.body,
                                nudgeNumber: nudgeNumber,
                                lastStep: nudgeContent.lastStep,
                                nextStep: nudgeContent.nextStep
                            )
                        }
                    }
                    print("[Distraction] Nudge \(consecutiveNudgeCount)/\(maxConsecutiveNudges) sent")
                }
                // NOTE(review): the log text hard-codes "5"; the enforced limit is
                // maxConsecutiveNudges — confirm they are meant to match.
                if consecutiveNudgeCount >= maxConsecutiveNudges {
                    print("[Distraction] 5 nudges reached — auto-stopping session")
                    Task { await endSession(status: "auto_stopped") }
                }
            }
        }
    }
}

// MARK: - Nudge Content

/// Builds the text for a distraction nudge from the current step state.
///
/// - Returns: A fixed title, a body that mentions the last step marked done and the
///   current step when each is available, plus those two step titles on their own.
private func buildNudgeContent() -> (title: String, body: String, lastStep: String?, nextStep: String?) {
    let lastCompleted = activeSteps.last(where: { $0.isDone })?.title
    let nextStepTitle = currentStep?.title
    let title = "Hey, quick check-in!"

    var body = ""
    if let last = lastCompleted {
        body += "Done: \(last). "
    }
    if let next = nextStepTitle {
        body += "Next up: \(next). "
    }
    body += "You got this!"

    return (title, body, lastCompleted, nextStepTitle)
}
}