// SessionManager.swift — Focus session state, native VLM screen analysis
// Screenshot capture → Gemini Vision API → apply results to UI + post to backend.
// No Python subprocess. No external process management.

import AppKit
import SwiftUI
import UserNotifications
import ScreenCaptureKit

/// Main-actor singleton that holds the observable focus-session state and the
/// private bookkeeping used by screenshot capture and app-switch tracking.
@Observable
@MainActor
final class SessionManager {
    static let shared = SessionManager()

    // MARK: - State

    var activeSession: FocusSession?
    var activeTask: AppTask?
    var activeSteps: [Step] = []
    var currentStepIndex: Int = 0
    var isSessionActive: Bool = false
    var sessionStartDate: Date?
    var distractionCount: Int = 0
    var lastNudge: String?
    var resumeCard: ResumeCard?
    var showingResumeCard: Bool = false
    var errorMessage: String?
    var isLoading: Bool = false

    // VLM / proactive agent
    var proactiveCard: ProactiveCard?
    var latestVlmSummary: String?
    var latestInferredTask: String?
    var isExecuting: Bool = false
    var executorOutput: (title: String, content: String)?
    var monitoringError: String?

    // Screenshot engine
    var isCapturing: Bool = false
    // `Task` is generic, so the element types must be spelled out for this to compile.
    // NOTE(review): `Task<Void, Never>` assumes the capture task neither returns a value
    // nor throws — confirm against the code that assigns `captureTask`.
    @ObservationIgnored private var captureTask: Task<Void, Never>?
    private let captureInterval: TimeInterval = 5.0

    // Frame buffer — accumulate N frames before calling VLM for temporal diff context
    @ObservationIgnored private var frameBuffer: [Data] = []
    private let framesPerVLMCall = 3

    // Rolling summary history fed as context into subsequent VLM calls
    private struct HistoryEntry { let summary: String; let timestamp: Date }
    @ObservationIgnored private var screenshotHistory: [HistoryEntry] = []

    // App switch tracking
    @ObservationIgnored private var appSwitches: [(name: String, bundleId: String, time: Date)] = []
    @ObservationIgnored private var appSwitchObserver: (any NSObjectProtocol)?
    @ObservationIgnored private var lastApp: (name: String, bundleId: String) = ("", "")
    @ObservationIgnored private var lastAppEnteredAt: Date = Date()

    // Proactive card auto-dismiss timer.
    // Assigned a non-throwing `Task { … }` with no result, hence `Task<Void, Never>`.
    @ObservationIgnored private var proactiveCardTimer: Task<Void, Never>?
private init() {}

    // MARK: - Computed

    /// The step at `currentStepIndex`, or `nil` when that index is not a valid
    /// position in `activeSteps`.
    var currentStep: Step? {
        // `indices.contains` rejects negative indices as well as too-large ones,
        // so the subscript below can never trap.
        guard activeSteps.indices.contains(currentStepIndex) else { return nil }
        return activeSteps[currentStepIndex]
    }

    /// Number of steps in `activeSteps` whose `isDone` is true.
    var completedCount: Int { activeSteps.filter(\.isDone).count }

    /// Number of steps in `activeSteps`.
    var totalSteps: Int { activeSteps.count }

    /// Seconds since `sessionStartDate`, or 0 when no start date is set.
    var sessionElapsed: TimeInterval {
        guard let start = sessionStartDate else { return 0 }
        return Date().timeIntervalSince(start)
    }

    // MARK: - Monitoring Lifecycle

    /// Immediately shuts down all monitoring without making any API calls.
    ///
    /// Stops capture and the app-switch observer, cancels the proactive-card timer,
    /// and clears session, VLM and buffer state, including the persisted session id.
    func stopMonitoring() {
        stopCapture()
        stopAppObserver()
        proactiveCardTimer?.cancel()
        proactiveCardTimer = nil

        // Session state
        activeSession = nil
        activeTask = nil
        activeSteps = []
        isSessionActive = false
        sessionStartDate = nil
        lastNudge = nil
        resumeCard = nil
        showingResumeCard = false

        // VLM / proactive agent state
        proactiveCard = nil
        latestVlmSummary = nil
        latestInferredTask = nil
        isExecuting = false
        executorOutput = nil
        monitoringError = nil

        // Buffers and persistence
        screenshotHistory = []
        frameBuffer = []
        persistedSessionId = nil
    }

    /// Called once after login. Auto-resumes any existing active session and starts the capture loop.
    ///
    /// Does nothing when there is no auth token or capture is already running.
    /// When Screen Recording access is missing, requests it, sets `monitoringError`,
    /// and returns without starting capture.
    func startMonitoring() async {
        guard TokenStore.shared.token != nil, !isCapturing else { return }
        monitoringError = nil
        await requestNotificationPermission()

        // Silent preflight — never shows UI; only request permission if not yet granted.
        guard CGPreflightScreenCaptureAccess() else {
            CGRequestScreenCaptureAccess()
            monitoringError = "Screen Recording permission required — enable in System Settings → Privacy & Security → Screen Recording, then tap Retry"
            return
        }

        // A failed lookup is handled exactly like "no active session":
        // fall back to plain capture plus app-switch tracking.
        let existing: FocusSession?
        do {
            existing = try await APIClient.shared.getActiveSession()
        } catch {
            existing = nil
        }

        if let existing {
            await autoResumeSession(existing)
        } else {
            startCapture()
            startAppObserver()
        }
    }

    /// Silently resume an active session found on the backend (no loading UI shown).
private func autoResumeSession(_ session: FocusSession) async {
        // Adopt the backend session and reset per-session counters and buffers.
        activeSession = session
        persistedSessionId = session.id
        isSessionActive = true
        // The start date is the moment of resume, not a timestamp read from `session`.
        sessionStartDate = Date()
        distractionCount = 0
        lastNudge = nil
        screenshotHistory = []
        frameBuffer = []
        if let taskId = session.taskId {
            do {
                let tasks = try await APIClient.shared.getTasks()
                activeTask = tasks.first(where: { $0.id == taskId })
                if let task = activeTask {
                    let steps = try await APIClient.shared.getSteps(taskId: task.id)
                    activeSteps = steps.sorted { $0.sortOrder < $1.sortOrder }
                    // Prefer the step flagged active, then the first "pending" one, else index 0.
                    currentStepIndex = activeSteps.firstIndex(where: { $0.isActive })
                        ?? activeSteps.firstIndex(where: { $0.status == "pending" })
                        ?? 0
                }
            } catch {
                // NOTE(review): errors from getTasks/getSteps are dropped here without a log
                // or `errorMessage`; whatever was assigned before the throw is kept.
            }
        }
        let shortId = String(session.id.prefix(8))
        let taskLabel = activeTask?.title ?? "(no task)"
        latestVlmSummary = "Resumed session \(shortId) · \(taskLabel)"
        startCapture()
        startAppObserver()
    }

    // MARK: - Session Lifecycle

    /// Session id stored in `UserDefaults.standard` under "lockInBro.lastSessionId".
    /// Assigning `nil` removes the key.
    private var persistedSessionId: String? {
        get { UserDefaults.standard.string(forKey: "lockInBro.lastSessionId") }
        set {
            if let v = newValue {
                UserDefaults.standard.set(v, forKey: "lockInBro.lastSessionId")
            } else {
                UserDefaults.standard.removeObject(forKey: "lockInBro.lastSessionId")
            }
        }
    }

    /// Starts a new session on the backend for `task` (or without a task when `task` is nil),
    /// loads the task's steps, and restarts the capture loop.
    ///
    /// `isLoading` is true for the duration of the call. Any error thrown by the
    /// start-session or get-steps calls is written to `errorMessage`.
    func startSession(task: AppTask?) async {
        isLoading = true
        errorMessage = nil
        do {
            // End any existing session first
            // Candidate id: in-memory session, then the persisted id, then a backend lookup.
            var staleId: String? = activeSession?.id ?? persistedSessionId
            if staleId == nil {
                staleId = (try? await APIClient.shared.getActiveSession())?.id
            }
            if let id = staleId {
                // Best effort: a failure to end the stale session does not block the new one.
                _ = try? await APIClient.shared.endSession(sessionId: id, status: "completed")
            }
            let session = try await APIClient.shared.startSession(taskId: task?.id)
            activeSession = session
            persistedSessionId = session.id
            activeTask = task
            activeSteps = []
            currentStepIndex = 0
            isSessionActive = true
            sessionStartDate = Date()
            distractionCount = 0
            lastNudge = nil
            screenshotHistory = []
            frameBuffer = []
            if let task {
                // NOTE(review): if getSteps throws, the session state assigned above stays in
                // place but the capture restart below is skipped.
                let steps = try await APIClient.shared.getSteps(taskId: task.id)
                activeSteps = steps.sorted { $0.sortOrder < $1.sortOrder }
                currentStepIndex = activeSteps.firstIndex(where: { $0.isActive })
                    ?? activeSteps.firstIndex(where: { $0.status == "pending" })
                    ?? 0
            }
            await requestNotificationPermission()
            // Restart capture loop (in case it wasn't running or was in monitoring-only mode)
            stopCapture()
            startCapture()
            if appSwitchObserver == nil { startAppObserver() }
        } catch {
            errorMessage = error.localizedDescription
        }
        isLoading = false
    }

    /// Ends the active session on the backend (best effort) with the given `status`,
    /// clears all session and VLM state, then restarts capture and app-switch
    /// observation if an auth token is still present.
    func endSession(status: String = "completed") async {
        stopCapture()
        stopAppObserver()
        if let session = activeSession {
            // The result and any error of the backend call are ignored.
            _ = try? await APIClient.shared.endSession(sessionId: session.id, status: status)
        }
        activeSession = nil
        activeTask = nil
        activeSteps = []
        isSessionActive = false
        sessionStartDate = nil
        lastNudge = nil
        resumeCard = nil
        showingResumeCard = false
        proactiveCard = nil
        latestVlmSummary = nil
        latestInferredTask = nil
        isExecuting = false
        executorOutput = nil
        proactiveCardTimer?.cancel()
        proactiveCardTimer = nil
        screenshotHistory = []
        frameBuffer = []
        persistedSessionId = nil
        // Keep the capture loop running for app-switch heuristics
        if TokenStore.shared.token != nil {
            startCapture()
            startAppObserver()
        }
    }

    /// Fetches the resume card for the active session and flags it for display.
    /// Does nothing without an active session; errors go to `errorMessage`.
    func fetchResumeCard() async {
        guard let session = activeSession else { return }
        do {
            let response = try await APIClient.shared.resumeSession(sessionId: session.id)
            resumeCard = response.resumeCard
            showingResumeCard = true
        } catch {
            errorMessage = error.localizedDescription
        }
    }

    /// Marks the current step complete on the backend, stores the updated step, and
    /// moves `currentStepIndex` to the first step whose status is "pending".
    /// When no pending step exists the index is left unchanged. Errors go to `errorMessage`.
    func completeCurrentStep() async {
        guard let step = currentStep else { return }
        do {
            let updated = try await APIClient.shared.completeStep(stepId: step.id)
            if let idx = activeSteps.firstIndex(where: { $0.id == updated.id }) {
                activeSteps[idx] = updated
            }
            if let next = activeSteps.firstIndex(where: { $0.status == "pending" }) {
                currentStepIndex = next
            }
        } catch {
            errorMessage = error.localizedDescription
        }
    }

    // MARK: - Retry (HUD Retry button)

    /// Clears `monitoringError` and the frame buffer, restarts capture, and starts
    /// the app-switch observer if none is registered.
    func retryMonitoring() {
        monitoringError = nil
        frameBuffer = []
        stopCapture()
        startCapture()
        if appSwitchObserver == nil { startAppObserver() }
    }

    // MARK: - Proactive Card Lifecycle

    /// Shows `card` (animated) and replaces any pending auto-dismiss timer with a
    /// new one that dismisses the card after 15 seconds unless cancelled first.
    private func showProactiveCard(_ card: ProactiveCard) {
        proactiveCardTimer?.cancel()
        withAnimation { proactiveCard = card }
        proactiveCardTimer = Task { [weak self] in
            // `try?` swallows the cancellation error; the guard below checks it explicitly.
            try? await Task.sleep(for: .seconds(15))
            guard !Task.isCancelled, let self else { return }
            await MainActor.run { self.dismissProactiveCard() }
        }
    }

    /// Cancels the auto-dismiss timer and hides the proactive card (animated).
    func dismissProactiveCard() {
        proactiveCardTimer?.cancel()
        proactiveCardTimer = nil
        withAnimation { proactiveCard = nil }
    }

    /// Hides the current proactive card and, when its source is `.vlmFriction`,
    /// runs the proposed action at `actionIndex` through `GeminiVLMClient.executeAction`.
    ///
    /// The outcome is stored in `executorOutput`: the client's result on success,
    /// otherwise the action's `details` or a fixed fallback string. `isExecuting`
    /// is true while the work is in flight.
    func approveProactiveCard(actionIndex: Int) {
        proactiveCardTimer?.cancel()
        proactiveCardTimer = nil
        // Keep a copy: the card is cleared before its actions are inspected.
        let card = proactiveCard
        withAnimation { proactiveCard = nil }
        // NOTE(review): only the upper bound is checked; a negative `actionIndex`
        // would pass this guard and trap on the subscript below.
        guard case .vlmFriction(_, _, let actions) = card?.source,
              actionIndex < actions.count else { return }
        let action = actions[actionIndex]
        isExecuting = true
        Task {
            do {
                let screenshot = await captureScreen()
                let geminiKey = UserDefaults.standard.string(forKey: "geminiApiKey") ?? ""
                guard !geminiKey.isEmpty else {
                    // No API key: report the action's own details instead of calling Gemini.
                    isExecuting = false
                    executorOutput = (title: action.label, content: action.details ?? "Action approved.")
                    return
                }
                let client = GeminiVLMClient(apiKey: geminiKey)
                let result = try await client.executeAction(
                    label: action.label,
                    actionType: action.actionType,
                    details: action.details ?? "",
                    screenshot: screenshot
                )
                isExecuting = false
                executorOutput = (title: action.label, content: result)
            } catch {
                isExecuting = false
                executorOutput = (title: action.label, content: action.details ?? "Couldn't complete automatically.")
            }
        }
    }

    // MARK: - App Switch Observer

    /// Records the current frontmost app as the starting point, clears the switch
    /// history, and registers for `NSWorkspace.didActivateApplicationNotification`.
    private func startAppObserver() {
        let current = NSWorkspace.shared.frontmostApplication
        lastApp = (current?.localizedName ?? "", current?.bundleIdentifier ?? "")
        lastAppEnteredAt = Date()
        appSwitches = []
        // NOTE(review): an already-registered observer is overwritten here without being
        // removed; callers appear to guard against that — confirm.
        appSwitchObserver = NSWorkspace.shared.notificationCenter.addObserver(
            forName: NSWorkspace.didActivateApplicationNotification,
            object: nil,
            queue: .main
        ) { [weak self] notification in
            guard let app = notification.userInfo?[NSWorkspace.applicationUserInfoKey] as? NSRunningApplication else { return }
            Task { @MainActor [weak self] in self?.handleAppSwitch(app: app) }
        }
    }

    /// Removes the workspace observer, if any, and clears the switch history.
    private func stopAppObserver() {
        if let observer = appSwitchObserver {
            NSWorkspace.shared.notificationCenter.removeObserver(observer)
            appSwitchObserver = nil
        }
        appSwitches = []
    }

    /// Handles activation of `app`: reports the previous app's dwell time to the
    /// backend (only during a session), appends the switch to the history, and
    /// shows a proactive card when `detectRepetitiveLoop()` reports a loop.
    private func handleAppSwitch(app: NSRunningApplication) {
        let name = app.localizedName ?? "Unknown"
        let bundleId = app.bundleIdentifier ?? ""
        let now = Date()
        // Apps are compared by display name; re-activation of the same name is ignored.
        guard name != lastApp.name else { return }
        // Log previous app dwell time to backend
        // Truncated to whole seconds, with a minimum of 1.
        let duration = max(1, Int(now.timeIntervalSince(lastAppEnteredAt)))
        let prev = lastApp
        if let session = activeSession, !prev.name.isEmpty {
            Task {
                _ = try? await APIClient.shared.appActivity(
                    sessionId: session.id,
                    appBundleId: prev.bundleId,
                    appName: prev.name,
                    durationSeconds: duration
                )
            }
        }
        lastApp = (name, bundleId)
        lastAppEnteredAt = now
        appSwitches.append((name: name, bundleId: bundleId, time: now))
        // Keep at most the 30 most recent switches.
        if appSwitches.count > 30 { appSwitches.removeFirst() }
        guard isSessionActive, proactiveCard == nil else { return }
        if let loop = detectRepetitiveLoop() {
            showProactiveCard(ProactiveCard(source: .appSwitchLoop(apps: loop.apps, switchCount: loop.count)))
        }
    }

    /// Looks for a back-and-forth pattern in the app switches of the last 300 seconds.
    /// Requires at least six recent switches whose last six involve exactly two
    /// distinct app names. The remainder of the check is not visible (see note below).
    private func detectRepetitiveLoop() -> (apps: [String], count: Int)? {
        let cutoff = Date().addingTimeInterval(-300)
        let recent = appSwitches.filter { $0.time > cutoff }.map(\.name)
        guard recent.count >= 6 else { return nil }
        let last6 = Array(recent.suffix(6))
        guard Set(last6).count == 2 else { return nil }
        // NOTE(review): SOURCE IS DAMAGED HERE. The text between `1..` and
        // ` framesPerVLMCall` is missing from this copy of the file; it looks as if
        // everything from a `<` up to the next `>` was stripped. The lost span presumably
        // held the rest of this loop and function, the definitions of `startCapture()` and
        // `stopCapture()` (both called elsewhere in this class), and the opening of the
        // frame-analysis function whose tail follows. The line below is kept exactly as
        // found. Restore the span from version control rather than reconstructing it.
        for i in 1.. framesPerVLMCall { frameBuffer.removeFirst() }
        // Only call VLM once we have a full batch for temporal diff analysis
        guard frameBuffer.count >= framesPerVLMCall else { return }
        let geminiKey = UserDefaults.standard.string(forKey: "geminiApiKey") ?? ""
        guard !geminiKey.isEmpty else {
            print("[VLM] No Gemini API key set — skipping analysis")
            return
        }
        let client = GeminiVLMClient(apiKey: geminiKey)
        // The value used as "window title" is the frontmost application's localized name.
        let windowTitle = NSWorkspace.shared.frontmostApplication?.localizedName ?? ""
        let recentSummaries = screenshotHistory.map(\.summary)
        let frames = frameBuffer // snapshot before async gap
        do {
            print("[VLM] Calling Gemini with \(frames.count) frames…")
            let result = try await client.analyze(
                frames: frames,
                taskTitle: activeTask?.title ?? "",
                taskGoal: activeTask?.description ?? "",
                steps: activeSteps,
                windowTitle: windowTitle,
                recentSummaries: recentSummaries
            )
            print("[VLM] Result: on_task=\(result.onTask), friction=\(result.friction?.type ?? "none"), summary=\(result.vlmSummary ?? "")")
            // Append to rolling summary history
            // The history is capped at the 4 most recent summaries.
            if let summary = result.vlmSummary, !summary.isEmpty {
                screenshotHistory.append(HistoryEntry(summary: summary, timestamp: Date()))
                if screenshotHistory.count > 4 { screenshotHistory.removeFirst() }
            }
            // Clear frame buffer — next batch starts fresh
            frameBuffer.removeAll()
            monitoringError = nil
            applyDistractionResult(result)
            // Post result to backend (fire-and-forget)
            if let session = activeSession {
                Task { try? await APIClient.shared.postAnalysisResult(result, sessionId: session.id) }
            }
        } catch {
            print("[VLM] Analysis error: \(error)")
            // Don't surface transient errors — the next attempt will retry automatically
        }
    }

    // MARK: - Screen Capture

    /// Captures the first display listed by `SCShareableContent.current` with a
    /// 1280×720 stream configuration and returns it as JPEG data.
    ///
    /// - Returns: JPEG bytes, or `nil` when screen-capture access is not granted,
    ///   no display is listed, a ScreenCaptureKit call throws, or JPEG encoding fails.
    private func captureScreen() async -> Data? {
        guard CGPreflightScreenCaptureAccess() else { return nil }
        do {
            let content = try await SCShareableContent.current
            guard let display = content.displays.first else { return nil }
            let config = SCStreamConfiguration()
            config.width = 1280
            config.height = 720
            let filter = SCContentFilter(display: display, excludingWindows: [])
            let image = try await SCScreenshotManager.captureImage(
                contentFilter: filter,
                configuration: config)
            return cgImageToJPEG(image)
        } catch {
            return nil
        }
    }

    /// Encodes `image` as JPEG with compression factor 0.5, or returns `nil` when
    /// any conversion step fails.
    private func cgImageToJPEG(_ image: CGImage) -> Data? {
        // Conversion path: CGImage → NSImage → TIFF data → NSBitmapImageRep → JPEG.
        // NOTE(review): `NSBitmapImageRep(cgImage:)` might avoid the intermediate
        // TIFF — verify output equivalence before changing.
        let nsImage = NSImage(cgImage: image, size: .zero)
        guard let tiff = nsImage.tiffRepresentation,
              let bitmap = NSBitmapImageRep(data: tiff),
              let jpeg = bitmap.representation(using: .jpeg, properties: [.compressionFactor: 0.5])
        else { return nil }
        return jpeg
    }

    // MARK: - Apply VLM Result

    /// Applies one analysis result to the observable state: summary and inferred
    /// task, step completion and checkpoint notes, the current step, and finally
    /// either a friction card, a resume card, or a distraction nudge.
    private func applyDistractionResult(_ result: DistractionAnalysisResponse) {
        if let summary = result.vlmSummary { latestVlmSummary = summary }
        if let task = result.inferredTask, !task.isEmpty { latestInferredTask = task }
        // Apply step side-effects
        for completedId in result.stepsCompleted {
            if let idx = activeSteps.firstIndex(where: { $0.id == completedId }) {
                activeSteps[idx].status = "done"
            }
        }
        // The checkpoint note is attached to the step named by `currentStepId`.
        if let note = result.checkpointNoteUpdate,
           let stepId = result.currentStepId,
           let idx = activeSteps.firstIndex(where: { $0.id == stepId }) {
            activeSteps[idx].checkpointNote = note
        }
        if let stepId = result.currentStepId,
           let idx = activeSteps.firstIndex(where: { $0.id == stepId }) {
            currentStepIndex = idx
        }
        // Notification priority: friction card (formal or has actions) → nudge
        if let friction = result.friction {
            let shouldShow = friction.isActionable || !friction.proposedActions.isEmpty
            if shouldShow {
                if friction.isResumption {
                    Task { await fetchResumeCard() }
                } else if proactiveCard == nil {
                    // An already-visible card is never replaced by a friction card.
                    showProactiveCard(ProactiveCard(source: .vlmFriction(
                        frictionType: friction.type,
                        description: friction.description,
                        actions: friction.proposedActions
                    )))
                }
            } else if !result.onTask, result.confidence > 0.7, let nudge = result.gentleNudge {
                // Friction present but not worth a card: fall back to the nudge rule.
                distractionCount += 1
                lastNudge = nudge
                sendNudgeNotification(nudge)
            }
        } else if !result.onTask, result.confidence > 0.7, let nudge = result.gentleNudge {
            // Same nudge rule as above, for results without any friction.
            distractionCount += 1
            lastNudge = nudge
            sendNudgeNotification(nudge)
        }
    }

    // MARK: - Notifications

    /// Posts a local user notification with a fixed title, `nudge` as the body,
    /// the default sound, and no trigger.
    private func sendNudgeNotification(_ nudge: String) {
        let content = UNMutableNotificationContent()
        content.title = "Hey, quick check-in!"
        content.body = nudge
        content.sound = .default
        let req = UNNotificationRequest(
            identifier: UUID().uuidString,
            content: content,
            trigger: nil
        )
        UNUserNotificationCenter.current().add(req)
    }

    /// Requests alert and sound notification authorization; the outcome and any
    /// error are discarded.
    private func requestNotificationPermission() async {
        try? await UNUserNotificationCenter.current()
            .requestAuthorization(options: [.alert, .sound])
    }
}